<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">vavilov</journal-id><journal-title-group><journal-title xml:lang="ru">Вавиловский журнал генетики и селекции</journal-title><trans-title-group xml:lang="en"><trans-title>Vavilov Journal of Genetics and Breeding</trans-title></trans-title-group></journal-title-group><issn pub-type="epub">2500-3259</issn><publisher><publisher-name>Institute of Cytology and Genetics of Siberian Branch of the RAS</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.18699/J15.092</article-id><article-id custom-type="elpub" pub-id-type="custom">vavilov-483</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>Геномика и анализ полиморфизмов</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>Genomics and Polymorphism Analysis</subject></subj-group></article-categories><title-group><article-title>Фланкирующие повторы мономеров определяют пониженную контекстную сложность сайтов однонуклеотидных полиморфизмов в геноме человека</article-title><trans-title-group xml:lang="en"><trans-title>Flanking monomer repeats define lower context complexity of sites containing single nucleotide polymorphisms in the human genome</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Сафронова</surname><given-names>Н. С.</given-names></name><name name-style="western" xml:lang="en"><surname>Safronova</surname><given-names>N. 
S.</given-names></name></name-alternatives><email xlink:type="simple">taschasafronova@bionet.nsc.ru</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Пономаренко</surname><given-names>М. П.</given-names></name><name name-style="western" xml:lang="en"><surname>Ponomarenko</surname><given-names>M. P.</given-names></name></name-alternatives><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Абнизова</surname><given-names>И. И.</given-names></name><name name-style="western" xml:lang="en"><surname>Abnizova</surname><given-names>I. I.</given-names></name></name-alternatives><xref ref-type="aff" rid="aff-2"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Орлова</surname><given-names>Г. В.</given-names></name><name name-style="western" xml:lang="en"><surname>Orlova</surname><given-names>G. V.</given-names></name></name-alternatives><xref ref-type="aff" rid="aff-3"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Чадаева</surname><given-names>И. В.</given-names></name><name name-style="western" xml:lang="en"><surname>Chadaeva</surname><given-names>I. V.</given-names></name></name-alternatives><xref ref-type="aff" rid="aff-3"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Орлов</surname><given-names>Ю. Л.</given-names></name><name name-style="western" xml:lang="en"><surname>Orlov</surname><given-names>Y. 
L.</given-names></name></name-alternatives><email xlink:type="simple">orlov@bionet.nsc.ru</email><xref ref-type="aff" rid="aff-4"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru">Федеральное государственное бюджетное научное учреждение «Федеральный исследовательский центр Институт цитологии и генетики&#13;
Сибирского отделения Российской академии наук», Новосибирск, Россия&#13;
&#13;
Федеральное государственное автономное образовательное учреждение высшего образования «Новосибирский национальный исследовательский государственный университет», Новосибирск, Россия<country>Россия</country></aff><aff xml:lang="en">Institute of Cytology and Genetics SB RAS, Novosibirsk, Russia&#13;
&#13;
Novosibirsk State University, Novosibirsk, Russia<country>Russian Federation</country></aff></aff-alternatives><aff-alternatives id="aff-2"><aff xml:lang="ru">Центр Сенгера, Кембридж, Великобритания<country>Великобритания</country></aff><aff xml:lang="en">Sanger Center, Cambridge, UK<country>United Kingdom</country></aff></aff-alternatives><aff-alternatives id="aff-3"><aff xml:lang="ru">Федеральное государственное бюджетное научное учреждение «Федеральный исследовательский центр Институт цитологии и генетики&#13;
Сибирского отделения Российской академии наук», Новосибирск, Россия<country>Россия</country></aff><aff xml:lang="en">Institute of Cytology and Genetics SB RAS, Novosibirsk, Russia<country>Russian Federation</country></aff></aff-alternatives><aff-alternatives id="aff-4"><aff xml:lang="ru">Федеральное государственное бюджетное научное учреждение «Федеральный исследовательский центр Институт цитологии и генетики Сибирского отделения Российской академии наук», Новосибирск, Россия&#13;
&#13;
Федеральное государственное автономное образовательное учреждение высшего образования «Новосибирский национальный исследовательский государственный университет», Новосибирск, Россия<country>Россия</country></aff><aff xml:lang="en">Institute of Cytology and Genetics SB RAS, Novosibirsk, Russia&#13;
&#13;
Novosibirsk State University, Novosibirsk, Russia<country>Russian Federation</country></aff></aff-alternatives><pub-date pub-type="collection"><year>2015</year></pub-date><pub-date pub-type="epub"><day>02</day><month>01</month><year>2016</year></pub-date><volume>19</volume><issue>6</issue><fpage>668</fpage><lpage>674</lpage><permissions><copyright-statement>Copyright &#x00A9; Сафронова Н.С., Пономаренко М.П., Абнизова И.И., Орлова Г.В., Чадаева И.В., Орлов Ю.Л., 2016</copyright-statement><copyright-year>2016</copyright-year><copyright-holder xml:lang="ru">Сафронова Н.С., Пономаренко М.П., Абнизова И.И., Орлова Г.В., Чадаева И.В., Орлов Ю.Л.</copyright-holder><copyright-holder xml:lang="en">Safronova N.S., Ponomarenko M.P., Abnizova I.I., Orlova G.V., Chadaeva I.V., Orlov Y.L.</copyright-holder><license license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri xlink:href="https://vavilov.elpub.ru/jour/article/view/483">https://vavilov.elpub.ru/jour/article/view/483</self-uri><abstract><p>Исследование зависимости частоты возникновения мутаций в геноме человека выполнено на примере набора документированных однонуклеотидных полиморфизмов (ОНП) из проекта «1 000 геномов». Рассмотрены задачи разработки новых компьютерных методов статистического анализа генетических текстов на основе оценок сложности последовательности символов. Показано применение профилей сложности в скользящем окне к анализу сайтов, содержащих однонуклеотидные полиморфизмы в геноме человека. Установлено локальное понижение сложности текста в районе ОНП. На основе анализа профилей сложности в участках, содержащих ОНП, показано, что фланкирующие повторы мономеров определяют пониженную контекстную сложность сайтов однонуклеотидных полиморфизмов в геноме человека. 
Эффект локального понижения уровня сложности текста последовательностей фланкирующих сайты ОНП подтвержден для данных о полиморфизмах в геномах крысы и мыши. Определены различия в контекстной организации для кодирующих и регуляторных последовательностей, которые отражаются в сложности текста нуклеотидных последовательностей, содержащих ОНП. Изменения в частоте точковых мутаций были ранее показаны для последовательностей, содержащих микросателлиты. С использованием более общего математического аппарата и более полных данных в работе показана насыщенность политрактами и простыми повторяющимися последовательностями локального геномного окружения участков, содержащих ОНП. Определены олигонуклеотиды с повышенной частотой встречаемости в геномном окружении ОНП у человека, показана их связь с политрактами. Присутствие политрактов может свидетельствовать о большей вероятности разрыва двойной цепи ДНК в этой точке, приводящей к повышению частоты замен нуклеотидов. Полученные оценки были определены при помощи разработанного ранее комплекса компьютерных программ, который кроме оценки сложности фазированных выборок позволяет эффективно определять частотный спектр олигонуклеотидов фиксированной длины, производить сравнение частот олигонуклеотидов в выборках большого объема.</p></abstract><trans-abstract xml:lang="en"><p>We have investigated a mutation frequency within the human genome for the set of known single nucleotide polymorphisms (SNPs) from the “1000 genomes” project. We have developed and applied novel statistical computational methods to analyze genetic text based on its complexity. A complexity profiling in a sliding window is applied to the sites containing single nucleotide polymorphisms within the human genome. A local decrease in text complexity level in SNP-containing sites has been shown. 
Analysis of the complexity profiles for SNP-containing sites shows that flanking monomer repeats define a lower context complexity of sites containing SNPs within the human genome. An effect of local decrease in text complexity in SNP-containing sites is confirmed by analysis of polymorphisms in the rat and mouse genomes. We have found context differences between coding and regulatory sequences. These differences reflect a complexity of SNP-containing loci. The changes in point mutation frequency were shown previously for microsatellite-containing sequences. Using enhanced mathematical tools and larger data sets, this work shows enrichment of polytracks and simple sequence repeats in local genome surroundings of SNP-containing sites. We have found high-frequency oligonucleotides within genomic regions containing SNPs. Such oligonucleotides are related to nucleotide polytracks. The presence of poly-A tracks might be associated with an increased probability of double helix DNA breaks around mutable loci and following fixation of nucleotide changes. The complexity estimates were computed using a previously developed program tool. This tool allows for both (i) complexity estimation of phased samples, and (ii) rapid and effective identification of the frequency spectrum of oligonucleotides with fixed lengths, and a comparison of oligonucleotide frequencies in different samples.</p></trans-abstract><kwd-group xml:lang="ru"><kwd>ОНП</kwd><kwd>геном</kwd><kwd>нуклеотидные последовательности</kwd><kwd>повторы</kwd><kwd>энтропия</kwd><kwd>мутации</kwd></kwd-group><kwd-group xml:lang="en"><kwd>SNP</kwd><kwd>genome</kwd><kwd>nucleotide sequences</kwd><kwd>repeats</kwd><kwd>entropy</kwd><kwd>mutations</kwd></kwd-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Игнатьева Е.В., Подколодная О.А., Орлов Ю.Л., Васильев Г.В., Колчанов Н.А. 
Регуляторная геномика – экспериментально-компьютерные подходы. Генетика. 2015;51(4):409-429.</mixed-citation><mixed-citation xml:lang="en">Babenko V.N., Kosarev P.S., Vishnevsky O.V., Levitsky V.G., Basin V. V., Frolov A.S.  Investigating extended regulatory regions of genomic DNA sequences. Bioinformatics.  1999;15(7/8):644-653.DOI 10.1093/bioinformatics/15.7.644</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Кулакова Е.В., Спицина А.М., Орлова Н.Г., Дергилев А.И., Свичкарев А.В., Сафронова Н.С., Черных И.Г., Орлов Ю.Л. Программы анализа геномных данных секвенирования, полученных на основе технологий ChIP-seq, ChIA-PET и Hi-C. Программные системы: теория и приложения. 2015;6(2):129-148.</mixed-citation><mixed-citation xml:lang="en">Babenko V.N., Matvienko V.F., Safronova N.S. Implication of transposons distribution  on chromatin state and genome architecture in human. J. Biomol. Struct. Dyn.  2015;33(1):10-11. DOI 10.1080/07391102.2015.1032559</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Орлов Ю.Л. Анализ регуляторных геномных последовательностей с помощью компьютерных методов оценок сложности генетических текстов: Дис. … канд. биол. наук. Новосибирск, 2004.</mixed-citation><mixed-citation xml:lang="en">Chuzhanova N.A., Krawczak M., Thomas N., Nemytikova L.A., Gusev V.D., Cooper D.N.  The evolution of the vertebrate beta-globin gene promoter. Evolution. 2002;56(2):224-232.</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Орлов Ю.Л., Брагин А.О., Медведева И.В., Гунбин И.В., Деменков П.С., Вишневский О.В., Левицкий В.Г., Ощепков В.Г., Подколодный Н.Л., Афонников Д.А., Гроссе И., Колчанов Н.А. ICGenomics: программный комплекс анализа символьных последовательностей геномики. Вавиловский журнал генетики и селекции. 
2012;16(4/1):732-741.</mixed-citation><mixed-citation xml:lang="en">Goh W.S., Orlov Y., Li J., Clarke N.D. Blurring of high-resolution data shows that  the effect of intrinsic nucleosome occupancy on transcription factor binding is  mostly regional, not local. PLoS Comput. Biol. 2010;6(1):e1000649. DOI 10.1371/journal.pcbi.1000649</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Орлов Ю.Л., Левицкий В.Г., Смирнова О.Г., Подколодная О.А., Хлебодарова Т.М., Колчанов Н.А. Статистический анализ последовательностей ДНК, содержащих сайты формирования нуклеосом. Биофизика. 2006;51(4):608-614.</mixed-citation><mixed-citation xml:lang="en">Gusev V.D., Nemytikova L.A., Chuzhanova N.A. On the complexity measures of genetic  sequences. Bioinformatics. 1999;15(12):994- 999. DOI 10.1093/bioinformatics/15.12.994</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Пономаренко П.М., Савинкова Л.К., Драчкова И.А., Лысова М.В., Аршинова Т.В., Пономаренко М.П., Колчанов Н.А. Пошаговая модель связывания TBP/TATA-бокс позволяет предсказать наследственное заболевание человека по точечному полиморфизму. Докл. РАН. 2008;419(6):828-832.</mixed-citation><mixed-citation xml:lang="en">Ignatieva E.V., Podkolodnaya O.A., Orlov Y.L., Vasiliev G.V., Kolchanov  N.A. Regulatory genomics: Combined experimental and computational approaches. Genetika = Genetics. 2015;51(4):409-429.</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Поляновский О.Л., Лебеденко Е.Н., Деев С.М. ERBB-онкогены – мишени моноклональных антител. Биохимия. 
2012;77(3): 289-311.</mixed-citation><mixed-citation xml:lang="en">International HapMap 3 Consortium, Altshuler D.M., Gibbs R.A., Peltonen L.,  Dermitzakis E., Schaffner S.F., Yu F., Peltonen L., Dermitzakis E., Bonnen P.E.,  Altshuler D.M., Gibbs R.A., de Bakker P. I., Deloukas P., Gabriel S.B., Gwilliam R.,  Hunt S., Inouye M., Jia X., Palotie A., Parkin M., Whittaker P., Yu F., Chang K.,  Hawes A., Lewis L.R., Ren Y., Wheeler D., Gibbs R.A., Muzny D.M., Barnes C.,  Darvishi K., Hurles M., Korn J.M., Kristiansson K., Lee C., Mc Carrol S.A., Nemesh  J., Dermitzakis E., Keinan A., Montgomery S. B., Pollack S., Price A.L., Soranzo N.,  Bonnen P.E., Gibbs R. A., Gonzaga-Jauregui C., Keinan A., Price A.L., Yu F., Anttila V., Brodeur W., Daly M.J., Leslie S., McVean G., Moutsianas L., Nguyen H., Schaffner  S.F., Zhang Q., Ghori M.J., McGinnis R., McLaren W., Pollack S., Price A.L.,  Schaffner S.F., Takeuchi F., Grossman S. R., Shlyakhter I., Hostetter E.B., Sabeti  P.C., Adebamowo C.A., Foster D.R., Licinio J., Manca M.C., Marshall P.A., Matsuda  I., Ngare D., Wang V.O., Reddy D., Rotimi C.N., Royal C. D., Sharp R.R., Zeng C.,  Brooks L.D., McEwen J.E. Integrating common and rare genetic variation in diverse  human populations. Nature. 2010;467(7311):52-58. DOI 10.1038/nature09298</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Савинкова Л.К., Пономаренко М.П., Пономаренко П.М., Драчкова И.А., Лысова М.В., Аршинова Т.В., Колчанов Н.А. Полиморфизмы ТАТА-боксов промоторов генов человека и ассоциированные с ними наследственные патологии. Биохимия. 2009; 74(2):149-163.</mixed-citation><mixed-citation xml:lang="en">Karlin S., Ost F., Blaisdell B.T. Patterns in DNA and amino-acid sequences and their  statistical significance. Mathematical methods for DNA sequences. Ed. M.S. Waterman. 
Boca Raton: CRC Press, 1989.</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Спицина А.М., Орлов Ю.Л., Подколодная Н.Н., Свичкарев А.В., Дергилев А.И., Чен М., Кучин Н.В., Черных И.Г., Глинский Б.М. Суперкомпьютерный анализ геномных и транскриптомных данных, полученных с помощью технологий высокопроизводительного секвенирования ДНК. Программные системы: теория и приложения. 2015;6:1(23):157-174.</mixed-citation><mixed-citation xml:lang="en">Kulakova E.V., Spitsina A.M., Orlova N.G., Dergilev A.I., Svichkarev A.V., Safronova  N.S., Chernykh I.G., Orlov Y.L. Program analysis of genomic sequence data, obtained  through technologies ChIP-seq, ChIA-PET and Hi-C. Programmnye sistemy: teoriya i  prilozheniya = Program Systems: Theory and Applications. 2015;6(2): 129-148.</mixed-citation></citation-alternatives></ref><ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Babenko V.N., Kosarev P.S., Vishnevsky O.V., Levitsky V.G., Basin V. V., Frolov A.S. Investigating extended regulatory regions of genomic DNA sequences. Bioinformatics. 1999;15(7/8):644-653. DOI 10.1093/bioinformatics/15.7.644</mixed-citation><mixed-citation xml:lang="en">Lenz C., Haerty W., Golding G.B. Increased substitution rates surrounding low- complexity regions within primate proteins. Genome Biol. Evol. 2014;6(3):655-665. DOI 10.1093/gbe/evu042</mixed-citation></citation-alternatives></ref><ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Babenko V.N., Matvienko V.F., Safronova N.S. Implication of transposons distribution on chromatin state and genome architecture in human. J. Biomol. Struct. Dyn. 2015;33(1):10-11. DOI 10.1080/07391102.2015.1032559</mixed-citation><mixed-citation xml:lang="en">Medvedeva S.A., Panchin A.Y., Alexeevski A.V., Spirin S.A., Panchin Y.V. Comparative  Analysis of Context-Dependent Mutagenesis Using Human and Mouse Models. 
BioMed Res.  Intern. 2013;2013. Article ID 989410</mixed-citation></citation-alternatives></ref><ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Chuzhanova N.A., Krawczak M., Thomas N., Nemytikova L.A., Gusev V.D., Cooper D.N. The evolution of the vertebrate beta-globin gene promoter. Evolution. 2002;56(2):224-232.</mixed-citation><mixed-citation xml:lang="en">Orlov Y.L. Analiz regulyatornykh genomnykh posledovatelnostey s pomoshchyu  kompyuternykh metodov otsenok slozhnosti geneticheskikh tekstov. Diss. kand. biol.  nauk. [Analysis of regulatory genome sequences using computer methods of genetic  text complexity. Cand. biol. sci. diss.]. Novosibirsk, 2004.</mixed-citation></citation-alternatives></ref><ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">Goh W.S., Orlov Y., Li J., Clarke N.D. Blurring of high-resolution data shows that the effect of intrinsic nucleosome occupancy on transcription factor binding is mostly regional, not local. PLoS Comput. Biol. 2010;6(1):e1000649. DOI 10.1371/journal.pcbi.1000649</mixed-citation><mixed-citation xml:lang="en">Orlov Y.L., Bragin A.O., Medvedeva I.V., Podkolodnaia O.A., Khlebodarova T.M.,  Kolchanov N.A. ICGenomics: Software for analysis of symbol genomics sequences.  Vavilovskii Zhurnal Genetiki i Selektsii = Vavilov Journal of Genetics and Breeding. 2012;16(4/1):732-741.</mixed-citation></citation-alternatives></ref><ref id="cit14"><label>14</label><citation-alternatives><mixed-citation xml:lang="ru">Gusev V.D., Nemytikova L.A., Chuzhanova N.A. On the complexity measures of genetic sequences. Bioinformatics. 1999;15(12):994-999. DOI 10.1093/bioinformatics/15.12.994</mixed-citation><mixed-citation xml:lang="en">Orlov Y.L., Filippov V.P., Potapov V.N., Kolchanov N.A. Construction of stochastic  context trees for genetic texts. In Silico Biology. 
2002;2(3):257-262.</mixed-citation></citation-alternatives></ref><ref id="cit15"><label>15</label><citation-alternatives><mixed-citation xml:lang="ru">International HapMap 3 Consortium, Altshuler D.M., Gibbs R.A., Peltonen L., Dermitzakis E., Schaffner S.F.,Yu.F., Peltonen L., Dermitzakis E., Bonnen P.E., Altshuler D.M., Gibbs R.A., de BakkerP. I., Deloukas P., Gabriel S.B., Gwilliam R., Hunt S., Inouye M., Jia X., Palotie A., Parkin M., Whittaker P., Yu F., Chang K., Hawes A., Lewis L.R., Ren Y., Wheeler D., Gibbs R.A., Muzny D.M., Barnes C., Darvishi K., Hurles M., Korn J.M., Kristiansson K., Lee C., Mc Carrol S.A., Nemesh J., Dermitzakis E., Keinan A., Montgomery S. B., Pollack S., Price A.L., Soranzo N., Bonnen P.E., Gibbs R. A., Gonzaga-Jauregui C., Keinan A., Price A.L., Yu F., Anttila V., Brodeur W., Daly M.J., Leslie S., McVean G., Moutsianas L., Nguyen H., Schaffner S.F., Zhang Q., Ghori M.J., McGinnis R., McLaren W., Pollack S., Price A.L., Schaffner S.F., Takeuchi F., Grossman S. R., Shlyakhter I., Hostetter E.B., Sabeti P.C., Adebamowo C.A., Foster M.W., Gordon D.R., Licinio J., Manca M.C., Marshall P.A., Matsuda I., Ngare D., Wang V.O., Reddy D., Rotimi C.N., Royal C. D., Sharp R.R., Zeng C., Brooks L.D., McEwen J.E. Integrating common and rare genetic variation in diverse human populations. Nature. 2010;467(7311):52-58. DOI 10.1038/nature09298</mixed-citation><mixed-citation xml:lang="en">Orlov Y.L., Levitskii V.G., Smirnova O.G., Gunbin K.V., Demenkov P.S., Vishnevsky  O.V., Levitsky V.G., Oshchepkov D.Y., Podkolodnyi N.L., Afonnikov D.A., Grosse I.,  Kolchanov N.A. Statistical analysis of nucleosome formation sites. Biofizika =  Biophisics (Moscow). 2006;51(4):608-614.</mixed-citation></citation-alternatives></ref><ref id="cit16"><label>16</label><citation-alternatives><mixed-citation xml:lang="ru">Karlin S., Ost F., Blaisdell B.T. Patterns in DNA and amino-acid sequences and their statistical significance. Mathematical methods for DNA sequences. 
Ed. M.S. Waterman. Boca Raton: CRC Press, 1989.</mixed-citation><mixed-citation xml:lang="en">Orlov Y.L., Potapov V.N. Complexity: an internet resource for analysis of DNA  sequence complexity. Nucl. Acids. Res. 2004;32(Web Server issue):W628-633. DOI 10.1093/nar/gkh466</mixed-citation></citation-alternatives></ref><ref id="cit17"><label>17</label><citation-alternatives><mixed-citation xml:lang="ru">Lenz C., Haerty W., Golding G.B. Increased substitution rates surrounding low-complexity regions within primate proteins. Genome Biol. Evol. 2014;6(3):655-665. DOI 10.1093/gbe/evu042</mixed-citation><mixed-citation xml:lang="en">Orlov Y.L., Te Boekhorst R., Abnizova I.I. Statistical measures of the structure of  genomic sequences: entropy, complexity, and position information. J. Bioinform.  Comput. Biol. 2006;4:523-536. DOI 10.1142/S0219720006001801</mixed-citation></citation-alternatives></ref><ref id="cit18"><label>18</label><citation-alternatives><mixed-citation xml:lang="ru">Medvedeva S.A., Panchin A.Y., Alexeevski A.V., Spirin S.A., Panchin Y.V. Comparative Analysis of Context-Dependent Mutagenesis Using Human and Mouse Models. BioMed Res. Intern. 2013;2013.Article ID 989410</mixed-citation><mixed-citation xml:lang="en">Polanovski O.L., Lebedenko E.N., Deyev S.M. ERBB oncogenes as targets for monoclonal  antibodies. Biokhimia = Biochemistry (Moscow). 2012;77(3):289-311.</mixed-citation></citation-alternatives></ref><ref id="cit19"><label>19</label><citation-alternatives><mixed-citation xml:lang="ru">Orlov Y.L., Filippov V.P., Potapov V.N., Kolchanov N.A. Construction of stochastic context trees for genetic texts. In Silico Biology. 2002;2(3):257-262.</mixed-citation><mixed-citation xml:lang="en">Ponomarenko J.V., Orlova G.V., Merkulova T.I., Gorshkova E.V., Fokin O.N., Vasiliev  G.V., Frolov A.S., Ponomarenko M.P. rSNP_ Guide: an integrated database-tools system  for studying SNPs and site-directed mutations in transcription factor binding sites.  Hum. Mutat. 
2002;20(4):239-248. DOI 10.1002/humu.10116</mixed-citation></citation-alternatives></ref><ref id="cit20"><label>20</label><citation-alternatives><mixed-citation xml:lang="ru">Orlov Y.L., Potapov V.N. Complexity: an internet resource for analysis of DNA sequence complexity. Nucl. Acids. Res. 2004;32(Web Server issue):W628-633. DOI 10.1093/nar/gkh466</mixed-citation><mixed-citation xml:lang="en">Ponomarenko M., Mironova V., Gunbin K., Savinkova L. Hogness Box. Brenner’s  Encyclopedia of Genetics. 2nd edn. Eds S. Maloy, K. Hughe. San Diego: Acad. Press,  Elsevier Inc. 2013а;3:491-494. DOI 10.1016/B978-0-12-374984-0.00720-8</mixed-citation></citation-alternatives></ref><ref id="cit21"><label>21</label><citation-alternatives><mixed-citation xml:lang="ru">Orlov Y.L., Te Boekhorst R., Abnizova I.I. Statistical measures of the structure of genomic sequences: entropy, complexity, and position information. J. Bioinform. Comput. Biol. 2006;4:523-536. DOI 10.1142/S0219720006001801</mixed-citation><mixed-citation xml:lang="en">Ponomarenko M., Savinkova L., Kolchanov N. Initiation Factors. Brenner’s  Encyclopedia of Genetics, 2nd ed. Eds S. Maloy, K.Hughes. San Diego: Acad. Press,  Elsevier Inc. 2013b;4:83-85. DOI 10.1016/B978-0-12-374984-0.00798-1</mixed-citation></citation-alternatives></ref><ref id="cit22"><label>22</label><citation-alternatives><mixed-citation xml:lang="ru">Ponomarenko J.V., Orlova G.V., Merkulova T.I., Gorshkova E.V., Fokin O.N., Vasiliev G.V., 1996;266:554-571. DOI</mixed-citation><mixed-citation xml:lang="en">Ponomarenko P.M., Savinkova L.K., Drachkova I.A., Lysova M.V., Arshinova T.V.,  Ponomarenko M.P., Kolchanov N.A. A step-by-step model of TBP/TATA box binding allows  predicting human hereditary diseases by single nucleotide polymorphism. Doklady RAN  = Proceedings of the Russian Academy of Sciences. 
2008;419(6):828-832.</mixed-citation></citation-alternatives></ref><ref id="cit23"><label>23</label><citation-alternatives><mixed-citation xml:lang="ru">1016/S0076-6879(96)66035-2</mixed-citation><mixed-citation xml:lang="en">Putta P., Orlov Y.L., Podkolodnyy N.L., Mitra C.K. Relatively conserved common short  sequences in transcription factor binding sites and miRNA. Vavilov Journal of  Genetics and Breeding. 2011;15(4): 750-756.</mixed-citation></citation-alternatives></ref><ref id="cit24"><label>24</label><citation-alternatives><mixed-citation xml:lang="ru">Rogozin I.B., Kolchanov N.A. Somatic hypermutagenesis in immunoglobulin genes. II.  Influence of neighbouring base sequences on mutagenesis. Biochim. Biophys. Acta.  1992;1171(1):11-18. DOI 10.1016/0167-4781(92)90134-L</mixed-citation><mixed-citation xml:lang="en">Rogozin I.B., Kolchanov N.A. Somatic hypermutagenesis in immunoglobulin genes. II.  Influence of neighbouring base sequences on mutagenesis. Biochim. Biophys. Acta.  1992;1171(1):11-18. DOI 10.1016/0167-4781(92)90134-L</mixed-citation></citation-alternatives></ref><ref id="cit25"><label>25</label><citation-alternatives><mixed-citation xml:lang="ru">Rogozin I.B., Pavlov Y.I., Bebenek K., Matsuda T., Kunkel T.A. Somatic mutation  hotspots correlate with DNA polymerase eta error spectrum. Nat. Immunol. 2001;2(6):530-536. DOI 10.1038/88732</mixed-citation><mixed-citation xml:lang="en">Rogozin I.B., Pavlov Y.I., Bebenek K., Matsuda T., Kunkel T.A. Somatic mutation  hotspots correlate with DNA polymerase eta error spectrum. Nat. Immunol. 2001;2(6):530-536. DOI 10.1038/88732</mixed-citation></citation-alternatives></ref><ref id="cit26"><label>26</label><citation-alternatives><mixed-citation xml:lang="ru">Rogozin I.B., Solovyov V.V., Kolchanov N.A. Somatic hypermutagenesis in  immunoglobulin genes. I. Correlation between somatic mutations and repeats. Somatic  mutation properties and clonal selection. Biochim. Biophys. Acta. 1991;1089(2):175- 182. 
DOI 10.1016/0167-4781(91)90005-7</mixed-citation><mixed-citation xml:lang="en">Rogozin I.B., Solovyov V.V., Kolchanov N.A. Somatic hypermutagenesis in  immunoglobulin genes. I. Correlation between somatic mutations and repeats. Somatic  mutation properties and clonal selection. Biochim. Biophys. Acta. 1991;1089(2):175-182. DOI 10.1016/0167-4781(91)90005-7</mixed-citation></citation-alternatives></ref><ref id="cit27"><label>27</label><citation-alternatives><mixed-citation xml:lang="ru">Safronova N.S., Babenko V.N., Orlov Y.L. 117 Analysis of SNP containing sites in  human genome using text complexity estimates. J. Biomol. Struct. Dyn. 2015;33(1):73-74. DOI 10.1080/07391102.2015.1032750</mixed-citation><mixed-citation xml:lang="en">Safronova N.S., Babenko V.N., Orlov Y.L. 117 Analysis of SNP containing sites in  human genome using text complexity estimates. J. Biomol. Struct. Dyn. 2015;33(1):73-74. DOI 10.1080/07391102.2015.1032750</mixed-citation></citation-alternatives></ref><ref id="cit28"><label>28</label><citation-alternatives><mixed-citation xml:lang="ru">Savinkova L.K., Ponomarenko M.P., Ponomarenko P.M., Drachkova I. A., Lysova M.V., Arshinova T.V., Kolchanov N.A. TATA box polymorphisms in human gene promoters and  associated hereditary pathologies. Biokhimiya = Biochemistry (Moscow). 2009;74(2): 149-163.</mixed-citation><mixed-citation xml:lang="en">Savinkova L.K., Ponomarenko M.P., Ponomarenko P.M., Drachkova I. A., Lysova M.V., Arshinova T.V., Kolchanov N.A. TATA box polymorphisms in human gene promoters and  associated hereditary pathologies. Biokhimiya = Biochemistry (Moscow). 2009;74(2): 149-163.</mixed-citation></citation-alternatives></ref><ref id="cit29"><label>29</label><citation-alternatives><mixed-citation xml:lang="ru">Siddle K.J., Goodship J.A., Keavney B., Santibanez-Koref M.F. Bases adjacent to  mononucleotide repeats show an increased single nucleotide polymorphism frequency in  the human genome. Bioinformatics. 2011;27(7):895-898. 
DOI 10.1093/bioinformatics/btr067</mixed-citation><mixed-citation xml:lang="en">Siddle K.J., Goodship J.A., Keavney B., Santibanez-Koref M.F. Bases adjacent to  mononucleotide repeats show an increased single nucleotide polymorphism frequency in  the human genome. Bioinformatics. 2011;27(7):895-898. DOI 10.1093/bioinformatics/btr067</mixed-citation></citation-alternatives></ref><ref id="cit30"><label>30</label><citation-alternatives><mixed-citation xml:lang="ru">Sidore C., Busonero F., Maschio A., Porcu E., Naitza S., Zoledziewska M., Mulas A., Pistis G., Steri M., Danjou F., Kwong A., Ortega Del Vecchyo V.D., Chiang C.W.,  Bragg-Gresham J., Pitzalis M., NagarajaR., Tarrier B., Brennan C., Uzzau S.,  Fuchsberger C., Atzeni R., Reinier F., Berutti R., Huang J., Timpson N.J., Toniolo  D., Gasparini P., Malerba G., Dedoussis G., Zeggini E., Soranzo N., Jones C., Lyons  R., Angius A., Kang H.M., Novembre J., Sanna S., Schlessinger D., Cucca F., Abecasis  G.R. Genome sequencing elucidates  Sardinian genetic architecture and augments  association analyses for lipid and blood inflammatory markers. Nat. Genet. 2015; 47(11):1272-1281. DOI 10.1038/ng.3368</mixed-citation><mixed-citation xml:lang="en">Sidore C., Busonero F., Maschio A., Porcu E., Naitza S., Zoledziewska M., Mulas A., Pistis G., Steri M., Danjou F., Kwong A., Ortega Del Vecchyo V.D., Chiang C.W.,  Bragg-Gresham J., Pitzalis M., NagarajaR., Tarrier B., Brennan C., Uzzau S.,  Fuchsberger C., Atzeni R., Reinier F., Berutti R., Huang J., Timpson N.J., Toniolo  D., Gasparini P., Malerba G., Dedoussis G., Zeggini E., Soranzo N., Jones C., Lyons  R., Angius A., Kang H.M., Novembre J., Sanna S., Schlessinger D., Cucca F., Abecasis  G.R. Genome sequencing elucidates  Sardinian genetic architecture and augments  association analyses for lipid and blood inflammatory markers. Nat. Genet. 2015; 47(11):1272-1281. 
DOI 10.1038/ng.3368</mixed-citation></citation-alternatives></ref><ref id="cit31"><label>31</label><citation-alternatives><mixed-citation xml:lang="ru">Spitsina A.M., Orlov Y.L., Podkolodnaya N.N., Svichkarev A.V., Dergilev A.I., Chen M., Kuchin N.V., Chernykh I.G., Glinskij B.M. Supercomputer analysis of genomics and transcriptomics data revealed by high-throughput DNA sequencing. Programmnye sistemy: teoriya i prilozheniya = Program Systems: Theory and Applications. 2015;6:1(23):157-174.</mixed-citation><mixed-citation xml:lang="en">Spitsina A.M., Orlov Y.L., Podkolodnaya N.N., Svichkarev A.V., Dergilev A.I., Chen M., Kuchin N.V., Chernykh I.G., Glinskij B.M. Supercomputer analysis of genomics and transcriptomics data revealed by high-throughput DNA sequencing. Programmnye sistemy: teoriya i prilozheniya = Program Systems: Theory and Applications. 2015;6:1(23):157-174.</mixed-citation></citation-alternatives></ref><ref id="cit32"><label>32</label><citation-alternatives><mixed-citation xml:lang="ru">Trifonov E.N., Volkovich Z., Frenkel Z.M. Multiple levels of meaning in DNA sequences, and one more. Ann. N.Y. Acad. Sci. 2012;1267:35-38. DOI 10.1111/j.1749-6632.2012.06589.x</mixed-citation><mixed-citation xml:lang="en">Trifonov E.N., Volkovich Z., Frenkel Z.M. Multiple levels of meaning in DNA sequences, and one more. Ann. N.Y. Acad. Sci. 2012;1267:35-38. DOI 10.1111/j.1749-6632.2012.06589.x</mixed-citation></citation-alternatives></ref><ref id="cit33"><label>33</label><citation-alternatives><mixed-citation xml:lang="ru">Troyanskaya O.G., Arbell O., Koren Y., Landau G.M., Bolshoy A. Sequence complexity profiles of prokaryotic genomic sequences: a fast algorithm for calculating linguistic complexity. Bioinformatics. 2002;18(5):679-688. DOI 10.1093/bioinformatics/18.5.679</mixed-citation><mixed-citation xml:lang="en">Troyanskaya O.G., Arbell O., Koren Y., Landau G.M., Bolshoy A. 
Sequence complexity profiles of prokaryotic genomic sequences: a fast algorithm for calculating linguistic complexity. Bioinformatics. 2002;18(5):679-688. DOI 10.1093/bioinformatics/18.5.679</mixed-citation></citation-alternatives></ref><ref id="cit34"><label>34</label><citation-alternatives><mixed-citation xml:lang="ru">UK10K Consortium; Walter K., Min J.L., Huang J., Crooks L., Memari Y., McCarthy S., Perry J.R., Xu C., Futema M., Lawson D., Iotchkova V., Schiffels S., Hendricks A.E., Danecek P., Li R., Floyd J., Wain L.V., Barroso I., Humphries S.E., Hurles M.E., Zeggini E., Barrett J.C., Plagnol V., Richards J.B., Greenwood C.M., Timpson N.J., Durbin R., Soranzo N. The UK10K project identifies rare variants in health and disease. Nature. 2015;526:82-90. DOI 10.1038/nature14962</mixed-citation><mixed-citation xml:lang="en">UK10K Consortium; Walter K., Min J.L., Huang J., Crooks L., Memari Y., McCarthy S., Perry J.R., Xu C., Futema M., Lawson D., Iotchkova V., Schiffels S., Hendricks A.E., Danecek P., Li R., Floyd J., Wain L.V., Barroso I., Humphries S.E., Hurles M.E., Zeggini E., Barrett J.C., Plagnol V., Richards J.B., Greenwood C.M., Timpson N.J., Durbin R., Soranzo N. The UK10K project identifies rare variants in health and disease. Nature. 2015;526:82-90. DOI 10.1038/nature14962</mixed-citation></citation-alternatives></ref><ref id="cit35"><label>35</label><citation-alternatives><mixed-citation xml:lang="ru">Vowles E.J., Amos W. Evidence for widespread convergent evolution around human microsatellites. PLoS Biol. 2004;2:E199. DOI 10.1371/journal.pbio.0020199</mixed-citation><mixed-citation xml:lang="en">Vowles E.J., Amos W. Evidence for widespread convergent evolution around human microsatellites. PLoS Biol. 2004;2:E199. DOI 10.1371/journal.pbio.0020199</mixed-citation></citation-alternatives></ref><ref id="cit36"><label>36</label><citation-alternatives><mixed-citation xml:lang="ru">Wootton J.C., Federhen S. 
Analysis of compositionally biased regions in sequence databases. Methods Enzymol. 1996;266:554-571. DOI 10.1016/S0076-6879(96)66035-2</mixed-citation><mixed-citation xml:lang="en">Wootton J.C., Federhen S. Analysis of compositionally biased regions in sequence databases. Methods Enzymol. 1996;266:554-571. DOI 10.1016/S0076-6879(96)66035-2</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
