<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">vavilov</journal-id><journal-title-group><journal-title xml:lang="ru">Вавиловский журнал генетики и селекции</journal-title><trans-title-group xml:lang="en"><trans-title>Vavilov Journal of Genetics and Breeding</trans-title></trans-title-group></journal-title-group><issn pub-type="epub">2500-3259</issn><publisher><publisher-name>Institute of Cytology and Genetics of Siberian Branch of the RAS</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.18699/VJ21.002</article-id><article-id custom-type="elpub" pub-id-type="custom">vavilov-2911</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>БИОИНФОРМАТИКА И СИСТЕМНАЯ КОМПЬЮТЕРНАЯ БИОЛОГИЯ</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>BIOINFORMATICS AND COMPUTATIONAL SYSTEMS BIOLOGY</subject></subj-group></article-categories><title-group><article-title>Метод поиска структурной гетерогенности сайтов связывания транскрипционных факторов с использованием альтернативных de novo моделей на примере FOXA2</article-title><trans-title-group xml:lang="en"><trans-title>Application of alternative de novo motif recognition models for analysis of structural heterogeneity of transcription factor binding sites: a case study of FOXA2 binding sites</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-5174-6609</contrib-id><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Цуканов</surname><given-names>А. 
В.</given-names></name><name name-style="western" xml:lang="en"><surname>Tsukanov</surname><given-names>A. V.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Новосибирск</p></bio><bio xml:lang="en"><p>Novosibirsk</p></bio><email xlink:type="simple">tsukanov@bionet.nsc.ru</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-4905-3088</contrib-id><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Левицкий</surname><given-names>В. Г.</given-names></name><name name-style="western" xml:lang="en"><surname>Levitsky</surname><given-names>V. G.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Новосибирск</p></bio><bio xml:lang="en"><p>Novosibirsk</p></bio><xref ref-type="aff" rid="aff-2"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Меркулова</surname><given-names>Т. И.</given-names></name><name name-style="western" xml:lang="en"><surname>Merkulova</surname><given-names>T. 
I.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Новосибирск</p></bio><bio xml:lang="en"><p>Novosibirsk</p></bio><xref ref-type="aff" rid="aff-3"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru">Федеральный исследовательский центр Институт цитологии и генетики Сибирского отделения Российской академии наук; Новосибирский национальный исследовательский государственный университет<country>Россия</country></aff><aff xml:lang="en">Institute of Cytology and Genetics of Siberian Branch of the Russian Academy of Sciences<country>Russian Federation</country></aff></aff-alternatives><aff-alternatives id="aff-2"><aff xml:lang="ru">Федеральный исследовательский центр Институт цитологии и генетики Сибирского отделения Российской академии наук; Новосибирский национальный исследовательский государственный университет<country>Россия</country></aff><aff xml:lang="en">Institute of Cytology and Genetics of Siberian Branch of the Russian Academy of Sciences; Novosibirsk State University<country>Russian Federation</country></aff></aff-alternatives><aff-alternatives id="aff-3"><aff xml:lang="ru">Федеральный исследовательский центр Институт цитологии и генетики Сибирского отделения Российской академии наук<country>Россия</country></aff><aff xml:lang="en">Institute of Cytology and Genetics of Siberian Branch of the Russian Academy of Sciences; Novosibirsk State University<country>Russian Federation</country></aff></aff-alternatives><pub-date pub-type="collection"><year>2021</year></pub-date><pub-date pub-type="epub"><day>15</day><month>03</month><year>2021</year></pub-date><volume>25</volume><issue>1</issue><fpage>7</fpage><lpage>17</lpage><permissions><copyright-statement>Copyright &#x00A9; Цуканов А.В., Левицкий В.Г., Меркулова Т.И., 2021</copyright-statement><copyright-year>2021</copyright-year><copyright-holder xml:lang="ru">Цуканов А.В., Левицкий В.Г., Меркулова Т.И.</copyright-holder><copyright-holder xml:lang="en">Tsukanov A.V., 
Levitsky V.G., Merkulova T.I.</copyright-holder><license license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri xlink:href="https://vavilov.elpub.ru/jour/article/view/2911">https://vavilov.elpub.ru/jour/article/view/2911</self-uri><abstract><p>В настоящее время самой распространенной моделью поиска сайтов связывания транскрипционных факторов (ССТФ) в пиках ChIP-seq является позиционная весовая матрица (position weight matrix, PWM). Но эта модель не учитывает взаимосвязи между частотами встреч нуклеотидов в разных позициях ССТФ, поэтому не способна гарантировать определение всех возможных структурных вариантов ССТФ. На сегодняшний день уже предложены альтернативные модели, например BaMM и InMoDe, которые учитывают такие взаимосвязи. Однако применение этих моделей обычно сводилось к сравнению их точности с точностью традиционной модели PWM, тогда как анализ совместной встречаемости и относительного расположения ССТФ разных моделей в пиках не производился. В нашей работе мы предлагаем конвейер программ MultiDeNA, позволяющий сочетать разные модели de novo поиска ССТФ для выявления структурной гетерогенности ССТФ в данных ChIP-seq. Разработанный конвейер включает этапы построения моделей на основе заданного набора пиков, оценки точности распознавания моделей с помощью перекрестных тестов, выбора порогов, сканирования пиков ChIP-seq и классификацию пиков по результатам сканирования. С применением конвейера нами проведен анализ 22 экспериментов ChIP-seq для ТФ FOXA2 с помощью четырех моделей: PWM, diPWM, BaMM и InMoDe. Показано, что сочетание моделей позволяет существенно увеличить общее количество распознанных пиков (на 26.3 %) по сравнению с применением только PWM; при этом основной вклад в распознавание внесла модель BaMM. 
В значительной доле пиков разные модели распознают совпадающие ССТФ; однако для моделей PWM, diPWM, BaMM и InMoDe медианы доли пиков, которые содержали ССТФ только одной модели, составили 1.08, 0.49, 4.15 и 1.73 % соответственно. Таким образом, совокупность ССТФ FOXA2 не описывается полностью только одной моделью, что свидетельствует о наличии структурной гетерогенности в ССТФ у FOXA2.</p></abstract><trans-abstract xml:lang="en"><p>The most popular model for the search of ChIP-seq data for transcription factor binding sites (TFBS) is the positional weight matrix (PWM). However, this model does not take into account dependencies between nucleotide occurrences in different site positions. Currently, two recently proposed models, BaMM and InMoDe, can do as much. However, application of these models was usually limited only to comparing their recognition accuracies with that of PWMs, while none of the analyses of the co-prediction and relative positioning of hits of different models in peaks has yet been performed. To close this gap, we propose the pipeline called MultiDeNA. This pipeline includes stages of model training, assessing their recognition accuracy, scanning ChIP-seq peaks and their classification based on scan results. We applied our pipeline to 22 ChIP-seq datasets of TF FOXA2 and considered PWM, dinucleotide PWM (diPWM), BaMM and InMoDe models. The combination of these four models allowed a significant increase in the fraction of recognized peaks compared to that for the sole PWM model: the increase was 26.3 %. The BaMM model provided the main contribution to the recognition of sites. Although the major fraction of predicted peaks contained TFBS of different models with coinciding positions, the medians of the fraction of peaks containing the predictions of sole models were 1.08, 0.49, 4.15 and 1.73 % for PWM, diPWM, BaMM and InMoDe, respectively. Thus, FOXA2 BSs were not fully described by only a sole model, which indicates their heterogeneity. 
We assume that the BaMM model is the most successful in describing the structure of the FOXA2 BS in ChIP-seq datasets under study.</p></trans-abstract><kwd-group xml:lang="ru"><kwd>сайты связывания транскрипционных факторов (ССТФ)</kwd><kwd>de novo поиск ССТФ</kwd><kwd>ChIP-seq</kwd><kwd>гетерогенность ССТФ</kwd></kwd-group><kwd-group xml:lang="en"><kwd>transcription factor binding sites (TFBS)</kwd><kwd>TFBS de novo searching</kwd><kwd>ChIP-seq</kwd><kwd>heterogeneity of TFBS</kwd></kwd-group><funding-group xml:lang="en"><funding-statement>This work was supported by the Russian Foundation for Basic Research No. 18-29-13040 and the state budget project No. 0259-2019-0008.</funding-statement></funding-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Bailey T.L., Elkan C. Fitting a mixture model by expectation maximization to discover motifs in biopolymers. In: Proc. Int. Conf. Intell. Syst. Mol. Biol. 1994;2:28-36. DOI citeulike-article-id:878292. PMID 7584402.</mixed-citation><mixed-citation xml:lang="en">Bailey T.L., Elkan C. Fitting a mixture model by expectation maximization to discover motifs in biopolymers. In: Proc. Int. Conf. Intell. Syst. Mol. Biol. 1994;2:28-36. DOI citeulike-article-id:878292. PMID 7584402.</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Benos P.V., Bulyk M.L., Stormo G.D. Additivity in protein-DNA interactions: how good an approximation is it? Nucleic Acids Res. 2002;30(20):4442-4451. DOI 10.1093/nar/gkf578.</mixed-citation><mixed-citation xml:lang="en">Benos P.V., Bulyk M.L., Stormo G.D. Additivity in protein-DNA interactions: how good an approximation is it? Nucleic Acids Res. 2002;30(20):4442-4451. 
DOI 10.1093/nar/gkf578.</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Bi Y., Kim H., Gupta R., Davuluri R.V. Tree-based position weight matrix approach to model transcription factor binding site profiles. PLoS One. 2011;6(9):e24210. DOI 10.1371/journal.pone.0024210.</mixed-citation><mixed-citation xml:lang="en">Bi Y., Kim H., Gupta R., Davuluri R.V. Tree-based position weight matrix approach to model transcription factor binding site profiles. PLoS One. 2011;6(9):e24210. DOI 10.1371/journal.pone.0024210.</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Bulyk M.L., Johnson P.L.F., Church G.M. Nucleotides of transcription factor binding sites exert interdependent effects on the binding affinities of transcription factors. Nucleic Acids Res. 2002;30(5):1255-1261. DOI 10.1093/nar/30.5.1255.</mixed-citation><mixed-citation xml:lang="en">Bulyk M.L., Johnson P.L.F., Church G.M. Nucleotides of transcription factor binding sites exert interdependent effects on the binding affinities of transcription factors. Nucleic Acids Res. 2002;30(5):1255-1261. DOI 10.1093/nar/30.5.1255.</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Chen X., Wei H., Li J., Liang X., Dai S., Jiang L., Guo M., Qu L., Chen Z., Chen L., Chen Y. Structural basis for DNA recognition by FOXC2. Nucleic Acids Res. 2019;47(7):3752-3764. DOI 10.1093/nar/gkz077.</mixed-citation><mixed-citation xml:lang="en">Chen X., Wei H., Li J., Liang X., Dai S., Jiang L., Guo M., Qu L., Chen Z., Chen L., Chen Y. Structural basis for DNA recognition by FOXC2. Nucleic Acids Res. 2019;47(7):3752-3764. 
DOI 10.1093/nar/gkz077.</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Chèneby J., Ménétrier Z., Mestdagh M., Rosnet T., Douida A., Rhalloussi W., Bergon A., Lopez F., Ballester B. ReMap 2020: a database of regulatory regions from an integrative analysis of Human and Arabidopsis DNA-binding sequencing experiments. Nucleic Acids Res. 2020;48(D1):D180-D188. DOI 10.1093/nar/gkz945.</mixed-citation><mixed-citation xml:lang="en">Chèneby J., Ménétrier Z., Mestdagh M., Rosnet T., Douida A., Rhalloussi W., Bergon A., Lopez F., Ballester B. ReMap 2020: a database of regulatory regions from an integrative analysis of Human and Arabidopsis DNA-binding sequencing experiments. Nucleic Acids Res. 2020;48(D1):D180-D188. DOI 10.1093/nar/gkz945.</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Eggeling R., Grosse I., Grau J. InMoDe: tools for learning and visualizing intra-motif dependencies of DNA binding sites. Bioinformatics. 2017;33(4):580-582. DOI 10.1093/bioinformatics/btw689.</mixed-citation><mixed-citation xml:lang="en">Eggeling R., Grosse I., Grau J. InMoDe: tools for learning and visualizing intra-motif dependencies of DNA binding sites. Bioinformatics. 2017;33(4):580-582. DOI 10.1093/bioinformatics/btw689.</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Farnham P.J. Insights from genomic profiling of transcription factors. Nat. Rev. Genet. 2009;10(9):605-616. DOI 10.1038/nrg2636.</mixed-citation><mixed-citation xml:lang="en">Farnham P.J. Insights from genomic profiling of transcription factors. Nat. Rev. Genet. 2009;10(9):605-616. DOI 10.1038/nrg2636.</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Furey T.S. 
ChIP-seq and beyond: new and improved methodologies to detect and characterize protein-DNA interactions. Nat. Rev. Genet. 2012;13(12):840-852. DOI 10.1038/nrg3306.</mixed-citation><mixed-citation xml:lang="en">Furey T.S. ChIP-seq and beyond: new and improved methodologies to detect and characterize protein-DNA interactions. Nat. Rev. Genet. 2012;13(12):840-852. DOI 10.1038/nrg3306.</mixed-citation></citation-alternatives></ref><ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Gheorghe M., Sandve G.K., Khan A., Chèneby J., Ballester B., Mathelier A. A map of direct TF-DNA interactions in the human genome. Nucleic Acids Res. 2019;47(4):e21. DOI 10.1093/nar/gky1210.</mixed-citation><mixed-citation xml:lang="en">Gheorghe M., Sandve G.K., Khan A., Chèneby J., Ballester B., Mathelier A. A map of direct TF-DNA interactions in the human genome. Nucleic Acids Res. 2019;47(4):e21. DOI 10.1093/nar/gky1210.</mixed-citation></citation-alternatives></ref><ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Gupta S., Stamatoyannopoulos J.A., Bailey T.L., Noble W.S. Quantifying similarity between motifs. Genome Biol. 2007;8(2):R24. DOI 10.1186/gb-2007-8-2-r24.</mixed-citation><mixed-citation xml:lang="en">Gupta S., Stamatoyannopoulos J.A., Bailey T.L., Noble W.S. Quantifying similarity between motifs. Genome Biol. 2007;8(2):R24. DOI 10.1186/gb-2007-8-2-r24.</mixed-citation></citation-alternatives></ref><ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Heinz S., Benner C., Spann N., Bertolino E., Lin Y.C., Laslo P., Cheng J.X., Murre C., Singh H., Glass C.K. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol. Cell. 2010;38(4):576-589. 
DOI 10.1016/j.molcel.2010.05.004.</mixed-citation><mixed-citation xml:lang="en">Heinz S., Benner C., Spann N., Bertolino E., Lin Y.C., Laslo P., Cheng J.X., Murre C., Singh H., Glass C.K. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol. Cell. 2010;38(4):576-589. DOI 10.1016/j.molcel.2010.05.004.</mixed-citation></citation-alternatives></ref><ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">Ignatieva E.V., Oshchepkov D.Y., Levitsky V.G., Vasiliev G.V., Klimova N.V., Busygina T.V., Merkulova T.I. Comparison of the results of search for the SF-1 binding sites in the promoter regions of the steroidogenic genes, using the SiteGA and SITECON methods. In: Proc. Fourth Int. Conf. Bioinform. Genome Regul. Struct. (BGRS). 2004;1:69-72.</mixed-citation><mixed-citation xml:lang="en">Ignatieva E.V., Oshchepkov D.Y., Levitsky V.G., Vasiliev G.V., Klimova N.V., Busygina T.V., Merkulova T.I. Comparison of the results of search for the SF-1 binding sites in the promoter regions of the steroidogenic genes, using the SiteGA and SITECON methods. In: Proc. Fourth Int. Conf. Bioinform. Genome Regul. Struct. (BGRS). 2004;1:69-72.</mixed-citation></citation-alternatives></ref><ref id="cit14"><label>14</label><citation-alternatives><mixed-citation xml:lang="ru">Iwafuchi-Doi M. The mechanistic basis for chromatin regulation by pioneer transcription factors. WIREs Syst. Biol. Med. 2019;11(1): e1427. DOI 10.1002/wsbm.1427.</mixed-citation><mixed-citation xml:lang="en">Iwafuchi-Doi M. The mechanistic basis for chromatin regulation by pioneer transcription factors. WIREs Syst. Biol. Med. 2019;11(1): e1427. DOI 10.1002/wsbm.1427.</mixed-citation></citation-alternatives></ref><ref id="cit15"><label>15</label><citation-alternatives><mixed-citation xml:lang="ru">Keilwagen J., Grau J. Varying levels of complexity in transcription factor binding motifs. 
Nucleic Acids Res. 2015;43(18):e119. DOI 10.1093/nar/gkv577.</mixed-citation><mixed-citation xml:lang="en">Keilwagen J., Grau J. Varying levels of complexity in transcription factor binding motifs. Nucleic Acids Res. 2015;43(18):e119. DOI 10.1093/nar/gkv577.</mixed-citation></citation-alternatives></ref><ref id="cit16"><label>16</label><citation-alternatives><mixed-citation xml:lang="ru">Kiesel A., Roth C., Ge W., Wess M., Meier M., Söding J. The BaMM web server for de-novo motif discovery and regulatory sequence analysis. Nucleic Acids Res. 2018;46(W1):W215-W220. DOI 10.1093/nar/gky431.</mixed-citation><mixed-citation xml:lang="en">Kiesel A., Roth C., Ge W., Wess M., Meier M., Söding J. The BaMM web server for de-novo motif discovery and regulatory sequence analysis. Nucleic Acids Res. 2018;46(W1):W215-W220. DOI 10.1093/nar/gky431.</mixed-citation></citation-alternatives></ref><ref id="cit17"><label>17</label><citation-alternatives><mixed-citation xml:lang="ru">Kulakovskiy I.V., Boeva V.A., Favorov A.V., Makeev V.J. Deep and wide digging for binding motifs in ChIP-Seq data. Bioinformatics. 2010;26(20):2622-2623. DOI 10.1093/bioinformatics/btq488.</mixed-citation><mixed-citation xml:lang="en">Kulakovskiy I.V., Boeva V.A., Favorov A.V., Makeev V.J. Deep and wide digging for binding motifs in ChIP-Seq data. Bioinformatics. 2010;26(20):2622-2623. DOI 10.1093/bioinformatics/btq488.</mixed-citation></citation-alternatives></ref><ref id="cit18"><label>18</label><citation-alternatives><mixed-citation xml:lang="ru">Kulakovskiy I., Levitsky V., Oshchepkov D., Bryzgalov L., Vorontsov I., Makeev V. From binding motifs in ChIP-Seq data to improved models of transcription factor binding sites. J. Bioinform. Comput. Biol. 2013;11(01):1340004. DOI 10.1142/S0219720013400040.</mixed-citation><mixed-citation xml:lang="en">Kulakovskiy I., Levitsky V., Oshchepkov D., Bryzgalov L., Vorontsov I., Makeev V. 
From binding motifs in ChIP-Seq data to improved models of transcription factor binding sites. J. Bioinform. Comput. Biol. 2013;11(01):1340004. DOI 10.1142/S0219720013400040.</mixed-citation></citation-alternatives></ref><ref id="cit19"><label>19</label><citation-alternatives><mixed-citation xml:lang="ru">Kulakovskiy I.V., Makeev V.J. Discovery of DNA motifs recognized by transcription factors through integration of different experimental sources. Biophysics (Oxf.). 2009;54(6):667-674. DOI 10.1134/S0006350909060013.</mixed-citation><mixed-citation xml:lang="en">Kulakovskiy I.V., Makeev V.J. Discovery of DNA motifs recognized by transcription factors through integration of different experimental sources. Biophysics (Oxf.). 2009;54(6):667-674. DOI 10.1134/S0006350909060013.</mixed-citation></citation-alternatives></ref><ref id="cit20"><label>20</label><citation-alternatives><mixed-citation xml:lang="ru">Kulakovskiy I.V., Vorontsov I.E., Yevshin I.S., Sharipov R.N., Fedorova A.D., Rumynskiy E.I., Medvedeva Y.A., Magana-Mora A., Bajic V.B., Papatsenko D.A., Kolpakov F.A., Makeev V.J. HOCOMOCO: towards a complete collection of transcription factor binding models for human and mouse via large-scale ChIP-Seq analysis. Nucleic Acids Res. 2018;46(D1):D252-D259. DOI 10.1093/nar/gkx1106.</mixed-citation><mixed-citation xml:lang="en">Kulakovskiy I.V., Vorontsov I.E., Yevshin I.S., Sharipov R.N., Fedorova A.D., Rumynskiy E.I., Medvedeva Y.A., Magana-Mora A., Bajic V.B., Papatsenko D.A., Kolpakov F.A., Makeev V.J. HOCOMOCO: towards a complete collection of transcription factor binding models for human and mouse via large-scale ChIP-Seq analysis. Nucleic Acids Res. 2018;46(D1):D252-D259. DOI 10.1093/nar/gkx1106.</mixed-citation></citation-alternatives></ref><ref id="cit21"><label>21</label><citation-alternatives><mixed-citation xml:lang="ru">Lambert S.A., Jolma A., Campitelli L.F., Das P.K., Yin Y., Albu M., Chen X., Taipale J., Hughes T.R., Weirauch M.T. 
The human transcription factors. Cell. 2018;172(4):650-665. DOI 10.1016/j.cell.2018.01.029.</mixed-citation><mixed-citation xml:lang="en">Lambert S.A., Jolma A., Campitelli L.F., Das P.K., Yin Y., Albu M., Chen X., Taipale J., Hughes T.R., Weirauch M.T. The human transcription factors. Cell. 2018;172(4):650-665. DOI 10.1016/j.cell.2018.01.029.</mixed-citation></citation-alternatives></ref><ref id="cit22"><label>22</label><citation-alternatives><mixed-citation xml:lang="ru">Latchman D.S. Transcription factors: bound to activate or repress. Trends Biochem. Sci. 2001;26(4):211-213. DOI 10.1016/S0968-0004(01)01812-6.</mixed-citation><mixed-citation xml:lang="en">Latchman D.S. Transcription factors: bound to activate or repress. Trends Biochem. Sci. 2001;26(4):211-213. DOI 10.1016/S0968-0004(01)01812-6.</mixed-citation></citation-alternatives></ref><ref id="cit23"><label>23</label><citation-alternatives><mixed-citation xml:lang="ru">Levitsky V.G., Ignatieva E.V., Ananko E.A., Turnaev I.I., Merkulova T.I., Kolchanov N.A., Hodgman T.C.T. Effective transcription factor binding site prediction using a combination of optimization, a genetic algorithm and discriminant analysis to capture distant interactions. BMC Bioinform. 2007;8(1):1-20. DOI 10.1186/1471-2105-8-481.</mixed-citation><mixed-citation xml:lang="en">Levitsky V.G., Ignatieva E.V., Ananko E.A., Turnaev I.I., Merkulova T.I., Kolchanov N.A., Hodgman T.C.T. Effective transcription factor binding site prediction using a combination of optimization, a genetic algorithm and discriminant analysis to capture distant interactions. BMC Bioinform. 2007;8(1):1-20. DOI 10.1186/1471-2105-8-481.</mixed-citation></citation-alternatives></ref><ref id="cit24"><label>24</label><citation-alternatives><mixed-citation xml:lang="ru">Levitsky V.G., Kulakovskiy I.V., Ershov N.I., Oshchepkov D.Y., Makeev V.J., Hodgman T.C., Merkulova T.I. 
Application of experimentally verified transcription factor binding sites models for computational analysis of ChIP-Seq data. BMC Genom. 2014;15(1):80. DOI 10.1186/1471-2164-15-80.</mixed-citation><mixed-citation xml:lang="en">Levitsky V.G., Kulakovskiy I.V., Ershov N.I., Oshchepkov D.Y., Makeev V.J., Hodgman T.C., Merkulova T.I. Application of experimentally verified transcription factor binding sites models for computational analysis of ChIP-Seq data. BMC Genom. 2014;15(1):80. DOI 10.1186/1471-2164-15-80.</mixed-citation></citation-alternatives></ref><ref id="cit25"><label>25</label><citation-alternatives><mixed-citation xml:lang="ru">Levitsky V.G., Oshchepkov D.Y., Klimova N.V., Ignatieva E.V., Vasiliev G.V., Merkulov V.M., Merkulova T.I. Hidden heterogeneity of transcription factor binding sites: a case study of SF-1. Comput. Biol. Chem. 2016;64:19-32. DOI 10.1016/j.compbiolchem.2016.04.008.</mixed-citation><mixed-citation xml:lang="en">Levitsky V.G., Oshchepkov D.Y., Klimova N.V., Ignatieva E.V., Vasiliev G.V., Merkulov V.M., Merkulova T.I. Hidden heterogeneity of transcription factor binding sites: a case study of SF-1. Comput. Biol. Chem. 2016;64:19-32. DOI 10.1016/j.compbiolchem.2016.04.008.</mixed-citation></citation-alternatives></ref><ref id="cit26"><label>26</label><citation-alternatives><mixed-citation xml:lang="ru">Lloyd S.M., Bao X. Pinpointing the genomic localizations of chromatin-associated proteins: the yesterday, today, and tomorrow of ChIP-seq. Curr. Protoc. Cell Biol. 2019;84(1):e89. DOI 10.1002/cpcb.89.</mixed-citation><mixed-citation xml:lang="en">Lloyd S.M., Bao X. Pinpointing the genomic localizations of chromatin-associated proteins: the yesterday, today, and tomorrow of ChIP-seq. Curr. Protoc. Cell Biol. 2019;84(1):e89. DOI 10.1002/cpcb.89.</mixed-citation></citation-alternatives></ref><ref id="cit27"><label>27</label><citation-alternatives><mixed-citation xml:lang="ru">Machanick P., Bailey T.L. 
MEME-ChIP: motif analysis of large DNA datasets. Bioinformatics. 2011;27(12):1696-1697. DOI 10.1093/bioinformatics/btr189.</mixed-citation><mixed-citation xml:lang="en">Machanick P., Bailey T.L. MEME-ChIP: motif analysis of large DNA datasets. Bioinformatics. 2011;27(12):1696-1697. DOI 10.1093/bioinformatics/btr189.</mixed-citation></citation-alternatives></ref><ref id="cit28"><label>28</label><citation-alternatives><mixed-citation xml:lang="ru">Mathelier A., Wasserman W.W. The next generation of transcription factor binding site prediction. PLoS Comput. Biol. 2013;9(9): e1003214. DOI 10.1371/journal.pcbi.1003214.</mixed-citation><mixed-citation xml:lang="en">Mathelier A., Wasserman W.W. The next generation of transcription factor binding site prediction. PLoS Comput. Biol. 2013;9(9): e1003214. DOI 10.1371/journal.pcbi.1003214.</mixed-citation></citation-alternatives></ref><ref id="cit29"><label>29</label><citation-alternatives><mixed-citation xml:lang="ru">McClish D.K. Analyzing a portion of the ROC curve. Med. Decis. Mak. 1989;9(3):190-195. DOI 10.1177/0272989X8900900307.</mixed-citation><mixed-citation xml:lang="en">McClish D.K. Analyzing a portion of the ROC curve. Med. Decis. Mak. 1989;9(3):190-195. DOI 10.1177/0272989X8900900307.</mixed-citation></citation-alternatives></ref><ref id="cit30"><label>30</label><citation-alternatives><mixed-citation xml:lang="ru">Mitra S., Biswas A., Narlikar L. DIVERSITY in binding, regulation, and evolution revealed from high-throughput ChIP. PLoS Comput. Biol. 2018;14(4):1-20. DOI 10.1371/journal.pcbi.1006090.</mixed-citation><mixed-citation xml:lang="en">Mitra S., Biswas A., Narlikar L. DIVERSITY in binding, regulation, and evolution revealed from high-throughput ChIP. PLoS Comput. Biol. 2018;14(4):1-20. DOI 10.1371/journal.pcbi.1006090.</mixed-citation></citation-alternatives></ref><ref id="cit31"><label>31</label><citation-alternatives><mixed-citation xml:lang="ru">Morgunova E., Taipale J. 
Structural perspective of cooperative transcription factor binding. Curr. Opin. Struct. Biol. 2017;47:1-8. DOI 10.1016/j.sbi.2017.03.006.</mixed-citation><mixed-citation xml:lang="en">Morgunova E., Taipale J. Structural perspective of cooperative transcription factor binding. Curr. Opin. Struct. Biol. 2017;47:1-8. DOI 10.1016/j.sbi.2017.03.006.</mixed-citation></citation-alternatives></ref><ref id="cit32"><label>32</label><citation-alternatives><mixed-citation xml:lang="ru">Morgunova E., Yin Y., Das P.K., Jolma A., Zhu F., Popov A., Xu Y., Nilsson L., Taipale J. Two distinct DNA sequences recognized by transcription factors represent enthalpy and entropy optima. eLife. 2018;7:1-21. DOI 10.7554/eLife.32963.</mixed-citation><mixed-citation xml:lang="en">Morgunova E., Yin Y., Das P.K., Jolma A., Zhu F., Popov A., Xu Y., Nilsson L., Taipale J. Two distinct DNA sequences recognized by transcription factors represent enthalpy and entropy optima. eLife. 2018;7:1-21. DOI 10.7554/eLife.32963.</mixed-citation></citation-alternatives></ref><ref id="cit33"><label>33</label><citation-alternatives><mixed-citation xml:lang="ru">Park P.J. ChIP-seq: advantages and challenges of a maturing technology. Nat. Rev. Genet. 2009;10(10):669-680. DOI 10.1038/nrg2641.</mixed-citation><mixed-citation xml:lang="en">Park P.J. ChIP-seq: advantages and challenges of a maturing technology. Nat. Rev. Genet. 2009;10(10):669-680. DOI 10.1038/nrg2641.</mixed-citation></citation-alternatives></ref><ref id="cit34"><label>34</label><citation-alternatives><mixed-citation xml:lang="ru">Quinlan A.R., Hall I.M. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010;26(6):841-842. DOI 10.1093/bioinformatics/btq033.</mixed-citation><mixed-citation xml:lang="en">Quinlan A.R., Hall I.M. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010;26(6):841-842. 
DOI 10.1093/bioinformatics/btq033.</mixed-citation></citation-alternatives></ref><ref id="cit35"><label>35</label><citation-alternatives><mixed-citation xml:lang="ru">Rogers J.M., Waters C.T., Seegar T.C.M., Jarrett S.M., Hallworth A.N., Blacklow S.C., Bulyk M.L. Bispecific forkhead transcription factor FoxN3 recognizes two distinct motifs with different DNA shapes. Mol. Cell. 2019;74(2):245-253. DOI 10.1016/j.molcel.2019.01.019.</mixed-citation><mixed-citation xml:lang="en">Rogers J.M., Waters C.T., Seegar T.C.M., Jarrett S.M., Hallworth A.N., Blacklow S.C., Bulyk M.L. Bispecific forkhead transcription factor FoxN3 recognizes two distinct motifs with different DNA shapes. Mol. Cell. 2019;74(2):245-253. DOI 10.1016/j.molcel.2019.01.019.</mixed-citation></citation-alternatives></ref><ref id="cit36"><label>36</label><citation-alternatives><mixed-citation xml:lang="ru">Samee M.A.H., Bruneau B.G., Pollard K.S. A de novo shape motif discovery algorithm reveals preferences of transcription factors for DNA shape beyond sequence motifs. Cell Syst. 2019;8(1):27-42. DOI 10.1016/j.cels.2018.12.001.</mixed-citation><mixed-citation xml:lang="en">Samee M.A.H., Bruneau B.G., Pollard K.S. A de novo shape motif discovery algorithm reveals preferences of transcription factors for DNA shape beyond sequence motifs. Cell Syst. 2019;8(1):27-42. DOI 10.1016/j.cels.2018.12.001.</mixed-citation></citation-alternatives></ref><ref id="cit37"><label>37</label><citation-alternatives><mixed-citation xml:lang="ru">Siebert M., Söding J. Bayesian Markov models consistently outperform PWMs at predicting motifs in nucleotide sequences. Nucleic Acids Res. 2016;44(13):6055-6069. DOI 10.1093/nar/gkw521.</mixed-citation><mixed-citation xml:lang="en">Siebert M., Söding J. Bayesian Markov models consistently outperform PWMs at predicting motifs in nucleotide sequences. Nucleic Acids Res. 2016;44(13):6055-6069. 
DOI 10.1093/nar/gkw521.</mixed-citation></citation-alternatives></ref><ref id="cit38"><label>38</label><citation-alternatives><mixed-citation xml:lang="ru">Srivastava D., Mahony S. Sequence and chromatin determinants of transcription factor binding and the establishment of cell type-specific binding patterns. Biochim. Biophys. Acta – Gene Regul. Mech. 2020;1863(6):e194443. DOI 10.1016/j.bbagrm.2019.194443.</mixed-citation><mixed-citation xml:lang="en">Srivastava D., Mahony S. Sequence and chromatin determinants of transcription factor binding and the establishment of cell type-specific binding patterns. Biochim. Biophys. Acta – Gene Regul. Mech. 2020;1863(6):e194443. DOI 10.1016/j.bbagrm.2019.194443.</mixed-citation></citation-alternatives></ref><ref id="cit39"><label>39</label><citation-alternatives><mixed-citation xml:lang="ru">Stormo G.D. DNA binding sites: representation and discovery. Bioinformatics. 2000;16(1):16-23. DOI 10.1093/bioinformatics/16.1.16.</mixed-citation><mixed-citation xml:lang="en">Stormo G.D. DNA binding sites: representation and discovery. Bioinformatics. 2000;16(1):16-23. DOI 10.1093/bioinformatics/16.1.16.</mixed-citation></citation-alternatives></ref><ref id="cit40"><label>40</label><citation-alternatives><mixed-citation xml:lang="ru">Wallerman O., Motallebipour M., Enroth S., Patra K., Bysani M.S.R., Komorowski J., Wadelius C. Molecular interactions between HNF4a, FOXA2 and GABP identified at regulatory DNA elements through ChIP-sequencing. Nucleic Acids Res. 2009;37(22):7498-7508. DOI 10.1093/nar/gkp823.</mixed-citation><mixed-citation xml:lang="en">Wallerman O., Motallebipour M., Enroth S., Patra K., Bysani M.S.R., Komorowski J., Wadelius C. Molecular interactions between HNF4a, FOXA2 and GABP identified at regulatory DNA elements through ChIP-sequencing. Nucleic Acids Res. 2009;37(22):7498-7508. 
DOI 10.1093/nar/gkp823.</mixed-citation></citation-alternatives></ref><ref id="cit41"><label>41</label><citation-alternatives><mixed-citation xml:lang="ru">Wederell E.D., Bilenky M., Cullum R., Thiessen N., Dagpinar M., Delaney A., Varhol R., Zhao Y., Zeng T., Bernier B., Ingham M., Hirst M., Robertson G., Marra M.A., Jones S., Hoodless P.A. Global analysis of in vivo Foxa2-binding sites in mouse adult liver using massively parallel sequencing. Nucleic Acids Res. 2008;36(14):4549-4564. DOI 10.1093/nar/gkn382.</mixed-citation><mixed-citation xml:lang="en">Wederell E.D., Bilenky M., Cullum R., Thiessen N., Dagpinar M., Delaney A., Varhol R., Zhao Y., Zeng T., Bernier B., Ingham M., Hirst M., Robertson G., Marra M.A., Jones S., Hoodless P.A. Global analysis of in vivo Foxa2-binding sites in mouse adult liver using massively parallel sequencing. Nucleic Acids Res. 2008;36(14):4549-4564. DOI 10.1093/nar/gkn382.</mixed-citation></citation-alternatives></ref><ref id="cit42"><label>42</label><citation-alternatives><mixed-citation xml:lang="ru">Worsley Hunt R., Wasserman W.W. Non-targeted transcription factors motifs are a systemic component of ChIP-seq datasets. Genome Biol. 2014;15(7):412. DOI 10.1186/s13059-014-0412-4.</mixed-citation><mixed-citation xml:lang="en">Worsley Hunt R., Wasserman W.W. Non-targeted transcription factors motifs are a systemic component of ChIP-seq datasets. Genome Biol. 2014;15(7):412. DOI 10.1186/s13059-014-0412-4.</mixed-citation></citation-alternatives></ref><ref id="cit43"><label>43</label><citation-alternatives><mixed-citation xml:lang="ru">Yang L., Zhou T., Dror I., Mathelier A., Wasserman W.W., Gordân R., Rohs R. TFBSshape: a motif database for DNA shape features of transcription factor binding sites. Nucleic Acids Res. 2014;42(D1):D148-D155. DOI 10.1093/nar/gkt1087.</mixed-citation><mixed-citation xml:lang="en">Yang L., Zhou T., Dror I., Mathelier A., Wasserman W.W., Gordân R., Rohs R. 
TFBSshape: a motif database for DNA shape features of transcription factor binding sites. Nucleic Acids Res. 2014;42(D1):D148-D155. DOI 10.1093/nar/gkt1087.</mixed-citation></citation-alternatives></ref><ref id="cit44"><label>44</label><citation-alternatives><mixed-citation xml:lang="ru">Zhang M.Q., Marr T.G. A weight array method for splicing signal analysis. Bioinformatics. 1993;9(5):499-509. DOI 10.1093/bioinformatics/9.5.499.</mixed-citation><mixed-citation xml:lang="en">Zhang M.Q., Marr T.G. A weight array method for splicing signal analysis. Bioinformatics. 1993;9(5):499-509. DOI 10.1093/bioinformatics/9.5.499.</mixed-citation></citation-alternatives></ref><ref id="cit45"><label>45</label><citation-alternatives><mixed-citation xml:lang="ru">Zhang Y., Liu T., Meyer C.A., Eeckhoute J., Johnson D.S., Bernstein B.E., Nusbaum C., Myers R.M., Brown M., Li W., Liu X.S. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. DOI 10.1186/gb-2008-9-9-r137.</mixed-citation><mixed-citation xml:lang="en">Zhang Y., Liu T., Meyer C.A., Eeckhoute J., Johnson D.S., Bernstein B.E., Nusbaum C., Myers R.M., Brown M., Li W., Liu X.S. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. DOI 10.1186/gb-2008-9-9-r137.</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
