<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">vavilov</journal-id><journal-title-group><journal-title xml:lang="ru">Вавиловский журнал генетики и селекции</journal-title><trans-title-group xml:lang="en"><trans-title>Vavilov Journal of Genetics and Breeding</trans-title></trans-title-group></journal-title-group><issn pub-type="epub">2500-3259</issn><publisher><publisher-name>Institute of Cytology and Genetics of Siberian Branch of the RAS</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.18699/VJ15.087</article-id><article-id custom-type="elpub" pub-id-type="custom">vavilov-482</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>Геномика и анализ полиморфизмов</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>Genomics and Polymorphism Analysis</subject></subj-group></article-categories><title-group><article-title>Использование графических ускорителей для выявления функциональных сигналов в регуляторных районах генов прокариот</article-title><trans-title-group xml:lang="en"><trans-title>The use of graphics accelerators to detect functional signals in the regulatory regions of prokaryotic genes</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Вишневский</surname><given-names>О. В.</given-names></name><name name-style="western" xml:lang="en"><surname>Vishnevsky</surname><given-names>O. 
V.</given-names></name></name-alternatives><email xlink:type="simple">oleg@bionet.nsc.ru</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Бочарников</surname><given-names>А. В.</given-names></name><name name-style="western" xml:lang="en"><surname>Bocharnikov</surname><given-names>A. V.</given-names></name></name-alternatives><xref ref-type="aff" rid="aff-2"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Романенко</surname><given-names>А. А.</given-names></name><name name-style="western" xml:lang="en"><surname>Romanenko</surname><given-names>A. A.</given-names></name></name-alternatives><xref ref-type="aff" rid="aff-2"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru">Федеральное государственное бюджетное научное учреждение «Федеральный исследовательский центр Институт цитологии и генетики Сибирского отделения Российской академии наук», Новосибирск, Россия&#13;
&#13;
Федеральное государственное автономное образовательное учреждение высшего образования «Новосибирский национальный исследовательский государственный университет», Новосибирск, Россия<country>Россия</country></aff><aff xml:lang="en">Institute of Cytology and Genetics SB RAS, Novosibirsk, Russia<country>Russian Federation</country></aff></aff-alternatives><aff-alternatives id="aff-2"><aff xml:lang="ru">Федеральное государственное бюджетное образовательное учреждение высшего профессионального образования "Новосибирский национальный исследовательский государственный университет" (НГУ)<country>Россия</country></aff><aff xml:lang="en">Novosibirsk State University, Novosibirsk, Russia<country>Russian Federation</country></aff></aff-alternatives><pub-date pub-type="collection"><year>2015</year></pub-date><pub-date pub-type="epub"><day>02</day><month>01</month><year>2016</year></pub-date><volume>19</volume><issue>6</issue><fpage>661</fpage><lpage>667</lpage><permissions><copyright-statement>Copyright &#x00A9; Вишневский О.В., Бочарников А.В., Романенко А.А., 2016</copyright-statement><copyright-year>2016</copyright-year><copyright-holder xml:lang="ru">Вишневский О.В., Бочарников А.В., Романенко А.А.</copyright-holder><copyright-holder xml:lang="en">Vishnevsky O.V., Bocharnikov A.V., Romanenko A.A.</copyright-holder><license license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri xlink:href="https://vavilov.elpub.ru/jour/article/view/482">https://vavilov.elpub.ru/jour/article/view/482</self-uri><abstract><p>Различные методы выявления значимых контекстных сигналов широко используются для поиска сайтов связывания транскрипционных факторов и выявления структурно-функциональной организации регуляторных районов генов. 
Такие методы не требуют ни предварительного выравнивания выборки анализируемых последовательностей, ни экспериментальной информации о точном расположении сайтов связывания транскрипционных факторов. Широкое распространение получили методы поиска контекстных сигналов, основанные на выявлении вырожденных олигонуклеотидных мотивов, записанных в 15-буквенном коде номенклатуры IUPAC (International Union of Pure and Applied Chemistry). Существенной сложностью использования вырожденных мотивов является их огромное разнообразие, что заставляет исследователей применять различные эвристические подходы, не гарантирующие нахождение наиболее значимого сигнала. Появление высокопроизводительных вычислительных систем, основанных на использовании графических ускорителей, сделало возможным применение точных полнопереборных методов для выявления значимых мотивов. Нами разработана новая система выявления значимых вырожденных олигонуклеотидных мотивов заданной длины в регуляторных районах генов, основанная на использовании широко распространенных графических ускорителей и обеспечивающая поиск сигнала с наибольшей значимостью. Показана высокая эффективность использования графических ускорителей (GPU) в сравнении с расчетами на центральном процессоре (CPU). С использованием предложенного подхода проанализированы регуляторные районы генов B. subtilis, E. coli, H. pylori, M. gallisepticum, M. genitalium и M. pneumoniae. Для каждого вида прокариот были выявлены наборы вырожденных мотивов и проведена их классификация на основе сходства с сайтами связывания транскрипционных факторов E. coli.</p></abstract><trans-abstract xml:lang="en"><p>Various methods for identification of significant contextual signals are widely used to search for transcription factor binding sites and to identify the structural and functional organization of regulatory regions. 
These methods do not require any pre-alignment of the sample sequences analyzed or experimental information about the exact location of transcription factor binding sites. Methods of searching for contextual signals, based on the identification of degenerate oligonucleotide motifs recorded in the 15-letter IUPAC code, have become widespread. An essential problem with degenerate motifs is their great diversity, which makes the researchers apply heuristics which do not guarantee that the most significant signal will be found. The development of high-performance computing systems based on the use of graphics cards has made it possible to use the exact exhaustive methods to identify significant motifs. We have developed a new system for identifying significant degenerate oligonucleotide motifs of a given length in the regulatory regions based on the use of widespread graphics cards that provides a search for the signal with the greatest significance. High efficiency of the GPU compared with CPU was demonstrated. Using the proposed approach, we analyzed the regulatory regions of B. subtilis, E. coli, H. pylori, M. gallisepticum, M. genitalium and M. pneumoniae genes. Sets of degenerate motifs have been identified for each species of prokaryotes. They were classified on the basis of similarity with the transcription factor binding sites of E. coli.</p></trans-abstract><kwd-group xml:lang="ru"><kwd>вырожденный олигонуклеотидный мотив</kwd><kwd>регуляция транскрипции</kwd><kwd>регуляция трансляции</kwd><kwd>CUDA</kwd><kwd>графические ускорители</kwd></kwd-group><kwd-group xml:lang="en"><kwd>degenerate oligonucleotide motif</kwd><kwd>transcription regulation</kwd><kwd>translation regulation</kwd><kwd>CUDA</kwd><kwd>GPU</kwd></kwd-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Baker Z.K., Prasanna V.K. 
An architecture for efficient hardware data mining using reconfigurable computing systems. 14th Annual IEEE Symp. on Field-Programmable Custom Computing Machines, 2006.</mixed-citation><mixed-citation xml:lang="en">Baker Z.K., Prasanna V.K. An architecture for efficient hardware data mining using  reconfigurable computing systems. 14th Annual IEEE Symp. on Field-Programmable  Custom Computing Machines, 2006.</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Benson D.A., Cavanaugh M., Clark K., Karsch-Mizrachi I., Lipman D. J., Ostell J., Sayers E.W. GenBank. Nucl. Acids Res. 2013;41(Database issue):D36-42.</mixed-citation><mixed-citation xml:lang="en">Benson D.A., Cavanaugh M., Clark K., Karsch-Mizrachi I., Lipman D. J., Ostell J.,  Sayers E.W. GenBank. Nucl. Acids Res. 2013;41(Database issue):D36-42.</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Elnitski L., Hardison R.C., Yang S., Kolbe D., Eswara P., O’Connor M. J., Schwartz S., Miller W., Chiaromonte F. Distinguishing regulatory DNA from neutral sites. Genome Res. 2003;13(1):64-72.</mixed-citation><mixed-citation xml:lang="en">Elnitski L., Hardison R.C., Yang S., Kolbe D., Eswara P., O’Connor M. J., Schwartz  S., Miller W., Chiaromonte F. Distinguishing regulatory DNA from neutral sites.  Genome Res. 2003;13(1):64-72.</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Fomin E.S., Alemasov N.A. Implementation of a non-bonded interaction calculation algorithm for the cell architecture. Lect. Notes Comput. Sci. 2009;5698:399-405.</mixed-citation><mixed-citation xml:lang="en">Fomin E.S., Alemasov N.A. Implementation of a non-bonded interaction calculation  algorithm for the cell architecture. Lect. Notes Comput. Sci. 
2009;5698:399-405.</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Grundy W.N., Bailey T.L., Elkan C.P. ParaMEME: a parallel implementation and a web interface for a DNA and protein motif discovery tool. CABIOS. 1996;12:303-310.</mixed-citation><mixed-citation xml:lang="en">Grundy W.N., Bailey T.L., Elkan C.P. ParaMEME: a parallel implementation and a web  interface for a DNA and protein motif discovery tool. CABIOS. 1996;12:303-310.</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Hertz G.Z., Stormo G.D. Identifying DNA and protein patterns with statistically significant alignments of multiple sequences. Bioinformatics. 1999;15:563-577.</mixed-citation><mixed-citation xml:lang="en">Hertz G.Z., Stormo G.D. Identifying DNA and protein patterns with statistically significant alignments of multiple sequences. Bioinformatics. 1999;15:563-577.</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Kolchanov N.A., Ignatieva E.V., Ananko E.A., Podkolodnaya O.A., Stepanenko I.L., Merkulova T.I., Pozdnyakov M.A., Podkolodny N. L., Naumochkin A.N., Romashchenko A.G. Transcription Regulatory Regions Database (TRRD): its status in 2002. Nucl. Acids Res. 2002;30:312-317.</mixed-citation><mixed-citation xml:lang="en">Kolchanov N.A., Ignatieva E.V., Ananko E.A., Podkolodnaya O.A., Stepanenko I.L.,  Merkulova T.I., Pozdnyakov M.A., Podkolodny N. L., Naumochkin A.N., Romashchenko  A.G. Transcription Regulatory Regions Database (TRRD): its status in 2002. Nucl.  Acids Res. 2002;30:312-317.</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Lawrence C.E., Altschul S.F., Boguski M.S., Liu J.S., Neuwald A.F., Wootton J.C. 
Detecting subtle sequence signals: a Gibbs sampling strategy for multiple alignment. Science. 1993;262:208-214.</mixed-citation><mixed-citation xml:lang="en">Lawrence C.E., Altschul S.F., Boguski M.S., Liu J.S., Neuwald A.F., Wootton J.C.  Detecting subtle sequence signals: a Gibbs sampling strategy for multiple alignment. Science. 1993;262:208-214.</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Manavski S.A., Valle G. CUDA compatible GPU cards as efficient hardware accelerators for Smith–Waterman sequence alignment. BMC Bioinformatics. 2008;26;9 Suppl 2:S10.</mixed-citation><mixed-citation xml:lang="en">Manavski S.A., Valle G. CUDA compatible GPU cards as efficient hardware accelerators  for Smith–Waterman sequence alignment. BMC Bioinformatics. 2008;26;9 Suppl 2:S10.</mixed-citation></citation-alternatives></ref><ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Marsan L., Sagot M.F. Algorithms for extracting structured motifs using a suffix tree with an application to promoter and regulatory site consensus identification. J. Comput. Biol. 2000;7:345-362.</mixed-citation><mixed-citation xml:lang="en">Marsan L., Sagot M.F. Algorithms for extracting structured motifs using a suffix  tree with an application to promoter and regulatory site consensus identification. J. Comput. Biol. 2000;7:345-362.</mixed-citation></citation-alternatives></ref><ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Matys V., Kel-Margoulis O.V., Fricke E., Liebich I., Land S., Barre-Dirrie A., Reuter I., Chekmenev D., Krull M., Hornischer K., Voss N., Stegmaier P., Lewicki-Potapov B., Saxel H., Kel A.E., Wingender E. TRANSFAC and its module TRANSCompel: transcriptional gene regulation in eukaryotes. Nucl. Acids Res. 
2006;34:D108-10.</mixed-citation><mixed-citation xml:lang="en">Matys V., Kel-Margoulis O.V., Fricke E., Liebich I., Land S., Barre-Dirrie A.,  Reuter I., Chekmenev D., Krull M., Hornischer K., Voss N., Stegmaier P., Lewicki-Potapov B., Saxel H., Kel A.E., Wingender E. TRANSFAC and its module TRANSCompel:  transcriptional gene regulation in eukaryotes. Nucl. Acids Res. 2006;34:D108-10.</mixed-citation></citation-alternatives></ref><ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Mrázek J., Gaynon L.H., Karlin S. Frequent oligonucleotide motifs in genomes of three streptococci. Nucl. Acids Res. 2002;19:4216-4221.</mixed-citation><mixed-citation xml:lang="en">Mrázek J., Gaynon L.H., Karlin S. Frequent oligonucleotide motifs in genomes of  three streptococci. Nucl. Acids Res. 2002;19:4216-4221.</mixed-citation></citation-alternatives></ref><ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">NVIDIA CUDA programming guide 3.2. [http://developer.download.nvidia.com/compute/cuda/3_2/toolkit/docs/CUDA_C_Programming_Guide.pdf]</mixed-citation><mixed-citation xml:lang="en">NVIDIA CUDA programming guide 3.2. [http://developer.download.nvidia.com/compute/cuda/3_2/toolkit/docs/CUDA_C_Programming_Guide.pdf]</mixed-citation></citation-alternatives></ref><ref id="cit14"><label>14</label><citation-alternatives><mixed-citation xml:lang="ru">Osada R., Zaslavsky E., Singh M. Comparative analysis of methods for representing and searching for transcription factor binding sites. Bioinformatics. 2004;20(18):3516-3525.</mixed-citation><mixed-citation xml:lang="en">Osada R., Zaslavsky E., Singh M. Comparative analysis of methods for representing  and searching for transcription factor binding sites. Bioinformatics. 2004;20(18):3516-3525.</mixed-citation></citation-alternatives></ref><ref id="cit15"><label>15</label><citation-alternatives><mixed-citation xml:lang="ru">Pesole G., Liuni S., Dsouza M. 
PatSearch: a pattern matcher software that finds functional elements in nucleotide and protein sequences and assesses their statistical significance. Bioinformatics. 2000;16:439-450.</mixed-citation><mixed-citation xml:lang="en">Pesole G., Liuni S., Dsouza M. PatSearch: a pattern matcher software  that finds  functional elements in nucleotide and protein sequences and assesses their  statistical significance. Bioinformatics. 2000;16:439-450.</mixed-citation></citation-alternatives></ref><ref id="cit16"><label>16</label><citation-alternatives><mixed-citation xml:lang="ru">Pevzner P.A., Sze S.H. Combinatorial approaches to finding subtle signals in DNA sequences. Proc. of the 8th Int. Conf. on Intelligent Systems for Molecular Biology (ISMB). 2000.</mixed-citation><mixed-citation xml:lang="en">Pevzner P.A., Sze S.H. Combinatorial approaches to finding subtle signals in DNA  sequences. Proc. of the 8th Int. Conf. on Intelligent Systems for Molecular Biology (ISMB). 2000.</mixed-citation></citation-alternatives></ref><ref id="cit17"><label>17</label><citation-alternatives><mixed-citation xml:lang="ru">Portales-Casamar E., Thongjuea S., Kwon A.T., Arenillas D., Zhao X., Valen E., Yusuf D., Lenhard B., Wasserman W.W., Sandelin A. JASPAR 2010: the greatly expanded open-access database of transcription factor binding profiles. Nucl. Acids Res. 2010;38:D105-10.</mixed-citation><mixed-citation xml:lang="en">Portales-Casamar E., Thongjuea S., Kwon A.T., Arenillas D., Zhao X., Valen E., Yusuf  D., Lenhard B., Wasserman W.W., Sandelin A. JASPAR 2010: the greatly expanded open-access database of transcription factor binding profiles. Nucl. Acids Res. 2010;38:D105-10.</mixed-citation></citation-alternatives></ref><ref id="cit18"><label>18</label><citation-alternatives><mixed-citation xml:lang="ru">Sukhwani B., Herbordt M.C. GPU acceleration of a production molecular docking code. Proc. of 2nd Workshop on General Purpose Processing on Graphics Processing Units. 
2009.</mixed-citation><mixed-citation xml:lang="en">Sukhwani B., Herbordt M.C. GPU acceleration of a production molecular docking code.  Proc. of 2nd Workshop on General Purpose Processing on Graphics Processing Units. 2009.</mixed-citation></citation-alternatives></ref><ref id="cit19"><label>19</label><citation-alternatives><mixed-citation xml:lang="ru">Vishnevsky O.V., Gunbin K.V., Bocharnikov A.V., Berezikov E.V. Analysis of the conservative motifs in promoters of miRNA genes, expressed in different tissues of mammalians. Evolutionary Biology Concepts, Molecular and Morphological Evolution. 2011.</mixed-citation><mixed-citation xml:lang="en">Vishnevsky O.V., Gunbin K.V., Bocharnikov A.V., Berezikov E.V. Analysis of the  conservative motifs in promoters of miRNA genes, expressed in different tissues of  mammalians. Evolutionary Biology Concepts, Molecular and Morphological Evolution. 2011.</mixed-citation></citation-alternatives></ref><ref id="cit20"><label>20</label><citation-alternatives><mixed-citation xml:lang="ru">Vishnevsky O.V., Kolchanov N.A. ARGO: a web system for the detection of degenerate motifs and large-scale recognition of eukaryotic promoters. Nucl. Acids Res. 2005;33(Web Server issue):417-22.</mixed-citation><mixed-citation xml:lang="en">Vishnevsky O.V., Kolchanov N.A. ARGO: a web system for the detection of degenerate  motifs and large-scale recognition of eukaryotic promoters. Nucl. Acids Res. 
2005;33(Web Server issue):417-22.</mixed-citation></citation-alternatives></ref><ref id="cit21"><label>21</label><citation-alternatives><mixed-citation xml:lang="ru">Yooseph S., Sutton G., Rusch D.B., Halpern A.L., Williamson S.J., Remington K., Eisen J.A., Heidelberg K.B., Manning G., Li W., Jaroszewski L., Cieplak P., Miller C.S., Li H., Mashiyama S.T., Joachimiak M.P., van Belle C., Chandonia J.M., Soergel D.A., Zhai Y., Natarajan K., Lee S., Raphael B.J., Bafna V., Friedman R., Brenner S.E., Godzik A., Eisenberg D., Dixon J.E., Taylor S.S., Strausberg R.L., Frazier M., Venter J.C. The sorcerer II global ocean sampling expedition: expanding the universe of protein families. PLoS Biol. 2007;5(3):e16.</mixed-citation><mixed-citation xml:lang="en">Yooseph S., Sutton G., Rusch D.B., Halpern A.L., Williamson S.J., Remington K.,  Eisen J.A., Heidelberg K.B., Manning G., Li W., Jaroszewski L., Cieplak P., Miller  C.S., Li H., Mashiyama S.T., Joachimiak M.P., van Belle C., Chandonia J.M., Soergel  D.A., Zhai Y., Natarajan K., Lee S., Raphael B.J., Bafna V., Friedman R., Brenner  S.E., Godzik A., Eisenberg D., Dixon J.E., Taylor S.S., Strausberg R.L., Frazier M.,  Venter J.C. The sorcerer II global ocean sampling expedition: expanding the universe  of protein families. PLoS Biol. 2007;5(3):e16.</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
