<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">vavilov</journal-id><journal-title-group><journal-title xml:lang="ru">Вавиловский журнал генетики и селекции</journal-title><trans-title-group xml:lang="en"><trans-title>Vavilov Journal of Genetics and Breeding</trans-title></trans-title-group></journal-title-group><issn pub-type="epub">2500-3259</issn><publisher><publisher-name>Institute of Cytology and Genetics of Siberian Branch of the RAS</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.18699/VJGB-22-97</article-id><article-id custom-type="elpub" pub-id-type="custom">vavilov-3582</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>КОМПЬЮТЕРНАЯ ГЕНОМИКА</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>COMPUTATIONAL GENOMICS</subject></subj-group></article-categories><title-group><article-title>FastContext: инструмент для контекстного анализа последовательностей в данных секвенирования нового поколения (NGS)</article-title><trans-title-group xml:lang="en"><trans-title>FastContext: A tool for identification of adapters and other sequence patterns in next generation sequencing (NGS) data</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-3480-3963</contrib-id><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Весна</surname><given-names>Э.</given-names></name><name name-style="western" xml:lang="en"><surname>Viesná</surname><given-names>E.</given-names></name></name-alternatives><bio 
xml:lang="ru"><p>Новосибирск</p></bio><bio xml:lang="en"><p>Novosibirsk</p></bio><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-5573-3100</contrib-id><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Фишман</surname><given-names>В. С.</given-names></name><name name-style="western" xml:lang="en"><surname>Fishman</surname><given-names>V.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Новосибирск</p></bio><bio xml:lang="en"><p>Novosibirsk</p></bio><email xlink:type="simple">minja@bionet.nsc.ru</email><xref ref-type="aff" rid="aff-1"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru">Федеральный исследовательский центр Институт цитологии и генетики Сибирского отделения Российской академии наук; Новосибирский национальный исследовательский государственный университет<country>Россия</country></aff><aff xml:lang="en">Institute of Cytology and Genetics of the Siberian Branch of the Russian Academy of Sciences; Novosibirsk State University<country>Russian Federation</country></aff></aff-alternatives><pub-date pub-type="collection"><year>2022</year></pub-date><pub-date pub-type="epub"><day>05</day><month>01</month><year>2023</year></pub-date><volume>26</volume><issue>8</issue><fpage>806</fpage><lpage>809</lpage><permissions><copyright-statement>Copyright © Весна Э., Фишман В.С., 2023</copyright-statement><copyright-year>2023</copyright-year><copyright-holder xml:lang="ru">Весна Э., Фишман В.С.</copyright-holder><copyright-holder xml:lang="en">Viesná E., Fishman V.</copyright-holder><license license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri 
xlink:href="https://vavilov.elpub.ru/jour/article/view/3582">https://vavilov.elpub.ru/jour/article/view/3582</self-uri><abstract><p>Бурное развитие методов секвенирования нового поколения (next generation sequencing, NGS) породило потребность в детальном анализе и контроле качества на каждом этапе протокола приготовления геномных библиотек. Протоколы могут включать в себя этапы с внедрением различного рода служебных последовательностей, таких как адаптеры, праймеры, а также баркоды, специфичные для каждого образца, клетки или молекулы ДНК. Несмотря на достаточно высокий уровень современных знаний в молекулярной биологии, в процессе разработки протоколов NGS исследователи часто сталкиваются с неожиданными экспериментальными данными, которые могут быть результатом недостатка информации о молекулярных процессах, сопровождающих приготовление геномных библиотек, или, в отдельных случаях, дефектом производства реактивов. Обнаружение и анализ распределения служебных последовательностей в полученных молекулах ДНК могут быть важным источником информации, необходимой для оптимизации протокола приготовления геномных библиотек. В настоящей статье представлена утилита FastContext, с помощью которой возможен анализ структуры прочтений с точки зрения присутствия определенных последовательностей и их взаимного расположения в прочтении. Алгоритм принимает на вход необработанные данные секвенирования в формате FastQ, а затем генерирует удобные для интерпретации представления структуры прочтений на основе заданных пользователем паттернов, высчитывает количество подобных структур и их долю от общего числа прочтений. Несмотря на простоту алгоритма, FastContext может быть полезен при анализе структуры прочтений, он помогает лучше понять молекулярные процессы, происходящие на разных стадиях приготовления геномных библиотек и, как следствие, открывает возможности для усовершенствования протокола. 
FastContext – это проект с открытым исходным кодом, распространяемый под свободной лицензией GNU General Public License v3, полностью написанный на языке программирования Python и основанный на широко используемых программных пакетах и форматах данных. Таким образом, он может быть легко использован под любой операционной системой, исправлен и дополнен при необходимости. FastContext доступен в виде пакета в Python Package Index (https://pypi.org/project/FastContext), исходный код хранится на GitHub (https://github.com/regnveig/FastContext).</p></abstract><trans-abstract xml:lang="en"><p>The development of next generation sequencing (NGS) methods has created the need for detailed analysis and control of each protocol step. NGS library preparation protocols may include steps with incorporation of various service sequences, such as sequencing adapters, primers, sample-, cell-, and molecule-specific barcodes. Despite a fairly high level of current knowledge, during the protocol development process researchers often have to deal with various kinds of unexpected experiment outcomes, which result either from lack of information, lack of knowledge, or defects in reagent manufacturing. Detection and analysis of service sequences, their distribution and linkage may provide important information for protocol optimization. Here we introduce FastContext, a tool designed to analyze NGS read structure, based on sequence features found in reads, and their relative position in the read. The algorithm is able to create human readable read structures with user-specified patterns, to calculate counts and percentage of every read structure. Despite the simplicity of the algorithm, FastContext may be useful in read structure analysis and, as a result, can help better understand molecular processes that take place at different stages of NGS library preparation. 
The project is open-source software, distributed under GNU GPL v3, entirely written in the programming language Python, and based on well-maintained packages and commonly used data formats. Thus, it is cross-platform, may be patched or upgraded by the user if necessary. The FastContext package is available at the Python Package Index (https://pypi.org/project/FastContext), the source code is available at GitHub (https://github.com/regnveig/FastContext). </p></trans-abstract><kwd-group xml:lang="ru"><kwd>секвенирование нового поколения</kwd><kwd>NGS</kwd><kwd>адаптеры</kwd><kwd>поиск паттернов</kwd><kwd>анализ прочтений</kwd></kwd-group><kwd-group xml:lang="en"><kwd>next generation sequencing</kwd><kwd>NGS</kwd><kwd>adapters</kwd><kwd>patterns search</kwd><kwd>read analysis</kwd></kwd-group><funding-group xml:lang="en"><funding-statement>This work was supported by Russian Science Foundation, grant No. 22-14-00247. High-throughput computations required for FastContext testing were performed using the Collective usage center of the Institute of Cytology and Genetics SB RAS, 121031800061-7 (Mechanisms of genetic control of development, physiological processes and behavior in animals).</funding-statement></funding-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Aldridge S., Teichmann S. Single cell transcriptomics comes of age. Nat. Commun. 2020;11(1):4307. DOI 10.1038/s41467-020-18158-5.</mixed-citation><mixed-citation xml:lang="en">Aldridge S., Teichmann S. Single cell transcriptomics comes of age. Nat. Commun. 2020;11(1):4307. DOI 10.1038/s41467-020-18158-5.</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Andrews S. FastQC: A quality control tool for high throughput sequence data. 2010. 
Available online at: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/</mixed-citation><mixed-citation xml:lang="en">Andrews S. FastQC: A quality control tool for high throughput sequence data. 2010. Available online at: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Bravo A., Typas A., Veening J. 2FAST2Q: A general-purpose sequence search and counting program for FASTQ files [preprint]. BioRxiv. 2021. DOI 10.1101/2021.12.17.473121.</mixed-citation><mixed-citation xml:lang="en">Bravo A., Typas A., Veening J. 2FAST2Q: A general-purpose sequence search and counting program for FASTQ files [preprint]. BioRxiv. 2021. DOI 10.1101/2021.12.17.473121.</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Cock P., Antao T., Chang J., Chapman B., Cox C., Dalke A., Friedberg I., Hamelryck T., Kauff F., Wilczynski B., de Hoon M. Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics. 2009;25(11):1422-1423. DOI 10.1093/bioinformatics/btp163.</mixed-citation><mixed-citation xml:lang="en">Cock P., Antao T., Chang J., Chapman B., Cox C., Dalke A., Friedberg I., Hamelryck T., Kauff F., Wilczynski B., de Hoon M. Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics. 2009;25(11):1422-1423. DOI 10.1093/bioinformatics/btp163.</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Costa-Luis C., Larroque S., Altendorf K., Mary H., Korobov M., Yorav-Raphael N., Ivanov I., Bargull M., Rodrigues N., Chen G., Newey C., Zugnoni M., Pagel M., Dektyarev M., Rothberg A., Lee A., Panteleit D., Dill F., Kemenade H., McCracken J., Nordlund M., Nechaev N., Desh O. 
tqdm: A fast, Extensible Progress Bar for Python and CLI. Zenodo. 2022. DOI 10.5281/zenodo.595120.</mixed-citation><mixed-citation xml:lang="en">Costa-Luis C., Larroque S., Altendorf K., Mary H., Korobov M., Yorav-Raphael N., Ivanov I., Bargull M., Rodrigues N., Chen G., Newey C., Zugnoni M., Pagel M., Dektyarev M., Rothberg A., Lee A., Panteleit D., Dill F., Kemenade H., McCracken J., Nordlund M., Nechaev N., Desh O. tqdm: A fast, Extensible Progress Bar for Python and CLI. Zenodo. 2022. DOI 10.5281/zenodo.595120.</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Gridina M., Mozheiko E., Valeev E., Nazarenko L., Lopatkina M., Markova Z., Yablonskaya M., Voinova V., Shilova N., Lebedev I., Fishman V. A cookbook for DNase Hi-C. Epigenetics Chromatin. 2021;14(1):15. DOI 10.1186/s13072-021-00389-5.</mixed-citation><mixed-citation xml:lang="en">Gridina M., Mozheiko E., Valeev E., Nazarenko L., Lopatkina M., Markova Z., Yablonskaya M., Voinova V., Shilova N., Lebedev I., Fishman V. A cookbook for DNase Hi-C. Epigenetics Chromatin. 2021;14(1):15. DOI 10.1186/s13072-021-00389-5.</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Martin M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet J. 2011;17(1):10-12. DOI 10.14806/ej.17.1.200.</mixed-citation><mixed-citation xml:lang="en">Martin M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet J. 2011;17(1):10-12. DOI 10.14806/ej.17.1.200.</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Smirnov A., Fishman V., Yunusova A., Korablev A., Serova I., Skryabin B., Rozhdestvensky T., Battulin N. DNA barcoding reveals that injected transgenes are predominantly processed by homologous recombination in mouse zygote. Nucleic Acids Res. 
2020;48(2):719-735. DOI 10.1093/nar/gkz1085.</mixed-citation><mixed-citation xml:lang="en">Smirnov A., Fishman V., Yunusova A., Korablev A., Serova I., Skryabin B., Rozhdestvensky T., Battulin N. DNA barcoding reveals that injected transgenes are predominantly processed by homologous recombination in mouse zygote. Nucleic Acids Res. 2020;48(2):719-735. DOI 10.1093/nar/gkz1085.</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">The Pandas Development Team. pandas-dev/pandas: Pandas. Zenodo. 2020. DOI 10.5281/zenodo.3509134.</mixed-citation><mixed-citation xml:lang="en">The Pandas Development Team. pandas-dev/pandas: Pandas. Zenodo. 2020. DOI 10.5281/zenodo.3509134.</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
