SoEM-Web: a user-friendly platform for the analysis and visualization of small-organelle-enriched metagenomics data

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 34,096 characters · extracted from preprint-html · click to expand
SoEM-Web: a user-friendly platform for the analysis and visualization of small-organelle-enriched metagenomics data | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results SoEM-Web: a user-friendly platform for the analysis and visualization of small-organelle-enriched metagenomics data Jooseong Oh , Hanjin Kim , View ORCID Profile Chungoo Park doi: https://doi.org/10.1101/2025.02.12.637736 Jooseong Oh 1 School of Biological Sciences and Technology, Chonnam National University , Gwangju, 61186, Republic of Korea Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hanjin Kim 1 School of Biological 
Sciences and Technology, Chonnam National University , Gwangju, 61186, Republic of Korea Find this author on Google Scholar Find this author on PubMed Search for this author on this site Chungoo Park 1 School of Biological Sciences and Technology, Chonnam National University , Gwangju, 61186, Republic of Korea 2 Institute of Systems Biology & Life Science Informatics, Chonnam National University , Gwangju, 61186, Republic of Korea Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Chungoo Park For correspondence: chungoo{at}jnu.ac.kr Abstract Full Text Info/History Metrics Preview PDF Abstract Background Marine microeukaryotes play a crucial role in global biogeochemical cycles and ecosystem functioning. However, their diversity and distribution patterns remain poorly understood. To improve the biodiversity assessment of environmental samples, we recently developed a PCR-free, small-organelle-enriched metagenomics (SoEM) method, accompanied by a bioinformatics workflow for analyzing SoEM metagenomic data. However, the complexity of these analyses typically requires extensive bioinformatics expertise, potentially limiting their broader adoption. Methods To address this challenge, we present SoEM-Web, a user-friendly web application for the interactive analysis, classification, and visualization of SoEM metagenomic data. SoEM-Web integrates all necessary procedures, including raw sequencing data preprocessing, paired-end read merging, automated taxonomic identification, and diversity visualization, into a streamlined workflow accessible through an intuitive interface. This application is designed to facilitate SoEM data analysis for researchers who may not have advanced bioinformatics skills. Results We demonstrated the capabilities of SoEM-Web by reanalyzing previously published datasets. 
This reanalysis not only confirmed the utility of the platform but also revealed additional taxa, emphasizing the value of updated reference datasets. The application successfully simplifies complex bioinformatics processes, enhancing accessibility and reproducibility in marine microeukaryote diversity research. SoEM-Web aims to enhance accessibility and reproducibility in marine microeukaryote diversity research. The web server is freely available at http://compsysbio.re.kr/soem-web/ . Introduction The rapid advancement of high-throughput sequencing technology and bioinformatics methods has propelled environmental DNA (eDNA) metabarcoding to the forefront of marine microeukaryotic biodiversity monitoring ( Balasubramanian et al. 2021 ; Brandt et al. 2020 ; Brannock et al. 2016 ). Despite its promise, the systematic use of eDNA metabarcoding in marine biodiversity monitoring encounters several challenges ( Pawlowski et al. 2022 ; Sigsgaard et al. 2020 ; Takahashi et al. 2023 ). These include polymerase chain reaction (PCR) bias—where different species are amplified with varying efficiencies—the need for suitable eDNA-based metabarcoding markers; and the development of efficient protocols for eDNA filtration, extraction, inhibitor removal, and amplification. To address these limitations, researchers have developed alternative approaches such as metagenome skimming ( Dodsworth 2015 ; Greshake et al. 2016 ; Linard et al. 2015 ), mitochondrial metagenomics ( Crampton-Platt et al. 2015 ; Crampton-Platt et al. 2016 ; Tang et al. 2014 ), mitochondrial capture microarray ( Liu et al. 2016 ), and mitochondrial enrichment through differential centrifugation ( Zhou et al. 2013 ). Building on these advances, we recently developed a PCR-free small-organelle-enriched metagenomics (SoEM) method, which demonstrated superior performance in marine species identification compared to multi-marker eDNA metabarcoding ( Jo et al. 2019 ). 
We subsequently refined this approach to enable efficient eDNA extraction from small-volume water samples using optimized cell disruption and DNA extraction methods ( Jin et al. 2023 ). Complementing these experimental advancements, we developed a bioinformatics pipeline for SoEM metagenomic analysis. This pipeline encompasses raw sequencing data preprocessing, paired-end (PE) read merging, taxonomic identification, and visualization of taxonomic diversity. However, the implementation of these in silico analyses typically requires expertise in various software programs, potentially posing a challenge for researchers primarily focused on wet laboratory experiments. To bridge this gap, we developed SoEM-Web, a web-based tool that makes SoEM bioinformatics analyses accessible to users without extensive programming knowledge. SoEM-Web offers an interactive, point-and-click interface that allows users to easily specify parameters, run tools, and view metadata. The tool is freely available as an online web service at http://compsysbio.re.kr/soem-web/ . By simplifying the complex bioinformatics processes associated with SoEM analysis, SoEM-Web aims to facilitate the broader adoption of this method in marine biodiversity research. We anticipate that this tool will serve not only as a practical application for professional and research purposes but also as a valuable resource for training and educating novice marine biologists in metagenomic analysis techniques. Materials & Methods The SoEM-Web platform integrates five core modules, each designed to perform specific functions in the analysis pipeline: raw sequencing data upload and preprocessing, PE read merging, taxonomic identification assignment, and taxonomic diversity visualization ( Fig. 1 and Table 1 ). The following sections provide a detailed description of each module’s functionality and implementation. View this table: View inline View popup Download powerpoint Table 1: Tools for metagenomic analysis in SoEM-Web. 
Download figure Open in new tab Fig. 1. Workflow for SoEM-Web Uploading and preprocessing raw next-generation sequencing data SoEM-Web is designed to process user-uploaded FASTQ files containing DNA sequence reads. In the FASTQ format, each read is represented by four lines: an identifier beginning with “@,” the nucleotide sequence, a “+” delimiter, and corresponding quality scores ( Cock et al. 2010 ). For optimal performance, we recommend that users input PE reads, as these can be partially overlapped to generate longer single reads, thereby enhancing the accuracy of subsequent analyses. To optimize data handling and processing efficiency, SoEM-Web supports gz-compressed FASTQ files (e.g., fastq.gz) as input. In addition, the platform features a user-friendly drag-and-drop interface, allowing for the simultaneous upload of multiple files ( Afgan et al. 2018 ). This feature significantly streamlines the data input process, particularly when dealing with large datasets or multiple samples. Upon successful upload, the raw sequences undergo preprocessing using Trimmomatic v0.39 ( Bolger et al. 2014 ). This crucial step uses default settings to remove adapter sequences introduced during library preparation and to eliminate low-quality reads. Merging PE reads Following preprocessing, SoEM-Web merges cleaned forward and reverse reads that overlap to generate longer DNA sequences, a critical step for improving the accuracy of taxonomic assignments. This process is efficiently executed using FLASH (Fast Length Adjustment of SHort reads) version 1.2.11.4 ( Magoc & Salzberg 2011 ). The FLASH algorithm is applied with carefully optimized parameters to balance sensitivity and specificity in read merging. These parameters include a minimum overlap length of 10 base pairs (bps), an average read length of 300 bps, an expected fragment length of 550 bps, and a standard deviation of fragment length set to 55 bps. 
The resulting merged reads form the basis for subsequent analyses, whereas unmerged reads are excluded to maintain data quality and consistency. Assigning taxonomic identification to merged reads In the taxonomic identification phase, merged reads are treated as operational taxonomic units (OTUs) and mapped against four comprehensive reference databases compiled from NCBI ( Table 2 ). These databases include the non-redundant nucleotide database (NT), primary marker sequences from GenBank (PM), the mitochondrial whole-genome database (MT), and the plastid whole-genome database (PT). View this table: View inline View popup Download powerpoint Table 2: Four reference databases available in SoEM-Web. The mapping process uses BLAST version 2.16.0+ ( Camacho et al. 2009 ) with stringent parameters to ensure high-confidence matches. Specifically, an E-value threshold of 1e-10 is employed to minimize false positive alignments, and the “max_target_seqs” parameter is set to 1 to retain only the best match for each query sequence. Taxonomic assignment is performed using the NCBI taxonomy database ( Schoch et al. 2020 ), which provides a standardized and regularly updated classification system. Each OTU sequence is assigned the best taxonomy based on its BLAST search result and corresponding accession number. To facilitate this process, we have constructed a custom SoEM-Web database that integrates NCBI accession numbers, taxonomic IDs (taxids), taxonomy nodes, and hierarchical information. This custom database is built using taxdump files with trackable taxids ( https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump_archive/ ) and accession2taxid mapping files from the NCBI repository. To ensure the most up-to-date taxonomic information, the four reference databases (NT, PM, MT, and PT) and taxonomy archive files undergo biannual updates. 
Visualizing taxonomic diversity and distribution patterns The final module of SoEM-Web focuses on the visual representation of taxonomic composition and diversity. We have implemented a robust visualization module using R v4.4.2 and the ggplot2 v3.5.1 package ( Wickham 2016 ), complemented by custom in-house scripts. All generated graphs can be easily downloaded in multiple formats (.pdf, .jpeg, or .tiff), making them convenient for inclusion in publications, presentations, or further analyses. SoEM-Web analysis protocol The SoEM-Web analysis protocol consists of six main steps, each designed to guide users through the process of analyzing metagenomic data. Below is a detailed description of each step: Step 1: Data Upload 1. Access the SoEM-Web site at http://compsysbio.re.kr/soem-web/ . 2. Click “Upload” in the Tools menu on the left panel. 3. Drag and drop FASTQ files into the middle panel, or click “Choose local files” to select files. 4. Click “Start” to initiate the upload. Uploaded datasets will appear in the History panel. Step 2: Quality Control 5. Click “Trimmomatic” in the Tools menu. 6. Select “Paired-end (two separate input files)” for PE data. 7. Choose the appropriate input FASTQ files for R1 and R2. 8. Click “Run Tool” to start the quality control process. Step 3: Read Merging 9. Click “FLASH” in the Tools menu. 10. Select “Individual datasets” in the first drop box. 11. Choose the appropriate forward and reverse read datasets. 12. Set the parameters: minimum overlap (default: 10), average read length (default: 300), fragment length (default: 550), fragment length standard deviation (default: 55), and minimum fragment size (default: 400). 13. Click “Run Tool” to merge the reads. Step 4: Taxonomic Annotation 14. Click “NCBI BLAST+” in the Tools menu. 15. Select the merged reads dataset. 16. Choose the desired reference database (NT, PM, MT, or PT). 17. Set the E-value (default: 1e-10) and select the output format (default: Tabular). 18.
Specify the minimum alignment length (default: 400). 19. Click “Run Tool” to perform the BLAST search. Step 5: Taxonomic Assignment 20. Click “Assign Taxa” in the Tools menu. 21. Select the BLAST output dataset. 22. Click “Run Tool” to assign taxonomic names to the merged reads. Step 6: Visualization 23. Click “Visualization” in the Tools menu. 24. Select the taxonomic assignment output dataset. 25. Enter the sample name, plot title, and axis labels, and specify plot dimensions. 26. Choose the desired output file type (.pdf, .jpeg, or .tiff). 27. Click “Run Tool” to generate the visualization. Throughout the analysis, users can monitor job progress and access intermediate results via the History panel. This feature allows users to rerun analyses with modified parameters if necessary, enhancing the flexibility and iterative nature of the analytical process. Results Implementation of SoEM-Web We developed SoEM-Web, a user-friendly web-based application for analyzing and visualizing PCR-free, small-organelle-enriched metagenomics data, using the Galaxy platform ( Afgan et al. 2018 ). The application integrates open-source bioinformatics tools and custom Python and R scripts to provide a comprehensive analysis pipeline. The SoEM-Web interface consists of three main components ( Fig. 2 ): the Tools menu, the Interface panel, and the History panel. These components work together to guide users through the analysis process, which includes six main steps: data upload, quality control, read merging, taxonomic annotation, taxonomic assignment, and visualization. The detailed protocol for each step is described in the Methods section. Download figure Open in new tab Fig. 2. SoEM-Web interface The SoEM-Web analysis interface consists of three main panels: the “Tools” section on the left, the “Main” panel in the center, and the “History” panel on the right. (A) The Tools section lists the available SoEM data analysis tools, allowing users to select the tool of interest.
(B) After selecting the tool, users can configure input parameters and execute the tool. (C) Using the “Run” tool, users can monitor the progress of the job. (D) The History panel displays all outcomes and information about the tools used. Case study analysis To demonstrate the functionality of SoEM-Web, we reanalyzed two metagenomic sequence datasets from a previous SoEM assay ( Jo et al. 2019 ). The samples, Baealdo and Yamido, were originally sequenced using Illumina MiSeq (v2, 301-cycle). Following the SoEM-Web protocol, we processed the raw data through all six steps. After quality control, 35.1 and 34.5 million reads were obtained from the Baealdo and Yamido samples, respectively. PE read merging resulted in 9.1–9.7 million longer reads, approximately half of the input reads ( Table 3 ). The BLAST search against the NCBI NT database and taxonomic assignment identified 444 and 480 OTUs at the species level for Baealdo and Yamido, respectively. Notably, compared to the original study ( Jo et al. 2019 ), SoEM-Web identified more species: 444 vs. 372 for Baealdo and 480 vs. 441 for Yamido. Between 22% and 37% of species were uniquely detected at each site using each method ( Fig. 3 ). The visualization step allowed us to generate clear and informative plots of the taxonomic diversity and distribution patterns, facilitating the interpretation of our results. View this table: View inline View popup Download powerpoint Table 3: Summary statistics of the case study. Download figure Open in new tab Fig. 3. Comparison of analyzed results between SoEM-Web and those reported by Jo et al. (2019) The comparison is based on species-level information collected during the analysis. Discussion The development of SoEM-Web represents a significant advancement in metagenomic analysis, particularly for studying marine microeukaryote diversity.
Although the SoEM method was developed to overcome the limitations of PCR-based metabarcoding approaches, analyzing SoEM data still required substantial bioinformatics expertise. SoEM-Web addresses this challenge by providing a user-friendly interface for complex metagenomic analyses, making the process more accessible to a broader range of researchers. SoEM-Web offers several key advantages. First, its user-friendly interface makes it accessible to researchers without extensive bioinformatics expertise. Second, it provides a standardized analysis pipeline, from raw sequencing data preprocessing to taxonomic assignment and visualization. Third, this standardized approach enhances reproducibility, a critical aspect of scientific research. Lastly, regular updates to reference databases ensure accurate taxonomic identification. Our case study, which reanalyzed data from the study by Jo et al. (2019) , demonstrates the efficiency and ease of use of SoEM-Web. We successfully performed the entire analysis without requiring complex bioinformatics expertise. The step-by-step protocol and user-friendly interface significantly streamlined the process, from quality control of raw sequencing data to read merging, taxonomic annotation, and final visualization. The ability to adjust parameters at each stage further highlights the flexibility of SoEM-Web. Interestingly, our reanalysis revealed several notable differences from the original study. We identified more species in both the Baealdo (444 vs. 372) and Yamido (480 vs. 441) samples. Moreover, 22%–37% of species were uniquely detected by each method. These differences can primarily be attributed to the continuous expansion of the GenBank database, which approximately doubles in size every 18 months ( Sayers et al. 2023 ). This expansion provides more comprehensive reference data for taxonomic assignment during reanalysis. 
The discrepancies in species identification also highlight the impact of changes in taxonomic naming and classification, underscoring the need for regular updates to taxonomic annotation and reference databases. Furthermore, as pointed out by Jin et al. (2020), the presence of erroneous taxonomic information in public databases emphasizes the importance of careful data curation. These findings demonstrate the value of regularly reanalyzing metagenomic datasets. As databases and analysis tools continue to improve, reanalysis of previously published datasets can uncover additional taxa and provide more accurate taxonomic profiles. However, users should be aware of the limitations associated with reference databases and exercise caution when interpreting results, particularly when comparing analyses conducted at different time points or with different database versions. While SoEM-Web significantly simplifies the metagenomic analysis process, there is still room for further enhancement. Future developments could focus on incorporating more advanced visualization tools and integrating machine learning approaches to improve taxonomic classification. Additionally, continuously refining the pipeline to adapt to emerging sequencing technologies and evolving database structures will be crucial to maintaining the relevance of SoEM-Web in the field of metagenomic analysis. Conclusions SoEM-Web, presented in this study, is a user-friendly web application designed to meet the growing demand for accessible metagenomic analysis tools in marine biology and oceanography. By streamlining the entire analysis process from raw data preprocessing to visualization, it empowers researchers to investigate marine microeukaryote diversity more efficiently. Our case study demonstrates the potential of SoEM-Web to uncover additional taxonomic information when reanalyzing existing datasets, underscoring its value in the context of rapidly expanding sequence databases.
As metagenomics continues to evolve, SoEM-Web aims to support the marine research community in understanding microeukaryote biodiversity and its role in ocean ecosystems, while also promoting critical interpretation of results within the limitations of current technologies and taxonomic frameworks. Funding This work was supported by research grants from the Basic Science Research Program through the National Research Foundation of Korea (NRF), funded by the Ministry of Education (NRF-2022R1A2C1010731 to C.P.); the “Research Center for Fishery Resource Management Based on Information and Communication Technology” (2021, grant number 20180384), funded by the Ministry of Oceans and Fisheries, Korea; and the Innovative Human Resource Development for Local Intellectualization program through the Institute of Information & Communications Technology Planning & Evaluation (IITP) grant, funded by the Korea government (MSIT) (IITP-2024-00156287, 30%). References ↵ Afgan E , Baker D , Batut B , van den Beek M , Bouvier D , Cech M , Chilton J , Clements D , Coraor N , Gruning BA , Guerler A , Hillman-Jackson J , Hiltemann S , Jalili V , Rasche H , Soranzo N , Goecks J , Taylor J , Nekrutenko A , and Blankenberg D. 2018 . The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2018 update . Nucleic Acids Res 46 : W537 - W544 . doi: 10.1093/nar/gky379 OpenUrl CrossRef PubMed ↵ Balasubramanian VK , Joseph Maran MI , Ramteke D , Vijaykumar DS , Kottarathail Rajendran A , Ramachandran P , and Ramachandran R. 2021 . Environmental DNA reveals aquatic biodiversity of an urban backwater area, southeast coast of India . Marine Pollution Bulletin 171 : 112786 . doi: 10.1016/j.marpolbul.2021.112786 OpenUrl CrossRef PubMed ↵ Bolger AM , Lohse M , and Usadel B. 2014 . Trimmomatic: a flexible trimmer for Illumina sequence data . Bioinformatics 30 : 2114 - 2120 . 
doi: 10.1093/bioinformatics/btu170 OpenUrl CrossRef PubMed Web of Science ↵ Brandt MI , Trouche B , Henry N , Liautard-Haag C , Maignien L , de Vargas C , Wincker P , Poulain J , Zeppilli D , and Arnaud-Haond S. 2020 . An Assessment of Environmental Metabarcoding Protocols Aiming at Favoring Contemporary Biodiversity in Inventories of Deep-Sea Communities . Frontiers in Marine Science 7 . ↵ Brannock PM , Ortmann AC , Moss AG , and Halanych KM . 2016 . Metabarcoding reveals environmental factors influencing spatio-temporal variation in pelagic microeukaryotes . Molecular Ecology 25 : 3593 – 3604 . doi: 10.1111/mec.13709 OpenUrl CrossRef ↵ Camacho C , Coulouris G , Avagyan V , Ma N , Papadopoulos J , Bealer K , and Madden TL . 2009 . BLAST+: architecture and applications . BMC Bioinformatics 10 : 421 . doi: 10.1186/1471-2105-10-421 OpenUrl CrossRef PubMed ↵ Cock PJA , Fields CJ , Goto N , Heuer ML , and Rice PM . 2010 . The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants . Nucleic Acids Research 38 : 1767 – 1771 . doi: 10.1093/nar/gkp1137 OpenUrl CrossRef PubMed Web of Science ↵ Crampton-Platt A , Timmermans MJTN , Gimmel ML , Kutty SN , Cockerill TD , Vun Khen C , and Vogler AP . 2015 . Soup to Tree: The Phylogeny of Beetles Inferred by Mitochondrial Metagenomics of a Bornean Rainforest Sample . Molecular Biology and Evolution 32 : 2302 – 2316 . doi: 10.1093/molbev/msv111 OpenUrl CrossRef PubMed ↵ Crampton-Platt A , Yu DW , Zhou X , and Vogler AP . 2016 . Mitochondrial metagenomics: letting the genes out of the bottle . GigaScience 5 : s13742 - 13016 -10120-y. doi: 10.1186/s13742-016-0120-y OpenUrl CrossRef ↵ Dodsworth S. 2015 . Genome skimming for next-generation biodiversity analysis . Trends in Plant Science 20 : 525 – 527 . doi: 10.1016/j.tplants.2015.06.012 OpenUrl CrossRef PubMed ↵ Greshake B , Zehr S , Dal Grande F , Meiser A , Schmitt I , and Ebersberger I. 2016 . 
Potential and pitfalls of eukaryotic metagenome skimming: a test case for lichens . Molecular Ecology Resources 16 : 511 – 523 . doi: 10.1111/1755-0998.12463 OpenUrl CrossRef PubMed ↵ Jin S , Kim KY , Kim M-S , and Park C. 2020 . An assessment of the taxonomic reliability of DNA barcode sequences in publicly available databases . Algae 35 : 293 – 301 . doi: 10.4490/algae.2020.35.9.4 OpenUrl CrossRef ↵ Jin S , Lee H-G , Park C , and Kim KY . 2023 . Small-organelle-enriched metagenomics: An improved method for environmental DNA-based identification of marine plankton . Limnology and Oceanography: Methods 21 : 178 – 191 . doi: 10.1002/lom3.10538 OpenUrl CrossRef ↵ Jo J , Lee H-G , Kim KY , and Park C. 2019 . SoEM: a novel PCR-free biodiversity assessment method based on small-organelles enriched metagenomics . Algae 34 : 57 – 70 . doi: 10.4490/algae.2019.34.2.26 OpenUrl CrossRef ↵ Linard B , Crampton-Platt A , Gillett CPDT , Timmermans MJTN , and Vogler AP . 2015 . Metagenome Skimming of Insect Specimen Pools: Potential for Comparative Genomics . Genome Biology and Evolution 7 : 1474 – 1489 . doi: 10.1093/gbe/evv086 OpenUrl CrossRef PubMed ↵ Liu S , Wang X , Xie L , Tan M , Li Z , Su X , Zhang H , Misof B , Kjer KM , Tang M , Niehuis O , Jiang H , and Zhou X. 2016 . Mitochondrial capture enriches mito-DNA 100 fold, enabling PCR-free mitogenomics biodiversity analysis . Molecular Ecology Resources 16 : 470 – 479 . doi: 10.1111/1755-0998.12472 OpenUrl CrossRef ↵ Magoc T , and Salzberg SL . 2011 . FLASH: fast length adjustment of short reads to improve genome assemblies . Bioinformatics 27 : 2957 – 2963 . 
doi: 10.1093/bioinformatics/btr507 OpenUrl CrossRef PubMed Web of Science ↵ Pawlowski J , Bruce K , Panksep K , Aguirre FI , Amalfitano S , Apothéloz-Perret-Gentil L , Baussant T , Bouchez A , Carugati L , Cermakova K , Cordier T , Corinaldesi C , Costa FO , Danovaro R , Dell’Anno A , Duarte S , Eisendle U , Ferrari BJD , Frontalini F , Frühe L , Haegerbaeumer A , Kisand V , Krolicka A , Lanzén A , Leese F , Lejzerowicz F , Lyautey E , Maček I , Sagova-Marečková M , Pearman JK , Pochon X , Stoeck T , Vivien R , Weigand A , and Fazi S. 2022 . Environmental DNA metabarcoding for benthic monitoring: A review of sediment sampling and DNA extraction methods . Science of The Total Environment 818 : 151783 . doi: 10.1016/j.scitotenv.2021.151783 OpenUrl CrossRef PubMed ↵ Sayers EW , Bolton EE , Brister J R , Canese K , Chan J , Comeau Donald C , Farrell Catherine M , Feldgarden M , Fine AM , Funk K , Hatcher E , Kannan S , Kelly C , Kim S , Klimke W , Landrum Melissa J , Lathrop S , Lu Z , Madden Thomas L , Malheiro A , Marchler-Bauer A , Murphy Terence D , Phan L , Pujar S , Rangwala Sanjida H , Schneider Valerie A , Tse T , Wang J , Ye J , Trawick Barton W , Pruitt Kim D , and Sherry Stephen T. 2023 . Database resources of the National Center for Biotechnology Information in 2023 . Nucleic Acids Research 51 : D29 – D38 . doi: 10.1093/nar/gkac1032 OpenUrl CrossRef PubMed ↵ Schoch CL , Ciufo S , Domrachev M , Hotton CL , Kannan S , Khovanskaya R , Leipe D , McVeigh R , O’Neill K , Robbertse B , Sharma S , Soussov V , Sullivan JP , Sun L , Turner S , and Karsch-Mizrachi I. 2020 . NCBI Taxonomy: a comprehensive update on curation, resources and tools . Database 2020 : baaa062 . doi: 10.1093/database/baaa062 OpenUrl CrossRef PubMed ↵ Sigsgaard EE , Jensen MR , Winkelmann IE , Møller PR , Hansen MM , and Thomsen PF . 2020 . Population-level inferences from environmental DNA—Current status and future perspectives . Evolutionary Applications 13 : 245 – 262 . 
doi: 10.1111/eva.12882 OpenUrl CrossRef PubMed ↵ Takahashi M , Saccò M , Kestel JH , Nester G , Campbell MA , van der Heyde M , Heydenrych MJ , Juszkiewicz DJ , Nevill P , Dawkins KL , Bessey C , Fernandes K , Miller H , Power M , Mousavi-Derazmahalleh M , Newton JP , White NE , Richards ZT , and Allentoft ME . 2023 . Aquatic environmental DNA: A review of the macro-organismal biomonitoring revolution . Science of The Total Environment 873 : 162322 . doi: 10.1016/j.scitotenv.2023.162322 OpenUrl CrossRef PubMed ↵ Tang M , Tan M , Meng G , Yang S , Su X , Liu S , Song W , Li Y , Wu Q , Zhang A , and Zhou X. 2014 . Multiplex sequencing of pooled mitochondrial genomes—a crucial step toward biodiversity analysis using mito-metagenomics . Nucleic Acids Research 42 : e166 – e166 . doi: 10.1093/nar/gku917 OpenUrl CrossRef PubMed ↵ Wickham H. 2016 . ggplot2: Elegant Graphics for Data Analysis . Springer-Verlag New York . ↵ Zhou X , Li Y , Liu S , Yang Q , Su X , Zhou L , Tang M , Fu R , Li J , and Huang Q. 2013 . Ultra-deep sequencing enables high-fidelity recovery of biodiversity for bulk arthropod samples without PCR amplification . GigaScience 2 : 2047 - 2217X -2042-2044. doi: 10.1186/2047-217X-2-4 OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted February 16, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following SoEM-Web: a user-friendly platform for the analysis and visualization of small-organelle-enriched metagenomics data Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. 
Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share SoEM-Web: a user-friendly platform for the analysis and visualization of small-organelle-enriched metagenomics data Jooseong Oh , Hanjin Kim , Chungoo Park bioRxiv 2025.02.12.637736; doi: https://doi.org/10.1101/2025.02.12.637736 Share This Article: Copy Citation Tools SoEM-Web: a user-friendly platform for the analysis and visualization of small-organelle-enriched metagenomics data Jooseong Oh , Hanjin Kim , Chungoo Park bioRxiv 2025.02.12.637736; doi: https://doi.org/10.1101/2025.02.12.637736 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17691) Bioengineering (13892) Bioinformatics (41937) Biophysics (21452) Cancer Biology (18588) Cell Biology (25504) Clinical Trials (138) Developmental Biology (13378) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24320) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88605) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15156) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00