Full text
29,494 characters
· extracted from
preprint-html
· click to expand
Mapler: Assessing assembly quality in taxonomically rich metagenomes sequenced with HiFi reads | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Mapler: Assessing assembly quality in taxonomically rich metagenomes sequenced with HiFi reads View ORCID Profile Nicolas Maurice , View ORCID Profile Claire Lemaitre , View ORCID Profile Riccardo Vicedomini , View ORCID Profile Clémence Frioux doi: https://doi.org/10.1101/2025.03.10.641994 Nicolas Maurice a Univ Rennes, Inria, CNRS , IRISA - UMR 6074, F-35000 Rennes, France b Inria, Univ. 
Bordeaux, INRAE , F-33400 Talence, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Nicolas Maurice Claire Lemaitre a Univ Rennes, Inria, CNRS , IRISA - UMR 6074, F-35000 Rennes, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Claire Lemaitre Riccardo Vicedomini a Univ Rennes, Inria, CNRS , IRISA - UMR 6074, F-35000 Rennes, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Riccardo Vicedomini Clémence Frioux b Inria, Univ. Bordeaux, INRAE , F-33400 Talence, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Clémence Frioux For correspondence: clemence.frioux{at}inria.fr Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Summary Metagenome assembly seeks to reconstruct the most high-quality genomes from sequencing data of microbial ecosystems. Despite technological advancements that facilitate assembly, such as Hi-Fi long reads, the process remains challenging in complex environmental samples consisting of hundreds to thousands of populations. Mapler is a metagenome assembly and evaluation pipeline with a focus on evaluating the quality of Hi-Fi long read metagenome assemblies. It incorporates several state-of-the-art metrics, as well as novel metrics assessing the diversity that remains uncaptured by the assembly process. Mapler facilitates the comparison of assembly strategies and helps identify methodological bottlenecks that hinder genome reconstruction. Availability and Implementation Mapler is open source and publicly available under the AGPL-3.0 licence at https://github.com/Nimauric/Mapler . Source code is implemented in Python and Bash as a Snakemake pipeline. Contacts nicolas.maurice{at}inria.fr , clemence.frioux{at}inria.fr . 
Supplementary information Available online. 1. Introduction Evaluating the quality of metagenome assemblies can be a challenging task, especially when no reference genome is available and when comparing samples with varying taxonomic richness and sequencing depths. Taxonomic richness refers to the number of distinct populations within the sample: microbial communities may consist of only a handful of populations, as in acid mine drainage communities [ 19 ], or of up to thousands of distinct populations, as observed in soil ecosystems [ 18 ]. Assembly of metagenomic reads leads, in the best case scenario, to the reconstruction of genomes, but in most cases, to the generation of sequences of varying length called contigs . Those are then grouped into bins , presumed to originate from the same microbial populations; bins of sufficient quality are referred to as Metagenome-Assembled Genomes (MAGs) [ 4 ]. A high-quality metagenome assembly is not only expected to yield high-quality bins, but also to be representative of the majority of the read sequences. Recent studies showed significant improvements in both the number and quality of bins obtained using highly accurate PacBio HiFi long reads [ 1 ]. However, in highly taxonomically rich ecosystems, assembly methods still struggle to reconstruct the numerous low-abundance genomes [ 1 , 21 ], and it remains unclear how much of the sample these bins are representative of, resulting in a need for comparison and development of dedicated evaluation methods. Several tools and pipelines exist to evaluate metagenomes. CheckM2 [ 6 ] assesses binned contigs based on the presence of marker genes, allowing the identification of MAGs from bins with low contamination and high completeness scores. The PacBio HiFi-MAG-Pipeline [ 16 ] is a pipeline developed to identify high-quality MAGs from previously generated metagenome assemblies.
It follows a “completeness-aware” strategy based on CheckM2 and several state-of-the-art binning tools, incorporating stringent filtering criteria to exclude low-quality bins, which are common in taxonomically rich ecosystems. MetaQUAST [ 13 ] performs a reference-based evaluation, either using user-defined references or retrieving references via taxonomic assignment. However, in complex ecosystems, many species are absent from databases, thus limiting its effectiveness. Finally, custom metrics or visualizations have also been employed for method validation [ 1 , 7 ]. For example, the percentage of reads aligned to the assembly has been used to validate metagenome assembly in [ 1 ]. Nevertheless, these approaches are rarely documented or provided in an easy-to-use implementation that allows for replication on new datasets. In this work, we present Mapler, a metagenomic assembly and evaluation pipeline. It avoids filtering out any sequences, it does not rely on the availability of reference sequences, and it considers both unassembled reads and unbinned contigs. Mapler integrates several state-of-the-art tools as well as novel metrics and visualizations based on read-to-contig alignments. It provides a broad view of the sequence characteristics after assembly and binning, in order to identify the bottlenecks faced during bioinformatic processes. Mapler is therefore an effective way to examine assembly in taxonomically rich ecosystems, where high-quality bins and references are scarce. 2. Software description 2.1. Pipeline Mapler is a Snakemake [ 14 ] pipeline dedicated to the evaluation of taxonomically rich metagenome assemblies of HiFi long reads. It can be run either locally or on Slurm-based computing environments. Its modular design allows for easy integration of additional custom steps in the pipeline or modification of existing ones. The pipeline can run multiple steps in parallel, including analysing multiple samples at once.
The structure of the pipeline is illustrated in Figure 1A . Download figure Open in new tab Figure 1: (A) Overview of the Mapler pipeline. Contigs and bins can either be generated by the pipeline or given as input. Several long-read assemblers are integrated in Mapler. (B) Example of Mapler’s output. Histograms show the aligned read/base percentages for bins of different quality and reveal the increasing complexity of different ecosystems, from the mock community to the gut microbiome sample and the highly diverse deadwood sample, all three assembled with metaMDBG. 2.2. Integrated tools While its focus is on evaluation, Mapler integrates state-of-the-art tools for assembly and binning suitable for HiFi sequencing data: metaMDBG [ 1 ], hifiasm-meta [ 8 ], metaFlye [ 10 ], OPERA-MS [ 2 ], and MetaBAT 2 [ 9 ]. Users may alternatively skip the assembly and/or binning steps by providing their own input contigs and/or bins. Each bin can be taxonomically classified using either GTDB-Tk [ 5 ], or Kraken 2 [ 20 ] in order to facilitate the comparison with the taxonomic assignment of the reads. By default, bins are qualitatively assessed with CheckM2 [ 6 ] and categorized according to the following levels of completeness (comp.) and contamination (cont.): near complete (single contig, ≥99% comp., ≤1% cont.), high quality (≥90% comp., ≤5% cont.), medium quality (≥50% comp., ≤10% cont.), and low quality for the remaining bins. These criteria match the completeness and contamination estimates used by the Genomic Standards Consortium [ 3 ] for defining low-quality to high-quality MAGs. MetaQUAST [ 13 ] is also integrated to compare contigs with reference genomes, if available and provided as input by the user. 2.3. Novel metrics Mapler aligns the reads on the contigs with Minimap2 [ 11 ], and uses these alignments to calculate various metrics.
The aligned read percentage is the number of reads aligned to at least one contig divided by the total number of reads, while the aligned base percentage is the number of read bases aligned to at least one contig, divided by the total length of the reads. These metrics can be computed with or without the binning information. In the former case, the percentage is separately calculated for reads or bases that align to contigs belonging to bins of near complete, high, medium or low quality, or to contigs that were assembled but not binned. A text report is produced for both the binning-aware and binning-unaware versions, and a summarizing plot is generated for the binning-aware version ( Figure 1B ). In cases where a read is aligned to multiple contigs, it is only taken into account for the highest bin quality level. Another analysis proposed by Mapler is the comparison of the sets of reads aligned or unaligned to the contigs, in order to gain insight into the characteristics of reads that participate in, or have been excluded from the assembly. Both sets are analyzed separately with the following tools: FastQC ( https://github.com/s-andrews/FastQC ), used to assess read quality and generate a comprehensive report. It can be used to check whether the assembly process is more effective on higher quality reads, longer reads, or reads with a certain GC ratio. K-mer Analysis Toolkit (KAT) [ 12 ], which computes the abundance of assembled and unassembled reads. The abundance of a given read is estimated by its median k-mer abundance, with k-mer abundances being computed from the full read dataset. Mapler integrates these results to visualize both distributions with two overlapping histograms. Kraken 2 [ 20 ], alongside Krona [ 15 ], is used to analyze the taxonomic composition and abundance of both sets of reads, providing insight on over- or under-represented clades in the assembly. 3.
Application We demonstrated Mapler’s ability to evaluate assemblies of diverse samples on three datasets of increasing taxonomic complexity, sequenced with PacBio Sequel II SMRT. Mock community : the ZymoBIOMICS Gut Microbiome Standard D6331 (SRR13128014) consists of 21 populations, including 17 species and 5 strains of Escherichia coli . The sample contains 18.0 Gbp spread over 1,978,852 reads. Gut microbiome : A pooled extraction of four stool samples from adult humans following a vegan diet (SRR15275211). Human digestive microbiomes generally host a few hundreds of species. The sample contains 18.8 Gbp spread over 1,904,159 reads. Deadwood : four separately sequenced samples of deadwood that were co-assembled as in [ 17 ]. The samples (SRR28211698 to SRR28211701) contain a total of 16.1 Gbp spread over 866,007 reads. Each dataset was processed by Mapler with metaMDBG, hifiasm-meta, and metaFlye. Mapler first summarises in scatter plots the bins obtained in each sample (Supp. Fig. 1), highlighting that the number and quality of bins vary across the datasets. Compared to the Mock community , the number of low-quality bins is much higher in the Gut microbiome and Deadwood samples, due to either low completeness or high contamination scores. Mapler then generates, after mapping reads to contigs and bins, plots that highlight a decreasing proportion of reads assembled and binned at each quality level as dataset complexity increases ( Fig. 1B ). More precisely, on the metaMDBG assemblies, 96.9% of reads and 96.1% of bases map to bins of at least medium quality in the Mock community , while in the Gut microbiome these values drop to 49.5% and 47.8%, respectively. Furthermore, Deadwood ’s high diversity and lower sequencing depth result in a lower-quality assembly with only 12.6% of reads and 9.4% of bases aligned with bins of medium quality or higher.
Because a significant proportion of reads of the Deadwood sample did not participate in the assembly (26.7% of reads and 52.7% of bases did not align with any contig), we compared the aligned and unaligned reads in this sample. Despite the read length variation in the original sample, assembled and unassembled reads are of similar length (18,437 and 18,583 base pairs on average, respectively, see Supp. Fig. 2). Taxonomic assignment of reads with Mapler illustrates that some microbial populations were only detected in unassembled reads, such as several species of Legionella (Supp. Fig. 3). Sequences were also generally assigned with less precision in the unassembled reads (56% of bacteria are assigned at the phylum level in the unassembled reads, compared to 76% in the assembled reads), suggesting that most low-abundant populations remain unknown in databases. As expected, unassembled reads were mostly made up of rare k-mers: nearly all unassembled reads have a median k-mer abundance as low as 1 (Supp. Fig. 4). These results suggest that assemblers and binners used in the metagenome analysis could not improve the results by much, and that deeper sequencing would instead be needed to enhance the quality of the assembly. We nonetheless compared the Deadwood assemblies performed with different metagenome assembly tools. MetaMDBG outperformed the other assemblers in terms of total captured diversity: 52.7% of bases do not align with any contig, compared to 76.7% for metaFlye and 66.1% for hifiasm-meta (Supp. Fig. 5). Conversely, hifiasm-meta outperformed metaMDBG in terms of bases aligned to at least medium-quality bins (14.2% versus 12.6%). We recorded the execution time of the pipeline on the three datasets. For each dataset, we executed the pipeline on an Intel(R) Xeon(R) CPU E5-2670 v3 @ 2.30GHz node, allocating a total of 48 CPUs and 200G of memory.
The detailed breakdown of how much memory was allocated to each substep of the pipeline is described in Supplementary Table 1. We performed the evaluations on the three samples separately. For each sample, we measured the time it took to evaluate the assembly and binning quality of three assemblers (the assembly and binning were performed separately). The analysis of the Mock community took 2 hours and 4 minutes in wallclock time to run, followed by the Gut microbiome with 3 hours and 46 minutes and, finally, the Deadwood with 5 hours and 3 minutes. 4. Conclusion Mapler is a metagenome evaluation pipeline that allows a thorough examination of assembly and binning, implemented in an easy-to-use and customizable workflow. Mapler is specifically implemented to analyze HiFi long-read datasets that are currently the most suitable to characterize taxonomically rich microbial ecosystems. On top of integrating multiple state-of-the-art evaluation methods, Mapler incorporates new evaluation metrics such as the aligned read and aligned base percentages. When combined with the bin quality information, these metrics provide a way to measure how much of the sample’s original diversity was assembled at each level of quality, and highlight potential assembly issues. In cases where a significant proportion of reads cannot be aligned back to the assembly, comparing the assembled and unassembled reads provides further insight into the reasons why the assembly may not be sufficiently representative of the sample, and whether the contigs are missing key taxa that are only present in the reads. 5. Competing interests No competing interest is declared. 6. Author contributions statement CF, CL, NM and RV conceived the experiments; NM conducted the experiments; NM developed the software; NM, CF, CL and RV tested the software; CF, CL, NM and RV wrote the manuscript; All authors read and approved the manuscript. 7.
Acknowledgments This work was supported by the French National Research Agency (ANR) France 2030 PEPR Agroécologie et Numérique MISTIC ANR-22-PEAE-0011. We acknowledge the GenOuest bioinformatics core facility ( https://www.genouest.org ) for providing the computing infrastructure. A CC-BY public copyright license ( https://creativecommons.org/licenses/by/4.0/ ) has been applied by the authors to the present document, in accordance with the grant’s open access conditions. References [1]. ↵ G. Benoit , S. Raguideau , R. James , A. M. Phillippy , R. Chikhi , and C. Quince . High-quality metagenome assembly from long accurate reads with metaMDBG . Nature Biotechnology , 42 ( 9 ): 1378 – 1383 , Sept . 2024 . ISSN 1087-0156, 1546-1696 . doi: 10.1038/s41587-023-01983-6 . OpenUrl CrossRef [2]. ↵ D. Bertrand , J. Shaw , M. Kalathiyappan , A. H. Q. Ng , M. S. Kumar , C. Li , M. Dvornicic , J. P. Soldo , J. Y. Koh , C. Tong , O. T. Ng , T. Barkham , B. Young , K. Marimuthu , K. R. Chng , M. Sikic , and N. Nagarajan . Hybrid metagenomic assembly enables high-resolution analysis of resistance determinants and mobile elements in human microbiomes . Nature Biotechnology , 37 ( 8 ): 937 – 944 , Aug . 2019 . ISSN 1087-0156, 1546-1696 . doi: 10.1038/s41587-019-0191-2 . OpenUrl CrossRef PubMed [3]. ↵ R. M. Bowers , N. C. Kyrpides , R. Stepanauskas , M. Harmon-Smith , D. Doud , T. B. K. Reddy , F. Schulz , J. Jarett , A. R. Rivers , E. A. Eloe-Fadrosh , S. G. Tringe , N. N. Ivanova , A. Copeland , A. Clum , E. D. Becraft , R. R. Malmstrom , B. Birren , M. Podar , P. Bork , G. M. Weinstock , G. M. Garrity , J. A. Dodsworth , S. Yooseph , G. Sutton , F. O. Glöckner , J. A. Gilbert , W. C. Nelson , S. J. Hallam , S. P. Jungbluth , T. J. G. Ettema , S. Tighe , K. T. Konstantinidis , W.-T. Liu , B. J. Baker , T. Rattei , J. A. Eisen , B. Hedlund , K. D. McMahon , N. Fierer , R. Knight , R. Finn , G. Cochrane , I. Karsch-Mizrachi , G. W. Tyson , C. Rinke , A. Lapidus , F. Meyer , P. 
Yilmaz , D. H. Parks , A. Murat Eren , L. Schriml , J. F. Banfield , P. Hugenholtz , and T. Woyke . Minimum information about a single amplified genome (MISAG) and a metagenome-assembled genome (MIMAG) of bacteria and archaea . Nature Biotechnology , 35 ( 8 ): 725 – 731 , Aug . 2017 . ISSN 1087-0156, 1546-1696 . doi: 10.1038/nbt.3893 . OpenUrl CrossRef PubMed [4]. ↵ K. Cerk , P. Ugalde-Salas , C. G. Nedjad , M. Lecomte , C. Muller , D. J. Sherman , F. Hildebrand , S. Labarthe , and C. Frioux . Community-scale models of microbiomes: Articulating metabolic modelling and metagenome sequencing . Microbial Biotechnology , 17 ( 1 ): e14396 , 2024 . ISSN 1751-7915 . doi: 10.1111/1751-7915.14396 . OpenUrl CrossRef [5]. ↵ P.-A. Chaumeil , A. J. Mussig , P. Hugenholtz , and D. H. Parks . GTDB-Tk v2: memory friendly classification with the genome taxonomy database . Bioinformatics , 38 ( 23 ): 5315 – 5316 , Nov . 2022 . ISSN 1367-4803, 1367-4811 . doi: 10.1093/bioinformatics/btac672 . OpenUrl CrossRef PubMed [6]. ↵ A. Chklovski , D. H. Parks , B. J. Woodcroft , and G. W. Tyson . CheckM2: a rapid, scalable and accurate tool for assessing microbial genome quality using machine learning . Nature Methods , 20 ( 8 ): 1203 – 1212 , Aug . 2023 . ISSN 1548-7091, 1548-7105 . doi: 10.1038/s41592-023-01940-w . OpenUrl CrossRef PubMed [7]. ↵ X. Feng and H. Li . Evaluating and improving the representation of bacterial contents in long-read metagenome assemblies . Genome Biology , 25 ( 1 ): 92 , Apr . 2024 . ISSN 1474-760X . doi: 10.1186/s13059-024-03234-6 . OpenUrl CrossRef PubMed [8]. ↵ X. Feng , H. Cheng , D. Portik , and H. Li . Metagenome assembly of high-fidelity long reads with hifiasm-meta . Nature Methods , 19 ( 6 ): 671 – 674 , June 2022 . ISSN 1548-7091, 1548-7105 . doi: 10.1038/s41592-022-01478-3 . OpenUrl CrossRef PubMed [9]. ↵ D. D. Kang , F. Li , E. Kirton , A. Thomas , R. Egan , H. An , and Z. Wang . 
MetaBAT 2: an adaptive binning algorithm for robust and efficient genome reconstruction from metagenome assemblies . PeerJ , 7 : e7359 , July 2019 . ISSN 2167-8359 . doi: 10.7717/peerj.7359 . OpenUrl CrossRef PubMed [10]. ↵ M. Kolmogorov , D. M. Bickhart , B. Behsaz , A. Gurevich , M. Rayko , S. B. Shin , K. Kuhn , J. Yuan , E. Polevikov , T. P. L. Smith , and P. A. Pevzner . metaFlye: scalable long-read metagenome assembly using repeat graphs . Nature Methods , 17 ( 11 ): 1103 – 1110 , Nov . 2020 . ISSN 1548-7091, 1548-7105 . doi: 10.1038/s41592-020-00971-x . OpenUrl CrossRef PubMed [11]. ↵ H. Li . Minimap2: pairwise alignment for nucleotide sequences . Bioinformatics , 34 ( 18 ): 3094 – 3100 , Sept . 2018 . ISSN 1367-4803, 1367-4811 . doi: 10.1093/bioinformatics/bty191 . OpenUrl CrossRef PubMed [12]. ↵ D. Mapleson , G. Garcia Accinelli , G. Kettleborough , J. Wright , and B. J. Clavijo . KAT: a K-mer analysis toolkit to quality control NGS datasets and genome assemblies . Bioinformatics , 33 ( 4 ): 574 – 576 , Feb . 2017 . ISSN 1367-4803, 1367-4811 . doi: 10.1093/bioinformatics/btw663 . OpenUrl CrossRef PubMed [13]. ↵ A. Mikheenko , V. Saveliev , and A. Gurevich . MetaQUAST: evaluation of metagenome assemblies . Bioinformatics , 32 ( 7 ): 1088 – 1090 , Apr . 2016 . ISSN 1367-4811, 1367-4803 . doi: 10.1093/bioinformatics/btv697 . OpenUrl CrossRef PubMed [14]. ↵ F. Mölder , K. P. Jablonski , B. Letcher , M. B. Hall , C. H. Tomkins-Tinch , V. Sochat , J. Forster , S. Lee , S. O. Twardziok , A. Kanitz , A. Wilm , M. Holtgrewe , S. Rahmann , S. Nahnsen , and J. Köster . Sustainable data analysis with Snakemake . F1000Research , 10 : 33 , 2021 . doi: 10.12688/f1000research.29032.2 . OpenUrl CrossRef PubMed [15]. ↵ B. D. Ondov , N. H. Bergman , and A. M. Phillippy . Interactive metagenomic visualization in a Web browser . BMC Bioinformatics , 12 ( 1 ): 385 , Dec . 2011 . ISSN 1471-2105 . doi: 10.1186/1471-2105-12-385 . OpenUrl CrossRef PubMed [16]. ↵ D. M. Portik , X. 
Feng , G. Benoit , D. J. Nasko , B. Auch , S. J. Bryson , R. Cano , M. Carlin , A. Damerum , B. Farthing , J. R. Grove , M. Islam , K. W. Langford , I. Liachko , K. Locken , H. Mangelson , S. Tang , S. Zhang , C. Quince , and J. E. Wilkinson . Highly accurate metagenome-assembled genomes from human gut microbiota using long-read assembly, binning, and consolidation methods , May 2024 . [17]. ↵ E. Richy , P. Thiago Dobbler , V. Tláskal , R. López-Mondéjar , P. Baldrian , and M. Kyselková . Long-read sequencing sheds light on key bacteria contributing to deadwood decomposition processes . Environmental Microbiome , 19 ( 1 ): 99 , Dec . 2024 . ISSN 2524-6372 . doi: 10.1186/s40793-024-00639-5 . OpenUrl CrossRef PubMed [18]. ↵ L. F. W. Roesch , R. R. Fulthorpe , A. Riva , G. Casella , A. K. M. Hadwin , A. D. Kent , S. H. Daroub , F. A. O. Camargo , W. G. Farmerie , and E. W. Triplett . Pyrosequencing enumerates and contrasts soil microbial diversity . The ISME Journal , 1 ( 4 ): 283 – 290 , Aug . 2007 . ISSN 1751-7362, 1751-7370 . doi: 10.1038/ismej.2007.53 . OpenUrl CrossRef PubMed Web of Science [19]. ↵ G. W. Tyson , J. Chapman , P. Hugenholtz , E. E. Allen , R. J. Ram , P. M. Richardson , V. V. Solovyev , E. M. Rubin , D. S. Rokhsar , and J. F. Banfield . Community structure and metabolism through reconstruction of microbial genomes from the environment . Nature , 428 ( 6978 ): 37 – 43 , Mar . 2004 . ISSN 0028-0836, 1476-4687 . doi: 10.1038/nature02340 . OpenUrl CrossRef PubMed Web of Science [20]. ↵ D. E. Wood , J. Lu , and B. Langmead . Improved metagenomic analysis with Kraken 2 . Genome Biology , 20 ( 1 ): 257 , Nov . 2019 . ISSN 1474-760X . doi: 10.1186/s13059-019-1891-0 . OpenUrl CrossRef PubMed [21]. ↵ L. Xu , Z. Dong , D. Chiniquy , G. Pierroz , S. Deng , C. Gao , S. Diamond , T. Simmons , H. M.-L. Wipf , D. Caddell , N. Varoquaux , M. A. Madera , R. Hutmacher , A. Deutschbauer , J. A. Dahlberg , M. L. Guerinot , E. Purdom , J. F. Banfield , J. W. Taylor , P. 
G. Lemaux , and D. Coleman-Derr . Genome-resolved metagenomics reveals role of iron metabolism in drought-induced rhizosphere microbiome dynamics . Nature Communications , 12 ( 1 ): 3209 , 2021 . doi: 10.1038/s41467-021-23553-7 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted March 13, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Mapler: Assessing assembly quality in taxonomically rich metagenomes sequenced with HiFi reads Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Mapler: Assessing assembly quality in taxonomically rich metagenomes sequenced with HiFi reads Nicolas Maurice , Claire Lemaitre , Riccardo Vicedomini , Clémence Frioux bioRxiv 2025.03.10.641994; doi: https://doi.org/10.1101/2025.03.10.641994 Share This Article: Copy Citation Tools Mapler: Assessing assembly quality in taxonomically rich metagenomes sequenced with HiFi reads Nicolas Maurice , Claire Lemaitre , Riccardo Vicedomini , Clémence Frioux bioRxiv 2025.03.10.641994; doi: https://doi.org/10.1101/2025.03.10.641994 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7616) Biochemistry (17625) Bioengineering (13852) Bioinformatics (41825) Biophysics (21397) Cancer Biology (18524) Cell Biology (25417) Clinical Trials (138) Developmental Biology (13350) Ecology (19858) Epidemiology (2067) Evolutionary Biology (24277) Genetics (15581) Genomics (22459) Immunology (17698) Microbiology (40278) Molecular Biology (17134) Neuroscience (88400) Paleontology (666) Pathology (2823) Pharmacology and Toxicology (4812) Physiology (7632) Plant Biology (15106) Scientific Communication and Education (2042) Synthetic Biology (4281) Systems Biology (9807) Zoology (2266)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.