Metagenomic classification of ancient viruses

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 26,348 characters · extracted from preprint-html · click to expand
Metagenomic classification of ancient viruses | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Metagenomic classification of ancient viruses View ORCID Profile Luís L. Marques , View ORCID Profile Armando J. Pinho , View ORCID Profile Diogo Pratas doi: https://doi.org/10.1101/2025.11.07.687203 Luís L. 
Marques 1 IEETA/LASI - Institute of Electronics and Informatics Engineering of Aveiro, and University of Aveiro , 3810-193 Aveiro, Portugal 2 DETI - Department of Electronics, Telecommunications and Informatics, University of Aveiro , 3810-193 Aveiro, Portugal Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Luís L. Marques For correspondence: lclm{at}ua.pt diogo.pratas{at}helsinki.fi Armando J. Pinho 1 IEETA/LASI - Institute of Electronics and Informatics Engineering of Aveiro, and University of Aveiro , 3810-193 Aveiro, Portugal 2 DETI - Department of Electronics, Telecommunications and Informatics, University of Aveiro , 3810-193 Aveiro, Portugal Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Armando J. Pinho Diogo Pratas 1 IEETA/LASI - Institute of Electronics and Informatics Engineering of Aveiro, and University of Aveiro , 3810-193 Aveiro, Portugal 2 DETI - Department of Electronics, Telecommunications and Informatics, University of Aveiro , 3810-193 Aveiro, Portugal 3 DoV - Department of Virology, University of Helsinki , 00014 Helsinki, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Diogo Pratas For correspondence: lclm{at}ua.pt diogo.pratas{at}helsinki.fi Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Ancient DNA (aDNA) sequences present unique challenges for taxonomic classification due to extreme fragmentation (reads 20-100 bp), end-biased cytosine deamination, and high contamination rates. Conventional metagenomic classifiers based on exact k -mer matching or alignment lose discriminative power on such short and damaged reads, limiting the analysis of paleogenomic samples. 
We present FALCON2, a compression-based metagenomic classifier that leverages position-aware finite-context models to maintain high accuracy on degraded ancient viral sequences. FALCON2 consolidates the capabilities of its predecessor, FALCON-meta, into a unified executable with enhanced features including model persistence, direct processing of compressed inputs, multiple file handling, and optional pre-filtering methodologies for contaminated samples. Under controlled benchmarking with database, taxonomy, and thread parity on simulated viral datasets, FALCON2 achieved an Area Under the Curve of Receiver Operating Characteristic (AUC-ROC) of 0.999, an Area Under Precision-Recall Curve (AUPRC) of 0.968, and an F 1 -score of 0.918, substantially outperforming Centrifuge (AUPRC = 0.625), Kraken2 (AUPRC = 0.184), and CLARK-S (AUPRC = 0.013) on pooled micro-averaged metrics. FALCON2’s advantage is most pronounced on ultra-short reads (20-40 bp), where exact k -mers become sparse. FALCON2 pre-filtering at threshold 0.7 improved precision by 10 percentage points with negligible recall loss. FALCON2 runs on systems with 4-8 GB RAM for typical analyses. FALCON2 is freely available at https://github.com/cobilab/FALCON2 under GPL v3 license. 1 Introduction Ancient DNA (aDNA) recovered from archaeologic, paleontologic and historical specimens yields key insights into extinct species, ancient microbiomes and the evolutionary processes that shaped them [ 13 , 3 ]. However, aDNA sequences are typically characterized by extreme degradation: fragments are generally 20–100 bp in length, exhibit elevated cytosine deamination rates (C → T and G → A substitutions at read termini) and are often contaminated by modern and environmental DNA [ 5 , 1 , 18 ]. These properties compromise the reliability of conventional metagenomic classifiers, which rely on exact k -mer matches or long alignment anchors. 
In current practice, taxonomic classification of shotgun metagenomes follows two main paradigms: (i) exact or reduced k -mer methods with Lowest Common Ancestor (LCA) post-processing (e.g., Kraken2, Centrifuge, CLARK) [ 20 , 7 , 9 ]; and (ii) alignment-based LCA pipelines, such as MALT used in conjunction with MEGAN, typically relying on FM-index/BWT aligners [ 4 ]. These pipelines are commonly paired with aDNA-aware mapping and processing frameworks that mitigate short, damaged reads [ 17 , 16 ]. These approaches perform strongly on long, low-error reads but lose discriminative power as fragments shorten and post-mortem damage increases, precisely the regime characteristic of aDNA, because exact k -mers and long alignment anchors become sparse. Marker-gene workflows (e.g., 16S/18S/ITS) and downstream functional imputation are powerful but target specific loci, making them less suitable for shotgun aDNA where fragmentation and damage are pervasive [ 2 , 8 ]. Compression-based methods, including FALCON-meta, avoid the need for long exact substrings by comparing relative compressibility between reads and references, retaining signal on ultra-short, damaged fragments [ 12 ]. FALCON-meta [ 12 ] introduced a compression-based approach to taxonomic classification, using Finite-Context Models (FCMs) and Normalized Relative Similarity (NRS) to quantify sequence relationships without requiring exact substring matches. This approach demonstrated robustness to short and damaged reads, but was implemented as a fragmented codebase of multiple scripts, lacked model reuse capabilities, and did not address contamination filtering. We present FALCON2, the successor to FALCON-meta, engineered as a unified, production-ready tool for ancient viral metagenomics. 
FALCON2 incorporates finite-context models, model persistence via serialization, native handling of compressed inputs (FASTA/FASTQ, gzip), multiple file processing, and optional integration of a lightweight compression-based pre-filter for removing contaminant reads. Benchmarking on synthetic viral datasets with factorial combinations of read length (20-100 bp), deamination rate (0-30%), and sequencing depth (1-60 × ) shows that FALCON2 outperforms established nucleotide-based classifiers on short and damaged fragments while maintaining practical computational requirements. 2 Methods FALCON2 classifies metagenomic reads by computing NRS scores between query sequences and reference genomes [ 12 ]. For each query sequence x , FALCON2 trains multiple FCMs on a reference sequence y , then encodes x using these reference-derived (frozen) models to obtain the compressed length C ( x || y ). The method uses a cooperative mixture of multi-order FCMs and substitution-tolerant Markov models, which estimate symbol probabilities based on preceding context and allow for a degree of mismatch tolerance. The efficiency of relative compression depends on how well the reference-trained model collects and organizes information so that questions about the target can be answered with as few bits as possible. This relative compression setup implies that the compressor cannot exploit intra-target redundancies; only information learned from the reference can reduce code length. Intuitively, if the reference describes the target well, the required relative information is small; if the target is not related, the encoder approaches the maximum number of bits. 
Formally, the Relative Similarity (RS) is defined as $\mathrm{RS}(x \,\|\, y) = |x| \log_2 \Theta - C(x \,\|\, y)$ and the normalized version as $\mathrm{NRS}(x \,\|\, y) = 1 - \dfrac{C(x \,\|\, y)}{|x| \log_2 \Theta}$, where | x | is the size of the sequence and Θ is the size of the alphabet (4 for DNA). In read-level classification, FALCON2 assigns x to the reference from the database that produces the highest NRS. 
Additional details can be found at Supplementary Section 1. For composition profiling (the meta subcommand), we instead treat the sample as a bag/concatenation of reads X and compute NRS( y i || X ) for each reference y i in the database ( y 1 , y 2 , …), thus quantifying how well the sample explains each reference and allowing global composition estimates [ 12 , 11 , 10 ]. For efficiency, the highest-scoring references (top- K by NRS) are cached and reused across computations. 2.1 Unified executable FALCON-meta exposed functionality through several binaries: for global composition (FALCON), local-similarity filtering and visualization (FALCON-filter, FALCON-filter-visual), and inter-reference similarity (FALCON-inter, FALCON-inter-visual) [ 12 , 11 , 10 ]. FALCON2 consolidates these into a single production-ready executable that presents equivalent capabilities as subcommands: meta for composition, filter / fvisual for segmentation and visualization of local profiles, and inter / ivisual for computing and rendering genome–genome similarity. The interface adds practical conventions: native streaming of gzip-compressed FASTA/FASTQ and colon-separated multi-file tokens for paired reads or multi-FASTA references, as well as model persistence and stricter reproducibility guarantees. In routine use, composition runs as FALCON2 meta [options] READS_GROUP DB_GROUP , where each positional argument is a single colon-separated token (e.g., R1.fq.gz:R2.fq.gz for paired-end reads and ref1.fa:ref2.fa for multiple references). Inputs may be gzip compressed and are streamed directly, avoiding prior decompression. When local profiling is required, meta emits a profile in-process (e.g., with -Z -y profile.tsv ), which is then segmented by filter and rendered by fvisual without format conversion. 
For database inspection and quality control, inter computes a genome-by-genome similarity matrix from the same multi-FASTA reference set, and ivisual produces a publication-ready heat map. 
Per-command help follows standard conventions and is available via FALCON2 -h . FALCON2 operates directly on data from sequencers, independently of coverage, and accepts both assembled references and non-assembled read sets. Although FALCON2 is designed for ancient viral metagenomics, it scales to large organismal databases (viral, bacterial, archaeal, fungal) as well as custom collections. 2.2 Model persistence and reuse Trained FCMs can be serialized to .fcm files and reloaded in subsequent runs, enabling computational reuse in multi-sample analyses or when the reference database remains constant. Model persistence is implemented via the -S (save) and -L (load) flags, combined with -M to specify the model path. This functionality reduces steady-state inference time and ensures reproducibility across runs. For reproducibility, -T enables train-only runs and -I prints model metadata (tool version, reference snapshot hash, key parameters). Reloading ( -L -M ) enforces basic compatibility checks so that stale or mismatched models are rejected. 2.3 Pre-filtering FALCON2 optionally integrates a compression-based pre-filter for contaminated samples. It computes approximate similarity scores between reads and a contaminant library (e.g., E. coli , human), retaining only reads with similarity below a configurable threshold τ [ 19 ]. Reads exceeding τ are excluded before FALCON2 classification, reducing computational load and improving precision. The pre-filter is activated with -mg , with threshold controlled by -mt (default 0.9; recommended 0.6–0.7 for aDNA). 2.4 Output and parallelization Outputs are tabular files reporting NRS scores and taxonomic assignments for each read. Multi-threading is controlled via the -n parameter (default: all available cores). Internally, FALCON2 uses cache-aware hashing to memoize local probabilities and maintains a top- K cache of the highest NRS values across passes for speed. 
The compression depth parameter -l controls model fidelity; -l 47 was used in all benchmarking to maximize discrimination on short reads. The tool is freely available, under the GPLv3 license, at https://github.com/cobilab/FALCON2 . 3 Benchmark 3.1 Experimental design We benchmarked FALCON2 against Centrifuge [ 7 ], Kraken2 [ 20 ] and CLARK [ 9 ] under strict parity conditions (details in Supplementary Section 2). All tools used identical reference databases (NCBI RefSeq-Viruses plus E. coli K-12 and human mitochondrial DNA as contaminants), as well as the same NCBI taxonomy snapshot, and fixed thread counts (n=8). Synthetic datasets were generated via Gargammel [ 14 ] (for aDNA fragmentation and deamination) and ART [ 6 ] (for sequencing errors), spanning combinations of read length (20, 40, 60, 80, 100 bp), deamination rate (0.0, 0.1, 0.2, 0.3), and sequencing depth (1, 5, 10, 20, 40, 60 × ). Ground truth was extracted from simulation metadata, and species-level precision, recall, F 1 -score, AUPRC, and AUC-ROC were computed. AUPRC was prioritized over AUC-ROC due to class imbalance [ 15 ]. 3.2 Results Table 1 summarizes pooled micro-averaged performance across all experimental conditions. Details are in Supplementary Sections 3–9. FALCON2 achieved the highest AUPRC (0.968), F 1 -score (0.918), and AUC-ROC (0.999), substantially outperforming Centrifuge (AUPRC = 0.625, F 1 = 0.738), Kraken2 (AUPRC = 0.184, F 1 = 0.372), and CLARK (AUPRC = 0.013, F 1 = 0.103). The advantage was most pronounced at read length 20 bp, where FALCON2’s AUPRC exceeded Centrifuge by 0.34 and Kraken2 by 0.75 ( Figure 1 ). At read length 100 bp with low deamination (0.0), performance differences narrowed (ΔAUPRC ≈ 0.06), consistent with the hypothesis that k -mer methods reassert efficiency on long, intact reads. 
View this table: View inline View popup Download powerpoint Table 1: Pooled micro-averaged performance metrics across all experimental conditions. Best values in bold. 
Time (Wall) is in minutes and RAM (peak) is in gigabytes. Download figure Open in new tab Figure 1. AUPRC across depth, read length, and deamination with contamination applied. A) AUPRC before trimming and B) AUPRC after trimming. Pre-filtering at threshold 0.7 increased precision from 0.85 to 0.95 while recall declined minimally from 0.90 to 0.87. The retained fraction k ( τ ) decreased from 0.70 to 0.30, indicating 70% of reads were filtered. An equivalence test confirmed that disabling filtering ( τ = 1.0) produced byte-identical outputs, validating orchestration integrity. In summary, FALCON2’s compression-based approach maintains discriminative capacity on short and damaged aDNA reads where exact k -mer methods degrade. The integration of position-aware models, model persistence, and pre-filtering provides a robust, production-ready tool for ancient DNA metagenomics. The benchmarking framework enforces strict parity conditions and is fully reproducible via the archived scripts and data ( https://doi.org/10.5281/zenodo.17291215 ). 3.3 Computational resources FALCON2 exhibited higher runtime (median 0.88 min per sample) and memory usage (median 1.90 GB) than Kraken2 (0.04 min, 0.45 GB) and Centrifuge (0.20 min, 0.07 GB), reflecting the computational cost of FCM. However, absolute times remain practical for typical metagenomic workflows. On-disk footprint for the viral reference database was 75 MB (FALCON2), 45 MB (Centrifuge), and 255 MB (Kraken2). When using model persistence, build costs shift to a one-time training phase, and steady-state inference accelerates. 4 Conclusions FALCON2 advances ancient viral metagenomic classification with compression-based models robust to fragmentation and deamination. Under controlled benchmarking, FALCON2 achieved superior AUPRC, F 1 , and AUC-ROC compared to established classifiers, with the largest advantages on ultra-short reads (20-40 bp). 
The tool’s unified architecture, model reuse, and contamination filtering capabilities establish FALCON2 as an open-source solution for ancient viral metagenomic analysis. References [1]. ↵ A. W. Briggs , U. Stenzel , P. L. F. Johnson , R. E. Green , J. Kelso , S. Pääbo , M. Stiller , and M. Meyer . Patterns of damage in genomic DNA sequences from a Neandertal . Proceedings of the National Academy of Sciences , 104 ( 37 ): 14616 – 14621 , 2007 . OpenUrl Abstract / FREE Full Text [2]. ↵ J. G. Caporaso , J. Kuczynski , J. Stombaugh , K. Bittinger , F. D. Bushman , E. K. Costello , N. Fierer , A. G. Peña , J. K. Goodrich , J. I. Gordon , G. A. Huttley , S. T. Kelley , D. Knights , J. E. Koenig , R. E. Ley , C. A. Lozupone , D. McDonald , B. D. Muegge , M. Pirrung , J. Reeder , J. R. Sevinsky , P. J. Turnbaugh , W. A. Walters , J. Widmann , T. Yatsunenko , J. Zaneveld , and R. Knight . QIIME allows analysis of high-throughput community sequencing data . Nature Methods , 7 ( 5 ): 335 – 336 , 2010 . OpenUrl PubMed [3]. ↵ J. Enk , A. Devault , C. Widga , J. Saunders , P. Szpak , J. Southon , J.-M. Rouillard , B. Shapiro , G. B. Golding , G. Zazula , et al. Complete Columbian mammoth mitogenome suggests interbreeding with woolly mammoths . Genome Biology , 12 ( 5 ): R51 , 2011 . OpenUrl CrossRef PubMed [4]. ↵ A. Herbig , F. Maixner , K. I. Bos , A. Zink , J. Krause , and D. H. Huson . MALT: Fast alignment and analysis of metagenomic dna sequence data applied to the tyrolean iceman . bioRxiv , page 050559 , 2017 . [5]. ↵ M. Hofreiter , V. Jaenicke , D. Serre , A. von Haeseler , and S. Pääbo . DNA sequences from multiple amplifications reveal artifacts induced by cytosine deamination in ancient DNA . Nucleic Acids Research , 29 ( 23 ): 4793 – 4799 , 2001 . OpenUrl CrossRef PubMed Web of Science [6]. ↵ W. Huang , L. Li , J. R. Myers , and G. T. Marth . Art: a next-generation sequencing read simulator . Bioinformatics , 28 ( 4 ): 593 – 594 , 12 2011 . 
OpenUrl PubMed Web of Science [7]. ↵ D. Kim , L. Song , F. P. Breitwieser , and S. L. Salzberg . Centrifuge: rapid and sensitive classification of metagenomic sequences . Genome Research , 26 ( 12 ): 1721 – 1729 , 2016 . OpenUrl Abstract / FREE Full Text [8]. ↵ M. G. I. Langille , J. Zaneveld , J. G. Caporaso , D. McDonald , D. Knights , J. A. Reyes , J. C. Clemente , D. E. Burkepile , R. L. Vega Thurber , R. Knight , R. G. Beiko , and C. Huttenhower . Predictive functional profiling of microbial communities using 16S rRNA marker gene sequences . Nature Biotechnology , 31 ( 9 ): 814 – 821 , 2013 . OpenUrl CrossRef PubMed [9]. ↵ R. Ounit , S. Wanamaker , T. J. Close , and S. Lonardi . CLARK: fast and accurate classification of metagenomic and genomic sequences using discriminative k-mers . BMC Genomics , 16 : 236 , 2015 . [10]. ↵ D. Pratas , M. Hosseini , G. Grilo , A. J. Pinho , R. M. Silva , T. Caetano , J. Carneiro , and F. Pereira . Metagenomic composition analysis of an ancient sequenced polar bear jawbone from Svalbard . Genes , 9 ( 9 ): 445 , 2018 . OpenUrl CrossRef [11]. ↵ D. Pratas and A. J. Pinho . Metagenomic composition analysis of sedimentary ancient DNA from the Isle of Wight . In 2018 26th european signal processing conference (EUSIPCO) , Pages 1177 – 1181 . IEEE , 2018 . [12]. ↵ D. Pratas , A. J. Pinho , R. M. Silva , J. M. Rodrigues , M. Hosseini , T. Caetano , and P. J. Ferreira . Falcon-meta: A method to infer metagenomic composition of ancient DNA . BioRxiv , page 267179 , 2018 . [13]. ↵ D. Reich , R. E. Green , M. Kircher , J. Krause , N. Patterson , E. Y. Durand , B. Viola , A. W. Briggs , U. Stenzel , P. L. F. Johnson , et al. Genetic history of an archaic hominin group from Denisova Cave in Siberia . Nature , 468 ( 7327 ): 1053 – 1060 , 2010 . OpenUrl CrossRef GeoRef PubMed Web of Science [14]. ↵ G. Renaud , K. Hanghøj , E. Willerslev , and L. Orlando . gargammel: a sequence simulator for ancient DNA . 
Bioinformatics , 33 ( 4 ): 577 – 579 , 2017 . OpenUrl CrossRef PubMed [15]. ↵ T. Saito and M. Rehmsmeier . The precision-recall plot is more informative than the ROC plot when evaluating binary classifiers on imbalanced datasets . PLOS ONE , 10 ( 3 ): e0118432 , 2015 . OpenUrl CrossRef PubMed [16]. ↵ M. Schubert , L. Ermini , C. Der Sarkissian , S. Översti , H. Ghazal , N. Wales , A. Albrechtsen , and L. Orlando . Characterization of ancient and modern genomes by SNP detection and phylogenomic and metagenomic analysis using PALEOMIX . Nature Protocols , 9 ( 5 ): 1056 – 1082 , 2014 . OpenUrl PubMed [17]. ↵ M. Schubert , A. Ginolhac , S. Lindgreen , J. F. Thompson , K. A. S. Al-Rasheid , E. Willerslev , A. Krogh , and L. Orlando . Improving ancient DNA read mapping against modern reference genomes . BMC Genomics , 13 : 178 , 2012 . [18]. ↵ P. Skoglund , B. H. Northoff , M. V. Shunkov , A. P. Derevianko , S. Pääbo , J. Krause , and M. Jakobsson . Separating endogenous ancient dna from modern day contamination in a siberian neandertal . Proceedings of the National Academy of Sciences , 111 ( 6 ): 2229 – 2234 , 2014 . OpenUrl Abstract / FREE Full Text [19]. ↵ M. J. P. Sousa , A. J. Pinho , and D. Pratas . A sensitive compression-based method for filtering targeted fastq sequencing reads . pages 1561 – 1565 , 2024 . [20]. ↵ D. E. Wood , J. Lu , and B. Langmead . Improved metagenomic analysis with Kraken 2 . Genome Biology , 20 ( 1 ): 257 , 2019 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted November 07, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following Metagenomic classification of ancient viruses Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Metagenomic classification of ancient viruses Luís L. Marques , Armando J. Pinho , Diogo Pratas bioRxiv 2025.11.07.687203; doi: https://doi.org/10.1101/2025.11.07.687203 Share This Article: Copy Citation Tools Metagenomic classification of ancient viruses Luís L. Marques , Armando J. Pinho , Diogo Pratas bioRxiv 2025.11.07.687203; doi: https://doi.org/10.1101/2025.11.07.687203 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41910) Biophysics (21436) Cancer Biology (18576) Cell Biology (25480) Clinical Trials (138) Developmental Biology (13368) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15598) Genomics (22482) Immunology (17726) Microbiology (40360) Molecular Biology (17163) Neuroscience (88534) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15129) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Outcome instruments

NRS-pain

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00