Full text
51,405 characters
· extracted from
preprint-html
· click to expand
Metagenomic sequencing identifies potential respiratory pathogens in PCR-negative subset of surveillance samples | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Metagenomic sequencing identifies potential respiratory pathogens in PCR-negative subset of surveillance samples View ORCID Profile Anne Caroline Mascarenhas , View ORCID Profile Rose S. Kantor , View ORCID Profile James Thissen , View ORCID Profile Car Reen Kok , View ORCID Profile Monica Borucki , Christina Morales , Sharon Messenger , View ORCID Profile Crystal Jaing , View ORCID Profile Debra A. Wadford doi: https://doi.org/10.1101/2025.09.26.25336420 Anne Caroline Mascarenhas 1 Physical and Life Sciences Division, Lawerence Livermore National Laboratory , Livermore, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Anne Caroline Mascarenhas Rose S. Kantor 1 Physical and Life Sciences Division, Lawerence Livermore National Laboratory , Livermore, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rose S. Kantor James Thissen 1 Physical and Life Sciences Division, Lawerence Livermore National Laboratory , Livermore, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for James Thissen Car Reen Kok 1 Physical and Life Sciences Division, Lawerence Livermore National Laboratory , Livermore, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Car Reen Kok Monica Borucki 1 Physical and Life Sciences Division, Lawerence Livermore National Laboratory , Livermore, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Monica Borucki Christina Morales 2 Viral and Rickettsial Disease Laboratory, Center for Laboratory Sciences, California Department of Public Health , Richmond, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sharon Messenger 2 Viral and Rickettsial Disease Laboratory, Center for Laboratory Sciences, California Department of Public Health , Richmond, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Crystal Jaing 1 Physical and Life Sciences Division, Lawerence Livermore National Laboratory , Livermore, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Crystal Jaing For correspondence: jaing2{at}llnl.gov Debra A. Wadford 2 Viral and Rickettsial Disease Laboratory, Center for Laboratory Sciences, California Department of Public Health , Richmond, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Debra A. Wadford Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Respiratory pathogens are a significant source of global morbidity, mortality, and economic burden, with the COVID-19 pandemic driving increased interest in and funding for respiratory disease surveillance. Syndromic panel multiplex nucleic acid amplification tests (NAATs) such as the BioFire Respiratory Panel (RP) are designed to identify the most common etiologic agents of respiratory illness. Untargeted metagenomic sequencing is a powerful tool for pathogen-agnostic detection, enabling the recovery of complete genomes for genomic epidemiology and variant tracking. In this study, we performed untargeted metagenomic sequencing of 305 samples previously negative by BioFire RP and SARS-CoV-2 testing and 26 samples that were previously positive by either of the diagnostic tests. A subset of 78 samples underwent probe-capture enrichment sequencing targeting human viruses. Using these methods, we identified human respiratory viruses in 16 of the 305 previously negative samples (5%). The most common viruses identified were Influenza C virus, Human Bocavirus, Rhinovirus A and C, and SARS-CoV-2. Consensus genomes were recovered for 14 viruses with >90% coverage breadth, revealing closely related Bocavirus strains from neighboring counties and distinct Rhinovirus strains across samples. We also identified 21 samples with a single predominant bacterial or fungal species in the previous negative cohort. These findings underscore the challenges of identifying causal agents from multiplex NAAT-negative cases and highlight the utility of metagenomics for expanding the scope of pathogen surveillance. 1. Introduction Respiratory pathogens cause significant morbidity, mortality, and economic burden with an estimated 12.8 billion new upper respiratory infections globally in the year 2021 1 . The COVID-19 pandemic led to increased interest in and funding for the surveillance of respiratory diseases. This surveillance has primarily been conducted via qPCR and PCR-based panels for clinical specimens, with a focus on SARS-CoV-2, influenza A virus, and respiratory syncytial virus (RSV) 2 - 4 . Wastewater-based surveillance has also provided a partial view of circulating pathogens, although some respiratory pathogens may not be shed into wastewater at detectable concentrations 5 - 7 . 3 While clinical surveillance increased during and after the pandemic, the range of targets has generally been limited to commonly occurring pathogens whose genomes are conserved enough to be detectable by established PCR methods. Hence, rarer or more divergent pathogens may have been missed. Separately, use of pathogen-agnostic untargeted metagenomic and metatranscriptomic sequencing has increased for characterization of respiratory infections in clinical samples 8 - 10 . Recently, several groups have developed validated clinical metagenomic workflows for respiratory viruses 11 , 12 . Additionally, commercial probe-capture sequencing panels for broad-range viral enrichment, including respiratory viruses, have recently been shown to increase the sensitivity of sequencing 13 , 14 . Clinical metagenomic sequencing may even produce complete pathogen genomes, which, as demonstrated for SARS-CoV-2, can be useful for variant tracking, for genomic epidemiology and for predicting vaccine and treatment efficacy 15 . Yet despite its potential utility, metagenomics has not been widely applied in surveillance beyond SARS-CoV-2 due to its relatively high cost. During the COVID-19 pandemic, the California SARS-CoV-2 and Respiratory Virus Surveillance System (CalSRVSS) was established by the California Department of Public Health (CDPH) to provide outpatient testing of symptomatic individuals in 10 counties across California 16 . From 2020-2023, this program collected 15,325 samples from symptomatic individuals, which were tested for SARS-CoV-2 and 22 additional pathogens included on the BioFire Respiratory Pathogens panel. Of these, 55% were negative for all 22 BioFire pathogen analytes (unpublished data). These BioFire- and SARS-CoV-2 negative samples provided a unique opportunity to examine whether less commonly recognized pathogens may have been associated with the respiratory symptoms observed from these cases. We randomly selected 305 previously negative samples and 26 previously positive samples from the CalSRVSS sample set for untargeted sequencing and analyzed a subset of 78 samples using semi-targeted metagenomics. Here, we report on the frequency of potential pathogens detected. 2. Methods Sample collection De-identified residual nasopharyngeal and other swab eluate samples (n=305, identifiers “N#”) from adult and pediatric patients with upper respiratory tract infection symptoms were obtained from the California Department of Public Health (CDPH) as part of the California SARS-CoV-2 and Respiratory Virus Surveillance System (CalSRVSS) initiative ( Table S1 ) 16 . These samples were collected during outpatient visits between July 2020 to January 2022 in 10 counties in California and were negative for SARS-CoV-2 and for 18 viruses and 4 bacteria included in the BioFire Respiratory Panel 2.0 or 2.1 ( https://www.biofiredx.com ), which covers pathogens commonly associated with upper respiratory tract infections 17 . A set of known positive swabs including SARS-CoV-2 positive samples from individuals previously infected with SARS-CoV-2 (“reinfections”, n=7) and BioFire positive samples (n=19) were included for comparison (identifiers “P#”; Table S1 ). Samples were received in PrimeStore Molecular Transport Medium (ThermoFisher Scientific, Waltham, MA). The sample IDs do not reflect patient identifiers and were solely used for sample tracking purposes to protect the patient ID. The CalSRVSS project was reviewed by the State of California Health and Human Services Agency Committee for the Protection of Human Subjects and determined to be considered Not Research or Exempt (Project # 2021-168). Total nucleic acid extraction Total nucleic acid (DNA and RNA) was extracted from 500 μL of each eluate sample as follows. First, cells were lysed in 100μL ATL Buffer (Qiagen, Redwood City, CA) with bead-beating for 40s at 6 m/s with FastPrep-24 5G equipment using lysing matrix F tubes (MP Biomedicals, Burlingame, CA). Total nucleic acid was extracted from 200 μL of the resulting lysate using the IndiSpin Pathogen Kit (Indical Bioscience, Leipzig, Germany) following the manufacturer’s protocol with final elution in 55μL AVE Buffer. DNA and RNA were quantified separately using the Qubit High Sensitivity dsDNA and RNA kits, respectively, on a Qubit 3.0 fluorometer (Invitrogen, Carlsbad, CA) using the recommended instructions. cDNA synthesis and whole genome amplification The first strand cDNA synthesis reaction contained 11μL of sample, 1μL of random hexamer primers (50mM; Integrated DNA Technologies, Coralville, IA) and 1μL of dNTP mix (10mM; Thermo Fisher Scientific, Waltham, MA) and was incubated at 65°C for 5 min, and 5 min on ice-water slurry. Synthesis was completed by addition of the remaining reaction components (4 μL of 5x First Strand Synthesis Buffer, 1 μL of 0.1 M DTT, 1 μL of RNAseOUT, 1 μL of SuperScript IV reverse transcriptase and 1 μL of DEPC water (Thermo Fisher Scientific, Waltham, MA) to the cooled samples followed by incubation in a thermocycler (protocol: 23°C for 10 min, 55°C for 60 min, and 80°C for 10 min). A total of 10μL from each DNA/cDNA sample was amplified using the Whole Transcriptome Amplification kit (Qiagen, Redwood City, CA) following the manufacturer’s guidelines. Amplified samples were purified with AMPure XP magnetic beads (Beckman Coulter, Brea, CA) using a slightly modified version of the recommended protocol: 1.8x AMPure XP magnetic beads, 3x Ethanol 70% washes, and elution in 60 μL of Ultra-Pure Water (UPW). Purified samples were used in downstream library preparation protocols for sequencing. Untargeted metagenomic sequencing The untargeted metagenomics DNA libraries were prepared from 200-300 ng of amplified nucleic acid with the Illumina DNA Prep kit (Illumina, San Diego, CA) following the standard manufacturer’s protocol but using the Illumina UD 10-bases Nextera Indexes from IDT with 5 PCR cycles and 30μL of RSB for final library elution. Final libraries were quantified, and quality was assessed with the High Sensitivity D5000 assay on a TapeStation 4200 instrument (Agilent Technologies, Santa Clara, CA). Libraries were pooled in equimolar quantities for multiplexing and sequenced in 300 cycles, with 2x151bp inserts and 10bp indexes in paired-end mode on an Illumina NextSeq2000 using its on-board denaturing function. Twist CVRP library preparation and sequencing Samples that were selected for enrichment with the Twist comprehensive viral research panel (Twist Bioscience, South San Francisco, CA) were prepared as follows. First, DNA libraries from the whole genome amplified samples were prepared with the Twist Total Nucleic Acids Library Preparation EF Kit 2.0 for Viral Pathogen Detection and Characterization starting from Step 3 “Perform DNA fragmentation, end repair and dA-tailing” of the protocol. The whole-genome amplified samples (1-2 ng of in 25μL of UPW) were fragmented, repaired and dA-tailed under the following thermal cycler conditions: 1) hold at 4°C, 2) incubation at 37°C for 20 min to generate 180-220bp inserts, 3) 65°C for 30 min and 4) hold at 4°C. Then, the Twist universal adapters were ligated to the dA-tailed DNA fragments, and libraries were purified with magnetic beads, as per the instructions. The adapter-ligated libraries were amplified with the Twist UDI Primers (10bp in length) using 10 cycles for the Polymerase Reaction (PCR), purified using the manufacturer’s instructions but eluted in 25μL of UPW. The resulting libraries were quantified with the High Sensitivity dsDNA Kit on a Qubit 2.0 fluorometer. Libraries were enriched with the Twist Comprehensive Viral Research Panel (CVRP) (36.8 Mb) following the Twist Target Enrichment Standard Hybridization v2 Protocol. Briefly, libraries were pooled in equal quantities (187.5ng each) in groups of 8 samples for a multiplexed 16h hybridization with the probes in the enrichment panel. The hybridized targets were captured with streptavidin beads, amplified using a panel size of 10-50Mb using 8 PCR cycles for multiplexed samples, followed by a purification step with DNA Purification Beads and eluted in 35μL of UPW. The viral enriched libraries were quantified with a High Sensitivity dsDNA Kit on a Qubit 2.0, and quality was measured using a High Sensitivity D5000 assay on a TapeStation 4200 instrument. The Twist library pools were multiplexed in equal quantities and sequenced with 300x cycles with 2x 151bp inserts and 10bp indexes in paired-end mode on an Illumina NextSeq2000 using its on-board denaturing function. Dataset pre-processing Illumina paired-end metagenomic sequencing reads were demultiplexed and converted from bcl to fastq file format with bcl2fastq v2.20.0.422 (Illumina, San Diego, CA) using default parameters. Reads were filtered and trimmed with Fastp v0.23.4 18 , 19 using the command line switches ‘-y -g --poly_g_min_len=5 -x -- poly_x_min_len=5’. Human genomic content was removed from the metagenomic datasets by mapping the sequencing reads to the reference human genome (GRCh38.p14; RefSeq Assembly: GCF_000001405.40) using minimap2 v2.26-r1175 20 configured for short-read alignment with ‘-x sr’. The human-depleted sam files were converted to bam with Samtools v1.13 21 , 22 using the view algorithm and the command line options ‘-bSh -f 4’. Decontaminated bam files were sorted and converted to fastq with samtools. All of the paired-end sequencing reads for untargeted and Twist-enriched metagenomic datasets were de-identified as recommended by the National Center of Biotechnology Information (NCBI) prior to deposition in the Sequence Read Archive (SRA) 23 . Briefly, Illumina paired-end reads were interleaved with SeqFu v1.22.3 24 using the built-in interleave function and careful mode enabled (-c). Interleaved datasets were de-identified using the NCBI’s Human Read Removal Tool (HRRT) v2.2.1 tool (SRA Human Scrubber; https://github.com/ncbi/sra-human-scrubber ) 25 with the ‘-x’ command line switch. The ‘scrubbed’ reads were de-interleaved with SeqFu using its deinterleave function and careful mode enabled (-c). Taxonomic classification and analysis The filtered metagenomic reads were taxonomically classified using a Lower Common Ancestor (LCA) approach by Centrifuge v1.0.4 26 through read alignment against the NCBI nucleotide (NT) database 27 curated in January 2023 28 . Centrifuge was configured with a minimum hit length of 15 and best-hit reporting using ‘--min-hitlen 15’ and ‘-k 1’. Results were refined with Recentrifuge v1.12.0 29 using a minimum hit length of 40. Recentrifuge read count data was imported into R+ v4.2.0 and Python v3.13.0 (pandas v2.2.3) for downstream microbiome (bacterial, fungal, and viral) analyses. Within R, taxonomic compositional analysis was carried out using the phyloseq package v1.48.0 30 . Species-level hits were cross-referenced with a curated list of human infectious pathogens compiled from the Swiss Institute of Bioinformatics ViralZone database (De Castro et al., 2024), the NCBI Pathogen Detection database (Information, 2016) and two previously published compilations of bacterial (Bartlett et al., 2022) and fungal (Bartoszewicz et al., 2022) pathogens. Excluded from consideration were non-respiratory viruses including polyomaviruses, papillomaviruses, herpesviruses, erythroviruses, Molluscum contagiosum virus, GB virus C, rotaviruses, Torque teno virus, and picobirnaviruses ( Supplementary Figure S2 ). We also removed pervasive low read-count hits to human immunodeficiency virus (HIV) caused by human-contaminated viral genomes in the database. Viral detection in previously negative and positive samples used a threshold of ≥10 reads per virus based on Recentrifuge. Two no-template control libraries were also sequenced and found to contain no reads hitting to any human pathogenic virus. Heatmaps were generated showing species-level read counts (with the exception of SARS-CoV-2, which was “no rank” within the taxonomy database used), using plotnine v0.13.6. Viral genome analysis For samples found by Recentrifuge to contain ≥ 10 reads hitting to Bocavirus, Rhinovirus A, Rhinovirus C, Influenza C virus, or SARS-CoV-2, additional analysis was conducted. First, all complete genomes classified as Bocaparvovirus (taxID 1507401), Rhinovirus A (taxID 147711), and Rhinovirus C (taxID 463676) were downloaded from Genbank using the NCBI Virus portal ( https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/ ; accessed April 7, 2025). For each of these groups, Genbank genomes were clustered at 95% identity and 75% coverage using mmseqs2 31 (version 18d8ddc2f468b8a00cec9addb3d2328eb790ac9e). Reads from each sample containing each virus were mapped to cluster representatives, using minimap2 (v2.28-r1209) 20 without secondary hits. Mapping results were filtered to remove supplementary hits (samtools view -F 0x800), sorted, indexed, and used as input for coverage calculations by samtools (v1.21). The genome with highest coverage breadth for each virus in each sample was used for a final mapping step, which provided coverage information for untargeted and targeted sequencing ( Table 1 ). Masked consensus genomes were extracted using samtools (v1.21) with default settings (requiring a frequency of 0.75 to make a base call) from the final mapping results using targeted sequencing data. Best BLAST hits were identified using BLASTn against the NCBI core-nt database (accessed April 14, 2025). For influenza C virus and SARS-CoV-2, the RefSeq entries (strains C/Ann Arbor/1/50 and Wuhan Hu-1, respectively) were used as the reference genomes in a single mapping step followed by coverage calculations and extraction of the consensus. Mapping achieved near 100% coverage breadth for most samples, suggesting these reference genomes were acceptable starting points. For SARS-CoV-2, variant lineages were identified from consensus sequences using the Nextclade web interface ( https://clades.nextstrain.org/ accessed April 28, 2025). View this table: View inline View popup Table 1. Identification, coverage, and relative abundance of viral consensus genomes recovered. 3. Results We randomly selected 305 BioFire-negative samples for sequencing to detect less common respiratory pathogens not included as part of the BioFire Respiratory Pathogens panel. Samples were collected from individuals with respiratory symptoms as part of CalSRVSS across five counties in California between July 2020 and December 2022 16 . In parallel, a set of known viral positive samples and SARS-CoV-2 reinfection samples was selected based on positive results for one or more respiratory pathogens (“previous positives”, n=26). Untargeted Illumina sequencing produced an average of 10.8 million reads per sample, and probe-capture enrichment (Twist CVRP) on a subset of 78 samples (56 previous negative, 22 previous positives) produced an average of 4.9 million reads per sample ( Supplementary Table S1 ). In nearly all samples, a majority of untargeted sequencing reads were derived from the human host, with a few exceptions in which bacteria dominated ( Supplementary Figure S1 ). After host read removal, untargeted sequence data were dominated by unclassified sequences and bacterial sequences. Meanwhile, targeted sequencing via probe-capture enrichment using the Twist CVRP yielded a relatively high fraction of viral reads for a subset of previous negative and most previous positive samples ( Figure 1A ). Download figure Open in new tab Figure 1. Relative abundance of viral, archaeal, bacterial, fungal, non-human and non-fungal eukaryotic reads across the data cohort. Previous ( A ) positive and ( B ) negative samples were sequenced with untargeted metagenomics (top panel), while a subset also underwent viral enrichment (bottom panel). Data are presented after human host removal (see Supplementary Figure S1 for inclusion of host data) and taxonomy classification with Centrifuge and Recentrifuge. 3.1 Viral pathogen detection We first employed Centrifuge and Recentrifuge as a highly sensitive method for detecting viral reads, cross-referencing hits against a list of human pathogenic viruses. In previous positive samples, expected viruses were largely confirmed by both untargeted and probe-capture sequencing, with some exceptions ( Figure 2A ). In particular, of 7 SARS-CoV-2 previous positive samples, just two were successfully sequenced with untargeted sequencing. The remaining 5 samples underwent targeted sequencing to look for additional infections. One of these had a known co-infection with human coronavirus 299E, which was confirmed by sequencing while SARS-CoV-2 was not detected. Rhinovirus was detected in one other SARS-CoV-2 non-detect sample with probe-capture enrichment. Expected viruses were not detected in three other previous positives even with targeted sequencing ( Figure 2A ) . Download figure Open in new tab Figure 2. Human respiratory virus detection. Detection in (A) previous positive samples and (B) previous negative samples shows substantial improvement with probe-capture enrichment (right). Fill color indicates read counts based on Recentrifuge, shown where ≥ 10 reads per virus, per sample; note log10 scale. Samples that did not undergo probe-capture enrichment are indicated with “x”, while unexpected viral detections are indicated with “u”. White tiles for previous positive samples indicate viruses expected based on previous results that were not detected by sequencing. Note that some samples contained multiple viruses. Human respiratory viruses were detected in16 of 305 (5%) of previous negative samples via untargeted and targeted sequencing. Probe-capture enrichment improved viral sequence recovery relative to untargeted sequencing ( Figure 2B ). The most commonly detected viruses in previous negative samples were Influenza C virus (6 samples), Human Bocavirus (4 samples), Rhinovirus A and C (3 samples), and SARS-CoV-2 (3 samples). One additional sample contained Human parvovirus B19 ( Figure 2B ). To assess genomic novelty and potential strain-level relationships between the most common viruses in previous negative samples, we mapped untargeted and targeted sequencing reads to corresponding reference sequences (see Methods). Consensus genomes were extracted for 14 viruses with > 90% coverage breadth. We recovered 3 complete genomes and one partial genome for Human Bocavirus 1 from samples collected from the nearby counties of Contra Costa and Marin between 2021-2022 ( Table 1 ). All genomes were closely related, differing from one another at a total of 25 nucleotide positions (99.6% identity). Sequences were also closely related to the nearest GenBank reference (99.5% identity). We detected Rhinovirus A (RhV-A) from two previous negative samples (N261 and N325; Figure 2B ), one known positive sample (P1012), and unexpectedly from two samples previously positive for other viruses (P1016 and P333; Figure 2A ). Rhinovirus C (RhV-C) was identified from two previous negative samples (N212 and N325; Figure 2B ), two previous positive samples (P1008 and P1009), and one SARS-CoV-2 positive sample using Centrifuge. From these samples, we recovered 7 RhV genomes with > 90% coverage breadth and one partial RhV genome. Notably, each sample contained distinct RhV strain(s), and the percent identity to the nearest reference genome ranged from 95.5% to 99.5%. One sample (N325) contained a co-infection of Rhinovirus A58 and C59, of which both sequences were recovered ( Figure 2B , Table 1 ). This sample also contained Enterovirus A, according to Centrifuge-based analysis, although this was not analyzed further as no other samples contained Enterovirus A. We also detected a co-infection in a previous negative sample (N212), which had both RhV-C and Influenza C virus ( Figure 2B ). From the six samples in which Influenza C virus was identified, we recovered 4 near-complete (>99% coverage breadth) and 2 partial viral genomes. BLAST results against NCBI core-nt for the consensus sequences of the hemagglutinin esterase fusion gene (HEF), suggested that the four near-complete genomes are likely most closely related to Influenza C virus strain C/Minnesota/28/2015. To assess the relationship between Influenza C virus genomes, we aligned the consensus sequences of the hemagglutinin esterase fusion (HEF) and M segments. This alignment showed two sequence groups, each with >99% within-group identity ( Table 1 ). All samples positive for Influenza C were collected between February and July 2022 and represented 3 different counties. However, the near-identical sequences in each group were not from the same county ( Table 1 ). The presence of SARS-CoV-2 in three previous negative samples was unexpected. We recovered complete genomes from two of these samples and were able to classify these as belonging to the Epsilon (sample N055) and Omicron BA.2 (sample N198) lineages, respectively ( Table 1 ) . We searched the two complete and one partial SARS-CoV-2 genomes for the primer and probe sequences of the CDC RT-PCR diagnostic assay for SARS-CoV-2 32 . For one genome (N055), primer and probe sequences for targets N1, N2, and N3 were perfect matches, and for the other two genomes (samples N198 and N323), the primer and probe sequences for N3 were exact matches while the N1 target had a single mismatch near the 5’ end of the probe. Genomes were also recovered from two previous positive samples, allowing classification of these as Omicron BA.1 and BA1.1 ( Table 1 ) . Use of probe-capture enrichment improved genome coverage by as much as 71%, producing the biggest gains for influenza C virus ( Table 1 ). For three Rhinovirus genomes, coverage breadth decreased after probe-capture enrichment although the fraction of total reads mapped to this genome (relative abundance) increased. 3.2 Bacterial and fungal pathogens Following viral assessment, we screened the 305 previous negative samples to identify those in which a single bacterial or fungal species was detected at ≥50% relative abundance after host-read removal ( Figure 3 ). Twenty samples had high prevalence of a single bacterial species, such as Dolosigranulum pigrum, Moraxella spp., Staphylococcus epidermidis and Acinetobacter junii . Surprisingly, 10 samples with high bacterial content were dominated by Pseudomonas azotoformans . Additionally, four bacteria-dominated samples were also positive for viral human pathogens, suggesting possible viral and bacterial co-infections. Lastly, a single sample (N014) was found to be dominated by fungi, with a fungal relative abundance of 72%, most of which were classified as Penicillium and Aspergillus spp. ( Figure 3 ). Download figure Open in new tab Figure 3. Previous negative samples dominated by a single bacterial or fungal species. Only samples with bacterial or fungal relative abundance ≥ 50% considered dominated by these signals. Red asterisks indicate samples in which respiratory viruses were also detected. 4. Discussion 4.1 Pathogen detection and limitations Our metagenomic analysis identified Influenza C virus and Human Bocavirus 1 from previous negative samples. Recent epidemiological studies have shown that these viruses are relatively common, especially in children 33 , yet are not included in routine test respiratory panels. Our results suggest that both Influenza C virus and Human Bocavirus 1 circulated broadly and locally within California, based on closely related genomes of each virus and counties where the respective samples were collected. The Human Bocavirus 1 detections should be interpreted with caution as this virus is often shed well after acute infection 34 and has been detected in healthy controls 35 , and as a common coinfection. Notably, we did not identify any other viruses in these samples, and recent work has shown that Bocavirus may cause respiratory symptoms as a single infection. In addition to identifying less common viral pathogens by metagenomics, we also detected bacterial and fungal colonizers. Twenty samples were dominated by a single bacterial species, which included both opportunistic pathogens and environmental microorganisms ( Figure 3 ). Specifically, four samples contained high relative abundances of Moraxella catarrhalis or Moraxella nonliquefaciens 36 , 37 . These are opportunistic pathogens but are also common members of the respiratory tract microbiome, making it difficult to interpret the significance of this finding without additional data on absolute abundance and clinical symptoms. Species such as Acinetobacter junii and Pseudomonas azotoformans were also predominant in some samples. As these species are not commonly linked to human respiratory infections or to the healthy respiratory microbiome, we speculate that their high abundance in some samples may reflect transient colonization. Likewise, the high relative abundance of fungi in one sample was surprising, but without additional data it remains unclear whether this represented an infection. Lastly, we observed instances of potential viral and bacterial co-infection. Previous studies have described the association of secondary bacterial infections during respiratory viral illness with increased disease severity and mortality 38 , 39 . This is potentially important, as microbial community composition is thought to play a role in disease progression and outcome of respiratory viral infections 40 . Overall, by metagenomic sequencing of 305 previous negative samples, we were able to identify 16 (5%) viral and 35 (12%) bacterial/fungal pathogens. While it is unclear whether these detections were from outright infections or part of the normal microbiome, for bacterial detections in particular, the overwhelming majority (254/305 or 83%) of the previous negative samples from individuals who were symptomatic for respiratory illness yielded no detections of known pathogens. Possible limitations for detection in this study include: 1) non-infectious etiology of respiratory symptoms (e.g., asthma or allergy), 2) poor/low quality of sample/nucleic acid due to multiple freeze-thaw events, 3) low concentration of pathogen nucleic acid, 4) improper timing of sample collection, 5) improper sample collection technique, and/or 6) high concentration of host nucleic acid material that may have interfered with pathogen sequencing reactions. 4.2 Methods evaluation We observed several discrepancies between sequencing results and the prior surveillance test results from BioFire and SARS-CoV-2 diagnostic assays. Of 31 expected viruses from previous positive samples, 8 were not detected by either untargeted or targeted sequencing. In these instances, the sensitivity of sequencing may have been limited due to long sample storage times (up to 43 months), freeze-thaw cycles between previous testing and sequencing, or low initial concentration of viral nucleic acids in the samples. It is also possible that the previous results were false-positive. Additionally, we identified unexpected positive samples containing SARS-CoV-2. Our sequence-based analysis indicated that these viral sequences could have been detected by the CDC RT-qPCR assay, and we are uncertain as to why SARS-CoV-2 was not detected by PCR. Separately, from previously reported negative specimens, we unexpectedly detected RhV genomes (RhV-A and RhV-C) and SARS-CoV-2 genomes via sequencing ( Figure 2 ). The RhVs we detected from previous negative samples were likely too divergent to be captured by the BioFire panel. In particular, one sample contained a sequence closely related to the novel Rhinovirus C59 genotype recently reported from western Washington state, USA ( Table 1 ) (30). As expected, probe-capture enrichment generally increased the coverage breadth and depth of target genomes ( Table 1 ). However, enrichment did not always recover complete genomes even when relative abundance of target reads was >50% (e.g. samples N145 and N323), suggesting selective degradation of the target genome in the sample or incomplete capture by probes due to sequence divergence. Notably, coverage breadth for RhV genomes was lower in targeted than untargeted sequencing, suggesting the probe panel, like the BioFire panel, had limited representation of this diverse viral group. 5. Conclusion Our results support that for public health surveillance, metagenomic sequencing can provide a complementary approach to common clinical diagnostic tools. Knowledge about the prevalence of rarer and more divergent viruses in this dataset and from future sequencing-based surveillance efforts could aid clinical and public health practitioners in expanding test panels for routine diagnostics, surveillance, and outbreak investigations. Untargeted metagenomic sequencing can provide a truly pathogen agnostic method of surveillance, as compared to targeted NAAT based assays. However, more work is needed to determine the appropriate depth of sequencing necessary to identify the rarer and more divergent viruses. Ultra deep sequencing may be cost-prohibitive for analysis of large quantities of samples. The use of a broad probe-capture panel prior to metagenomic sequencing represents an intermediate and potentially more cost-effective option to recover near-complete viral genomes for downstream analysis, as we have demonstrated in this study. In summary, we found that a multi-pronged approach combining NAAT-based test panels with unbiased and targeted sequencing was successful to identify both common and uncommon respiratory pathogens. Pathogen-agnostic sequencing is an active research area that has made significant progress over the past few years. This capability will advance pathogen surveillance and diagnostics, shorten time for detection and response, and ultimately protect public health. Results from our study further support the value of this approach. Author Contributions RSK, ACM, CRK and CJ contributed to the writing of the of the draft manuscript. RSK, CRK, ACM and JBT analyzed and interpreted the data. CJ, DAW, CM and SM acquired the samples. JBT, ACM, MB, CJ, DAW, CM and SM designed the study and experiments. ACM, JBT, DAW, SM and CM performed experiments. CJ and DAW contributed to project management. All authors contributed to reviewing and editing the final manuscript. Data availability The de-identified untargeted metagenomic and TWIST-enriched Illumina sequencing datasets are publicly available in the NCBI Sequencing Read Archive under BioProject PRJNA1296491. Acknowledgements This work was supported by the Lawrence Livermore National Laboratory’s Laboratory Derived Research & Development program. The residual samples used in this study were obtained from the original community surveillance project CalSRVSS which was funded in part and by the Centers for Disease Control and Prevention Epidemiology and Laboratory Capacity for Community Surveillance (grant 6 Nu50CK000539). Footnotes DISCLAIMER The findings and conclusions in this article are those of the authors and do not necessarily represent the views or opinions of the California Department of Public Health, the California Health and Human Services Agency or Lawrence Livermore National Laboratory. This work was performed under the auspices of the U.S. Department of Energy by Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344. References ↵ Sirota , S. B. et al. Global, regional, and national burden of upper respiratory infections and otitis media, 1990–2021: a systematic analysis from the Global Burden of Disease Study 2021 . The Lancet Infectious Diseases 25 , 36 – 51 ( 2025 ). OpenUrl PubMed ↵ Eales , O. et al. Key Challenges for Respiratory Virus Surveillance while Transitioning out of Acute Phase of COVID-19 Pandemic . Emerging Infectious Disease journal 30 , 1 ( 2024 ). OpenUrl ↵ Clark , E. C. et al. Changes to Public Health Surveillance Methods Due to the COVID-19 Pandemic: Scoping Review . JMIR Public Health Surveill 10 , e49185 ( 2024 ). OpenUrl ↵ WHO . WHO policy brief: COVID-19 surveillance. (World Health Organization , 2024 ). ↵ Tisza , M. et al. Wastewater sequencing reveals community and variant dynamics of the collective human virome . Nature Communications 14 , 6878 ( 2023 ). OpenUrl PubMed Wyler , E. et al. Pathogen dynamics and discovery of novel viruses and enzymes by deep nucleic acid sequencing of wastewater . Environment International 190 , 108875 ( 2024 ). OpenUrl PubMed ↵ Smith Matthew , F. et al. Seasonality of respiratory, enteric, and urinary viruses revealed by wastewater genomic surveillance . mSphere 9 , e00105 – 00124 ( 2024 ). OpenUrl PubMed ↵ Rajagopala , S. V. et al. Metatranscriptomics to characterize respiratory virome, microbiome, and host response directly from clinical samples . Cell Reports Methods 1 , 100091 ( 2021 ). OpenUrl PubMed Cheemarla , N. R. et al. Nasal host response-based screening for undiagnosed respiratory viruses: a pathogen surveillance and detection study . Lancet Microbe 4 , e38 – e46 ( 2023 ). OpenUrl ↵ Madi , N. , Al-Nakib , W. , Mustafa , A. S. & Habibi , N. Metagenomic analysis of viral diversity in respiratory samples from patients with respiratory tract infections in Kuwait . J Med Virol 90 , 412 – 420 ( 2018 ). OpenUrl PubMed ↵ Gauthier , N. P. G. et al. Validation of an Automated, End-to-End Metagenomic Sequencing Assay for Agnostic Detection of Respiratory Viruses . The Journal of Infectious Diseases 230 , e1245 – e1253 ( 2024 ). OpenUrl PubMed ↵ Tan , J. K. et al. Laboratory validation of a clinical metagenomic next-generation sequencing assay for respiratory virus detection and discovery . Nature Communications 15 , 9016 ( 2024 ). OpenUrl PubMed ↵ Ceballos-Garzon , A. , Comtet-Marre , S. & Peyret , P. Applying targeted gene hybridization capture to viruses with a focus to SARS-CoV-2 . Virus Research 340 , 199293 ( 2024 ). OpenUrl PubMed ↵ Mourik , K. et al. Comparison of the performance of two targeted metagenomic virus capture probe-based methods using reference control materials and clinical samples . Journal of Clinical Microbiology 62 , e00345 – 00324 ( 2024 ). OpenUrl PubMed ↵ Wani , A. K. et al. Metagenomics in the fight against zoonotic viral infections: A focus on SARS-CoV-2 analogues . Journal of Virological Methods 323 , 114837 ( 2024 ). OpenUrl PubMed ↵ Cooksey , G. L. S. et al. Severe Acute Respiratory Syndrome Coronavirus 2 and Respiratory Virus Sentinel Surveillance, California, USA, May 10, 2020-June 12, 2021 . Emerg Infect Dis 28 , 9 – 19 ( 2022 ). OpenUrl CrossRef PubMed ↵ Creager , H. M. et al. Clinical evaluation of the BioFire® Respiratory Panel 2.1 and detection of SARS-CoV-2 . J Clin Virol 129 , 104538 ( 2020 ). OpenUrl CrossRef PubMed ↵ Chen , S. F. Ultrafast one-pass FASTQ data preprocessing, quality control, and deduplication using fastp . Imeta 2 ( 2023 ). doi: 10.1002/imt2.107 OpenUrl CrossRef ↵ Chen , S. , Zhou , Y. , Chen , Y. & Gu , J. fastp: an ultra-fast all-in-one FASTQ preprocessor . Bioinformatics 34 , i884 – i890 ( 2018 ). OpenUrl CrossRef PubMed ↵ Li , H. Minimap2: pairwise alignment for nucleotide sequences . Bioinformatics 34 , 3094 – 3100 ( 2018 ). OpenUrl CrossRef PubMed ↵ Danecek , P. et al. Twelve years of SAMtools and BCFtools . Gigascience 10 ( 2021 ). ↵ Li , H. et al. The Sequence Alignment/Map format and SAMtools . Bioinformatics 25 , 2078 – 2079 ( 2009 ). OpenUrl CrossRef PubMed Web of Science ↵ Katz , K. et al. The Sequence Read Archive: a decade more of explosive growth . Nucleic Acids Research 50 , D387 – D390 ( 2022 ). OpenUrl CrossRef PubMed ↵ Stop neglecting fungi . Nat Microbiol 2 , 17120 ( 2017 ). OpenUrl PubMed ↵ NCBI . Human Read Removal Tool, ( 2023 ). ↵ Kim , D. , Song , L. , Breitwieser , F. P. & Salzberg , S. L. Centrifuge: rapid and sensitive classification of metagenomic sequences . Genome Res 26 , 1721 – 1729 ( 2016 ). OpenUrl Abstract / FREE Full Text ↵ Sayers , E. W. et al. Database resources of the national center for biotechnology information . Nucleic Acids Res 50 , D20 – D26 ( 2022 ). OpenUrl CrossRef PubMed ↵ Martí Jose , M. et al. Addressing the dynamic nature of reference data: a new nucleotide database for robust metagenomic classification . mSystems 10 , e01239 – 01224 ( 2025 ). OpenUrl PubMed ↵ Marti , J. M. Recentrifuge: Robust comparative analysis and contamination removal for metagenomics . PLoS Comput Biol 15 , e1006967 ( 2019 ). OpenUrl PubMed ↵ McMurdie , P. J. & Holmes , S. phyloseq: an R package for reproducible interactive analysis and graphics of microbiome census data . PLoS One 8 , e61217 ( 2013 ). OpenUrl CrossRef PubMed ↵ Steinegger , M. & Söding , J. MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets . Nature Biotechnology 35 , 1026 – 1028 ( 2017 ). OpenUrl CrossRef PubMed ↵ Lu , X. et al. US CDC Real-Time Reverse Transcription PCR Panel for Detection of Severe Acute Respiratory Syndrome Coronavirus 2 . Emerg Infect Dis 26 , 1654 – 1665 ( 2020 ). OpenUrl CrossRef PubMed ↵ Sederdahl , B. K. & Williams , J. V. Epidemiology and Clinical Characteristics of Influenza C Virus . Viruses 12 ( 2020 ). ↵ Rayamajhi Thapa , R. , Nascimento-Carvalho , C. , Allander , T. , Jartti , T. & Söderlund-Venermo , M. A Diagnostic Approach to Separate Acute Human Bocavirus 1 Respiratory Tract Infection From Long-lasting Virus Shedding . The Journal of Infectious Diseases, jiaf130 ( 2025 ). ↵ Uršič , T. et al. Acute Human Bocavirus 1 Infection in Children Hospitalized for Acute Bronchiolitis: A 2-Year Prospective Study . The Pediatric Infectious Disease Journal 44 ( 2025 ). ↵ Goldstein , E. J. C. , Murphy , T. F. & Parameswaran , G. I. Moraxella catarrhalis, a Human Respiratory Tract Pathogen . Clinical Infectious Diseases 49 , 124 – 131 ( 2009 ). OpenUrl CrossRef PubMed Web of Science ↵ Yi , H. , Yong , D. , Lee , K. , Cho , Y.-J. & Chun , J. Profiling bacterial community in upper respiratory tracts . BMC Infectious Diseases 14 , 583 ( 2014 ). OpenUrl CrossRef PubMed ↵ Liu , Y. et al. Outcomes of respiratory viral-bacterial co-infection in adult hospitalized patients . eClinicalMedicine 37 ( 2021 ). ↵ Meskill , S. D. & O’Bryant , S. C. Respiratory Virus Co-infection in Acute Respiratory Infections in Children . Current Infectious Disease Reports 22 , 3 ( 2020 ). OpenUrl CrossRef PubMed ↵ Hanada , S. , Pirzadeh , M. , Carver , K. Y. & Deng , J. C. Respiratory Viral Infection-Induced Microbiome Alterations and Secondary Bacterial Pneumonia . Frontiers in Immunology Volume 9 - 2018 ( 2018 ). View the discussion thread. Back to top Previous Next Posted October 03, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Metagenomic sequencing identifies potential respiratory pathogens in PCR-negative subset of surveillance samples Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Metagenomic sequencing identifies potential respiratory pathogens in PCR-negative subset of surveillance samples Anne Caroline Mascarenhas , Rose S. Kantor , James Thissen , Car Reen Kok , Monica Borucki , Christina Morales , Sharon Messenger , Crystal Jaing , Debra A. Wadford medRxiv 2025.09.26.25336420; doi: https://doi.org/10.1101/2025.09.26.25336420 Share This Article: Copy Citation Tools Metagenomic sequencing identifies potential respiratory pathogens in PCR-negative subset of surveillance samples Anne Caroline Mascarenhas , Rose S. Kantor , James Thissen , Car Reen Kok , Monica Borucki , Christina Morales , Sharon Messenger , Crystal Jaing , Debra A. Wadford medRxiv 2025.09.26.25336420; doi: https://doi.org/10.1101/2025.09.26.25336420 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Infectious Diseases (except HIV/AIDS) Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (297) Cardiovascular Medicine (4420) Dentistry and Oral Medicine (443) Dermatology (381) Emergency Medicine (606) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15212) Forensic Medicine (30) Gastroenterology (1120) Genetic and Genomic Medicine (6581) Geriatric Medicine (667) Health Economics (996) Health Informatics (4519) Health Policy (1366) Health Systems and Quality Improvement (1611) Hematology (538) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15906) Intensive Care and Critical Care Medicine (1103) Medical Education (620) Medical Ethics (144) Nephrology (667) Neurology (6580) Nursing (345) Nutrition (998) Obstetrics and Gynecology (1141) Occupational and Environmental Health (956) Oncology (3323) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1689) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5429) Public and Global Health (9212) Radiology and Imaging (2192) Rehabilitation Medicine and Physical Therapy (1368) Respiratory Medicine (1194) Rheumatology (593) Sexual and Reproductive Health (709) Sports Medicine (529) Surgery (709) Toxicology (99) Transplantation (288) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9fefd3ad1fa28650',t:'MTc3OTMyNzE0MQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.