CRESSENT: a Bioinformatic Toolkit to Explore and Improve ssDNA Virus Annotation

doi:10.1101/2025.07.14.664782

CRESSENT: a Bioinformatic Toolkit to Explore and Improve ssDNA Virus Annotation

2025 · doi:10.1101/2025.07.14.664782

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 51,667 characters · extracted from preprint-html · click to expand

CRESSENT: a Bioinformatic Toolkit to Explore and Improve ssDNA Virus Annotation | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results CRESSENT: a Bioinformatic Toolkit to Explore and Improve ssDNA Virus Annotation View ORCID Profile R.R. Pavan , View ORCID Profile M.B. Sullivan , View ORCID Profile M.J. Tisza doi: https://doi.org/10.1101/2025.07.14.664782 R.R. 
Pavan 1 Department of Microbiology, The Ohio State University , Columbus, Ohio, 43210, USA 2 Centre of Microbiome Science, The Ohio State University , Columbus, Ohio, 43210, USA 3 The Infectious Disease Institute, The Ohio State University , Columbus, Ohio, 43210, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for R.R. Pavan For correspondence: pavan.4{at}osu.edu M.B. Sullivan 1 Department of Microbiology, The Ohio State University , Columbus, Ohio, 43210, USA 2 Centre of Microbiome Science, The Ohio State University , Columbus, Ohio, 43210, USA 3 The Infectious Disease Institute, The Ohio State University , Columbus, Ohio, 43210, USA 4 Department of Civil, Environmental and Geodetic Engineering, The Ohio State University , Columbus, Ohio, 43210, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for M.B. Sullivan M.J. Tisza 5 Molecular Virology & Microbiology - Petrosino Baylor College of Medicine Houston , Texas 77030 USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for M.J. Tisza Abstract Full Text Info/History Metrics Data/Code Preview PDF ABSTRACT Single-stranded DNA (ssDNA) viruses are important components of diverse ecosystems; however, it remains challenging to systematically identify and classify them. This is in part due to their broad host range and resulting genomic diversity, structure, and rapid evolution rates. In addition, distinguishing genuine ssDNA genomes from contaminating sequences in metagenomic datasets (e.g., from commercial kits) has been an unresolved issue for years. Here, we present CRESSENT (CRESS-DNA Extended aNnotation Toolkit), a comprehensive and modular bioinformatic pipeline focused on ssDNA virus genome-to-analysis and annotation. 
The pipeline integrates multiple functionalities organized into six modules: sequence dereplication, decontamination, phylogenetic analysis, motif discovery, stem-loop structure prediction, and recombination detection. Each module can be used independently or in combination with others, allowing researchers to customize their analysis workflow. With this tool, researchers can comprehensively and systematically include ssDNA viruses in their viromics workflows and facilitate comparative genomic studies, which are often limited to dsDNA viruses, therefore leaving behind a crucial component of the microbiome community under study. 1. BACKGROUND Single-stranded DNA (ssDNA) viruses are among the most abundant and diverse biological entities on Earth, yet they remain one of the least understood groups of viruses. These small-genome viruses infect hosts across all domains of life, archaea, bacteria, and eukaryotes, and have been found in nearly every environment examined, from human gut, terrestrial soils and oceans to extreme ecosystems like hydrothermal vents and Antarctic lakes ( Bezuidt and Makhalanyane, 2024 ; Kazlauskas et al., 2019 ; Kim et al., 2011 ; Rosario et al., 2018 ). Their ecological roles are broad: they influence host population dynamics, drive genome evolution through horizontal gene transfer, and modulate microbial community structure ( Kazlauskas et al., 2019 ; Qazi, 2016 ; Zhao et al., 2019 ). In clinical and agricultural contexts, ssDNA viruses are highly consequential. Pathogens like Tomato yellow leaf curl virus and Banana bunchy top virus cause billions in crop losses annually ( Golyaev et al., 2025 ; Qazi, 2016 ), while in humans, ssDNA phages such as Microviridae members shape gut microbiota composition with implications for inflammation and disease ( Creasy et al., 2018 ). 
The advent of metagenomic sequencing has led to a dramatic increase in the discovery of novel ssDNA viruses in diverse environments ( Krupovic et al., 2020 ; Tisza et al., 2021 , 2020 ). Despite their importance, analysis of ssDNA virus sequences remains difficult without in-depth ssDNA genomics expertise, which can vary significantly between host groups ( Kazlauskas et al., 2019 ). Analytical challenges include their small genome size (which can often lead to sequence removal in viromics workflows), rapid evolution rates, frequent recombination events, and the presence of contaminating sequences in metagenomic datasets ( Roux et al., 2019 ; Trubl et al., 2019 ). Furthermore, the absence of universally conserved genes complicates taxonomic classification and the comparative genomics of ssDNA viruses ( Kazlauskas et al., 2018 ; Krupovic et al., 2015 ). Due to these constraints, most studies examine a set of common genes within certain ssDNA groups, including Rep and Cap genes with conserved functions and structures ( Delwart and Li, 2012 ; Desingu and Nagarajan, 2022 ; Krupovic et al., 2020 ). Yet, these genes can be extremely divergent at the sequence level, and recombination frequently occurs between the Rep and Cap genes and, in some cases, within the Rep gene itself. Therefore, identification of these events involves phylogenetic analysis of the Cap gene, the Rep gene, and/or the nuclease and helicase domains of the Rep gene ( Kazlauskas et al., 2019 , Kazlauskas et al., 2017 ; Rosario et al., 2015 ), followed by tanglegrams to visualize recombination ( de la Higuera et al., 2020 ; Kazlauskas et al., 2017 ). Short motifs (e.g., Walker A and B) within the nuclease and helicase domains vary by clade, and these are often visualized as sequence logos ( Kazlauskas et al., 2017 ). 
Non-coding stem-loops and iterons are additional conserved structures essential for the replication of these viruses, which can also be used to further delineate ssDNA lineages ( Dai et al., 2024 ; de la Higuera et al., 2020 ; Torralba et al., 2024 ). To address these challenges, we have developed CRESSENT, a modular and comprehensive bioinformatic pipeline specifically designed to analyze and annotate ssDNA virus sequences (See Fig. 1 for an overview). This tool consists of six modules, and each module can be used independently or in sequence, allowing researchers to customize their analysis workflow according to their specific research questions. Since the tool is modular, it allows for new modules to be added in the future. In this paper, we describe the functionality of each module and its integration into a cohesive analysis pipeline. With CRESSENT, researchers can enhance the efficiency and accuracy of ssDNA virus analysis and employ a standardized approach for comparative genomic studies of this important viral group. Download figure Open in new tab Fig 1: Flow chart depicting the CRESSENT modules (blue boxes), analysis (green diamonds), and processes (purple boxes). Gray and green dashed arrows indicate pipelines. 2. IMPLEMENTATION The primary function of CRESSENT is to serve as an auxiliary tool to refine the annotation and detection of putative ssDNA virus sequences identified by tools such as GeNomad (Camargo et al., 2023), VirSorter2 (Guo et al., 2021), and Cenote-Taker 3 ( Tisza et al., 2021 ). Furthermore, the integration of CRESSENT into iVirus ( Bolduc et al., 2021 ) will facilitate the combination of different bioinformatics tools to leverage virus identification and annotation processing. The tool is written mostly in Python and R and can be used through the command line interface. 
The tool can be found in the GitHub repository: https://github.com/ricrocha82/cressent The modular design of CRESSENT allows for the flexible integration of the various components depending on the specific research questions and dataset characteristics ( Fig. 1 ). Using the output of the virus discovery tools as input (i.e., fasta and gff files), a typical workflow might begin with dereplication to reduce dataset complexity, followed by decontamination to remove potential laboratory contaminants. The cleaned and non-redundant sequences can then be subjected to recombination detection, followed by phylogenetic analysis to understand evolutionary relationships, possibly after adjusting sequences to begin with conserved nonanucleotide sequences using the adjust_seq.py utility. Motif discovery and annotation can provide insights into functional elements, while stem-loop and iteron annotation can identify replication-related structures. Below, we go into more detail for each of the six modules. 2.1 Dereplication The dereplication module ( Fig. 1 ) is designed to reduce sequence redundancy in datasets by clustering highly similar sequences. Sequence dereplication is widely used in viral metagenomics to manage dataset complexity and avoid bias from overrepresented sequences ( Roux et al., 2019 ). The tool follows the current community standard for dereplication of viral contigs, as outlined by the MIUViG guidelines: 95% Average Nucleotide Identity (ANI) over at least 85% of the contig length (AF) ( Roux et al., 2019 ). It collapses the sequences into species-level ssDNA viral operational taxonomic units (vOTUs). This module utilizes a combination of tools, including anicalc.py and aniclust.py from CheckV ( Nayfach et al., 2021 ) and BLAST ( Altschul et al., 1990 ), to calculate pairwise ANI and cluster sequences based on user-defined similarity thresholds. 
It outputs a vOTU catalog with a designated cluster representative, the full set of clustered sequences, pairwise ANI values, and supporting BLAST alignments. By removing redundancy while preserving true biological diversity, the module markedly reduces computational load for all downstream analyses. 2.2 Decontamination The decontamination module ( Fig. 1 ) addresses a critical issue in metagenomic studies: the presence of laboratory contaminants or “kitome” sequences ( Olomu et al., 2020 ). Laboratory contamination is a concern in microbiome metagenomic studies ( Duan et al., 2024 ; Keeler et al., 2021 ; Olomu et al., 2020 ) and can lead to the misidentification of microbial taxa, inflate diversity estimates, or result in erroneous biological interpretations if not properly controlled for through rigorous contamination-aware protocols. The systematic identification and removal of contaminant sequences is therefore essential for accurate analysis and interpretation of results. It has been demonstrated that commercial reagents and extraction kits can contain viral DNA that may be misinterpreted as novel findings ( Asplund et al. 2019 ). To mitigate this, a comprehensive database with 510 potential viral contaminants in laboratory reagents has been compiled ( Porter et al., 2021 ). Contaminants included four small circular virus-like genomes ( Duan et al., 2024 ). CRESS-like viruses were also found in laboratory reagents and may be constituents of the “kitome”. For instance, the presence of CRESS-like viruses such as Parvovirus-like Hybrid Virus ( Naccache et al., 2013 ) and Rengasvirus ( Keeler et al., 2021 ) in negative control samples can be mistaken for novel or biologically relevant viruses, which highlights the need to cross-reference against known reagent-associated sequences to avoid erroneous conclusions in virome and pathogen discovery research. 
To limit contamination, this module uses BLAST to compare input sequences against a curated database of known contaminants derived from five published studies ( Naccache et al. (2013) , Asplund et al. (2019) , Porter et al. (2021) , Keeler et al. (2021) , and Duan et al. (2024) ). The module produces decontaminated sequences, decontamination statistics, and BLAST results for tracking potential contaminants. The user has full flexibility to define how stringent the decontamination process should be. To balance the removal of contaminants while retaining genuine sequences, users can fine-tune the BLAST parameters, specifically the e-value (default: 1e-10), percent identity (default: 90), and alignment coverage (default: 50). 2.3 Phylogenetic Analysis The phylogenetic analysis module ( Fig. 1 ) consists of three main components: sequence alignment, phylogenetic tree construction, and visualization/annotation of trees. The alignment component uses MAFFT ( Katoh and Standley, 2013 ) for multiple sequence alignment and trimAl (Capella-Gutiérrez et al., 2009) for alignment trimming, while the tree construction component utilizes IQ-TREE2 for maximum likelihood phylogenetic inference. MAFFT and trimAl have been used for viral sequence alignment due to their accuracy and efficiency, which is particularly important for divergent viral sequences ( Kazlauskas et al., 2018 ; de la Higuera et al., 2020 ). Our implementation offers user-friendly customization options, including the ability to select sequences from specific viral families (e.g., Adamaviridae, Bidnaviridae, Geminiviridae) for reference and adjust alignment parameters. The module enables users to incorporate reference sequences from a built-in database of 30 ICTV-recognized ssDNA viral families (MSL39.v4), focusing on replication-associated (Rep) and capsid (Cap) proteins, or to utilize a custom reference database. 
IQ-TREE2 represents the state-of-the-art in maximum likelihood phylogenetic inference and has been used extensively in viral phylogenomics ( Minh et al., 2020 ). Similar approaches have been used to investigate the evolutionary history of CRESS-DNA viruses, as well as to examine the diversity and evolution of single-stranded DNA viruses in avian hosts ( Kazlauskas et al., 2019 ; Chrzastek et al., 2021 , Olivo et al., 2024 ). Users can visualize phylogenetic trees through two dedicated modules: one for tree visualization and another for tanglegram plotting. These modules leverage specialized R packages including ggtree ( Xu et al., 2022 ), ape ( Paradis and Schliep, 2019 ), and dendextend ( Galili, 2015 ). Additionally, when the tanglegram module is activated, the Robinson-Foulds (RF) distance is automatically calculated to quantify the topological dissimilarity between two phylogenetic trees. This metric provides a standardized way to assess how similar or different two trees are, which is essential for comparing phylogenetic reconstructions and evaluating the impact of different analytical methods or datasets (Briand et al., 2020). 2.4 MOTIF Analysis Motif analysis is crucial for identifying functional elements in viral genomes, such as replication origins (ori), protein binding sites, and conserved protein domains. In ssDNA viruses, conserved motifs within replication-associated proteins are especially informative, as they often reflect evolutionary constraints and mechanistic roles in rolling-circle replication. For example, motifs such as the HUH endonuclease domain and the superfamily 3 (SF3) helicase domain are required for site-specific DNA cleavage and unwinding of the DNA strand, respectively. These functional domains are highly conserved across diverse ssDNA viruses, indicating strong purifying selection. 
As a result, their conservation across viral families can provide clues to the biochemical mechanisms of replication, robust markers for evolutionary comparisons, and taxonomic classification ( Kazlauskas et al., 2018 ; Krupovic et al., 2020 ; Rosario et al., 2018 , Rosario et al., 2012 ; Varsani et al., 2024 ). MEME and ScanProsite have been used for de novo motif discovery in viral genomics ( Bailey et al., 2015 ; de Castro et al., 2006 ; Gattiker et al., 2002 ). For example, tools such as MEME have been employed to explore conserved motifs in CRESS-DNA viruses, offering insights into their evolutionary relationships and classification ( Requião et al., 2020 ; Wang et al., 2013 ). Similarly, motif identification platforms like ScanProsite have been applied to viral metagenomic sequences from avian hosts to detect functionally relevant sequence patterns ( Vibin et al., 2020 ). The pattern-based searching functionality in our tool is particularly useful for identifying known functional motifs, such as the Walker A and Walker B motifs in Rep proteins, which are indicators of rolling-circle replication mechanisms common in ssDNA viruses ( Kazlauskas et al., 2018 ; Varsani et al., 2024 ; Zhao et al., 2019 ). The MOTIF module ( Fig. 1 ) enables both pattern-based motif searching and de novo motif discovery in ssDNA viral sequences. For pattern-based searching, the module uses regex and seqkit ( Shen et al., 2016 ) to identify user-defined patterns and optionally split sequences at motif occurrences. For de novo motif discovery, the module integrates MEME for motif identification and optionally ScanProsite for scanning against known protein motifs. The module produces several outputs, including motif positions, sequence logos of reference and user input motifs, and genome maps to visualize the distribution of identified motifs ( Hackl et al., 2024 ; Wagih, 2017 ). 
2.5 Putative Stem Loop and Iterons Annotation This module identifies and annotates critical noncoding secondary structures in ssDNA viral genomes: stem-loops and iterons ( Fig. 1 ). These structures typically form the origin of replication and serve as recognition sites for viral Rep proteins ( Dai et al., 2024 ; de la Higuera et al., 2020 ; D’Souza and Kool, 1992 ). Stem-loop structures and iterons are essential for viral replication in many ssDNA viruses. The stem-loop structure in Faba bean necrotic yellows virus (FBNYV) functions as the origin of replication, with a conserved nonanucleotide sequence within the loop being essential for initiating rolling circle replication ( Timchenko et al., 1999 ). Additionally, a set of iterative sequences (iterons) serves as specific binding sites for Rep proteins, and their spatial arrangement plays a crucial role in determining replication efficiency in Geminiviridae. These structures represent critical functional elements that are conserved across diverse ssDNA viral families despite high sequence variability in other genomic regions ( Bonnamy et al., 2023 ). CRESSENT includes two modules for these analyses: StemLoop-Finder for identifying DNA hairpin structures and CRUISE (CRiteria-based Uncovering of Iteron SEquences) for detecting iteron repeats that serve as recognition sites for replication proteins ( Jones et al., 2022 ; Pratt et al., 2021 ). StemLoop-Finder uses the ViennaRNA library for predicting secondary structures and scores potential stem-loops based on deviation from ideal stem and loop lengths. CRUISE identifies iteron sequences, which are typically found near the origin of replication in many ssDNA viruses. 2.6 Recombination Detection Recombination is a major driver of genetic diversity and evolution in ssDNA viruses ( Lefeuvre et al., 2009 ; Martin et al., 2011 ). The accurate detection of recombination events is therefore crucial for understanding viral evolution, taxonomy, and epidemiology. 
This module identifies recombination in nucleotide sequences using a suite of methods integrated within the OpenRDP (Recombination Detection Program) framework. The analysis incorporates multiple statistical and phylogenetic approaches to improve detection sensitivity and reliability ( Martin et al., 2005 ). The RDP method examines sequence triplets for recombination signals using a recursive segmentation algorithm, while 3Seq detects recombination based on phylogenetic incongruence among three sequences. GENECONV identifies gene conversion events by scanning for unusually long identical fragments shared between sequences. MaxChi and Chimaera both apply chi-square tests to detect breakpoints by comparing observed and expected substitutions across aligned sequences, with Chimaera using a more refined partitioning strategy. Bootscan assesses phylogenetic relationships along the sequence alignment by generating bootstrapped trees for sliding windows, highlighting regions of differing ancestry. Siscan extends this approach by calculating similarity scores across windows to detect potential recombination breakpoints. The methods implemented in this tool have been widely used in studies of viral recombination, including applications to circoviruses and parvoviruses, demonstrating their effectiveness across diverse ssDNA virus groups ( Varsani et al., 2009 ; Fu et al., 2011 ). The module allows users to run specific methods or all methods together and provides options for customization through a configuration file ( Fig. 1 ). The output includes detailed information about detected recombination events, including breakpoints, parental sequences, and statistical support. 2.7 Visualization and Outputs Visualization components are integrated throughout the pipeline, including tree visualization with ggtree, and sequence logo generation for motif representation. 
The visualization tools make use of established R packages such as ggtree, ggtreeExtra, and ggseqlogo, providing publication-quality figures for downstream use. The output files from each module are designed to be compatible with the input requirements of subsequent modules, facilitating seamless integration. Also, they can be used as inputs for other tools not included in CRESSENT. For instance, the tree and metadata files produced in the phylogenetic analysis module can be imported into online visualization tools such as iTOL ( Letunic and Bork, 2024 ), allowing for more tailored and focused analyses. Similarly, motif-related output files can be used with tools like WebLogo ( Crooks et al., 2004 ) or Seq2Logo ( Thomsen and Nielsen, 2012 ), enabling further customization and in-depth motif analysis. Additionally, intermediate files are preserved by default, allowing for inspection and troubleshooting at each stage of the analysis. Solutions for pairwise sequence comparison and visualization already exist (e.g., SDT (PMID: 25259891) and EFI-EST (PMID: 37356897)) and were not the focus of CRESSENT. 3. RESULTS 3.1 Rep and Cap Proteins Database After acquiring ssDNA viral sequences from the ICTV database, we aimed to evaluate their similarity by constructing sequence similarity networks (SSNs) for both Rep and Cap proteins. For this, sequences from each dataset were concatenated and submitted to the EFI-EST ( Oberg et al., 2023 ) platform for SSN generation, and the resulting networks were visualized using Cytoscape ( Shannon et al., 2003 ). To construct SSNs specifically for Rep domains, sequences were first grouped by viral family and aligned using the align module from CRESSENT. Sequence logos were then generated via the WebLogo ( Crooks et al., 2004 ) tool to pinpoint the position of the Walker A motif within each family. Based on the identified motif positions, regular expressions (regex) were created using a custom script. 
These regex patterns were subsequently used with the motif module to split each family’s sequences into functional domains (HUH and SF3 domains). The extracted domain sequences were then concatenated and analyzed in EFI-EST to produce domain-specific SSNs, which were again visualized in Cytoscape. Networks at a 95% similarity threshold were examined, and associated metadata were used to annotate and color nodes by viral family, facilitating comparative analysis across groups ( Fig. 2A-D ). Download figure Open in new tab Fig 2: Protein sequence similarity network (SSN) of Cap and Rep (and domains) proteins. The SSN was generated using EFI-EST. Sequences sharing 95% identity are conflated as a single node and visualized by Cytoscape. Nodes are colored by family. Cap protein sequences ( Fig. 2A ) exhibited more cohesive clustering overall compared to Rep proteins ( Fig. 2B ). However, when analyses focused on individual domains (HUH and SF3), Rep proteins formed clearer, more distinct clusters ( Fig. 2C and 2D ). Therefore, users should exercise caution when using full-length Rep sequences from diverse ssDNA virus families in a single analysis. We recommend conducting domain-level analysis and treating each Rep domain separately for accurate comparative studies. 3.2 CRESSENT can produce reproducible results Sequences from two independent studies were employed to demonstrate the utility of CRESSENT in facilitating the annotation of Rep and Cap genes in two putative viral families, Naryaviridae ( Zhang et al., 2025 ) and Genomoviridae ( Leal Rodríguez et al., 2024 ). These viral sequences were identified and annotated using Cenote-Taker3 after undergoing quality trimming, assembly, and clustering. Figures 3 and 4 summarize the possible outputs produced by CRESSENT. Download figure Open in new tab Fig 3: Visualizations produced by CRESSENT using Naryaviridae sequences. (A) Phylogenetic trees of Cap proteins (red = study samples; blue = custom DB sequences). 
(B) Gene visualization of the motifs found with the de novo motif module. (C) Tanglegram and phylogenetic tree of the Rep proteins. (D) Sequence logos of the HUH domain, the SF3 domain, and the Walker A motif of Rep proteins. Download figure Open in new tab Fig 4: Unrooted (B) and circular (A) phylogenetic trees of the Genomoviridae Cap genes (blue = study samples; red = custom DB sequences). 4. CONCLUSION The integration of multiple analysis methods provides researchers with a powerful toolkit for characterizing these important viral groups. The emphasis on data cleaning, visualization, and flexible workflow design enhances the utility of CRESSENT for diverse research applications. We anticipate that this tool will contribute to a more standardized and reproducible approach to ssDNA virus analysis, facilitating comparative studies and advancing our understanding of viral diversity and evolution. Importantly, the modular architecture of this tool was specifically designed to facilitate continuous improvement and expansion. This extensibility ensures that the tool can evolve alongside the rapidly advancing field of viral metagenomics, providing a sustainable platform for ssDNA virus analysis for years to come. Footnotes https://github.com/ricrocha82/cressent 5. REFERENCES ↵ Altschul , S.F. , Gish , W. , Miller , W. , Myers , E.W. , Lipman , D.J. , 1990 . Basic local alignment search tool . J. Mol. Biol . 215 , 403 – 410 . doi: 10.1016/S0022-2836(05)80360-2 OpenUrl CrossRef PubMed Web of Science ↵ Asplund , M. , Kjartansdóttir , K.R. , Mollerup , S. , Vinner , L. , Fridholm , H. , Herrera , J.A.R. , Friis-Nielsen , J. , Hansen , T.A. , Jensen , R.H. , Nielsen , I.B. , Richter , S.R. , Rey-Iglesia , A. , Matey-Hernandez , M.L. , Alquezar-Planas , D.E. , Olsen , P.V.S. , Sicheritz-Pontén , T. , Willerslev , E. , Lund , O. , Brunak , S. , Mourier , T. , Nielsen , L.P. , Izarzugaza , J.M.G. , Hansen , A.J. , 2019 . 
Contaminating viral sequences in high-throughput sequencing viromics: a linkage study of 700 sequencing libraries . Clin. Microbiol. Infect . 25 , 1277 – 1285 . doi: 10.1016/j.cmi.2019.04.028 OpenUrl CrossRef ↵ Bailey , T.L. , Johnson , J. , Grant , C.E. , Noble , W.S. , 2015 . The MEME Suite . Nucleic Acids Res . 43 , W39 – W49 . doi: 10.1093/nar/gkv416 OpenUrl CrossRef PubMed ↵ Bezuidt , O.K.I. , Makhalanyane , T.P. , 2024 . Phylogenomic analysis expands the known repertoire of single-stranded DNA viruses in benthic zones of the South Indian Ocean . ISME Commun. ycae065 . doi: 10.1093/ismeco/ycae065 OpenUrl CrossRef ↵ Bonnamy , M. , Blanc , S. , Michalakis , Y. , 2023 . Replication mechanisms of circular ssDNA plant viruses and their potential implication in viral gene expression regulation . mBio 14 , e01692 – 23 . doi: 10.1128/mbio.01692-23 OpenUrl CrossRef PubMed ↵ Chrzastek , K. , Kraberger , S. , Schmidlin , K. , Fontenele , R.S. , Kulkarni , A. , Chappell , L. , Dufour-Zavala , L. , Kapczynski , D.R. , Varsani , A. , 2021 . Diverse Single-Stranded DNA Viruses Identified in Chicken Buccal Swabs . Microorganisms 9 , 2602 . doi: 10.3390/microorganisms9122602 OpenUrl CrossRef PubMed ↵ Creasy , A. , Rosario , K. , Leigh , B.A. , Dishaw , L.J. , Breitbart , M. , 2018 . Unprecedented Diversity of ssDNA Phages from the Family Microviridae Detected within the Gut of a Protochordate Model Organism (Ciona robusta) . Viruses 10 , 404 . doi: 10.3390/v10080404 OpenUrl CrossRef PubMed ↵ Crooks , G.E. , Hon , G. , Chandonia , J.-M. , Brenner , S.E. , 2004 . WebLogo: a sequence logo generator . Genome Res . 14 , 1188 – 1190 . doi: 10.1101/gr.849004 OpenUrl Abstract / FREE Full Text ↵ Dai , Z. , Wang , H. , Xu , J. , Lu , X. , Ni , P. , Yang , S. , Shen , Q. , Wang , Xiaochun , Li , W. , Wang , Xiaolong , Zhou , C. , Zhang , W. , Shan , T. , 2024 . Unveiling the Virome of Wild Birds: Exploring CRESS-DNA Viral Dark Matter . Genome Biol. Evol . 16 , evae206 . 
doi: 10.1093/gbe/evae206 OpenUrl CrossRef PubMed ↵ de Castro , E. , Sigrist , C.J.A. , Gattiker , A. , Bulliard , V. , Langendijk-Genevaux , P.S. , Gasteiger , E. , Bairoch , A. , Hulo , N. , 2006 . ScanProsite: detection of PROSITE signature matches and ProRule-associated functional and structural residues in proteins . Nucleic Acids Res . 34 , W362 – W365 . doi: 10.1093/nar/gkl124 OpenUrl CrossRef PubMed Web of Science ↵ de la Higuera , I. , Kasun , G.W. , Torrance , E.L. , Pratt , A.A. , Maluenda , A. , Colombet , J. , Bisseux , M. , Ravet , V. , Dayaram , A. , Stainton , D. , Kraberger , S. , Zawar-Reza , P. , Goldstien , S. , Briskie , J.V. , White , R. , Taylor , H. , Gomez , C. , Ainley , D.G. , Harding , J.S. , Fontenele , R.S. , Schreck , J. , Ribeiro , S.G. , Oswald , S.A. , Arnold , J.M. , Enault , F. , Varsani , A. , Stedman , K.M. , 2020 . Unveiling Crucivirus Diversity by Mining Metagenomic Data . mBio 11 , 10.1128/mbio.01410-20. doi: 10.1128/mbio.01410-20 OpenUrl CrossRef ↵ Delwart , E. , Li , L. , 2012 . Rapidly expanding genetic diversity and host range of the Circoviridae viral family and other Rep encoding small circular ssDNA genomes . Virus Res . 164 , 114 – 121 . doi: 10.1016/j.virusres.2011.11.021 OpenUrl CrossRef PubMed ↵ Desingu , P.A. , Nagarajan , K. , 2022 . Genetic Diversity and Characterization of Circular Replication (Rep)-Encoding Single-Stranded (CRESS) DNA Viruses . Microbiol. Spectr . doi: 10.1128/spectrum.01057-22 OpenUrl CrossRef ↵ D’Souza , D.J. , Kool , E.T. , 1992 . Strong binding of single-stranded DNA by stem-loop oligonucleotides . J. Biomol. Struct. Dyn . 10 , 141 – 152 . doi: 10.1080/07391102.1992.10508634 OpenUrl CrossRef PubMed Web of Science ↵ Duan , J. , Keeler , E. , McFarland , A. , Scott , P. , Collman , R.G. , Bushman , F.D. , 2024 . The virome of the kitome: small circular virus-like genomes in laboratory reagents . Microbiol. Resour. Announc . 13 , e01261 – 23 . 
doi: 10.1128/mra.01261-23 OpenUrl CrossRef PubMed ↵ Fu , X. , Wang , X. , Ni , B. , Shen , H. , Wang , H. , Zhang , X. , Chen , S. , Shao , S. , Zhang , W. , 2011 . Recombination analysis based on the complete genome of bocavirus . Virol. J . 8 , 182 . doi: 10.1186/1743-422X-8-182 OpenUrl CrossRef PubMed ↵ Galili , T. , 2015 . dendextend: an R package for visualizing, adjusting and comparing trees of hierarchical clustering . Bioinformatics 31 , 3718 – 3720 . doi: 10.1093/bioinformatics/btv428 OpenUrl CrossRef PubMed ↵ Gattiker , A. , Gasteiger , E. , Bairoch , A. , 2002 . ScanProsite: a reference implementation of a PROSITE scanning tool . Appl. Bioinformatics 1 , 107 – 108 . OpenUrl CrossRef PubMed ↵ Golyaev , V. , Dierickx , S. , Deforche , K. , Dumon , W. , Vanderschuren , H. , 2025 . A method for indepth analysis of circular DNA virus populations by unambiguously profiling the low abundant virus variants and partial genomic components . Nucleic Acids Res . 53 , gkaf221 . doi: 10.1093/nar/gkaf221 OpenUrl CrossRef PubMed ↵ Hackl , T. , Ankenbrand , M. , Adrichem, B. van , Wilkins , D. , Haslinger , K. , 2024 . gggenomes: effective and versatile visualizations for comparative genomics . doi: 10.48550/arXiv.2411.13556 OpenUrl CrossRef ↵ Jones , A. , Kasun , G.W. , Stover , J. , Stedman , K.M. , de la Higuera , I. , 2022 . CRUISE, a Tool for the Detection of Iterons in Circular Rep-Encoding Single-Stranded DNA Viruses . Microbiol. Resour. Announc . 12 , e01123 – 22 . doi: 10.1128/mra.01123-22 OpenUrl CrossRef PubMed ↵ Katoh , K. , Standley , D.M. , 2013 . MAFFT Multiple Sequence Alignment Software Version 7: Improvements in Performance and Usability . Mol. Biol. Evol . 30 , 772 – 780 . doi: 10.1093/molbev/mst010 OpenUrl CrossRef PubMed Web of Science ↵ Kazlauskas , D. , Dayaram , A. , Kraberger , S. , Goldstien , S. , Varsani , A. , Krupovic , M. , 2017 . 
Evolutionary history of ssDNA bacilladnaviruses features horizontal acquisition of the capsid gene from ssRNA nodaviruses . Virology 504 , 114 – 121 . doi: 10.1016/j.virol.2017.02.001 OpenUrl CrossRef PubMed ↵ Kazlauskas , D. , Varsani , A. , Koonin , E.V. , Krupovic , M. , 2019 . Multiple origins of prokaryotic and eukaryotic single-stranded DNA viruses from bacterial and archaeal plasmids . Nat. Commun . 10 , 3425 . doi: 10.1038/s41467-019-11433-0 OpenUrl CrossRef PubMed ↵ Kazlauskas , D. , Varsani , A. , Krupovic , M. , 2018 . Pervasive Chimerism in the Replication-Associated Proteins of Uncultured Single-Stranded DNA Viruses . Viruses 10 , 187 . doi: 10.3390/v10040187 OpenUrl CrossRef PubMed ↵ Keeler , E.L. , Taylor , L.J. , Abbas , A. , Collman , R.G. , Bushman , F.D. , 2021 . Rengasvirus, a Circular Replication-Associated Protein-Encoding Single-Stranded DNA Virus-Related Genome That Is a Common Contaminant in Metagenomic Data . Microbiol. Resour. Announc . 10 , 10.1128/mra.00273-21. doi: 10.1128/mra.00273-21 OpenUrl CrossRef ↵ Kim , M.-S. , Park , E.-J. , Roh , S.W. , Bae , J.-W. , 2011 . Diversity and Abundance of Single-Stranded DNA Viruses in Human Feces . Appl. Environ. Microbiol . 77 , 8062 – 8070 . doi: 10.1128/AEM.06331-11 OpenUrl Abstract / FREE Full Text ↵ Krupovic , M. , Varsani , A. , Kazlauskas , D. , Breitbart , M. , Delwart , E. , Rosario , K. , Yutin , N. , Wolf , Y.I. , Harrach , B. , Zerbini , F.M. , Dolja , V.V. , Kuhn , J.H. , Koonin , E.V. , 2020 . Cressdnaviricota: a Virus Phylum Unifying Seven Families of Rep-Encoding Viruses with Single-Stranded, Circular DNA Genomes . J. Virol . 94 , 10.1128/jvi.00582-20. doi: 10.1128/jvi.00582-20 OpenUrl CrossRef ↵ Krupovic , M. , Zhi , N. , Li , J. , Hu , G. , Koonin , E.V. , Wong , S. , Shevchenko , S. , Zhao , K. , Young , N.S. , 2015 . Multiple Layers of Chimerism in a Single-Stranded DNA Virus Discovered by Deep Sequencing . Genome Biol. Evol . 7 , 993 – 1001 . 
doi: 10.1093/gbe/evv034 OpenUrl CrossRef PubMed ↵ Leal Rodríguez , C. , Shah , S.A. , Rasmussen , M.A. , Thorsen , J. , Boulund , U. , Pedersen , C.-E.T. , Castro-Mejía , J.L. , Poulsen , C.E. , Poulsen , C.S. , Deng , L. , Larsen , F.A.N. , Widdowson , M. , Zhang , Y. , Sørensen , S.J. , Moineau , S. , Petit , M.-A. , Chawes , B. , Bønnelykke , K. , Nielsen , D.S. , Stokholm , J. , 2024 . The infant gut virome is associated with preschool asthma risk independently of bacteria . Nat. Med . 30 , 138 – 148 . doi: 10.1038/s41591-023-02685-x OpenUrl CrossRef PubMed ↵ Lefeuvre , P. , Lett , J.-M. , Varsani , A. , Martin , D.P. , 2009 . Widely conserved recombination patterns among single-stranded DNA viruses . J. Virol . 83 , 2697 – 2707 . doi: 10.1128/JVI.02152-08 OpenUrl Abstract / FREE Full Text ↵ Letunic , I. , Bork , P. , 2024 . Interactive Tree of Life (iTOL) v6: recent updates to the phylogenetic tree display and annotation tool . Nucleic Acids Res . 52 , W78 – W82 . doi: 10.1093/nar/gkae268 OpenUrl CrossRef PubMed ↵ Martin , D.P. , Biagini , P. , Lefeuvre , P. , Golden , M. , Roumagnac , P. , Varsani , A. , 2011 . Recombination in Eukaryotic Single Stranded DNA Viruses . Viruses 3 , 1699 – 1738 . doi: 10.3390/v3091699 OpenUrl CrossRef PubMed Web of Science ↵ Martin , D.P. , Williamson , C. , Posada , D. , 2005 . RDP2: recombination detection and analysis from sequence alignments . Bioinformatics 21 , 260 – 262 . doi: 10.1093/bioinformatics/bth490 OpenUrl CrossRef PubMed Web of Science ↵ Minh , B.Q. , Schmidt , H.A. , Chernomor , O. , Schrempf , D. , Woodhams , M.D. , von Haeseler , A. , Lanfear , R. , 2020 . IQ-TREE 2: New Models and Efficient Methods for Phylogenetic Inference in the Genomic Era . Mol. Biol. Evol . 37 , 1530 – 1534 . doi: 10.1093/molbev/msaa015 OpenUrl CrossRef PubMed ↵ Naccache , S.N. , Greninger , A.L. , Lee , D. , Coffey , L.L. , Phan , T. , Rein-Weston , A. , Aronsohn , A. , Hackett , J. , Delwart , E.L. , Chiu , C.Y. , 2013 . 
The Perils of Pathogen Discovery: Origin of a Novel Parvovirus-Like Hybrid Genome Traced to Nucleic Acid Extraction Spin Columns . J. Virol . 87 , 11966 – 11977 . doi: 10.1128/jvi.02323-13 OpenUrl Abstract / FREE Full Text ↵ Nayfach , S. , Camargo , A.P. , Schulz , F. , Eloe-Fadrosh , E. , Roux , S. , Kyrpides , N.C. , 2021 . CheckV assesses the quality and completeness of metagenome-assembled viral genomes . Nat. Biotechnol . 39 , 578 – 585 . doi: 10.1038/s41587-020-00774-7 OpenUrl CrossRef PubMed ↵ Oberg , N. , Zallot , R. , Gerlt , J.A. , 2023 . EFI-EST, EFI-GNT, and EFI-CGFP: Enzyme Function Initiative (EFI) Web Resource for Genomic Enzymology Tools . J. Mol. Biol., Computation Resources for Molecular Biology 435 , 168018 . doi: 10.1016/j.jmb.2023.168018 OpenUrl CrossRef PubMed ↵ Olivo , D. , Khalifeh , A. , Custer , J.M. , Kraberger , S. , Varsani , A. , 2024 . Diverse Small Circular DNA Viruses Identified in an American Wigeon Fecal Sample . Microorganisms 12 , 196 . doi: 10.3390/microorganisms12010196 OpenUrl CrossRef PubMed ↵ Olomu , I.N. , Pena-Cortes , L.C. , Long , R.A. , Vyas , A. , Krichevskiy , O. , Luellwitz , R. , Singh , P. , Mulks , M.H. , 2020 . Elimination of “kitome” and “splashome” contamination results in lack of detection of a unique placental microbiome . BMC Microbiol . 20 , 157 . doi: 10.1186/s12866-020-01839-y OpenUrl CrossRef PubMed ↵ Paradis , E. , Schliep , K. , 2019 . ape 5.0: an environment for modern phylogenetics and evolutionary analyses in R . Bioinformatics 35 , 526 – 528 . doi: 10.1093/bioinformatics/bty633 OpenUrl CrossRef PubMed ↵ Porter , A.F. , Cobbin , J. , Li , C.-X. , Eden , J.-S. , Holmes , E.C. , 2021 . Metagenomic Identification of Viral Sequences in Laboratory Reagents . Viruses 13 , 2122 . doi: 10.3390/v13112122 OpenUrl CrossRef PubMed ↵ Pratt , A.A. , Torrance , E.L. , Kasun , G.W. , Stedman , K.M. , de la Higuera , I. , 2021 . StemLoop-Finder: a Tool for the Detection of DNA Hairpins with Conserved Motifs . 
Microbiol. Resour. Announc . 10 , 10.1128/mra.00424-21. doi: 10.1128/mra.00424-21 OpenUrl CrossRef ↵ Qazi , J. , 2016 . Banana bunchy top virus and the bunchy top disease . J. Gen. Plant Pathol . 82 , 2 – 11 . doi: 10.1007/s10327-015-0642-7 OpenUrl CrossRef ↵ Requião , R.D. , Carneiro , R.L. , Moreira , M.H. , Ribeiro-Alves , M. , Rossetto , S. , Palhano , F.L. , Domitrovic , T. , 2020 . Viruses with different genome types adopt a similar strategy to pack nucleic acids based on positively charged protein domains . Sci. Rep . 10 , 5470 . doi: 10.1038/s41598-020-62328-w OpenUrl CrossRef PubMed ↵ Rosario , K. , Duffy , S. , Breitbart , M. , 2012 . A field guide to eukaryotic circular single-stranded DNA viruses: insights gained from metagenomics . Arch. Virol . 157 , 1851 – 1871 . doi: 10.1007/s00705-012-1391-y OpenUrl CrossRef PubMed ↵ Rosario , K. , Mettel , K.A. , Benner , B.E. , Johnson , R. , Scott , C. , Yusseff-Vanegas , S.Z. , Baker , C.C.M. , Cassill , D.L. , Storer , C. , Varsani , A. , Breitbart , M. , 2018 . Virus discovery in all three major lineages of terrestrial arthropods highlights the diversity of single-stranded DNA viruses associated with invertebrates . PeerJ 6 , e5761 . doi: 10.7717/peerj.5761 OpenUrl CrossRef PubMed ↵ Rosario , K. , Schenck , R.O. , Harbeitner , R.C. , Lawler , S.N. , Breitbart , M. , 2015 . Novel circular single-stranded DNA viruses identified in marine invertebrates reveal high sequence diversity and consistent predicted intrinsic disorder patterns within putative structural proteins . Front. Microbiol . 6 . doi: 10.3389/fmicb.2015.00696 OpenUrl CrossRef PubMed ↵ Roux , S. , Adriaenssens , E.M. , Dutilh , B.E. , Koonin , E.V. , Kropinski , A.M. , Krupovic , M. , Kuhn , J.H. , Lavigne , R. , Brister , J.R. , Varsani , A. , Amid , C. , Aziz , R.K. , Bordenstein , S.R. , Bork , P. , Breitbart , M. , Cochrane , G.R. , Daly , R.A. , Desnues , C. , Duhaime , M.B. , Emerson , J.B. , Enault , F. , Fuhrman , J.A. , Hingamp , P. 
, Hugenholtz , P. , Hurwitz , B.L. , Ivanova , N.N. , Labonté , J.M. , Lee , K.-B. , Malmstrom , R.R. , Martinez-Garcia , M. , Mizrachi , I.K. , Ogata , H. , Páez-Espino , D. , Petit , M.-A. , Putonti , C. , Rattei , T. , Reyes , A. , Rodriguez-Valera , F. , Rosario , K. , Schriml , L. , Schulz , F. , Steward , G.F. , Sullivan , M.B. , Sunagawa , S. , Suttle , C.A. , Temperton , B. , Tringe , S.G. , Thurber , R.V. , Webster , N.S. , Whiteson , K.L. , Wilhelm , S.W. , Wommack , K.E. , Woyke , T. , Wrighton , K.C. , Yilmaz , P. , Yoshida , T. , Young , M.J. , Yutin , N. , Allen , L.Z. , Kyrpides , N.C. , Eloe-Fadrosh , E.A. , 2019 . Minimum Information about an Uncultivated Virus Genome (MIUViG) . Nat. Biotechnol . 37 , 29 – 37 . doi: 10.1038/nbt.4306 OpenUrl CrossRef PubMed ↵ Shannon , P. , Markiel , A. , Ozier , O. , Baliga , N.S. , Wang , J.T. , Ramage , D. , Amin , N. , Schwikowski , B. , Ideker , T. , 2003 . Cytoscape: A Software Environment for Integrated Models of Biomolecular Interaction Networks . Genome Res . 13 , 2498 – 2504 . doi: 10.1101/gr.1239303 OpenUrl Abstract / FREE Full Text ↵ Shen , W. , Le , S. , Li , Y. , Hu , F. , 2016 . SeqKit: A Cross-Platform and Ultrafast Toolkit for FASTA/Q File Manipulation . PLOS ONE 11 , e0163962 . doi: 10.1371/journal.pone.0163962 OpenUrl CrossRef PubMed ↵ Thomsen , M.C.F. , Nielsen , M. , 2012 . Seq2Logo: a method for construction and visualization of amino acid binding motifs and sequence profiles including sequence weighting, pseudo counts and two-sided representation of amino acid enrichment and depletion . Nucleic Acids Res . 40 , W281 – W287 . doi: 10.1093/nar/gks469 OpenUrl CrossRef PubMed Web of Science ↵ Timchenko , T. , de Kouchkovsky , F. , Katul , L. , David , C. , Vetten , H.J. , Gronenborn , B. , 1999 . A Single Rep Protein Initiates Replication of Multiple Genome Components of Faba Bean Necrotic Yellows Virus, a Single-Stranded DNA Virus of Plants . J. Virol . 73 , 10173 – 10182 . 
doi: 10.1128/jvi.73.12.10173-10182.1999 OpenUrl Abstract / FREE Full Text ↵ Tisza , M.J. , Belford , A.K. , Domínguez-Huerta , G. , Bolduc , B. , Buck , C.B. , 2021 . Cenote-Taker 2 democratizes virus discovery and sequence annotation . Virus Evol . 7 , veaa100 . doi: 10.1093/ve/veaa100 OpenUrl CrossRef PubMed ↵ Tisza , M.J. , Pastrana , D.V. , Welch , N.L. , Stewart , B. , Peretti , A. , Starrett , G.J. , Pang , Y.-Y.S. , Krishnamurthy , S.R. , Pesavento , P.A. , McDermott , D.H. , Murphy , P.M. , Whited , J.L. , Miller , B. , Brenchley , J. , Rosshart , S.P. , Rehermann , B. , Doorbar , J. , Ta’ala , B.A. , Pletnikova , O. , Troncoso , J.C. , Resnick , S.M. , Bolduc , B. , Sullivan , M.B. , Varsani , A. , Segall , A.M. , Buck , C.B. , 2020 . Discovery of several thousand highly diverse circular DNA viruses . eLife 9 , e51971 . doi: 10.7554/eLife.51971 OpenUrl CrossRef PubMed ↵ Torralba , B. , Blanc , S. , Michalakis , Y. , 2024 . Reassortments in single-stranded DNA multipartite viruses: Confronting expectations based on molecular constraints with field observations . Virus Evol . 10 , veae010 . doi: 10.1093/ve/veae010 OpenUrl CrossRef PubMed ↵ Trubl , G. , Roux , S. , Solonenko , N. , Li , Y.-F. , Bolduc , B. , Rodríguez-Ramos , J. , Eloe-Fadrosh , E.A. , Rich , V.I. , Sullivan , M.B. , 2019 . Towards optimized viral metagenomes for double-stranded and single-stranded DNA viruses from challenging soils . PeerJ 7 , e7265 . doi: 10.7717/peerj.7265 OpenUrl CrossRef PubMed ↵ Varsani , A. , Harrach , B. , Roumagnac , P. , Benkő , M. , Breitbart , M. , Delwart , E. , Franzo , G. , Kazlauskas , D. , Rosario , K. , Segalés , J. , Dunay , E. , Rukundo , J. , Goldberg , T.L. , Fehér , E. , Kaszab , E. , Bányai , K. , Krupovic , M. , 2024 . 2024 taxonomy update for the family Circoviridae . Arch. Virol . 169 , 176 . doi: 10.1007/s00705-024-06107-2 OpenUrl CrossRef PubMed ↵ Varsani , A. , Shepherd , D.N. , Dent , K. , Monjane , A.L. , Rybicki , E.P. , Martin , D.P. , 2009 . 
A highly divergent South African geminivirus species illuminates the ancient evolutionary history of this family . Virol. J . 6 , 36 . doi: 10.1186/1743-422X-6-36 OpenUrl CrossRef PubMed ↵ Vibin , J. , Chamings , A. , Klaassen , M. , Alexandersen , S. , 2020 . Metagenomic characterisation of additional and novel avian viruses from Australian wild ducks . Sci. Rep . 10 , 22284 . doi: 10.1038/s41598-020-79413-9 OpenUrl CrossRef PubMed ↵ Wagih , O. , 2017 . ggseqlogo: a versatile R package for drawing sequence logos . Bioinformatics 33 , 3645 – 3647 . doi: 10.1093/bioinformatics/btx469 OpenUrl CrossRef PubMed ↵ Wang , H.-I. , Chang , C.-H. , Lin , P.-H. , Fu , H.-C. , Tang , C. , Yeh , H.-H. , 2013 . Application of Motif-Based Tools on Evolutionary Analysis of Multipartite Single-Stranded DNA Viruses . PLOS ONE 8 , e71565 . doi: 10.1371/journal.pone.0071565 OpenUrl CrossRef PubMed ↵ Xu , S. , Li , L. , Luo , X. , Chen , M. , Tang , W. , Zhan , L. , Dai , Z. , Lam , T.T. , Guan , Y. , Yu , G. , 2022 . Ggtree: A serialized data object for visualization of a phylogenetic tree and annotation data . doi: 10.1002/imt2.56 OpenUrl CrossRef PubMed ↵ Zhang , H. , Fu , Y. , Cao , C. , Jiang , H. , Tang , R. , Dai , Z. , Zhang , W. , 2025 . Identification and Characterization of Novel CRESS-DNA viruses in the Human Respiratory Tract . doi: 10.21203/rs.3.rs-6208723/v1 OpenUrl CrossRef ↵ Zhao , L. , Rosario , K. , Breitbart , M. , Duffy , S. , 2019 . Eukaryotic Circular Rep-Encoding Single-Stranded DNA (CRESS DNA) Viruses: Ubiquitous Viruses With Small Genomes and a Diverse Host Range . Adv. Virus Res . 103 , 71 – 133 . doi: 10.1016/bs.aivir.2018.10.001 OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted July 18, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. 
Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following CRESSENT: a Bioinformatic Toolkit to Explore and Improve ssDNA Virus Annotation Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share CRESSENT: a Bioinformatic Toolkit to Explore and Improve ssDNA Virus Annotation R.R. Pavan , M.B. Sullivan , M.J. Tisza bioRxiv 2025.07.14.664782; doi: https://doi.org/10.1101/2025.07.14.664782 Share This Article: Copy Citation Tools CRESSENT: a Bioinformatic Toolkit to Explore and Improve ssDNA Virus Annotation R.R. Pavan , M.B. Sullivan , M.J. Tisza bioRxiv 2025.07.14.664782; doi: https://doi.org/10.1101/2025.07.14.664782 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41909) Biophysics (21436) Cancer Biology (18576) Cell Biology (25479) Clinical Trials (138) Developmental Biology (13367) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15598) Genomics (22482) Immunology (17726) Microbiology (40359) Molecular Biology (17162) Neuroscience (88532) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7636) Plant Biology (15129) Scientific Communication and Education (2044) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00