Full text
38,992 characters
· extracted from
preprint-html
· click to expand
Phylo-rs: an extensible phylogenetic analysis library in Rust | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Phylo-rs: an extensible phylogenetic analysis library in Rust View ORCID Profile Sriram Vijendran , View ORCID Profile Tavis K. 
Anderson , View ORCID Profile Alexey Markin , Oliver Eulenstein doi: https://doi.org/10.1101/2025.03.10.642340 Sriram Vijendran 1 Department of Computer Science, Iowa State University , Ames, Iowa, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sriram Vijendran For correspondence: sriramv{at}iastate.edu oeulenst{at}iastate.edu Tavis K. Anderson 2 National Animal Disease Center, Agricultural Research Service, United States Department of Agriculture , Ames, Iowa, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tavis K. Anderson Alexey Markin 2 National Animal Disease Center, Agricultural Research Service, United States Department of Agriculture , Ames, Iowa, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Alexey Markin Oliver Eulenstein 1 Department of Computer Science, Iowa State University , Ames, Iowa, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: sriramv{at}iastate.edu oeulenst{at}iastate.edu Abstract Full Text Info/History Metrics Preview PDF Abstract We introduce Phylo-rs: a fast, extensible, general-purpose library for phylogenetic analysis and inference written in the Rust programming language. Phylo-rs leverages a combination of speed, memory-safety, and native WebAssembly support offered by Rust to provide a robust set of memory-efficient data structures and elementary phylogenetic algorithms. Phylo-rs is focused on efficient and convenient deployment of software aimed at large-scale phylogenetic analysis and inference. Phylo-rs is available under an open-source license on GitHub at https://github.com/sriram98v/phylo-rs , with documentation available at https://docs.rs/phylo/latest/phylo/ . 
Introduction Phylogenetic trees, or phylogenies , are fundamental to evolutionary biology as they represent hypotheses about the relationships between different taxonomic groups, benefiting diverse disciplines from agronomy [ 1 ] and conservation biology [ 2 – 4 ] to medical sciences [ 5 ] and epidemiology [ 6 ]. Recent advances in next-generation and long-read sequencing technologies [ 7 , 8 ] have improved access to large-scale genomic data and phylogenies. The scale of these data and phylogenetic trees necessitates efficient and effective computational libraries that implement specialized algorithms to analyze phylogenies and uncover hidden statistics and relationships between taxonomic groups [ 9 – 11 ]. Current phylogenetic libraries have, at times, struggled to keep pace with the demands of large-scale phylogenetic analysis. Existing libraries often make trade-offs between runtime efficiency and developmental ease based on the chosen language. Software implemented in libraries like Dendropy [ 12 ], TreeSwift [ 13 ], phytools [ 14 ] and ape [ 15 ] offer simple and intuitive syntax at the cost of the efficiency, low-level control, and functionality necessary for large-scale phylogenetic analysis. In contrast, implementations in libraries like Bio++ [ 16 ] and Gotree [ 17 ] offer memory and runtime efficiency but lack the memory-safety and security features of modern programming languages [ 18 , 19 ]. Rust is a modern programming language that leverages speed and memory-safety with high-level syntactical features. Rust is compiled with LLVM [ 20 ], providing optimal speed with a low memory footprint. Additionally, Rust supports automatic type inference at compile time, reducing the verbosity of written code. The key feature of Rust is the concept of ownership and borrowing of variables, which enables Rust to infer the lifetime of data stored in memory automatically. 
This eliminates the overhead of online memory management and completely eradicates common memory errors such as segmentation faults. Concomitantly, ownership enforces thread-safety, preventing race conditions in multi-threaded code. These features make Rust an attractive alternative for applications in Bioinformatics. We introduce Phylo-rs, a versatile phylogenetic library that provides an extensible foundation of data structures and algorithms for phylogenetic analysis and inference implemented in Rust [ 21 ]. Phylo-rs utilizes Rust’s modern programming language features, delivering high-performance software while ensuring memory-safety and maintainable code. Additionally, Phylo-rs provides native WebAssembly (WASM) support, offering a highly portable and compact compilation target for software [ 22 ]. This enables access to software written using Phylo-rs on web browsers, eliminating system compatibility issues and narrowing the gap between cutting-edge research and practical application [ 23 ]. To our knowledge, Phylo-rs is the first comprehensive phylogenetic analysis library written in Rust. Design and Implementation At a high level, phylogenies in Phylo-rs are implemented as Rust ‘traits’ that describe their behavior and functionality while making no assumptions on how they are represented in memory. These traits allow using any data structure, also called structs , to represent phylogenies. Structs require the implementation of only a few basic methods to gain access to several iterators, operators, and functions. This includes tree traversals, simulations, distance metrics, edit operations, and file I/O. These traits can be inherited by other user-defined traits, enabling seamless extensions to existing methods and convenient implementation of new algorithms, as shown in Figure 1 . Download figure Open in new tab Figure 1. A trait dependency graph showing how behavior is shared between objects that build up to a phylogenetic tree. 
Meta tree nodes, stat tree nodes, and weighted tree nodes extend the behavior of a rooted tree node to manipulate the meta-annotation, stat-annotation, and weight-annotation of a node, respectively. Similarly, a rooted meta tree and a rooted stat tree extend the behaviors of a rooted tree, and finally, a phylogenetic tree extends the behavior of all the defined trees. Phylo-rs eliminates redundant memory usage by yielding references instead of deep copies. Phylo-rs enforces memory-safety at compilation, which secures software from memory vulnerabilities. Memory-safety is ensured in Phylo-rs by assigning object lifetimes; tree components are retained in memory for as long as the tree itself, eliminating memory-related errors or vulnerabilities. Classical analyses of phylogenies require the pairwise comparison of trees using established metrics such as the Robinson-Foulds metric [ 24 ], cophenetic distances [ 25 , 26 ], and cluster affinity distance [ 27 ]. Phylo-rs offers functions that implement the most efficient known algorithms [ 28 ] to compute these distances. Many phylogenetic inference algorithms employ tree edit operations [ 29 – 32 ] when searching for the optimal phylogenetic history of a set of taxa. In line with that, Phylo-rs provides traits to perform tree edit operations such as Subtree Pruning and Regrafting [ 33 ], Tree Bisection and Reconnection [ 34 ], and Nearest Neighbor Interchange [ 35 ]. Phylo-rs supports the widely used Newick [ 36 ] encoding for phylogenies, including constructing and translating trees from live streams of ASCII data over web-based and multi-threaded ports. Phylo-rs implements a Newick trait that can be extended to cloud-based applications. The Newick trait can also be extended to support numerous file formats, such as the Nexus format, without making any metadata structure specifications. 
Phylo-rs is furnished with an intuitive tree-like struct that implements all the traits of phylogenies, which is fully detailed in the official Phylo-rs documentation. Phylo-rs documents the trade-offs for every method, providing links to alternative methods that achieve the same results differently, where possible. Traits are automatically tested using the standard tree struct via continuous integration and are benchmarked at every stable release. Phylo-rs is equipped with additional features to enable researchers to implement algorithms for large-scale analysis seamlessly. Each feature can be enabled or disabled at compilation time, depending on the infrastructure of the target hardware. Multi-threading Phylo-rs delivers multi-thread support by parallelizing its iterators while guaranteeing data-race freedom. Analyses that require independent computations for each vertex of a phylogeny can be executed simultaneously. Data parallelism can be highly beneficial in large-scale studies where phylogenies with tens of thousands of taxa can be analyzed efficiently by sharing the computational workload between numerous CPUs. Single Instruction, Multiple Data Phylo-rs permits parallelization of bit-level operations on single-CPU environments through the use of Single Instruction, Multiple Data (SIMD). SIMD has been frequently used to improve application performance in a variety of fields [ 37 – 39 ], with cases achieving a 9x speedup [ 40 ]. Phylo-rs utilizes SIMD when inferring and enumerating bipartitions of the taxa induced by a phylogeny. Phylo-rs computes the overlap between two clusters through parallelized bit-level operations on the same core by representing clusters as bit-strings. WASM Phylo-rs achieves platform interoperability, ease of use, and effortless distribution by supporting WASM as a compilation target. 
WASM is a compact binary instruction format for stack-based virtual machines [ 22 ] and can be called from JavaScript via Node.js or as a command line interface application. With WASM support, Phylo-rs imparts: Safety: Users are protected by software sandboxed virtual environments, protecting them from any damage from running malicious code. Speed: Low-level code generated by compilers is optimized ahead of time, allowing the code to fully utilize machine hardware; WASM supplies users with efficient tools that overcome the inefficient runtimes traditionally seen with sandboxed applications. Portability: Low-level code compiled to WASM as a single architecture targeted for the Web can run across various browsers, operating systems, and hardware types. As such, WASM is an excellent alternative to standard Graphical User Interface applications and provides a robust platform for disseminating bioinformatic tools and applications [ 41 , 42 ]. User interfaces can be standardized using any modern web browser, reducing the redundant graphical overhead of installed applications. Analytic tools written with Phylo-rs can be shared as web apps with built-in graphical interfaces and intuitive visualizations using modern graphical libraries [ 43 ]. Results We present a comparative analysis highlighting the performance of Phylo-rs relative to popular libraries, namely, Dendropy [ 12 ], GoTree [ 17 ], TreeSwift [ 13 ], and ape [ 15 ]. Following this, we demonstrate its utility with two examples of computationally demanding phylogenetic analyses that can be solved using Phylo-rs. All results and corresponding visualizations presented in this section can be reproduced on a typical desktop PC by following the instructions in the official GitHub repository at https://github.com/sriram98v/phylo-rs . 
Comparative Analysis We compare Phylo-rs with other popular phylogenetic libraries using a benchmark study that contrasts the mean runtime of six foundational algorithms commonly employed in phylogenetic analyses [ 29 , 30 ]: (i) computing the Robinson-Foulds metric, (ii) retrieving the Least Common Ancestor (LCA), (iii) tree traversals in pre- and post-order for vertices and edges, (iv) subtree extraction and contraction, (v) simulating random trees using the Yule evolutionary model, and (vi) applying the Nearest Neighbor Interchange (NNI) operation. We conducted 1000 iterations for each implementation with a precision of ±12 ns on a randomly simulated phylogenetic tree comprising 4000 taxa, excluding libraries that did not provide an implementation. When evaluating the computation of the Robinson-Foulds metric, we measured 1000 iterations on a pair of randomly simulated trees. We assessed tree simulation by measuring 1000 iterations of generating a tree comprising 4000 taxa. All benchmarks were conducted with Cargo bench for Phylo-rs, Python timeit for Dendropy [ 12 ] and TreeSwift [ 13 ], and the Linux time utility for ape [ 15 ] and Gotree [ 17 ], using identical trees for each implementation of the same algorithm. Each benchmark was performed on an Intel(R) Core(TM) i7-10700K 3.80GHz CPU running Arch Linux v6.6.28-2-lts and was executed on a single thread. Benchmarking with timeit and time entails an overhead of approximately 2 ms for reading the trees from files, which was excluded from the recorded runtimes. Benchmark Study Table 1 summarizes the mean runtime of computing the Robinson-Foulds metric, tree contraction, tree traversal, Yule tree simulation, LCA retrieval, and NNI. 
Notably, Phylo-rs achieves a significant speedup compared to the libraries written in Python and R while showing comparable performance with Gotree [ 17 ], written in Go-lang; Phylo-rs achieves a 10x speedup compared to Dendropy [ 12 ] and ape [ 15 ] on all compared implementations and a nearly 2x speedup in computation compared to GoTree. These operations are fundamental components of many popular algorithms used in practice, including maximum likelihood estimation [ 29 , 30 ] and Bayesian inference [ 44 ]. The improved runtimes indicate that Phylo-rs can significantly reduce the time required to perform large-scale phylogenetic analyses, making it a more efficient choice for researchers and practitioners. Furthermore, the comparable performance of Phylo-rs with GoTree in tree traversal and LCA retrieval suggests that Phylo-rs is a viable alternative for existing phylogenetic analysis workflows. View this table: View inline View popup Download powerpoint Table 1. List of algorithms commonly used in computational phylogenetics. Additionally, Table 1 indicates that there are more methods natively implemented in Phylo-rs than those compared in the previous section. These additional operations are essential for many applications in phylogenetic analysis, such as tree reconciliation and phylogenetic inference. Phylo-rs can be easily integrated into various workflows and pipelines by providing a broader range of fundamental operations. This makes it appealing for researchers and practitioners working on diverse phylogenetic tasks. Quantifying Phylogenetic Diversity for Influenza A Virus Control We quantified the phylogenetic diversity (PD) [ 45 ] of the H1 subtype influenza A virus (IAV) in swine collected between the years 2015 and 2022. The H1 subtype of swine IAV in the United States has at least 11 genetically distinct clades of viruses [ 46 ]. 
Controlling IAV transmission relies upon vaccination, and designing optimal vaccination strategies requires a detailed analysis of the genetic diversity of the circulating viruses [ 47 , 48 ]. To quantify diversity dynamics, we downloaded all 8241 publicly available IAV hemagglutinin (HA) sequences from the USDA influenza A virus in swine surveillance system collected between 2015 and 2022. All sequences were classified into one of the named swine IAV clades using octoFLU v.1.0.0 [ 46 , 49 ]. We aligned the nucleotide sequences with mafft v.7.525 [ 50 ] and inferred a maximum likelihood tree using IQ-Tree v2.2.6 [ 30 ] under the generalized time-reversible (GTR) substitution model [ 51 ] with empirical base frequencies and five free-rate categories [ 52 ]. We computed PD for each named clade detected within each year using Phylo-rs and visualized the resulting dynamics in Figure 2 . These data indicate that the 1B.2.1 and 1A.1.1.3 clades demonstrated a steady increase in PD across the years, whereas other clades, e.g., 1B.2.2.1 and 1A.3.3.2, fluctuated. The steady increase in PD in the 1B.2.1 and 1A.1.1.3 clades represents a significant challenge for control strategies, i.e., vaccines designed to reflect circulating genetic and antigenic diversity may not work adequately, as a strain selected as a vaccine antigen in 2016 may not reflect the diversity in the clade in 2018 [ 48 ]. In addition, this analysis identified clades with low PD, which may be susceptible to removal through the use of targeted vaccines that are focused on the genetic diversity observed within these clades. A benefit of using PD to track diversity is that clades may be driven to extinction with a reduction in total genetic diversity and the subsequent minimization of reassortment and antigenic drift [ 53 ]. Download figure Open in new tab Figure 2. Visualization of variation in phylogenetic diversity of the H1 subtype influenza A virus (IAV) collected between the years 2015 and 2022. 
The phylogenetic clades 1B.2.1 and 1A.1.1.3 demonstrated an almost linear increase in phylogenetic diversity across the years indicating evolution of the pathogen with increases in genetic diversity that may reduce the efficacy of vaccine control strategies. The phylogenetic clades 1B.2.2.2 and 1A.4 demonstrated a decline in phylogenetic diversity, suggesting that vaccine control measures may be designed with a single antigenic component to effectively prevent infection and transmission. The phylogenetic diversity of a tree at each year was computed as the Faith Index [ 45 ] implemented in Phylo-rs. Visualizing Phylogenetic Tree Space Phylogenetic tree spaces are often complex with many local optima, which confounds phylogenetic inference [ 29 , 30 , 44 ]. A standard approach to searching the tree space for an optimal phylogeny is to sample the tree space using multiple Markov chain Monte Carlo (MCMC) Bayesian analyses [ 29 , 30 , 44 ], resulting in several samples of the tree space. The samples produced by each analysis can then be visualized by computing all pairwise distances between the sampled trees and embedding them into a 2- or 3-dimensional Euclidean space [ 54 ]. A single MCMC analysis can produce upwards of 10000 trees, making the computation of pairwise distances infeasible in large-scale studies involving hundreds of taxa. Phylo-rs makes the computation of all pairwise distances feasible even on large datasets with thousands of taxa and tens of thousands of sampled trees due to its innate speed and in-built multi-threading. We tested this approach on an MCMC analysis that was conducted to assess the emergence and spread of highly pathogenic avian influenza (HPAI) H5N1 viruses in dairy cattle in the US from [ 55 ]. Ten independent MCMC runs were conducted with BEAST v1.10.4 on a set of 587 influenza A virus hemagglutinin H5N1 clade 2.3.4.4b sequences sampled from dairy cattle, poultry, peridomestic mammals, and wild birds. 
Each run consisted of a single Markov chain lasting 50 million generations, sampled every 5000 steps. This resulted in 10001 sampled trees in each run and 100010 trees in total. We computed all pairwise Robinson-Foulds metrics between the sampled trees using Phylo-rs on a workstation with an Intel(R) Xeon(R) w7-2475X 4.8GHz CPU running Ubuntu 20.04.3 LTS. The computation was conducted with 40 threads, taking 32 hours to calculate the distance of approximately 5 billion tree pairs. To simplify visualization, we omitted four runs that did not converge [ 55 ] and removed the first 20% of trees as the burn-in from the remaining 6 runs. We then embedded the distances between the remaining 48,000 trees into a 2-dimensional space using UMAP ( Figure 3 ). Each independent MCMC run formed a continuous line in the resulting embedding. All runs except for run 10 appear to have traversed a similar subspace of trees while run 10 clusters separately from the other runs. Download figure Open in new tab Figure 3. UMAP embedding of the phylogenetic tree space explored by 6 independent MCMC runs. All runs were conducted under the same conditions. Each color represents the trees from a single run, where the green star indicates the starting tree and the red star indicates the final tree. The distances between the trees were computed using the Robinson-Foulds metric as implemented in Phylo-rs. Availability and Future Work Phylo-rs is a general-purpose phylogenetic analysis library written in Rust. By leveraging the Rust programming language’s memory-safety features and speed, Phylo-rs offers a variety of advanced phylogenetic algorithms and functionality. Phylo-rs fosters the dissemination of complex software for phylogenetic analysis, bridging the gap between theoretical advancement and practical implementation. Phylo-rs is available under an open-source license on GitHub at https://github.com/sriram98v/phylo-rs , with documentation at https://docs.rs/phylo/latest/phylo/ . 
Support for PhyloXML and PhyloJSON file formats will be included in the future. Further, tree simulations under the Birth-Death and Coalescent evolutionary models will be added in the near future. Phylo-rs will extend bindings to other languages, such as R and Python, and implement tree traits on highly memory-efficient structures provided by libraries such as ts-kit [ 56 ]. Supporting information This project was funded in part by the United States Department of Agriculture (USDA), Agricultural Research Service (ARS project numbers 5030-32000-231-000-D, 5030-32000-231-111-I, 3022-32000-018-017-S, 5030-32000-231-095-S, and 5030-32000-231-103-A) and with federal funds from the National Institute of Allergy and Infectious Diseases, National Institutes of Health, Department of Health and Human Services (Contract No. 75N93021C00015). The funding sources had no role in study design, data collection, and interpretation, or the decision to submit the work for publication. Mention of trade names or commercial products in this article is solely to provide specific information and does not imply recommendation or endorsement by the USDA. USDA is an equal opportunity provider and employer. Acknowledgments We are grateful for the comments on the manuscript provided by Dr. Paweł Górecki, Dr. Geng Ding, and Paige Falor and code reviews by Sanket Wagle. References 1. ↵ Hufbauer RA , Marrs RA , Jackson AK , Sforza R , Bais HP , Vivanco JM , et al. Population structure, ploidy levels and allelopathy of Centaurea maculosa (spotted knapweed) and C. diffusa (diffuse knapweed) in North America and Eurasia . In: XI International Symposium on Biological Control of Weeds. Citeseer ; 2004 . p. 121 . 2. ↵ Soltis DE , Soltis PS . The role of phylogenetics in comparative genetics . Plant Physiology . 2003 ; 132 ( 4 ): 1790 – 1800 . OpenUrl FREE Full Text 3. Winter M , Devictor V , Schweiger O. Phylogenetic diversity and nature conservation: where are we? Trends in Ecology & Evolution . 
2013 ; 28 ( 4 ): 199 – 204 . OpenUrl CrossRef PubMed 4. ↵ Baum D , et al. Reading a phylogenetic tree: the meaning of monophyletic groups . Nature Education . 2008 ; 1 ( 1 ): 190 . OpenUrl 5. ↵ Nik-Zainal S , Van Loo P , Wedge DC , Alexandrov LB , Greenman CD , Lau KW , et al. The life history of 21 breast cancers . Cell . 2012 ; 149 ( 5 ): 994 – 1007 . OpenUrl CrossRef PubMed 6. ↵ Harris SR , Cartwright EJ , Török ME , Holden MT , Brown NM , Ogilvy-Stuart AL , et al. Whole-genome sequencing for analysis of an outbreak of meticillin-resistant Staphylococcus aureus: a descriptive study . The Lancet infectious diseases . 2013 ; 13 ( 2 ): 130 – 136 . OpenUrl CrossRef PubMed Web of Science 7. ↵ Modi A , Vai S , Caramelli D , Lari M. The Illumina sequencing protocol and the NovaSeq 6000 system . In: Bacterial Pangenomics: Methods and Protocols . Springer ; 2021 . p. 15 – 42 . 8. ↵ Wang M , Fu A , Hu B , Tong Y , Liu R , Liu Z , et al. Nanopore targeted sequencing for the accurate and comprehensive detection of SARS-CoV-2 and other respiratory viruses . Small . 2020 ; 16 ( 32 ): 2002169 . OpenUrl CrossRef PubMed 9. ↵ Wang Y , Zhao Y , Bollas A , Wang Y , Au KF . Nanopore sequencing technology, bioinformatics and applications . Nature Biotechnology . 2021 ; 39 ( 11 ): 1348 – 1365 . OpenUrl CrossRef PubMed 10. Kanda K , Pflug JM , Sproul JS , Dasenko MA , Maddison DR . Successful recovery of nuclear protein-coding genes from small insects in museums using Illumina sequencing . PLoS One . 2015 ; 10 ( 12 ): e0143929 . OpenUrl CrossRef PubMed 11. ↵ Ram JL , Karim AS , Sendler ED , Kato I. Strategy for microbiome analysis using 16S rRNA gene sequence analysis on the Illumina sequencing platform . Systems Biology in Reproductive Medicine . 2011 ; 57 ( 3 ): 162 – 170 . OpenUrl CrossRef PubMed 12. ↵ Sukumaran J , Holder MT . DendroPy: a Python library for phylogenetic computing . Bioinformatics . 2010 ; 26 ( 12 ): 1569 – 1571 . OpenUrl CrossRef PubMed Web of Science 13. 
↵ Moshiri N. TreeSwift: A massively scalable Python tree package . SoftwareX . 2020 ; 11 : 100436 . OpenUrl CrossRef PubMed 14. ↵ Revell LJ . phytools 2.0: an updated R ecosystem for phylogenetic comparative methods (and other things) . PeerJ . 2024 ; 12 : e16505 . OpenUrl CrossRef PubMed 15. ↵ Paradis E , Schliep K. ape 5.0: an environment for modern phylogenetics and evolutionary analyses in R . Bioinformatics . 2019 ; 35 ( 3 ): 526 – 528 . OpenUrl CrossRef PubMed 16. ↵ Dutheil J , Gaillard S , Bazin E , Glémin S , Ranwez V , Galtier N , et al. Bio++: a set of C++ libraries for sequence analysis, phylogenetics, molecular evolution and population genetics . BMC Bioinformatics . 2006 ; 7 : 1 – 6 . OpenUrl CrossRef PubMed Web of Science 17. ↵ Lemoine F , Gascuel O. Gotree/Goalign: toolkit and Go API to facilitate the development of phylogenetic workflows . NAR Genomics and Bioinformatics . 2021 ; 3 ( 3 ): qab075 . OpenUrl 18. ↵ Perkel JM . Why scientists are turning to Rust . Nature . 2020 ; 588 : 185 . OpenUrl CrossRef PubMed 19. ↵ Fulton KR , Chan A , Votipka D , Hicks M , Mazurek ML . Benefits and drawbacks of adopting a secure programming language: Rust as a case study . In: Seventeenth Symposium on Usable Privacy and Security (SOUPS 2021) ; 2021 . p. 597 – 616 . 20. ↵ Lattner C , Adve V. LLVM: a compilation framework for lifelong program analysis & transformation . In: International Symposium on Code Generation and Optimization, 2004. CGO 2004 .; 2004 . p. 75 – 86 . 21. ↵ Klabnik S , Nichols C. The Rust programming language . No Starch Press ; 2023 . 22. ↵ Haas A , Rossberg A , Schuff DL , Titzer BL , Holman M , Gohman D , et al. Bringing the web up to speed with WebAssembly . SIGPLAN Not . 2017 ; 52 ( 6 ): 185 – 200 . doi: 10.1145/3140587.3062363 . OpenUrl CrossRef 23. ↵ Bhonsle A , Patil V , Valkunde T , Lotlikar T. Linear Algebra in the Browser powered by WebAssembly . In: 2022 International Conference for Advancement in Technology (ICONAT) ; 2022 . p. 
1 – 7 . 24. ↵ Day WH . Optimal algorithms for comparing trees with labeled leaves . Journal of classification . 1985 ; 2 : 7 – 28 . OpenUrl CrossRef 25. ↵ Sokal RR , Rohlf FJ . The comparison of dendrograms by objective methods . Taxon . 1962 ; p. 33 – 40 . 26. ↵ Cardona G , Mir A , Rosselló F , Rotger L , Sánchez D. Cophenetic metrics for phylogenetic trees, after Sokal and Rohlf . BMC Bioinformatics . 2013 ; 14 : 1 – 13 . OpenUrl CrossRef PubMed 27. ↵ Moon J , Eulenstein O. The cluster affinity distance for phylogenies . In: Bioinformatics Research and Applications: 15th International Symposium, ISBRA 2019 , Barcelona, Spain , June 3–6, 2019, Proceedings 15. Springer ; 2019 . p. 52 – 64 . 28. ↵ Górecki P , Markin A , Eulenstein O. Cophenetic Distances: A Near-Linear Time Algorithmic Framework . In: Wang L , Zhu D , editors. COCOON 2018 . Springer, Cham. Cham : Springer International Publishing ; 2018 . p. 168 – 179 . 29. ↵ Kozlov AM , Darriba D , Flouri T , Morel B , Stamatakis A. RAxML-NG: a fast, scalable and user-friendly tool for maximum likelihood phylogenetic inference . Bioinformatics . 2019 ; 35 ( 21 ): 4453 – 4455 . OpenUrl CrossRef PubMed 30. ↵ Minh BQ , Schmidt HA , Chernomor O , Schrempf D , Woodhams MD , Von Haeseler A , et al. IQ-TREE 2: new models and efficient methods for phylogenetic inference in the genomic era . Molecular Biology and Evolution . 2020 ; 37 ( 5 ): 1530 – 1534 . OpenUrl CrossRef PubMed 31. Bininda-Emonds OR , Gittleman JL , Steel MA . The (super) tree of life: procedures, problems, and prospects . Annual Review of Ecology and Systematics . 2002 ; 33 ( 1 ): 265 – 289 . OpenUrl CrossRef Web of Science 32. ↵ Goloboff PA . Parsimony, likelihood, and simplicity . Cladistics . 2003 ; 19 ( 2 ): 91 – 103 . OpenUrl CrossRef Web of Science 33. ↵ Allen BL , Steel M. Subtree transfer operations and their induced metrics on evolutionary trees . Annals of Combinatorics . 2001 ; 5 : 1 – 15 . OpenUrl CrossRef 34. 
↵ Kelk S , Linz S , Meuwese R. Deep kernelization for the Tree Bisection and Reconnection (TBR) distance in phylogenetics . Journal of Computer and System Sciences . 2024 ; 142 : 103519 . OpenUrl CrossRef 35. ↵ Day WH . Properties of the nearest neighbor interchange metric for trees of small size . Journal of Theoretical Biology . 1983 ; 101 ( 2 ): 275 – 288 . OpenUrl CrossRef 36. ↵ Felsenstein J. Inferring phylogenies . In: Inferring phylogenies ; 2004 . p. 664 – 664 . 37. ↵ Lomont C. Introduction to intel advanced vector extensions . Intel white paper . 2011 ; 23 : 1 – 21 . OpenUrl 38. Cypher R , Sanz JL . SIMD architectures and algorithms for image processing and computer vision . IEEE Transactions on Acoustics, Speech, and Signal Processing . 1989 ; 37 ( 12 ): 2158 – 2174 . OpenUrl CrossRef 39. ↵ Hassaballah M , Omran S , Mahdy YB . A review of SIMD multimedia extensions and their usage in scientific and engineering applications . The Computer Journal . 2008 ; 51 ( 6 ): 630 – 649 . OpenUrl CrossRef 40. ↵ Alachiotis N , Berger SA , Stamatakis A. Coupling SIMD and SIMT architectures to boost performance of a phylogeny-aware alignment kernel . BMC Bioinformatics . 2012 ; 13 : 1 – 12 . OpenUrl CrossRef PubMed 41. ↵ Kramer A , Turakhia Y , Corbett-Detig R. ShUShER: private browser-based placement of sensitive genome samples on phylogenetic trees . Journal of Open Source Software . 2021 ; 6 ( 66 ): 3677 . OpenUrl CrossRef 42. ↵ Aksamentov I , Roemer C , Hodcroft EB , Neher RA . Nextclade: clade assignment, mutation calling and quality control for viral genomes . Journal of Open Source Software . 2021 ; 6 ( 67 ): 3773 . OpenUrl CrossRef 43. ↵ Meeks E. D3.js in Action: Data visualization with JavaScript . Simon and Schuster ; 2017 . 44. ↵ Bouckaert R , Heled J , Kühnert D , Vaughan T , Wu CH , Xie D , et al. BEAST 2: a software platform for Bayesian evolutionary analysis . PLoS Computational Biology . 2014 ; 10 ( 4 ): e1003537 . OpenUrl CrossRef 45. 
↵ Chao A , Chiu CH , Jost L. Phylogenetic diversity measures and their decomposition: a framework based on Hill numbers . Biodiversity Conservation and Phylogenetic Systematics . 2016 ; 14 : 141 – 172 . OpenUrl CrossRef 46. ↵ Anderson TK , Chang J , Arendsee ZW , Venkatesh D , Souza CK , Kimble JB , et al. Swine influenza A viruses and the tangled relationship with humans . Cold Spring Harbor perspectives in medicine . 2021 ; 11 ( 3 ): a038737 . OpenUrl Abstract / FREE Full Text 47. ↵ Neveau MN , Zeller MA , Kaplan BS , Souza CK , Gauger PC , Vincent AL , et al. Genetic and antigenic characterization of an expanding H3 influenza A virus clade in US swine visualized by Nextstrain . Msphere . 2022 ; 7 ( 3 ): e00994 – 21 . OpenUrl PubMed 48. ↵ Markin A , Wagle S , Grover S , Vincent Baker AL , Eulenstein O , Anderson TK . PARNAS: objectively selecting the most representative taxa on a phylogeny . Systematic Biology . 2023 ; 72 ( 5 ): 1052 – 1063 . OpenUrl CrossRef PubMed 49. ↵ Chang J , Anderson TK , Zeller MA , Gauger PC , Vincent AL . octoFLU: automated classification for the evolutionary origin of influenza A virus gene sequences detected in US Swine . Microbiology Resource Announcements . 2019 ; 8 ( 32 ): 10 – 1128 . OpenUrl 50. ↵ Katoh K , Misawa K , Kuma Ki , Miyata T. MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform . Nucleic Acids Research . 2002 ; 30 ( 14 ): 3059 – 3066 . OpenUrl CrossRef PubMed Web of Science 51. ↵ Tavaré S , Miura RM . Lectures on mathematics in the life sciences . In: Am. Math. Soc . vol. 17 ; 1986 . p. 57 – 86 . OpenUrl 52. ↵ Barba-Montoya J , Tao Q , Kumar S. Using a GTR+ Γ substitution model for dating sequence divergence when stationarity and time-reversibility assumptions are violated . Bioinformatics . 2020 ; 36 ( Supplement 2 ): i884 – i894 . OpenUrl CrossRef PubMed 53. ↵ Markin A , Macken CA , Baker AL , Anderson TK . Revealing reassortment in influenza A viruses with TreeSort . bioRxiv . 
2024 ; p. 2024 – 11 . 54. ↵ Hillis DM , Heath TA , John KS . Analysis and Visualization of Tree Space . Systematic Biology . 2005 ; 54 ( 3 ): 471 – 482 . OpenUrl CrossRef PubMed Web of Science 55. ↵ Nguyen TQ , Hutter C , Markin A , Thomas M , Lantz K , Killian ML , et al. Emergence and interstate spread of highly pathogenic avian influenza A (H5N1) in dairy cattle . bioRxiv . 2024 ; p. 2024 – 05 . 56. ↵ Kelleher J , Thornton KR , Ashander J , Ralph PL . Efficient pedigree recording for fast population genetics simulation . PLoS Computational Biology . 2018 ; 14 ( 11 ): e1006581 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted March 13, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Phylo-rs: an extensible phylogenetic analysis library in Rust Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Phylo-rs: an extensible phylogenetic analysis library in Rust Sriram Vijendran , Tavis K. Anderson , Alexey Markin , Oliver Eulenstein bioRxiv 2025.03.10.642340; doi: https://doi.org/10.1101/2025.03.10.642340 Share This Article: Copy Citation Tools Phylo-rs: an extensible phylogenetic analysis library in Rust Sriram Vijendran , Tavis K. 
Anderson , Alexey Markin , Oliver Eulenstein bioRxiv 2025.03.10.642340; doi: https://doi.org/10.1101/2025.03.10.642340 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7636) Biochemistry (17704) Bioengineering (13897) Bioinformatics (41963) Biophysics (21460) Cancer Biology (18598) Cell Biology (25525) Clinical Trials (138) Developmental Biology (13383) Ecology (19908) Epidemiology (2067) Evolutionary Biology (24325) Genetics (15613) Genomics (22512) Immunology (17738) Microbiology (40422) Molecular Biology (17190) Neuroscience (88634) Paleontology (667) Pathology (2835) Pharmacology and Toxicology (4825) Physiology (7645) Plant Biology (15158) Scientific Communication and Education (2046) Synthetic Biology (4302) Systems Biology (9825) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.