HaploThread: A Scalable Integrated Desktop Platform for Constructing and Visualizing Haplotype Networks for Large-sample Sequences

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 22,709 characters · extracted from preprint-html · click to expand
HaploThread: A Scalable Integrated Desktop Platform for Constructing and Visualizing Haplotype Networks for Large-sample Sequences | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Confirmatory Results HaploThread: A Scalable Integrated Desktop Platform for Constructing and Visualizing Haplotype Networks for Large-sample Sequences Bo Xu , View ORCID Profile Lun Li , Cuiping Li , Anke Wang , Zhuojing Fan , Shuhui Song doi: https://doi.org/10.1101/2025.07.06.659816 Bo Xu 1 National Genomics Data Centre, China National Centre for Bioinformation , Beijing 100101, China 2 Beijing Institute of Genomics, Chinese Academy of Sciences , Beijing 100101, China Find this 
author on Google Scholar Find this author on PubMed Search for this author on this site Lun Li 1 National Genomics Data Centre, China National Centre for Bioinformation , Beijing 100101, China 2 Beijing Institute of Genomics, Chinese Academy of Sciences , Beijing 100101, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Lun Li Cuiping Li 1 National Genomics Data Centre, China National Centre for Bioinformation , Beijing 100101, China 2 Beijing Institute of Genomics, Chinese Academy of Sciences , Beijing 100101, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Anke Wang 1 National Genomics Data Centre, China National Centre for Bioinformation , Beijing 100101, China 2 Beijing Institute of Genomics, Chinese Academy of Sciences , Beijing 100101, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Zhuojing Fan 1 National Genomics Data Centre, China National Centre for Bioinformation , Beijing 100101, China 2 Beijing Institute of Genomics, Chinese Academy of Sciences , Beijing 100101, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Shuhui Song 1 National Genomics Data Centre, China National Centre for Bioinformation , Beijing 100101, China 2 Beijing Institute of Genomics, Chinese Academy of Sciences , Beijing 100101, China 3 University of Chinese Academy of Sciences , Beijing 100049, China 4 Sino-Danish College, University of Chinese Academy of Sciences, Beijing, 100049, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: songshh{at}big.ac.cn Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract This note announces HaploThread, a user-friendly GUI desktop software designed for haplotype network construction and visualization. 
HaploThread is written in C++ using the Qt library, integrating network visualization and multiple multi-threaded haplotype construction algorithms such as McAN and fastHaN (which includes MSN, MJN and TCS) based on plugin mechanisms. It offers a straightforward approach to constructing and visualizing haplotype networks from large sample sets, and to extending functionality with plugins, facilitating the analysis of genetic variations and their evolutionary relationships. HaploThread is an open-source software released under the GNU General Public License (GPL). Its precompiled executables for Windows are freely available for download at https://ngdc.cncb.ac.cn/biocode/tool/BT007948 . Introduction Desktop software plays a crucial role in biological research by providing powerful computational tools with user-friendly graphical interfaces, enabling researchers to analyze complex biological data efficiently ( Kumar and Dudley 2007 ). Favoured by biologists, it offers a balanced solution that combines ease of use, high performance, and data security—unlike command-line tools that require programming skills or web-based platforms that depend on internet access. Particularly in population genetics, desktop software for haplotype network analysis is significant in inferring evolutionary relationships, tracking disease transmission, and studying population dynamics ( Song, et al. 2020 ; Li, et al. 2024 ). However, traditional state-of-the-art desktop software for haplotype network construction and/or visualization, including TCS ( Clement, et al. 2000 ), Network ( https://www.fluxus-engineering.com/ ), PopART ( Leigh and Bryant 2015 ), HapStar ( Teacher and Griffiths 2011 ) and Hapsolutely ( Vences, et al. 2024 ), only integrated single-threaded algorithms ( Templeton, et al. 1992 ; Excoffier and Smouse 1994 ; Bandelt, et al. 1999 ; Matschiner 2016 ), which limits their efficiency when processing large datasets ( Table 1 ) . 
View this table: View inline View popup Download powerpoint Table 1. Comparison of programs for haplotype network construction and/or visualization. Currently known multi-threaded construction algorithms include McAN ( Li, et al. 2023 ) and fastHaN ( Chi, et al. 2023 ), which are faster than all existing algorithms even in single-threaded mode. Among them, McAN is currently the only program capable of processing sequences on the order of millions. Nevertheless, McAN provides only a simple command-line interface, which may appear daunting to beginning users, and an online web service, which depends on internet connection availability and stability and may bring concerns about privacy and security risks of user data. To address these challenges, we developed HaploThread, a fully local software that integrates intuitive user interfaces with advanced multi-threaded algorithms, ensuring ease of use, data security, and computational efficiency in haplotype network construction and visualization. Built upon McAN, an algorithm from our previous work, and enhanced with the advanced features of fastHaN, HaploThread leverages the performance advantages of desktop environments and multi-threading to deliver rapid, scalable, and reliable analyses for evolutionary studies in population genetics. Functionality HaploThread currently comprises two functional modules: the network construction module and the network visualization module ( Figure 1 ). Each module is available in two distinct visual styles of interface: a platform-dependent style and a web-based style. Download figure Open in new tab Figure 1. Screen shot for the interactive interface for network construction/visualization in HaploThread The construction module processes sequence data in either VCF or PHYLIP format and generates the constructed network file in GraphML or GML format. Both styles of interfaces offer identical functionality, ensuring a consistent user experience. 
Users have the flexibility to choose a multi-threaded construction algorithm from McAN, TCS, MSN, or MJN, specify the required parameters, configure the number of computational threads based on available system resources, and start the process by clicking the “Run” button. The entire workflow is designed to be intuitive and user-friendly, requiring minimal explanation for efficient operation. The visualization module is designed to open network files in GraphML or GML formats and employs a force-directed algorithm to lay out the network. If users provide metadata, such as sampling date, geographic location, or clustering information, via a metadata file, all samples and network nodes are automatically assigned distinct colors according to the associated metadata, generating visualization color schemes that are available for user selection and application. Additionally, users can observe the temporal dynamics of the network by either automatically running or manually dragging the timeline. The constructed network can be converted between GraphML and GML formats and exported as a PDF file or images in various formats, such as SVG, PNG, JPEG, or BMP. The functionalities of the two styles of visualization differ slightly. The platform-dependent interface provides a visual browser for haplotype sequence differences, which can be displayed and selected in synchronization with the haplotype network. Users can customize the colour and font attributes of visual elements to suit their preferences. In contrast, the web-based interface offers a map browsing function, allowing nodes to be displayed on a world map when country-level metadata is available. Implementation HaploThread software is written using the Qt cross-platform application framework ( Figure 2 ). Compiled executables for Windows, macOS, and Linux are provided. It is architected with a plugin-based approach. 
Functional modules are designed in the form of plugins and can be integrated, updated or uninstalled independently of the core application, reflecting the flexibility and extensibility of the software. Download figure Open in new tab Figure 2. UML class diagram for the design of HaploThread. The haplotype network construction module in HaploThread is implemented by integrating the software McAN and fastHaN into a plugin. The platform-dependent style of the module is developed using C++ and built on the Qt Widgets module. This approach ensures high performance and enhanced flexibility for desktop applications, and seamless consistency with the native styles of operating systems such as Windows, macOS, and Linux. The web-based style of the module is written using JavaScript and HTML, and it is embedded within the plugin via the Qt WebEngine module. This design provides users with an experience that is consistent with online web services. Future improvements may involve the implementation of novel algorithms for network construction and layout, 3D visualization of networks, and expanding support for additional input and output formats. Performance Comparison To evaluate the computational performance of different haplotype network construction tools, we conducted a benchmarking experiment using SARS-CoV-2 sequences downloaded from the RCoV19 database. Three datasets were prepared, containing 500, 1,000, and 5,000 sequences, respectively ( Data S1 ). We tested five representative state-of-the-art desktop software tools: PopART, HaploThread, Hapsolutely, Network, and HapNetworkView. For a fair comparison, all tools were run in single-threaded mode regardless of their support for parallel computing. To reflect typical usage scenarios for desktop applications, all tests were conducted on a consumer-grade laptop running Windows 10 Home Edition, equipped with an Intel Core i7-10510U CPU (1.80 GHz, 4 cores, 8 logical processors), 16 GB RAM, and a 512 GB SSD. 
The results demonstrate that HaploThread consistently outperformed all other tools in terms of execution time across all three datasets ( Table S1 ). For tools that offer multiple algorithms, only the fastest configuration was included in the comparison. Notably, on the 5,000-sequence dataset, HaploThread completed both network construction and visualization in just 23 seconds, while PopART, Hapsolutely, and Network were unable to finish the task within one hour. This highlights the superior efficiency and scalability of HaploThread in handling large-scale haplotype data under real-world computing conditions. Discussions and Conclusion The rapid emergence and evolution of novel and outbreak-associated pathogens pose significant challenges to public health surveillance and epidemic control. Timely and accurate construction of haplotype networks is essential for tracking pathogen evolution, understanding transmission dynamics, and informing intervention strategies. Traditional haplotype network construction GUI desktop software often struggles to handle large-scale genomic datasets efficiently, limiting their applicability in urgent outbreak scenarios. HaploThread was specifically designed to address that problem. Currently, a state-of-the-art desktop software for haplotype network construction and visualization, named HapNetworkView ( Chi, et al. 2025 ), has been developed. It integrates a multithreaded MJN algorithm from fastHaN. In comparison, HaploThread offers broader advantages in terms of algorithmic diversity. In addition to supporting a multithreaded MJN algorithm, HaploThread integrates five state-of-the-art multithreaded haplotype network construction algorithms, including McAN, MJN, MSN, and two implementations of TCS. This comprehensive algorithm suite provides users with enhanced flexibility and computational efficiency, especially when analysing large-scale datasets, making HaploThread a versatile and powerful tool for haplotype network analysis. 
In addition to its algorithmic strengths, HaploThread also offers advanced visualization capabilities that enhance interpretability and user interaction. HaploThread not only enables colouring of nodes in the haplotype network based on geographic information, but also supports the dynamic display or concealment of nodes according to sampling time. These spatiotemporal visualization capabilities provide users with enhanced flexibility to explore evolutionary relationships and track the temporal and geographic spread of haplotypes, which is especially valuable in infectious disease research. In summary, HaploThread simplifies the complex process of haplotype network analysis by integrating multiple methods of construction and visualization into a unified, user-friendly platform. Its intuitive design ensures accessibility for users with varying levels of expertise. Furthermore, the tool’s ability to handle large datasets, combined with its support for plugin mechanisms, facilitates community-driven improvements and customization, enhancing its adaptability to evolving research needs. Code Availability Installation packages of HaploThread are freely available at https://ngdc.cncb.ac.cn/biocode/tool/BT007948 under the GNU General Public License (GPL). Data Availability The datasets used for performance benchmarking in this study—comprising subsets of 500, 1,000, and 5,000 SARS-CoV-2 sequences from the RCoV19 database—are provided in Supplementary Material 1 . Acknowledgements This work was supported by the Key Collaborative Research Program of the Alliance of National and International Science Organizations for the Belt and Road Regions (Grant No. ANSO-CR-KP-2022-09), and the National Natural Science Foundation of China (Grant No. 32270718, 32170678). 
Funder Information Declared the Key Collaborative Research Program of the Alliance of National and International Science Organizations for the Belt and Road Regions , ANSO-CR-KP-2022-09 the National Natural Science Foundation of China , 32270718 , 32170678 References ↵ Bandelt HJ , Forster P , Röhl A. 1999 . Median-joining networks for inferring intraspecific phylogenies . Mol Biol Evol 16 : 37 – 48 . OpenUrl CrossRef PubMed Web of Science ↵ Chi L , Dong Y , Wang R , Yang S , Wu L , Xue Y , Chen H. 2025 . HapNetworkView: a tool for haplotype network exploration and visualization . BMC Genomics 26 : 52 . OpenUrl PubMed ↵ Chi L , Zhang X , Xue Y , Chen H. 2023 . fastHaN: a fast and scalable program for constructing haplotype network for large-sample sequences . Mol Ecol Resour . ↵ Clement M , Posada D , Crandall KA . 2000 . TCS: a computer program to estimate gene genealogies . Mol Ecol 9 : 1657 – 1659 . OpenUrl CrossRef PubMed Web of Science ↵ Excoffier L , Smouse PE . 1994 . Using allele frequencies and geographic subdivision to reconstruct gene trees within a species: molecular variance parsimony . Genetics 136 : 343 – 359 . OpenUrl Abstract / FREE Full Text ↵ Kumar S , Dudley J. 2007 . Bioinformatics software for biologists in the genomics era . Bioinformatics 23 : 1713 – 1717 . OpenUrl CrossRef PubMed Web of Science ↵ Leigh JW , Bryant D. 2015 . popart: full-feature software for haplotype network construction . Methods in Ecology and Evolution 6 : 1110 – 1116 . OpenUrl CrossRef ↵ Li L , Li C , Li N , Zou D , Zhao W , Luo H , Xue Y , Zhang Z , Bao Y , Song S. 2024 . Machine Learning Early Detection of SARS-CoV-2 High-Risk Variants . Adv Sci (Weinh) 11 : e2405058 . OpenUrl ↵ Li L , Xu B , Tian D , Wang A , Zhu J , Li C , Li N , Zhao W , Shi L , Xue Y , et al. 2023 . McAN: a novel computational algorithm and platform for constructing and visualizing haplotype networks . Brief Bioinform 24 . ↵ Matschiner M. 2016 . 
Fitchi: haplotype genealogy graphs based on the Fitch algorithm . Bioinformatics 32 : 1250 – 1252 . OpenUrl CrossRef PubMed ↵ Song S , Ma L , Zou D , Tian D , Li C , Zhu J , Chen M , Wang A , Ma Y , Li M , et al. 2020 . The Global Landscape of SARS-CoV-2 Genomes, Variants, and Haplotypes in 2019nCoVR . Genomics Proteomics Bioinformatics 18 : 749 – 759 . OpenUrl PubMed ↵ Teacher AG , Griffiths DJ . 2011 . HapStar: automated haplotype network layout and visualization . Mol Ecol Resour 11 : 151 – 153 . OpenUrl CrossRef PubMed ↵ Templeton AR , Crandall KA , Sing CF . 1992 . A cladistic analysis of phenotypic associations with haplotypes inferred from restriction endonuclease mapping and DNA sequence data . III. Cladogram estimation. Genetics 132 : 619 – 633 . OpenUrl PubMed ↵ Vences M , Patmanidis S , Schmidt J-C , Matschiner M , Miralles A , Renner SS . 2024 . Hapsolutely: a user-friendly tool integrating haplotype phasing, network construction, and haploweb calculation . Bioinformatics Advances 4 . View the discussion thread. Back to top Previous Next Posted July 10, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following HaploThread: A Scalable Integrated Desktop Platform for Constructing and Visualizing Haplotype Networks for Large-sample Sequences Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share HaploThread: A Scalable Integrated Desktop Platform for Constructing and Visualizing Haplotype Networks for Large-sample Sequences Bo Xu , Lun Li , Cuiping Li , Anke Wang , Zhuojing Fan , Shuhui Song bioRxiv 2025.07.06.659816; doi: https://doi.org/10.1101/2025.07.06.659816 Share This Article: Copy Citation Tools HaploThread: A Scalable Integrated Desktop Platform for Constructing and Visualizing Haplotype Networks for Large-sample Sequences Bo Xu , Lun Li , Cuiping Li , Anke Wang , Zhuojing Fan , Shuhui Song bioRxiv 2025.07.06.659816; doi: https://doi.org/10.1101/2025.07.06.659816 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41911) Biophysics (21436) Cancer Biology (18578) Cell Biology (25482) Clinical Trials (138) Developmental Biology (13371) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15599) Genomics (22482) Immunology (17728) Microbiology (40363) Molecular Biology (17163) Neuroscience (88536) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15129) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00