TinkerHap - A Novel Read-Based Phasing Algorithm with Integrated Multi-Method Support for Enhanced Accuracy

doi:10.1101/2025.02.16.638517

TinkerHap - A Novel Read-Based Phasing Algorithm with Integrated Multi-Method Support for Enhanced Accuracy

2025 · doi:10.1101/2025.02.16.638517

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 26,473 characters · extracted from preprint-html · click to expand

TinkerHap - A Novel Read-Based Phasing Algorithm with Integrated Multi-Method Support for Enhanced Accuracy | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results TinkerHap - A Novel Read-Based Phasing Algorithm with Integrated Multi-Method Support for Enhanced Accuracy View ORCID Profile Uri Hartmann , Eran Shaham , View ORCID Profile Dafna Nathan , Ilana Blech , View ORCID Profile Danny Zeevi doi: https://doi.org/10.1101/2025.02.16.638517 Uri Hartmann 1 Department of Biotechnology, Jerusalem Multidisciplinary College , Jerusalem, Israel Find this author on Google Scholar Find this author on PubMed Search for this author on this site 
ORCID record for Uri Hartmann For correspondence: uri.hartman{at}edu.hac.ac.il Eran Shaham 1 Department of Biotechnology, Jerusalem Multidisciplinary College , Jerusalem, Israel Find this author on Google Scholar Find this author on PubMed Search for this author on this site Dafna Nathan 1 Department of Biotechnology, Jerusalem Multidisciplinary College , Jerusalem, Israel Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Dafna Nathan Ilana Blech 1 Department of Biotechnology, Jerusalem Multidisciplinary College , Jerusalem, Israel Find this author on Google Scholar Find this author on PubMed Search for this author on this site Danny Zeevi 1 Department of Biotechnology, Jerusalem Multidisciplinary College , Jerusalem, Israel Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Danny Zeevi Abstract Full Text Info/History Metrics Preview PDF Abstract Phasing, the assignment of alleles to their respective parental chromosomes, is fundamental to studying genetic variation and identifying disease-causing variants. Traditional approaches, including statistical, pedigree-based, and read-based phasing, face challenges such as limited accuracy for rare variants, reliance on external reference panels, and constraints in regions with sparse genetic variation. To address these limitations, we developed TinkerHap, a novel and unique phasing algorithm that integrates a read-based phaser, based on a pairwise distance-based unsupervised classification, with external phased data, such as statistical or pedigree phasing. We evaluated TinkerHap’s performance against other phasing algorithms using 1,040 parent-offspring trios from the UK Biobank (Illumina short-reads) and GIAB Ashkenazi trio (PacBio long-reads). 
TinkerHap’s read-based phaser alone achieved higher phasing accuracies than all other algorithms with 95.1% for short-reads (second best: 94.8%) and 97.5% for long-reads (second best: 95.5%). Its hybrid approach further enhanced short-read performance to 96.3% accuracy and was able to phase 99.5% of all heterozygous sites. TinkerHap also extended haplotype block sizes to a median of 79,449 base-pairs for long-reads (second best: 68,303 bp) and demonstrated higher accuracy for both SNPs and indels. This combination of a robust read-based algorithm and hybrid strategy makes TinkerHap a uniquely powerful tool for genomic analyses. Introduction Phasing is the process of assigning alleles to their respective maternal or paternal chromosomes. It is essential for determining precise protein sequences in an individual and identifying genes that cause diseases. Various methods of phasing are available, including statistical phasing based on phased reference genomes (e.g. ShapeIT [ 1 ] and Beagle [ 2 ]), pedigree-based phasing (e.g. LINKPHASE3 [ 3 ] and TrioPhaser [ 4 ]), and read-based phasing (e.g. WhatsHap [ 5 ] and HapCUT2 [ 6 ]). Statistical phasing is limited by how well the reference panel represents the sample data and is particularly inaccurate in phasing rare variants [ 7 ]. Pedigree-based phasing is accurate for common and rare variants, but pedigree information is typically not available for the inspected individual. Read-based (or read-aware) phasing works by analyzing sequencing reads that span multiple heterozygous sites to phase them together, resulting in very high accuracy. This method is independent of reference bias, remains unaffected by the rarity of the alleles, and does not require pedigree data. However, since read-based phasing cannot phase regions where heterozygous sites are farther than read sizes, it can only effectively be used in highly variable regions or when using long-reads that span several heterozygous sites. 
Here we present TinkerHap, a read-based phasing tool offering consistent performance and enhanced accuracy. TinkerHap excels in accurately handling rare variants and variable genomic regions, such as the Human Leukocyte Antigen (HLA) locus, while also effectively phasing long-read data. Moreover, TinkerHap uniquely integrates information from statistical phasing methods into its read-based framework. This hybrid approach bridges gaps in read coverage and extends haplotype blocks. Methods Overview TinkerHap is implemented in Python 3 and utilizes the pysam package [ 8 ] for manipulating alignment and variant calling files. The command-line interface accepts an alignment file (SAM/BAM/CRAM) and a variant calling file (VCF/BCF) as inputs, producing a phased VCF file through read-based haplotype phasing. Optionally, TinkerHap can integrate a pre-phased VCF file from a third-party tool (e.g., ShapeIT [ 1 ] for statistical-based phasing) to align and merge haplotypes with greater accuracy when possible. Additionally, TinkerHap can generate multiple output formats to represent the phased haplotypes: a BED file listing the identified haplotype blocks, a BAM file identical to the original but annotated with haplotype and phase information in the Haplotype Phase field (HP) and Haplotype number field (HT), and two separate BAM files - each containing reads corresponding to one of the phased alleles. These outputs facilitate annotation or the splitting of the original alignment into distinct files for each allele, enabling downstream analyses. Algorithm TinkerHap implements a three-step phasing algorithm, based on a pairwise distance-based unsupervised classification, designed for precision and scalability. Below is a detailed description of each step with mathematical notations. 1. Identification of Heterozygous Sites Let S = { s 1 , s 2 ,…, s m } represent the set of heterozygous sites identified from the input variant call file (VCF). 
A site $s_i$ is considered heterozygous if $a_i \neq b_i$, where $a_i$ and $b_i$ are the two alleles at $s_i$. The loci of these sites are identified as $L(S) = \{l_1, l_2, \ldots, l_m\}$, forming the foundation for subsequent phasing steps. Ambiguous allele calls, characterized by low scores in the VCF’s QUAL column and typically caused by sequencing or alignment errors, are flagged. 2. Association of Reads with Heterozygous Sites Let $R = \{r_1, r_2, \ldots, r_n\}$ denote the set of sequencing reads. Each read $r_j$ spans a subset of heterozygous sites $S_j \subseteq S$. For each read, we map its alleles to the overlapping sites: $A_{r_j}(s_i) \in \{a_i, b_i\}$ for every $s_i \in S_j$. This step ensures precise allele identification by linking reads to heterozygous sites while accounting for potential alignment errors or ambiguities, such as indels. To ensure accuracy, only reads meeting a minimum mapping quality threshold (e.g., MAPQ ≥ 20) are considered, ensuring that low-confidence alignments do not influence the phasing process. 3. Calculation of Phase Scores The phasing process begins by arbitrarily assigning the first read $r_1$ to one of the haplotypes, for instance $H_1$. This initial assignment acts as a seed to propagate haplotypes across all overlapping reads. Each read $r$ is then evaluated to determine its phase matching scores $P_1(r)$ and $P_2(r)$ for the two haplotypes, $H_1$ and $H_2$. These scores are computed by analyzing all overlapping reads and the heterozygous sites they share with $r$. The phase scores are calculated as: $$P_H(r) = \sum_{k \in K_r} \sum_{s \in S_r \cap S_k} \Delta P_H(r), \quad H \in \{1, 2\},$$ where $K_r$ is the set of all overlapping reads for $r$, and $S_r$ and $S_k$ are the sets of heterozygous sites for reads $r$ and $k$, respectively. The contribution of each shared site $s$ to the phase score, $\Delta P_H(r)$, is defined as: $$\Delta P_H(r) = \begin{cases} +w(s), & \text{if } k \text{ is assigned to } H \text{ and } A_r(s) = A_k(s) \\ -w(s), & \text{if } k \text{ is assigned to } H \text{ and } A_r(s) \neq A_k(s) \\ 0, & \text{otherwise.} \end{cases}$$ Here, $A_r(s)$ and $A_k(s)$ represent the alleles of $r$ and $k$ at site $s$, respectively.
The weight $w(s)$ assigned to site $s$ depends on the type of heterozygous site: $$w(s) = \begin{cases} w_{\mathrm{SNP}}, & \text{if } s \text{ is a SNP} \\ w_{\mathrm{indel}}, & \text{if } s \text{ is an indel.} \end{cases}$$ This scoring ensures that the phase scores for $r$ are influenced by the agreement or disagreement between $r$ and all overlapping reads at shared heterozygous sites. After calculating the phase scores, the haplotype $HP$ of $r$ is assigned as follows: $$HP(r) = \begin{cases} H_1, & \text{if } P_1(r) > P_2(r) \\ H_2, & \text{if } P_2(r) > P_1(r). \end{cases}$$ If $P_1(r)$ and $P_2(r)$ are equal, the haplotype assignment can propagate from the overlapping read with the strongest phase connection, or a new haplotype block may be started. This approach ensures consistency in haplotype assignments based on the majority consensus among overlapping reads. 4. Haplotype Extension Haplotypes are extended iteratively by analyzing overlapping reads. If a read $r_k$ overlaps two or more phased reads, its phase is determined by propagating the majority consensus: $$HP(r_k) = \arg\max_{H \in \{H_1, H_2\}} \left| \{ r \in K_{r_k} : HP(r) = H \} \right|,$$ where $K_{r_k}$ is the set of phased reads overlapping $r_k$. This ensures the consistency of haplotype assignments across contiguous genomic regions. Reads that span conflicting haplotypes are flagged for manual review or downstream quality filtering. 5. Paired-End Read Merging For paired-end reads $(r_j, r_{j'})$, the algorithm evaluates the consistency of their haplotypes: the pair is consistent if $HP(r_j) = HP(r_{j'})$ and inconsistent otherwise. Inconsistent pairs trigger a phase reassignment to minimize discordance, leveraging the paired-end linkage information. A weighted graph representation of paired-end links can be constructed for further optimization of haplotype continuity. 6. Integration with Pre-Phased Data (Optional) When an additional pre-phased VCF file is provided, for example one generated by statistical phasing tools such as ShapeIT, the algorithm merges the read-based haplotypes with the pre-phased data. This involves merging haplotypes and, if necessary, switching the phase numbers (e.g., swapping haplotype 1 and haplotype 2) to ensure consistency with the pre-phased data numbering of haplotypes.
The alignment score $A(b,h)$ for a pre-phased block $b$ and a read-based block $h$ is calculated as: $$A(b,h) = \sum_{s \in S_b \cap S_h} w(s)\,\delta_{b,h}(s),$$ where $\delta_{b,h}(s)$ is 1 if $b$ and $h$ place the alleles of site $s$ on the same haplotypes and 0 otherwise, $S_b$ and $S_h$ are the sets of heterozygous sites in $b$ and $h$, and $w(s)$ represents the weight based on the site type (e.g., SNP or indel). Haplotypes are adjusted to maximize $A(b,h)$, ensuring that the merged haplotypes align with the pre-phased data and improving the overall phasing accuracy. 7. Output Generation The final outputs include: Phased VCF: Annotated with a PS (Phase Set) field. Annotated BAM: Each read is tagged with HP (Haplotype Phase) and HT (Haplotype number) fields. Split BAM Files: Separate BAM files for each haplotype, facilitating downstream analyses. BED File: Haplotype boundaries across the genome are defined for visualization. The detailed algorithm and code can be accessed at: https://github.com/DZeevi-Lab/TinkerHap Evaluation TinkerHap was evaluated in the following use cases: To evaluate the algorithm’s performance in variable regions and for rare variants using Illumina short-reads, we analyzed Whole Genome Sequencing (WGS) data from 1,040 parent-offspring trios that we identified in the UK Biobank [ 9 ] on the MHC class II region in humans, specifically on chr6:32,439,878-33,143,325 (hg38 genome version). To evaluate the algorithm’s performance with long-reads, we used PacBio long-read sequencing data of the full genomes of GIAB Ashkenazi trio HG002-4 and Chinese trio HG005-7 datasets by Revio (publicly offered by GIAB [ 10 ]). For each offspring in the trios, we constructed a “truth” set of known phased heterozygous sites (“truth sites”). This was achieved by examining loci where each parent possesses different homozygous alleles or where one parent was heterozygous, and the other was homozygous.
After preparing the data, we phased the offspring sequence using the following algorithms: ShapeIT[ 1 ], WhatsHap[ 5 ], HapCUT2[ 6 ], TinkerHap, and TinkerHap with ShapeIT[ 1 ] phased data as an additional input for merging haplotypes (as described in the “Algorithm” section above). The success rate was evaluated by counting the number of sites in the phased output that matched the truth set. Results MHC class II gene region phasing (chr6:32,439,878-33,143,325) View this table: View inline View popup Download powerpoint Table 1. Phasing performance of different algorithms on short-reads aligned to the MHC class II region 1 TinkerHap+ShapeIT: TinkerHap algorithm when used with additional ShapeIT pre-phased file. 2 Phasing accuracy: Successfully phased sites divided by the total number of heterozygous sites. 3 Haplotype size: Median haplotype size across all samples. “Haplotype” refers to a set of alleles at variant sites along a single chromosome that are inherited together and are guaranteed to be phased together by the algorithm. 4 Common phased sites errors: Phasing error % in heterozygous sites phased by all algorithms. 5 Runtime: Median runtime per sample. Download figure Open in new tab Figure 1. Phased BAM Outputs displaying Heterozygous sites (IGV [ 11 ] screenshot) - The upper track is the original alignment, while the two tracks below represent the output of Haplotype 1 and Haplotype 2. Heterozygous sites are correctly segregated between the two phases, demonstrating successful phasing. The continuous blue line in the bottom track illustrates a BED file annotation, highlighting the size of the haplotype region where all variants are confirmed to share the same phase. PacBio phasing (whole genome) View this table: View inline View popup Download powerpoint Table 2. Phasing performance of different algorithms on long-reads of full genomes. 1 Phasing accuracy: Successfully phased sites divided by the total number of heterozygous sites. 
2 Haplotype size: Median haplotype size across all samples. 3 Common phased sites errors: Phasing error % in heterozygous sites phased by all algorithms. 4 Runtime: Median runtime per sample. Discussion Here, we introduce TinkerHap, a read-based phasing algorithm designed for accurate and reliable phasing across diverse genomic contexts, with the ability to integrate statistical phasing data from third-party tools for improved performance. We evaluated TinkerHap using two datasets: the MHC class II region in humans with Illumina short-read WGS data to assess its accuracy in variable regions, and PacBio sequencing data to evaluate its performance with long-reads. These datasets were selected due to their suitability for testing read-based phasing algorithms, as both are characterized by a high density of variants that provide many opportunities for phasing. Performance of Short-Reads in Variable Regions In the MHC class II region using short-reads, TinkerHap phased 97.1% of variants with 95.1% accuracy. In comparison, the second-best algorithm phased 96.2% of variants with 94.8% accuracy. All methods showed higher phasing accuracy for SNPs compared to indels (97.1% and 89.6%, respectively, in TinkerHap). Performance of Long-Read Sequencing TinkerHap achieved a phasing accuracy of 97.5% for SNPs and 96.0% for indels with PacBio datasets. These results were superior to the second-best algorithm, which demonstrated accuracies of 95.5% and 95.4%, respectively. Moreover, TinkerHap produced longer haplotype blocks (median size: 79,449 bp) compared to the second-best algorithm (68,303 bp). Runtime analysis revealed that TinkerHap required 10,519 seconds per sample, compared to 5,495 seconds for the fastest algorithm. Comparison of Long-Read and Short-Read Performance TinkerHap performed better with long-read sequencing data compared to short-read data in several key metrics. 
Long-reads offer superior upstream alignment quality, particularly at highly variable sites, which enhances the overall accuracy of variant calling and subsequent phasing steps. Long-read data yielded more extensive haplotype blocks (median size: 79,449 bp compared to 631 bp with short-reads) and higher phasing accuracy (97.5% for SNPs in long-reads compared to 96.0% in short-reads, and 96.0% for indels in long-reads compared to 87.8% in short-reads). This improved performance is expected due to long-reads containing more heterozygous sites and enabling improved alignments. Integration with Statistical Phasing TinkerHap uniquely includes the ability to integrate data from third-party tools, such as ShapeIT. By incorporating pre-phased haplotypes, the TinkerHap + ShapeIT combination achieved 99.5% phased variants with 96.3% accuracy, significantly outperforming standalone methods. This hybrid approach improved haplotype block continuity and effectively addressed gaps in read coverage. Limitations TinkerHap’s runtime and memory usage for long-read data present areas for potential optimization, and it currently lacks support for polyploid genomes. TinkerHap is limited in merging distant haplotypes, which could be particularly useful for applications such as exome sequencing. Future incorporation of pedigree information could address this issue and enhance TinkerHap’s accuracy in trio or family-based studies. In most phasing errors that we manually examined, inaccuracies were primarily attributed to upstream variant calling rather than to the phasing algorithm itself. This suggests that TinkerHap may be approaching the limit of what can be achieved with downstream read-based phasing alone. This underscores the importance of high-quality preprocessing. Funding This research was supported by the ISRAEL SCIENCE FOUNDATION and JDRF (grant No. 2658/21). Competing interests The authors declare no competing interests.
Author contribution Conceptualization – U.H., D.Z.; Methodology – U.H.; Formal Analysis – U.H.; Investigation – U.H.; Writing, original draft preparation – U.H.; Writing, review & editing – U.H., D.Z., E.S., D.N., I.B.; Visualization – U.H.; Supervision – D.Z.; Funding Acquisition – D.Z. Acknowledgements We thank Rona Gershon Talmi from the Hamaabada Podcast (Kan) and Dr. Jeremy Fogel and Tuval Rosenwasser from the Think & Drink Different Podcast for their contribution to this work. This research has been conducted using the UK Biobank Resource under application number 74655. Footnotes This revision includes the addition of an acknowledgment section in accordance with UK Biobank's requirements. No other changes have been made to the research content, analysis, or conclusions. This update ensures compliance with attribution guidelines while maintaining the integrity of the original work. References [1]. ↵ O. Delaneau , J. Marchini , and J. F. Zagury , “ A linear complexity phasing method for thousands of genomes ,” Nature Methods 2011 9:2, vol. 9 , no. 2 , pp. 179 – 181 , Dec . 2011 , doi: 10.1038/nmeth.1785 . OpenUrl CrossRef PubMed [2]. ↵ B. L. Browning , X. Tian , Y. Zhou , and S. R. Browning , “ Fast two-stage phasing of large-scale sequence data ,” The American Journal of Human Genetics , vol. 108 , no. 10 , pp. 1880 – 1890 , Oct . 2021 , doi: 10.1016/j.ajhg.2021.08.005 . OpenUrl CrossRef PubMed [3]. ↵ T. Druet and M. Georges , “ LINKPHASE3: an improved pedigree-based phasing algorithm robust to genotyping and map errors ,” Bioinformatics , vol. 31 , no. 10 , pp. 1677 – 1679 , May 2015 , doi: 10.1093/bioinformatics/btu859 . OpenUrl CrossRef PubMed [4]. ↵ D. B. Miller and S. R. Piccolo , “ trioPhaser: using Mendelian inheritance logic to improve genomic phasing of trios ,” BMC Bioinformatics , vol. 22 , no. 1 , p. 559 , Dec . 2021 , doi: 10.1186/s12859-021-04470-4 . OpenUrl CrossRef PubMed [5]. ↵ M. Martin et al.
, “ WhatsHap: fast and accurate read-based phasing ,” bioRxiv , p. 085050 , Nov . 2016 , doi: 10.1101/085050 . OpenUrl Abstract / FREE Full Text [6]. ↵ V. Bansal , “ HapCUT2: A Method for Phasing Genomes Using Experimental Sequence Data ,” Methods in Molecular Biology , vol. 2590 , pp. 139 – 147 , 2023 , doi: 10.1007/978-1-0716-2819-5_9 . OpenUrl CrossRef [7]. ↵ Y. Choi , A. P. Chan , E. Kirkness , A. Telenti , and N. J. Schork , “ Comparison of phasing strategies for whole human genomes ,” PLoS Genet , vol. 14 , no. 4 , p. e1007308 , Apr . 2018 , doi: 10.1371/journal.pgen.1007308 . OpenUrl CrossRef PubMed [8]. ↵ A. Heger , “ Pysam: HTSlib interface for Python , 2009 ,” https://github.com/pysam-developers/pysam . [9]. ↵ C. Sudlow et al. , “ UK Biobank: An Open Access Resource for Identifying the Causes of a Wide Range of Complex Diseases of Middle and Old Age ,” PLoS Med , vol. 12 , no. 3 , p. 1001779 , Mar . 2015 , doi: 10.1371/JOURNAL.PMED.1001779 . OpenUrl CrossRef [10]. ↵ Genome in a Bottle Consortium , “GIAB Benchmarking Data,” 2023 , Accessed: Dec. 12, 2024 . [Online]. Available: https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/ [11]. ↵ H. Thorvaldsdottir , J. T. Robinson , and J. P. Mesirov , “ Integrative Genomics Viewer (IGV): high-performance genomics data visualization and exploration ,” Brief Bioinform , vol. 14 , no. 2 , pp. 178 – 192 , Mar . 2013 , doi: 10.1093/bib/bbs017 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted March 16, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following TinkerHap - A Novel Read-Based Phasing Algorithm with Integrated Multi-Method Support for Enhanced Accuracy Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share TinkerHap - A Novel Read-Based Phasing Algorithm with Integrated Multi-Method Support for Enhanced Accuracy Uri Hartmann , Eran Shaham , Dafna Nathan , Ilana Blech , Danny Zeevi bioRxiv 2025.02.16.638517; doi: https://doi.org/10.1101/2025.02.16.638517 Share This Article: Copy Citation Tools TinkerHap - A Novel Read-Based Phasing Algorithm with Integrated Multi-Method Support for Enhanced Accuracy Uri Hartmann , Eran Shaham , Dafna Nathan , Ilana Blech , Danny Zeevi bioRxiv 2025.02.16.638517; doi: https://doi.org/10.1101/2025.02.16.638517 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genomics Subject Areas All Articles Animal Behavior and Cognition (7622) Biochemistry (17648) Bioengineering (13868) Bioinformatics (41876) Biophysics (21422) Cancer Biology (18552) Cell Biology (25458) Clinical Trials (138) Developmental Biology (13364) Ecology (19866) Epidemiology (2067) Evolutionary Biology (24290) Genetics (15589) Genomics (22475) Immunology (17711) Microbiology (40325) Molecular Biology (17144) Neuroscience (88469) Paleontology (666) Pathology (2826) Pharmacology and Toxicology (4815) Physiology (7635) Plant Biology (15113) Scientific Communication and Education (2044) Synthetic Biology (4286) Systems Biology (9814) Zoology (2268)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00