FuFiHLA: A tool for Full-Field HLA typing from long reads data

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 27,479 characters · extracted from preprint-html · click to expand
FuFiHLA: A tool for Full-Field HLA typing from long reads data | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results FuFiHLA: A tool for Full-Field HLA typing from long reads data Jingqing Hu , Qian Qin , Heng Li , Ying Zhou doi: https://doi.org/10.1101/2025.10.23.684216 Jingqing Hu 1 Department of Data Science, Dana-Farber Cancer Institute , Boston, MA, 02115, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Qian Qin 1 Department of Data Science, Dana-Farber Cancer Institute , Boston, MA, 02115, USA 2 Department of Biomedical 
Informatics, Harvard Medical School , Boston, MA, 02115, USA 3 Division of Rheumatology, Inflammation, and Immunity , Brigham and Women’s Hospital, Boston, MA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Heng Li 1 Department of Data Science, Dana-Farber Cancer Institute , Boston, MA, 02115, USA 2 Department of Biomedical Informatics, Harvard Medical School , Boston, MA, 02115, USA 4 Broad Institute of MIT and Harvard , 415 Main St, Cambridge, MA, 02142, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: yingzhou{at}ds.dfci.harvard.edu hli{at}ds.dfci.harvard.edu Ying Zhou 1 Department of Data Science, Dana-Farber Cancer Institute , Boston, MA, 02115, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: yingzhou{at}ds.dfci.harvard.edu hli{at}ds.dfci.harvard.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF 0. Abstract Motivation Allele typing for Human Leukocyte Antigen (HLA) genes has many important clinical applications. Popular short-read typing can only accurately distinguish alleles at the peptide level, which potentially limit our understanding of the effect of variants in non-coding region. Recently, a few methods were declared to distinguish full-field HLA alleles from de-novo assemblies, which motivates us to develop an accurate HLA typing method directly from long reads. Results We developed FuFiHLA, a lightweight open-source software, to type HLA alleles. Currently it supports typing alleles of six HLA genes (HLA-A, HLA-B, HLA-C, HLA-DRB1, HLA-DQA1, and HLA-DQB1) from long reads. Evaluation using 47 PacBio HiFi samples from HPRC shows that FuFiHLA achieves 99.57% accuracy in the full field allele typing and QV as 50.1 for consensus allele sequence construction. 
Additional testing on four Nanopore R10 samples demonstrates slightly reduced accuracy in the fourth field. Availability and implementation FuFiHLA is available on GitHub ( https://github.com/jingqing-hu/FuFiHLA ) under the MIT License. 1. Introduction The Human Leukocyte Antigen (HLA) region is located on the short arm of chromosome 6, specifically on band 6p21.3, and covers more than one hundred coding genes that play crucial roles in the human immune system. This region has been implicated in hundreds of diseases ( 1 ). Due to the need to recognize unpredicted antigens, several HLA genes exhibit exceptionally high within-species diversity even within the coding region. For instance, pairwise differences are 3% for HLA-A, 4% for HLA-DRB1, and 5% for HLA-DQB1 ( 2 ), whereas the genome-wide average is 0.1% in human ( 3 ) and about 1.23% between human and chimpanzee ( 4 ). Given the high polymorphism in HLA genes, a nomenclature system has been developed for distinguishing gene sequences or alleles ( 5 ). A specific gene sequence, or an allele, is assigned a name consisting of four fields, addressing the difference in antigen binding affinity (field 1), in peptide (field 2), in coding nucleotide (field 3), and in intron nucleotide (field 4). An extra suffix tag may be appended to denote expression changes. HLA typing is the art of inferring the accurate HLA allele name from a biospecimen based on a pre-existing reference database. In this work, we will focus on using PacBio HiFi reads of DNA sequences ( 6 ), and on extending to Nanopore R10 data with slight modifications. The IMGT/HLA is used as the standard reference data set for HLA allele typing ( 7 ). In the latest version 3.59, it consists of 41003 distinct alleles covering 47 HLA genes. 
With a known reference allele data set, there are two major computational strategies for HLA allele typing from DNA sequences: one is to infer the best allele combinations from reference database to explain the observed reads, the other is to recover or reassemble the allele sequence and type the allele by comparing to the reference data set ( 8 – 12 ). Using short read data, the first strategy is less robust with undocumented alleles, while the second strategy might be affected by assembly errors. Thus, both strategies can hardly achieve high accuracy in full-field HLA typing. Long read sequencing could be a game-changer for full field HLA typing. Because of the long extension of read length (about 15kb for PacBio HiFi read), it is much easier than using the short reads (less than 300bp) to recover full length sequence of HLA allele. Recently, several assembly-based methods claimed to have high typing accuracy in full field ( 2 , 13 , 14 ). In complementary, there is a timely need for long reads-based HLA typing method. So far there are three typing tools taking long reads but with limitations: HLA*LA( 11 ) only outputs the first three fields nomenclature; Starphase ( 15 ) (a new version of HiFiHLA) only supports PacBio HiFi data; SpecImmune ( 16 ) needs an aligned bam with a specific version of reference genome, which is less convenient in practice. In this work, we will present a new method for HLA allele typing on the six graft transplant genes (HLA-A, HLA-B, HLA-C, HLA-DQA1, HLA-DQB1, and HLA-DRB1), it can take both aligned and raw/unaligned long reads as input, typing HLA allele in full-field and constructing consensus allele sequences. It supports both PacBio HiFi and Nanopore R10 data. 2. Pipeline Overview This pipeline processes reads through sequential steps to output allele sequences and allele types. ( Figure 1a ) Alleles with only partial sequences in IMGT/HLA are ignored in our current typing. Download figure Open in new tab Figure 1. 
Overview of FuFiHLA. a ) Informative reads are extracted from raw reads by mapping them to the anchor alleles and further mapped to all alleles from targeted genes to find out template allele pair for each gene. Then those informative reads are assigned to each of the template allele for variants calling and consensus gene sequence construction. Reconstructed sequences that include variants from template allele are further mapped to all alleles of targeted genes for allele typing. b ) MOI method for choosing better alignment. 2.1 Informative Read extraction We used a subset of IMGT/HLA alleles as anchors to extract informative reads from raw reads. When bam and the gene annotation of the reference are available, we can also use reads overlapped with the targeted genes as input. In our evaluation, both generated identical allele typing results, but with slightly difference in consensus sequence construction. To construct anchor allele set, we clustered all IMGT/HLA allele sequences with cd-hit (identity cutoff = 0.95) ( 17 ), and from each cluster we selected one allele as anchor allele to represent that allele cluster. We also added allele sequences from HLA-DRB6, HLA-DRB7, HLA-DRB8, and HLA-DRB9, which are mostly partial, to the anchor set. In total there are 136 anchor alleles selected, covering the six targeted genes and 37 other HLA genes. The reads were mapped to all anchor alleles and further extracted as informative reads based on the overlapping with anchor alleles from the six targeted genes. Particularly, an informative read should be covering at least 40% length of any targeted anchor allele or covering at least 200bps from one end of a targeted anchor allele. Due to the similarity among HLA genes, one read can be mapped to several alleles even from different genes which may introduce additional noise. Thus, the allele-to-read mappings were removed if the mismatch rate is 10 times higher than the lowest mismatch rate of the same read. 
2.2 Alignment comparison To determine the better alignment between two reads to an allele or two alleles to a read, we restrict the comparison to Mismatches in the Overlapped mapping Interval (MOI) ( Figure 1b ). This design proved to be more practical than using the mismatch rate directly due to the existence of sequencing error and the nature of allele length variation. With the MOI method, the better alignment is the one with the smaller number of mismatches in the overlapped mapping interval. For reads with higher error rates, such as the Nanopore R10 data, more ‘tie’ situations are created in the comparison by ignoring sequence differences of at most one indel or substitution event. 2.3 Template allele pair selection and consensus allele sequence construction A pair of template alleles for each targeted gene is used as references to construct consensus allele sequences from informative reads, based on the assumption that the target sample has exactly two copies of each of the six genes. The template allele pair is the combination of reference alleles with the highest agreement with the reads. In total 19,782 allele sequences of the targeted genes are used in the selection. To select a pair of template alleles for each of the targeted genes in one sample, we constructed a pool of alleles ($P_{\text{allele}}$) to iterate allele pairs and a pool of reads ($P_{\text{read}}$) that belong to the specific gene. When multiple alleles are mapped to the same region of a particular read, the best-match allele(s) are selected by the MOI method through pairwise comparison. The selected allele(s) are added to the allele pool $P_{\text{allele}}$, and the supporting read is added to the read pool $P_{\text{read}}$. The mappings between selected alleles and their supporting read are also recorded. For an allele $A$, a three-metric tuple $S(A) = (n, c, m)$ is calculated, where $n$ is the total number of unique supporting reads, $c$ is the total read coverage, and $m$ is the total number of matched base pairs. 
If one read supports several alleles, then its contribution to $c$ will be divided by the total number of supported alleles. The three-metric tuple for a pair of alleles, denoted $A$ and $B$, is calculated as $S(A, B) = S(A) + S(B)$. We rank allele pairs based on the three-metric tuple $S(A, B)$ in decreasing order and the allele pair of the first rank is selected as the template allele pair. Practically, we found that the top 15 alleles for HiFi reads and the top 30 alleles for R10 reads with the highest coverage were sufficient to select the proper template allele pair. Once the template allele pair is selected, reads are assigned/phased to each of them through the MOI method. If the mappings to each template allele are equal, then the read is assigned to both alleles. Since this procedure tends to give two different alleles even for homozygous sites, we added an additional filter to the template alleles: if one allele’s mapping coverage is four times smaller than the other’s, the template allele with less coverage is removed and the site is forced to be homozygous. With phased reads and the template allele, we applied longcallD for variant calling ( 18 ) and “bcftools consensus” for allele sequence construction ( 19 ). Only major variants supported by at least three reads are used for variant calling and consensus sequence construction. 2.4 HLA Allele typing HLA allele typing is based on a penalty score that applies different mismatch penalties to the mapping between the consensus allele sequence and the reference allele sequence. The exonic mismatch penalty is 10 per event and the intronic mismatch penalty is 1 per event. The allele with the lowest penalty score is selected as the typing allele. 3. Results We first evaluated the performance of our HLA typing tool on 47 HiFi samples from the Human Pangenome Reference Consortium (Table S1), which include 16 Americans, 24 Africans, 6 Asians and 1 European. The average read depth was 41X and the average read length was about 15 kb. 
The error rate of HiFi long reads is <1% ( 20 ). Taking the assembly-based annotation as ground truth ( 2 ), we compared FuFiHLA with three other long read typing tools, HLA*LA(1.0.4), Starphase(1.3.2) and SpecImmune(1.0.0), across the six targeted genes at different resolutions ( Table 1 ). In our evaluation, both Starphase and FuFiHLA demonstrated high accuracy, exceeding 99% even in the full field. View this table: View inline View popup Download powerpoint Table 1. Accuracy of long read HLA typing on six targeted genes. View this table: View inline View popup Download powerpoint Table 2. Runtime in minutes. Alignment time for Starphase is the time used to construct the bam file. We also evaluated the base pair consistency of gene sequence construction by aligning the constructed gene sequences to the de novo assemblies of each of the 47 samples from HPRC phase 2. Among the 564 allele sequences, FuFiHLA (raw/unaligned reads as input) gives 548 perfect matches and 16 with mismatches. Among the 16 gene sequences with mismatched nucleotides, 12 of them have indels of homopolymer or short tandem repeats and four of them have single nucleotide substitutions. (Table S2) Using bam for read extraction has similar performance. (Table S3) Meanwhile Starphase provided 532 perfect matches and 32 with mismatches. Among the 32 gene sequences with mismatched nucleotides, 26 of them have indels of homopolymer or short tandem repeats, five have single base substitutions and/or indel differences, and one is a wrong call for a DRB1 allele with edit distance larger than 4000. (Table S4) The overall QV score of consensus allele sequence is 50.1 for FuFiHLA and 44.6 for Starphase (one wrong call was excluded). Using Nanopore R10 reads would lead to reduced accuracy for FuFiHLA in the fourth field. 
Among four testing samples (HG002 from GIAB ( 21 ) and three non-cancer samples from the CASTLE panel ( 22 )), it achieved 100% accuracy for the first three fields but dropped to 87.0% for the fourth field, which is mainly due to the sequencing error of homopolymers in introns. (Table S5) 4. Discussion Long-read HLA typing is a timely need for clinical applications. In this work we presented a new method, FuFiHLA, which can accurately type six HLA genes from WGS long reads. Based on the evaluation on 47 HPRC HiFi samples, FuFiHLA achieved 99.57% accuracy at 4-field allele typing. Starphase is another method developed recently by PacBio, which has typing accuracy similar to FuFiHLA but is less accurate in consensus allele sequence construction. Starphase is faster than FuFiHLA when taking bam (aligned) as input, but slower when accounting for bam preparation time, because using anchor sets is more efficient than constructing bam to extract informative reads. ( Table 2 ) Taking advantage of the large number of reference HLA gene sequences in IMGT/HLA, we are able to select a template pair that minimizes the difference between reference alleles and targeted alleles, which makes it more robust in allele typing and allele sequence construction, even when the read depth is low. For example, the HG002 sample we used only has coverage of 10X, but we obtain correct allele typing for all 12 alleles, with only one allele having mismatches, which are indels of homopolymer or short tandem repeats. One potential risk for estimation error could be caused by the incompleteness of IMGT/HLA reference data. The only allele typing mismatch in the second field made by FuFiHLA in our evaluation happened on the allele DRB1*04:92 in the sample HG01358, where the allele’s full gene sequence is not available in the reference data and the coding sequence (partial) was used for typing the ground truth by Immuannot. However, FuFiHLA gave the identical allele sequence as the assembly. 
In summary, we provided the most accurate method for full field HLA allele typing and consensus gene sequences construction on six graft transplant related genes. Currently it supports PacBio HiFi and Nanopore R10 reads, but the framework could be extended for covering more genes with other types of long reads sequencing data. Conflict of interest None declared. Funding This work is supported by US National Institute of Health grant R01HG010040, R01HG014175, U41HG010972, U01HG013748 and U24CA294203 (to H.L.). Data availability HPRC HiFi reads and assemblies are publicly available from https://humanpangenome.org/hprc-data-release-2/ and the list of 47 samples are included as supplementary file. Nanopore R10 reads include three non-cancer samples (H1437, H2009 and HCC1937) from CASTLE panel ( https://github.com/CASTLE-Panel/castle ) and HG002 from GIAB ( https://42basepairs.com/browse/s3/ont-open-data/giab_2023.05/analysis/hg002/ ). Acknowledgements We would like to acknowledge the National Genome Research Institute (NHGRI) for funding the following grants supporting the creation of the human pangenome reference: U41HG010972, U01HG010971, U01HG013760, U01HG013755, U01HG013748, U01HG013744, R01HG011274, and the Human Pangenome Reference Consortium (BioProject ID: PRJNA730823). Funder Information Declared National Institute of Health , R01HG010040, R01HG014175, U41HG010972, U01HG013748 and U24CA294203 References 1. ↵ Shiina T , Hosomichi K , Inoko H , Kulski JK . The HLA genomic loci map: expression, interaction, diversity and disease . J Hum Genet . 2009 Jan ; 54 ( 1 ): 15 – 39 . OpenUrl CrossRef PubMed Web of Science 2. ↵ Zhou Y , Song L , Li H. Full-resolution HLA and KIR gene annotations for human genome assemblies . Genome Res . 2024 Nov ; 34 ( 11 ): 1931 – 41 . OpenUrl Abstract / FREE Full Text 3. ↵ The 1000 Genomes Project Consortium, Corresponding authors , Auton A , Abecasis GR , Steering committee , Altshuler DM , et al. 
A global reference for human genetic variation . Nature . 2015 Oct 1; 526 ( 7571 ): 68 – 74 . OpenUrl CrossRef PubMed 4. ↵ Suntsova MV , Buzdin AA . Differences between human and chimpanzee genomes and their implications in gene expression, protein functions and biochemical properties of the two species . BMC Genomics . 2020 Sept ; 21 ( S7 ): 535 . OpenUrl CrossRef PubMed 5. ↵ Marsh SGE , Albert ED , Bodmer WF , Bontrop RE , Dupont B , Erlich HA , et al. Nomenclature for factors of the HLA system, 2010 . Tissue Antigens . 2010 Apr ; 75 ( 4 ): 291 – 455 . OpenUrl CrossRef PubMed Web of Science 6. ↵ Lang D , Zhang S , Ren P , Liang F , Sun Z , Meng G , et al. Comparison of the two up-to-date sequencing technologies for genome assembly: HiFi reads of Pacific Biosciences Sequel II system and ultralong reads of Oxford Nanopore . GigaScience . 2020 Dec 15; 9 ( 12 ): giaa123 . OpenUrl CrossRef PubMed 7. ↵ Barker DJ , Maccari G , Georgiou X , Cooper MA , Flicek P , Robinson J , et al. The IPD-IMGT/HLA Database . Nucleic Acids Res . 2023 Jan 6; 51 ( D1 ): D1053 – 60 . OpenUrl CrossRef PubMed 8. ↵ Klasberg S , Surendranath V , Lange V , Schöfl G. Bioinformatics Strategies, Challenges, and Opportunities for Next Generation Sequencing-Based HLA Genotyping . Transfus Med Hemotherapy . 2019 ; 46 ( 5 ): 312 – 25 . OpenUrl 9. Song L , Bai G , Liu XS , Li B , Li H. Efficient and accurate KIR and HLA genotyping with massively parallel sequencing data . Genome Res . 2023 June ; 33 ( 6 ): 923 – 31 . OpenUrl Abstract / FREE Full Text 10. Szolek A , Schubert B , Mohr C , Sturm M , Feldhahn M , Kohlbacher O. OptiType: precision HLA typing from next-generation sequencing data . Bioinformatics . 2014 Dec 1; 30 ( 23 ): 3310 – 6 . OpenUrl CrossRef PubMed Web of Science 11. ↵ Berger B Dilthey AT , Mentzer AJ , Carapito R , Cutland C , Cereb N , Madhi SA , et al. HLA*LA—HLA typing from linearly projected graph alignments . Berger B , editor. Bioinformatics . 2019 Nov 1; 35 ( 21 ): 4394 – 6 . 
OpenUrl CrossRef PubMed 12. ↵ Claeys A , Merseburger P , Staut J , Marchal K , Van den Eynden J. Benchmark of tools for in silico prediction of MHC class I and class II genotypes from NGS data . BMC Genomics . 2023 May 9; 24 ( 1 ): 247 . OpenUrl CrossRef PubMed 13. ↵ Wang S , Wang M , Chen L , Pan G , Wang Y , Li SC . SpecHLA enables full-resolution HLA typing from sequencing data . Cell Rep Methods . 2023 Sept ; 3 ( 9 ): 100589 . OpenUrl PubMed 14. ↵ Kronenberg Z , Harting J. HiFiHLA: An HLA star-calling tool for PacBio HiFi data types [Internet] . GitHub ; 2024 . Available from: https://github.com/PacificBiosciences/hifihla 15. ↵ Holt JM , Harting J , Chen X , Baker D , Kronenberg Z , Gonzaludo N , et al. StarPhase: Comprehensive Phase-Aware Pharmacogenomic Diplotyper for Long-Read Sequencing Data . 16. ↵ Wang S , Wang X , Wang M , Zhou Q , Li SC . SpecImmune accurately genotypes diverse immune-related gene families using long-read data [Internet] . Bioinformatics ; 2025 [cited 2025 Apr 3 ]. Available from: http://biorxiv.org/lookup/doi/10.1101/2025.02.04.636381 17. ↵ Li W , Godzik A. Cd-hit: a fast program for clustering and comparing large sets of protein or nucleotide sequences . Bioinforma Oxf Engl . 2006 July 1; 22 ( 13 ): 1658 – 9 . OpenUrl 18. ↵ Gao Y. longcallD [Internet] . GitHub ; 2025 . Available from: https://github.com/yangao07/longcallD 19. ↵ Danecek P , Bonfield JK , Liddle J , Marshall J , Ohan V , Pollard MO , et al. Twelve years of SAMtools and BCFtools . GigaScience . 2021 Jan 29; 10 ( 2 ): giab008 . OpenUrl CrossRef PubMed 20. ↵ Liao WW , Asri M , Ebler J , Doerr D , Haukness M , Hickey G , et al. A draft human pangenome reference . Nature . 2023 May 1; 617 ( 7960 ): 312 – 24 . OpenUrl CrossRef PubMed 21. ↵ Talenti A. Sequencing Genome in a Bottle samples [Internet] . Oxford Nanopore Technologies Open Data Releases ; 2023 . Available from: https://epi2me.nanoporetech.com/giab-2023.05/ 22. 
↵ Park J , Cook DE , Chang PC , Kolesnikov A , Brambrink L , Mier JC , et al. DeepSomatic: Accurate somatic small variant discovery for multiple sequencing technologies [Internet] . Bioinformatics ; 2024 [cited 2025 Oct 13 ]. Available from: http://biorxiv.org/lookup/doi/10.1101/2024.08.16.608331 View the discussion thread. Back to top Previous Next Posted October 24, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following FuFiHLA: A tool for Full-Field HLA typing from long reads data Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share FuFiHLA: A tool for Full-Field HLA typing from long reads data Jingqing Hu , Qian Qin , Heng Li , Ying Zhou bioRxiv 2025.10.23.684216; doi: https://doi.org/10.1101/2025.10.23.684216 Share This Article: Copy Citation Tools FuFiHLA: A tool for Full-Field HLA typing from long reads data Jingqing Hu , Qian Qin , Heng Li , Ying Zhou bioRxiv 2025.10.23.684216; doi: https://doi.org/10.1101/2025.10.23.684216 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41911) Biophysics (21436) Cancer Biology (18578) Cell Biology (25482) Clinical Trials (138) Developmental Biology (13371) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15599) Genomics (22482) Immunology (17728) Microbiology (40363) Molecular Biology (17163) Neuroscience (88536) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15129) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00