Haplotype-resolved T2T gap-free genomes of the winegrape cultivar ‘Cabernet Sauvignon’

doi:10.1101/2025.07.26.666927

Haplotype-resolved T2T gap-free genomes of the winegrape cultivar ‘Cabernet Sauvignon’

2025 · doi:10.1101/2025.07.26.666927

preprint OA: closed CC-BY-NC-ND-4.0

📄 Open PDF Full text JSON View at publisher

Full text 33,686 characters · extracted from preprint-html · click to expand

Haplotype-resolved T2T gap-free genomes of the winegrape cultivar ‘Cabernet Sauvignon’ | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Haplotype-resolved T2T gap-free genomes of the winegrape cultivar ‘Cabernet Sauvignon’ View ORCID Profile Falak Sher Khan , View ORCID Profile Tiepeng Sun , View ORCID Profile Xiangfeng Wang , View ORCID Profile Xuenan Zhang , Naomi Abe-Kanoh , Ying Hua Su , View ORCID Profile Li Guo , View ORCID Profile Wenxiu Ye doi: https://doi.org/10.1101/2025.07.26.666927 Falak Sher Khan 1 State Key Laboratory of Wheat Improvement, College of Life Sciences, Shandong Agricultural University , Tai’an, 271018, Shandong, China 2 Shandong Key Laboratory of Precision Molecular Crop Design and Breeding, Peking University Institute of Advanced Agricultural Sciences, Shandong Laboratory of Advanced Agricultural Sciences in Weifang , Weifang, 261325, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Falak Sher Khan Tiepeng Sun 2 Shandong Key Laboratory of Precision Molecular Crop Design and Breeding, Peking University Institute of Advanced Agricultural Sciences, Shandong Laboratory of Advanced Agricultural Sciences in Weifang , Weifang, 261325, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tiepeng Sun Xiangfeng Wang 2 Shandong Key Laboratory of Precision Molecular Crop Design and Breeding, Peking University Institute of Advanced Agricultural Sciences, Shandong Laboratory of Advanced Agricultural Sciences in Weifang , Weifang, 261325, China 3 United Graduate School of Agricultural Science, Iwate University , 3-18-8 Ueda, Morioka, Iwate, 020-8550, Japan 4 Department of Food, Life and Environmental Science, Faculty of Agriculture, Yamagata University , Wakaba-machi 1-23, Tsuruoka, Yamagata, 997-8555, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Xiangfeng Wang Xuenan Zhang 2 Shandong Key Laboratory of Precision Molecular Crop Design and Breeding, Peking University Institute of Advanced Agricultural Sciences, Shandong Laboratory of Advanced Agricultural Sciences in Weifang , Weifang, 261325, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Xuenan Zhang Naomi Abe-Kanoh 3 United Graduate School of Agricultural Science, Iwate University , 3-18-8 Ueda, Morioka, Iwate, 020-8550, Japan 4 Department of Food, Life and Environmental Science, Faculty of Agriculture, Yamagata University , Wakaba-machi 1-23, Tsuruoka, Yamagata, 997-8555, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ying Hua Su 1 State Key Laboratory of Wheat Improvement, College of Life Sciences, Shandong Agricultural University , Tai’an, 271018, Shandong, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: suyh{at}sdau.edu.cn li.guo{at}pku-iaas.edu.cn wenxiu.ye{at}pku-iaas.edu.cn Li Guo 2 Shandong Key Laboratory of Precision Molecular Crop Design and Breeding, Peking University Institute of Advanced Agricultural Sciences, Shandong Laboratory of Advanced Agricultural Sciences in Weifang , Weifang, 261325, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Li Guo For correspondence: suyh{at}sdau.edu.cn li.guo{at}pku-iaas.edu.cn wenxiu.ye{at}pku-iaas.edu.cn Wenxiu Ye 2 Shandong Key Laboratory of Precision Molecular Crop Design and Breeding, Peking University Institute of Advanced Agricultural Sciences, Shandong Laboratory of Advanced Agricultural Sciences in Weifang , Weifang, 261325, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Wenxiu Ye For correspondence: suyh{at}sdau.edu.cn li.guo{at}pku-iaas.edu.cn wenxiu.ye{at}pku-iaas.edu.cn Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Cabernet Sauvignon (CS), a cultivar of winegrape ( Vitis vinifera ), is among the most renowned winegrape varieties globally. In this study, we released the haplotype-resolved telomere-to-telomere (T2T) CS genome assembled using a combination of PacBio HiFi, ONT ultra-long, and Hi-C sequencing data. The two T2T gap-free haplotype-resolved assemblies CS-T2T.Hap1 and CS-T2T.Hap2 sized 491.11 Mb (contig N50 = 25.09 Mb) and 491.90 Mb (contig N50 = 25.51 Mb), respectively. Genome annotation predicted a total of 36,456 genes in CS-T2T.Hap1 and 35,471 genes in CS-T2T.Hap2. By genome comparison, we discovered and validated megabase inversion events on Chromosome 03,11,18 and 19, which are not prevalent in other haplotype-resolved V. vinifera genomes. In summary, this haplotype-resolved T2T genome represents an essential genomic resource for Cabernet Sauvignon, and lays the foundation for its genetic studies, improvement and utilization. Background & Summary The cultivated grapevine ( V. vinifera L.) is one of the most valuable perennial fruit crops cultivated for wine-making and fresh consumption worldwide 1 . ‘Cabernet Sauvignon’ is a major red wine cultivar renowned for its thick, resilient skins and high concentration of phenolic compounds. Wine from this variety exhibits a robust tannic structure and complex flavor profile dominated by notes of blackcurrant, plum, and green bell pepper 2 . These traits are attributed to the grapevine’s unique genetic lineage and environmental adaptability, making it one of the most widely cultivated wine grape varieties globally. Originating from a natural cross between Sauvignon Blanc and Cabernet Franc in the Aquitaine region of France prior to the seventeenth century, its historical and viticultural significance is further underscored by its genetic stability and widespread adoption across diverse terroirs 3 . Since the first V. vinifera genome was assembled in 2007 using the grapevine PN40024 4 , originally derived from 9 selfings of ‘Helfensteiner’ (cross of ‘Pinot noir’ and ‘Schiava grossa’) 5 , the field of grapevine genomics has progressed rapidly, with high-quality reference assemblies now available for diverse species and cultivars 6 - 9 . In 2023, of particular significance, telomere-to-telomere (T2T) genome has been completed for PN40024 10 . Since then, several economically vital varieties such as the white wine grape ‘Chardonnay’ 8 , ‘Thomson seedless’ 11 , ‘Baimunage’, ‘Black Monukka’, ‘Muscat Hamburg’, ‘Hongmunage’, ‘Manicure Finger’, ‘Thompson’ 9 . The genome of ‘Cabernet Sauvignon’ was initially assembled in 2016 12 , followed by the release of several chromosomal-level versions 8 , 13 .Despite its global prominence as the most widely planted wine grape, ‘Cabernet Sauvignon’ lacks a T2T gap-free reference genome, which hinders comprehensive understanding of the molecular mechanism behind its complex traits, including disease resistance, berry development, and terroir-specific flavor profiles, thus its improvement and utilization. In this study, we assembled the haplotype-resolved T2T genome of ‘Cabernet Sauvignon’ using PacBio-HiFi, ONT ultra-long, and Hi-C sequencing data. The two haplotype-resolved T2T genome assemblies are gapless and sized 491.11 Mb and 491.90 Mb with N50 length of 25.09 Mb and 25.51 Mb, respectively. A total of 36,456 and 35,471 protein-coding genes were annotated in the two haplotype genomes combining ab initio prediction, homology proteins and transcriptome evidence. Vigorous quality assessment demonstrated the high accuracy and completeness of the assembly and annotation. These T2T assemblies and annotations improve the genomic resources available for V. vinifera cultivar ‘Cabernet Sauvignon’ and provide a crucial reference genome for its molecular studies and improvement. Methods Plant resources and genome sequencing The ‘Cabernet Sauvignon’ was cultivated in the vineyards at Peking University Institute of Advanced Agricultural Sciences, China. Genomic DNA was extracted from grapevine leaves using the CTAB (cetyltrimethylammonium bromide) method 14 . The Agilent 4200 Bioanalyzer (Agilent Technologies) was used to evaluate the DNA integrity. For HiFi sequencing library preparation, 15 µg of high-molecular-weight genomic DNA was utilized to generate a PacBio SMRTbell library with the SMRT Express Template Prep Kit 2.0 (Pacific Biosciences).Circular consensus sequencing (CCS) was conducted on a PacBio Sequel IIe system (Biomarker Technologies Corporation, Qingdao, China) to yield 87.86 Gb of HiFi data, corresponding to 178 × genome coverage, with read N50 length of 18.06 kb ( Table 1 ). Ultra-long-read sequencing was performed using the Oxford Nanopore Technologies (ONT) platform, where size-selected long DNA fragments were prepared according to the SQK-LSK109 Kit protocol. Sequencing was carried out on the PromethION sequencer at Single-Molecule Sequencing Platform of Annoroad. A total of 49.71 Gb ONT data was generated, corresponding to 101 × genome coverage ( Table 1 ). The Next-Generation Sequencing (NGS) and High-throughput chromosome conformation capture sequencing (Hi-C) datasets used in this study were obtained from published sources, corresponding to 50 × and 126 × genome coverage, respectively 8 ( Table 1 ). View this table: View inline View popup Download powerpoint Table 1: Statistic of sequencing data for ‘Cabernet sauvignon’ genome assembly. erage was calculated based on the Hap1 genome assembly of 491.10 Mb. Genome assembly To estimate fundamental characteristics of the ‘Cabernet Sauvignon’ genome, such as genome size, heterozygosity, we performed the genome survey using the PacBio HiFi data. The K-mer analysis was performed using Jellyfish v2.3.0 15 with a K-mer size of 21 and used for the genome survey by GenomeScope v1.0 16 . The genome size of ‘Cabernet Sauvignon’ was estimated at 495.32 Mb, with the heterozygosity rate of 1.56% ( Fig. 1a ). To assemble the haplotype-resolved T2T genomes for ‘Cabernet Sauvignon’, PacBio HiFi (178 ×), ONT ultra-long (101 ×), and Hi-C (126 ×) reads were integrated to produce an initial assembly of two haplotypes using hifiasm v0.24.0 17 with the parameters ‘--hom-cov --h1 --h2 --ul’. To eliminate mitochondrial and chloroplast sequences from the genome assembly, the genome contigs were aligned to the V. vinifera mitochondrial genome (GenBank: FM179380.1 ) and chloroplast genome (GenBank: DQ424856.1 ) using minimap2 v2.28 18 with the parameter ‘--x asm5’. Contigs exhibiting ≥ 50% alignment coverage to plastid sequences were excluded from the assembly. By comparing contigs with all NCBI RefSeq bacterial genomes using blast-2.13.0+ with the parameter “-task megablast”, other common contamination sequences (such as microbial DNAs) were further eliminated. The high-quality haplotype-resolved assemblies were scaffolded into chromosome-scale genomes using Hi-C data with Juicer 19 and 3d-DNA 20 pipeline. Manual corrections to the scaffolded pseudochromosomes were performed using Juicebox v1.11.08 21 , guided by Hi-C chromatin interaction patterns observed in contact heatmaps. To fill the gaps in the chromosome-scale assemblies, ONT ultra-long and HiFi reads were mapped to the haplotype-resolved genomes, and the aligned reads were used to fill gaps. The final genome assembly produced two gap-free haplotypes ( Fig. 1b ), haplotype 1 (CS-T2T.Hap1, 491.11 Mb) and haplotype 2 (CS-T2T.Hap2, 491.90 Mb) ( Table 2 ) with each genome consists of 19 chromosomes ( Fig. 1c ). The scaffold N50 lengths for CS-T2T.Hap1 and CS-T2T.Hap2 were 25.09 Mb and 25.51 Mb, respectively ( Table 2 ). The telomere sequences were identified using Tandem Repeat Finder (TRF) v4.09.1 22 , and the result file was converted into GFF3 format via TRF2GFF ( https://github.com/Adamtaranto/TRF2GFF ) to detect the seven-base repeats. In summary, ‘Cabernet Sauvignon’ assembly can be regarded as having achieved telomere-to-telomere and gap-free completeness. View this table: View inline View popup Download powerpoint Table 2: Assembly statistics for the two haplotypes of ‘Cabernet sauvignon’ Download figure Open in new tab Figure 1. Hapotype-resolved genome assembly of ‘Cabernet Sauvignon’. (a) Genome survey based on K-mer analysis using 1.0. (b) Genomic features of CS genome. Tracks (A) to (G): chromosomes, GC content, gene density, exon density, LTR/Gypsy density, LTR/Copia density. Windows: 50 Kb. Left (green): Haplotype 2. Right (blue):c) Hi-C contact map of CS assembly. TE annotation Before genome annotation, a nonredundant transposable element (TE) library was constructed using the Extensive de novo TE Annotator (EDTA) v2.2.1 23 with parameters ‘--sensitive 0 -- anno 1 --step all’. For haplotype-resolved assembly, approximately 51.80-52.70% of genome was classified as repetitive elements, which included 39.60-40.60% long terminal repeats (LTRs), 4.07-4.10% long interspersed nuclear elements (LINEs), 0.24-0.25% short interspersed nuclear elements (SINEs), and 7.86-7.81% DNA transposon elements, respectively ( Table 3 ). View this table: View inline View popup Download powerpoint Table 3: Statistics of annotated repeat elements in the genome of ‘Cabernet Sauvignon‘ Genome annotation Gene model prediction was performed using a comprehensive pipeline that integrated multiple data sources after masking repetitive elements, including: ab initio predictions, homologous protein comparisons, and RNA-seq evidence. The ab initio gene prediction was implemented by training the GeneMark-ET model with BRAKER2 v2.1.6 24 and further refined it by training the semi-HMM model SNAP 25 via MAKER v3.01.03 26 . The homologous protein sequences were included different published data from Vitis 8 , 10 and UniProt Swiss-Prot database ( https://www.uniprot.org/downloads ). The homologous protein dataset was filtered using CD-HIT v4.8.1 27 with default parameters to remove redundant sequences. The transcriptome data were obtained from published sources and derived from the same cultivar 8 . Transcriptome data were mapped to the haplotype-resolved assembly using HISAT2 v2.2.1 28 , and subsequently processed with StringTie v2.2.1 29 for genome-guided transcript assembly.The trained gene prediction models, homologous protein sequences and transcriptome evidence were integrated into MAKER to predict gene structures. The final gene model was predicted by the MAKER pipeline with protein-coding genes having an AED < 0.5. A total of 36,456 and 35,471 protein-coding genes were depicted for CS-Hap1 and CS-Hap2, respectively ( Table 4 ). View this table: View inline View popup Download powerpoint Table 4: Summary of protein-coding genes functional annotation for the two pes of ‘Cabernet sauvignon’ Functional annotation of protein-coding genes The predicted protein-coding genes were functionally annotated utilizing a robust multi-database approach to ensure comprehensive annotation, including Non-Redundant (NR), Evolutionary Genealogy of Genes: Non-supervised Orthologous Groups (eggNOG), Protein Families Database (Pfam), Swiss-Prot protein database, Kyoto Encyclopedia of Genes and Genomes (KEGG) and Gene Ontology (GO) databases. The eggNOG database was engaged via eggNOG-mapper ( http://eggnog-mapper.embl.de/ ). The non-redundant (NR) and Swiss-Prot protein databases were employed for functional annotation using Diamond BLASTP v2.1.6 30 . A total of 34,142 and 32,648 non-redundant protein-coding genes were annotated using the various databases for CS-T2T.Hap1 and CS-T2T.Hap2, respectively ( Table 4 ). Genome synteny analysis To compare published Vitis genome 8 , 10 with the assembled genome, whole-genome alignments were generated using minimap2 v2.28 18 , followed by sorting and indexing the alignment BAM files with SAMtools v1.20 31 . Genomic synteny and structural variants were identified by SyRI 32 , and the results visualized using Plotsr ( https://github.com/schneebergerlab/plotsr ). Notably, within the two ‘Cabernet Sauvignon’ haplotype genomes, we discovered megabase-scale inversions, which are not prevalent in other haplotype-resolved V. vinifera genomes ( Fig. 2a ). To validate the chromosomal inversions, we compared the two haplotype genomes of ‘Cabernet Sauvignon’ using minimap2 v2.28 18 and visualizing by D-genies 33 . For example, a chromosomal inversion of approximately 8 Mb was detected through comparative analysis of chromosome 19 between CS-T2T.Hap1 and CS-T2T.Hap2 ( Fig. 2b ). Furthermore, chromosomal inversions were validated using Hi-C interaction maps, which confirm a local inversion in this region ( Fig. 2b ). Download figure Open in new tab Figure 2. Genome synteny and structural variants analysis. (a) The collinearity and structural variants among six genomes. nay’. PN: ‘PN40024’. CS: ‘Cabernet Sauvignon’. (b) Example of an inversion event on chromosome 19 in two dated by Hi-C data. Up, dot plot showing genome alignment. Down, Hi-C contact heatmap. Data Records The raw genomic sequencing data and genome assemblies have been deposited in the European Molecular Biology Laboratory-European Bioinformatics Institute (EMBL-EBI) ( https://www.ebi.ac.uk/ ) under the study number of PRJEB94081. The accession numbers of PacBio HiFi and ONT sequencing data are publicly accessible as ERR15313518 and ERR15314151 respectively. The accession numbers of genome assemblies are publicly accessible as ERZ27315251 and ERZ27315252 respectively. The annotation files have been deposited in Zenodo ( https://doi.org/10.5281/zenodo.16300203 ). Technical Validations Genome assembly quality assessment To evaluate the completeness of the ‘Cabernet Sauvignon’ haplotype-resolved T2T gap-free genome, we employed the Benchmarking Universal Single-Copy Orthologous (BUSCO) (v5.4.2) 34 with the eudicots_odb10 database. The evaluation of genome completeness demonstrated BUSCO scores of 98.3% and 98.2% for CS-T2T.Hap1 and CS-T2T.Hap2, respectively ( Table 2 ). The ONT and HiFi reads were aligned to genomes with minimap2v2.28 (−ax map-ont, -ax map-pb) 18 . The mapping rate, mean sequencing depth, and genome coverage were calculated via SAMtools v1.20 31 from BAM files generated by minimap2. The haplotype-resolved genome assemblies showed high mapping ratio: HiFi reads reached 99.99%, and ONT reads achieved 100% mapping to both haplotypes ( Table 2 ), reflecting superior assembly continuity. Long terminal repeats (LTRs) were predicted using LTR_FINDER v1.1 35 , followed by classification and filtering using LTR_retriever v2.9.0 36 to calculate the LTR Assembly Index (LAI), reflecting genome assembly continuity. LAI scores of 16.46 (CS-T2T.Hap1) and 18.25 (CS-T2T.Hap2) indicated high-quality haplotype-resolved assemblies ( Table 2 ). The k-mer-based quality of the haplotype-resolved assemblies was assessed using Meryl v1.3 and Merqury v1.3 37 . The results revealed quality values (QV) of 49.13 for CS-T2T.Hap1 and 49.83 for CS-T2T.Hap2 ( Table 2 ), indicating high accuracy in both haplotypes. Collectively, these results demonstrate the high accuracy and completeness of the T2T haplotype-resolved genome assembly for the grapevine cultivar ‘Cabernet Sauvignon’. Gene annotation assessment Functional validation of annotated genes was conducted through alignment with different public protein databases. For the two haplotype genomes, revealed that the following proportions of proteins were annotated across databases: NR (92.82%), EggNOG (77.38%), Pfam (69.56%), Swiss-Prot (64.78%), KEGG (37.53%), and GO (35.60%). Overall, 93.65%(Hap1) and 92.04% (Hap2) non-redundant genes were mapped to public protein databases, confirming the comprehensive of the gene annotations. Code availability All software used in this work are publicly available, and the software versions and parameters used are described in the Methods section. The parameters not mentioned in the analysis were used as default parameters suggested by the developer. No specific script was developed in this study. Author contributions W.Y., L.G. and Y.H.S. conceived and supervised the project, X.Z. prepared plant materials.F.S.K. and T.S. performed bioinformatics analysis, prepared the tables, figures, uploaded the raw data and wrote the manuscript. X.W., N.K., L.G. and W.Y. revised the manuscript. All authors have read and approved the final version of the manuscript. Competing interests The authors declare no competing interests. Acknowledgements This research was supported by the Key Research and Development Program of Shandong Province (2024LZGC033, 2024CXPT031), and Shandong Provincial Natural Science Foundation (SYS202206), Taishan Scholars Program of Shandong Province and Weifang Key Laboratory of Grapevine Improvement and Utilization, China. Funder Information Declared Key Research and Development Program of Shandong Province , 2024LZGC033 , 2024CXPT031 Shandong Provincial Natural Science Foundation , SYS202206 Taishan Scholars Program of Shandong Province Weifang Key Laboratory of Grapevine Improvement and Utilization Footnotes https://doi.org/10.5281/zenodo.16300203 https://www.ebi.ac.uk/ena/browser/view/PRJEB94081 References 1. ↵ Zhou , Y. , Massonnet , M. , Sanjak , J.S. , Cantu , D. & Gaut , B.S. Evolutionary genomics of grape (Vitis vinifera ssp. vinifera) domestication . Proceedings of the National Academy of Sciences of the United States of America 114 , 11715 – 11720 ( 2017 ). OpenUrl Abstract / FREE Full Text 2. ↵ Ren , Y. et al. Assessment of ‘Cabernet Sauvignon’ Grape Quality Half-Véraison to Maturity for Grapevines Grown in Different Regions . International Journal of Molecular Sciences 24 , 4670 ( 2023 ). OpenUrl PubMed 3. ↵ Bowers , J.E. & Meredith , C.P. The parentage of a classic wine grape, Cabernet Sauvignon . Nature Genetics 16 , 84 – 7 ( 1997 ). OpenUrl CrossRef PubMed Web of Science 4. ↵ Jaillon , O. et al. The grapevine genome sequence suggests ancestral hexaploidization in major angiosperm phyla . Nature 449 , 463 – 7 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 5. ↵ Velt , A. et al. An improved reference of the grapevine genome reasserts the origin of the PN40024 highly homozygous genotype . G3 Genes|Genomes|Genetics 13 , jkad067 ( 2023 ). OpenUrl 6. ↵ Cochetel , N. et al. A super-pangenome of the North American wild grape species . Genome Biology 24 , 290 ( 2023 ). OpenUrl CrossRef PubMed 7. Wang , Y. , Ding , K. , Li , H. , Kuang , Y. & Liang , Z. Biography of Vitis genomics: recent advances and prospective . Horticulture Research 11 , uhae128 ( 2024 ). OpenUrl 8. ↵ Guo , L. et al. Super pangenome of Vitis empowers identification of downy mildew resistance genes for grapevine improvement . Nature Genetics 57 , 741 – 753 ( 2025 ). OpenUrl CrossRef PubMed 9. ↵ Liu , Z. et al. Grapevine pangenome facilitates trait genetics and genomic breeding . Nature Genetics 56 , 2804 – 2814 ( 2024 ). OpenUrl CrossRef PubMed 10. ↵ Shi , X. et al. The complete reference genome for grapevine (Vitis vinifera L.) genetics and breeding . Horticulture Research 10 , uhad061 ( 2023 ). OpenUrl PubMed 11. ↵ Wang , X. et al. Telomere-to-telomere and gap-free genome assembly of a susceptible grapevine species (Thompson Seedless) to facilitate grape functional genomics . Horticulture Research 11 , uhad260 ( 2023 ). OpenUrl PubMed 12. ↵ Chin , C.S. et al. Phased diploid genome assembly with single-molecule real-time sequencing . Nature Methods 13 , 1050 – 1054 ( 2016 ). OpenUrl CrossRef PubMed 13. ↵ Massonnet , M. et al. The genetic basis of sex determination in grapes . Nature Communications 11 , 2902 ( 2020 ). OpenUrl PubMed 14. ↵ Porebski , S. , Bailey , L.G. & Baum , B.R. Modification of a CTAB DNA extraction protocol for plants containing high polysaccharide and polyphenol components . Plant Molecular Biology Reporter 15 , 8 – 15 ( 1997 ). OpenUrl CrossRef Web of Science 15. ↵ Marçais , G. & Kingsford , C. A fast, lock-free approach for efficient parallel counting of occurrences of k-mers . Bioinformatics 27 , 764 – 70 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 16. ↵ Vurture , G.W. et al. GenomeScope: fast reference-free genome profiling from short reads . Bioinformatics 33 , 2202 – 2204 ( 2017 ). OpenUrl CrossRef PubMed 17. ↵ Cheng , H. , Concepcion , G.T. , Feng , X. , Zhang , H. & Li , H. Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm . Nature Methods 18 , 170 – 175 ( 2021 ). OpenUrl CrossRef PubMed 18. ↵ Li , H. Minimap2: pairwise alignment for nucleotide sequences . Bioinformatics 34 , 3094 – 3100 ( 2018 ). OpenUrl CrossRef PubMed 19. ↵ Durand , N.C. et al. Juicer Provides a One-Click System for Analyzing Loop-Resolution Hi-C Experiments . Cell systems 3 , 95 – 8 ( 2016 ). OpenUrl CrossRef PubMed 20. ↵ Dudchenko , O. et al. De novo assembly of the Aedes aegypti genome using Hi-C yields chromosome-length scaffolds . Science 356 , 92 – 95 ( 2017 ). OpenUrl Abstract / FREE Full Text 21. ↵ Durand , N.C. et al. Juicebox Provides a Visualization System for Hi-C Contact Maps with Unlimited Zoom . Cell systems 3 , 99 – 101 ( 2016 ). OpenUrl CrossRef PubMed 22. ↵ Benson , G. Tandem repeats finder: a program to analyze DNA sequences . Nucleic Acids Research 27 , 573 – 80 ( 1999 ). OpenUrl CrossRef PubMed Web of Science 23. ↵ Ou , S. et al. Benchmarking transposable element annotation methods for creation of a streamlined, comprehensive pipeline . Genome Biology 20 , 275 ( 2019 ). OpenUrl CrossRef PubMed 24. ↵ Brůna , T. , Hoff , K.J. , Lomsadze , A. , Stanke , M. & Borodovsky , M. BRAKER2: automatic eukaryotic genome annotation with GeneMark-EP+ and AUGUSTUS supported by a protein database . NAR Genomics and Bioinformatics 3 , qaa108 ( 2021 ). 25. ↵ Johnson , A.D. et al. SNAP: a web-based tool for identification and annotation of proxy SNPs using HapMap . Bioinformatics 24 , 2938 – 9 ( 2008 ). OpenUrl CrossRef PubMed Web of Science 26. ↵ Cantarel , B.L. et al. MAKER: an easy-to-use annotation pipeline designed for emerging model organism genomes . Genome Research 18 , 188 – 96 ( 2008 ). OpenUrl Abstract / FREE Full Text 27. ↵ Fu , L. , Niu , B. , Zhu , Z. , Wu , S. & Li , W. CD-HIT: accelerated for clustering the next-generation sequencing data . Bioinformatics 28 , 3150 – 2 ( 2012 ). OpenUrl CrossRef PubMed Web of Science 28. ↵ Kim , D. , Langmead , B. & Salzberg , S.L. HISAT: a fast spliced aligner with low memory requirements . Nature Methods 12 , 357 – 60 ( 2015 ). OpenUrl CrossRef PubMed 29. ↵ Pertea , M. et al. StringTie enables improved reconstruction of a transcriptome from RNA-seq reads . Nature Biotechnology 33 , 290 – 5 ( 2015 ). OpenUrl CrossRef PubMed 30. ↵ Buchfink , B. , Reuter , K. & Drost , H.G. Sensitive protein alignments at tree-of-life scale using DIAMOND . Nature Methods 18 , 366 – 368 ( 2021 ). OpenUrl CrossRef PubMed 31. ↵ Li , H. et al. The Sequence Alignment/Map format and SAMtools . Bioinformatics 25 , 2078 – 9 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 32. ↵ Goel , M. , Sun , H. , Jiao , W.B. & Schneeberger , K. SyRI: finding genomic rearrangements and local sequence differences from whole-genome assemblies . Genome Biology 20 , 277 ( 2019 ). OpenUrl CrossRef PubMed 33. ↵ Cabanettes , F. & Klopp , C. D-GENIES: dot plot large genomes in an interactive, efficient and simple way . Peer Journal 6 , e4958 ( 2018 ). OpenUrl 34. ↵ Manni , M. , Berkeley , M.R. , Seppey , M. & Zdobnov , E.M. BUSCO: Assessing Genomic Data Quality and Beyond . Current Protocols in Bioinformatics 1 , e323 ( 2021 ). OpenUrl 35. ↵ Xu , Z. & Wang , H. LTR_FINDER: an efficient tool for the prediction of full-length LTR retrotransposons . Nucleic Acids Research 35 , W265 – 8 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 36. ↵ Ou , S. , Chen , J. & Jiang , N. Assessing genome assembly quality using the LTR Assembly Index (LAI) . Nucleic Acids Research 46 , e126 ( 2018 ). OpenUrl PubMed 37. ↵ Rhie , A. , Walenz , B.P. , Koren , S. & Phillippy , A.M. Merqury: reference-free quality, completeness, and phasing assessment for genome assemblies . Genome Biology 21 , 245 ( 2020 ). OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted July 27, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Haplotype-resolved T2T gap-free genomes of the winegrape cultivar ‘Cabernet Sauvignon’ Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Haplotype-resolved T2T gap-free genomes of the winegrape cultivar ‘Cabernet Sauvignon’ Falak Sher Khan , Tiepeng Sun , Xiangfeng Wang , Xuenan Zhang , Naomi Abe-Kanoh , Ying Hua Su , Li Guo , Wenxiu Ye bioRxiv 2025.07.26.666927; doi: https://doi.org/10.1101/2025.07.26.666927 Share This Article: Copy Citation Tools Haplotype-resolved T2T gap-free genomes of the winegrape cultivar ‘Cabernet Sauvignon’ Falak Sher Khan , Tiepeng Sun , Xiangfeng Wang , Xuenan Zhang , Naomi Abe-Kanoh , Ying Hua Su , Li Guo , Wenxiu Ye bioRxiv 2025.07.26.666927; doi: https://doi.org/10.1101/2025.07.26.666927 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genomics Subject Areas All Articles Animal Behavior and Cognition (7617) Biochemistry (17633) Bioengineering (13856) Bioinformatics (41841) Biophysics (21399) Cancer Biology (18529) Cell Biology (25422) Clinical Trials (138) Developmental Biology (13352) Ecology (19860) Epidemiology (2067) Evolutionary Biology (24281) Genetics (15582) Genomics (22461) Immunology (17700) Microbiology (40295) Molecular Biology (17140) Neuroscience (88413) Paleontology (666) Pathology (2823) Pharmacology and Toxicology (4813) Physiology (7632) Plant Biology (15107) Scientific Communication and Education (2042) Synthetic Biology (4284) Systems Biology (9808) Zoology (2267)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-24T02:00:01.246996+00:00

License: CC-BY-NC-ND-4.0