Haplotype-resolved genome assembly of the tetraploid potato cultivar Désirée

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 36,717 characters · extracted from preprint-html · click to expand
Haplotype-resolved genome assembly of the tetraploid potato cultivar Désirée | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Haplotype-resolved genome assembly of the tetraploid potato cultivar Désirée View ORCID Profile Tim Godec , Sebastian Beier , Natalia Yaneth Rodriguez-Granados , Rashmi Sasidharan , Lamis Abdelhakim , Markus Teige , Björn Usadel , Kristina Gruden , View ORCID Profile Marko Petek doi: https://doi.org/10.1101/2025.01.14.631659 Tim Godec 1 National Institute of Biology, Department of Biotechnology and Systems Biology , Ljubljana, Slovenia 2 Jožef Stefan International 
Postgraduate School , Ljubljana, Slovenia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tim Godec For correspondence: tim.godec{at}nib.si Sebastian Beier 3 Institute of Bio- and Geosciences (IBG-4 Bioinformatics), Bioeconomy Science Center (BioSC), CEPLAS, Forschungszentrum Jülich GmbH , Jülich, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Natalia Yaneth Rodriguez-Granados 4 Plant Stress Resilience, Institute of Environmental Biology, Utrecht University , Utrecht, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site Rashmi Sasidharan 4 Plant Stress Resilience, Institute of Environmental Biology, Utrecht University , Utrecht, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site Lamis Abdelhakim 5 PSI (Photon Systems Instruments) , Drásov, Czech Republic Find this author on Google Scholar Find this author on PubMed Search for this author on this site Markus Teige 6 Molecular Systems Biology (MOSYS), Department of Functional and Evolutionary Ecology, University Vienna , Vienna, Austria Find this author on Google Scholar Find this author on PubMed Search for this author on this site Björn Usadel 3 Institute of Bio- and Geosciences (IBG-4 Bioinformatics), Bioeconomy Science Center (BioSC), CEPLAS, Forschungszentrum Jülich GmbH , Jülich, Germany 7 Faculty of Mathematics and Natural Sciences, Institute for Biological Data Science, Cluster of Excellence on Plant Sciences (CEPLAS), Heinrich Heine University Düsseldorf , Düsseldorf, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kristina Gruden 1 National Institute of Biology, Department of Biotechnology and Systems Biology , Ljubljana, Slovenia Find this author on Google Scholar Find this author on PubMed Search for 
this author on this site Marko Petek 1 National Institute of Biology, Department of Biotechnology and Systems Biology , Ljubljana, Slovenia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Marko Petek Abstract Full Text Info/History Metrics Preview PDF Abstract Cultivar Désirée is an important model for potato functional genomics studies to assist breeding strategies. Here, we present a haplotype-resolved genome assembly of Désirée, achieved by assembling PacBio HiFi reads and Hi-C scaffolding, resulting in a high-contiguity chromosome-level assembly. We implemented a comprehensive annotation pipeline incorporating gene models and functional annotations from the Solanum tuberosum Phureja DM reference genome alongside RNA-seq reads to provide high-quality gene and transcript annotations. Additionally, we provide a genome-wide DNA methylation profile using Oxford Nanopore reads, enabling insights into potato epigenetics. The assembled genome, annotations, methylation and expression data are visualised in a publicly accessible genome browser ( https://desiree.nib.si ), providing a valuable resource for the potato research community. Background & Summary Potato ( Solanum tuberosum ) is one of the most important and widely cultivated crops worldwide, with a significant role in global food security and agricultural research. Despite its significance, many studies still rely on the genome of the double monoploid (DM) clone of group Phureja DM1–3 516 R44 1 , 2 which lacks a substantial portion of the gene repertoire and variability found in cultivated tetraploid potato varieties. The potato cultivar Désirée is a red-skinned late-season potato variety, originally bred in the Netherlands in 1962 by crossing parent cultivars Urgenta and Depesche (Potato Pedigree Database) 3 . 
It is still cultivated due to its favourable agronomic traits, such as predictable yields and high tolerance to drought and some pathogens 4 . It has also been used in breeding programs, yet a genome assembly for the Désirée cultivar has not been available. In research, it has been propagated in tissue cultures, and used for genetic manipulation including gene overexpression 5 , gene silencing 6 , and CRISPR-Cas gene editing 7 . Although haplotype-resolved genome assemblies are becoming common in diploid organisms, the high heterozygosity rate, extensive repeat content, and the autopolyploid nature of cultivated potatoes still present significant challenges for generating high-quality haplotype-resolved assemblies. Currently, five haplotype-resolved genomes of autotetraploid potato cultivars are publicly available 8 – 12 as well as several phased diploid genomes 13 – 15 . The recently published haplotype-resolved tetraploid potato assemblies rely on labour-intensive techniques such as single-pollen sequencing 10 or the use of parental and crossing material 11 , which may not always be available. Adding to existing publicly available genomes, we provide a reference-quality (CRAQ overall AQI of 97.5) haplotype-resolved genome assembly of the tetraploid cultivar Désirée, assembled using solely PacBio HiFi and Illumina Hi-C data. Our assembly is accompanied by a comprehensive structural and functional gene annotation reaching 99.4% BUSCO completeness (solanales_odb10 dataset), together with orthology to DM genes. For the potato research community, we provide an online resource featuring a genome browser and downloadable genomic assembly and annotation files, providing a valuable tool for studies involving allele-specific expression or promoter analysis. Methods Sample preparation and sequencing Leaves from 4-week-old S. tuberosum cv. Désirée plants were collected and flash-frozen. 
High molecular weight genomic DNA (HMW gDNA) used for PacBio HiFi, Illumina and Oxford Nanopore Technologies (ONT) sequencing was extracted from the leaf tissues using a modified CTAB method 16 . The concentration and quality of the extracted DNA were assessed using a NanoDrop spectrophotometer. PacBio HiFi HMW gDNA was sent to the National Genomics Infrastructure (NGI) Sweden for library preparation and sequencing on the PacBio Sequel II platform. We obtained 79.4 Gbp of raw data, consisting of 4.1 million reads. Illumina Hi-C Leaves from 4-week-old S. tuberosum cv. Désirée plants were collected, flash-frozen in liquid nitrogen and ground using mortar and pestle. Hi-C library prep using the Omni-C kit (Dovetail Genomics) and sequencing were performed on an Illumina NovaSeq 6000 platform by NGI Sweden. Sequencing generated 2018.4 million paired-end (2 × 150 bp) reads. ONT The HMW gDNA was used for ONT DNA library prep using the SQK-LSK110 kit and sequenced on a MinION using the FLO-MIN106 flow cell. Reads were basecalled using Dorado (v0.7.2) with the model [model identifier lost in extraction; the name@version string was masked as an e-mail address], which generated 5.8 Gbp. The reads with methylation-related tags were converted to bedMethyl format using modkit (v0.4.1). Illumina short reads An Illumina short-read library was constructed from the HMW gDNA and sequenced on an Illumina NextSeq 2000 by the ELIXIR Slovenia node to generate 150 bp paired-end reads. The short-read sequencing generated approximately 138 Gbp of raw data, consisting of 460.1 million paired-end (2 × 150 bp) reads. Genome size and heterozygosity estimation The genome characteristics of S. tuberosum cv. Désirée, including genome size, heterozygosity, and repeat content, were estimated using Illumina short-read data and a k-mer based approach. A 21-mer frequency distribution was generated with Jellyfish (v2.2.10), and the genome’s key features were inferred using GenomeScope2 (v2.0). The haploid genome size was estimated at 669.6 Mbp, with a heterozygosity rate estimated at 3.8–5.7%. 
De novo genome assembly, Hi-C scaffolding and quality assessment PacBio HiFi and Illumina Hi-C reads were initially assembled into four sets of haplotype-resolved contigs using Hifiasm (v0.19.8-r603) 17 – 19 . Hifiasm primary unitigs were searched against the DM genome assembly with blastn (v2.5.0) 20 and best matches were visualised on the Graphical Fragment Assembly with Bandage (v0.8.1, Fig. 1a ) 21 . We performed quality control of the contigs using Merqury (v1.3, Fig. 1b ) 22 k-mer spectra and BUSCO completeness scores (v5.4.7, solanales_odb10 dataset) 23 . The length of haplotype draft assemblies ranged from 761.6 Mbp to 888.4 Mbp with contig N50 sizes ranging from 7.0 Mbp to 13.7 Mbp ( Table 1 ). Download figure Open in new tab Fig. 1 General characteristics of Désirée genome assembly a) Assembly graph of primary unitigs coloured by best match to DM chromosomes (also designated with numbers on the graph). b) Merqury k-mer spectra for initial contigs and scaffolded chromosomes. The k = 21 was used. K-mers are categorized as read-only (grey), unique (red), and shared (blue, green, purple, orange). Peaks corresponding to higher multiplicities indicate the presence of highly repeated k-mers. c) Dot plot comparing cv. Désirée chromosome-anchored contigs with DM v8.1 chromosomes. The colour designates contig identity. d) Genomic synteny of cv. Désirée haplotype-resolved assembly. View this table: View inline View popup Download powerpoint Table 1. Summary of the four haplotypes of the Désirée genome assembly. Contigs identified as contaminants were removed based on blastn (v2.5.0) searches against a custom-built contaminant database, which includes Solanum plastid and mitochondrial sequences and bacterial NCBI RefSeq sequences. Decontaminated scaffolds were anchored to chromosomes by mapping Hi-C reads to each haplotype set separately following the manufacturer’s recommended pipeline for Omni-C data ( https://omni-c.readthedocs.io ). 
Briefly, Hi-C reads were mapped using BWA-MEM (v0.7.17-r1188) 24 then the mappings were parsed with pairtools (v0.3.0) 25 followed by samtools (v1.3.1) 26 to identify and extract valid pairs. Valid pairs were used to anchor and orient scaffolds into chromosomes using YaHS (v1.2a.1) 27 and Juicebox Assembly Tools (v2.17.00) 28 , 29 . Chromosomes 11 and 12 of haplotype 4 lacked ∼20 Mbp and ∼30 Mbp part of the pericentromeric region, respectively, and haplotype 1 contained two additional unplaced scaffolds (scaffold_22 and scaffold_23). Alignment of these scaffolds to the reference genome (DM v6.1) and inspection of Hi-C contacts suggested that these scaffolds are the missing regions of chromosomes 11 and 12 in haplotype 4. Therefore, we remapped Hi-C reads and incorporated these two scaffolds in haplotype 4 using Juicebox Assembly Tools (v2.17.00). The final scaffolded assembly size amounts to 3.3 Gbp, with individual haplotypes ranging between 762 and 888 Mbp. As expected, one haplotype is highly similar to the DM haplotype, whereas other haplotypes can be more dissimilar ( Fig. 1c ). A comparison of Merqury k-mer spectra between the initial contigs and the scaffolded chromosomes ( Fig. 1b ) reveals that many apparent duplications in the contigs are resolved during scaffolding. A small proportion of sequences remains missing from the chromosomes and those can be found in the whole genome FASTA. The haplotype assemblies were sequentially aligned using minimap2 (v2.28) and analyzed with SyRI (v1.7.0) to identify syntenic regions and structural rearrangements which were visualized using plotsr (v1.1.1, Fig. 1d ). Genome annotation Repeat elements in the S. tuberosum cv. Désirée genome were identified using the Extensive de novo TE Annotator (EDTA, v2.2.1) 30 . Repetitive sequences cover 489–534 Mbp per haplotype, representing more than 70% of the genome ( Table 2 ). View this table: View inline View popup Download powerpoint Table 2. 
Summary of genome annotations for each haplotype. Protein-coding genes in the assembled S. tuberosum cv. Désirée genome were predicted using five complementary approaches: de novo , homology-based, transcriptome-based, deep-learning, and reference-based predictions ( Fig. 2 ). Download figure Open in new tab Fig. 2 Workflow overview of S. tuberosum cv. Désirée genome annotation. For transcriptome-based prediction, two methods were applied for short reads and Iso-Seq reads, respectively. Short reads from multiple tissues were aligned to each haplotype using STAR (v2.7.10a) 31 , and transcripts were assembled with StringTie2 (v2.2.1) 32 , followed by Portcullis (v1.2.4) 33 for junction validation. Iso-Seq reads from five S. tuberosum cultivars were mapped to each haplotype using minimap2 (v2.28) 34 , and transcripts were generated using IsoQuant (v3.3.1) 35 and TAMA Collapse (tc_version_date_2023_03_28) 36 . BRAKER3 (v3.0.8) 37 was used in ETP mode to predict gene models by integrating de novo , homology-based, and transcriptome-based predictions. Repeat masking of the assembly was performed with RepeatMasker (v4.1.2), using EDTA annotations. Protein sequences from OrthoDB (green plant orthologs) were provided as evidence, and short-read STAR alignments with invalid junctions removed were included. Helixer (v0.3.3) 38 , 39 was used for deep-learning-based gene prediction via its web interface ( https://www.plabipd.de/helixer_main.html ). Gene models from the S. tuberosum reference genome (DM v6.1, UniTato annotation) were transferred to the Désirée assembly using Liftoff (v1.6.3) 40 . All five transcript or gene model sets were consolidated using Mikado (v2.3.4) 41 to generate a non-redundant set of transcripts. Protein-coding gene completeness was assessed using BUSCO ( Table 2 , v5.4.7, solanales_odb10 dataset) and OMArk (v0.3.0, omamer v2.0.2) 42 . 
The predicted protein-coding genes were functionally annotated using EggNOG Mapper (v2.1.11) 43 with the EggNOG database (version 5.0.2) 44 for the Viridiplantae subset. This included categories such as gene names, Gene Ontologies (GOs), enzyme functions (EC), and KEGG pathways, reactions, and modules, along with CAZy families, PFAM domains, and more. Additionally, functional land-plant protein annotations were predicted using Mercator4 (v7) 45 via the web platform ( https://www.plabipd.de/mercator_main.html ). Annotations from EggNOG and Mercator4 were combined into the final GFF3 annotation file. Orthologous groups between haplotypes and UniTato genes were identified using OrthoFinder (v2.5.5) 46 . Across haplotypes, 55.3% of orthogroups contained genes from all four haplotypes, 22.9% from three haplotypes, 19.2% from two haplotypes, and 2.7% from a single haplotype. When comparing the Désirée annotation to UniTato, 17.24% of genes were specific to the Désirée annotation. Data Records The raw sequencing data, including Illumina Hi-C, Illumina paired-end, PacBio HiFi, and ONT reads, have been deposited at the National Center for Biotechnology Information (NCBI) Sequence Read Archive (SRA) under BioProject number PRJNA1185028. Plastid, mitochondrial and bacterial sequences used for removal of contaminant contigs were downloaded from NCBI RefSeq release 218. Transcriptomic data used for gene annotation was downloaded from public repositories: SRA under accessions PRJNA1192223, PRJNA1186376, PRJNA718240, PRJNA803222, PRJNA1209787 and PRJNA1191209; the Gene Expression Omnibus (GEO) under accession GSE232028; and the National Genomics Data Center (NGDC) under accession CRA006012. Existing gene models used in the gene annotation pipeline were downloaded from https://unitato.nib.si and https://spuddb.uga.edu . 
The genome assemblies of the four haplotypes have been submitted to NCBI GenBank under the BioProject accessions PRJNA1196677, PRJNA1196678, PRJNA1196679 and PRJNA1196680. The assembled genome, including annotations, methylation profile and identified orthologs, is hosted in a Zenodo repository under DOI: 10.5281/zenodo.14609304 and is also accessible via an interactive genome browser at https://desiree.nib.si . Technical Validation We assessed the assembly quality and completeness using DNA sequencing read mapping, CRAQ, BUSCO analysis, and Merqury k-mer based evaluation. Illumina reads were mapped with BWA (v0.7.17), while PacBio and ONT reads were aligned using minimap2 (v2.28). Mapping rates were 99.90%, 100.00%, and 99.74% for Illumina paired-end, PacBio, and ONT reads, respectively. CRAQ (v1.0.9) 47 analysis of PacBio and Illumina mappings yielded a regional AQI of 96.3 and an overall AQI of 97.5, classifying the assembly as reference quality (AQI > 90). Assembly completeness was assessed with BUSCO (v5.4.7) using the solanales_odb10 lineage database, identifying 5930 (99.6%) of the 5950 BUSCO orthologous groups in both the whole genome and chromosome-only assemblies ( Table 1 ). Merqury (v1.3) analysis, using a Meryl (v1.3) database constructed from Illumina reads, estimated genome completeness at 98.57% for the whole genome and 95.73% for the chromosomes. The estimated QV values were 54.30 and 58.53 for the whole genome and chromosomes, respectively. Completeness of gene annotation was assessed using OMArk (v0.3.0, omamer v2.0.2), BUSCO (v5.4.7) and Mercator4 (v7). OMArk analysis demonstrated that our annotation captured 94.1%-94.6% of Hierarchical Orthologous Groups (HOGs) per haplotype, with duplication rates ranging from 11.5% to 11.9% ( Fig. 3a ). When combining genes from all haplotypes, the proportion of complete HOGs reaches 99.3%, meaning that not all conserved genes are present in all haplotypes. 
Similarly, BUSCO analysis reported a haplotype completeness range of 93.3%–95.4% ( Table 2 ), while the whole genome annotation achieved 99.4% completeness. Protein classification via Mercator4 revealed that 93.9%–94.6% of Mercator bins were occupied per haplotype, increasing to 97.5% when combining all proteins ( Table 2 ). As expected, the Mercator bin with the largest proportion of missing proteins was associated with clade-specific metabolism ( Fig. 3b ). Additionally, the classified proteins showed no significant deviation from the median protein length, confirming consistency in annotation quality ( Fig. 3c ). Download figure Open in new tab Fig. 3 Validation of gene annotation. a) OMArk quality assessment showing consistency, completeness and count of proteins across all four haplotypes. b) Histogram showing the percentage of Mercator4 functional bins occupied by the Désirée proteins. c) Histogram displaying the distribution of proteins grouped by their percentage deviation from the median protein length. Usage Notes The presented Désirée genome assembly is of high contiguity, completeness and phasing quality and presents a valuable resource for haplotype-aware transcriptomics, proteomics and epigenomics analyses. The transfer of UniTato annotations 48 provides translation of gene identifiers from the DM to the Désirée genome. The RNA-seq datasets used to supplement gene model annotation are predominantly from mature leaf and root tissue, thus genes specifically expressed in other tissue and developmental stages may not be fully captured in the current annotation. The genome was produced from a plant propagated in tissue culture for over a decade. A recent pangenome study 49 found that in vitro propagated plants of the Solanum section Petota have greater numbers of TEs in their genomes. While this seems to hold for LTR elements and DNA transposons in the Désirée genome, overall TE expansion is not evident. 
Examining the DNA methylation profile available in the Désirée genome browser might provide more insight into specific transposable element expansion in this cultivar. Recently, efforts were made to generate potato pangenomes 9 , 49 . However, the number of included phased tetraploid genomes is still limited. Including Désirée and more phased tetraploid genomes will improve the completeness of potato pangenome. This will bridge knowledge gaps in potato genomics and give potato breeders a powerful toolkit for developing more resilient and productive cultivars. Code Availability The code, scripts and command-line tool commands used for genome assembly, annotation and quality control are freely available in the GitHub repository https://github.com/NIB-SI/desiree-genome . Author contributions TG : Methodology, Data curation, Investigation, Visualization, Writing - Original Draft. SB : Investigation, Writing - Review & Editing. BU : Writing - Review & Editing. NYRG : Resources, Writing - Review & Editing. RS : Resources, Writing - Review & Editing. LA : Resources, Writing - Review & Editing. MT : Funding acquisition, Writing - Review & Editing. KG : Funding acquisition, Conceptualization, Writing - Review & Editing. MP : Conceptualization, Validation, Resources, Supervision, Project administration, Writing - Review & Editing. Competing interests The author(s) declare no competing interests. Acknowledgement This work benefits from resources and services provided by ELIXIR, a distributed infrastructure for life science data, funded by national governments and the European Commission, particularly the Elixir-SI node for performing Illumina paired-end sequencing. Funding for this work was provided by the European Union’s Horizon 2020 research and innovation programme project ADAPT (grant agreement No GA 2020 862-858), Slovenian Research and Innovation Agency (ARIS) project grants P4-0165, P4-0431, and J4-3089. 
SB and BU are supported by the German Federal Ministry of Education and Research (BMBF) in the frame of the German Network for Bioinformatics Infrastructure (de.NBI). References 1. ↵ Yang , X. et al. The gap-free potato genome assembly reveals large tandem gene clusters of agronomical importance in highly repeated genomic regions . Molecular Plant 16 , 314 – 317 ( 2023 ). OpenUrl CrossRef PubMed 2. ↵ Pham , G. M. et al. Construction of a chromosome-scale long-read reference genome assembly for potato . GigaScience 9 , giaa100 ( 2020 ). OpenUrl CrossRef PubMed 3. ↵ van Berloo , R. , Hutten , R. C. B. , van Eck , H. J. & Visser , R. G. F. An Online Potato Pedigree Database Resource . Potato Res . 50 , 45 – 57 ( 2007 ). OpenUrl CrossRef 4. ↵ The European Cultivated Potato Database . https://www.europotato.org/varieties/view/Desiree-E . 5. ↵ Tomaž , Š. et al. A mini-TGA protein modulates gene expression through heterogeneous association with transcription factors . Plant Physiology 191 , 1934 – 1952 ( 2023 ). OpenUrl CrossRef PubMed 6. ↵ Halim , V. A. et al. PAMP-induced defense responses in potato require both salicylic acid and jasmonic acid . The Plant Journal 57 , 230 – 242 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 7. ↵ Lukan , T. et al. CRISPR/Cas9-mediated fine-tuning of miRNA expression in tetraploid potato . Horticulture Research 9 , uhac147 ( 2022 ). OpenUrl CrossRef 8. ↵ Bao , Z. et al. Genome architecture and tetrasomic inheritance of autotetraploid potato . Molecular Plant 15 , 1211 – 1226 ( 2022 ). OpenUrl CrossRef PubMed 9. ↵ Hoopes , G. et al. Phased, chromosome-scale genome assemblies of tetraploid potato reveal a complex genome, transcriptome, and predicted proteome landscape underpinning genetic diversity . Molecular Plant 15 , 520 – 536 ( 2022 ). OpenUrl CrossRef PubMed 10. ↵ Sun , H. et al. Chromosome-scale and haplotype-resolved genome assembly of a tetraploid potato cultivar . Nat Genet 54 , 342 – 348 ( 2022 ). OpenUrl CrossRef PubMed 11. 
↵ Serra Mari , R. et al. Haplotype-resolved assembly of a tetraploid potato genome using long reads and low-depth offspring data . Genome Biology 25 , 26 ( 2024 ). OpenUrl CrossRef PubMed 12. ↵ Reyes-Herrera , P. H. et al. Chromosome-scale genome assembly and annotation of the tetraploid potato cultivar Diacol Capiro adapted to the Andean region . G3 Genes|Genomes|Genetics 14 , jkae139 ( 2024 ). OpenUrl CrossRef 13. ↵ Freire , R. et al. Chromosome-scale reference genome assembly of a diploid potato clone derived from an elite variety . G3 Genes|Genomes|Genetics 11 , jkab330 ( 2021 ). OpenUrl CrossRef 14. van Lieshout , N. et al. Solyntus, the New Highly Contiguous Reference Genome for Potato (Solanum tuberosum) . G3 Genes|Genomes|Genetics 10 , 3489 – 3495 ( 2020 ). OpenUrl CrossRef 15. ↵ Zhou , Q. et al. Haplotype-resolved genome analyses of a heterozygous diploid potato . Nat Genet 52 , 1018 – 1023 ( 2020 ). OpenUrl CrossRef PubMed 16. ↵ Doyle , J. DNA extraction by using DTAB-CTAB procedures . Phytochemical Bulletin 19 , 11 – 17 ( 1987 ). OpenUrl 17. ↵ Cheng , H. , Concepcion , G. T. , Feng , X. , Zhang , H. & Li , H. Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm . Nat Methods 18 , 170 – 175 ( 2021 ). OpenUrl CrossRef PubMed 18. Cheng , H. et al. Haplotype-resolved assembly of diploid genomes without parental data . Nat Biotechnol 40 , 1332 – 1335 ( 2022 ). OpenUrl CrossRef PubMed 19. ↵ Cheng , H. , Asri , M. , Lucas , J. , Koren , S. & Li , H. Scalable telomere-to-telomere assembly for diploid and polyploid genomes with double graph . Nat Methods 21 , 967 – 970 ( 2024 ). OpenUrl CrossRef PubMed 20. ↵ Camacho , C. et al. BLAST+: architecture and applications . BMC Bioinformatics 10 , 421 ( 2009 ). OpenUrl CrossRef PubMed 21. ↵ Wick , R. R. , Schultz , M. B. , Zobel , J. & Holt , K. E. Bandage: interactive visualization of de novo genome assemblies . Bioinformatics 31 , 3350 – 3352 ( 2015 ). OpenUrl CrossRef PubMed 22. ↵ Rhie , A. 
, Walenz , B. P. , Koren , S. & Phillippy , A. M. Merqury: reference-free quality, completeness, and phasing assessment for genome assemblies . Genome Biology 21 , 245 ( 2020 ). OpenUrl CrossRef PubMed 23. ↵ Manni , M. , Berkeley , M. R. , Seppey , M. , Simão , F. A. & Zdobnov , E. M. BUSCO Update: Novel and Streamlined Workflows along with Broader and Deeper Phylogenetic Coverage for Scoring of Eukaryotic, Prokaryotic, and Viral Genomes . Molecular Biology and Evolution 38 , 4647 – 4654 ( 2021 ). OpenUrl CrossRef PubMed 24. ↵ Li , H. Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM . arxiv: 1303.3997 [q-bio] ( 2013 ). 25. ↵ Open2C et al. Pairtools: From sequencing data to chromosome contacts . PLOS Computational Biology 20 , e1012164 ( 2024 ). OpenUrl CrossRef 26. ↵ Danecek , P. et al. Twelve years of SAMtools and BCFtools . GigaScience 10 , giab008 ( 2021 ). OpenUrl CrossRef PubMed 27. ↵ Zhou , C. , McCarthy , S. A. & Durbin , R. YaHS: yet another Hi-C scaffolding tool . Bioinformatics 39 , btac808 ( 2023 ). OpenUrl CrossRef PubMed 28. ↵ Dudchenko , O. et al. The Juicebox Assembly Tools module facilitates de novo assembly of mammalian genomes with chromosome-length scaffolds for under $1000. 254797 Preprint at doi: 10.1101/254797 ( 2018 ). OpenUrl Abstract / FREE Full Text 29. ↵ Juicebox Provides a Visualization System for Hi-C Contact Maps with Unlimited Zoom . Cell Systems 3 , 99 – 101 ( 2016 ). OpenUrl CrossRef PubMed 30. ↵ Ou , S. et al. Benchmarking transposable element annotation methods for creation of a streamlined, comprehensive pipeline . Genome Biology 20 , 275 ( 2019 ). OpenUrl CrossRef PubMed 31. ↵ Dobin , A. et al. STAR: ultrafast universal RNA-seq aligner . Bioinformatics 29 , 15 – 21 ( 2013 ). OpenUrl CrossRef PubMed Web of Science 32. ↵ Shumate , A. , Wong , B. , Pertea , G. & Pertea , M. Improved transcriptome assembly using a hybrid of long and short reads with StringTie . 
PLOS Computational Biology 18 , e1009730 ( 2022 ). OpenUrl CrossRef 33. ↵ Mapleson , D. , Venturini , L. & Swarbreck , D. EI-CoreBioinformatics/portcullis . EI-CoreBioinformatics ( 2024 ). 34. ↵ Li , H. New strategies to improve minimap2 alignment accuracy . Bioinformatics 37 , 4572 – 4574 ( 2021 ). OpenUrl CrossRef PubMed 35. ↵ Prjibelski , A. D. et al. Accurate isoform discovery with IsoQuant using long reads . Nat Biotechnol 41 , 915 – 918 ( 2023 ). OpenUrl CrossRef PubMed 36. ↵ Kuo , R. I. et al. Illuminating the dark side of the human transcriptome with long read transcript sequencing . BMC Genomics 21 , 751 ( 2020 ). OpenUrl CrossRef PubMed 37. ↵ Gabriel , L. et al. BRAKER3: Fully automated genome annotation using RNA-seq and protein evidence with GeneMark-ETP, AUGUSTUS, and TSEBRA . Genome Res . 34 , 769 – 777 ( 2024 ). OpenUrl Abstract / FREE Full Text 38. ↵ Holst , F. et al. Helixer–de novo Prediction of Primary Eukaryotic Gene Models Combining Deep Learning and a Hidden Markov Model . 2023.02.06.527280 Preprint at doi: 10.1101/2023.02.06.527280 ( 2023 ). OpenUrl Abstract / FREE Full Text 39. ↵ Stiehler , F. et al. Helixer: cross-species gene annotation of large eukaryotic genomes using deep learning . Bioinformatics 36 , 5291 – 5298 ( 2021 ). OpenUrl CrossRef PubMed 40. ↵ Shumate , A. & Salzberg , S. L. Liftoff: accurate mapping of gene annotations . Bioinformatics 37 , 1639 – 1643 ( 2021 ). OpenUrl CrossRef PubMed 41. ↵ Venturini , L. , Caim , S. , Kaithakottil , G. G. , Mapleson , D. L. & Swarbreck , D. Leveraging multiple transcriptome assembly methods for improved gene structure annotation . GigaScience 7 , giy093 ( 2018 ). OpenUrl PubMed 42. ↵ Nevers , Y. et al. Quality assessment of gene repertoire annotations with OMArk . Nat Biotechnol 1 – 10 ( 2024 ) doi: 10.1038/s41587-024-02147-w . OpenUrl CrossRef 43. ↵ Cantalapiedra , C. P. , Hernández-Plaza , A. , Letunic , I. , Bork , P. & Huerta-Cepas , J. 
eggNOG-mapper v2: Functional Annotation, Orthology Assignments, and Domain Prediction at the Metagenomic Scale. Molecular Biology and Evolution 38, 5825–5829 (2021). OpenUrl CrossRef PubMed 44. ↵ Huerta-Cepas, J. et al. eggNOG 5.0: a hierarchical, functionally and phylogenetically annotated orthology resource based on 5090 organisms and 2502 viruses. Nucleic Acids Research 47, D309–D314 (2019). OpenUrl CrossRef PubMed 45. ↵ MapMan4: A Refined Protein Classification and Annotation Framework Applicable to Multi-Omics Data Analysis. Molecular Plant 12, 879–892 (2019). OpenUrl CrossRef PubMed 46. ↵ Emms, D. M. & Kelly, S. OrthoFinder: phylogenetic orthology inference for comparative genomics. Genome Biology 20, 238 (2019). OpenUrl CrossRef PubMed 47. ↵ Li, K., Xu, P., Wang, J., Yi, X. & Jiao, Y. Identification of errors in draft genome assemblies at single-nucleotide resolution for quality assessment and improvement. Nat Commun 14, 6556 (2023). OpenUrl CrossRef PubMed 48. ↵ Zagorščak, M. et al. Evidence-based unification of potato gene models with the UniTato collaborative genome browser. Front. Plant Sci. 15 (2024). 49. ↵ Bozan, I. et al. Pangenome analyses reveal impact of transposable elements and ploidy on the evolution of potato species. Proceedings of the National Academy of Sciences 120, e2211117120 (2023). OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted January 14, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas.
You are going to email the following Haplotype-resolved genome assembly of the tetraploid potato cultivar Désirée Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Haplotype-resolved genome assembly of the tetraploid potato cultivar Désirée Tim Godec , Sebastian Beier , Natalia Yaneth Rodriguez-Granados , Rashmi Sasidharan , Lamis Abdelhakim , Markus Teige , Björn Usadel , Kristina Gruden , Marko Petek bioRxiv 2025.01.14.631659; doi: https://doi.org/10.1101/2025.01.14.631659 Share This Article: Copy Citation Tools Haplotype-resolved genome assembly of the tetraploid potato cultivar Désirée Tim Godec , Sebastian Beier , Natalia Yaneth Rodriguez-Granados , Rashmi Sasidharan , Lamis Abdelhakim , Markus Teige , Björn Usadel , Kristina Gruden , Marko Petek bioRxiv 2025.01.14.631659; doi: https://doi.org/10.1101/2025.01.14.631659 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Plant Biology Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17690) Bioengineering (13892) Bioinformatics (41935) Biophysics (21451) Cancer Biology (18587) Cell Biology (25499) Clinical Trials (138) Developmental Biology (13377) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24318) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88601) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15152) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper — AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citing papers typically take a year or two to appear, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00