Draft Genome of the Endangered Visayan Spotted Deer (Rusa alfredi), a Philippine Endemic Species

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 64,801 characters · extracted from preprint-html · click to expand
Draft Genome of the Endangered Visayan Spotted Deer (Rusa alfredi), a Philippine Endemic Species | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Draft Genome of the Endangered Visayan Spotted Deer ( Rusa alfredi) , a Philippine Endemic Species View ORCID Profile Ma. Carmel F. Javier , View ORCID Profile Albert C. Noblezada , View ORCID Profile Persie Mark Q. Sienes , View ORCID Profile Robert S. Guino-o , View ORCID Profile Nadia Palomar-Abesamis , View ORCID Profile Maria Celia D. Malay , View ORCID Profile Carmelo S. del Castillo , View ORCID Profile Victor Marco Emmanuel N. 
Ferriols doi: https://doi.org/10.1101/2025.02.05.636739 Ma. Carmel F. Javier 1 Philippine Genome Center Visayas, University of the Philippines Visayas , Miagao Iloilo Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ma. Carmel F. Javier Albert C. Noblezada 1 Philippine Genome Center Visayas, University of the Philippines Visayas , Miagao Iloilo Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Albert C. Noblezada Persie Mark Q. Sienes 4 Biology Department, Silliman University , Dumaguete Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Persie Mark Q. Sienes For correspondence: vnferriols{at}up.edu.ph persieqsienes{at}su.edu.ph Robert S. Guino-o 5 Angelo King Center for Research and Environmental Management, Silliman University , Dumaguete Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Robert S. Guino-o Nadia Palomar-Abesamis 4 Biology Department, Silliman University , Dumaguete Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Nadia Palomar-Abesamis Maria Celia D. Malay 6 Marine Science Institute, University of the Philippines Diliman , Quezon City Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Maria Celia D. Malay Carmelo S. del Castillo 2 Institute of Aquaculture, College of Fisheries and Ocean Sciences, University of the Philippines Visayas , Miagao Iloilo 3 National Institute of Molecular Biology and Biotechnology, University of the Philippines Visayas , Miagao Iloilo Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Carmelo S. del Castillo Victor Marco Emmanuel N. 
Ferriols 1 Philippine Genome Center Visayas, University of the Philippines Visayas , Miagao Iloilo 2 Institute of Aquaculture, College of Fisheries and Ocean Sciences, University of the Philippines Visayas , Miagao Iloilo Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Victor Marco Emmanuel N. Ferriols For correspondence: vnferriols{at}up.edu.ph persieqsienes{at}su.edu.ph Abstract Full Text Info/History Metrics Preview PDF ABSTRACT The Visayan Spotted Deer ( Rusa alfredi ) is an endangered and endemic species in the Philippines facing significant threats from habitat loss and hunting. It is considered as the world’s most threatened deer species by the International Union for Conservation of Nature (IUCN) thus its conservation has been a top priority. Despite its status, there is a notable lack of genomic information available for R. alfredi and the genus Rusa in general. This study presents the first draft genome assembly of the Visayan Spotted Deer (VSD), Rusa alfredi , using Illumina short-read sequencing technology. The RusAlf_1.1 assembly has a 2.52 Gb total length with a contig N50 of 46 Kb and scaffold N50 size of 75 Mb. The assembly has a BUSCO complete score of 95.5%, demonstrating the genome’s completeness, and includes the annotation of 24,531 genes. Phylogenetic analysis based on single-copy orthologs reveals a close evolutionary relationship between the R. alfredi and the genus Cervus . The availability of the RusAlf_1.1 genome assembly represents a significant advancement in our understanding of the VSD. It opens opportunities for further research in population genetics and evolutionary biology, which could contribute to more effective conservation and management strategies for this endangered species. This genomic resource can help in assuring the survival of Rusa alfredi in the country. 
DATA DESCRIPTION The genus Rusa is native to South and Southeast Asia, inhabiting diverse habitats ranging from dense forests to grasslands [ 1 ]. The Visayan Spotted Deer (VSD), also known as the Philippine Spotted Deer, Rusa alfredi (NCBI: txid1088129 ) , is one of three endemic species in the Philippines and is a highly rare and endangered species indigenous to the Philippines’ Visayan Islands. This region is considered one of the country’s highest conservation priority areas, particularly due to the number of threatened endemic taxa and the degree of threats to species and habitats. Characterized by their soft dark-brown coat and unique nominal spots, R. alfredi once played a vital role as herbivores in shaping vegetation dynamics although its extirpation from most areas makes it difficult to fully determine its historical ecological impact. It has been classified as endangered since 1988 by the International Union for Conservation of Nature (IUCN) Red List of Endangered Species. As of 2016, only an estimated 700 mature individuals remained in the wild. The genus Rusa is facing significant decline in biodiversity worldwide and is under immense threat of global extinction. The geographic range of R. alfredi formerly encompassed the Central Visayan islands of Negros, Panay, Guimaras, Masbate, and Cebu. Presently, only the islands of Panay and Negros shelter small, remnant populations of wild R. alfredi ( Figure 1A ) [ 2 ]. Accurate reports of the population density and distribution of the species in the wild have not yet been established. Like other cervid species in the world, the steep decline in the population of R. alfredi is mainly due to deforestation and hunting, despite being legally protected. Efforts to conserve the population of R. alfredi have been put in place, including the proposed creation of new national parks and properly structured captive breeding for reintroduction to the wild. The first captive breeding program for R. 
alfredi in the country was established at the Department of Biology and the Center for Tropical Conservation Studies (CENTROP) of Silliman University, in Dumaguete City, Negros Oriental, Philippines from Negros Island stock [ 3 ]. Presently, it has the largest captive-bred stock of the species globally. Download figure Open in new tab Download figure Open in new tab Figure 1. A) Distribution Map of R. alfredi based on the IUCN Red List of Threatened Species [ 2 ] B) Photo of the Visayan Spotted Deer (code name: Abraham) at the Center for Tropical Conservation Studies (CENTROP) Silliman University, Dumaguete City, Negros Oriental, Philippines. Photo taken by L. Cabrera. Recent advancements in genomic sequencing have created the possibility of producing large-scale reference genomes, which may offer new insights into an organism’s genetic diversity and architecture. This enables researchers to identify key genetic traits, track evolutionary changes, and develop strategies for conservation and breeding programs aimed at preserving biodiversity and enhancing desirable traits in various organisms. Whereas several genetic technologies are already accessible, few are being used to their full potential. The IUCN lists 15,521 animal species as threatened, and less than 3% of those species have genomic resources that can inform and aid conservation management [ 4 ]. Currently, there is no available reference genome for R. alfredi nor for the genus Rusa . Generation of the reference genome would give us a better understanding of this endangered Visayan-endemic deer’s history, diversity, and demographics significant for the management of the captive population. In this study, the first draft genome assembly of the R. alfredi, was generated using Illumina short-read sequences that could serve as a reference for gene prediction, taxonomy, evolution, landscape genetics, and conservation genomics. 
METHODS Sample Collection The sampling was conducted under the Department of Environment and Natural Resources (DENR) Region VII Gratuitous Permit No. 2022-17. The sample was obtained from a member of the captive population at the Center for Tropical Conservation Studies (CENTROP), Silliman University, Dumaguete City, Negros Oriental, Philippines. A male deer (Abraham; Figure 1B ) was restrained using a net and a piece of ear tissue was collected using an ear notcher, a standard tool for ear tagging in animals. Before release, wound spray was applied to the ear to prevent infection and allow faster healing. The tissue sample was cleaned with 95% ethanol, placed in a 1.5 mL microcentrifuge tube with absolute ethanol, and stored at -20°C for future use. DNA Extraction and Quantification Extraction was performed at Silliman University using the Wizard® SV Genomic DNA Purification System following the manufacturer’s protocol (Promega, 2012). The quality of the genomic DNA was subsequently checked using gel electrophoresis, a Multiskan SkyHigh Spectrophotometer, and a Qubit Fluorometer. Library Preparation The library construction was carried out with 100 ng of genomic DNA following the Illumina DNA library preparation kit manufacturer’s protocol (Illumina, 2020). The resulting amplified library was quantified and controlled on an Agilent Bioanalyzer 2100 (Agilent, Santa Clara, CA) and sequenced in 2 x 151 bp paired-end reads on an Illumina NextSeq 1000. A total of 157.47 Gbp of raw data was generated after sequencing. Genome survey The quality of the short reads was checked using FastQC v0.12.1 [ 5 ]. To remove low-quality reads and sequencing adapters, reads were trimmed using Trimmomatic v0.39 [ 6 ] with the following parameters: ILLUMINACLIP: Nextera-PE-PE.fa:2:30:10 LEADING:30 TRAILING:30 SLIDINGWINDOW:4:20 MINLEN:36. The genome size of Rusa alfredi was estimated using a k-mer-based approach. 
K-mer frequencies were obtained with jellyfish [ 7 ] using the command: jellyfish count -C -m 21 -s 1G <(zcat forwards_reads.fastq.gz) reads.histo. The resulting k-mer histogram was used in GenomeScope2 [ 8 ] to estimate the genome size and heterozygosity. GenomeScope2 was run using the command: genomescope.R -i reads.histo -o genomescope_21 -k 21. Genome Assembly and Quality Assessment Using the trimmed reads, the Rusa alfredi genome was assembled using MaSuRCA v4.1.0 [ 9 ]. The configuration file used for running MaSuRCA includes “PE = pe 500 100” as the recommended safe insert size and standard deviation values for short reads and “GRAPH_KMER_SIZE = auto” for automatic selection of k-mer size (k=99 was selected). The MaSuRCA assembly pipeline was run using the command: “masurca config.txt”. The configuration file was uploaded to GigaDB [ 10 ]. To improve the quality and contiguity of the assembly, contigs were corrected for mis-assemblies and scaffolded based on sequence homology using RagTag version 2.1.0 [ 11 ] with the Cervus elaphus genome (GenBank assembly accession number: GCF_910594005.1) as the reference. Assembly correction was performed using RagTag with default parameters. Corrected contigs were then used for scaffolding using RagTag with default parameters. General metrics for assessing the quality of the assembly were determined using QUAST v5.2.0 [ 12 ]. QUAST was run with the “--large” option and with the inclusion of the paired-end reads by adding the “-1” and “-2” flags to provide results for the assembly coverage. The contigs and scaffolds were also checked for completeness using Benchmarking Universal Single-Copy Orthologs (BUSCO v5.4.4) [ 13 ] with the cetartiodactyla_odb10 lineage dataset. The assembled genome was visualized using BlobToolKit v4.3.5 [ 14 ]. The quality of the assembly was evaluated using Merqury [ 15 ]. K-mer counts from the reads were first obtained using the command: meryl k=21 *fastq.gz output reads.meryl threads=30 memory=30. 
Assembly consensus quality value (QV), k-mer completeness, and spectra-cn plots were generated by running the command: merqury.sh reads.meryl GCA_038501445.1_RusAlf_1.1_genomic.fna abraham_merq. The number of heterozygous sites and base coverage were determined based on the reads’ alignment to the assembled genome. Reads were mapped back to the assembly using BWA-MEM [ 16 ] with the command: bwa mem -t 12 GCA_038501445.1_RusAlf_1.1_genomic.fna forward_reads.fastq.gz reverse_reads.fastq.gz | gzip -3 > aln-pe.sam.gz. The alignment was further processed using SAMtools v1.20 [ 17 ]. Specifically, mate information was added to the alignment using samtools fixmate, followed by samtools sort, and samtools markdup for marking and removing the duplicates (with -r flag). Average base coverage was determined using samtools depth. The alignment was also used for obtaining the raw variant call format (VCF) file using BCFtools v1.21 [ 17 ] by running the command: bcftools mpileup -Ou -f GCA_038501445.1_RusAlf_1.1_genomic.fna alignment.fxm.sorted.rmdup.bam | bcftools call -mv -Ov -o raw_variants.vcf. The filtered VCF file was obtained by running the command: bcftools filter -e ‘QUAL < 30 || DP < 10’ -o filtered_variant.vcf -O v raw_variants.vcf. The number of heterozygous sites was determined using the command: bcftools view -i ‘GT=“0/1”’ filtered_variant.vcf | grep -v “^#” | wc -l. Repeats and Gene Annotation Before the annotation, the assembly was screened for contaminants and the presence of mitochondrial sequences. Detected mitochondrial sequences in the assembly were either trimmed or removed from the assembly using Seqkit v2.7.0 [ 18 ]. De novo identification of the repeats was performed in the assembly using RepeatModeler v2.0.5 [ 19 ]. The database for RepeatModeler was first generated by running the command: BuildDatabase -name VSD GCA_038501445.1_RusAlf_1.1_genomic.fna. 
It was followed by de novo repeat identification using the command: RepeatModeler -database VSD -threads 12 -LTRStruct. The resulting library of repeats was then merged with the mammals repeat library extracted from the Dfam database [ 20 ] using famdb.py script. Mammalian repeat library was obtained using the command: famdb.py -i Dfam.h5 families -a -d -f fasta_name “mammals” > mammals_repeat_library.fasta. The combined libraries were then used to soft mask the repeats in the genome using RepeatMasker v4.1.5 [ 21 ] with ‘ -s -xsmall ’ options. For gene annotation, homology-based gene prediction was performed using Gene Model Mapper (GeMoMa v1.9) Pipeline [ 22 ] with Cervus elaphus genome (GenBank accession number: GCF_910594005.1) as reference. GeMoMa was run using the command: GeMoMa -Xmx50G GeMoMaPipeline threads=12 outdir=GeMoMa GeMoMa.Score=ReAlign AnnotationFinalizer.r=NO o=true t=RusAlf_v1.1.fna a=mCerela.gff g=GCF_910594005.1_mCerEla1.1_genomic.fna. Additional gene annotation was obtained using BRAKER v3.0.8 annotation pipeline C [ 22 – 32 ]. Vertebrata protein sequences from the OrthoDB v11 [ 34 ] partition were used to serve as extrinsic evidence for gene prediction in the soft-masked genome. BRAKER-annotated genes were filtered by retaining only those with hits in the Pfam database [ 35 ], identified using InterProScan v5.72-103 [ 36 ]. Verified annotated genes from BRAKER were then added to gene annotation from GeMoMa using AGAT v1.4.2 [ 37 ]. To ensure that gene annotation structures are retained, only gene annotations from BRAKER with no overlapping CDS are added to the gene annotations from GeMoMa to generate the final gene set using the agat_sp_complement_annotations.pl script. Protein sequences from the final gene set of R. alfredi were extracted for further downstream analysis. Phylogenetic Tree A phylogenetic tree of R. alfredi and other species of cervids was constructed based on single copy orthologs. 
Protein sequences from reference genomes of Odocoileus virginianus, Rangifer tarandus, Muntiacus muntjak, Muntiacus reevesi, Dama dama, Cervus hanglu yarkandensis, C. elaphus , and C. canadensis were downloaded from GenBank while sequences for Cervus nippon were downloaded from the figshare database [ 38 ]. These sequences were used together with the predicted protein sequences of R. alfredi to create the species tree. Asian water buffalo ( Bubalus bubalis ) was included to serve as an outgroup. The longest transcript per gene in each species’ protein dataset was identified and retained using primary_transcript.py from OrthoFinder v.2.5.5 [ 39 ]. Single-copy orthologs were identified using OrthoFinder v.2.5.5 [ 39 ]; the sequences were renamed with the corresponding species ID using Seqkit v2.7.0 [ 18 ], and each ortholog was aligned using MUSCLE v5.1.0 [ 40 ]. Aligned sequences were then concatenated using Seqkit v2.7.0 [ 18 ] and trimming was performed using GBLOCKS v0.91b [ 41 ] with default parameters. The maximum likelihood tree was generated using IQ-TREE v2.3.6 [ 42 ] with ModelFinder [ 43 ] for model selection based on Bayesian Information Criterion (BIC) and bootstrap set at 1000. ML tree based on single-copy orthologs was constructed using the command: iqtree -s MSA_cervid_sco_concat_sorted_trimmed.fasta -m MFP -B 1000. The resulting ML tree was then visualized using iTOL [ 44 ]. Mitochondrial Genome Assembly, Annotation, and Phylogenetics The R. alfredi mitochondrial genome was also assembled using MITObim v1.9.[ 45 ]. Complete COI sequence from the existing Rusa alfredi complete mitogenome (NCBI Accession no. JN632698.1 ) was used as seed fasta for the assembly. A random sampling of 20% of reads was performed using the following command: “downsample.py -s 20 --interleave -r forward_read -r reverse_read | gzip > sampled_20.fastq.gz”. 
Sampled reads were then used for the assembly by running the command: MITObim.pl -start 1 -end 100 -sample mysample -ref myref -readpool sampled_20.fastq.gz -quick seed.fasta --pair. The circular topology of the assembly was checked using the command: circules.py -f assembled_mtDNA.fasta. Annotation was then performed in the assembled mitogenome using MitoZ v3.6 [ 46 ]. Annotation of the assembled mitogenome was performed using the command: mitoz annotate --fastafiles Abraham_mtDNA_genome.fasta --outprefix annotation --thread_number 12 --clade Chordata. A maximum likelihood (ML) tree was also constructed based on concatenated 13 coding sequences of mitochondrial genomes of different species of cervids. Concatenated coding sequences were aligned in MEGA11 [ 47 ] using MUSCLE [ 40 ]. After alignment, the maximum likelihood tree was constructed using IQ-TREE v2.3.6 [ 42 ] with the use of ModelFinder [ 43 ] for model selection based on the Bayesian Information Criterion (BIC). The ML tree was constructed with a bootstrap of 1000. Water buffalo ( Bubalus bubalis ) was selected as an outgroup. The phylogenetic tree was visualized in iTOL [ 44 ]. ML tree based on single-copy orthologs was constructed using the command: iqtree -s MSA_cervid_sco_concat_sorted_trimmed.fasta -m MFP -B 1000. The resulting ML tree was then visualized using iTOL [ 44 ]. Ethics Declaration All study procedures and use of experimental animals were conducted following the Republic Act (RA) No. 8485 or The Animal Welfare Act of 1998 of the Philippines. The tissue sampling carried out for this study was approved by the Department of Environment and Natural Resources (DENR) Region VII (Gratuitous Permit No. 2022-17 Series of 2022). RESULTS AND DISCUSSION Reference genomes play a crucial role in understanding genetic variation and the molecular underpinnings of traits across various organisms. 
They facilitate gene annotation, regulatory element identification, and the elucidation of biological processes. Molecular investigations in cervid species have predominantly focused on systematic relationships using mitochondrial genomes [ 48 ] leaving gaps in understanding the adaptive potential and genetic basis of traits and the resolution of deeper nodes (above the family level) in population studies. Moreover, mitochondrial genomes alone may not provide a complete reconstruction of a species’ evolutionary history since they are maternally inherited. Furthermore, several species are underrepresented in genomic databases due to their threatened conservation status or lack of available data, hindering sample collection [ 49 ]. This study presents the draft genome assembly of Rusa alfredi (RusAlf_1.1), marking the first genome assembly for the genus Rusa . This contribution is pivotal for conducting integrative analyses essential for the conservation and management strategies of R. alfredi amidst human and environmental threats and emerging diseases. Genome Survey The genome of R. alfredi (codename: Abraham) is estimated to be 2.37 Gb in length with a low level of heterozygosity (0.30%) based on k-mer analysis using GenomeScope2 [ 8 ] ( Figure 2 ). Based on the analysis using the mapped reads, a total of 4,305,197 (0.17%) heterozygous sites were identified, confirming the low heterozygosity of the genome. The genome size was also similar to the genome size estimated by MaSuRCA (2.37 Gb) and close to the actual total length of the assembled contigs (2.51 Gb). The k-mer distribution showed a single peak indicating a high homozygosity (99.70%) of the assembled genome. Download figure Open in new tab Figure 2. K-mer (21) distribution using GenomeScope2 for estimating the genome size and heterozygosity of the Rusa alfredi genome. 
len-estimated haploid genome length; aa-homozygosity; ab-heterozygosity; k-cov-mean heterozygous k-mer coverage, err-read error rate; dup- the average rate of read duplications; k: k-mer size used for the run; p-ploidy. Captive populations have low heterozygosity compared to wild populations primarily due to factors like inbreeding and bottleneck effect [ 50 ], which limit genetic diversity in smaller, isolated groups. The variations between the two haplotypes in genomes with low heterozygosity often involve smaller-scale differences, making alignment easier during genome assembly and leading to accurate consensus sequences. In deer genomes, such as those of sika deer [ 51 ] and white-tailed deer [ 52 ], low levels of heterozygosity have been shown to simplify de novo assembly and improve alignment accuracy. Genome assembly and quality assessment The assembled draft genome of R. alfredi , RusAlf_1.1 has 171,678 total contigs with a total length of 2.5 Gb. The genome size of the assembled genome was found to be comparable to the genomes of other cervid species such as Cervus hanglu yarkandensis with 2.6 Gb (CEY_v1, GenBank accession GCA_010411085.1 ) and Muntiacus muntjak with 2.57 Gb (UCB_Mmun_1.0, GenBank accession GCA_008782695.1 ). The assembled genome has low contiguity with a contig N50 of 46 Kb, which was expected considering the limitations of short paired-end reads (2 x 151 bp) to resolve the repeats in large genomes [ 53 ]. Long-read sequences are usually added for the assembly to achieve higher contiguity, which adds to the overall cost associated with genome assembly efforts. Genome assembly can be improved using reference genomes provided the reference genome is closely related to the target species. Genus Cervus is one of the closest relatives of R. alfredi based on the phylogenetic tree of mitochondrial genomes of the tribe Cervini [ 54 ]. 
For the draft genome RusAlf_1.1, mCerEla1.1 (GenBank accession GCA_910594005.1 ) was used to correct misassembled contigs based on sequence homology and improve the assembly through homology-based scaffolding. The final assembly has a total of 57,916 scaffolds, scaffold N50 of 75 Mb and scaffold L50 of 13 ( Figure 3A ). The same homology-based assembly was performed using C. elaphus , mCerEla1.1 as a reference for mounting contigs for chromosome-level assembly of the fallow deer ( Dama dama ) reference genome [ 55 ]. Download figure Open in new tab Figure 3. A) Assembly metrics and BUSCO scores of RusAlf_1.1 and B) Repeat elements in the draft genome of RusAlf_1.1 The quality of genome assemblies is generally assessed based on contiguity and completeness. It was highlighted that interpreting the quality of the assembly using metrics like N50 or L50 alone can be misleading, as it only measures the assembly contiguity and does not consider the assembly completeness and correctness [ 56 ]. In this study, despite the low level of contiguity, the draft genome of R. alfredi scored a high level of completeness with 95.5% complete BUSCO using cetartiodactyla_odb10 (n=13335). The assembled genome also scored a high Merqury QV of 47 (equivalent to about 99.99% base accuracy) and a completeness score of 96.76%. In addition, the k-mer spectrum plot shows a single high peak for 1-copy k-mer (red) and a very small area covered for 2-copy k-mer (blue) indicating a homozygous genome ( Figure 4 ). The assembly quality was also checked by mapping the reads back to the final assembly using the QUAST pipeline. The read mapping results revealed that 99.38% of the reads were successfully mapped back to the assembly with a mean base coverage of 47x. This study showed that a high level of assembly completeness of the draft genome can still be achieved using only short paired-end reads. 
It is worth noting that the estimated genome size from the genome survey is smaller than the assembled genome size mainly due to differences in methodology. K-mer analysis underestimates size by excluding repetitive sequences and errors, while whole genome assembly includes all data, including repetitive regions and possible duplications. This results in a larger assembled genome size compared to the survey estimate. Download figure Open in new tab Figure 4. Merqury k-mer spectrum plot of the assembled genome of Rusa alfredi against the Illumina short paired-end reads. Read-only (grey) represents k-mers that are only found in reads but not in the assembly. Colors represent k-mers found in reads and the assembly 1x (red), 2x (blue), 3x (green), 4x (purple) and >4x (yellow) Comparison of the BUSCO results between contigs (from MaSuRCA) and scaffolds (MaSuRCA+RagTag correct and scaffold) of the assembly shows improvement in the completeness of the assembly from 74.2% to 95.5% complete score ( Table 1 ). RagTag statistics after scaffolding also showed high confidence scores (average grouping confidence: 99.78%; average location confidence: 99.65%). However, it should be noted that using reference genomes of different species for scaffolding could introduce errors considering the structural variations even between genomes of two related species. As there are no current genetic maps and limited related genomic resources for Rusa alfredi , structural variations in the genome could be addressed and validated in future studies by incorporating long read sequencing as well as Hi-C libraries. Nevertheless, the current draft genome of R. alfredi serves as a valuable foundational resource for continued conservation of this species. View this table: View inline View popup Download powerpoint Table 1. BUSCO summary results for contigs and scaffolds of Rusa alfredi draft genome. 
Genome annotation report The RusAlf_1.1 genome is comprised of 44.27% of total interspersed repeat sequences. Most repeats were classified as retroelements comprising 40.16 % of the genome, followed by DNA transposons with 2.61 %, and unclassified repeats with 1.50 % ( Figure 3B ). The repetitive sequence analysis reveals similarities in the genomic compositions to several cervid species’ genomes. For instance, in the Sika deer ( Cervus nippon ), repetitive sequences make up around 45.38 % of its genome [ 57 ]. Among these repetitive elements, long interspersed nuclear elements (LINEs), short interspersed nuclear elements (SINEs), and long terminal repeats (LTRs) are the most abundant. Similar patterns are observed in Tarim red deer ( Cervus elaphus yarkandensis) [ 58 ], Siberian musk deer ( Moschus moschiferus ) [ 59 ], white-tailed deer ( Odocoileus virginianus ) [ 60 ], and reindeer ( Rangifer tarandus ) [ 61 ], where repetitive sequences account for significant portions of their genomes, ranging from 39.1% to 42.4%. It was also found that the simple sequence repeats (SSRs) make up about 0.76 % of the RusAlf_1.1 genome. These SSRs or microsatellites are highly polymorphic loci that can be used for conservation genetics to estimate genetic structure. Genetic diversity plays a crucial role in wildlife management and disease mitigation, as demonstrated by studies on wild pig populations in Texas and roe deer in Iberia, emphasizing the need to integrate genetic data into conservation strategies [ 62 ], [ 63 ]. The captive population of Silliman University would directly benefit from the assembled genome in assisting their current genetic diversity studies. Gene annotation of RusAlf_1.1 was initially performed through homology-based gene prediction. The addition of transcriptome data has been shown to improve the accuracy of gene prediction [ 64 ], especially for de novo gene prediction. However, obtaining transcriptome data for critically endangered species like R. 
alfredi is challenging due to its limited population size and the ethical and logistical constraints of sampling. Additionally, obtaining a sample for RNA-Seq in this study was not possible due to limited financial resources. Nevertheless, a total of 22,862 genes were predicted from the RusAlf_1.1 genome through homology, which is comparable to the 22,941 predicted genes in the red deer ( Cervus elaphus ) genome [ 65 ]. To further predict genes present in the genome, additional gene prediction was performed using the BRAKER pipeline C, incorporating a protein database for external evidence in gene prediction. BRAKER initially predicted a total of 35,129 genes, of which 16,343 were verified using InterProScan with the Pfam database. Among these verified genes, 1,669 contained coding sequences (CDS) unique to BRAKER and did not overlap with the GeMoMa annotation. These genes with non-overlapping CDS were added to the GeMoMa annotation, resulting in a final gene count of 24,531 for R. alfredi. The predicted genes in RusAlf_1.1 were then used to study the phylogenetic relationship of R. alfredi with other cervids with sequenced genomes. Phylogenetic Inference A phylogenetic tree was constructed based on single-copy orthologs of different species of deer ( Figure 5 ). The resulting phylogenetic tree showed a monophyletic grouping of the four species of Cervus , namely C. elaphus (GenBank accession GCA_910594005.1 ), C. hanglu yarkandensis (CEY_v1, GenBank accession GCA_010411085.1 ), Cervus nippon (GenBank accession GCA_040085125.1 ), and C. canadensis (GenBank accession GCF_019320065.1 ). A similar tree was depicted in a previous study [ 66 ]; the tree presented here additionally includes RusAlf_1.1. The species tree revealed a close relationship between RusAlf_1.1 and the genus Cervus . This result supports a previous study based on complete mitochondrial genomes suggesting that the genus Rusa is sister to Cervus [ 67 ]. However, the phylogenetic position of R. 
alfredi relative to other species of Rusa could not be evaluated due to the absence of genome data for other Rusa species. The continued efforts for the genome assembly of Rusa spp. will be crucial for elucidating the evolutionary relationships between Cervus and Rusa . Download figure Open in new tab Figure 5. Maximum likelihood (ML) tree of different cervid species based on single copy orthologs. ML tree was constructed from multiple sequence alignment (MSA) concatenated 7,188 single-copy orthologs of nuclear genomes. MSA has a total of 3,992,528 amino acid sites after trimming. ML tree was constructed using Q.mammal+F+I+R10 substitution model and bootstrap set at 1000. Mitochondrial Genome Assembly, Annotation, and Phylogenetics The complete mitochondrial genome for RusAlf_1.1 was also assembled using short paired-end reads. The final assembly has a 16,356 bp total length. A total of 13 coding genes, 22 tRNA genes, and two (2) rRNA genes were annotated in the assembled mitogenome. The assembly was uploaded in the GenBank with accession no. PQ083075 . An ML tree of different species of cervids using concatenated coding sequences of mitogenomes ( Figure 6 ) showed subdivisions between subfamilies of cervids which are Capreolinae and Cervinae. The monophyletic grouping of R. alfredi Abraham (GenBank Accession no. PQ083075.1 ) and the reference mitogenome for R. alfredi (GenBank Accession no. JN632698.1 ) was also observed. The ML tree result further supports the close relationship between R. alfredi and the genus Cervus . Download figure Open in new tab Figure 6. Maximum likelihood (ML) tree of different species of cervids based on concatenated coding sequences from complete mitochondrial genomes. ML tree was constructed using the TIM2+F+I+R3 substitution model with 1000 bootstrap replicates. Mitogenome sequences have become valuable resources for elucidating phylogenetic relationships among different cervids. 
For instance, the proposed transfer of Rucervus eldii to the genus Panolia was due to mitogenomic evidence for its close relationship with Elaphurus davidianus and separation from Rucervus duvaucelii [ 67 ], [ 68 ]. In this study, it was found that Rusa forms an evolutionary grade with Cervus due to the position of the latter as a monophyletic clade nested within Rusa. Rusa alfredi was recovered as basal to the Rusa + Cervus clade, agreeing with a previous mitogenomic phylogeny [ 67 ]. The basal position of R. alfredi in the clade raises interesting questions about the evolutionary history of the cervids, particularly in island environments such as the Philippines. The patterns of speciation and diversification of cervids in insular Southeast Asia warrant further study. It is recommended to also sequence the genome of R. marianna , another cervid endemic to the Philippines, as well as other allopatrically-distributed Rusa populations, to elucidate their evolutionary histories and taxonomic distinctiveness. Increased taxon sampling could also potentially serve to test the Pleistocene Aggregate Island Complex (PAIC) theory [ 69 ] by examining patterns of divergence, gene flow, and demographic history of deer populations that were potentially connected during periods of low sea levels but are currently separated on different islands. In addition, Pleistocene climate-driven changes in the availability of suitable habitats may have also caused disjunct distributions and diversification [ 70 ]. Paleoclimatic models can be incorporated to understand the physical and environmental factors that may have promoted diversification in Philippine cervids. Previous studies have shown evidence of hybridization between different species of Cervus [ 71 ], [ 72 ] and between species of Rusa [ 73 ]. 
Subsequent backcrossing of hybrids to the population could cause mitochondrial introgression, which could obscure or complicate phylogenetic reconstruction if based solely on the mitochondrial genomes. In the case of R. alfredi , although hybridization between R. alfredi and R. marianna was previously observed [ 2 ], [ 3 ], it is unlikely to occur at present given the current small population size and non-overlapping geographic distributions of the two species. Also, there is still a lack of genetic evidence for the previous report of hybridization between R. alfredi and R. marianna that can support the possible mitochondrial introgression in the current captive and wild populations of R. alfredi . Providing whole genome sequences for other native species of Rusa could further provide genomic resources for detecting hybrids, which will also help in the management and monitoring of these species, especially for the reintroduction of captive populations in the wild. The assembled genome of R. alfredi represents an advancement in research and conservation efforts for this endangered endemic species. It not only reinforces previous taxonomic classifications of R. alfredi but also facilitates the evaluation of its evolutionary relationships with other species of Rusa and Cervus [ 3 ], [ 74 ]. This underscores the importance of obtaining genomic data from additional Rusa and Cervus species. Considering the limitations of a draft assembly based on short-read sequencing and the possibility of misassembly given the methods and resources used, the quality of the genome for R. alfredi can be improved by adding RNA-Seq data, performing karyotyping to establish a clear chromosomal framework, integrating long-read sequencing to enhance contiguity and accuracy, and utilizing Hi-C libraries to detect and resolve structural variations. These approaches will not only refine the genome assembly but also provide critical insights into structural differences between R. 
alfredi and Cervus species, ultimately contributing to more robust conservation strategies. Nevertheless, the initial availability of this genomic resource will support the development of targeted conservation strategies among the captive population. Incorporating samples from wild populations of R. alfredi will also allow us to identify genes that have evolved in captive settings, informing us about survival adaptations crucial for reintroduction efforts into the wild. This work enables further studies such as microsatellite analysis, SNP discovery, RADseq, reference gene characterization, and whole-genome resequencing [ 75 ]. DATA AVAILABILITY This genome assembly has been deposited at NCBI GenBank under the accession JBCEYX000000000 . All sequencing reads can be accessed through the NCBI SRA (Project number: PRJNA1102104 ). Files generated in this study (Illumina reads, codes, configuration file for assembly, assembled genome, annotations, MSA, and phylogenetic tree files) are available in GigaDB [ 10 ]. FUNDING This work was supported by the Philippine Genome Center Visayas, University of the Philippines Visayas. COMPETING INTERESTS The authors declare that there is no conflict of interest. AUTHOR CONTRIBUTIONS M.C.M., V.M.E.F., C.D.C., R.S.G., and N.P.A. conceptualized and supervised the study. V.M.E.F. secured the funding for the conduct of the study. P.M.S. facilitated the permit for sample collection and handled sample preparation before sequencing. M.C.J. performed the experiment and managed the project; A.N. conducted the assembly and bioinformatics analysis. M.C.J. and A.N. wrote the manuscript with contributions from all authors. All authors reviewed and approved the final manuscript. SUBJECT AREAS Genetics and Genomics; Bioinformatics, Evolutionary Biology ACKNOWLEDGEMENT The authors would like to express their gratitude to Ozzy Boy S. Nicopior for his assistance in generating the distribution map used in this paper. 
Footnotes mfjavier{at}up.edu.ph , acnoblezada{at}up.edu.ph , csdelcastillo{at}up.edu.ph , nadiapabesamis{at}su.edu.ph , robertsguino-o{at}su.edu.ph , mdmalay{at}up.edu.ph ABBREVIATIONS IUCN International Union for Conservation of Nature VSD Visayan Spotted Deer BUSCO Benchmarking Universal Single-Copy Orthologs CENTROP Center for Tropical Conservation Studies QGIS Quantum Geographic Information System DENR Department of Environment and Natural Resources MaSuRCA Maryland Super-Read Celera Assembler QUAST quality assessment tool for genome assemblies GeMoMa Gene Model Mapper MITObim mitochondrial baiting and iterative mapping MEGA Molecular Evolutionary Genetics Analysis MUSCLE Multiple Sequence Comparison by Log-Expectation BIC Bayesian Information Criterion ML Maximum Likelihood LINEs long interspersed nuclear elements SINEs short interspersed nuclear elements LTRs long terminal repeats iTOL Interactive Tree of Life PAIC Pleistocene Aggregate Island Complex CDS Coding Sequences References [1]. ↵ N. A. N. G. Ali , M. L. Abdullah , S. A. M. Nor , T. M. Pau , N. A. M. Kulaimi , and D. M. Naim , “ A review of the genus Rusa in the indo-malayan archipelago and conservation efforts ,” Saudi J Biol Sci , vol. 28 , no. 1 , pp. 10 – 26 , Jan. 2021 , doi: 10.1016/J.SJBS.2020.08.024 . OpenUrl CrossRef PubMed [2]. ↵ S. Brook , “ Rusa alfredi, Phillipine Spotted Deer ,” The IUCN Red List of Threatened Species , 2016 , doi: e.T4273A22168782. 10.2305/IUCN.UK.2016-2.RLTS.T4273A22168782.en . OpenUrl CrossRef [3]. ↵ W. L. R. Oliver , C. R. Cox , and L. L. Dolar , “ The Philippine Spotted Deer Conservation Project ,” Oryx , vol. 25 , no. 4 , pp. 199 – 205 , 1991 , doi: 10.1017/S0030605300034335 . OpenUrl CrossRef [4]. ↵ C. J. Hogg et al. , “ Threatened Species Initiative: Empowering conservation action using genomic resources ,” Proc Natl Acad Sci U S A , vol. 119 , no. 4 , p. e2115643118 , Jan. 2022 , doi: 10.1073/PNAS.2115643118/SUPPL_FILE/PNAS.2115643118.SAPP.PDF . 
OpenUrl Abstract / FREE Full Text [5]. ↵ S. Andrews , “ FastQC: a quality control tool for high throughput sequence data .” Accessed: Sep . 14 , 2024 . [Online]. Available: https://www.bioinformatics.babraham.ac.uk/people.html#simon [6]. ↵ A. M. Bolger , M. Lohse , and B. Usadel , “ Genome analysis Trimmomatic: a flexible trimmer for Illumina sequence data ,” vol. 30 , no. 15 , pp. 2114 – 2120 , 2014 , doi: 10.1093/bioinformatics/btu170 . OpenUrl CrossRef PubMed Web of Science [7]. ↵ G. Marçais and C. Kingsford , “ A fast, lock-free approach for efficient parallel counting of occurrences of k-mers ,” Bioinformatics , vol. 27 , no. 6 , pp. 764 – 770 , Mar. 2011 , doi: 10.1093/BIOINFORMATICS/BTR011 . OpenUrl CrossRef PubMed Web of Science [8]. ↵ T. R. Ranallo-Benavidez , K. S. Jaron , and M. C. Schatz , “ GenomeScope 2.0 and Smudgeplot for reference-free profiling of polyploid genomes ,” Nature Communications 2020 11 : 1 , vol. 11, no. 1, pp. 1–10, Mar. 2020 , doi: 10.1038/s41467-020-14998-3 . OpenUrl CrossRef PubMed [9]. ↵ A. V. Zimin , G. Marçais , D. Puiu , M. Roberts , S. L. Salzberg , and J. A. Yorke , “ The MaSuRCA genome assembler ,” Bioinformatics , vol. 29 , no. 21 , pp. 2669 – 2677, Nov. 2013 , doi: 10.1093/BIOINFORMATICS/BTT476 . OpenUrl CrossRef PubMed Web of Science [10]. ↵ T. P. Sneddon , P. Li , and S. C. Edmunds , “ GigaDB: Announcing the GigaScience database ,” Gigascience , vol. 1 , no. 1 , Jul. 2012 , doi: 10.1186/2047-217X-1-11/2656140 . OpenUrl CrossRef [11]. ↵ M. Alonge et al. , “ Automated assembly scaffolding using RagTag elevates a new tomato system for high-throughput genome editing ,” Genome Biol , vol. 23 , no. 1 , pp. 1 – 19 , Dec. 2022 , doi: 10.1186/S13059-022-02823-7/FIGURES/2 . OpenUrl CrossRef PubMed [12]. ↵ “ GitHub - ablab/quast: Genome assembly evaluation tool .” Accessed: Oct. 10, 2024 . [Online]. Available: https://github.com/ablab/quast [13]. ↵ M. Manni , M. R. Berkeley , M. Seppey , F. A. Simão , and E. M. 
Zdobnov , “ BUSCO Update: Novel and Streamlined Workflows along with Broader and Deeper Phylogenetic Coverage for Scoring of Eukaryotic, Prokaryotic, and Viral Genomes ,” Mol Biol Evol , vol. 38 , no. 10 , pp. 4647 – 4654 , Sep. 2021 , doi: 10.1093/MOLBEV/MSAB199 . OpenUrl CrossRef [14]. ↵ R. Challis , E. Richards , J. Rajan , G. Cochrane , and M. Blaxter , “ BlobToolKit – Interactive Quality Assessment of Genome Assemblies ,” G3 Genes|Genomes|Genetics , vol. 10 , no. 4 , pp. 1361 – 1374 , Apr. 2020 , doi: 10.1534/G3.119.400908 . OpenUrl CrossRef [15]. ↵ A. Rhie , B. P. Walenz , S. Koren , and A. M. Phillippy , “ Merqury: Reference-free quality, completeness, and phasing assessment for genome assemblies ,” Genome Biol , vol. 21 , no. 1 , pp. 1 – 27 , Sep. 2020 , doi: 10.1186/S13059-020-02134-9/FIGURES/6 . OpenUrl CrossRef [16]. ↵ H. Li and R. Durbin , “ Fast and accurate short read alignment with Burrows– Wheeler transform ,” Bioinformatics , vol. 25 , no. 14 , pp. 1754 – 1760 , Jul. 2009 , doi: 10.1093/BIOINFORMATICS/BTP324 . OpenUrl CrossRef PubMed Web of Science [17]. ↵ P. Danecek et al. , “ Twelve years of SAMtools and BCFtools ,” Gigascience , vol. 10 , no. 2 , pp. 1 – 4 , Jan. 2021 , doi: 10.1093/GIGASCIENCE/GIAB008 . OpenUrl CrossRef [18]. ↵ W. Shen , B. Sipos , and L. Zhao , “ SeqKit2: A Swiss army knife for sequence and alignment processing ,” iMeta , vol. 3 , no. 3 , p. e191 , Jun. 2024 , doi: 10.1002/IMT2.191 . OpenUrl CrossRef [19]. ↵ J. M. Flynn et al. , “ RepeatModeler2 for automated genomic discovery of transposable element families ,” Proc Natl Acad Sci U S A , vol. 117 , no. 17 , pp. 9451 – 9457 , Apr. 2020 , doi: 10.1073/PNAS.1921046117/SUPPL_FILE/PNAS.1921046117.SAPP.PDF . OpenUrl Abstract / FREE Full Text [20]. ↵ “GitHub - Dfam-consortium/FamDB: FamDB file format library and utilities .” Accessed: Oct. 08, 2024 . [Online]. Available: https://github.com/Dfam-consortium/FamDB [21]. ↵ A. Smit , R. Hubley , and P. 
Green , “ RepeatMasker Open-4.0 .” Accessed: Oct. 09, 2024 . [Online]. Available: http://www.repeatmasker.org [22]. ↵ J. Keilwagen , F. Hartung , and J. Grau , “ GeMoMa: Homology-Based Gene Prediction Utilizing Intron Position Conservation and RNA-seq Data ,” Methods in Molecular Biology , vol. 1962 , pp. 161 – 177 , 2019 , doi: 10.1007/978-1-4939-9173-0_9 . OpenUrl CrossRef PubMed [23]. L. Gabriel , K. J. Hoff , T. Brůna , M. Borodovsky , and M. Stanke , “ TSEBRA: transcript selector for BRAKER ,” BMC Bioinformatics , vol. 22 , no. 1 , pp. 1 – 12 , Dec. 2021 , doi: 10.1186/S12859-021-04482-0/FIGURES/3 . OpenUrl CrossRef [24]. M. Stanke , O. Schöffmann , B. Morgenstern , and S. Waack , “ Gene prediction in eukaryotes with a generalized hidden Markov model that uses hints from external sources ,” BMC Bioinformatics , vol. 7 , no. 1 , pp. 1 – 11 , Feb. 2006 , doi: 10.1186/1471-2105-7-62/TABLES/2 . OpenUrl CrossRef PubMed Web of Science [25]. M. Stanke , M. Diekhans , R. Baertsch , and D. Haussler , “ Using native and syntenically mapped cDNA alignments to improve de novo gene finding ,” Bioinformatics , vol. 24 , no. 5 , pp. 637 – 644 , Mar. 2008 , doi: 10.1093/BIOINFORMATICS/BTN013 . OpenUrl CrossRef PubMed Web of Science [26]. H. Iwata and O. Gotoh , “ Benchmarking spliced alignment programs including Spaln2, an extended version of Spaln that incorporates additional species-specific features ,” Nucleic Acids Res , vol. 40 , no. 20 , Nov. 2012 , doi: 10.1093/NAR/GKS708 . OpenUrl CrossRef [27]. B. Buchfink , C. Xie , and D. H. Huson , “ Fast and sensitive protein alignment using DIAMOND ,” Nature Methods 2014 12:1, vol. 12 , no. 1 , pp. 59 – 60 , Nov. 2014 , doi: 10.1038/nmeth.3176 . OpenUrl CrossRef PubMed [28]. O. Gotoh , “ A space-efficient and accurate method for mapping and aligning cDNA sequences onto genomic sequence ,” Nucleic Acids Res , vol. 36 , no. 8 , p. 2630 , May 2008 , doi: 10.1093/NAR/GKN105 . OpenUrl CrossRef PubMed Web of Science [29]. A. 
Lomsadze , V. Ter-Hovhannisyan , Y. O. Chernoff , and M. Borodovsky , “ Gene identification in novel eukaryotic genomes by self-training algorithm ,” Nucleic Acids Res , vol. 33 , no. 20 , p. 6494 , 2005 , doi: 10.1093/NAR/GKI937 . OpenUrl CrossRef PubMed Web of Science [30]. T. Brůna , A. Lomsadze , and M. Borodovsky , “ GeneMark-EP+: eukaryotic gene prediction with self-training in the space of genes and proteins ,” NAR Genom Bioinform , vol. 2 , no. 2 , Jun. 2020 , doi: 10.1093/NARGAB/LQAA026 . OpenUrl CrossRef [31]. K. J. Hoff , A. Lomsadze , M. Borodovsky , and M. Stanke , “ Whole-Genome Annotation with BRAKER ,” Methods Mol Biol , vol. 1962 , pp. 65 – 95 , 2019 , doi: 10.1007/978-1-4939-9173-0_5 . OpenUrl CrossRef PubMed [32]. ↵ T. Brůna , K. J. Hoff , A. Lomsadze , M. Stanke , and M. Borodovsky , “ BRAKER2: automatic eukaryotic genome annotation with GeneMark-EP+ and AUGUSTUS supported by a protein database ,” NAR Genom Bioinform , vol. 3 , no. 1 , p. lqaa108 , Mar. 2021 , doi: 10.1093/NARGAB/LQAA108 . OpenUrl CrossRef [33]. K. J. Hoff , S. Lange , A. Lomsadze , M. Borodovsky , and M. Stanke , “ BRAKER1: Unsupervised RNA-Seq-Based Genome Annotation with GeneMark-ET and AUGUSTUS ,” Bioinformatics , vol. 32 , no. 5 , pp. 767 – 769 , Mar. 2016 , doi: 10.1093/BIOINFORMATICS/BTV661 . OpenUrl CrossRef PubMed [34]. ↵ D. Kuznetsov et al. , “ OrthoDB v11: annotation of orthologs in the widest sampling of organismal diversity ,” Nucleic Acids Res , vol. 51 , no. D1 , pp. D445 – D451 , Jan. 2023 , doi: 10.1093/NAR/GKAC998 . OpenUrl CrossRef PubMed [35]. ↵ J. Mistry et al. , “ Pfam: The protein families database in 2021 ,” Nucleic Acids Res , vol. 49 , no. D1 , pp. D412 – D419 , Jan. 2021 , doi: 10.1093/NAR/GKAA913 . OpenUrl CrossRef PubMed [36]. ↵ P. Jones et al. , “ InterProScan 5: genome-scale protein function classification ,” Bioinformatics , vol. 30 , no. 9 , pp. 1236 – 1240 , May 2014 , doi: 10.1093/BIOINFORMATICS/BTU031 . 
OpenUrl CrossRef PubMed Web of Science [37]. ↵ J. Dainat et al. , “ NBISweden/AGAT: AGAT-v1.4.1 ” , doi: 10.5281/ZENODO.13799920 . OpenUrl CrossRef [38]. ↵ Q. Wang , R. Han , H. Xing , and H. Li , “ A consensus genome of sika deer (Cervus nippon) and transcriptome analysis provided novel insights on the regulation mechanism of transcript factor in antler development ,” BMC Genomics , vol. 25 , no. 1 , pp. 1 – 13 , Dec. 2024 , doi: 10.1186/S12864-024-10522-9/FIGURES/4 . OpenUrl CrossRef PubMed [39]. ↵ D. M. Emms and S. Kelly , “ OrthoFinder: Phylogenetic orthology inference for comparative genomics ,” Genome Biol , vol. 20 , no. 1 , pp. 1 – 14 , Nov. 2019 , doi: 10.1186/S13059-019-1832-Y/FIGURES/5 . OpenUrl CrossRef [40]. ↵ R. C. Edgar , “ MUSCLE: A multiple sequence alignment method with reduced time and space complexity ,” BMC Bioinformatics , vol. 5 , no. 1 , pp. 1 – 19 , Aug. 2004 , doi: 10.1186/1471-2105-5-113/FIGURES/16 . OpenUrl CrossRef PubMed Web of Science [41]. ↵ J. Castresana , “ Selection of Conserved Blocks from Multiple Alignments for Their Use in Phylogenetic Analysis ,” Mol Biol Evol , vol. 17 , no. 4 , pp. 540 – 552 , Apr. 2000 , doi: 10.1093/OXFORDJOURNALS.MOLBEV.A026334 . OpenUrl CrossRef PubMed Web of Science [42]. ↵ L. T. Nguyen , H. A. Schmidt , A. Von Haeseler , and B. Q. Minh , “ IQ-TREE: A Fast and Effective Stochastic Algorithm for Estimating Maximum-Likelihood Phylogenies ,” Mol Biol Evol , vol. 32 , no. 1 , p. 268, Jan. 2015 , doi: 10.1093/MOLBEV/MSU300 . OpenUrl CrossRef [43]. ↵ S. Kalyaanamoorthy , B. Q. Minh , T. K. F. Wong , A. Von Haeseler , and L. S. Jermiin , “ ModelFinder: fast model selection for accurate phylogenetic estimates ,” Nature Methods 2017 14 : 6 , vol. 14, no. 6, pp. 587–589, May 2017 , doi: 10.1038/nmeth.4285 . OpenUrl CrossRef PubMed [44]. ↵ I. Letunic and P. Bork , “ Interactive Tree of Life (iTOL) v6: Recent updates to the phylogenetic tree display and annotation tool ,” Nucleic Acids Res , vol. 52 , no. W1 , pp. 
W78 –W82, Jul. 2024 , doi: 10.1093/NAR/GKAE268 . OpenUrl CrossRef [45]. ↵ C. Hahn , L. Bachmann , and B. Chevreux , “ Reconstructing mitochondrial genomes directly from genomic next-generation sequencing reads—a baiting and iterative mapping approach ,” Nucleic Acids Res , vol. 41 , no. 13 , p. e129 , Jul. 2013 , doi: 10.1093/NAR/GKT371 . OpenUrl CrossRef PubMed [46]. ↵ G. Meng , Y. Li , C. Yang , and S. Liu , “ MitoZ: a toolkit for animal mitochondrial genome assembly, annotation and visualization ,” Nucleic Acids Res , vol. 47 , no. 11 , pp. e63 – e63 , Jun. 2019 , doi: 10.1093/NAR/GKZ173 . OpenUrl CrossRef PubMed [47]. ↵ K. Tamura , G. Stecher , and S. Kumar , “ MEGA11: Molecular Evolutionary Genetics Analysis Version 11 ,” Mol Biol Evol , vol. 38 , no. 7 , pp. 3022 – 3027 , Jun. 2021 , doi: 10.1093/MOLBEV/MSAB120 . OpenUrl CrossRef PubMed [48]. ↵ A. Hassanin et al. , “ Pattern and timing of diversification of Cetartiodactyla (Mammalia, Laurasiatheria), as revealed by a comprehensive analysis of mitochondrial genomes ,” C R Biol , vol. 335 , no. 1 , pp. 32 – 50 , Dec. 2011 , doi: 10.1016/j.crvi.2011.11.002 . OpenUrl CrossRef PubMed [49]. ↵ N. S. Heckeberg , D. Erpenbeck , G. Wörheide , and G. E. Rössner , “ Systematic relationships of five newly sequenced cervid species ,” PeerJ , vol. 2016 , no. 4 , 2016 , doi: 10.7717/PEERJ.2307/SUPP-2 . OpenUrl CrossRef [50]. ↵ H. M. Liang et al. , “ Genetic Diversity and Population Structure in Captive Populations of Formosan Sambar Deer (Rusa unicolor swinhoei) ,” Animals 2023 , Vol. 13 , Page 3106, vol. 13, no. 19 , p. 3106 , Oct. 2023 , doi: 10.3390/ANI13193106 . OpenUrl CrossRef PubMed [51]. ↵ Q. Wang , R. Han , H. Xing , and H. Li , “ A consensus genome of sika deer (Cervus nippon) and transcriptome analysis provided novel insights on the regulation mechanism of transcript factor in antler development ,” BMC Genomics , vol. 25 , no. 1 , pp. 1 – 13 , Dec. 2024 , doi: 10.1186/S12864-024-10522-9/FIGURES/4 . 
OpenUrl CrossRef PubMed [52]. ↵ E. W. London , A. L. Roca , J. E. Novakofski , and N. E. Mateus-Pinilla , “ A De Novo Chromosome-Level Genome Assembly of the White-Tailed Deer, Odocoileus Virginianus ,” Journal of Heredity , vol. 113 , no. 4 , pp. 479 – 489 , Jul. 2022 , doi: 10.1093/JHERED/ESAC022 . OpenUrl CrossRef PubMed [53]. ↵ R. P. Baptista et al. , “ Assembly of highly repetitive genomes using short reads: the genome of discrete typing unit III Trypanosoma cruzi strain 231 ,” Microb Genom , vol. 4 , no. 4 , Apr. 2018 , doi: 10.1099/MGEN.0.000156 . OpenUrl CrossRef [54]. ↵ M. G. Ghazi et al. , “ Population genetics and evolutionary history of the endangered Eld’s deer (Rucervus eldii) with implications for planning species recovery ,” Sci Rep , vol. 11 , no. 1 , Dec. 2021 , doi: 10.1038/S41598-021-82183-7 . OpenUrl CrossRef [55]. ↵ R. K. Barnard , J. A. Smith , N. Yuan , F. Liu , and S. Sibte Hadi , “ An announcement of a new genome sequence available for Dama dama (fallow deer) ,” 2023 , doi: 10.1016/j.fsiae.2023.100074 . OpenUrl CrossRef [56]. ↵ S. Porrelli et al. , “ Draft genome of the lowland anoa ( Bubalus depressicornis ) and comparison with buffalo genome assemblies (Bovidae, Bubalina) ,” G3 Genes|Genomes|Genetics , vol. 12 , no. 11 , Nov. 2022 , doi: 10.1093/g3journal/jkac234 . OpenUrl CrossRef [57]. ↵ X. Xing et al. , “ The first high-quality reference genome of sika deer provides insights for high-tannin adaptation ,” Genomics Proteomics Bioinformatics , Jun. 2022 , doi: 10.1016/J.GPB.2022.05.008 . OpenUrl CrossRef [58]. ↵ H. Ba et al. , “ Chromosome-level genome assembly of Tarim red deer, Cervus elaphus yarkandensis ,” Scientific Data 2020 7 : 1 , vol. 7, no. 1, pp. 1–8, Jun. 2020 , doi: 10.1038/s41597-020-0537-0 . OpenUrl CrossRef [59]. ↵ L. Yi et al. , “ Whole-genome sequencing of wild Siberian musk deer (Moschus moschiferus) provides insights into its genetic features ,” BMC Genomics , vol. 21 , no. 1 , pp. 1 – 13 , Jan. 
2020 , doi: 10.1186/S12864-020-6495-2/FIGURES/3 . OpenUrl CrossRef PubMed [60]. ↵ E. W. London , A. L. Roca , J. E. Novakofski , and N. E. Mateus-Pinilla , “ A De Novo Chromosome-Level Genome Assembly of the White-Tailed Deer, Odocoileus Virginianus ,” Journal of Heredity , vol. 113 , no. 4 , pp. 479 – 489 , Jul. 2022 , doi: 10.1093/jhered/esac022 . OpenUrl CrossRef PubMed [61]. ↵ Z. Li et al. , “ Draft genome of the reindeer (Rangifer tarandus) ,” Gigascience , vol. 6 , no. 12 , pp. 1 – 5 , Dec. 2017 , doi: 10.1093/GIGASCIENCE/GIX102 . OpenUrl CrossRef PubMed [62]. ↵ T. Barros et al. , “ The Multiple Origins of Roe Deer Populations in Western Iberia and Their Relevance for Conservation ,” Animals 2020 , Vol. 10, Page 2419, vol. 10 , no. 12 , p. 2419 , Dec. 2020 , doi: 10.3390/ANI10122419 . OpenUrl CrossRef PubMed [63]. ↵ J. Delgado-Acevedo , A. Zamorano , R. W. Deyoung , and T. A. Campbell , “ Genetic Population Structure of Wild Pigs in Southern Texas ,” 2021 , doi: 10.3390/ani11010168 . OpenUrl CrossRef [64]. ↵ T. S. K. Prasad et al. , “ Integrating transcriptomic and proteomic data for accurate assembly and annotation of genomes ,” Genome Res , vol. 27 , no. 1 , pp. 133 – 144 , Jan. 2017 , doi: 10.1101/GR.201368.115/-/DC1 . OpenUrl Abstract / FREE Full Text [65]. ↵ “ Wellcome Open Research | Open Access Publishing Platform .” Accessed: Dec. 17, 2024 . [Online]. Available: https://wellcomeopenresearch.org/articles/6-336 [66]. ↵ L. Tang , S. Dong , and X. Xing , “ Comparative Genomics Reveal Phylogenetic Relationship and Chromosomal Evolutionary Events of Eight Cervidae Species ,” Animals , vol. 14 , no. 7 , p. 1063, Apr. 2024 , doi: 10.3390/ANI14071063/S1 . OpenUrl CrossRef [67]. ↵ P. Mackiewicz et al. , “ Phylogeny and evolution of the genus Cervus (Cervidae, Mammalia) as revealed by complete mitochondrial genomes ,” Sci Rep , vol. 12 , no. 1 , p. 16381, Dec. 2022 , doi: 10.1038/S41598-022-20763-X . OpenUrl CrossRef [68]. ↵ C. Pitra , J. Fickel , E. 
Meijaard , and C. P. Groves , “ Evolution and phylogeny of old world deer ,” Mol Phylogenet Evol , vol. 33 , no. 3 , pp. 880 – 895 , Dec. 2004 , doi: 10.1016/J.YMPEV.2004.07.013 . OpenUrl CrossRef PubMed Web of Science [69]. ↵ L. R. Heaney , “ Biogeography of mammals in SE Asia: estimates of rates of colonization, extinction and speciation ,” Biological Journal of the Linnean Society , vol. 28 , no. 1–2 , pp. 127 – 165 , May 1986 , doi: 10.1111/J.1095-8312.1986.TB01752.X . OpenUrl CrossRef Web of Science [70]. ↵ P. A. Hosner , L. A. Sánchez-González , A. Townsend Peterson , and R. G. Moyle , “ Climate-driven diversification and pleistocene refugia in philippine birds: Evidence from phylogeographic structure and paleoenvironmental niche modeling ,” Evolution (N Y ), vol. 68 , no. 9 , pp. 2658 – 2674 , 2014 , doi: 10.1111/EVO.12459 . OpenUrl CrossRef [71]. ↵ S. L. Smith , R. F. Carden , B. Coad , T. Birkitt , and J. M. Pemberton , “ A survey of the hybridisation status of Cervus deer species on the island of Ireland ,” Conservation Genetics , vol. 15 , no. 4 , pp. 823 – 835 , Mar. 2014 , doi: 10.1007/S10592-014-0582-3/FIGURES/4 . OpenUrl CrossRef [72]. ↵ J. Queirós , C. Gortázar , and P. C. Alves , “ Deciphering Anthropogenic Effects on the Genetic Background of the Red Deer in the Iberian Peninsula ,” Front Ecol Evol , vol. 8 , p. 515401, May 2020 , doi: 10.3389/FEVO.2020.00147/BIBTEX . OpenUrl CrossRef [73]. ↵ E. Hill et al. , “ Hybridisation rates, population structure, and dispersal of sambar deer (Cervus unicolor) and rusa deer (Cervus timorensis) in south-eastern Australia ,” Wildlife Research , vol. 50 , no. 9 , pp. 669 – 687 , Jul. 2023 , doi: 10.1071/WR22129 . OpenUrl CrossRef [74]. ↵ P. Grubb and C. P. Groves , “ Notes on the taxonomy of the deer (Mammalia, Cervidae) of the Philippines ,” Zool Anz , vol. 210 , no. 1/2 , pp. 119 – 144 , 1983 . OpenUrl [75]. ↵ P. Brandies , E. Peel , C. J. Hogg , and K. 
Belov , “ The Value of Reference Genomes in the Conservation of Threatened Species ,” Genes 2019 , Vol. 10, Page 846, vol. 10 , no. 11 , p. 846 , Oct. 2019 , doi: 10.3390/GENES10110846 . OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted February 06, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Draft Genome of the Endangered Visayan Spotted Deer (Rusa alfredi), a Philippine Endemic Species Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Draft Genome of the Endangered Visayan Spotted Deer ( Rusa alfredi) , a Philippine Endemic Species Ma. Carmel F. Javier , Albert C. Noblezada , Persie Mark Q. Sienes , Robert S. Guino-o , Nadia Palomar-Abesamis , Maria Celia D. Malay , Carmelo S. del Castillo , Victor Marco Emmanuel N. Ferriols bioRxiv 2025.02.05.636739; doi: https://doi.org/10.1101/2025.02.05.636739 Share This Article: Copy Citation Tools Draft Genome of the Endangered Visayan Spotted Deer ( Rusa alfredi) , a Philippine Endemic Species Ma. Carmel F. Javier , Albert C. Noblezada , Persie Mark Q. Sienes , Robert S. Guino-o , Nadia Palomar-Abesamis , Maria Celia D. Malay , Carmelo S. del Castillo , Victor Marco Emmanuel N. 
Ferriols bioRxiv 2025.02.05.636739; doi: https://doi.org/10.1101/2025.02.05.636739 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genomics Subject Areas All Articles Animal Behavior and Cognition (7642) Biochemistry (17715) Bioengineering (13907) Bioinformatics (42003) Biophysics (21470) Cancer Biology (18624) Cell Biology (25533) Clinical Trials (138) Developmental Biology (13390) Ecology (19935) Epidemiology (2067) Evolutionary Biology (24356) Genetics (15617) Genomics (22529) Immunology (17753) Microbiology (40432) Molecular Biology (17200) Neuroscience (88681) Paleontology (667) Pathology (2840) Pharmacology and Toxicology (4828) Physiology (7653) Plant Biology (15161) Scientific Communication and Education (2046) Synthetic Biology (4304) Systems Biology (9826) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00