Novel Endogenous Retrovirus in the Slow Loris

preprint OA: closed CC-BY-4.0
📄 Open PDF Full text JSON View at publisher
Full text 46,439 characters · extracted from preprint-html · click to expand
Novel Endogenous Retrovirus in the Slow Loris | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Novel Endogenous Retrovirus in the Slow Loris View ORCID Profile Charles Michie , View ORCID Profile Hayley Beth Free , View ORCID Profile Vincent Nijman , View ORCID Profile Ravinder Kanda doi: https://doi.org/10.1101/2025.01.02.631053 Charles Michie 1 School of Biological and Medical Sciences, Oxford Brookes University Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Charles Michie For correspondence: 
19286910{at}brookes.ac.uk Hayley Beth Free 1 School of Biological and Medical Sciences, Oxford Brookes University Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Hayley Beth Free Vincent Nijman 2 Nocturnal Primate Research Group, School of Law and Social Sciences, Oxford Brookes University 3 Centre for Functional Genomics, Department of Health and Life Sciences, Oxford Brookes University Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Vincent Nijman Ravinder Kanda 1 School of Biological and Medical Sciences, Oxford Brookes University 3 Centre for Functional Genomics, Department of Health and Life Sciences, Oxford Brookes University Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ravinder Kanda Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Endogenous retroviruses (ERVs) are the result of an exogenous infectious retrovirus becoming integrated within the host genome through infection of germline cells. The majority of ERV research has been conducted on humans and other great apes, and research on ERVs within other primates can provide unique insights. By screening the genomes of two endangered slow lorises, Nycticebus bengalensis and N. coucang, with a previously identified LTR, we identified a novel ERV within their genomes, with varying levels of completeness and numerous solo LTRs. Phylogenetic analysis of the genes of this ERV indicates that it is a betaretrovirus most closely related to HERV-K. Despite being closely related to HERV-K, the ERV does not appear to be present in humans; it is found only in Asian lorises and is absent from all other primates. The staggering similarity of viral insertions of this ERV between the two species of slow lorises indicates that the current N.
coucang genome may actually be a hybrid of the two species or a N. bengalensis from a different population than the reference genome. This study highlights that studying ERVs in more distant primates provides information about the genomes and evolution of these species as well as viral evolution. 1 Introduction Endogenous retroviruses (ERVs) are genomic remnants of ancient exogenous retroviral infections that became endogenized into vertebrate genomes through infection of germline cells, allowing their inheritance in a Mendelian fashion ( Weiss, 2006 ). While most ERVs do not become fixed in a species population, those that do can constitute a significant proportion of the host genome; in humans, approximately 8% of the genome consists of ERVs ( Lander et al., 2001 ). ERVs were long considered to be “junk DNA”, non-functional elements occupying space in the genome, but further study has demonstrated that they can have large impacts: contributing to novel genes, acting as promoters, disrupting genes and having links to various diseases ( Chen et al., 2024 ; Katzourakis et al., 2005 ; Xue et al., 2020 ). The structure of ERVs typically consists of two long terminal repeats (LTRs) with four major genes between: gag, pro, pol and env ( Coffin, 1992 ). By studying these ERVs we can gain information about the retroviruses which infected past populations, and the impact of these insertions on the genome evolution of the host species. The study of ERVs in primate genomes has mainly focused on humans and the other great apes. There has been a particular focus on human endogenous retroviruses (HERVs) and, although all HERV families have been identified in chimp genomes, some HERV loci are unique to humans, suggesting integration of these particular insertions relatively recently ( Li et al., 2022 ).
The study of ERVs within human and great ape genomes has been beneficial in investigating host-virus interactions, as well as how ERVs have contributed to their genomics and disease ( Grandi et al., 2019 ; Grandi & Tramontano, 2017 ). Comparatively very little study of ERVs has been accomplished in the strepsirrhines, and the studies that have been undertaken within strepsirrhines have largely focused on one superfamily, the lemurs (Lemuroidea), with very little on lorises and bushbabies (Lorisoidea). The Lorisoidea are small nocturnal primates found in Africa (Galagidae) and Asia (Lorisidae). The Asian lorises comprise three genera, i.e., slow lorises ( Nycticebus ), pygmy lorises ( Xanthonycticebus ) and slender lorises ( Loris ) ( Nekaris, 2014 ; Nekaris & Nijman, 2022 ; Nekaris & Starr, 2015 ). All loris species are on the IUCN Red List (status varying from Vulnerable to Critically Endangered), largely due to deforestation and trade for pets or medicinal purposes ( Moore et al., 2014 ; Nekaris & Burrows, 2020 ). To date there has been very little study of ERVs within lorises, with only two studies describing ERVs within the slow loris ( Gifford et al., 2005 ; Li et al., 2022 ). Studies of retroviruses that infected and evolved with lorises may provide an opportunity to understand the basal traits of primate retroviruses. By screening within smaller primates, we can examine whether patterns seen in the extensively studied larger primates (such as the number of loci and families) are the same in smaller primates, and possibly whether there are unique adaptations within lorises due to retroviral insertions. Here, we use an LTR (Nycticebus_bengalensis_LG02 100652008 – 100652802 Unknown_HERV_3LTR) identified in Li et al. (2022) from an “Unknown HERV” to scan for new viruses within two species, the Bengal slow loris Nycticebus bengalensis and the greater slow loris N. coucang . Although they report a single retroviral insertion, it did not have all four major genes.
We are interested in understanding the evolutionary dynamics of ERVs in Nycticebus genomes. By comparing viral insertions in these closely related species it is possible to study how the ERV has changed within the genome since the species split and whether there have been any genomic events. 2 Methods 2.1 Genome screening To investigate the number and distribution of loci of the novel ERV identified by Li et al. (2022) , the 715 bp sequence “Nycticebus_bengalensis_LG02 100652008 – 100652802 Unknown_HERV_3LTR” (Supplementary Figure 1) was extracted from the N. bengalensis reference genome and used as a query in BLASTn (default parameters) to perform in silico screening of the reference genomes of Nycticebus bengalensis (GCA_023898255.1) and N. coucang (GCA_027406575.1). The LTR BLASTn hits were used to identify complete and incomplete full-length ERVs, truncated viral elements, and solo LTRs. Any BLASTn hit that was over 400 bp long and had an e-value of less than 1×10⁻⁵ was considered a real LTR and analysed further. For LTR BLASTn hits that were longer than 400 bp but incomplete (<715 bp), we calculated the expected location of the start and end of the LTR based on the difference between query and subject length in the genome and extracted these. 2.2 Identification of Full-length ERVs Potential full-length ERVs were determined by identifying LTRs that were within 20 kb of each other, on the same chromosome, and in the same orientation. These pairs of LTRs were extracted and aligned manually in Geneious Prime v2024.0.3 ( https://www.geneious.com/ ). To identify the internal genes of this ERV, we focused on those pairs of LTRs that had high sequence similarity (>94%) and were therefore more likely to represent recent insertions with intact internal genes (LTRs of a full-length insertion are identical at the time of integration and become more divergent with time as they accumulate mutations).
For the pairs of LTRs that satisfied these criteria, the internal region between the LTRs was extracted, and open reading frames were identified using the NCBI ORFfinder ( https://www.ncbi.nlm.nih.gov/orffinder/ ). A BLASTp search against the non-redundant protein sequences database (restricted to Viruses – taxid: 10239) was used to identify the gag, pro, pol, and env genes. Sequences which had all four core genes (gag, pro, pol, env) and did not have large insertions (over 2000 bases) were aligned in Geneious (MUSCLE v5.1; Edgar, 2004 ). As none of the insertions identified had the four complete internal genes, we created a consensus sequence of each gene from the full length elements we identified, to enable comparison to other ERVs. A number of insertions were missing one or more of the core genes – these we term incomplete full-length ERVs. Conserved domains were identified with the conserved domain database ( Wang et al., 2023 ). 2.3 Phylogenetic analysis of the full-length ERV The phylogenetic history of the constructed consensus full-length ERV was determined by alignment of conserved domains within each gene to increase strength of alignment. Separate phylogenetic trees were made for each gene. Those used were the p24 N region of gag , RVP of pro , RT of pol and TM of env translated amino acids with representative retroviruses from several retroviral families (Supplementary Table 1). The retroviruses compared against were biassed towards Betaretroviruses as the best BLAST hits were Betaretroviruses. Epsilonretroviruses were excluded from all analyses, and Spumaretroviruses were excluded in env analysis as they resulted in poor alignments. Only Alpharetroviruses, Betaretroviruses and Deltaretroviruses have a p24 conserved domain in gag therefore only these were included in the gag phylogeny. 
The positions of the p24, RVP and RT were determined by the conserved domain database ( Wang et al., 2023 ), whereas the TM was determined by the start of the cleavage site (R-X-R/K-R) until the end of the env ( Bénit et al., 2001 ). All regions were aligned in Geneious (MUSCLE). ModelFinder ( Kalyaanamoorthy et al., 2017 ) on IQTree v2.3.5 (Minh et al., 2020) was used to determine the best model of protein evolution ( gag – LG+I+G4, pro and pol – rtREV+G4, env – LG+G4). Maximum likelihood trees were constructed with PhyML v3.3.20180621 ( Guindon et al., 2010 ) with 1000 bootstraps and Bayesian trees were constructed with MrBayes v3.26 ( Huelsenbeck & Ronquist, 2001 ) with a burn-in length of 100,000 and 1,000,000 Markov chain Monte Carlo (MCMC) steps with four heated chains and heated chain temperature of 0.2 with a sampling frequency of 200 trees. Final effective sample sizes of 1812, 1435, 1319 and 1440 for the gag, pro, pol and env analyses, respectively, indicated convergence of the Bayesian MCMC analysis. Trees were visualised with TreeViewer v2.2 ( Bianchini & Sánchez-Baracaldo, 2024 ). 2.4 LTR insertion categorisation For the remaining LTRs, an additional 1 kb of flanking sequence from the 5’ and 3’ end of each insertion was extracted to identify whether those loci represent true solo LTRs, or 5’/3’ truncated full-length (FL) elements ( Figure 1 ). The consensus gag and env genes were used to BLASTn these extended LTRs. In rare cases there were triplets, where an LTR was associated with two viral elements. Download figure Open in new tab Figure 1. Graphical representation of categories of novel retroviral insertions identified within the genomes of Nycticebus coucang and Nycticebus bengalensis . 2.5 Similarity of the novel full-length ERV in Nycticebus coucang reference genome The full length ERV loci identified in either N. bengalensis or N. coucang were compared to determine if these insertions predated the divergence of these species (i.e.
present in the same state in both species), or to detect insertional polymorphism (i.e. FL in one species, but solo LTR/preinsertion site in the other species) which would indicate relatively recent activity of this ERV (i.e. after these two species had diverged). To determine if the viral insertion was the same between the two species, the target site duplications (TSDs) and flanking region of each full-length pair and truncated LTR found within the N. bengalensis were compared against those found in the N. coucang . If the TSDs between species were identical and the insertion was on the same chromosome it was determined to be the same viral insertion. The order of chromosomes was different between the reference genomes; therefore, segments of each chromosome from each genome were compared against the opposing genome using BLASTn to match the chromosomes together (Supplementary Table 2). As there was no Y chromosome available for N. bengalensis this was excluded. The similarity between all insertions (excluding solo LTRs) was determined through alignment in Geneious (MUSCLE) and the percentage of sites that were identical was found. The similarity was determined between the whole insertion and between the 5’ and 3’ LTRs in the genomes of the two species. The number of pairwise differences was also calculated. 2.6 Estimation of age of insertions The age of insertion was estimated from the amount of divergence between LTRs. This was only calculated for full length ERVs in N. coucang as the N. bengalensis genome has regions of lower quality. Time was calculated as T = D/(μ * L), where D is the number of differences between the two LTRs, μ is the mutation rate, and L is the length of the LTR. A mutation rate of 1.72×10⁻⁹ per bp per year, estimated from lemurs, was used ( Campbell et al., 2021 ). Clear large indels were ignored and removed from the alignment when calculating differences between the LTRs as they cannot be accounted for by the neutral evolutionary mutation rate.
2.7 Identification of the novel full-length ERV in other primates To identify which other primate genomes the full-length ERV is present in, the consensus env gene and 3’ LTR (2672 bp) were screened with BLASTn against the reference genomes of eight other strepsirrhines with varying amounts of divergence from N. bengalensis as representatives of other clades and the reference human genome ( Xanthonycticebus pygmaeus - GWHBCHX00000000; Loris tardigradus - GCA_023783135; L. lydekkerianus - GCA_963574355; Perodicticus potto - GCA_963574655; Otolemur garnettii - GCA_000181295; Lemur catta - GCA_020740605; Homo sapiens - GCF_000001405). If there were any BLAST hits which had higher than 80% identity and 75% query coverage (2004 bp) then it was determined that the ERV was also present in that species. 3 Results 3.1 Identification of a full-length ERV in Nycticebus bengalensis Screening the N. bengalensis genome with the LTR from Li et al. (2022) yielded 1498 hits over 400 bp long, of which 184 were within 20 kb of each other, creating 96 pairs as some LTRs were involved in multiple pairs. After removal of pairs which contained an LTR that did not have a clear start or end motif or contained indels there were 84 pairs of LTRs in total (Supplementary Table 3). Of these 84 pairs, 27 had a pairwise identity above 94%. After finding open reading frames and BLASTp of these, there were 16 sequences which contained all four genes, 14 of which did not have large insertions and were used to construct a consensus full-length ERV which is 8923 bp long (NCBI Accession numbers to be provided). This consensus full-length ERV was mostly complete: only six indels or base changes were needed to create full length genes. Each gene was in a different reading frame. 3.1.1 Conserved domains in the full-length ERV The full-length ERV contained many of the typical conserved domains for retroviruses.
The gag gene contained a myristylation signal at the beginning, followed by a truncated p10 gag , an N and C terminal p24 gag , and a single zinc finger. In the pro gene there was a dUTPase at the beginning typical of betaretroviruses ( Hizi & Herzig, 2015 ), which is followed by the protease, which contains the catalytic motif DSG; this motif is more typical of alpharetroviruses, whereas DTG is common in betaretroviruses ( Chameettachal et al., 2023 ; Konvalinka et al., 2015 ). The pol begins with the reverse transcriptase (RT) which has a catalytic site of YMDD. There is only one heptad repeat in the env . 3.2 Phylogenetic analysis Both Bayesian and maximum likelihood analyses of all four major genes indicated that the novel ERV is a betaretrovirus, most closely related to HERV-K ( Figure 2 ; Supplementary Figure 2). Following similar nomenclature, this virus has provisionally been named Loris endogenous retrovirus 1 (LERV1). Notably, LERV1 is distinct from the one other ERV identified in the Nycticebus genus, RV Slow Loris ( Gifford et al., 2005 ). Download figure Open in new tab Figure 2. Phylogeny of LERV1 (in red) inferred from Bayesian (MrBayes) analysis with posterior probability shown. Scale bar represents substitutions per site. Trees have been rooted at midpoint. Maximum likelihood inference (PhyML) can be found in Supplementary Figure 2. 3.3 LTR identification and categorization in the two Nycticebus genomes In N. coucang there were a total of 1426 solo LTRs and 45 complete full length elements, whilst in N. bengalensis there were 1305 solo LTRs and 33 complete full length elements. In both species the most viral elements were on Chromosomes 1 and X ( Figure 3 ). No significant difference between species was found in the number of insertions or their distribution across the genome (ANOVA: p-value = 0.71). There was no bias towards orientation in either species (Chi-Squared Test: N. bengalensis p-value = 0.17, N. coucang p-value = 0.66).
Insertions with three LTRs, referred to as triplets, represent full-length insertions where there has been a duplication of an LTR and internal genes, resulting in an insertion with three LTRs. These were more common within N. bengalensis where there have been multiple duplications of the entire insertion whilst in N. coucang there is a single copy of the insertion ( Figure 4A ). The disparity between the number of LTR hits and the number of LTRs involved in pairs can be partly explained by differences in chromosome length and by LTRs likely being missed in assembly (Supplementary Table 2), whilst others can be explained by a genomic event within one genome (Supplementary Table 3). For example, within the N. bengalensis a complete full-length pair ( N. coucang X:124248563-124259123) has undergone homologous recombination and is now a solo LTR ( N. bengalensis X:122693788-122696469) as evidenced by identical TSDs and flanking regions ( Figure 4B ). In both species there is evidence of inversions on chromosomes where a full-length element is in different orientations in the two genomes. Ten full-length insertions were unique to the N. coucang genome; however, we cannot be confident that this represents LERV1 activity after the divergence of these two species, as the pre-integration site was absent from the N. bengalensis genome, implying that the whole region may be missing from the assembly. Download figure Open in new tab Figure 3. Number of LERV1 LTR insertions according to insertion category in N. bengalensis and N. coucang reference genomes across every chromosome. Download figure Open in new tab Figure 4. Graphical representation of examples of genomic events of LERV1 between N. bengalensis and N. coucang . (A) represents where there has been a segmental duplication creating a triplet LTR with two internal sequences. (B) represents an example of homologous recombination, where in N. bengalensis the ERV underwent homologous recombination but did not in N. coucang .
Letters at start and end of LTR are the TSDs. 3.4 Dating of LERV1 insertions As the LTRs are identical at the time of insertion, the divergence between the 5’ and 3’ LTR can be used to estimate the age of the insertion. Based on the divergence of the LTRs in complete full length insertions, the youngest LERV1 insertion is approximately 1.7 million years old and oldest may be ∼63 million years old ( Table 1 ; Figure 5 ). View this table: View inline View popup Table 1: LTR similarity of complete full-length insertions within N. bengalensis and N. coucang within a genome and between genomes in 5’ and 3’ LTR. Sorted here by estimated age based on N. coucang . Estimated age of insertion based on N. coucang except for ERV marked with * as they were solo LTRs in N. coucang . Multiple insertions could not be found in N. bengalensis and have been marked with ‘Region not found’ as searching for the flanking region of the ERV revealed no results. Download figure Open in new tab Figure 5. Range of estimated age of viral insertions of LERV1 based on 5’ and 3’ LTR divergences. Insertions at the same locus in N. bengalensis and N. coucang would indicate the insertion occurring prior to the divergence of these two species. There was high sequence similarity between full-length insertions, and the truncated LTRs (Supplementary Table 3), with the majority of these shared insertions indicating >99% homology, and a few insertions were 100% identical in the two species. Given that these species are thought to have diverged approximately 5 MYA ( Blair et al., 2023 ), this was surprising. For loci that inserted before the divergence of these two species we would expect the 5’ and 3’ LTRs of those full length elements to be more similar within a species than between. Focusing on the complete full length elements that were present in both species we found the differences between 5’ and 3’ LTRs between species was less than that within a species ( Table 1 ). 
Due to the surprisingly high similarity between insertions and LTRs between the N. bengalensis and N. coucang genomes, we compared mtDNA to confirm the classification of these species. A number of mtDNA genes are available on GenBank for these two species ( Blair et al., 2023 ; Pozzi et al., 2015 ). Cytochrome B (cytb), NADH dehydrogenase subunit 4 (ND4) and cytochrome c oxidase subunit I (COX1) sequences of N. bengalensis, N. coucang, and N. javanicus (Supplementary Table 4) were downloaded from GenBank. We extracted the same genes from the reference genomes for N. bengalensis and N. coucang and constructed a Bayesian phylogeny to confirm the species identification. The mitochondrial sequences from both the reference genome and reference mitochondria (NC_002765) of N. coucang are placed within the N. bengalensis clade with strong support across all genes ( Figure 6 ). Download figure Open in new tab Figure 6. Phylogenetic tree of three mitochondrial genes Cytb, ND4 and COX1 of slow lorises inferred from Bayesian (MrBayes) analysis with posterior probability shown. Scale bar represents substitutions per site. In the Cytb tree, triangles represent sequences from Blair et al. (2023) and squares represent sequences from Pozzi et al. (2015) . Tips in yellow represent sequences which come from individuals which have previously been identified as N. coucang . 3.5 Identification of novel full-length ERV in other primates BLASTn searches of the consensus 3’ LTR and env identified the novel full-length ERV in the reference genomes of three other species: X. pygmaeus, L. tardigradus and L. lydekkerianus . Either no hits, or no hits with substantial query coverage or percentage identity, were found in P. potto, O. garnettii, L. catta or H. sapiens . 4 Discussion A substantial number of ERVs have been identified in great apes and other Haplorhini, while comparatively fewer studies have focused on Strepsirrhini, particularly lorises.
This study represents the first detailed examination of ERVs specifically in slow lorises within the genus Nycticebus , and the first to identify all major retroviral genes. Using the LTR of an incomplete ( gag, env missing) “Unknown HERV” insertion reported in a previous study ( Li et al., 2022 ), we identified 47 full length viral insertions ( Table 1 ) which had remnants of all genes across two genomes of Nycticebus ; however, none of the genes were intact, suggesting that this is not a recent genome invasion of this family of retrovirus. Over 100 insertions were incomplete copies (missing internal genes, 5’ or 3’ truncated, see Supplementary Table 3). The BLAST results indicate numerous solo LTRs (∼1400) present in the Nycticebus genome, which is similar to observations of other primate ERVs, where the solo LTRs are ten times more numerous than the full length insertions (Belshaw et al., 2007). Phylogenetic analysis reveals this virus is a betaretrovirus, most closely related and most similar to HERV-K across all genes ( Figure 2 ). Despite being most similar to HERV-K and the LTR being previously labelled as being from an “Unknown HERV”, this virus does not appear to be a HERV and is unique to Asian lorises. This relationship may warrant further investigation as throughout the phylogenetic tree retroviruses appear to approximately cluster with retroviruses from similar or closely related species, but LERV1 is closest to HERV-K. The novel retrovirus identified here is distinct from the RV Slow Loris ( Gifford et al., 2005 ) previously identified within the Nycticebus genus. The HERV-K betaretrovirus is one of the most well-studied families of retroviruses as it is the most recently integrated endogenous retrovirus in the human genome, with some loci being insertionally polymorphic ( Hohn et al., 2013 ).
HERV-K has had a significant impact on the human genome, with certain loci implicated in addiction ( Karamitros et al., 2018 ), in addition to the numerous links between HERV-K and diseases such as various cancers ( Dervan et al., 2021 ; Rivas et al., 2022 ), and neurodegenerative disorders ( Adler et al., 2024 ). With comparable numbers of full length proviruses and solo LTRs, it is likely that LERV1 has had a similar impact on the genome evolution of the subfamily Lorisinae. Estimates of the age of the HERV-K family are also similar to our age estimates of LERV1 calculated here, ranging from 1.7 MYA to 63 MYA ( Table 1 ), providing further support for the idea that LERV1 has had a significant impact on genome evolution in these species. However, the genomes of the smaller primates are not currently annotated well enough, compared with the larger primate genomes, to investigate this. The virus could be identified exclusively in the subfamily Lorisinae, suggesting that the oldest insertion should have occurred approximately 30 MYA in the common ancestor of the subfamily before the divergence into the genera Loris, Nycticebus and Xanthonycticebus ( Pozzi et al., 2015 ); our age estimates for some loci exceed this date, which could indicate that gene conversion has occurred ( Jedlicka et al., 2020 ), in which case these estimates are unlikely to be accurate. The subfamily Lorisinae is found exclusively in Asia whilst its sister subfamily Perodicticinae is in Africa, indicating that the initial LERV1 integration likely happened somewhere within Asia (specifically India) after the common ancestor of Lorisinae colonized Asia ( Ali & Aitchison, 2008 ). We observed several instances of insertional polymorphism of LERV1 loci in the N. bengalensis and N. coucang genomes ( Table 1 , Supplementary Table 3).
Given the divergence times of these two species are estimated to be between 4 and 8 MYA ( Blair et al., 2023 ; Pozzi et al., 2015 ), this would suggest that these particular loci inserted in the common ancestor at around this time. However, our calculations of insertion times for three loci which exhibit insertional polymorphism through homologous recombination events based on the LTR divergence of the full length provirus, range from 1.8 MYA to 45 MYA. For the youngest of these (X:124248563-124259123 in N. coucang ), the full-length insertion in N. coucang had LTRs that are 99.7% similar, and the solo LTR within N. bengalensis is 99.24% to 99.54% similar to the 5’ and 3’ LTRs in N. coucang respectively; looking at the remaining proviruses, in the large majority of loci, comparison of the LTRs of each locus indicated a higher degree of similarity between the 5’ and 3’ LTRs of the two species, as opposed to within the species ( Table 1 ). This was unexpected given the divergence time of N. bengalensis and N. coucang , and the range of proviral integration times. Due to this very high level of similarity a brief examination of the mitochondrial genes from the N. bengalensis and N. coucang reference genomes was undertaken to determine if they are different species. Our results show that the current reference genome of N. coucang is not a pure N. coucang , and is either a hybrid whose mother was a N. bengalensis , or a N. bengalensis from a different population than the reference genome of N. bengalensis ( Figure 6 ). We also show that the reference mitochondrial N. coucang (NC_002765) is also unlikely to have come from N. coucang . Examination of nuclear DNA will be required to establish the true nature of these genomes and whether they are hybrids. As the N. 
coucang reference genome comes from an individual born in San Diego Zoo in 1989 ( https://www.ncbi.nlm.nih.gov/biosample/SAMN28408555/ ) it is possible that the parents may have been misidentified (where one or both were N. bengalensis ), as in the past it has been challenging to differentiate between the two species, and there have been species reclassifications ( Nekaris, 2014 ; Nekaris & Starr, 2015 ; Pozzi et al., 2015 ). Regardless, the similarity of viral elements between species indicates a more recent common ancestor of the N. bengalensis and N. coucang reference genomes compared to the expected ∼4 to 8 MYA species divergence. The discovery of this retrovirus within the slow lorises provides insights into both the genome evolution of these species and of the viruses themselves. Through the examination of this virus, we have found multiple genomic events including homologous recombination, duplication and inversions. Further examination could be carried out to discover if any solo LTRs are close to genes and therefore may be acting as promoters or enhancers (e.g. Pi et al., 2004 ). Examination of this virus allowed us to determine that it is likely that the currently published reference genome of N. coucang is incorrect and may be a hybrid with N. bengalensis , or a N. bengalensis from a different population. Both N. bengalensis and N. coucang are endangered (Nekaris, Al-Razi, et al., 2020; Nekaris, Poindexter, et al., 2020), and therefore it is crucial to correctly determine the classification of these species and genomes. 5 References ↵ Adler , G. L. , Le , K. , Fu , Y. , & Kim , W. S. ( 2024 ). Human Endogenous Retroviruses in Neurodegenerative Diseases . Genes , 15 ( 6 ), Article 6. doi: 10.3390/genes15060745 OpenUrl CrossRef ↵ Ali , J. R. , & Aitchison , J. C. ( 2008 ). Gondwana to Asia: Plate tectonics, paleogeography and the biological connectivity of the Indian sub-continent from the Middle Jurassic through latest Eocene (166–35 Ma) . 
Earth-Science Reviews , 88 ( 3 ), 145 – 166 . doi: 10.1016/j.earscirev.2008.01.007 OpenUrl CrossRef GeoRef ↵ Bénit , L. , Dessen , P. , & Heidmann , T. ( 2001 ). Identification, phylogeny, and evolution of retroviral elements based on their envelope genes . Journal of Virology , 75 ( 23 ), 11709 – 11719 . doi: 10.1128/JVI.75.23.11709-11719.2001 OpenUrl Abstract / FREE Full Text ↵ Bianchini , G. , & Sánchez-Baracaldo , P. ( 2024 ). TreeViewer: Flexible, modular software to visualise and manipulate phylogenetic trees . Ecology and Evolution , 14 ( 2 ), e10873 . doi: 10.1002/ece3.10873 OpenUrl CrossRef PubMed ↵ Blair , M. E. , Cao , G. T. H. , López-Nandam , E. H. , Veronese-Paniagua , D. A. , Birchette , M. G. , Kenyon , M. , Md-Zain , B. M. , Munds , R. A. , Nekaris , K. A.-I. , Nijman , V. , Roos , C. , Thach , H. M. , Sterling , E. J. , & Le , M. D. ( 2023 ). Molecular Phylogenetic Relationships and Unveiling Novel Genetic Diversity among Slow and Pygmy Lorises, including Resurrection of Xanthonycticebus intermedius . Genes , 14 ( 3 ), Article 3. doi: 10.3390/genes14030643 OpenUrl CrossRef ↵ Campbell , C. R. , Tiley , G. P. , Poelstra , J. W. , Hunnicutt , K. E. , Larsen , P. A. , Lee , H.-J. , Thorne , J. L. , dos Reis , M. , & Yoder , A. D. ( 2021 ). Pedigree-based and phylogenetic methods support surprising patterns of mutation rate and spectrum in the gray mouse lemur . Heredity , 127 ( 2 ), 233 – 244 . doi: 10.1038/s41437-021-00446-5 OpenUrl CrossRef PubMed ↵ Chameettachal , A. , Mustafa , F. , & Rizvi , T. A. ( 2023 ). Understanding Retroviral Life Cycle and its Genomic RNA Packaging . Journal of Molecular Biology , 435 ( 3 ), 167924 . doi: 10.1016/j.jmb.2022.167924 OpenUrl CrossRef PubMed ↵ Chen , M. , Huang , X. , Wang , C. , Wang , S. , Jia , L. , & Li , L. ( 2024 ). Endogenous retroviral solo-LTRs in human genome . Frontiers in Genetics , 15 . doi: 10.3389/fgene.2024.1358078 OpenUrl CrossRef ↵ Coffin , J. M. ( 1992 ). 
Structure and Classification of Retroviruses . In J. A. Levy (Ed.), The Retroviridae (pp. 19 – 49 ). Springer US . doi: 10.1007/978-1-4615-3372-6_2 OpenUrl CrossRef ↵ Dervan , E. , Bhattacharyya , D. D. , McAuliffe , J. D. , Khan , F. H. , & Glynn , S. A. ( 2021 ). Ancient Adversary – HERV-K (HML-2) in Cancer . Frontiers in Oncology , 11 . doi: 10.3389/fonc.2021.658489 OpenUrl CrossRef ↵ Edgar , R. C. ( 2004 ). MUSCLE: Multiple sequence alignment with high accuracy and high throughput . Nucleic Acids Research , 32 ( 5 ), 1792 – 1797 . doi: 10.1093/nar/gkh340 OpenUrl CrossRef PubMed Web of Science ↵ Gifford , R. , Kabat , P. , Martin , J. , Lynch , C. , & Tristem , M. ( 2005 ). Evolution and Distribution of Class II-Related Endogenous Retroviruses . Journal of Virology , 79 ( 10 ), 6478 – 6486 . doi: 10.1128/jvi.79.10.6478-6486.2005 OpenUrl Abstract / FREE Full Text ↵ Grandi , N. , Pisano , M. P. , & Tramontano , E. ( 2019 ). The Emerging Field of Human Endogenous Retroviruses: Understanding Their Physiological Role and Contribution to Diseases . Future Virology , 14 ( 7 ), 441 – 444 . doi: 10.2217/fvl-2019-0061 OpenUrl CrossRef ↵ Grandi , N. , & Tramontano , E. ( 2017 ). Type W Human Endogenous Retrovirus (HERV-W) Integrations and Their Mobilization by L1 Machinery: Contribution to the Human Transcriptome and Impact on the Host Physiopathology . Viruses , 9 ( 7 ), Article 7. doi: 10.3390/v9070162 OpenUrl CrossRef PubMed ↵ Guindon , S. , Dufayard , J.-F. , Lefort , V. , Anisimova , M. , Hordijk , W. , & Gascuel , O. ( 2010 ). New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies: Assessing the Performance of PhyML 3.0 . Systematic Biology , 59 ( 3 ), 307 – 321 . doi: 10.1093/sysbio/syq010 OpenUrl CrossRef PubMed Web of Science ↵ Hizi , A. , & Herzig , E. ( 2015 ). dUTPase: The frequently overlooked enzyme encoded by many retroviruses . Retrovirology , 12 ( 1 ), 70 . doi: 10.1186/s12977-015-0198-9 OpenUrl CrossRef PubMed ↵ Hohn , O. , Hanke , K. 
, & Bannert , N. ( 2013 ). HERV-K(HML-2), the Best Preserved Family of HERVs: Endogenization, Expression, and Implications in Health and Disease . Frontiers in Oncology , 3 . doi: 10.3389/fonc.2013.00246 OpenUrl CrossRef PubMed ↵ Huelsenbeck , J. P. , & Ronquist , F. ( 2001 ). MRBAYES: Bayesian inference of phylogenetic trees . Bioinformatics , 17 ( 8 ), 754 – 755 . doi: 10.1093/bioinformatics/17.8.754 OpenUrl CrossRef PubMed Web of Science ↵ Jedlicka , P. , Lexa , M. , & Kejnovsky , E. ( 2020 ). What Can Long Terminal Repeats Tell Us About the Age of LTR Retrotransposons, Gene Conversion and Ectopic Recombination? Frontiers in Plant Science , 11 . doi: 10.3389/fpls.2020.00644 OpenUrl CrossRef PubMed ↵ Kalyaanamoorthy , S. , Minh , B. Q. , Wong , T. K. F. , von Haeseler , A. , & Jermiin , L. S. ( 2017 ). ModelFinder: Fast model selection for accurate phylogenetic estimates . Nature Methods , 14 ( 6 ), 587 – 589 . doi: 10.1038/nmeth.4285 OpenUrl CrossRef PubMed ↵ Karamitros , T. , Hurst , T. , Marchi , E. , Karamichali , E. , Georgopoulou , U. , Mentis , A. , Riepsaame , J. , Lin , A. , Paraskevis , D. , Hatzakis , A. , McLauchlan , J. , Katzourakis , A. , & Magiorkinis , G. ( 2018 ). Human Endogenous Retrovirus-K HML-2 integration within RASGRF2 is associated with intravenous drug abuse and modulates transcription in a cell-line model . Proceedings of the National Academy of Sciences , 115 ( 41 ), 10434 – 10439 . doi: 10.1073/pnas.1811940115 OpenUrl Abstract / FREE Full Text ↵ Katzourakis , A. , Rambaut , A. , & Pybus , O. G. ( 2005 ). The evolutionary dynamics of endogenous retroviruses . Trends in Microbiology , 13 ( 10 ), 463 – 468 . doi: 10.1016/j.tim.2005.08.004 OpenUrl CrossRef PubMed Web of Science ↵ Konvalinka , J. , Kräusslich , H.-G. , & Müller , B. ( 2015 ). Retroviral proteases and their roles in virion maturation . Virology , 479–480, 403 – 417 . doi: 10.1016/j.virol.2015.03.021 OpenUrl CrossRef ↵ Lander , E. S. , Linton , L. M. , Birren , B. 
, Nusbaum , C. , Zody , M. C. , Baldwin , J. , Devon , K. , Dewar , K. , Doyle , M. , FitzHugh , W. , Funke , R. , Gage , D. , Harris , K. , Heaford , A. , Howland , J. , Kann , L. , Lehoczky , J. , LeVine , R. , McEwan , P. , … The Wellcome Trust : ( 2001 ). Initial sequencing and analysis of the human genome . Nature , 409 ( 6822 ), 860 – 921 . doi: 10.1038/35057062 OpenUrl CrossRef PubMed Web of Science ↵ Li , Y. , Zhang , G. , & Cui , J. ( 2022 ). Origin and Deep Evolution of Human Endogenous Retroviruses in Pan-Primates . Viruses , 14 ( 7 ), 1370 . doi: 10.3390/v14071370 OpenUrl CrossRef PubMed ↵ Moore , R. S. , Wihermanto , & Nekaris , K. A. I. ( 2014 ). Compassionate conservation, rehabilitation and translocation of Indonesian slow lorises . Endangered Species Research , 26 ( 2 ), 93 – 102 . doi: 10.3354/esr00620 OpenUrl CrossRef ↵ Nekaris , K. A. I. ( 2014 ). Extreme primates: Ecology and evolution of Asian lorises . Evolutionary Anthropology: Issues, News, and Reviews , 23 ( 5 ), 177 – 187 . doi: 10.1002/evan.21425 OpenUrl CrossRef Nekaris , K. A. I. , Al-Razi , H. , Blair , M. E. , Das , N. , Ni , Q. , Samun , E. , Streicher , U. , Xue-long , J. , & Yongcheng , L. ( 2020 ). IUCN Red List of Threatened Species: Nycticebus bengalensis . IUCN Red List of Threatened Species 2020 . doi: 10.2305/IUCN.UK.2020-2.RLTS.T39758A179045340.en OpenUrl CrossRef ↵ Nekaris , K. A. I. , & Burrows , A. M. ( 2020 ). Evolution, Ecology and Conservation of Lorises and Pottos (Vol. 87 ). Cambridge University Press . ↵ Nekaris , K. A. I. , & Nijman , V. ( 2022 ). A new genus name for pygmy lorises, Xanthonycticebus gen. nov. (Mammalia, Primates) . Zoosystematics & Evolution , 98 ( 1 ), 87 – 92 . doi: 10.3897/zse.98.81942 OpenUrl CrossRef Nekaris , K. A. I. , Poindexter , S. , & Streicher , U. ( 2020 ). IUCN Red List of Threatened Species: Nycticebus coucang . IUCN Red List of Threatened Species 2020 . https://www.iucnredlist.org/en ↵ Nekaris , K. A. I. , & Starr , C. ( 2015 ). 
Conservation and ecology of the neglected slow loris: Priorities and prospects . Endangered Species Research , 28 ( 1 ), 87 – 95 . doi: 10.3354/esr00674 OpenUrl CrossRef ↵ Pi , W. , Yang , Z. , Wang , J. , Ruan , L. , Yu , X. , Ling , J. , Krantz , S. , Isales , C. , Conway , S. J. , Lin , S. , & Tuan , D. ( 2004 ). The LTR enhancer of ERV-9 human endogenous retrovirus is active in oocytes and progenitor cells in transgenic zebrafish and humans . Proceedings of the National Academy of Sciences , 101 ( 3 ), 805 – 810 . doi: 10.1073/pnas.0307698100 OpenUrl Abstract / FREE Full Text ↵ Pozzi , L. , Nekaris , K. A.-I. , Perkin , A. , Bearder , S. K. , Pimley , E. R. , Schulze , H. , Streicher , U. , Nadler , T. , Kitchener , A. , Zischler , H. , Zinner , D. , & Roos , C. ( 2015 ). Remarkable ancient divergences amongst neglected lorisiform primates . Zoological Journal of the Linnean Society , 175 ( 3 ), 661 – 674 . doi: 10.1111/zoj.12286 OpenUrl CrossRef PubMed ↵ Rivas , S. R. , Valdez , M. J. M. , Govindarajan , V. , Seetharam , D. , Doucet-O’Hare , T. T. , Heiss , J. D. , & Shah , A. H. ( 2022 ). The Role of HERV-K in Cancer Stemness . Viruses , 14 ( 9 ), Article 9. doi: 10.3390/v14092019 OpenUrl CrossRef ↵ Wang , J. , Chitsaz , F. , Derbyshire , M. K. , Gonzales , N. R. , Gwadz , M. , Lu , S. , Marchler , G. H. , Song , J. S. , Thanki , N. , Yamashita , R. A. , Yang , M. , Zhang , D. , Zheng , C. , Lanczycki , C. J. , & Marchler-Bauer , A. ( 2023 ). The conserved domain database in 2023 . Nucleic Acids Research , 51 ( D1 ), D384 – D388 . doi: 10.1093/nar/gkac1096 OpenUrl CrossRef PubMed ↵ Weiss , R. A. ( 2006 ). The discovery of endogenous retroviruses . Retrovirology , 3 ( 1 ), 67 . doi: 10.1186/1742-4690-3-67 OpenUrl CrossRef PubMed ↵ Xue , B. , Sechi , L. A. , & Kelvin , D. J. ( 2020 ). Human Endogenous Retrovirus K (HML-2) in Health and Disease . Frontiers in Microbiology , 11 . doi: 10.3389/fmicb.2020.01690 OpenUrl CrossRef PubMed View the discussion thread. 
Back to top Previous Next Posted January 02, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Novel Endogenous Retrovirus in the Slow Loris Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Novel Endogenous Retrovirus in the Slow Loris Charles Michie , Hayley Beth Free , Vincent Nijman , Ravinder Kanda bioRxiv 2025.01.02.631053; doi: https://doi.org/10.1101/2025.01.02.631053 Share This Article: Copy Citation Tools Novel Endogenous Retrovirus in the Slow Loris Charles Michie , Hayley Beth Free , Vincent Nijman , Ravinder Kanda bioRxiv 2025.01.02.631053; doi: https://doi.org/10.1101/2025.01.02.631053 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genomics Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17691) Bioengineering (13892) Bioinformatics (41937) Biophysics (21452) Cancer Biology (18589) Cell Biology (25504) Clinical Trials (138) Developmental Biology (13378) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24320) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88605) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15156) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems 
Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-05-24T02:00:01.246996+00:00
License: CC-BY-4.0