Gene copy number variation (gCNV) contributes to adaptation along environmental gradient

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 107,843 characters · extracted from preprint-html · click to expand
Gene copy number variation (gCNV) contributes to adaptation along environmental gradient | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Gene copy number variation (gCNV) contributes to adaptation along environmental gradient View ORCID Profile Qiujie Zhou , View ORCID Profile Martin Lascoux , View ORCID Profile Pascal Milesi doi: https://doi.org/10.1101/2025.09.12.675866 Qiujie Zhou 1 Plant Ecology and Evolution, Department of Ecology and Genetics, Uppsala University , Norbyvägen 18D, 75236 Uppsala, Sweden 2 Science for Life Laboratory (SciLifeLab), Uppsala University , 75237 Uppsala, Sweden Find this author 
on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Qiujie Zhou Martin Lascoux 1 Plant Ecology and Evolution, Department of Ecology and Genetics, Uppsala University , Norbyvägen 18D, 75236 Uppsala, Sweden Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Martin Lascoux Pascal Milesi 1 Plant Ecology and Evolution, Department of Ecology and Genetics, Uppsala University , Norbyvägen 18D, 75236 Uppsala, Sweden 2 Science for Life Laboratory (SciLifeLab), Uppsala University , 75237 Uppsala, Sweden Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Pascal Milesi For correspondence: pascal.milesi{at}scilifelab.uu.se Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Gene copy number variations (gCNVs) are structural variations that represent a significant source of genetic polymorphism. While single nucleotide polymorphisms (SNPs) have been the primary focus of population and quantitative genomics, recent studies indicated that gCNVs could also play an important role in adaptation notably because of their multiallelic and quantitative nature. In this study, we investigate the role of gCNVs in local adaptation along environmental gradients using extensive genomic datasets in Norway spruce ( Picea abies ) and Siberian spruce ( P. obovata ). We used a robust pipeline for the detection and quantification of gCNVs from short-read exome capture data and used haploid samples for validation. We showed that gCNVs are pervasive, representing approximately 11% of the protein coding genes and are notably enriched in genes involved in response to environmental stress, such as temperature tolerance, immune response, and metal ion regulation. Population genetic structure at gCNV was similar to that observed at SNPs. 
However, some gCNVs also display distinctive adaptive signatures not captured by SNPs. Finally, we conducted gCNV-based genotype-environment association (GEA) and genome-wide association studies (GWAS) to further evidence that gCNVs contribute to local adaptation patterns and to the control of quantitative traits. Introduction Understanding the genetic basis of adaptation is a central issue in evolutionary biology and has implications for many fields, from conservation biology and population management to plant and animal breeding and health. The development of next-generation sequencing has facilitated the acquisition of population-level genomic data, providing insights into patterns of local adaptation and of its genetic control. Genome scans, genotype-environment or genotype-phenotype association studies have been widely used to characterize the genetic architecture of adaptative traits, and to relate these architectures to various factors such as the selection at the loci underlying the variation in the trait, or even the biology of the focal species (e.g., generation time, intensity of gene flow, mating system). In organisms occupying large geographical ranges, such as forest trees, local adaptation often occurs along environmental gradients. The source of selection can thus be highly dimensional, encompassing abiotic and biotic factors (e.g. climatic variables, photoperiod, soil types, presence of pathogens) and the response can affect many, often correlated, phenotypic traits. Hence, one can expect a polygenic architecture of adaptation and a quantitative relationship between environmental, genotypic and phenotypic variation (e.g. Milesi et al. 2019 ). To date, most studies of adaptation along environmental gradients have relied on the role of single nucleotide polymorphisms (SNPs). Yet, a main outcome of a decade of high-throughput sequencing is that genomic structural variations (SVs) encompass more genetic variability than SNPs (e.g. Catanach et al. 
2019 ; Tigano 2020 ). Recent studies highlighted an important role for SVs on both long and short evolutionary timescales ( Weissensteiner et al. 2020 ; Y. Zhou et al. 2019 ; Finnegan et al. 2023 ; Peona et al. 2022 ), with a large number of cases suggesting their significant contributions to local adaptation (e.g. Westram et al. 2021 ; Lecomte et al. 2024 ; Yan et al. 2021 ; Ben-Jemaa et al. 2024 ; Cayuela et al. 2021 ). Hence, especially in non-model species where available polymorphism often covers a limited part of the genome, it would be advantageous to include SVs as additional markers. In contrast to other types of SVs (e.g. inversions and translocations), deletions and duplications are unbalanced mutations that affect the dosage (amount) of a DNA sequence, resulting in copy number variations (CNVs). In the case of gCNVs, these mutations lead to a variation in the number of copies of a gene among individuals. Gene copy number variations (gCNVs) are widespread in eukaryotic genomes (e.g. Schiessl et al. 2017 ; Zmienko et al. 2016 ; Prunier et al. 2017 ), supported by a gene duplication rate per gene per generation that is many orders of magnitude larger than the point mutation rate (e.g., Mahmoud et al. 2019 ; Katju and Bergthorsson 2013 ). While most gene duplications are probably highly deleterious in the early stages (e.g., Schrider et al. 2013 ), since the first likely consequence of a change in gene copy number is a change in the amount of the gene’s product (e.g. Labbé et al. 2014 ; Shao et al. 2019 ), they can also form the basis of adaptations (e.g. detoxification and resistance to xenobiotics, Kondrashov 2012 and references therein). Different gene copy numbers can thus be associated with different phenotypic values (e.g. Assogba et al. 2016 ; Milesi et al. 2022 in insects, Wei et al. 2023 in angiosperms) and can be selected for in different environments (e.g. Dorant et al. 2020 in crustaceans, Cayuela et al. 2021 in amphibians, Kuo et al. 2024 in angiosperms). 
Their multiallelic and quantitative nature therefore makes gCNVs natural candidates in the control of quantitative traits and adaptation along environmental gradients. In spite of their potential as a molecular marker for evolutionary studies, genome-wide screening of gCNVs has remained confined to a limited number of organisms (e.g. Redon et al. 2006 ; Y. Zhou et al. 2022 ; Prunier et al. 2017 ). These genome-wide screenings have used long-read sequencing or specially designed technologies such as array-based comparative genome hybridization (aCGH) and SNP-array ( McCarroll and Altshuler 2007 ; Prunier et al. 2017 ). Furthermore, gCNVs were often subsumed under the term “structural variants” and considered as bi-allelic markers together with other structural variants such as inversions (e.g. Y. Zhou et al. 2022 ; Kang et al. 2023 ). The quantitative nature of gCNVs and their use for population and quantitative genomics studies have thus been largely overlooked ( Mérot et al. 2020 ; Lindstedt et al. 2025 ; Conrad and Hurles 2007 , but see Chiang et al. 2017 ; Sjödin and Jakobsson 2012 ). Here, we test the hypothesis that gene copy number variations contribute to adaptation along environmental gradients. To do so, we investigated the contribution of gCNVs to quantitative traits and local adaptation in Norway and Siberian spruce ( Picea abies [L.] H. Karst and P. obovata Ledeb., respectively). Norway and Siberian spruce are two keystone boreal forest tree species which form a syngameon (Q. Zhou et al. 2024 ) with a joint distribution range that extends from the Alpine mountain range in the west to the Sea of Okhotsk in the east, spanning a large array of ecological niches ( Karunarathne et al. 2024 ). Spruce species have giga-genomes with a large fraction of repeated elements ( Nystedt et al. 2013 ; Warren et al. 2015 ; Nilsson et al. 2025 ) and extensive gene duplications ( Prunier et al. 2017 ; Sahli 2017 ). 
Since the seminal work of Lagercrantz and Ryman (1990), spatial variation in genetic diversity at large and small geographic scales, as well as the demographic history of Norway and Siberian spruce, have been studied extensively (J. Chen et al. 2019 ; L. Li et al. 2022 ; Karunarathne et al. 2024 ; Q. Zhou et al. 2024 ; Milesi et al. 2024 ). Furthermore, recent studies have also identified quantitative trait loci (QTL) associated with the variation of phenotypic traits and assessed their importance for local adaptation ( Milesi et al. 2019 ; Z. Q. Chen et al. 2021 ; L. Li et al. 2022 ; Capador-Barreto et al. 2021 ; Tiret et al. 2023 ; J. Chen et al. 2012 ; 2014 ). In this study, we leveraged the extensive genomic resources available for P. abies and P. obovata to investigate the role of gCNVs in local adaptation. Using an innovative approach and combining information from diploid and haploid DNA, we show that gCNVs are widespread in the genomes of the two spruce species (∼12% of the 26,219 targeted genes). We then further develop population genetics metrics and quantitative genomics approaches to show that gCNVs globally follow the main population structure but exhibit distinct signatures of local adaptation that were not captured by SNPs. Results Gene CNVs can be reliably called from short-read exome capture data Calling copy number variations is challenging, even with full genome data and good quality reference genomes. Here, we leverage diploid and haploid DNA information from an extensive exome capture dataset to detect gene copy number variations (gCNVs) in two diploid exome-capture sequencing datasets. The first diploid dataset, hereafter “ Swedish cline ” dataset (plus signs, Fig. 1 and Supplementary material 1, N = 1758) includes trees sampled along a latitudinal gradient in Sweden and the second diploid dataset, hereafter “ P. abies - P. obovata ” dataset (filled circles and diamonds, Fig. 
1 and Supplementary material 2, N = 542) includes individuals sampled along a large longitudinal gradient. Finally, the third dataset consist of haploid tissues (megagametophytes, diamonds, Fig. 1 and Supplementary material 3, N = 180). We implemented a series of analyses to detect gCNVs from the two diploid dataset and curate the haploid dataset for false positive detection. Our approach was conservative, systematically favoring removal of false positives at the expense of false negatives. Download figure Open in new tab Figure 1. Sampling map for the three datasets included in this study. The yellow shaded area marks the joint natural distribution range of Picea abies and P. obovata . For the P. abies-P. obovata and the haploid dataset, location of populations sampled are indicated using filled or empty green symbols, respectively, for the Swedish cline dataset, individual tree locations are indicated with plus signs. Populations represented with green diamonds with thick black border indicate a subset of the P. abies-P. obovata dataset used for a growth chamber experiment and further genome-wide association analysis (GWAS). First, we used the method implemented in the rCNV R package to identify SNPs putatively located in multi-copy regions (so called “deviant” SNPs): 46,354 (18.22% of the total number of SNPs) and 127,770 SNPs (16.90% of the total number of SNPs) were identified as such for the Swedish cline and P. abies-P. obovata datasets, respectively. To avoid false positives, only probes with an enrichment in deviant SNPs of at least 30% were retained as candidate markers for gene CNVs (hereafter ‘CNV-probes’). 3,460 CNV-probes (covering 3,071 genes) were identified in the Swedish cline dataset, and 3,575 CNV-probes (covering 3,041 genes) in the P. abies-P. obovata dataset. A total of 2,449 CNV-probes (2,173 genes) were shared by both datasets (Szymkiewicz-Simpson overlap coefficient = 0.71). After filtering out paralogs (i.e. 
CNV-probes with a fixed number of copies across individuals), we retained 2,801 gCNVs (encompassing 3140 probes) and 2,479 gCNVs (encompassing 2903 probes) for the Swedish cline dataset and the P. abies-P. obovata datasets, respectively (Szymkiewicz-Simpson overlap coefficient = 0.69). As expected for copy number variations, the genomic regions corresponding to the gCNVs showed a higher mean depth of coverage (uDoC) and a higher coefficient of variation (CV, standard deviation of DoC / square root of uDoC) than the genomic regions harboring single copy genes for both diploid datasets ( Fig. 2A & B and Table 1 , Welch Two Sample t -test, all p -value < 2.2e-16). Download figure Open in new tab Figure 2. gCNVs mapping statistics and functional annotation. A , Depth of coverage (DoC); B , coefficient of variation (CV); C , observed heterozygosity in haploid data for genes classified as gCNVs (green) or single-copy (orange) from diploid DNA in each dataset; *** p -values < 2.2e -16 . D & E , Gene Ontology (GO biological processes) enrichment analyses across all gCNVs ( D ) and adaptive candidate gCNVs detected through GEA for the Swedish cline datasets ( E ). Only top 20 GO terms with a p -value < 0.05 are shown; see Fig. S2 for the P. abies-P.obovata dataset, and Tables S4 and S5 for the full list of enriched GO terms. View this table: View inline View popup Download powerpoint Table 1: Depth of coverage statistics and observed heterozygosity for single-copy or gCNV We further validated these results by comparing the observed heterozygosity (Ho) of gCNVs with that of genes classified as single-copy using the haploid dataset. Heterozygous positions in haploid DNA are not expected except for sequencing errors or when reads from multi-copy regions align at the same position in the reference genome, generating pseudo-heterozygotes. 
As expected, the average Ho of the haploid DNA (µHo = 0.028 ± 0.07) was much lower than that of the diploid DNA of the Swedish cline dataset (µHo = 0.246 ± 0.14) and the P. abies-P. obovata dataset (µHo = 0.123 ± 0.13). Note that the lower Ho in the P. abies-P. obovata dataset compared to the Swedish cline dataset is probably due to a Wahlund effect, as the population structure is much stronger. More importantly, for both datasets, genes classified as gCNVs using the diploid data have a much higher Ho in the haploid data than those classified as single-copy (Welch Two Sample t -test, all p -values < 2.2e -16 , Fig. 2C and Table 1 ). Taken together, our results support a conservative but robust detection of gCNVs from short-read exome capture data, although we cannot rule out a small proportion of false positives. gCNVs are widespread across the genome and involved in responses to biotic and abiotic factors Across both datasets, at least 10.8% of the targeted protein-coding genes of the Norway spruce and Siberian spruce show copy number variations. Using the consensus genetic map of Bernhardsson et al. (2019) , we show that they are distributed across the 12 linkage groups and that some form clusters in the same genomic regions (Fig. S1). These clusters likely indicate long segmental duplications, where the same mutation event affects several genes; the highly fragmented nature of the reference genome makes it difficult to properly estimate the length of the amplicons. Gene Ontology (GO, biological process only) terms associated with the gCNVs identified in each dataset were mostly shared across the two datasets (Swedish cline dataset, 962 GO terms, P. abies-P. obovata dataset, 925 GO terms; Szymkiewicz-Simpson overlap coefficient = 0.93) and enriched for terms mainly related to immune response and signal transduction (Fisher’s exact test, FDR adjusted p -value < 0.05, Tables S1 and S2 and Fig. 2D & S2A). 
In addition to response to biotic factors, GO terms were also enriched (Fisher’s exact test p -value < 0.05) for terms related to abiotic responses (e.g., response to water deprivation, cold, UV, red/far red light, and metal ions), growth and morphogenesis regulation (e.g., response to auxin and salicylic acid, cell death), and metabolic pathways (Tables S1 and S2 and Fig. S3 & S4). gCNVs follow the main population genetic structure but show a different pattern of diversity than SNPs We then used principal component analyses (PCA) to explore population genetic structure obtained from SNPs and gCNVs. For gCNVs, we used the normalized depth of coverage of CNV-probes as a proxy for copy number. For SNPs, the population structure was the same as in previous studies (L. Li et al. 2022 ; Q. Zhou et al. 2024 ). For the P. abies-P. obovata dataset, the SNP-based PCA captures the gradual change from P. abies genetic background to P. obovata genetic background, and the first two axes are strongly correlated with longitude and latitude ( Fig. 3A & S5A). For the Swedish cline dataset, the population structure captured by the first two axes correlates with latitude ( Fig. 3G & S5B). The population genetic structures retrieved with gCNVs were similar to those obtained with SNPs, but at a much lower resolution ( Fig. 3B & H , Fig. S5C & D). The gCNV-based population genetic structure predominantly followed longitude for the P. abies-P. obovata dataset and latitude for the Swedish cline dataset (Fig. S5C & D). For both datasets, the top principal components obtained from SNPs and CNV-probes were highly correlated ( Fig. 3D & J ). To further assess the reliability of our classification between CNV-probes and single-copy probes, we carried out PCAs based on the normalized depth of coverage of single-copy probes only. The population structure was barely discernible for the P. abies-P. 
obovata dataset and showed much weaker correlation with latitude or longitude, and it was completely absent for the Swedish cline dataset ( Fig. 3C & I , Fig. S5E & F). It confirms both the robustness of the classification, and that the observed population structure from the normalized DoC of CNV-probes is not a spurious signal from batch effect or library size. Download figure Open in new tab Figure 3. Population structure and genetic diversity obtained from SNPs and gCNVs. A – C & G – I , Principal component analyses based on single-copy SNPs, DoC of CNV-probes, and DoC of single-copy probes for the P. abies–P. obovata dataset (A-C) and the Swedish cline dataset (G-I). Colors correspond to genetic cluster defined by Q. Zhou et al. 2024 for the P. abies-P. obovata dataset and L. Li et al. 2022 for the Swedish cline dataset, respectively; D, E and F represent correlations between PCs scores computed using gCNVs or SNPs, population pairwise CNV dist and F ST and allele size variance (computed from gCNVs) and nucleotide diversity (summed π across all neutral single-copy SNPs for each dataset), respectively for the P. abies-P. obovata dataset; J,K and L, same as D,E and F but for the Swedish cline dataset. Estimates of Wright fixation index computed from SNPs ( F ST ) and gCNVs ( CNV dist ) were strongly correlated for both datasets ( Fig. 3E & K ). We then investigated the pattern of isolation-by-distance (IBD) obtained from SNPs or gCNVs. For both datasets and both types of markers, the patterns of IBD were strong, although more pronounced for SNPs (Mantel’s statistic r F ST – distance: 0.92 and 0.76, for the P. abies-P. obovata dataset and the Swedish cline dataset, respectively, all p -values < 1e -4 . Fig. S6A & B; Mantel’s statistic r CNV dist - distance: 0.64 and 0.48, for the P. abies-P. obovata dataset and the Swedish cline dataset, respectively, all p -values < 1e -4 . Fig. S6C & D). 
The strong IBD patterns as well as the strong correlations between F ST and CNV dist mean that physically and genetically close populations tend to have more similar gCNVs profiles. Taken together with the PCA, our results suggest that gCNVs globally segregate in the populations as SNPs do. However, within-population genetic diversity estimated from putatively neutral SNPs or gCNVs showed weak but negative correlations ( Fig. 3F & L). gCNVs play a role in local adaptation and are associated with the variation of phenotypic traits To test if gCNVs are involved in adaptation along environmental gradients, we explored the quantitative relationship between gCNVs and 19 bioclimatic variables at tree sampling locations downloaded from the Chelsa database (Fig. S7A, S8A & Table S3). First, for both datasets, we observed strong correlations between top principal components of PCAs based on normalized DoC of gCNVs or the records for the 19 bioclimatic variables ( Fig. 4A & 4B). Using the same set of bioclimatic variables, we then computed pairwise population environment distances (Euclidean) to investigate the pattern of Isolation-by-Environment (IBE). Given our sampling range, we expect the environmental distance to increase along with the geodesic distance between populations. To control for it, we first regressed the CNV dist over the geodesic distances (same model as used to calculate IBD) and then tested the significance of the correlation between the residuals of this regression and the environmental distance. For both datasets, we detected a significant IBE pattern for gCNVs, further supporting a role for gCNVs in adaptation along environmental gradients (Spearman’s rank correlation coefficients rho = 0.13, p -value < 1e -6 and rho = 0.10, p -value < 1e -6 , for P. abies-P. obovata and Swedish cline datasets, respectively). Download figure Open in new tab Figure 4. Genotype–environment associations. 
A & B , Pairwise Spearman’s correlations between the three first PCs of PCAs based on environmental data or normalized DoC of gCNVs, for the P. abies-P.obovata and the Swedish cline dataset, respectively. C & D Examples of CNV-probes that are significantly correlated with environmental variables. C. Top panel: Distribution of the mean normalized depth of coverage (DoC) of probe 18966 across the populations included in the P. abies-P. obovata dataset. The background color gradient represents the bioclimatic variable 1 (BIO1, mean annual air temperature). Bottom panel: Correlation between DoC of this probe and latitude or. D. Distribution of mean DoC of probe 17821 across populations included in the Swedish cline dataset. The background color gradient represents the bioclimatic variable 6 (BIO1, mean diurnal air temperature range). Bottom panel: Correlation between the DoC of this probe and latitude. E, Relationships between gCNV- and SNP-based GEA for the top three PCs of the bioclimatic variables-based PCA for the P. abies-P. obovata dataset. For each gene displaying CNV, the SNP with the smallest p -value is represented. Significant associations using the DoC of gCNVs are indicated in orange, while significant associations using SNPs are in blue. Also see Fig. S15–S18. We then performed a series of genotype-environment association analyses using generalized linear model to detect gCNVs associated with any of the 19 bioclimatic variables or the top three PCs of a PCA based on these variables. Across all 19 variables, 56 candidate gCNVs (2.3% of all gCNVs) were identified in the P. abies-P. obovata dataset and 103 (3.7% of all gCNVs) in the Swedish cline dataset (Fig. S9 & S10, examples of candidate gCNVs were shown in Fig. 4C & 4D , Table S4) with only two genes identified as candidate in both datasets. 
One of them is Aspartic Protease in Guard Cell 1-like (ASPG1-like) and encodes an aspartic protease which plays a role in drought avoidance through the abscisic acid signaling pathway; the function of the other gene is unknown. For the top three environmental PCs, we detected 18 and 9 candidate gCNVs, respectively for the two datasets (Fig. S7 & S8); all but two overlap with the candidate gCNVs associated with individual bioclimatic variables. The low overlap between the lists of candidate gCNVs between the P. abies-P. obovata and the Swedish cline dataset could be explained by the different geographical range of the two datasets, different climatic drivers likely shaping their diversity. This is supported by the fact that many associations with precipitation related variables are found in the P. abies-P. obovata dataset whereas almost none were found in the Swedish dataset (Table S4). Despite the low overlap in candidate gCNVs, but as expected for genes involved in adaptation along environmental gradients, both lists were enriched for GO terms mainly associated with responses to abiotic factors and organ development ( Fig. 2E , S2B, S11 & S12, Table S5 & S6). We also used a similar approach to that used by J. Chen et al. (2014) to identify gCNVs associated with latitude, using a subset of the P. abies-P. obovata dataset. However, no overlap was found between the significant gCNVs and those detected based on the Swedish cline dataset, even though both datasets follow latitudinal gradients. Finally, we explored the role of gCNVs in the control of phenotypic and phenology related traits known to vary across environmental gradients in Norway and Siberian spruce by conducting a common garden experiment under controlled conditions in a growth chamber (Supplementary material 1). As expected, all traits varied significantly across populations and quantitatively along latitude and longitude (Table S8). 
We then explored the pattern of gene copy number, phenotypic and bioclimatic variations using PCAs ( Fig. 5A and Fig. S13). Top principal components of the various PCAs showed strong quantitative relationships with each other ( Fig. 5A ), evidencing that gCNVs contribute to the strong local adaptation pattern through the control of phenotypic traits. Download figure Open in new tab Figure 5. Pattern of variation of trees genotype, phenotype and original environment. A , Pairwise Spearman’s rho correlation coefficient between the top PCs of PCAs based on phenotypic data (pDim), gCNVs normalized DoC (gDim), or bioclimatic variables at tree sampling locations (eDim). Disc diameter and color scale are proportional to Spearman’s rho with asterisks representing the significance level (*, p < 0.05; **, p < 0.01 and ***, p < 0.001). B , Q–Q plots for genome-wide associations between gCNVs normalized DoC and three phenotypic and phenology related traits. Grey shading indicates the 95% confidence interval under the null expectation (red line), and significant associations (adjusted p -value < 0.2) are in green. C , Relationship between normalized depth of coverage and trait values (corrected for the block effect) for the candidate gene elicitor-responsive protein 3 . We then conducted gCNV-based GWAS using linear and generalized linear mixed models to detect candidate gCNVs involved in the control of the phenotypic traits while correcting for population structure and population of origin. Despite the limited number of samples and a strong confounding effect of population structure (Fig. S14), normalized DoC of gCNVs was significantly associated with phenotypic and phenology-related traits (tree’s height (three associations), diameter (two), number of days for bud break (one) and total number of buds (four) as well as three associations with the first PC of the PCA based on phenotypic and phenology-related data) ( Fig. 5B , Fig. S14 and Tab. S4). 
Similar to GEA, the gCNVs that were significantly associated with the first PC were also detected as candidate genes using individual traits. Among this set of candidates, one gCNVs—annotated as elicitor-responsive protein 3 —stands out as it seems to be involved in the control of three traits (height, diameter, and number of buds, Fig. 5C ). This gene is primarily involved in the control of immune response but its over expression has been shown to reduce growth in A. thaliana ( Jing et al. 2020 ; Wang et al. 2024 ), probably through a metabolic trade-off as often observed for amplification of resistance genes (e.g. Guillemaud et al. 1999 ). Adaptive gCNVs cannot be detected using SNPs as markers in GEA and GWAS To test whether the same genes could have been detected using SNP data alone, we repeated the GEAs and the GWAS but using the SNPs located within the gCNVs as markers. For GEA, we used both, a population-based approach ( Bayenv2 ) and an individual-based approach (same generalized linear model as for gCNVs). We also used the same linear and generalized linear models for SNP-based GWAS as for gCNVs-based GWAS. For both GEA and GWAS we used putatively neutral SNPs located within single-copy genes to control for population structure. To err on the side of caution and ensure comparability with the gCNV-based analyses, we only retained one SNP per probe and selected the one with the largest Bayes factor ( bayenv2 ) or smallest p -value. Across all analyses only eight genes over a total of 494 significant associations (1.6%) were detected as candidate using both types of genotypes, SNPs or normalized DoC as a proxy for copy number (Tab. S4). This is further evidenced by the absence of correlation between the p -values obtained for GEA and GWAS using either type of genotypes ( Fig. 4E and S15–S19). Whether through GEA or GWAS analyses, our study shows that the signature of selection for gCNVs cannot be detected by conventional approaches using SNPs as markers. 
Discussion In this study, we investigated the role of gene copy number variations (gCNVs) in adaptation along environmental gradients in Norway spruce and Siberian spruce. We built a comprehensive framework for population genetic analyses with gCNVs, from their identification in exome capture sequencing data to population structure analyses and identification of candidate gCNVs contributing to local adaptation and associated with quantitative traits. We showed that gCNVs are numerous and widespread across the genomes of the two species. We globally observed the same patterns of population structure with gCNVs as with SNPs, and similar patterns of isolation by distance. A significant proportion of gCNVs were detected as candidate genes for adaptation, genes that would not have been detected using classical approaches with SNPs. Opportunities and limitations of using short-read exome capture data to call gCNVs Exome capture sequencing using short reads is a cost-effective method to generate extensive population-level data. It is particularly useful for non-model species, where the reference genome is often highly fragmented, and/or for species with giga-genomes where sequencing the whole genome of many individuals is too costly. However, this sequencing technology is prone to significant variation in read depth of coverage (DoC) due to various confounding factors that make it difficult to detect copy number variations ( Krumm et al. 2012 ). Nevertheless, exome capture targets the gene-space which is typically well annotated in reference genomes and shows a high degree of conservation (e.g. in A. thaliana , Igolkina et al. 2024 ). Thus, pseudo-SNPs are more likely to originate from gene copy number variations and paralogs than from other types of repeats, such as transposable elements. Here, we used the unsupervised machine learning-based clustering method implemented in the rCNV R package ( Karunarathne et al. 
2023 ) to detect pseudo-SNPs (referred to as ‘deviant’ SNPs in the rCNV method). As it is an indirect method that relies mainly on statistics derived from allelic DoC ratios and apparent excess of heterozygotes calculated at the SNP level it can be prone to both false positives and false negatives. False positives Various error-prone factors may influence the detection and some of the ‘deviant’ SNPs used for the classification are likely to be simply sequencing errors. As we expect these errors to be randomly distributed across the genome, we only considered probes containing at least 30% of the SNPs flagged as ‘deviant’ as candidates for multi-copy genes, using their local density as a source of information. Other false positives may arise from paralogs (e.g. fixed duplicates), as SNPs within paralogous genes may also be flagged as ‘deviant’. With exome capture data, the absolute depth of coverage is too variable from one targeted region to another to be used to identify fixed paralogs by fold change ( Neves 2013 ; Krumm et al. 2012 ). However, our study has shown that the relative depth of coverage of a given locus across samples can be reliably used after correcting for confounding factors (e.g. batch effect, GC content, Fromer et al 2012 ). It is then possible to use a clustering approach (e.g. MCLUST, Scrucca et al. 2016 ) to define copy number groups and filter out CNV-probes for which the best model would support a unique group. Such probes are likely to bind to fixed or nearly fixed paralogous copies. In contrast to previous studies in conifers using whole genome data ( Nystedt et al. 2013 ; Warren et al. 2015 ; Niu et al. 2022 ; Jang et al. 2024 ), only a small fraction of candidate multi-copy genes was identified as paralogs in our study. A first explanation comes from the design of the exome capture experiment, which avoids large gene families because the same probe would bind to too many loci (e.g., Neves 2013 ; Vidalis et al. 2018 ; Milesi et al. 
2024 ). Also, reads from duplicated copies that have accumulated divergence over a long enough time are less likely to be misaligned and therefore would not be detected. Another explanation lies in the specific features of the rCNV method, which has been optimized for the detection of copy number variations (e.g. the use of coefficient of variation of DoC, see also discussion in Karunarathne et al. 2023 ). In addition, fixed substitutions between two divergent paralogous copies have a 1:1 allelic ratio for each individual; only the apparent excess of heterozygotes induced by their presence allows their discrimination. In any case, the presence of false positives is expected to introduce random noise into the depth of coverage data, which would attenuate the pattern of population structure or any quantitative relationship with the number of copies (e.g. IBD, GEA, GWAS). Our results are therefore conservative and appear robust to the potential presence of undetected false positives. False negatives False negatives would be mainly due to a lack of statistical power when classifying the SNPs with the rCNV method. A too low depth of coverage of a given region or too few heterozygous individuals for a given SNP would prevent a robust detection of a ‘deviant’ SNP ( Karunarathne et al. 2023 ). Another limitation is the frequency of segregating duplicates in the dataset. Rare CNVs would result in a small number of heterozygotes with a skewed allele ratio, making their detection difficult. Similarly, the approach we used is blind to CNVs that would be strictly identical in the region captured (i.e., no pseudo-SNP present), which would also contribute to the low number of paralogs we detected. In contrast to false positives, false negatives are not expected to affect the patterns of variation observed with normalized depth of coverage for gCNVs. 
The various controls we have performed using normalized depth of coverage with single-copy genes also tend to indicate a rather low proportion of false negatives, or that they segregate at a low enough frequency to not generate patterns of population structure or spurious associations ( Fig. 3 & S5). Validation The number of putative gene CNVs and the large sample size of our datasets would make validation of even a small proportion of gCNVs using quantitative PCR-based approaches a time-consuming and costly endeavor. Instead, we used haploid DNA from the megagametophyte, a tissue found in the seed of spruce species, as a high-throughput and cost-effective approach to validate our detection of gCNVs as, for example, in Prunier et al. (2017) and Lind et al. (2022). Expanding the probe set from 40,018 to 92,000, while targeting the same genes, increased the resolution of our validation set. It is worth noting that validation using haploid data is performed using only the density of SNPs, regardless of the depth of coverage of the different alleles. The validation step is thus performed on a different signature than the detection step, and our study demonstrates the reliability of using haploid sequencing data to validate the call of gCNVs genome-wide, provided that sequencing errors are limited. In coniferous species, for example, an optimal sequencing design would involve sequencing both haploid DNA from the megagametophyte and diploid DNA from the embryo from the same seed ( Bernhardsson et al. 2019 ; Lind et al. 2022 ). In addition to generating high-confidence SNP and gCNV data, such a design would also allow the phasing of the two types of polymorphism, giving access to the relative copy number carried by each homologous chromosome rather than an individual average. Large gCNV polymorphism in Picea abies and P. 
obovata populations Over all the genes captured in our experiment (26,219) and despite conservative filtering criteria, ∼10% of them display copy number variations. This large fraction is likely to be a lower bound, as our detection approach excludes i) rare copy number variants, ii) large gene families and misses iii) copies with strictly identical sequences in the probe’s regions. Such a large number is more likely due to a high gene duplication rate than to a whole genome duplication (WGD) event followed by biased gene retention as often observed in plant species ( Panchy et al. 2016 ). As a single WGD event occurred at the root of Pinaceae between 200 and 342 mya ( Stull et al. 2021 ), a substantial divergence between the copies retained would be expected. Our findings are, however, consistent with recent studies in spruce species based on WGS data ( Nilsson et al. 2025 ) or array comparative genomic hybridization (aCGH) sequencing (Prunier, Caron, Lamothe, et al. 2017; Sahli 2017 ). Nilsson et al. (2025) showed that since its divergence from Scots pine ( Pinus sylvestris ) ca. 130 mya, the Norway spruce genome has accumulated 1 Gb of genic sequences mostly through large segmental duplications, independently from transposable element activities. Sahli (2017) estimated a rate of $3 \times 10^{-5}$ copy number changes per gene per generation using Picea glauca (white spruce) pedigrees. This estimate is within the range of gene duplication rates obtained for other eukaryotic organisms ( Schrider et al. 2013 ; Denver et al. 2009 ; Lipinski et al. 2011 ; Pan and Zhang 2007 ) and would explain the large polymorphism of copy number observed in the genome of Norway spruce and Siberian spruce. We identified gCNVs in all the linkage groups as well as the presence of several clusters suggesting the existence of potential gCNV hotspots (Fig. S1), as found in poplar ( Prunier et al. 2019 ; Pinosio et al. 2016 ). 
It is also possible that these clusters, or some of them, belong to large segmental duplications encompassing several genes (e.g. Assogba et al. 2016 ). The use of exome capture technology, however, prevents us from teasing apart the alternative hypotheses, but the study by Nilsson et al. (2025) would support the latter. Nonetheless, considering the spread of the gCNVs across the linkage groups, most of them probably arose from independent events, further suggesting a high gene duplication rate. gCNVs and SNPs segregate in a similar way between populations but may have different global fitness effects Despite the large difference in geographical range, we found that genes showing copy number variations largely overlap between the two datasets (∼71% overlap, 2,173 genes). This is not surprising given that P. abies and P. obovata form a syngameon (Q. Zhou et al. 2024 ) with extensive gene flow, relatively low level of genetic differentiation ( Tsuda et al. 2016 ; J. Chen et al. 2019 ; Q. Zhou et al. 2024 ) and a small fraction of fixed substitutions (Karunarathne et al. 2024). As other studies on different organisms previously showed (e.g. Sjödin and Jakobsson 2012 ; Xu et al. 2016 ; Cheeseman et al. 2016 ), we characterized a similar population structure using either the normalized depth of coverage of gCNVs or SNPs as genotypes, but with much lower resolution for the former. While this shows that gCNVs tend to segregate as SNPs between populations, it appears that there is little, if any, benefit to considering them in addition to SNPs in estimating population structure, especially since a large fraction of SNPs are likely to be effectively neutral, which might not be the case for gCNVs. In striking contrast to SNPs, we found a similar number of gCNVs in each dataset. One reason for this could be that our approach to detect gCNVs is not exhaustive and is biased toward common gCNVs. 
On the other hand, the different patterns in numbers of SNPs and gCNVs between these two datasets may also suggest that different evolutionary forces shape the bulk of the diversity of these two types of polymorphisms, as supported by the lack of positive correlation between within population diversity of gCNVs and neutral SNPs ( Fig. 3F & L). This is even more likely considering the higher estimates of the gene duplication rate compared to the point mutation rate. Several studies have suggested that gCNVs are more likely to have larger fitness effects than SNPs (e.g., Katju and Bergthorsson 2013 ; Schrider, Houle, et al. 2013; Adler et al. 2014 ; Langley et al. 2012 ; Dorant et al. 2020 ; Stuart et al. 2023 ), which likely increase with the number of copies (e.g., Adler et al. 2014 ; Langley et al. 2012 ; Katju and Bergthorsson 2013 ; Schrider, Houle, et al. 2013). The vast majority of gCNVs are thus expected to be highly deleterious (Schrider, Houle, et al. 2013) and should be removed by purifying selection. Conversely, high copy number CNVs that are kept in the population are more likely to have a positive fitness effect. For example, most adaptive gene amplifications conferring resistance to xenobiotics are selected against in their absence (e.g. Kondrashov 2012 ; Milesi et al. 2017 ; Weill et al. 2000 ; Andersson and Hughes 2009 ), demonstrating the intrinsic deleterious effects of such mutations. A comparison of the frequency spectrum of SNPs and gCNVs would be interesting to assess the proportion of gCNVs that are effectively neutral. It is, however, not possible with the current dataset because our detection method is biased towards common gCNVs. Also, we only have access to relative copy number, while absolute copy number would be needed as our study, as others before, suggests that different copy numbers may have different selective effects. 
gCNVs are key players in adaptation along environmental gradient The development of high-throughput sequencing technologies and the acquisition of population-level genomic data offer the opportunity to gain a comprehensive view of the genetic architecture of adaptation. This notably evidenced the role of genomic structural variations in adaptation alongside that of SNPs (e.g. Assogba et al. 2016 for large segmental duplications, Battlay et al. 2025 for inversions). Despite noisy DoC of CNV-probes, a strong confounding effect with population structure and conservative thresholds, we detected a significant number of candidate adaptive gCNVs in both datasets, indicating their role in adaptation along environmental gradient at small and large geographical scales. The low overlap in candidate gCNVs between the two datasets (only one) suggests that the variation in number of copies is likely a response to different selective drivers in different parts of the range, as illustrated by the different number of associations with precipitation-related variables found in the P. abies-P. obovata dataset (62 associations) and the Swedish cline dataset (only four, Table S4). Our study also provides support for a role of gCNVs in controlling quantitative traits involved in adaptation along the same gradients (growth traits and phenology-related traits) even though the power of the analysis was limited by a low number of individuals. This is in line with Prunier et al. (2017), who showed that gCNVs are involved in the control of phenology (e.g. bud set, bud flush) and growth traits in Picea glauca , using parent–offspring trios in a breeding program. The majority of candidate gCNVs that we identified from GEA and GWAS are involved in relevant biological processes such as immune response, metal ion response, flowering time regulation, and response to environmental stress and stimulus (e.g. water deprivation, cold, and far-red light). 
These functions were repeatedly identified in different species ( Schiessl et al. 2017 ; Wei et al. 2023 ; Prunier, Caron, Lamothe, et al. 2017; Hung et al. 2024 ; Hardigan et al. 2016 ), but also in Norway and Siberian spruce using SNPs as markers (J. Chen et al. 2014 ; Milesi et al. 2019 ; L. Li et al. 2022 ; Z. Q. Chen et al. 2021 ; Karunarathne et al. 2024 ). The high duplication rate, multi-allelic and quantitative nature of gCNVs likely enable a dynamic and rapid response to environmental variation, whereby different copy numbers are selected along environmental gradients. On the other hand, gCNVs are likely to be more detrimental when not adaptive, and one would expect them to have a lower standing variance compared to SNPs, which could delay the response. In any case, our study shows that gCNVs, alongside SNPs, contribute to local adaptation along environmental gradients, further supporting polygenic adaptation ( Milesi et al. 2019 ; L. Li et al. 2022 ; Z. Q. Chen et al. 2021 ). The effect of gCNVs is not captured by SNPs Last but not least, adaptive gCNVs could not have been identified using SNP data alone. In the short term, gCNVs can be selected for two main reasons: gene dosage (e.g. Kondrashov 2012 ) and fixation of a permanent heterozygote advantage (segregation avoidance model, Milesi et al. 2017 ; Spofford 1969 ). Under the dosage scenario, the adaptive potential is independent of the observed allele frequencies, whereas under the segregation avoidance model, the SNPs trapped in the duplication would show an apparent excess of heterozygotes ( Lenormand et al. 1998 ) and would have probably been filtered out of the dataset using standard SNP filtering procedures. Although most of the genome scans, GWAS and GEA methods were primarily developed for analyzing SNP data, they have proved effective in detecting adaptive structural variations (SVs) using shifts in SNP frequencies as markers. 
If this is particularly true for balanced SVs such as inversions ( Ayala et al. 2019 ; Kennington et al. 2007; Koch et al. 2021 ; Westram et al. 2023 ), it is much less so for unbalanced mutations, such as deletions or copy number variations (e.g. Mérot et al. 2020 ). For example, Redon et al. (2006) found that causal CNVs in humans were poorly predicted by neighboring SNPs. Prunier, Caron, Lamothe, et al. (2017) found no overlap between gCNVs and SNPs in P. glauca and Harris et al. (2024) obtained much higher polygenic scores for various quantitative traits when including CNVs in addition to SNPs. gCNVs can thus be considered largely untapped genetic variation that may even explain a fraction of phenotypic and / or fitness variation, and, to some extent, part of the ‘missing-heritability’ ( Maher 2008 ), as for other structural variants (e.g., Yao Zhou et al. 2022 ). Since SNPs do not capture their effects, a comprehensive understanding of their role at short evolutionary timescales requires characterizing gCNVs a priori and including them in genomic studies in a more systematic way, rather than investigating them a posteriori from already identified candidate genes. Such information would also be particularly relevant in quantitative genomics, for instance in the context of plant and animal breeding. Conclusion The last decade has seen a giant leap in the availability of high-throughput sequencing data. These data have primarily been used to analyze SNP variation across populations. However, our study has shown that short-read exome capture data can be reliably used for studying gene copy number variations. The use of old data and the recognition of gCNVs as an additional type of polymorphism opens the door to additional studies to fully capture their role on short evolutionary timescales. Our study also highlights the importance of incorporating gene CNVs into population and quantitative genomic studies to address important questions in evolutionary biology. 
This would require development/adaptation of current analytical methods, and our study has taken a step in this direction. Whether the pattern we observed in Norway spruce and Siberian spruce is restricted to species with similar biological characteristics (i.e., large distribution gradient, extensive gene flow, inability to track ecological niche shift) or if it is a more global pattern remains an open question. In any case, our study is an incentive to systematize the study of gene CNVs. Material and Methods Sampling and sequencing This study combined previously published genomic datasets for approximately 2300 individual trees from different studies, but sequenced using the same exome capture experiment (Illumina sequencing of paired-end short-reads with a set of 40,018 probes targeting 26,219 genes, Vidalis et al. 2018 ). We defined two datasets at different geographical scales ( Fig. 1 ): - First, the “ P. abies-P. obovata ” dataset consists of 542 spruce individuals sampled in 55 populations along a longitudinal gradient from Western Europe and Fennoscandia (Norway spruce) to the Yenisei River (Siberian spruce) and across a large hybrid zone between the two species (Supplementary material 1, Karunarathne et al. 2024 ; Q. Zhou et al. 2024 ). - Second, the “ Swedish cline ” dataset is composed of 1758 individual trees collected in natural populations along a latitudinal gradient whose progeny were planted in breeding trials established by Skogforsk (The Forestry Research Institute of Sweden, Supplementary material 2, L. Li et al. 2022 ). For population-based analysis, we gathered geographically close-by individual trees belonging to the same genetic cluster into the same population following Li et al. (2022) . In addition, we extracted haploid DNA from the megagametophyte, a haploid tissue found in the seed, for an additional 180 individuals from 36 populations covering a comparable geographic range to the P. abies-P. 
obovata dataset to capture the same genetic diversity ( Fig. 1 , Supplementary Material 3). We used haploid DNA as a high-throughput and time-efficient approach to cross-validate the list of candidate gene CNVs established from the diploid DNA (see below). To isolate the megagametophyte, we dissected the seeds according to the protocol of García and Escribano-Ávila (2016) . We used the Nucleospin PLANT II minikit (Macherey-Nagel GmbH & Co. KG, Germany) to extract haploid DNA from megagametophytes. The haploid DNA was then sequenced by the same company as the diploid DNA (RAPiD Genomics, USA), using the same exome capture sequencing technology, but with an expanded probe set: we targeted the same genes as for the diploid DNA, but increased the number of probes per gene by designing an additional set of 51,982 probes (92,000 in total). The raw-reads were deposited in NCBI ( https://www.ncbi.nlm.nih.gov/sra , BioProject PRJNA1261476). DNA mapping and SNP calling For diploid DNA, we downloaded the raw-reads from NCBI ( https://www.ncbi.nlm.nih.gov/sra , BioProject PRJNA511374, PRJNA731384, and PRJNA1007582). We then followed the same pipeline as used in Q. Zhou et al. 2024 to perform quality control of the raw-reads, sequence alignment against the P. abies reference genome (v1.0, Nystedt et al. 2013 ), SNP calling and filtering. Briefly, after mapping (BWA-MEM v 0.7.17, H. Li et al. 2009 ), PCR duplicates were removed using PICARD v 2.27.4 ( https://github.com/broadinstitute/picard/ ), followed by genotype identification carried out with GATK HaplotypeCaller (v 4.1.4.1) individually and SNP calling using GenotypeGVCFs across all samples jointly. Hard-filtering was performed to filter out SNPs of low quality using the following criteria: QD < 2.0, MQ < 40.0, SOR > 3.0, QUAL < 20.0, MQRankSum < -12.5, ReadPosRankSum < -8.0, FS > 60.0. The final datasets contained 254,360 and 758,429 SNPs for the Swedish cline and P. abies-P. 
obovata dataset, respectively, and included both putatively neutral and non-neutral SNPs. Only putatively neutral SNPs (i.e. SNPs located in intronic or intergenic regions, or being synonymous in protein coding sequences) with call rate larger than 0.7 and minor allele frequencies larger than 0.01 were kept for population structure analyses. We used exactly the same pipeline for haploid DNA; note that we kept the parameter for sample ploidy equal to 2 in GATK (--sample-ploidy 2) to allow the software to call heterozygote positions. Identification and quantification of gene copy number variations To identify gene copy number variations from our exome capture data, we first identified SNPs located in putative multi-copy genomic regions. We then normalized the sequencing depth of coverage of these multi-copy regions and defined copy number genotypes. Finally, we excluded regions for which there was no variation in copy number between individuals (i.e., paralogs). The whole pipeline is detailed below and represented in Fig. 6 . Download figure Open in new tab Figure 6: Detection and quantification of gene copy number variations from exome capture data. The main steps of the workflow are represented by the three light gray boxes and are described in detail in the Materials and Methods section. Orange boxes are input data sets, dark gray boxes are intermediate data sets, and green boxes are output data sets used for downstream analyses. Detection of multi-copy regions from SNPs As a first step, we used the approach for CNV and paralogous regions discovery from SNP data optimized for reduced genome representation sequencing data implemented in the rCNV R package (v1.3.9, Karunarathne et al. 2023 ). Briefly, it combines information from the apparent excess of heterozygotes from Hardy-Weinberg equilibrium with the deviation from the expected mean and variance of allelic depth of coverage (DoC) ratios across heterozygous sites to classify each SNP as ‘deviant’ (i.e. 
putatively located in multi-copy regions) or not. We first corrected allelic DoC for genotype misclassification (where genotype call and allelic DoC do not match) and odd-numbered DoC using the ad.correct function. Given the scale of our sampling, we controlled for population structure when calculating the apparent excess of heterozygotes (using the Fis parameter in the allele.info function). As exome capture can introduce a bias in depth of coverage towards the reference allele ( Lelieveld et al. 2015 ), we followed the recommendations of Karunarathne et al. (2023) and used the average allele DoC ratio across all individuals ( p.all ) as the expected allelic DoC ratio in heterozygotes (instead of p = 0.5). We then used the unsupervised K-means clustering approach to classify the SNPs as “deviant” or not ( cnv function, filter=kmeans parameter). A significant proportion of false positives and false negatives are inevitable when calling CNVs from SNP data (Karunarathne et al. 2023). Therefore, in a second step, we used the density of “deviant” SNPs in each target region to further refine the list of candidate gene CNVs. Each probe used for the exome capture is 120 bp long, and we defined the targeted region of each probe by extending the sequence of the probe by 100 bp on both sides, resulting in a 320 bp region per probe in total. Probes with a number of SNPs greater than two and a proportion of SNPs classified as “deviant” greater than 0.3 within the 320 bp targeted region were classified as “CNV-probes”. All the other probes with at least two SNPs were considered as “single-copy probes”. Genes which contained at least one CNV-probe were considered multi-copy genes. Conversely, only those genes which only contained probes categorized as single-copy probes were treated as single-copy genes. 
Using the same classification, we generated a clean SNPs dataset keeping only SNPs located within single-copy genes, resulting in 136,803 and 356,940 SNPs for the Swedish cline and the P. abies-P. obovata dataset, respectively. Validation of multi-copy regions We used the SNP dataset obtained from haploid DNA to validate the set of genes classified as gCNVs. As the megagametophyte is a haploid tissue, heterozygous positions are not expected unless there is a sequencing error or if reads from multiple copy regions are stacked at the same position during alignment to the reference genome. Therefore, for each gene independently, we calculated the observed heterozygosity across all the probes it contained and compared it between genes classified as single-copy or multi-copy, the latter being expected to have higher heterozygosity. DoC normalization and copy number genotypes Exome capture is a molecular hybridization-based sequencing technology, and the sequencing depth of coverage (DoC) of each position decreases with distance from the center of the probe ( Neves 2013 ; Vidalis et al. 2018 ). Therefore, the DoC of a given “deviant” SNP cannot be used as a direct proxy for the number of copies of the genomic region in which it falls. Instead, we used the DoC of the 60 bp region (retrieved from the alignment files using samtools depth v 1.15, H. Li et al. 2009 ) located at the center of the probe capturing that region, as suggested by Neves (2013) . Many factors can also influence the depth of coverage, either globally (e.g., library size, batch effect) or more locally (e.g., probe capture efficiency, GC content, Fromer et al. 2012 ). To control for the effect of these factors on DoC, we used a two-step normalization procedure to generate depth matrices in which DoC represent relative copy number genotypes (i.e., the number of copies of a given gene in a given individual). 
First, we excluded probes with an average DoC < 5 and normalized the DoC using the cpm.normal function implemented in the rCNV R package to account for variation in library size between samples. Second, following the approach of Krumm et al. (2012) , we excluded obvious outliers using a normalization method based on principal component analysis and used singular value decomposition (SVD) to examine different factors that might influence the top five principal components (PCs, Table S8). We performed linear regressions of PC loading (for individual-based factors) or score (for probe-based factors) of each PC against the following factors: population structure (represented by PC loadings of PC1 and PC2 from PCAs based on cleaned SNP data), sequencing batch, sequencing library size, coefficient of variation of individual DoC, mean probe DoC, and probe GC content. Among the PCs examined, only those with a strong correlation with the population structure were retained, while the effect of the other confounding factors was removed from the DoC (based on the adjusted $r^2$ of the regressions, Table S8) by changing the singular values of the corresponding PCs to 0 and reconstructing normalized DoC with the new singular vectors. Paralogs filtering We used a model-based hierarchical clustering approach, implemented in the R package MCLUST v 6.1.1 ( Scrucca et al. 2016 ) to further refine the candidate gCNV lists. We defined discrete copy number groups from the normalized DoC distribution of each CNV-probe across samples. Two different models were tested: an equal variance model and a spherical variable variance model. For each model, the number of components ( k , i.e. the number of copy number groups) ranged from 1 to 10. The best model, together with the corresponding optimal k , was selected based on the associated Bayesian Information Criterion (BIC). 
We then discarded the CNV-probes for which a single copy number group was identified across all individuals ( k = 1 ) or two copy number groups were identified ( k = 2 ) but with fewer than five individuals in any group, similar to filtering for non-variant sites or low minor-allele count variants with SNP data. The normalized DoC of the remaining CNV-probes was then used as a proxy for gene copy number in all downstream analyses. Population structure analyses For SNP data, we used the smartpca function implemented in EIGENSOFT v 7.2.0 ( Galinsky et al. 2016 ) with default parameters using only putatively neutral SNPs pruned for pseudo-SNPs and for linkage disequilibrium ( $r^2$ > 0.5) using plink v2.00a3LM ( Purcell et al. 2007 ). For gCNVs, the PCA was performed on the normalized DoC of CNV-probes using the prcomp function (R base package v 4.2.2 ). Population differentiation, Isolation-by-Distance, and Isolation-by-Environment To measure genetic diversity within population, we used VCFtools v0.1.17 to calculate per-site nucleotide diversity (Nei’s π , Nei and Tajima 1981 ) for SNP data. For gCNVs, we used the allele size variance ( Valdes et al. 1993 ) for measuring within population diversity while using normalized DoC of CNV-probes as a proxy for gene copy number: $$V_j = \frac{1}{n}\sum_{i=1}^{n} \mathrm{var}(DoC_{i,j})$$ where $n$ is the total number of CNV-probes, $\mathrm{var}(DoC_{i,j})$ represents the sample variance of normalized DoC for probe $i$ in population $j$ . We estimated population pairwise $F_{ST}$ ( Weir and Cockerham 1984 ) using VCFtools v0.1.17 ( Danecek et al. 2011 ) based on SNP data. To ensure comparability between SNPs and gCNVs, we instead calculated population pairwise weighted Manhattan distances based on the normalized DoC of CNV-probes (hereafter $CNV_{dist}$ ) for measuring population dissimilarity: $$CNV_{dist}(j_1, j_2) = \frac{1}{n}\sum_{i=1}^{n} \frac{\left|\overline{DoC}_{i,j_1} - \overline{DoC}_{i,j_2}\right|}{uDoC_i}$$ where $j_1$ and $j_2$ are the two populations to measure gCNV-based genetic dissimilarity, $\overline{DoC}_{i,j}$ is the mean normalized DoC of probe $i$ in population $j$, and $uDoC_i$ indicates the mean depth of coverage of probe $i$ across all samples. 
We estimated population pairwise geodesic distances using the R package geodist v0.0.8 ( Padgham 2021 ). We quantified population pairwise environmental distances using records for 19 bioclimatic variables at the population location downloaded from the Chelsa database v 2.1 ( http://chelsa-climate.org , Table S2). The bioclimatic variables were scaled before calculating Euclidean distances between population pairs using the dist function in the R package stats v 4.2.1. We examined patterns of Isolation by Distance (IBD) by regressing either F ST or CNV dist over geodesic distances, respectively. As the environmental distance is expected to be strongly correlated with the geodesic distance between populations, we tested the patterns of Isolation by Environment (IBE) by regressing residuals of IBD over the environmental distance. Testing for local adaptation For the two main datasets, we conducted genotype-environment-association (GEA) analyses to identify candidate gCNVs involved in adaptation along environmental gradients. We used the following generalized linear model for GEA analyses controlling for population structure: $$g_{ij} = u_j + Q + e_{ik} + \varepsilon_{ijk} \qquad (3)$$ where $g_{ij}$ is the genotype at gCNV locus $j$ of individual $i$ , $u_j$ is the mean copy number of gCNV locus $j$ across all individuals, using DoC as proxy, $e_{ik}$ is the value of environmental variable $k$ at the sampling location of individual $i$ , and $\varepsilon_{ijk}$ represents the residuals. $Q$ is a matrix containing the top PC coordinates from a PCA based on putatively neutral SNPs pruned for LD and CNVs. The number of PCs used for controlling for population structure was determined manually based on a scree plot of variance explained, as suggested by Luu et al. (2017) . We performed GEA analyses for each of the 19 bioclimatic variables (Table S3) as well as for the top three PCs from a PCA based on the bioclimatic variables, which explained > 85% of the total variance for both datasets. 
Because of a strong confounding effect between population structure and environmental variation in Norway spruce ( Milesi et al. 2019 ; L. Li et al. 2022 ), gCNVs with a false discovery rate (FDR) < 0.2 (R package stats v 4.2.1) were considered as candidates contributing to adaptation along environmental gradients. To test whether the candidate gCNVs for local adaptation could have been detected from the SNP data alone, we also performed GEA analyses using all SNPs found within all gCNVs. Two approaches were used, both controlling for population structure. First, as we did for CNVs, we used a similar population-based generalized linear model, replacing the response variable with the alternative allele frequency. Top three PC values for each population were obtained by averaging PC loading across all individuals within the population. SNPs with FDR < 0.2 were considered as significant associations. Second, we used bayenv2 software ( Günther and Coop 2013 ) to perform a GEA between each SNP and bioclimatic variable using a variance-covariance matrix of population allele frequencies (omega matrix) to control for population structure. The omega matrix was computed using BayPass v2.3 ( Gautier 2015 ) based on 50,000 randomly sampled putatively neutral SNPs, pruned for multicopy regions and high linkage disequilibrium. SNPs with a Bayes factor > 20 (strong to decisive evidence according to Jeffreys’ scale, Jeffreys 1998 ) were considered as candidate SNPs. To be more conservative, we only retained the smallest FDR and the largest Bayes factor among SNPs within a given probe. Growth chamber experiment and gCNV-based genome-wide association study A total of 230 individuals from 22 populations of the P. abies-P. obovata dataset ( Fig. 1 , Supplementary material 1) were also grown in a growth chamber in a common garden setting following J. Chen et al. (2012) . Seeds were germinated and individual seedlings were grown in 8×8 cm pots randomly spread over 26 trays. 
The trays were spread over three benches in a growth chamber and their respective positions within and between benches were regularly re-randomized. The seedlings were first grown under continuous light (250 μmol m⁻² sec⁻¹ light, 700 nm wavelength) at a temperature of 20°C for eight weeks. Then, seedlings were grown under increasing night length, where each photoperiod lasted one week, and the dark period was extended by 1.5 hr each week until reaching full dormancy in the growth chamber. The trees were kept under dormancy for a six-week period at 12°C. Two additional growing seasons of 26 weeks were induced with a day length regime following that of 56°N (15 April to 15 October) and a constant temperature of 20°C, each separated by a dormancy period of six weeks at 12°C, adapted from Liepe et al. (2016) . During the entire experiment, the substrate was kept moist by watering the seedlings regularly. Before the last growing season, we recorded the number of days before budbreak occurred, and after the last growing season we measured individual height and diameter and recorded the number of branches and the total number of buds per individual. First, we assessed patterns of local adaptation by analyzing phenotypic variation between populations using the following model: $$y_{ijk} = b_k + p_j + \varepsilon_{ijk} \qquad (4)$$ where $y_{ijk}$ corresponds to the value of a given phenotypic trait measured for individual $i$ from population $j$ in block $k$ , and $\varepsilon_{ijk}$ represents the residuals. $b$ and $p$ are fixed effects corresponding to the tray in the growth chamber and the population of origin, respectively. We used linear models to analyze height, diameter, and the number of branches (log-transformed and increased by one), as well as generalized linear models to analyze the number of buds (with a Poisson distribution) and the number of days before budbreak (with a gamma distribution and a log link function). 
Next, we examined the relationship between phenotypic trait values and the latitude, longitude, and altitude of the sampling location. Since these three factors are highly correlated, we analyzed them separately using the above-described model ( eq. 4 ), replacing the population effect ( p ) with the corresponding factor. Finally, we conducted genome-wide association studies (GWAS) to identify gCNVs putatively involved in the control of these traits. We used the following mixed model controlling for block effect as well as population structure: $$y_{ijk} = b_k + Q + X + p_j + \varepsilon_{ijk} \qquad (5)$$ where $y_{ijk}$ corresponds to the value of a given phenotypic trait measured for individual $i$ from population $j$ in block $k$ , and $\varepsilon_{ijk}$ represents the residuals. $b$ is the fixed effect corresponding to the tray. $Q$ is a matrix of the first three PCs’ coordinates from a PCA based on putatively neutral SNPs from single-copy genes pruned for LD to control for population structure. $X$ is a matrix of normalized DoC for CNV-probes, and $p$ is the random effect corresponding to the population of origin. We used linear mixed models to analyze height, diameter, and the log-transformed number of branches increased by one. We also used generalized linear mixed models to analyze the number of buds with a Poisson distribution and the number of days before budbreak with a gamma distribution and a log link function. Functional annotation and GO enrichment analyses The protein coding sequence (CDS) of each gene included in the probe panel was extracted from the reference genome based on the generic feature annotation v1.0 ( http://congenie.org/ ) and translated into protein sequences using gffread v 0.12.8 ( Pertea and Pertea 2020 ). Functional annotation was obtained by aligning the protein sequences against the non-redundant (NR) database ( https://www.ncbi.nlm.nih.gov/ ), the UniProtKB database ( https://www.uniprot.org/ ), and the Pfam database ( http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/ ), using BLAST v 2.15.0+ ( Johnson et al. 
2008 ) and DIAMOND v2.1.9 software ( Buchfink et al. 2021 ). For each protein sequence, the five most significant hits with the lowest e -values were selected from each database. The associated gene ontology (GO) terms (biological process only) were obtained by either ID mapping ( https://www.uniprot.org/id-mapping ) or InterProScan ( https://www.ebi.ac.uk/interpro/search/sequence/ ). Functional enrichment analyses were performed with clusterProfiler v 4.6.2 ( Wu et al. 2021 ) and visualized with rrvgo v 1.1.0 ( Sayols 2023 ). Author contribution PM and QZ conceived and designed the study, performed the experiments, analyzed the data, and wrote the manuscript. ML contributed data. PM and ML supervised the work. All authors participated in reviewing – editing tasks. Data availability and open science The raw resequencing data of the newly sequenced haploid samples from this study have been deposited in the National Center for Biotechnology Information (NCBI) BioProject PRJNA1261476. All the scripts and intermediate data used in the analysis are archived in https://zenodo.org/records/17105753 . Conflict of interest The authors of this preprint declare that they have no financial conflict of interest with the content of this article. Acknowledgement We would like to thank Piyal Karunarathne for his assistance with code troubleshooting and for his helpful guidance on implementing specific functions in the rCNV R package and Jun Chen, Clémence Monod, Chen Chen and Piyal Karunarathne for their help with setting up the growth chamber experiment and traits measurement. We also would like to thank Vladimir Semerikov for his help with sampling most of the P. obovata material and Elena Nakvasina in providing us with additional plant materials. We also thank Øyvind Meland Edvardsen and the Norwegian Forest Seed Center, Arne Steffenrem and the Norwegian Institute of Bioeconomy Research, Luc E. 
Pâques and the INRAE-UMR BIOFORA Orléans, France, Muhidin Šeho and the Office for Forest Genetics, Teisendorf, Germany, Darius Danusevicius and the Vytautas Magnus university, Kaunas, Lithuania, Marcela van Loo and the Austrian Research Center for Forest, Giovanni Giuseppe Vendramin and Andrea Piotti and the Institute of Biosciences and BioResources, CNR, Italy, for sampling seeds in Norway spruce Natural populations. We thank the Swedish National Infrastructure for Computing (SNIC) for allocating computing and data storage resources for this project under the numbers NAISS 2024/6-389 and UPPMAX 2025/2-24. High-throughput sequencing costs for this study were supported by Nilsson-Ehle Endowments (43255) and Lundman’s Foundation for Botanical Studies grants from the Swedish Phytogeographic Society, awarded to Qiujie Zhou. This work was supported by Formas – a Swedish Research Council for Sustainable Development - through grant numbers 2016-00780 awarded to Martin Lascoux and 2024-02415 awarded to Pascal Milesi. Funder Information Declared Nilsson-Ehle Endowments , 43255 Swedish Phytogeographic Society Formas , 2016-00780 , 2024-02415 Footnotes https://zenodo.org/records/17105753 References ↵ Adler , Marlen , Mehreen Anjum , Otto G. Berg , Dan I. Andersson , and Linus Sandegren . 2014 . ‘ High Fitness Costs and Instability of Gene Duplications Reduce Rates of Evolution of New Genes by Duplication-Divergence Mechanisms ’. Molecular Biology and Evolution 31 ( 6 ): 1526 – 35 . OpenUrl CrossRef PubMed ↵ Andersson , Dan I. , and Diarmaid Hughes . 2009 . ‘ Gene Amplification and Adaptive Evolution in Bacteria ’. Annual Review of Genetics 43 ( 1 ): 167 – 95 . doi: 10.1146/annurev-genet-102108-134805 . OpenUrl CrossRef PubMed Web of Science ↵ Assogba , Benoît S. , Pascal Milesi , Luc S. Djogbénou , et al. 2016 . ‘ The Ace-1 Locus Is Amplified in All Resistant Anopheles Gambiae Mosquitoes: Fitness Consequences of Homogeneous and Heterogeneous Duplications ’. 
PLoS Biology 14 ( 12 ): e2000618 . OpenUrl CrossRef PubMed ↵ Ayala , Diego , Simo Zhang , Mathieu Chateau , et al. 2019 . ‘ Association Mapping Desiccation Resistance within Chromosomal Inversions in the African Malaria Vector Anopheles Gambiae ’. Molecular Ecology 28 ( 6 ): 1333 – 42 . doi: 10.1111/mec.14880 . OpenUrl CrossRef ↵ Battlay , Paul , Samuel Craig , Andhika R Putra , et al. 2025 . ‘ Rapid Parallel Adaptation in Distinct Invasions of Ambrosia Artemisiifolia Is Driven by Large-Effect Structural Variants ’. Molecular Biology and Evolution 42 ( 1 ): msae270. doi: 10.1093/molbev/msae270 . OpenUrl CrossRef ↵ Ben-Jemaa , Slim , Mekki Boussaha , Nathalie Mandonnet , Philippe Bardou , and Michel Naves . 2024 . ‘ Uncovering Structural Variants in Creole Cattle from Guadeloupe and Their Impact on Environmental Adaptation through Whole Genome Sequencing ’. PLoS One 19 ( 8 ): e0309411 . OpenUrl PubMed ↵ Bernhardsson , Carolina , Amaryllis Vidalis , Xi Wang , et al. 2019 . ‘ An Ultra-Dense Haploid Genetic Map for Evaluating the Highly Fragmented Genome Assembly of Norway Spruce (Picea Abies) ’. G3: Genes, Genomes, Genetics 9 ( 5 ): 1623 – 32 . OpenUrl ↵ Buchfink , Benjamin , Klaus Reuter , and Hajk-Georg Drost . 2021 . ‘ Sensitive Protein Alignments at Tree-of-Life Scale Using DIAMOND ’. Nature Methods 18 ( 4 ): 366 – 68 . OpenUrl PubMed Capador-Barreto , Hernán D. , Carolina Bernhardsson , Pascal Milesi , et al. 2021 . ‘ Killing Two Enemies with One Stone? Genomics of Resistance to Two Sympatric Pathogens in Norway Spruce ’. Molecular Ecology 30 ( 18 ): 4433 – 47 . doi: 10.1111/mec.16058 . OpenUrl CrossRef ↵ Catanach , Andrew , Ross Crowhurst , Cecilia Deng , Charles David , Louis Bernatchez , and Maren Wellenreuther . 2019 . ‘ The Genomic Pool of Standing Structural Variation Outnumbers Single Nucleotide Polymorphism by Threefold in the Marine Teleost Chrysophrys Auratus ’. Molecular Ecology 28 ( 6 ): 1210 – 23 . doi: 10.1111/mec.15051 . 
OpenUrl CrossRef ↵ Cayuela , Hugo , Yann Dorant , Claire Mérot , et al. 2021 . ‘ Thermal Adaptation Rather than Demographic History Drives Genetic Structure Inferred by Copy Number Variants in a Marine Fish ’. Molecular Ecology 30 ( 7 ): 1624 – 41 . doi: 10.1111/mec.15835 . OpenUrl CrossRef ↵ Cheeseman , Ian H. , Becky Miller , John C. Tan , et al. 2016 . ‘ Population Structure Shapes Copy Number Variation in Malaria Parasites ’. Molecular Biology and Evolution 33 ( 3 ): 603 – 20 . OpenUrl CrossRef PubMed ↵ Chen , Jun , Thomas Källman , Xiaofei Ma , et al. 2012 . ‘ Disentangling the Roles of History and Local Selection in Shaping Clinal Variation of Allele Frequencies and Gene Expression in Norway Spruce (Picea Abies) ’. Genetics 191 ( 3 ): 865 – 81 . OpenUrl Abstract / FREE Full Text ↵ Chen , Jun , Lili Li , Pascal Milesi , et al. 2019 . ‘ Genomic Data Provide New Insights on the Demographic History and the Extent of Recent Material Transfers in Norway Spruce ’. Evolutionary Applications 12 ( 8 ): 1539 – 51 . doi: 10.1111/eva.12801 . OpenUrl CrossRef PubMed ↵ Chen , Jun , Yoshiaki Tsuda , Michael Stocks , et al. 2014 . ‘ Clinal Variation at Phenology-Related Genes in Spruce: Parallel Evolution in FTL2 and Gigantea? ’ Genetics 197 ( 3 ): 1025 – 38 . OpenUrl Abstract / FREE Full Text ↵ Chen , Zhi Qiang , Yanjun Zan , Pascal Milesi , et al. 2021 . ‘ Leveraging Breeding Programs and Genomic Data in Norway Spruce (Picea Abies L. Karst) for GWAS Analysis ’. Genome Biology 22 ( 1 ). doi: 10.1186/S13059-021-02392-1 . OpenUrl CrossRef PubMed ↵ Chiang , Colby , Alexandra J. Scott , Joe R. Davis , et al. 2017 . ‘ The Impact of Structural Variation on Human Gene Expression ’. Nature Genetics 49 ( 5 ): 692 – 99 . OpenUrl CrossRef PubMed ↵ Conrad , Donald F. , and Matthew E. Hurles . 2007 . ‘ The Population Genetics of Structural Variation ’. Nature Genetics 39 ( 7 ): S30 – 36 . doi: 10.1038/ng2042 . 
OpenUrl CrossRef PubMed ↵ Danecek , Petr , Adam Auton , Goncalo Abecasis , et al. 2011 . ‘ The Variant Call Format and VCFtools ’. Bioinformatics 27 ( 15 ): 2156 – 58 . doi: 10.1093/bioinformatics/btr330 . OpenUrl CrossRef PubMed Web of Science ↵ Denver , Dee R. , Peter C. Dolan , Larry J. Wilhelm , et al. 2009 . ‘ A Genome-Wide View of Caenorhabditis Elegans Base-Substitution Mutation Processes ’. Proceedings of the National Academy of Sciences 106 ( 38 ): 16310 – 14 . doi: 10.1073/pnas.0904895106 . OpenUrl Abstract / FREE Full Text ↵ Dorant , Yann , Hugo Cayuela , Kyle Wellband , et al. 2020 . ‘ Copy Number Variants Outperform SNPs to Reveal Genotype–Temperature Association in a Marine Species ’. Molecular Ecology, ahead of print . doi: 10.1111/mec.15565 . OpenUrl CrossRef ↵ Finnegan , Michael , Jeanne Hamet , Erick Desmarais , and Stéphanie Bedhomme . 2023 . ‘ Following the Dynamics of Structural Variants in Experimentally Evolved Populations ’. Journal of Visualized Experiments (JoVE ) , no. 192 : e64709 . ↵ Fromer , Menachem , Jennifer L. Moran , Kimberly Chambert , et al. 2012 . ‘ Discovery and Statistical Genotyping of Copy-Number Variation from Whole-Exome Sequencing Depth ’. The American Journal of Human Genetics 91 ( 4 ): 597 – 607 . doi: 10.1016/j.ajhg.2012.08.005 . OpenUrl CrossRef PubMed ↵ Galinsky , Kevin J. , Gaurav Bhatia , Po Ru Loh , et al. 2016 . ‘ Fast Principal-Component Analysis Reveals Convergent Evolution of ADH1B in Europe and East Asia ’. American Journal of Human Genetics , ahead of print. doi: 10.1016/j.ajhg.2015.12.022 . OpenUrl CrossRef PubMed ↵ García , C. , and G. Escribano-Ávila . 2016 . ‘ An Optimised Protocol to Isolate High-Quality Genomic DNA from Seed Tissues Streamlines the Workflow to Obtain Direct Estimates of Seed Dispersal Distances in Gymnosperms ’. Journal of Plant Research 129 ( 3 ): 559 – 63 . doi: 10.1007/s10265-016-0806-3 . OpenUrl CrossRef PubMed ↵ Gautier , Mathieu . 2015 . 
‘ Genome-Wide Scan for Adaptive Divergence and Association with Population-Specific Covariates ’. Genetics 201 ( 4 ): 1555 – 79 . doi: 10.1534/genetics.115.181453 . OpenUrl Abstract / FREE Full Text ↵ Guillemaud , Thomas , Michel Raymond , Anastasia Tsagkarakou , Clotilde Bernard , Pierrick Rochard , and Nicole Pasteur . 1999 . ‘ Quantitative Variation and Selection of Esterase Gene Amplification in Culex Pipiens ’. Heredity 83 ( 1 ): 87 – 99 . doi: 10.1038/sj.hdy.6885370 . OpenUrl CrossRef PubMed Web of Science ↵ Günther , Torsten , and Graham Coop . 2013 . ‘ Robust Identification of Local Adaptation from Allele Frequencies ’. Genetics 195 ( 1 ): 205 – 20 . doi: 10.1534/genetics.113.152462 . OpenUrl Abstract / FREE Full Text ↵ Hardigan , Michael A. , Emily Crisovan , John P. Hamilton , et al. 2016 . ‘ Genome Reduction Uncovers a Large Dispensable Genome and Adaptive Role for Copy Number Variation in Asexually Propagated Solanum Tuberosum ’. The Plant Cell 28 ( 2 ): 388 – 405 . OpenUrl Abstract / FREE Full Text Harris , Laura , Ellen M. McDonagh , Xiaolei Zhang , et al. 2024 . ‘Genome-Wide Association Testing beyond SNPs ’. Nature Reviews Genetics , October 7 , 1 – 15 . doi: 10.1038/s41576-024-00778-y . OpenUrl CrossRef ↵ Hung , Tin Hang , Ernest T. Y. Wu , Pauls Zeltiņš , et al. 2024 . ‘ Long-Insert Sequence Capture Detects High Copy Numbers in a Defence-Related Beta-Glucosidase Gene Βglu-1 with Large Variations in White Spruce but Not Norway Spruce ’. BMC Genomics 25 ( 1 ): 118 . doi: 10.1186/s12864-024-09978-6 . OpenUrl CrossRef PubMed ↵ Igolkina , Anna A. , Sebastian Vorbrugg , Fernando A. Rabanal , et al. 2024 . ‘Towards an Unbiased Characterization of Genetic Polymorphism’. Preprint, bioRxiv , May 30 . doi: 10.1101/2024.05.30.596703 . OpenUrl Abstract / FREE Full Text ↵ Jang , Min-Jeong , Hye Jeong Cho , Young-Soo Park , et al. 2024 . 
‘ Haplotype-Resolved Genome Assembly and Resequencing Analysis Provide Insights into Genome Evolution and Allelic Imbalance in Pinus Densiflora ’. Nature Genetics , 1 – 11 . ↵ Jeffreys , Harold . 1998 . The Theory of Probability . OUP Oxford . ↵ Jing , Yanping , Nuo Shen , Xiaojiang Zheng , et al. 2020 . ‘ Danger-Associated Peptide Regulates Root Immune Responses and Root Growth by Affecting ROS Formation in Arabidopsis ’. International Journal of Molecular Sciences 21 . doi: 10.3390/ijms21134590 . OpenUrl CrossRef PubMed ↵ Johnson , Mark , Irena Zaretskaya , Yan Raytselis , Yuri Merezhuk , Scott McGinnis , and Thomas L. Madden . 2008 . ‘ NCBI BLAST: A Better Web Interface ’. Nucleic Acids Research 36 (suppl_ 2 ): W5 – 9 . OpenUrl CrossRef PubMed Web of Science ↵ Kang , Minghui , Haolin Wu , Huanhuan Liu , et al. 2023 . ‘ The Pan-Genome and Local Adaptation of Arabidopsis Thaliana ’. Nature Communications 14 ( 1 ): 6259 . OpenUrl PubMed ↵ Karunarathne , Piyal , Qiujie Zhou , Martin Lascoux , and Pascal Milesi . 2024 . ‘ Hybridization Mediated Range Expansion and Climate Change Resilience in Two Keystone Tree Species of Boreal Forests ’. Global Change Biology 30 ( 4 ): e17262 . doi: 10.1111/gcb.17262 . OpenUrl CrossRef PubMed ↵ Karunarathne , Piyal , Qiujie Zhou , Klaus Schliep , and Pascal Milesi . 2023 . ‘ A Comprehensive Framework for Detecting Copy Number Variants from Single Nucleotide Polymorphism Data: “rCNV”, a Versatile R Package for Paralogue and CNV Detection ’. Molecular Ecology Resources 23 ( 8 ): 1772 – 89 . doi: 10.1111/1755-0998.13843 . OpenUrl CrossRef PubMed ↵ Katju , Vaishali , and Ulfar Bergthorsson . 2013 . ‘ Copy-Number Changes in Evolution: Rates, Fitness Effects and Adaptive Significance ’. Frontiers in Genetics 4 : 273 . OpenUrl PubMed Kennington , W. Jason , Ary A . Hoffmann, and Linda Partridge . 2007 . ‘ Mapping Regions within Cosmopolitan Inversion In (3R) Payne Associated with Natural Variation in Body Size in Drosophila Melanogaster ’. 
Genetics 177 ( 1 ): 549 – 56 . OpenUrl Abstract / FREE Full Text ↵ Koch , Eva L. , Hernán E. Morales , Jenny Larsson , et al. 2021 . ‘ Genetic Variation for Adaptive Traits Is Associated with Polymorphic Inversions in Littorina Saxatilis ’. Evolution Letters 5 ( 3 ): 196 – 213 . OpenUrl PubMed ↵ Kondrashov , Fyodor A . 2012 . ‘ Gene Duplication as a Mechanism of Genomic Adaptation to a Changing Environment ’. Proceedings of the Royal Society B: Biological Sciences 279 ( 1749 ): 5048 – 57 . doi: 10.1098/rspb.2012.1108 . OpenUrl CrossRef PubMed ↵ Krumm , Niklas , Peter H. Sudmant , Arthur Ko , et al. 2012 . ‘ Copy Number Variation Detection and Genotyping from Exome Sequence Data ’. Genome Research 22 ( 8 ): 1525 – 32 . doi: 10.1101/gr.138115.112 . OpenUrl Abstract / FREE Full Text ↵ Kuo , Wen-Hsi , Sara J. Wright , Linda L. Small , and Kenneth M. Olsen . 2024 . ‘ De Novo Genome Assembly of White Clover (Trifolium Repens L.) Reveals the Role of Copy Number Variation in Rapid Environmental Adaptation ’. BMC Biology 22 ( 1 ): 165 . doi: 10.1186/s12915-024-01962-6 . OpenUrl CrossRef PubMed ↵ Labbé , Pierrick , Pascal Milesi , André Yébakima , Nicole Pasteur , Mylène Weill , and Thomas Lenormand . 2014 . ‘ GENE-Dosage Effects on Fitness in Recent Adaptive Duplications: Ace-1 in the Mosquito Culex Pipiens ’. Evolution; International Journal of Organic Evolution 68 ( 7 ): 2092 – 101 . OpenUrl CrossRef PubMed Web of Science ↵ Lagercrantz , Ulf , and Nils Ryman . 1990 . ‘ Genetic Structure of Norway Spruce (Picea Abies): Concordance of Morphological and Allozymic Variation ’. Evolution 44 ( 1 ): 38 – 53 . doi: 10.1111/j.1558-5646.1990.tb04278.x . OpenUrl CrossRef PubMed ↵ Langley , Charles H. , Kristian Stevens , Charis Cardeno , et al. 2012 . ‘ Genomic Variation in Natural Populations of Drosophila Melanogaster ’. Genetics 192 ( 2 ): 533 – 98 . OpenUrl Abstract / FREE Full Text ↵ Lecomte , Laurie , Mariann Árnyasi , Anne-Laure Ferchaud , et al. 2024 . 
‘ Investigating Structural Variant, Indel and Single Nucleotide Polymorphism Differentiation between Locally Adapted Atlantic Salmon Populations ’. Evolutionary Applications 17 ( 3 ): e13653 . doi: 10.1111/eva.13653 . OpenUrl CrossRef PubMed ↵ Lelieveld , Stefan H. , Malte Spielmann , Stefan Mundlos , Joris A. Veltman , and Christian Gilissen . 2015 . ‘ Comparison of Exome and Genome Sequencing Technologies for the Complete Capture of Protein-Coding Regions ’. Human Mutation 36 ( 8 ): 815 – 22 . doi: 10.1002/humu.22813 . OpenUrl CrossRef PubMed ↵ Lenormand , Thomas , Thomas Guillemaud , Denis Bouguet , and Michel Raymond . 1998 . ‘ Appearance and Sweep of a Gene Duplication: Adaptive Response and Potential for a New Function in the Mosquito Culex Pipiens ’. Evolution-International Journal of Organic Evolution 52 ( 6 ): 1705 – 12 . OpenUrl ↵ Li , Heng , Bob Handsaker , Alec Wysoker , et al. 2009 . ‘ The Sequence Alignment/Map Format and SAMtools ’. Bioinformatics 25 ( 16 ): 2078 – 79 . doi: 10.1093/bioinformatics/btp352 . OpenUrl CrossRef PubMed Web of Science ↵ Li , Lili , Pascal Milesi , Mathieu Tiret , et al. 2022 . ‘ Teasing Apart the Joint Effect of Demography and Natural Selection in the Birth of a Contact Zone ’. New Phytologist 236 ( 5 ): 1976 – 87 . doi: 10.1111/nph.18480 . OpenUrl CrossRef PubMed ↵ Liepe , Katharina J. , Andreas Hamann , Pia Smets , Connor R. Fitzpatrick , and Sally N. Aitken . 2016 . ‘ Adaptation of Lodgepole Pine and Interior Spruce to Climate: Implications for Reforestation in a Warming World ’. Evolutionary Applications 9 ( 2 ): 409 – 19 . doi: 10.1111/eva.12345 . OpenUrl CrossRef PubMed ↵ Lind , Brandon M. , Mengmeng Lu , Dragana Obreht Vidakovic , et al. 2022 . ‘ Haploid, Diploid, and Pooled Exome Capture Recapitulate Features of Biology and Paralogy in Two Non-Model Tree Species ’. Molecular Ecology Resources 22 ( 1 ): 225 – 38 . doi: 10.1111/1755-0998.13474 . 
OpenUrl CrossRef PubMed ↵ Lindstedt , Freja , Qiujie Zhou , and Pascal Milesi . 2025 . ‘ When Numbers Matter: Rethinking the Role of Gene Duplication on Short Evolutionary Timescales ’. American Journal of Botany 112 ( 7 ): e70072 . doi: 10.1002/ajb2.70072 . OpenUrl CrossRef PubMed ↵ Lipinski , Kendra J. , James C. Farslow , Kelly A. Fitzpatrick , Michael Lynch , Vaishali Katju , and Ulfar Bergthorsson . 2011 . ‘ High Spontaneous Rate of Gene Duplication in Caenorhabditis Elegans ’. Current Biology 21 ( 4 ): 306 – 10 . OpenUrl CrossRef PubMed ↵ Luu , Keurcien , Eric Bazin , and Michael G. B. Blum . 2017 . ‘ Pcadapt: An R Package to Perform Genome Scans for Selection Based on Principal Component Analysis ’. Molecular Ecology Resources 17 ( 1 ): 67 – 77 . doi: 10.1111/1755-0998.12592 . OpenUrl CrossRef PubMed ↵ Maher , Brendan . 2008 . ‘ Personal Genomes: The Case of the Missing Heritability ’. Nature 456 ( 7218 ): 18 – 21 . doi: 10.1038/456018a . OpenUrl CrossRef PubMed Web of Science ↵ Mahmoud , Medhat , Nastassia Gobet , Diana Ivette Cruz-Dávalos , Ninon Mounier , Christophe Dessimoz , and Fritz J. Sedlazeck . 2019 . ‘ Structural Variant Calling: The Long and the Short of It ’. Genome Biology 20 ( 1 ): 246 . doi: 10.1186/s13059-019-1828-7 . OpenUrl CrossRef PubMed ↵ McCarroll , Steven A. , and David M. Altshuler . 2007 . ‘ Copy-Number Variation and Association Studies of Human Disease ’. Nature Genetics 39 ( Suppl 7 ): S37 – 42 . OpenUrl CrossRef PubMed ↵ Mérot , Claire , Rebekah A. Oomen , Anna Tigano , and Maren Wellenreuther . 2020 . ‘ A Roadmap for Understanding the Evolutionary Significance of Structural Genomic Variation ’. Trends in Ecology & Evolution 35 ( 7 ): 561 – 72 . OpenUrl PubMed ↵ Milesi , Pascal , Mats Berlin , Jun Chen , et al. 2019 . ‘ Assessing the Potential for Assisted Gene Flow Using Past Introduction of Norway Spruce in Southern Sweden: Local Adaptation and Genetic Basis of Quantitative Traits in Trees ’. 
Evolutionary Applications , ahead of print. doi: 10.1111/eva.12855 . OpenUrl CrossRef PubMed ↵ Milesi , Pascal , Jean-Loup Claret , Sandra Unal , Mylène Weill , and Pierrick Labbé . 2022 . ‘ Evolutionary Trade-Offs Associated with Copy Number Variations in Resistance Alleles in Culex Pipiens Mosquitoes ’. Parasites & Vectors 15 ( 1 ): 484 . doi: 10.1186/s13071-022-05599-8 . OpenUrl CrossRef PubMed ↵ Milesi , Pascal , Chedly Kastally , Benjamin Dauphin , et al. 2024 . ‘ Resilience of Genetic Diversity in Forest Trees over the Quaternary ’. Nature Communications 15 ( 1 ): 8538 . doi: 10.1038/s41467-024-52612-y . OpenUrl CrossRef PubMed ↵ Milesi , Pascal , Mylène Weill , Thomas Lenormand , and Pierrick Labbe . 2017 . ‘ Heterogeneous Gene Duplications Can Be Adaptive Because They Permanently Associate Overdominant Alleles ’. Evolution Letters 1 ( 3 ): 169 – 80 . OpenUrl PubMed ↵ Nei , M. , and F. Tajima . 1981 . ‘ DNA Polymorphism Detectable by Restriction Endonucleases .’ Genetics 97 ( 1 ): 145 – 63 . doi: 10.1093/genetics/97.1.145 . OpenUrl Abstract / FREE Full Text ↵ Neves , Leandro Gomide . 2013 . Exome Sequencing for High-Throughput Genomic Analysis of Trees . University of Florida . https://search.proquest.com/openview/a4797d8ae4decbcfa6bc98dd2c3873ca/1?pq-origsite=gscholar&cbl=18750 . ↵ Nilsson , Ove , Teitur Ahlgren Kalman , Nicolas Delhomme , et al. 2025 . ‘1000 Conifer Genomes: Genome Innovation, Organisation and Diversity ’. Preprint, Research Square , May 26 . doi: 10.21203/rs.3.rs-6502828/v1 . OpenUrl CrossRef ↵ Niu , Shihui , Jiang Li , Wenhao Bo , et al. 2022 . ‘ The Chinese Pine Genome and Methylome Unveil Key Features of Conifer Evolution ’. Cell 185 ( 1 ): 204 – 217 .e14. doi: 10.1016/j.cell.2021.12.006 . OpenUrl CrossRef PubMed ↵ Nystedt , Björn , Nathaniel R. Street , Anna Wetterbom , et al. 2013 . ‘ The Norway Spruce Genome Sequence and Conifer Genome Evolution ’. Nature 497 ( 7451 ): 579 – 84 . doi: 10.1038/nature12211 . 
OpenUrl CrossRef PubMed Web of Science ↵ Padgham , Mark . 2021 . ‘ Geodist: Fast, Dependency-Free Geodesic Distance Calculations’ . R Package Version 0.0.7 , https://github.com/hypertidy/geodist . ↵ Pan , Deng , and Liqing Zhang . 2007 . ‘ Quantifying the Major Mechanisms of Recent Gene Duplications in the Human and Mouse Genomes: A Novel Strategy to Estimate Gene Duplication Rates ’. Genome Biology 8 ( 8 ): R158 . doi: 10.1186/gb-2007-8-8-r158 . OpenUrl CrossRef PubMed ↵ Panchy , Nicholas , Melissa Lehti-Shiu , and Shin-Han Shiu . 2016 . ‘ Evolution of Gene Duplication in Plants ’. Plant Physiology 171 ( 4 ): 2294 – 316 . doi: 10.1104/pp.16.00523 . OpenUrl Abstract / FREE Full Text ↵ Peona , Valentina , Mozes P. K. Blom , Carolina Frankl-Vilches , et al. 2022 . ‘The Hidden Structural Variability in Avian Genomes’. Preprint, bioRxiv , January 2 . doi: 10.1101/2021.12.31.473444 . OpenUrl Abstract / FREE Full Text ↵ Pertea , Mihaela , and Geo Pertea . 2020 . ‘ GFF Utilities: GffRead and GffCompare ’. F1000Research 9 : 304 . OpenUrl ↵ Pinosio , Sara , Stefania Giacomello , Patricia Faivre-Rampant , et al. 2016 . ‘ Characterization of the Poplar Pan-Genome by Genome-Wide Identification of Structural Variation ’. Molecular Biology and Evolution 33 ( 10 ): 2706 – 19 . OpenUrl CrossRef PubMed ↵ Prunier , Julien , Sébastien Caron , Manuel Lamothe , et al. 2017 . ‘ Gene Copy Number Variations in Adaptive Evolution: The Genomic Distribution of Gene Copy Number Variations Revealed by Genetic Mapping and Their Adaptive Role in an Undomesticated Species, White Spruce (Picea Glauca) ’. Molecular Ecology 26 ( 21 ): 5989 – 6001 . doi: 10.1111/mec.14337 . OpenUrl CrossRef Prunier , Julien , Sébastien Caron , and John MacKay . 2017 . ‘ CNVs into the Wild: Screening the Genomes of Conifer Trees (Picea Spp.) Reveals Fewer Gene Copy Number Variations in Hybrids and Links to Adaptation ’. BMC Genomics 18 ( 1 ): 1 – 12 . doi: 10.1186/S12864-016-3458-8/FIGURES/4 . 
OpenUrl CrossRef PubMed ↵ Prunier , Julien , Isabelle Giguère , Natalie Ryan , et al. 2019 . ‘ Gene Copy Number Variations Involved in Balsam Poplar (Populus Balsamifera L.) Adaptive Variations ’. Molecular Ecology 28 ( 6 ): 1476 – 90 . doi: 10.1111/mec.14836 . OpenUrl CrossRef ↵ Purcell , Shaun , Benjamin Neale , Kathe Todd-Brown , et al. 2007 . ‘ PLINK: A Tool Set for Whole-Genome Association and Population-Based Linkage Analyses ’. The American Journal of Human Genetics 81 ( 3 ): 559 – 75 . doi: 10.1086/519795 . OpenUrl CrossRef PubMed ↵ Redon , Richard , Shumpei Ishikawa , Karen R. Fitch , et al. 2006 . ‘ Global Variation in Copy Number in the Human Genome ’. Nature 444 ( 7118 ): 444 – 54 . doi: 10.1038/nature05329 . OpenUrl CrossRef PubMed Web of Science ↵ Sahli , Atef . 2017 . Copy Number Variations in the Gene Space of Picea Glauca . https://corpus.ulaval.ca/server/api/core/bitstreams/1b49dd51-9b56-442a-9f0f-ad3311d02006/content . ↵ Sayols , Sergi . 2023 . ‘ Rrvgo: A Bioconductor Package for Interpreting Lists of Gene Ontology Terms ’. microPublication Biology 2023 . doi: 10.17912/micropub.biology.000811 . OpenUrl CrossRef ↵ Schiessl , Sarah , Bruno Huettel , Diana Kuehn , Richard Reinhardt , and Rod J. Snowdon . 2017 . ‘ Targeted Deep Sequencing of Flowering Regulators in Brassica Napus Reveals Extensive Copy Number Variation ’. Scientific Data 4 ( 1 ): 170013 . doi: 10.1038/sdata.2017.13 . OpenUrl CrossRef PubMed ↵ Schrider , Daniel R , David Houle , Michael Lynch , and Matthew W Hahn . 2013 . ‘ Rates and Genomic Consequences of Spontaneous Mutational Events in Drosophila Melanogaster ’. Genetics 194 ( 4 ): 937 – 54 . doi: 10.1534/genetics.113.151670 . OpenUrl Abstract / FREE Full Text Schrider , Daniel R. , Fabio CP Navarro , Pedro AF Galante , et al. 2013 . ‘ Gene Copy-Number Polymorphism Caused by Retrotransposition in Humans ’. PLoS Genetics 9 ( 1 ): e1003242 . OpenUrl ↵ Scrucca , Luca , Michael Fop , T. 
Brendan Murphy , and Adrian E. Raftery . 2016 . ‘ Mclust 5: Clustering, Classification and Density Estimation Using Gaussian Finite Mixture Models ’. The R Journal 8 ( 1 ): 289 – 317 . OpenUrl PubMed ↵ Shao , Xin , Ning Lv , Jie Liao , et al. 2019 . ‘ Copy Number Variation Is Highly Correlated with Differential Gene Expression: A Pan-Cancer Study ’. BMC Medical Genetics 20 ( 1 ): 175 . doi: 10.1186/s12881-019-0909-5 . OpenUrl CrossRef PubMed ↵ Sjödin , Per , and Mattias Jakobsson . 2012 . ‘ Population Genetic Nature of Copy Number Variation ’. In Genomic Structural Variants , edited by Lars Feuk , vol. 838 . Methods in Molecular Biology. Springer New York . doi: 10.1007/978-1-61779-507-7_10 . OpenUrl CrossRef PubMed ↵ Spofford , Janice B . 1969 . ‘ Heterosis and the Evolution of Duplications ’. The American Naturalist 103 ( 932 ): 407 – 32 . doi: 10.1086/282611 . OpenUrl CrossRef Web of Science ↵ Stuart , Katarina C. , Richard J. Edwards , William B. Sherwin , and Lee A. Rollins . 2023 . ‘ Contrasting Patterns of Single Nucleotide Polymorphisms and Structural Variation across Multiple Invasions ’. Molecular Biology and Evolution , July 4, 2022.07.04.498653. doi: 10.1093/molbev/msad046 . OpenUrl CrossRef ↵ Stull , Gregory W. , Xiao-Jian Qu , Caroline Parins-Fukuchi , et al. 2021 . ‘ Gene Duplications and Phylogenomic Conflict Underlie Major Pulses of Phenotypic Evolution in Gymnosperms ’. Nature Plants 7 ( 8 ): 1015 – 25 . doi: 10.1038/s41477-021-00964-4 . OpenUrl CrossRef PubMed ↵ Tigano , Anna . 2020 . ‘ A Population Genomics Approach to Uncover the CNVs, and Their Evolutionary Significance, Hidden in Reduced-Representation Sequencing Data Sets ’. Molecular Ecology 29 ( 24 ): 4749 – 53 . OpenUrl ↵ Tiret , Mathieu , Lars Olsson , Thomas Grahn , et al. 2023 . ‘ Divergent Selection Predating the Last Glacial Maximum Mainly Acted on Macro-phenotypes in Norway Spruce ’. Evolutionary Applications 16 ( 1 ): 163 – 72 . doi: 10.1111/eva.13519 . 
OpenUrl CrossRef PubMed ↵ Tsuda , Yoshiaki , Jun Chen , Michael Stocks , et al. 2016 . ‘The Extent and Meaning of Hybridization and Introgression between Siberian Spruce (Picea Obovata) and Norway Spruce (Picea Abies): Cryptic Refugia as Stepping Stones to the West?’ Molecular Ecology 25 ( 12 ): 2773 – 89 . doi: 10.1111/mec.13654 . OpenUrl CrossRef ↵ Valdes , Ana Maria , Montgomery Slatkin , and Nelson B. Freimer . 1993 . ‘ Allele Frequencies at Microsatellite Loci: The Stepwise Mutation Model Revisited .’ Genetics 133 ( 3 ): 737 – 49 . OpenUrl Abstract / FREE Full Text ↵ Vidalis , Amaryllis , Douglas Scofield , Leandro Neves , Carolina Bernhardsson , María Rosario García-Gil , and Pär Ingvarsson . 2018 . ‘ Design and Evaluation of a Large Sequence-Capture Probe Set and Associated SNPs for Diploid and Haploid Samples of Norway Spruce (Picea Abies) ’. bioRxiv , 291716 . doi: 10.1101/291716 . OpenUrl Abstract / FREE Full Text ↵ Wang , Ruolin , Yu Wang , Dandan He , et al. 2024 . ‘ Responses of Plant Immune System and Rhizosphere Soil Microbiome to the Elicitor BAR11 in Arabidopsis Thaliana ’. Science of The Total Environment 914 ( March ): 169920 . doi: 10.1016/j.scitotenv.2024.169920 . OpenUrl CrossRef PubMed ↵ Warren , René L. , Christopher I. Keeling , Macaire Man Saint Yuen , et al. 2015 . ‘ Improved White Spruce (Picea Glauca) Genome Assemblies and Annotation of Large Gene Families of Conifer Terpenoid and Phenolic Defense Metabolism’ . The Plant Journal 83 ( 2 ): 189 – 212 . doi: 10.1111/tpj.12886 . OpenUrl CrossRef PubMed ↵ Wei , Kai , Remco Stam , Aurélien Tellier , and Gustavo A. Silva-Arias . 2023 . ‘Copy Number Variations Shape Genomic Structural Diversity Underpinning Ecological Adaptation in the Wild Tomato Solanum Chilense’. Preprint, bioRxiv , July 25 . doi: 10.1101/2023.07.21.549819 . OpenUrl Abstract / FREE Full Text ↵ Weill , Mylène , C. Berticat , Michel Raymond , and Christine Chevillon . 2000 . 
‘ Quantitative Polymerase Chain Reaction to Estimate the Number of Amplified Esterase Genes in Insecticide-Resistant Mosquitoes ’. Analytical Biochemistry 285 ( 2 ): 267 – 70 . OpenUrl CrossRef PubMed Web of Science ↵ Weir , B. S. , and C. C. Cockerham . 1984 . ‘ Estimating F-Statistics for the Analysis of Population Structure .’ Evolution 38 ( 6 ): 1358 – 70 . doi: 10.1111/j.1558-5646.1984.tb05657.x . OpenUrl CrossRef PubMed Web of Science ↵ Weissensteiner , Matthias H. , Ignas Bunikis , Ana Catalán , et al. 2020 . ‘ Discovery and Population Genomics of Structural Variation in a Songbird Genus ’. Nature Communications 11 ( 1 ): 3403 . OpenUrl PubMed ↵ Westram , Anja M. , Rui Faria , Kerstin Johannesson , and Roger Butlin . 2021 . ‘ Using Replicate Hybrid Zones to Understand the Genomic Basis of Adaptive Divergence ’. Molecular Ecology 30 ( 15 ): 3797 – 814 . doi: 10.1111/mec.15861 . OpenUrl CrossRef PubMed ↵ Westram , Anja M. , Hernan E. Morales , Kerstin Johannesson , Roger Butlin , and Rui Faria . 2023 . ‘ Understanding the Adaptive Role of Chromosomal Inversions across Large Geographical Scales: The Potential of Pool-Seq Data ’. BioRxiv , 2023 – 08 . ↵ Wu , Tianzhi , Erqiang Hu , Shuangbin Xu , et al. 2021 . ‘ clusterProfiler 4.0: A Universal Enrichment Tool for Interpreting Omics Data ’. The Innovation 2 ( 3 ). doi: 10.1016/j.xinn.2021.100141 . OpenUrl CrossRef PubMed ↵ Xu , Lingyang , Yali Hou , Derek M. Bickhart , et al. 2016 . ‘ Population-Genetic Properties of Differentiated Copy Number Variations in Cattle ’. Scientific Reports 6 ( 1 ): 23161 . OpenUrl PubMed ↵ Yan , Stephanie M. , Rachel M. Sherman , Dylan J. Taylor , et al. 2021 . ‘ Local Adaptation and Archaic Introgression Shape Global Diversity at Human Structural Variant Loci ’. Elife 10 : e67615 . OpenUrl CrossRef PubMed ↵ Zhou , Qiujie , Piyal Karunarathne , Lili Andersson-Li , et al. 2024 . 
‘ Recurrent Hybridization and Gene Flow Shaped Norway and Siberian Spruce Evolutionary History over Multiple Glacial Cycles ’. Molecular Ecology 33 ( 17 ): e17495 . doi: 10.1111/mec.17495 . OpenUrl CrossRef ↵ Zhou , Yao , Zhiyang Zhang , Zhigui Bao , et al. 2022 . ‘ Graph Pangenome Captures Missing Heritability and Empowers Tomato Breeding ’. Nature 606 ( 7914 ): 527 – 34 . doi: 10.1038/s41586-022-04808-9 . OpenUrl CrossRef PubMed ↵ Zhou , Yongfeng , Andrea Minio , Mélanie Massonnet , et al. 2019 . ‘ The Population Genetics of Structural Variants in Grapevine Domestication ’. Nature Plants 5 ( 9 ): 965 – 79 . doi: 10.1038/s41477-019-0507-8 . OpenUrl CrossRef PubMed ↵ Zmienko , Agnieszka , Anna Samelak-Czajka , Piotr Kozlowski , Maja Szymanska , and Marek Figlerowicz . 2016 . ‘ Arabidopsis Thaliana Population Analysis Reveals High Plasticity of the Genomic Region Spanning MSH2, AT3G18530 and AT3G18535 Genes and Provides Evidence for NAHR-Driven Recurrent CNV Events Occurring in This Location ’. BMC Genomics 17 ( 1 ): 893 . doi: 10.1186/s12864-016-3221-1 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted September 16, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Gene copy number variation (gCNV) contributes to adaptation along environmental gradient Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Gene copy number variation (gCNV) contributes to adaptation along environmental gradient Qiujie Zhou , Martin Lascoux , Pascal Milesi bioRxiv 2025.09.12.675866; doi: https://doi.org/10.1101/2025.09.12.675866 Share This Article: Copy Citation Tools Gene copy number variation (gCNV) contributes to adaptation along environmental gradient Qiujie Zhou , Martin Lascoux , Pascal Milesi bioRxiv 2025.09.12.675866; doi: https://doi.org/10.1101/2025.09.12.675866 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Evolutionary Biology Subject Areas All Articles Animal Behavior and Cognition (7618) Biochemistry (17637) Bioengineering (13864) Bioinformatics (41853) Biophysics (21403) Cancer Biology (18540) Cell Biology (25429) Clinical Trials (138) Developmental Biology (13356) Ecology (19862) Epidemiology (2067) Evolutionary Biology (24287) Genetics (15585) Genomics (22464) Immunology (17701) Microbiology (40300) Molecular Biology (17142) Neuroscience (88440) Paleontology (666) Pathology (2825) Pharmacology and Toxicology (4814) Physiology (7633) Plant Biology (15107) Scientific Communication and Education (2042) Synthetic Biology (4285) Systems Biology (9809) Zoology (2268)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00