Full text
88,511 characters
· extracted from
preprint-html
· click to expand
Expanding CIRdb, a comprehensive catalog of whole-exome sequencing data of Canary Islanders | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Expanding CIRdb, a comprehensive catalog of whole-exome sequencing data of Canary Islanders View ORCID Profile Ana Díaz-de Usera , View ORCID Profile Luis A. Rubio-Rodríguez , View ORCID Profile Adrián Muñoz-Barrera , View ORCID Profile Jose M. 
Lorenzo-Salazar , View ORCID Profile Beatriz Guillen-Guio , View ORCID Profile David Jáspez , Almudena Corrales , View ORCID Profile Itahisa Marcelino-Rodríguez , View ORCID Profile María Del Cristo Rodríguez-Pérez , View ORCID Profile Antonio Cabrera-de León , View ORCID Profile Rafaela González-Montelongo , View ORCID Profile Raquel Cruz-Guerrero , View ORCID Profile Ángel Carracedo , View ORCID Profile Carlos Flores doi: https://doi.org/10.1101/2025.11.24.25340885 Ana Díaz-de Usera 1 Genomics Division, Instituto Tecnológico y de Energías Renovables (ITER) , Santa Cruz de Tenerife, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ana Díaz-de Usera Luis A. Rubio-Rodríguez 1 Genomics Division, Instituto Tecnológico y de Energías Renovables (ITER) , Santa Cruz de Tenerife, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Luis A. Rubio-Rodríguez Adrián Muñoz-Barrera 1 Genomics Division, Instituto Tecnológico y de Energías Renovables (ITER) , Santa Cruz de Tenerife, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Adrián Muñoz-Barrera Jose M. Lorenzo-Salazar 1 Genomics Division, Instituto Tecnológico y de Energías Renovables (ITER) , Santa Cruz de Tenerife, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jose M. 
Lorenzo-Salazar Beatriz Guillen-Guio 2 Division of Public Health and Epidemiology, School of Medical Sciences, University of Leicester , Leicester, UK 3 NIHR Leicester Biomedical Research Centre , Leicester, UK 4 CIBER de Enfermedades Respiratorias (CIBERES), Instituto de Salud Carlos III , Madrid, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Beatriz Guillen-Guio David Jáspez 1 Genomics Division, Instituto Tecnológico y de Energías Renovables (ITER) , Santa Cruz de Tenerife, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for David Jáspez Almudena Corrales 4 CIBER de Enfermedades Respiratorias (CIBERES), Instituto de Salud Carlos III , Madrid, Spain 5 Research Unit, Hospital Universitario Nuestra Señora de Candelaria, Instituto de Investigación Sanitaria de Canarias (IISC) , Santa Cruz de Tenerife, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site Itahisa Marcelino-Rodríguez 6 Área de Medicina Preventiva y Salud Pública, Universidad de La Laguna , Santa Cruz de Tenerife, Spain 7 Instituto de Tecnologías Biomédicas, Universidad de La Laguna , San Cristóbal de La Laguna, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Itahisa Marcelino-Rodríguez María Del Cristo Rodríguez-Pérez 5 Research Unit, Hospital Universitario Nuestra Señora de Candelaria, Instituto de Investigación Sanitaria de Canarias (IISC) , Santa Cruz de Tenerife, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for María Del Cristo Rodríguez-Pérez Antonio Cabrera-de León 5 Research Unit, Hospital Universitario Nuestra Señora de Candelaria, Instituto de Investigación Sanitaria de Canarias (IISC) , Santa Cruz de Tenerife, Spain 6 Área de Medicina Preventiva y Salud 
Pública, Universidad de La Laguna , Santa Cruz de Tenerife, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Antonio Cabrera-de León Rafaela González-Montelongo 1 Genomics Division, Instituto Tecnológico y de Energías Renovables (ITER) , Santa Cruz de Tenerife, Spain 7 Instituto de Tecnologías Biomédicas, Universidad de La Laguna , San Cristóbal de La Laguna, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rafaela González-Montelongo Raquel Cruz-Guerrero 8 Centro de Investigación Biomédica en Red de Enfermedades Raras (CIBERER), Instituto de Salud Carlos III , Madrid, Spain 9 Fundación Pública Galega de Medicina Xenómica, Sistema Galego de Saúde (SERGAS) , Santiago de Compostela, Spain 10 Instituto de Investigación Sanitaria de Santiago (IDIS) , Santiago de Compostela, Spain 11 Centro Singular de Investigación en Medicina Molecular y Enfermedades Crónicas (CIMUS), Universidade de Santiago de Compostela , Santiago de Compostela, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Raquel Cruz-Guerrero Ángel Carracedo 8 Centro de Investigación Biomédica en Red de Enfermedades Raras (CIBERER), Instituto de Salud Carlos III , Madrid, Spain 9 Fundación Pública Galega de Medicina Xenómica, Sistema Galego de Saúde (SERGAS) , Santiago de Compostela, Spain 10 Instituto de Investigación Sanitaria de Santiago (IDIS) , Santiago de Compostela, Spain 11 Centro Singular de Investigación en Medicina Molecular y Enfermedades Crónicas (CIMUS), Universidade de Santiago de Compostela , Santiago de Compostela, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ángel Carracedo Carlos Flores 1 Genomics Division, Instituto Tecnológico y de Energías Renovables (ITER) , Santa Cruz de Tenerife, Spain 4 CIBER de 
Enfermedades Respiratorias (CIBERES), Instituto de Salud Carlos III , Madrid, Spain 5 Research Unit, Hospital Universitario Nuestra Señora de Candelaria, Instituto de Investigación Sanitaria de Canarias (IISC) , Santa Cruz de Tenerife, Spain 7 Instituto de Tecnologías Biomédicas, Universidad de La Laguna , San Cristóbal de La Laguna, Spain 12 Facultad de Ciencias de la Salud, Universidad Fernando Pessoa Canarias , Las Palmas de Gran Canaria, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Carlos Flores For correspondence: cflores.genomica{at}gmail.com Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Within the intricate European genetic diversity landscape, Canary Islanders exhibit a unique genetic admixture, comprising European (EUR), North African (NAF), and sub-Saharan African (SSA) ancestries. This study aimed to comprehensively characterize the full spectrum of small genetic variation among 920 unrelated donors from this population based on whole-exome sequencing data to further develop CIRdb as the Canary Islanders-specific reference catalog of genetic variation. We combined this with SNP array data and whole-genome sequencing for specific analyses, revealing a total of 387,555 variants, of which 15.1% were previously unreported. Notably, 74.4% of these variants were classified as rare (with frequency <0.5%), including up to 40% of singletons. We also identified and curated a set of 2,068 variants prioritized as putative pathogenic. Intriguingly, the novel pathogenic variants exhibited enrichment in respiratory, cardiovascular, and metabolic disorders. Genetic differentiation patterns clustered separately individuals from the smallest islands, providing fine-grained insights into within-archipelago differentiation. 
A scan of local genetic ancestry deviations across the genome revealed an EUR ancestry enrichment around the 17q21.31 inversion, widely recognized for positive selection and associated with pleiotropic effects across pulmonary, infectious, and immunological diseases. Our results also evidenced a selective sweep shared by Canary Islanders and the NAF population around the Prune Exopolyphosphatase 1 gene, which is associated with body mass index, cardiovascular health, and metabolic traits. Taken together, CIRdb presents a valuable resource of exome-wide genetic variation in a population at the edge of Southwestern European genetic diversity. Introduction Democratization of next-generation sequencing has opened the possibility for international efforts aimed at deeply characterizing genetic variation in different human populations to improve biomedical research and clinical practice ( Karczewski et al. 2020 ; Mulder et al. 2018 ). The 1000 Genomes Project (1KGP) was among the first efforts to foster the discovery of genetic variation in diverse populations, revealing that our genome differs in roughly 5-6 million positions from the human reference genome ( 1000 Genomes Project Consortium et al. 2015 ). Many studies have evidenced the biomedical consequences of ignoring the genetic background of populations ( Ghouse et al. 2018 ; Sirugo, Williams, and Tishkoff 2019 ; Wojcik et al. 2019 ). These and others have served to establish the grounds to increase the diversity in catalogs of genetic variation to optimally represent population-specific particularities and to support Personalized Medicine strategies across populations worldwide ( Kore et al. 2025 ). Many countries have developed their own genetic variation catalogs or have conducted extensive genetic studies in control individuals ( Nagasaki et al. 2015 ; UK10K Consortium et al. 2015 ; Jeroncic et al. 2016 ; Scott et al. 2016 ; Chheda et al. 2017 ; Kim et al. 2018 ; Fattahi et al. 2019 ; Apol et al. 
2022 ) by combining variant information and population features ( Wong et al. 2013 ; Genome of the Netherlands Consortium 2014 ; Dopazo et al. 2016 ). In Spain, a pioneer study of 267 healthy individuals from Galicia and Andalusia highlighted the need to establish local population catalogs specifically in this country for an optimal understanding of genetic variation with clinical relevance ( Dopazo et al. 2016 ). The Canary Islands archipelago (Spain) is located ∼100 km from the nearest point to the northwest African coast, at the southwestern edge of Europe ( Figure 1 ). Originally, the Canary Islands were inhabited by aborigines whose most likely ancestral origin was in the Berber population from North Africa ( Hooton 1970 ; Arauna et al. 2017 ; Serrano et al. 2023 ). Notwithstanding, during its conquest by Europeans in the 15th century, the archipelago was subject to important events of admixture and displacement of aboriginals by European populations and imported slaves ( Maca-Meyer et al. 2005 ; Rodríguez-Varela et al. 2017 ; Fregel et al. 2019 ). As a result, the current Canary Islanders are a genetically admixed population of three main ancestries: European (EUR), North African (NAF), and sub-Saharan African (SSA). Mitogenomic studies in the current inhabitants have further unraveled fine-grained influences such as from Portuguese and Galicians ( García-Olivares et al. 2023 ). Furthermore, the geographic isolation of this population; the effects of sexual asymmetry, evidenced by the historically progressive decrease in male indigenous lineages ( Flores et al. 2003 ; Fregel et al. 2009 ); and multiple local adaptation events have finally modeled a particular genetic makeup where, on average, 75-83% has an EUR ancestry component, 17-23% has an NAF ancestry component, and 2% or less has an SSA ancestry component ( Pino-Yanes et al. 2011 ; Botigué et al. 2013 ). 
Individually, larger African ancestries were more recently recognized, with up to 38.2% NAF and 9.5% SSA ( Díaz-de Usera et al. 2022 ). Although the Canary Islands populations (aboriginal, historical, and modern) have been subjected to several analyses during the last two decades, the studies have generally focused on a few genetic markers or small sample sizes. Download figure Open in new tab Figure 1. Geographical location of the Canary Islands. Based on this and the particular genetic makeup of the current Canary Islands populations, we developed the Canary Islanders reference database, termed CIRdb ( Díaz-de Usera et al. 2022 ), as a detailed sequence-based exome reference catalog of genetic variation to allow disentangling population specificities with biomedical impact. In previous studies of the CIRdb cohort, we described the overall design and methods to build the catalog, including population analyses based on SNP arrays and mitogenomes ( Díaz-de Usera et al. 2022 ; García-Olivares et al. 2023 ). Here, we analyze whole-exome sequencing (WES) data of the cohort to improve the population-specific reference catalog, also involving SNP array data and whole-genome sequencing (WGS) data for specific purposes. This enabled us to lay the foundation of resources for Genomic Medicine programs in the region and to further deepen the analysis of the admixture and putative signals of adaptation. Materials and Methods Sample selection, genotyping, and reference population datasets The study was approved by the Research Ethics Committee of the Hospital Universitario Nuestra Señora de Candelaria (CHUNSC_2020_95) and performed according to The Code of Ethics of the World Medical Association (Declaration of Helsinki). 
The CIRdb cohort was obtained from a total of 1,024 controls that were selected from donors (483 males, 541 females) from the largest general population cohort of the Canary Islands, the ‘CDC of the Canary Islands’ (Cardiovascular, Diabetes, and Cancer) ( Cabrera de León et al. 2008 ). Individuals who self-reported the absence of cardiovascular, metabolic, immunologic, or cancer diseases and had at least two generations of ancestors born on the same island were included in the present study. The island assigned to each individual corresponded to the island of origin of the four grandparents. However, owing to difficulties in sample collection, as was the case for Fuerteventura, this criterion was relaxed, allowing one of the four grandparents to be from a different island in the archipelago. Individuals whose grandparents were not from the same island were classified as No Island Assigned (NIA). These were considered for establishing the catalog, but not for the population, ancestry, or inbreeding analyses conducted in the study. Procedures for DNA extraction, SNP array genotyping, and quality control (QCs) were described previously ( Guillen-Guio et al. 2018 ; Díaz-de Usera et al. 2022 ). After QCs, 920 samples (439 males, 481 females) with paired WES and SNP array data were kept for further analyses: 106 from El Hierro, 99 from La Palma, 145 from La Gomera, 165 from Tenerife, 215 from Gran Canaria, 47 from Fuerteventura, 114 from Lanzarote, and 29 considered NIA. Reference WGS data from 1KGP Phase 3 ( 1000 Genomes Project Consortium et al. 2015 ) were included in specific analyses to contextualize the genetic composition of the Canary Islanders in the EUR population landscape, which encompassed data from Finnish (FIN) (N =99), British (GBR) (N =91), and Utah Residents with Northern and Western European ancestry (CEU) (N =99). 
In addition, data from the Iberian population in Spain (IBS) (N =107) was also considered for its genetic proximity to the Canary Islands population. For the African component, the SSA population was selected using the Yoruba population in Ibadan (Nigeria) (YRI) (N =108) in the 1KGP as a proxy. The NAF (N =32) was represented by data from individuals recruited at different locations north of the African continent ( Serradell et al. 2024 ). See the Supplementary Material for further details. A total of 325,756 variants obtained by WES and 507,607 SNP array variants, a subset of all variants passing the QCs and included in the catalog, were used for the analyses that needed intersection with the population reference datasets. The SNP array data were only used for the ancestry and the Runs of Homozygosity (ROHs) analyses. Whole-exome sequencing Nextera DNA Exome Library Prep with a 350 bp insert size and Illumina DNA Prep with Enrichment kits were used for WES library preparation ( Díaz-de Usera et al. 2022 ; García-Olivares et al. 2023 ), according to the manufacturer’s instructions (Illumina Inc., San Diego, CA, USA). Pools of up to 12 dual-indexed libraries at 2 nM loading concentration were sequenced on Illumina HiSeq 4000 and NovaSeq 6000 Sequencing Systems (Illumina Inc.) with 75 bp and 100 bp paired-end reads, respectively, following the vendor’s guidance. All experiments used 1% PhiX Control V3 (Illumina Inc.) following the manufacturer’s recommendations. Genomic data were processed at the TeideHPC Supercomputing facility ( https://teidehpc.iter.es/en/home/ ), following an in-house bioinformatic pipeline described elsewhere ( Tosco-Herrera et al. 2022 ), composed mainly of preprocessing, variant discovery, and QC steps. Additionally, GATK Best Practices ( O’Connor and van der Auwera 2020 ) for large cohorts were implemented in the joint variant calling stage using GATK HaplotypeCaller and the Genomic Variant Call Format (GVCF) mode. 
After variant calling, different filters were applied to refine the genetic variants included in the catalog (see Supplementary Material). Variant annotation Variant annotation was performed with ANNOVAR v18.04.16 ( Wang, Li, and Hakonarson 2010 ) and The Ensembl Variant Effect Predictor (VEP) v100 ( McLaren et al. 2016 ), using GRCh37/hg19 as the reference genome ( Church et al. 2011 ). The software, databases, and plugins used for variant annotation are listed in Supplementary Table S1. Pathogenicity was defined according to the InterVar tool and the C-score reported by Combined Annotation Dependent Depletion (CADD) ( Rentzsch et al. 2019 ). The C-score and the mutation significance cutoff (MSC) were used in combination so that the variant was cataloged as putative pathogenic when the C-score was higher than MSC (at 99% confidence). Statistical differences among the number of variants in the different analyses were assessed using the R v4.0.2 environment ( R Core Team 2022 ), based on one-way ANOVA and Duncan’s new multiple range tests ( p <0.05). In particular cases, a Fisher’s exact test with subsequent Bonferroni correction was also used for variant comparisons. Enrichment analyses were performed using the Genomic Regions Enrichment of Annotations Tool (GREAT) ( McLean et al. 2010 ), Enrichr ( Kuleshov et al. 2016 ), and GeneSCF ( Subhash and Kanduri 2016 ). The annotation of variants and genes with traits and diseases was performed using the GWAS Catalog ( Buniello et al. 2019 ), Open Targets Genetics ( Ghoussaini et al. 2021 ), Open Targets Platform ( Ochoa et al. 2021 ), GeneCards ( Safran et al. 2021 ), and OMIM r20210128 ( Amberger et al. 2009 ). Principal Component Analysis and Discriminant Analysis of Principal Components To adapt to the WES data context, some modifications (i.e., variant exclusions due to call rate <0.80) from the aforementioned QC pipeline were done using PLINK v1.9 ( Chang et al. 2015 ). 
Principal Component Analysis (PCA) was performed using PLINK v1.9, after excluding variants located in regions of long-range linkage disequilibrium (LD) and with high LD (window size=50, step size=5, r2=0.05), as previously described ( Guillen-Guio et al. 2018 ). For stratified analyses of genetic differentiation based on alternative allele frequency (AAF) range, variants were further sorted by allele frequency in common (AAF>0.05), low frequency (0.005≤ AAF ≤0.05), and rare (AAF<0.005). The same parameters (i.e., sample, variants, and AAF filters) were used for Discriminant Analysis of Principal Components (DAPC) ( Jombart, Devillard, and Balloux 2010 ). Since rare variants consisting of singletons and doubletons behaved as outliers, they were removed from the PCA and DAPC assessments. A nonparametric Mann-Whitney U-test was used to assess the differences between the first three principal components (PCs) adjusting for the number of comparisons. Genetic ancestry deviations and runs of homozygosity Ancestry inferences based on ELAI v1.01 ( Guan 2014 ) were previously obtained for the CIRdb individuals using SNP array data ( Díaz-de Usera et al. 2022 ). Ancestry analyses included WGS data from 423 individuals from the reference populations, which comprised all YRI, NAF, and EUR datasets except the IBS to avoid spurious effects on ancestry estimates due to the genetic proximity between populations ( Guillen-Guio et al. 2018 ; Díaz-de Usera et al. 2022 ). These estimates were based on a three-way admixture model of EUR, NAF, and SSA, assuming 14 generations since the last admixture event based on our previous observations ( Guillen-Guio et al. 2018 ). Local ancestry block sizes and the average number of ancestry-related blocks in a haploid Canary Islander genome were calculated after removing centromeres and considering the flanking blocks as different elements to avoid overestimation of the block lengths. 
Deviations in the local parental genomic ancestries were individually assessed for each ancestry using the method developed by Zhu et al. ( Zhu 2012 ). Loci with an absolute Z-score above 3 ($|Z| > 3$) were considered significantly deviating from the average ( Guillen-Guio et al. 2018 ). PLINK v1.9-based ROH estimates, obtained according to previous procedures ( Kirin et al. 2010 ; Guillen-Guio et al. 2018 ), were classified by the average length (0.5-1, 1-2, 2-4, 4-8, 8-16, and ≥16 Mb). We also considered a simplified version of the classification proposed by Pemberton and colleagues (<1.6Mb and ≥1.6Mb) ( Pemberton et al. 2012 ). In addition, the average total length of ROHs and the number of fragments in ROHs were studied per island using nonparametric Mann-Whitney U-tests and Bonferroni correction adjustments ($p < 2.38 \times 10^{-3}$ to adjust for 21 comparisons). Selective sweep analyses around the exonic variant of Prune Exopolyphosphatase 1 (PRUNE1) gene As part of the DAPC, we first extracted variants contributing more to linear discriminant 1 (LD1) within the common frequency range. To evaluate the likely biomedical implications between ancestry and diseases, the AAF of these prioritized variants was analyzed using Fisher’s exact tests applying a Bonferroni correction to the comparisons between Canary Islanders, EUR, and NAF populations ($p < 3.33 \times 10^{-5}$ to adjust for 1,501 comparisons). For those variants with statistical differences, the number of individuals was first normalized to match the sample size of the smallest group (i.e., NAF). Subsequently, Weir and Cockerham’s $F_{ST}$ ( Weir and Cockerham 1984 ) and Population Branch Statistics (PBS) ( Yi et al. 2010 ) estimates were obtained for each variant. Next, we used the software iSAFE v1.1.1 ( Akbari et al. 2018 ) to evaluate whether the variant with the highest $F_{ST}$ estimation value in the comparisons with EUR subpopulations and its flanking regions had evidence of a selective sweep. 
An iSAFE scan was run in the region located one megabase (Mb) upstream and downstream from the variant of interest, considering the IgnoreGaps flag and the default MaxFreq value (0.95) (see Supplementary Material). For this analysis, the Homo sapiens (GRCh37) ancestral FASTA sequence was obtained from Ensembl release 75 ( http://ftp.ensembl.org/pub/release-75/fasta/ancestral_alleles/ ). Variants with the largest iSAFE scores were prioritized for functional annotation using the Variant to Gene (V2G) score ( Ghoussaini et al. 2021 ) ( https://genetics.opentargets.org/ ). Phenotype-Wide Association Study of the PRUNE1 exonic variant To perform a phenome-wide association study (PheWAS) analysis and given that previous studies support sufficient power for PheWAS of common variants with >200 cases ( Verma et al. 2018 ), we examined associations between the previously identified statistically significant SNP in PRUNE1 (rs3738476_C) and phenotypes based on the 9th version of the International Classification of Diseases catalog (ICD-9 codes), using the PheWAS v0.99.6.1 R package. All the 920 individuals were mapped to 66 documented diseases in the ICD-9 codes that included at least 20 counts in the cohort and then converted into individual phenotype groups (‘phecodes’). Analyses included age and sex as covariates in all PheWAS models and statistical significance was set at $p = 7.6 \times 10^{-4}$ to correct for multiple comparisons. Results Exonic genetic variation in the Canary Islanders A total of 920 individuals and 387,555 WES variants were finally included in CIRdb. No statistical differences were found in the average number of variants identified per individual across islands (one-way ANOVA test, p =0.558) (Supplementary Table S2). Variants were classified into 357,846 single nucleotide variants (SNVs), 18,574 deletions, and 11,135 insertions with an average transition/transversion (Ti/Tv) ratio in autosomes of 2.90 ± 0.04 (SD). 
The distribution by AAF strata was: 47,493 common, 51,672 low frequency, and 288,390 rare variants. There were 149,851 singletons and 56,188 doubletons in the rare AAF stratum. Thus, 74.4% of the variation had AAF<0.005, and nearly 40% of all variants were singletons, i.e., private to individuals. There were statistical differences between islands in the low frequency and rare AAF strata (Supplementary Figure S1) (one-way ANOVA test, $p < 2 \times 10^{-16}$ for both comparisons; Duncan’s new multiple range tests, p <0.05). Heterozygosity analyses revealed that El Hierro had the highest number of homozygous variants among all the islands (one-way ANOVA test, $p = 1.95 \times 10^{-3}$). Out of the 120,273 variants found in El Hierro, 43,613 were homozygous for the alternative allele in at least one individual and 2,565 of those variants were found in homozygous state in at least 90% of individuals. We found statistical differences after Bonferroni correction in AAF for 16 variants between El Hierro and some of the other islands (Fisher’s exact test, $p < 1.95 \times 10^{-5}$) (Supplementary Table S3). All these variants were cataloged as non-pathogenic by the InterVar tool, CADD, and the inferred impact in the Ensembl VEP annotation. Data from Open Targets Genetics linked several of the 16 variants to autoimmune diseases, cardiovascular traits, hair color, and skin browning processes, among others (Supplementary Table S3). One of these variants is the missense variant rs2249265, which is functionally linked to PTGFRN and has been associated with type 2 diabetes (T2D) ( Vujkovic et al. 2020 ), showing statistical differences in AAF between El Hierro and La Gomera. Considering pathogenicity classifications (combining the InterVar tool classification, C-score, and MSC values) and high impact together, 2,068 variants were identified globally as putative pathogenic in CIRdb. 
These variants were enriched in genes involved in oncological diseases as well as in neurodegenerative and intellectual disability based on Jensen Disease Ontology. A similar pattern was observed in a stratified analysis by island (Supplementary Table S4). Overall, this evidenced enrichment in ciliopathies, neuropathologies, cardiovascular traits, and metabolic disorders. Sensory diseases, mainly related to ocular conditions such as retinitis pigmentosa, cataract and cone-rod or fundus dystrophy, were also significantly enriched ( Figure 2 ). Download figure Open in new tab Figure 2. Enrichment analysis on the 2,068 putative pathogenic variants identified in the CIRdb catalog. Annotations in Jensen Disease Ontology were evaluated. When the affected genomic region and the impact classification provided by Ensembl VEP were simultaneously considered, 74.5% of variants in exonic and intronic regions were within the four different impact categories (77.6% for high impact, 97.4% for moderate, 97.5% for low, and 74.9% for modifier) ( Figure 3 ). Splicing with high impact was found for 3,695 variants (17.8% of the total). Moreover, in the splicing regions, nearly one out of every two variants (i.e., 49.1%) were singletons. In line with the idea that variants of potentially higher functional importance are more likely to be singletons ( Ke, Taylor, and Cardon 2008 ), we found a higher proportion of singletons among the high-impact variants ( Figure 4 ). Further assessments of missense and loss-of-function variants (LoF) can be found in Supplementary Figure S2 and Supplementary Table S5. Download figure Open in new tab Figure 3. Distribution of whole-exome sequencing variants found in CIRdb based on genomic region and impact classification provided by Ensembl VEP. Colors represent the following impact categories: dark blue for modifier impact, medium blue for high impact, grey for moderate impact, and light blue for low impact. ncRNA, noncoding RNA. 
Download figure Open in new tab Figure 4. Proportion of singletons and non-singletons across different impact classifications found in CIRdb whole-exome sequencing data. Novel variants in the CIRdb catalog A total of 15.1% of WES variants (n=58,389) were categorized as novel since they were not present in dbSNP154, 1KGP, gnomAD exome v2.1.1 ( Karczewski et al. 2020 ), TOPMed v5 ( Taliun et al. 2021 ), or the HGDP-CEPH ( Bergström et al. 2020 ) projects. La Palma island had the highest average of novel variants per individual, followed by Gran Canaria, Tenerife, La Gomera, Lanzarote, El Hierro, and Fuerteventura. However, statistical differences were observed only when compared with the last two islands (Duncan’s new multiple range test, p <0.05). The novel variants were categorized by their AAF strata in 12 common, 1,311 with low frequency, 8,380 rare, 10,019 doubletons, and 38,667 singletons. Thus, 66% of the novel variants were also private to individuals. By impact, novel variants were enriched in the high impact class when compared to all variants identified ( Figure 5 ) and were distributed in 19.7% of high impact, 39.7% with moderate, 17.4% with low, and 23.1% classified as modifier. Additionally, 17.5% of the novel variants were also predicted to be LoF, and 1,242 (2.13%) of the novel variants were predicted as putatively pathogenic (Supplementary Table S6). Overall, these variants were in genes associated with cardiovascular, immunological, respiratory, allergic, and sensory diseases, among others, and some of the affected genes were linked to skin pigmentation and sunburn. Furthermore, various novel putatively pathogenic variants affected genes that have been associated with COVID-19 susceptibility and severity, such as TYK2 and RAVER1 ( COVID-19 Host Genetics Initiative 2023 ) (Supplementary Table S7). Download figure Open in new tab Figure 5. 
Distribution of total and novel variants based on impact classification found in CIRdb whole-exome sequencing data. Shared genetic variation between Canary Islanders and North Africans We found 10,758 WES variants exclusively shared between the Canary Islanders of CIRdb and the NAF WGS data (i.e., were private to both populations). We found no statistical differences in the number of these variants per island (one-way ANOVA test, p =0.456). However, La Palma showed the lowest number of the shared variants (622 ± [SD] 35) in comparison to all the other islands (one-way ANOVA test, p =0.002; Duncan’s new multiple range test, p <0.05). Fuerteventura, Lanzarote, and El Hierro showed the largest number of these private variants shared with NAF, followed by Tenerife and Gran Canaria. Their AAF classification was: 1,178 common, 1,985 low frequency, 3,442 rare, 1,506 doubletons, and 2,647 singletons. Most of them were classified as of moderate impact (41.3%) and none of these variants were predicted to be putatively pathogenic. Missense, synonymous, and intronic variants were the most abundant categories (Supplementary Table S8). The 50 variants with high impact and CADD>MSC similarly provided benign predictions or were variants of uncertain significance (VUS). Among these 50 variants, 13 showed statistical differences in AAF between the Canary Islands considered altogether and the NAF population after Bonferroni correction (Fisher’s exact test, p <0.001) (Supplementary Table S9). The AAF of five of these 13 variants was statistically different between some Canary Islands populations and NAF. Interestingly, we did not find AAF differences for any of the 13 variants between Fuerteventura (one of the closest islands to the northwest African coast) and NAF. Nine out of these 13 variants were exonic to GPRC6A , RP1L1 , OR4X2 , PKD1L2 , KRT38 , SIRPA , and DEFB126 . 
Interestingly, a genome-wide association study previously linked DEFB126 variation with coronary artery disease in type 1 diabetes ( Antikainen et al. 2021 ). Exome-wide genetic differentiation of the Canary Islands populations In the common frequency stratum (AAF>0.05), the first two main PCs clustered the Canary Islanders into three different groups: the first encompassed individuals from La Gomera, the second included the individuals from El Hierro, and the third encompassed individuals from the other five islands (Mann-Whitney U-test, p <2.50×10^{-16} for all comparisons) ( Figure 6A ). In the rare AAF stratum (AAF<0.005), the third main PC evidenced the differentiation of individuals from Gran Canaria from the rest of the islands (Mann-Whitney U-test, p <2.20×10^{-16} for the comparison of Gran Canaria against the rest of the islands) ( Figure 6B ). When comparing against the reference populations, the Canary Islanders showed an intermediate position between the EUR and NAF populations (Mann-Whitney U-test, p <1×10^{-22} for all comparisons in PC1) as observed elsewhere ( Guillen-Guio et al. 2018 ; Díaz-de Usera et al. 2022 ) (Supplementary Table S10). Tenerife and La Palma exhibited greater affinities with the EUR population, especially with IBS, whereas La Gomera was the island clustering closest to NAF. Nevertheless, all pairwise comparisons among islands and EUR populations were significant ( p <1×10^{-21} ). El Hierro, La Gomera, and NAF differentiated from the rest of the populations for the first three PCs (Mann-Whitney U-test, p <6×10^{-6} ), except for the comparison of La Gomera and NAF in PC2 (Supplementary Table S10). Download figure Open in new tab Figure 6. Representation of the first two ( A ) and the first three ( B ) principal components (PCs) of exome-wide genetic variation in Canary Islanders. A . Common variants (AAF >0.05) explaining 17.2% of variability. B . Rare variants (AAF <0.005), excluding singletons and doubletons, explaining 19.9% of variability. 
To maximize discrimination among groups, we then relied on DAPC with all WES variants, revealing El Hierro as a clear outlier when compared to the other Canary Islands ( Figure 7 ). El Hierro showed the largest number of variants with statistical differences in AAF among comparisons within the Canary Islands in the LD1 (Fisher’s exact test after Bonferroni correction, p <2.86×10^{-5} ), except in the rare AAF range. We identified 15 variants with statistically different AAF between El Hierro and five of the other islands. These 15 variants were in FLT4 , MALRD1 , C1orf159, PPME1 , C12orf42, LOC101929058, TMTC1 , ABHD17A , SLC6A3 , CEP85L , MRPL20-AS1 , ZBTB17 , RNF220 , CXCR4 , and GRIP2 . Intriguingly, some of these genes have been associated with cardiovascular traits: FLT4 associates with coronary heart disease ( Kulminski et al. 2018 ), ZBTB17 with dilated cardiomyopathy ( Tadros et al. 2021 ), MALRD1 with coronary artery disease in Type 1 diabetes ( Charmet et al. 2018 ), and RNF220 with heart rate ( Eppinga et al. 2016 ; Verweij, van de Vegte, and van der Harst 2018 ). Additionally, MALRD1 , TMTC1 , CEP85L , and RNF220 have been involved in diverse respiratory conditions ( Lutz et al. 2015 ; Forno et al. 2017 ; Tian et al. 2017 ; Sakornsakolpat et al. 2019 ). In the rare AAF stratum, individuals from Gran Canaria clustered out when compared with the other populations (Supplementary Figure S3A). Within the Canary Islands, El Hierro and Gran Canaria exhibited two separate clusters (Supplementary Figure S3B). Download figure Open in new tab Figure 7. Discriminant Analysis of Principal Components (DAPC) for all variants. ( A ) Analysis including both Canary Islanders and reference populations (excluding outlier populations: SSA and FIN); and ( B ) Analysis including only the Canary Islanders. LD1, horizontal axis; LD2, vertical axis. 
CEU, Utah Residents with Northern and Western European ancestry (1KGP); GBR, British (1KGP); IBS, Iberian population in Spain (1KGP); NAF, North African population. Genetic ancestry and ROHs in CIRdb Genetic ancestry proportions in CIRdb individuals were previously obtained based on the SNP array data ( Díaz-de Usera et al. 2022 ). Further details of CIRdb individuals are provided in Supplementary Table S11 and Supplementary Figure S4 evidencing an overall picture of 77.6 ± 3.71% (SD) of a genetic ancestry associated with a component prevailing in EUR, 18.9 ± 3.10% (SD) of a component prevailing in NAF, and 3.52 ± 1.51% (SD) of a component prevailing in SSA. Based on local genetic ancestries, we previously identified genomic regions enriched in one of the ancestries and associated these regions with signals of selection ( Guillen-Guio et al. 2018 ). By leveraging a sample that nearly doubled that of the previous study, we confirmed the previously identified regions and revealed a novel one showing large deviation in the local genetic ancestry ( Figure 8 ). The previously described regions included 2q21.3-2q22.1 (linked to lactase persistence) and 6p21.32 (in the HLA region), which were enriched in NAF and depleted in EUR ancestry, and 6p21.33, which was enriched in SSA ancestry. The novel region was located in 17q21.31 and was enriched in EUR and depleted in NAF ancestry ( Table 1 , Supplementary Material). This region overlaps with a well-known inversion site proposed to be under positive selection in European populations ( Stefansson et al. 2005 ) and with pleiotropic effects across diseases including severe respiratory diseases, among others ( Tantisira et al. 2008 ; Fingerlin et al. 2013 ; Noth et al. 2013 ; Degenhardt et al. 2022 ). Download figure Open in new tab Figure 8. Genome-wide scan of deviations in local admixture for European ( A ), North African ( B ), and Sub-Saharan African ( C ) ancestry components in the current inhabitants of the Canary Islands from CIRdb. 
Horizontal dashed lines (blue>|2|; red>|3|) indicate z-score thresholds. View this table: View inline View popup Download powerpoint Table 1. Regions enriched or depleted in European (EUR), North African (NAF), and sub-Saharan African (SSA) local ancestries in Canary Islanders. We used ROH estimates to assess isolation and inbreeding in the islands. In agreement with previous findings, El Hierro and La Gomera showed the highest estimates for the average total ROH, the total ROH length (Mann-Whitney U-test, p <1×10^{-3} for all pairwise comparisons), and the number of fragments in ROHs (Mann-Whitney U-test, p <1×10^{-5} for all pairwise comparisons) compared to all other islands ( Figure 9 , Supplementary Figure S5 and S6, Supplementary Table S12). Download figure Open in new tab Figure 9. Comparison of the average total ROH length (Mb) and the number of ROH fragments per island. Top ( A ), all ROHs; Middle ( B ), ROHs <1.6 Mb; Bottom ( C ), ROHs ≥1.6 Mb. A scan for selective sweep around PRUNE1 and a PheWAS of the exonic variant Among the prioritized common variants in DAPC LD1, we identified 1,407 WES variants that showed no significant AAF differences between the Canary Islanders (altogether or for each island separately) and the NAF population (Fisher’s exact test, Bonferroni-corrected p >3.33×10^{-5} ). Six out of those variants showed statistically significant differences in AAF between the Canary Islands and EUR populations (Fisher’s exact test, Bonferroni-corrected p <3.33×10^{-5} ) and had AAF >10% in the Canary Islands ( Table 2 , Supplementary Table S13). Of those, we focused on rs3738476 in chromosome 1, a variant exonic to Prune Exopolyphosphatase 1 ( PRUNE1 ) gene that showed the largest difference in AAF between the Canary Islands (87.2%) and NAF (91.2%) compared to EUR populations (AAF range: 49.5%–52.7%), as quantified by the PBS genetic distance (0.303) and the pairwise F ST distances between NAF or Canary Islanders vs. EUR (Supplementary Tables S14 and S15, respectively). 
To evaluate the existence of a putative selective sweep in the region, we conducted an iSAFE scan based on WGS data in a subset of the participants and in IBS and NAF in the surrounding region of this variant (chr1:150,520,898-151,498,624) ( Figure 10 ). The region with the highest iSAFE score was chr1:151,010,521-151,116,279 (Canary Islands: iSAFE score >0.12, peak at 0.131; NAF: iSAFE score >0.11, peak at 0.136), while the signal tempered for IBS. This region harbors 36 other variants associated with body mass index (BMI)-related traits and functionally linked to nine genes involved in cardiovascular traits ( BNIPL , C1orf56, CDC42SE1 , CTSS , LYSMD1 , MLLT11 , PRUNE1 , SEMA6C , and TNFAIP8L2 ) ( Pulit et al. 2019 ) and diabetes mellitus ( BNIPL , C1orf56, CDC42SE1 , CTSS , MLLT11 , and TNFAIP8L2 ) ( Vujkovic et al. 2020 ) (Supplementary Table S16). We then conducted a PheWAS to examine possible associations between the rs3738476 variant at PRUNE1 and the 66 available phecodes of participants ( Figure 11 ). Although no association reached statistical significance, nominally significant associations ( p <0.05) were found with respiratory disease, intestinal diverticula, and stomach cancer. View this table: View inline View popup Download powerpoint Table 2. Variants with similar alternative allele frequencies in Canary Islanders and the North African populations but differing from European populations. Download figure Open in new tab Figure 10. iSAFE scan of whole-genome sequencing data in the genomic region surrounding rs3738476. Lines represent the moving average for each population. IBS, Iberian population in Spain (1KGP); NAF, North African population. Download figure Open in new tab Figure 11. Results from the logistic regression analysis testing the association between the rs3738476 variant and the incidence of 66 phecodes, with age and sex included as covariates. 
Each triangle represents the p -value of the association, pointing upward or downward depending on whether the variant is a risk or a protective factor. The size of the triangle corresponds to the odds ratio (OR), and its color indicates the respective ICD-9 category. The horizontal gray line represents the nominal significance threshold ( p =0.05). Discussion We developed CIRdb as the first exome-wide catalog of the natural genetic variation in the current inhabitants of the Canary Islands, the European population with the highest fraction of NAF ancestry reported to date. We found that approximately 15.1% of the variants found were novel at the time the analyses were conducted, emphasizing the key necessity for developing a population-specific catalog of genetic diversity in this archipelago ( Díaz-de Usera et al. 2022 ). A similar scenario was observed in other populations like Cilento in South Italy ( Nutile et al. 2019 ) and the Iranian population ( Fattahi et al. 2019 ). These findings highlight the importance of genetic studies in diverse populations and the potential for unique discoveries. Strikingly, the island of La Palma exhibited the highest average of novel variants per individual, making it a compelling candidate for future studies on disease-causing variation. Our diverse analyses combining WES, SNP array, and WGS data comparing the genetic background of the current Canary Islanders with other populations from Europe and Africa reinforced the recent admixed nature of the population and the footprints of population isolation, which were most salient in two of the smallest islands (El Hierro and La Gomera). This resource allowed us to identify genetic variation that could have significant biomedical implications for prevalent diseases in the Canary Islands, including metabolic, infectious, and respiratory diseases, among others, and in skin pigmentation and sunburn. 
We also identified a new region showing large deviations in local ancestry around a common inversion (17q21.31) known to be under positive selection in European populations and showing pleiotropic effects in human diseases. Our analyses also prioritized the region around PRUNE1 to be under putative positive selection in Canary Islanders, a genomic region associated with body mass index (BMI), and metabolic and cardiovascular disorders. We expect that the CIRdb exome-wide catalog of the natural genetic variation in the Canary Islands will constitute a key resource to assist in genetic diagnosis of patients and in the identification of disease risks, as described elsewhere ( Lorente-Arencibia et al. 2022 ). The inhabitants of the Canary Islands have had a unique history of admixture and isolation in the southwestern European context. Although genetic isolation footprints have been well-documented, at least for some of the islands such as El Hierro ( Ordóñez et al. 2017 ; Guillen-Guio et al. 2018 ), its impact on the prevalence of genetic diseases in the population remains largely unexplored. Many studies support a higher prevalence of cardiovascular ( Cabrera de León et al. 2006 ; Bueno, Hernáez, and Hernández 2008 ; Marcelino-Rodríguez et al. 2016 ; Rodríguez-Esparragón et al. 2017 ) and respiratory and allergic diseases ( Sánchez-Lerma et al. 2009 ; Juliá-Serdá et al. 2011 ) in the Canary Islands compared to mainland regions of Spain. Some monogenic disorders have been linked to founder effects in particular islands, such as Fanconi anemia in La Palma ( Castella et al. 2011 ), type 1 primary hyperoxaluria in La Gomera ( Santana et al. 2003 ), and Wilson disease in Gran Canaria ( García-Villarreal et al. 2000 ; Lorente-Arencibia et al. 2022 ). This evidence is in line with our findings and the patterns of genetic differentiation in the exonic regions and the presence of private variants in current Canary Islanders. 
There is support for the biomedical implications of the recent North African genetic influences in the European population context ( Botigué et al. 2013 ). Accordingly, we prioritized common genetic variants -with AAF shared between Canary Islanders and North Africans while diverged in Europeans, including Iberians- that were associated with risk for cardiovascular disorders. Some of these were rs301 in LPL that is associated with insulin resistance and atherosclerosis ( Deo et al. 2009 ; Kraja et al. 2011 ) and rs17261772 in RAB3GAP1 that is associated with sudden cardiac arrest ( Huertas-Vazquez et al. 2013 ). We also prioritized genomic regions that showed evidence of a selective sweep in the Canary Islanders and North Africans but not in Iberians, with a peak at a synonymous exonic variant (rs3738476) in PRUNE1 . The risk allele rs3738476_A, which is near fixation in the Canary Islanders and North Africans, was previously associated with BMI-related traits ( Pulit et al. 2019 ). The available evidence supports that the risk variant could affect gene function by creating a new “GGACU” sequence that would be susceptible to being methylated ( Liu et al. 2020 ). A PheWAS of this variant in the Canary Islanders also provided tentative links with other diseases, including respiratory disease, intestinal diverticula, and stomach cancer, although these results should be interpreted with caution given that statistical significance was not reached. The following constitute the main limitations of the study. First, whole-exome sequencing was based on short-read technology applied to nearly 1,000 unrelated individuals reporting at least two ancestors born in the archipelago. This limited our ability to study ultra-rare variants and the optimal assessment of structural variation, which also pose significant risks for disease ( Huddleston and Eichler 2016 ). 
Second, sequence variation at non-exonic regions remains largely uncovered, which will necessitate whole-genome approaches to better understand disease architecture and provide further insights into population history ( Collins et al. 2020 ). Third, since the study prioritized individuals without history of severe diseases as well as no cardiovascular, metabolic, immunologic, or cancer conditions, the PheWAS was based on a limited number of samples that were positively ascertained for the traits assessed. Fourth, given that we used GRCh37/hg19 as the reference, our study may have missed information from important genomic regions that could be assessed by leveraging the most recent version of the human reference genome, i.e., T2T-CHM13 ( Nurk et al. 2022 ). Fifth, since genetic sequence variation in North Africans is central to understanding the genetic diversity in the Canary Islanders, some of our results may be affected by the limited sample size of North Africans used as a reference due to their underrepresentation in global genetic studies ( Gurdasani et al. 2015 ; Serradell et al. 2024 ). Conclusion We present CIRdb, the first catalog of genetic variation obtained by whole-exome sequencing in current inhabitants of the Canary Islands. We found evident patterns of isolation in El Hierro and La Gomera. We also identified two genetic loci with well-known links with complex disease risks evidencing patterns indicative of selective processes and shared patterns of genetic differentiation between North Africans and Canary Islanders that are distinct from Europeans. Data Availability Data supporting the findings are available as part of the manuscript or from the supplementary files. Access to the raw sequence dataset is restricted to qualified researchers under institutional agreements. Data access requests must be reviewed before release. 
Author contributions Conceptualization: C.F.; Methodology: A.D.-d.U., R.C.-G., J.M.L.-S., and C.F.; Investigation: A.D.-d.U., B.G.-G., I.M.-R., A.Co., R.G.-M., and A.Ca.; Formal Analysis: A.D.-d.U., L.A.R.-R., A.M.-B, J.M.L.-S., D.J., I.M.-R., and R.C.-G.; Data Curation: A.D.-d.U., L.A.R.-R., A.M.-B, R.G.-M., and D.J.; Resources: M.C.R.-P., A.C.-d.-L., A.C.; Supervision: C.F.; Funding Acquisition: C.F.; Writing – Original Draft Preparation: A.D.-d.U. and C.F.; Writing – Review & Editing: all authors. Funding This research was funded by Ministerio de Ciencia e Innovación (RTC-2017-6471-1; AEI/FEDER, UE) and the Instituto de Salud Carlos III (CB06/06/1088 and PI23/00980), which were co-financed by the European Regional Development Funds ‘A way of making Europe’ from the European Union; Fundación CajaCanarias and Fundación Bancaria “La Caixa” (2018PATRI20); Cabildo Insular de Tenerife (CGIEU0000219140); the agreement OA17/008 and OA23/043 with Instituto Tecnológico y de Energías Renovables (ITER) to strengthen scientific and technological education, training, research, development and scientific innovation in genomics, epidemiological surveillance based on massive sequencing, personalized medicine, and biotechnology. A.D.-d.U. was supported by a fellowship from the Spanish Ministry of Education and Vocational Training (grant number FPU16/01435). JML-S was supported by Cabildo Insular de Tenerife and Consejería de Educación, Gobierno de Canarias (A0000014697). Competing interests The authors declare that they have no potential conflicts of interest with respect to the authorship and/or publication of this article. The funders had no role in the design of the study; in the collection, analyses, or interpretation of data; in the writing of the manuscript, or in the decision to publish the results. 
Acknowledgements We would like to thank the support from our colleagues from the Teide-HPC Supercomputing facility ( http://teidehpc.iter.es/en ), which was funded by INP-2011-0063-PCT-430000-ACT (INNPLANTA program) from the Spanish Ministry of Economy and Competitiveness. References ↵ 1000 Genomes Project Consortium , Adam Auton , Lisa D. Brooks , Richard M. Durbin , Erik P. Garrison , Hyun Min Kang , Jan O. Korbel , et al. 2015 . “ A Global Reference for Human Genetic Variation .” Nature 526 ( 7571 ): 68 – 74 . OpenUrl CrossRef PubMed ↵ Akbari , Ali , Joseph J. Vitti , Arya Iranmehr , Mehrdad Bakhtiari , Pardis C. Sabeti , Siavash Mirarab , and Vineet Bafna . 2018 . “ Identifying the Favored Mutation in a Positive Selective Sweep .” Nature Methods 15 ( 4 ): 279 – 82 . OpenUrl PubMed ↵ Amberger , Joanna , Carol A. Bocchini , Alan F. Scott , and Ada Hamosh . 2009 . “ McKusick’s Online Mendelian Inheritance in Man (OMIM) .” Nucleic Acids Research 37 (Database issue): D793 – 6 . OpenUrl CrossRef PubMed Web of Science ↵ Antikainen , Anni A. V. , Niina Sandholm , David-Alexandre Trégouët , Romain Charmet , Amy Jayne McKnight , Tarunveer S. Ahluwalia , Anna Syreeni , et al. 2021 . “ Genome-Wide Association Study on Coronary Artery Disease in Type 1 Diabetes Suggests Beta-Defensin 127 as a Risk Locus .” Cardiovascular Research 117 ( 2 ): 600 – 612 . OpenUrl CrossRef PubMed ↵ Apol , Katrin D. , Leivur N. Lydersen , Ólavur Mortensen , Pál Weihe , Bjarni Á Steig , Guðrið Andorsdóttir , and Noomi O. Gregersen . 2022 . “ FarGen - Participants in the Genetic Research Infrastructure of the Faroe Islands .” Scandinavian Journal of Public Health 50 ( 7 ): 980 – 87 . OpenUrl CrossRef PubMed ↵ Arauna , Lara R. , Javier Mendoza-Revilla , Alex Mas-Sandoval , Hassan Izaabel , Asmahan Bekada , Soraya Benhamamouch , Karima Fadhlaoui-Zid , Pierre Zalloua , Garrett Hellenthal , and David Comas . 2017 . 
“ Recent Historical Migrations Have Shaped the Gene Pool of Arabs and Berbers in North Africa .” Molecular Biology and Evolution 34 ( 2 ): 318 – 29 . OpenUrl CrossRef PubMed ↵ Bergström , Anders , Shane A. McCarthy , Ruoyun Hui , Mohamed A. Almarri , Qasim Ayub , Petr Danecek , Yuan Chen , et al. 2020 . “Insights into Human Genetic Variation and Population History from 929 Diverse Genomes.” Science (New York, N.Y.) 367 ( 6484 ): eaay5012 . OpenUrl ↵ Botigué , Laura R. , Brenna M. Henn , Simon Gravel , Brian K. Maples , Christopher R. Gignoux , Erik Corona , Gil Atzmon , et al. 2013 . “ Gene Flow from North Africa Contributes to Differential Human Genetic Diversity in Southern Europe .” Proceedings of the National Academy of Sciences of the United States of America 110 ( 29 ): 11791 – 96 . OpenUrl Abstract / FREE Full Text ↵ Bueno , Héctor , Rubén Hernáez , and Adrián V. Hernández . 2008 . “ Type 2 Diabetes Mellitus and Cardiovascular Disease in Spain: A Narrative Review .” C Rev Esp Cardiol Supl . 8 : 50C – 58 . OpenUrl ↵ Buniello , Annalisa , Jacqueline A. L. MacArthur , Maria Cerezo , Laura W. Harris , James Hayhurst , Cinzia Malangone , Aoife McMahon , et al. 2019 . “ The NHGRI-EBI GWAS Catalog of Published Genome-Wide Association Studies, Targeted Arrays and Summary Statistics 2019 .” Nucleic Acids Research 47 ( D1 ): D1005 – 12 . OpenUrl CrossRef PubMed ↵ Cabrera de León , Antonio , Ma del Cristo Rodríguez Pérez , Delia Almeida González , Santiago Domínguez Coello , Armando Aguirre Jaime , Buenaventura Brito Díaz , Ana González Hernández , and Lina I. Pérez Méndez . 2008 . “Presentación de La Cohorte ‘CDC de Canarias’: Objetivos, Diseño y Resultados Preliminares.” Revista Espanola de Salud Publica 82 ( 5 ): 519 – 34 . OpenUrl CrossRef PubMed Web of Science ↵ Cabrera de León , Antonio , María del Cristo Rodríguez-Pérez , José Carlos del Castillo-Rodríguez , Buenaventura Brito-Díaz , Lina I. 
Pérez-Méndez , Mercedes Muros de Fuentes , Delia Almeida-González , Marta Batista-Medina , and Armando Aguirre-Jaime . 2006 . “[Coronary risk in the population of the Canary Islands, Spain, using the Framingham function].” Medicina Clinica 126 ( 14 ): 521 – 26 . OpenUrl PubMed ↵ Castella , Maria , Roser Pujol , Elsa Callén , Juan P. Trujillo , José A. Casado , Hans Gille , Francis P. Lach , et al. 2011 . “ Origin, Functional Role, and Clinical Impact of Fanconi Anemia FANCA Mutations .” Blood 117 ( 14 ): 3759 – 69 . OpenUrl Abstract / FREE Full Text ↵ Chang , Christopher C. , Carson C. Chow , Laurent Cam Tellier , Shashaank Vattikuti , Shaun M. Purcell , and James J. Lee . 2015 . “ Second-Generation PLINK: Rising to the Challenge of Larger and Richer Datasets .” GigaScience 4 ( February ): 7 . OpenUrl CrossRef PubMed ↵ Charmet , Romain , Seamus Duffy , Sareh Keshavarzi , Beata Gyorgy , Michel Marre , Peter Rossing , Amy Jayne McKnight , et al. 2018 . “ Novel Risk Genes Identified in a Genome-Wide Association Study for Coronary Artery Disease in Patients with Type 1 Diabetes .” Cardiovascular Diabetology 17 ( 1 ): 61 . OpenUrl PubMed ↵ Chheda , Himanshu , Priit Palta , Matti Pirinen , Shane McCarthy , Klaudia Walter , Seppo Koskinen , Veikko Salomaa , et al. 2017 . “ Whole-Genome View of the Consequences of a Population Bottleneck Using 2926 Genome Sequences from Finland and United Kingdom .” European Journal of Human Genetics 25 ( 4 ): 477 – 84 . OpenUrl PubMed ↵ Church , Deanna M. , Valerie A. Schneider , Tina Graves , Katherine Auger , Fiona Cunningham , Nathan Bouk , Hsiu-Chuan Chen , et al. 2011 . “ Modernizing Reference Genome Assemblies .” PLoS Biology 9 ( 7 ): e1001091 . OpenUrl CrossRef PubMed ↵ Collins , Ryan L. , Harrison Brand , Konrad J. Karczewski , Xuefang Zhao , Jessica Alföldi , Laurent C. Francioli , Amit V. Khera , et al. 2020 . “ A Structural Variation Reference for Medical and Population Genetics .” Nature 581 ( 7809 ): 444 – 51 . 
OpenUrl CrossRef PubMed ↵ COVID-19 Host Genetics Initiative . 2023 . “A Second Update on Mapping the Human Genetic Architecture of COVID-19.” Nature 621 ( 7977 ): E7 – E26 . OpenUrl CrossRef PubMed ↵ Degenhardt , Frauke , David Ellinghaus , Simonas Juzenas , Jon Lerga-Jaso , Mareike Wendorff , Douglas Maya-Miles , Florian Uellendahl-Werth , et al. 2022 . “ Detailed Stratified GWAS Analysis for Severe COVID-19 in Four European Populations .” Human Molecular Genetics 31 ( 23 ): 3945 – 66 . OpenUrl CrossRef PubMed ↵ Deo , Rahul C. , David Reich , Arti Tandon , Ermeg Akylbekova , Nick Patterson , Alicja Waliszewska , Sekar Kathiresan , Daniel Sarpong , Herman A. Taylor Jr. , and James G. Wilson . 2009 . “ Genetic Differences between the Determinants of Lipid Profile Phenotypes in African and European Americans: The Jackson Heart Study .” PLoS Genetics 5 ( 1 ): e1000342 . OpenUrl ↵ Díaz-de Usera , Ana , Luis A. Rubio-Rodríguez , Adrián Muñoz-Barrera , Jose M. Lorenzo-Salazar , Beatriz Guillen-Guio , David Jáspez , Almudena Corrales , et al. 2022 . “ Developing CIRdb as a Catalog of Natural Genetic Variation in the Canary Islanders .” Scientific Reports 12 ( 1 ): 16132 . OpenUrl PubMed ↵ Dopazo , Joaquín , Alicia Amadoz , Marta Bleda , Luz Garcia-Alonso , Alejandro Alemán , Francisco García-García , Juan A. Rodriguez , et al. 2016 . “ 267 Spanish Exomes Reveal Population-Specific Differences in Disease-Related Genetic Variation .” Molecular Biology and Evolution 33 ( 5 ): 1205 – 18 . OpenUrl CrossRef PubMed ↵ Eppinga , Ruben N. , Yanick Hagemeijer , Stephen Burgess , David A. Hinds , Kari Stefansson , Daniel F. Gudbjartsson , Dirk J. van Veldhuisen , Patricia B. Munroe , Niek Verweij , and Pim van der Harst . 2016 . “ Identification of Genomic Loci Associated with Resting Heart Rate and Shared Genetic Predictors with All-Cause Mortality .” Nature Genetics 48 ( 12 ): 1557 – 63 . 
OpenUrl CrossRef PubMed ↵ Fattahi , Zohreh , Maryam Beheshtian , Marzieh Mohseni , Hossein Poustchi , Erin Sellars , Sayyed Hossein Nezhadi , Amir Amini , et al. 2019 . “ Iranome: A Catalog of Genomic Variations in the Iranian Population .” Human Mutation 40 ( 11 ): 1968 – 84 . OpenUrl CrossRef PubMed ↵ Fingerlin , Tasha E. , Elissa Murphy , Weiming Zhang , Anna L. Peljto , Kevin K. Brown , Mark P. Steele , James E. Loyd , et al. 2013 . “ Genome-Wide Association Study Identifies Multiple Susceptibility Loci for Pulmonary Fibrosis .” Nature Genetics 45 ( 6 ): 613 – 20 . OpenUrl CrossRef PubMed ↵ Flores , C. , N. Maca-Meyer , J. A. Pérez , A. M. González , J. M. Larruga , and V. M. Cabrera . 2003 . “ A Predominant European Ancestry of Paternal Lineages from Canary Islanders .” Annals of Human Genetics 67 ( Pt 2 ): 138 – 52 . OpenUrl PubMed ↵ Forno , Erick , Joanne Sordillo , John Brehm , Wei Chen , Takis Benos , Qi Yan , Lydiana Avila , et al. 2017 . “ Genome-Wide Interaction Study of Dust Mite Allergen on Lung Function in Children with Asthma .” The Journal of Allergy and Clinical Immunology 140 ( 4 ): 996 – 1003.e7 . OpenUrl ↵ Fregel , Rosa , Verónica Gomes , Leonor Gusmão , Ana M. González , Vicente M. Cabrera , António Amorim , and Jose M. Larruga . 2009 . “ Demographic History of Canary Islands Male Gene-Pool: Replacement of Native Lineages by European .” BMC Evolutionary Biology 9 ( August ): 181 . OpenUrl PubMed ↵ Fregel , Rosa , Alejandra C. Ordóñez , Jonathan Santana-Cabrera , Vicente M. Cabrera , Javier Velasco-Vázquez , Verónica Alberto , Marco A. Moreno-Benítez , et al. 2019 . “ Mitogenomes Illuminate the Origin and Migration Patterns of the Indigenous People of the Canary Islands .” PLoS One 14 ( 3 ): e0209125 . OpenUrl PubMed ↵ García-Olivares , Víctor , Luis A. Rubio-Rodríguez , Adrián Muñoz-Barrera , Ana Díaz-de Usera , David Jáspez , Antonio Iñigo-Campos , María Del Cristo Rodríguez Pérez , et al. 2023 . 
“Digging into the Admixture Strata of Current-Day Canary Islanders Based on Mitogenomes.” iScience 26 ( 1 ): 105907 . OpenUrl PubMed ↵ García-Villarreal , L. , S. Daniels , S. H. Shaw , D. Cotton , M. Galvin , J. Geskes , P. Bauer , A. Sierra-Hernández , A. Buckler , and A. Tugores . 2000 . “ High Prevalence of the Very Rare Wilson Disease Gene Mutation Leu708Pro in the Island of Gran Canaria (Canary Islands, Spain): A Genetic and Clinical Study .” Hepatology 32 ( 6 ): 1329 – 36 . OpenUrl CrossRef PubMed Web of Science ↵ Genome of the Netherlands Consortium . 2014 . “Whole-Genome Sequence Variation, Population Structure and Demographic History of the Dutch Population.” Nature Genetics 46 ( 8 ): 818 – 25 . OpenUrl CrossRef PubMed ↵ Ghouse , J. , M. W. Skov , R. S. Bigseth , G. Ahlberg , J. K. Kanters , and M. S. Olesen . 2018 . “ Distinguishing Pathogenic Mutations from Background Genetic Noise in Cardiology: The Use of Large Genome Databases for Genetic Interpretation .” Clinical Genetics 93 ( 3 ): 459 – 66 . OpenUrl PubMed ↵ Ghoussaini , Maya , Edward Mountjoy , Miguel Carmona , Gareth Peat , Ellen M. Schmidt , Andrew Hercules , Luca Fumis , et al. 2021 . “ Open Targets Genetics: Systematic Identification of Trait-Associated Genes Using Large-Scale Genetics and Functional Genomics .” Nucleic Acids Research 49 ( D1 ): D1311 – 20 . OpenUrl CrossRef PubMed ↵ Guan , Yongtao . 2014 . “ Detecting Structure of Haplotypes and Local Ancestry .” Genetics 196 ( 3 ): 625 – 42 . OpenUrl Abstract / FREE Full Text ↵ Guillen-Guio , Beatriz , Jose M. Lorenzo-Salazar , Rafaela González-Montelongo , Ana Díaz-de Usera , Itahisa Marcelino-Rodríguez , Almudena Corrales , Antonio Cabrera de León , Santos Alonso , and Carlos Flores . 2018 . “ Genomic Analyses of Human European Diversity at the Southwestern Edge: Isolation, African Influence and Disease Associations in the Canary Islands .” Molecular Biology and Evolution 35 ( 12 ): 3010 – 26 . 
OpenUrl PubMed ↵ Gurdasani , Deepti , Tommy Carstensen , Fasil Tekola-Ayele , Luca Pagani , Ioanna Tachmazidou , Konstantinos Hatzikotoulas , Savita Karthikeyan , et al. 2015 . “ The African Genome Variation Project Shapes Medical Genetics in Africa .” Nature 517 ( 7534 ): 327 – 32 . OpenUrl CrossRef PubMed ↵ Hooton , Earnest A . 1970 . The Ancient Inhabitants of the Canary Islands . Kraus Reprint . ↵ Huddleston , John , and Evan E. Eichler . 2016 . “An Incomplete Understanding of Human Genetic Variation.” Genetics 202 ( 4 ): 1251 – 54 . OpenUrl Abstract / FREE Full Text ↵ Huertas-Vazquez , Adriana , Christopher P. Nelson , Xiuqing Guo , Kyndaron Reinier , Audrey Uy-Evanado , Carmen Teodorescu , Jo Ayala , et al. 2013 . “ Novel Loci Associated with Increased Risk of Sudden Cardiac Death in the Context of Coronary Artery Disease .” PLoS One 8 ( 4 ): e59905 . OpenUrl CrossRef PubMed ↵ Jeroncic , Ana , Yasin Memari , Graham R. S. Ritchie , Audrey E. Hendricks , Anja Kolb-Kokocinski , Angela Matchan , Veronique Vitart , et al. 2016 . “ Whole-Exome Sequencing in an Isolated Population from the Dalmatian Island of Vis .” European Journal of Human Genetics 24 ( 10 ): 1479 – 87 . OpenUrl PubMed ↵ Jombart , Thibaut , Sébastien Devillard , and François Balloux . 2010 . “ Discriminant Analysis of Principal Components: A New Method for the Analysis of Genetically Structured Populations .” BMC Genetics 11 ( October ): 94 . OpenUrl PubMed ↵ Juliá-Serdá , G. , P. Cabrera-Navarro , O. Acosta-Fernández , P. Martín-Pérez , P. Losada-Cabrera , M. A. García-Bello , T. Carrillo-Díaz , and J. Antó-Boqué . 2011 . “ High Prevalence of Asthma and Atopy in the Canary Islands, Spain .” The International Journal of Tuberculosis and Lung Disease 15 ( 4 ): 536 – 41 . OpenUrl ↵ Karczewski , Konrad J. , Laurent C. Francioli , Grace Tiao , Beryl B. Cummings , Jessica Alföldi , Qingbo Wang , Ryan L. Collins , et al. 2020 . 
“ The Mutational Constraint Spectrum Quantified from Variation in 141,456 Humans .” Nature 581 ( 7809 ): 434 – 43 . OpenUrl CrossRef PubMed ↵ Ke , Xiayi , Martin S. Taylor , and Lon R. Cardon . 2008 . “ Singleton SNPs in the Human Genome and Implications for Genome-Wide Association Studies .” European Journal of Human Genetics 16 ( 4 ): 506 – 15 . OpenUrl CrossRef PubMed ↵ Kim , Jungeun , Jessica A. Weber , Sungwoong Jho , Jinho Jang , Jehoon Jun , Yun Sung Cho , Hak-Min Kim , et al. 2018 . “ KoVariome: Korean National Standard Reference Variome Database of Whole Genomes with Comprehensive SNV, Indel, CNV, and SV Analyses .” Scientific Reports 8 ( 1 ): 5677 . OpenUrl PubMed ↵ Kirin , Mirna , Ruth McQuillan , Christopher S. Franklin , Harry Campbell , Paul M. McKeigue , and James F. Wilson . 2010 . “ Genomic Runs of Homozygosity Record Population History and Consanguinity .” PLoS One 5 ( 11 ): e13996 . OpenUrl CrossRef PubMed ↵ Kore , Pragati , Michael W. Wilson , Grace Tiao , Katherine Chao , Philip W. Darnowsky , Nick Watts , Jessica Honorato Mauer , et al. 2025 . “ Improved Allele Frequencies in GnomAD through Local Ancestry Inference .” Nature Communications 16 ( 1 ): 8734 . OpenUrl PubMed ↵ Kraja , Aldi T. , Dhananjay Vaidya , James S. Pankow , Mark O. Goodarzi , Themistocles L. Assimes , Iftikhar J. Kullo , Ulla Sovio , et al. 2011 . “ A Bivariate Genome-Wide Approach to Metabolic Syndrome: STAMPEED Consortium .” Diabetes 60 ( 4 ): 1329 – 39 . OpenUrl Abstract / FREE Full Text ↵ Kuleshov , Maxim V. , Matthew R. Jones , Andrew D. Rouillard , Nicolas F. Fernandez , Qiaonan Duan , Zichen Wang , Simon Koplev , et al. 2016 . “ Enrichr: A Comprehensive Gene Set Enrichment Analysis Web Server 2016 Update .” Nucleic Acids Research 44 ( W1 ): W90 – 7 . OpenUrl CrossRef PubMed ↵ Kulminski , Alexander M. , Jian Huang , Yury Loika , Konstantin G. Arbeev , Olivia Bagley , Arseniy Yashkin , Matt Duan , and Irina Culminskaya . 2018 . 
“ Strong Impact of Natural-Selection-Free Heterogeneity in Genetics of Age-Related Phenotypes .” Aging 10 ( 3 ): 492 – 514 . OpenUrl PubMed ↵ Liu , Jun’e , Kai Li , Jiabin Cai , Mingchang Zhang , Xiaoting Zhang , Xushen Xiong , Haowei Meng , et al. 2020 . “ Landscape and Regulation of M6A and M6Am Methylome across Human and Mouse Tissues .” Molecular Cell 77 ( 2 ): 426 – 440.e6 . OpenUrl CrossRef PubMed ↵ Lorente-Arencibia , Pascual , Luis García-Villarreal , Rafaela González-Montelongo , Luis A. Rubio-Rodríguez , Carlos Flores , Paloma Garay-Sánchez , Tanausú delaCruz , et al. 2022 . “ Wilson Disease Prevalence: Discrepancy between Clinical Records, Registries and Mutation Carrier Frequency .” Journal of Pediatric Gastroenterology and Nutrition 74 ( 2 ): 192 – 99 . OpenUrl PubMed ↵ Lutz , Sharon M. , Michael H. Cho , Kendra Young , Craig P. Hersh , Peter J. Castaldi , Merry-Lynn McDonald , Elizabeth Regan , et al. 2015 . “ A Genome-Wide Association Study Identifies Risk Loci for Spirometric Measures among Smokers of European and African Ancestry .” BMC Genetics 16 ( 1 ): 138 . OpenUrl PubMed ↵ Maca-Meyer , Nicole , Vicente M. Cabrera , Matilde Arnay , Carlos Flores , Rosa Fregel , Ana M. González , and José M. Larruga . 2005 . “ Mitochondrial DNA Diversity in 17th–18th Century Remains from Tenerife (Canary Islands) .” American Journal of Physical Anthropology 127 ( 4 ): 418 – 26 . OpenUrl PubMed ↵ Marcelino-Rodríguez , Itahisa , Roberto Elosua , María del Cristo Rodríguez Pérez , Daniel Fernández-Bergés , María Jesús Guembe , Tomás Vega Alonso , Francisco Javier Félix , et al. 2016 . “On the Problem of Type 2 Diabetes-Related Mortality in the Canary Islands, Spain. The DARIOS Study.” Diabetes Research and Clinical Practice 111 ( January ): 74 – 82 . OpenUrl PubMed ↵ McLaren , William , Laurent Gil , Sarah E. Hunt , Harpreet Singh Riat , Graham R. S. Ritchie , Anja Thormann , Paul Flicek , and Fiona Cunningham . 2016 . 
“ The Ensembl Variant Effect Predictor .” Genome Biology 17 ( 1 ): 122 . OpenUrl CrossRef PubMed ↵ McLean , Cory Y. , Dave Bristor , Michael Hiller , Shoa L. Clarke , Bruce T. Schaar , Craig B. Lowe , Aaron M. Wenger , and Gill Bejerano . 2010 . “ GREAT Improves Functional Interpretation of Cis-Regulatory Regions .” Nature Biotechnology 28 ( 5 ): 495 – 501 . OpenUrl CrossRef PubMed Web of Science ↵ Mulder , Nicola , Alash’le Abimiku , Sally N. Adebamowo , Jantina de Vries , Alice Matimba , Paul Olowoyo , Michele Ramsay , Michelle Skelton , and Dan J. Stein . 2018 . “ H3Africa: Current Perspectives .” Pharmacogenomics and Personalized Medicine 11 ( April ): 59 – 66 . OpenUrl ↵ Nagasaki , Masao , Jun Yasuda , Fumiki Katsuoka , Naoki Nariai , Kaname Kojima , Yosuke Kawai , Yumi Yamaguchi-Kabata , et al. 2015 . “ Rare Variant Discovery by Deep Whole-Genome Sequencing of 1,070 Japanese Individuals .” Nature Communications 6 ( 1 ): 8018 . OpenUrl PubMed ↵ Noth , Imre , Yingze Zhang , Shwu-Fan Ma , Carlos Flores , Mathew Barber , Yong Huang , Steven M. Broderick , et al. 2013 . “ Genetic Variants Associated with Idiopathic Pulmonary Fibrosis Susceptibility and Mortality: A Genome-Wide Association Study .” The Lancet Respiratory Medicine 1 ( 4 ): 309 – 17 . OpenUrl PubMed ↵ Nurk , Sergey , Sergey Koren , Arang Rhie , Mikko Rautiainen , Andrey V. Bzikadze , Alla Mikheenko , Mitchell R. Vollger , et al. 2022 . “The Complete Sequence of a Human Genome.” Science (New York, N.Y.) 376 ( 6588 ): 44 – 53 . OpenUrl ↵ Nutile , T. , D. Ruggiero , A. F. Herzig , A. Tirozzi , S. Nappo , R. Sorice , F. Marangio , C. Bellenguez , A. L. Leutenegger , and M. Ciullo . 2019 . “ Whole-Exome Sequencing in the Isolated Populations of Cilento from South Italy .” Scientific Reports 9 ( 1 ): 4059 . OpenUrl PubMed ↵ Ochoa , David , Andrew Hercules , Miguel Carmona , Daniel Suveges , Asier Gonzalez-Uriarte , Cinzia Malangone , Alfredo Miranda , et al. 2021 . 
“ Open Targets Platform: Supporting Systematic Drug-Target Identification and Prioritisation .” Nucleic Acids Research 49 ( D1 ): D1302 – 10 . OpenUrl CrossRef PubMed ↵ O’Connor , Brian D. , and Geraldine van der Auwera . 2020 . Genomics Analysis with Spark, Docker and Clouds . Sebastopol, CA : O’Reilly Media . ↵ Ordóñez , Alejandra C. , R. Fregel , A. Trujillo-Mederos , Montserrat Hervella , Concepción de-la-Rúa , and Matilde Arnay-de-la-Rosa . 2017 . “ Genetic Studies on the Prehispanic Population Buried in Punta Azul Cave (El Hierro, Canary Islands) .” Journal of Archaeological Science 78 ( February ): 20 – 28 . OpenUrl ↵ Pemberton , Trevor J. , Devin Absher , Marcus W. Feldman , Richard M. Myers , Noah A. Rosenberg , and Jun Z. Li . 2012 . “ Genomic Patterns of Homozygosity in Worldwide Human Populations .” American Journal of Human Genetics 91 ( 2 ): 275 – 92 . OpenUrl CrossRef PubMed ↵ Pino-Yanes , María , Almudena Corrales , Santiago Basaldúa , Alexis Hernández , Luisa Guerra , Jesús Villar , and Carlos Flores . 2011 . “ North African Influences and Potential Bias in Case-Control Association Studies in the Spanish Population .” PLoS One 6 ( 3 ): e18389 . OpenUrl CrossRef PubMed ↵ Pulit , Sara L. , Charli Stoneman , Andrew P. Morris , Andrew R. Wood , Craig A. Glastonbury , Jessica Tyrrell , Loïc Yengo , et al. 2019 . “ Meta-Analysis of Genome-Wide Association Studies for Body Fat Distribution in 694 649 Individuals of European Ancestry .” Human Molecular Genetics 28 ( 1 ): 166 – 74 . OpenUrl CrossRef PubMed ↵ R Core Team . 2022 . R: A Language and Environment for Statistical Computing . Vienna, Austria : R Foundation for Statistical Computing . ↵ Rentzsch , Philipp , Daniela Witten , Gregory M. Cooper , Jay Shendure , and Martin Kircher . 2019 . “ CADD: Predicting the Deleteriousness of Variants throughout the Human Genome .” Nucleic Acids Research 47 ( D1 ): D886 – 94 . 
OpenUrl CrossRef PubMed ↵ Rodríguez-Esparragón , Francisco , Juan Carlos López-Fernández , Nisa Buset-Ríos , Miguel A. García-Bello , Erika Hernández-Velazquez , Laura Cappiello , and José Carlos Rodríguez-Pérez . 2017 . “ Paraoxonase 1 and 2 Gene Variants and the Ischemic Stroke Risk in Gran Canaria Population: An Association Study and Meta-Analysis .” The International Journal of Neuroscience 127 ( 3 ): 191 – 98 . OpenUrl PubMed ↵ Rodríguez-Varela , Ricardo , Torsten Günther , Maja Krzewińska , Jan Storå , Thomas H. Gillingwater , Malcolm MacCallum , Juan Luis Arsuaga , et al. 2017 . “ Genomic Analyses of Pre-European Conquest Human Remains from the Canary Islands Reveal Close Affinity to Modern North Africans .” Current Biology 28 ( 10 ): 1677 – 9 . OpenUrl ↵ Safran , Marilyn , Naomi Rosen , Michal Twik , Ruth BarShir , Tsippi Iny Stein , Dvir Dahary , Simon Fishilevich , and Doron Lancet . 2021 . “ The GeneCards Suite .” In Practical Guide to Life Science Databases , 27 – 56 . Singapore : Springer Nature Singapore . ↵ Sakornsakolpat , Phuwanat , Dmitry Prokopenko , Maxime Lamontagne , Nicola F. Reeve , Anna L. Guyatt , Victoria E. Jackson , Nick Shrine , et al. 2019 . “ Genetic Landscape of Chronic Obstructive Pulmonary Disease Identifies Heterogeneous Cell-Type and Phenotype Associations .” Nature Genetics 51 ( 3 ): 494 – 505 . OpenUrl CrossRef PubMed ↵ Sánchez-Lerma , B. , F. J. Morales-Chirivella , I. Peñuelas , C. Blanco Guerra , F. Mesa Lugo , I. Aguinaga-Ontoso , and F. Guillén-Grima . 2009 . “ High Prevalence of Asthma and Allergic Diseases in Children Aged 6 to 7 Years from the Canary Islands .” Journal of Investigational Allergology & Clinical Immunology 19 ( 5 ): 383 – 90 . OpenUrl PubMed ↵ Santana , A. , E. Salido , A. Torres , and L. J. Shapiro . 2003 . 
“ Primary Hyperoxaluria Type 1 in the Canary Islands: A Conformational Disease Due to I244T Mutation in the P11L-Containing Alanine:Glyoxylate Aminotransferase .” Proceedings of the National Academy of Sciences of the United States of America 100 ( 12 ): 7277 – 82 . OpenUrl Abstract / FREE Full Text ↵ Scott , Eric M. , Anason Halees , Yuval Itan , Emily G. Spencer , Yupeng He , Mostafa Abdellateef Azab , Stacey B. Gabriel , et al. 2016 . “ Characterization of Greater Middle Eastern Genetic Variation for Enhanced Disease Gene Discovery .” Nature Genetics 48 ( 9 ): 1071 – 76 . OpenUrl CrossRef PubMed ↵ Serradell , Jose M. , Jose M. Lorenzo-Salazar , Carlos Flores , Oscar Lao , and David Comas . 2024 . “ Modelling the Demographic History of Human North African Genomes Points to a Recent Soft Split Divergence between Populations .” Genome Biology 25 ( 1 ): 201 . OpenUrl PubMed ↵ Serrano , Javier G. , Alejandra C. Ordóñez , Jonathan Santana , Elías Sánchez-Cañadillas , Matilde Arnay , Amelia Rodríguez-Rodríguez , Jacob Morales , et al. 2023 . “ The Genomic History of the Indigenous People of the Canary Islands .” Nature Communications 14 ( 1 ): 4641 . OpenUrl PubMed ↵ Sirugo , Giorgio , Scott M. Williams , and Sarah A. Tishkoff . 2019 . “ The Missing Diversity in Human Genetic Studies .” Cell 177 ( 4 ): 1080 . OpenUrl CrossRef PubMed ↵ Stefansson , Hreinn , Agnar Helgason , Gudmar Thorleifsson , Valgerdur Steinthorsdottir , Gisli Masson , John Barnard , Adam Baker , et al. 2005 . “ A Common Inversion under Selection in Europeans .” Nature Genetics 37 ( 2 ): 129 – 37 . OpenUrl CrossRef PubMed Web of Science ↵ Subhash , Santhilal , and Chandrasekhar Kanduri . 2016 . “ GeneSCF: A Real-Time Based Functional Enrichment Tool with Support for Multiple Organisms .” BMC Bioinformatics 17 ( 1 ): 365 . OpenUrl CrossRef PubMed ↵ Tadros , Rafik , Catherine Francis , Xiao Xu , Alexa M. C. Vermeer , Andrew R. Harper , Roy Huurman , Ken Kelu Bisabu , et al. 2021 . 
“ Shared Genetic Pathways Contribute to Risk of Hypertrophic and Dilated Cardiomyopathies with Opposite Directions of Effect .” Nature Genetics 53 ( 2 ): 128 – 34 . OpenUrl CrossRef PubMed ↵ Taliun , Daniel , Daniel N. Harris , Michael D. Kessler , Jedidiah Carlson , Zachary A. Szpiech , Raul Torres , Sarah A. Gagliano Taliun , et al. 2021 . “ Sequencing of 53,831 Diverse Genomes from the NHLBI TOPMed Program .” Nature 590 ( 7845 ): 290 – 99 . OpenUrl CrossRef PubMed ↵ Tantisira , Kelan G. , Ross Lazarus , Augusto A. Litonjua , Barbara Klanderman , and Scott T. Weiss . 2008 . “ Chromosome 17: Association of a Large Inversion Polymorphism with Corticosteroid Response in Asthma .” Pharmacogenetics and Genomics 18 ( 8 ): 733 – 37 . OpenUrl CrossRef PubMed Web of Science ↵ Tian , Chao , Bethann S. Hromatka , Amy K. Kiefer , Nicholas Eriksson , Suzanne M. Noble , Joyce Y. Tung , and David A. Hinds . 2017 . “ Genome-Wide Association and HLA Region Fine-Mapping Studies Identify Susceptibility Loci for Multiple Common Infections .” Nature Communications 8 ( 1 ): 599 . OpenUrl PubMed ↵ Tosco-Herrera , Eva , Adrián Muñoz-Barrera , David Jáspez , Luis A. Rubio-Rodríguez , Alejandro Mendoza-Alvarez , Hector Rodriguez-Perez , Jonathan Jou , et al. 2022 . “ Evaluation of a Whole-Exome Sequencing Pipeline and Benchmarking of Causal Germline Variant Prioritizers .” Human Mutation 43 ( 12 ): 2010 – 20 . OpenUrl CrossRef PubMed ↵ UK10K Consortium , Klaudia Walter , Josine L. Min , Jie Huang , Lucy Crooks , Yasin Memari , Shane McCarthy , et al. 2015 . “The UK10K Project Identifies Rare Variants in Health and Disease.” Nature 526 ( 7571 ): 82 – 90 . OpenUrl CrossRef PubMed ↵ Verma , Anurag , Yuki Bradford , Scott Dudek , Anastasia M. Lucas , Shefali S. Verma , Sarah A. Pendergrass , and Marylyn D. Ritchie . 2018 . “ A Simulation Study Investigating Power Estimates in Phenome-Wide Association Studies .” BMC Bioinformatics 19 ( 1 ). ↵ Verweij , Niek , Yordi J. 
van de Vegte , and Pim van der Harst . 2018 . “ Genetic Study Links Components of the Autonomous Nervous System to Heart-Rate Profile during Exercise .” Nature Communications 9 ( 1 ): 898 . OpenUrl PubMed ↵ Vujkovic , Marijana , Jacob M. Keaton , Julie A. Lynch , Donald R. Miller , Jin Zhou , Catherine Tcheandjieu , Jennifer E. Huffman , et al. 2020 . “ Discovery of 318 New Risk Loci for Type 2 Diabetes and Related Vascular Outcomes among 1.4 Million Participants in a Multi-Ancestry Meta-Analysis .” Nature Genetics 52 ( 7 ): 680 – 91 . OpenUrl CrossRef PubMed ↵ Wang , Kai , Mingyao Li , and Hakon Hakonarson . 2010 . “ ANNOVAR: Functional Annotation of Genetic Variants from High-Throughput Sequencing Data .” Nucleic Acids Research 38 ( 16 ): e164 . OpenUrl CrossRef PubMed ↵ Weir , B. S. , and C. Clark Cockerham . 1984 . “ Estimating F-Statistics for the Analysis of Population Structure .” Evolution; International Journal of Organic Evolution 38 ( 6 ): 1358 . OpenUrl CrossRef PubMed Web of Science ↵ Wojcik , Genevieve L. , Mariaelisa Graff , Katherine K. Nishimura , Ran Tao , Jeffrey Haessler , Christopher R. Gignoux , Heather M. Highland , et al. 2019 . “ Genetic Analyses of Diverse Populations Improves Discovery for Complex Traits .” Nature 570 ( 7762 ): 514 – 18 . OpenUrl CrossRef PubMed ↵ Wong , Lai-Ping , Rick Twee-Hee Ong , Wan-Ting Poh , Xuanyao Liu , Peng Chen , Ruoying Li , Kevin Koi-Yau Lam , et al. 2013 . “ Deep Whole-Genome Sequencing of 100 Southeast Asian Malays .” The American Journal of Human Genetics 92 ( 1 ): 52 – 66 . OpenUrl CrossRef PubMed ↵ Yi , Xin , Yu Liang , Emilia Huerta-Sanchez , Xin Jin , Zha Xi Ping Cuo , John E. Pool , Xun Xu , et al. 2010 . “Sequencing of 50 Human Exomes Reveals Adaptation to High Altitude.” Science 329 ( 5987 ): 75 – 78 . OpenUrl Abstract / FREE Full Text ↵ Zhu , Xiaofeng . 2012 . “ The Analysis of Ethnic Mixtures .” Methods in Molecular Biology 850 : 465 – 81 . OpenUrl PubMed View the discussion thread. 
Back to top Previous Next Posted November 27, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Expanding CIRdb, a comprehensive catalog of whole-exome sequencing data of Canary Islanders Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Expanding CIRdb, a comprehensive catalog of whole-exome sequencing data of Canary Islanders Ana Díaz-de Usera , Luis A. Rubio-Rodríguez , Adrián Muñoz-Barrera , Jose M. Lorenzo-Salazar , Beatriz Guillen-Guio , David Jáspez , Almudena Corrales , Itahisa Marcelino-Rodríguez , María Del Cristo Rodríguez-Pérez , Antonio Cabrera-de León , Rafaela González-Montelongo , Raquel Cruz-Guerrero , Ángel Carracedo , Carlos Flores medRxiv 2025.11.24.25340885; doi: https://doi.org/10.1101/2025.11.24.25340885 Share This Article: Copy Citation Tools Expanding CIRdb, a comprehensive catalog of whole-exome sequencing data of Canary Islanders Ana Díaz-de Usera , Luis A. Rubio-Rodríguez , Adrián Muñoz-Barrera , Jose M. 
Lorenzo-Salazar , Beatriz Guillen-Guio , David Jáspez , Almudena Corrales , Itahisa Marcelino-Rodríguez , María Del Cristo Rodríguez-Pérez , Antonio Cabrera-de León , Rafaela González-Montelongo , Raquel Cruz-Guerrero , Ángel Carracedo , Carlos Flores medRxiv 2025.11.24.25340885; doi: https://doi.org/10.1101/2025.11.24.25340885 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (297) Cardiovascular Medicine (4421) Dentistry and Oral Medicine (443) Dermatology (381) Emergency Medicine (606) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15212) Forensic Medicine (30) Gastroenterology (1121) Genetic and Genomic Medicine (6581) Geriatric Medicine (667) Health Economics (996) Health Informatics (4520) Health Policy (1366) Health Systems and Quality Improvement (1611) Hematology (539) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15906) Intensive Care and Critical Care Medicine (1103) Medical Education (620) Medical Ethics (144) Nephrology (667) Neurology (6580) Nursing (345) Nutrition (998) Obstetrics and Gynecology (1141) Occupational and Environmental Health (956) Oncology (3324) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1689) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5431) Public and Global Health (9212) Radiology and Imaging (2193) Rehabilitation Medicine and Physical Therapy (1368) Respiratory Medicine (1194) Rheumatology (593) Sexual and Reproductive Health (709) Sports Medicine (529) Surgery (709) Toxicology (99) Transplantation (288) Urology (265) (function(){function c(){var 
b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ff118d39b080704',t:'MTc3OTM0MDQ2MA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.