Full text
80,099 characters
· extracted from
preprint-html
· click to expand
Genome-wide association analyses reveal susceptibility variants linked to Parkinson’s disease in the South African population using inferred global and local ancestry | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Genome-wide association analyses reveal susceptibility variants linked to Parkinson’s disease in the South African population using inferred global and local ancestry View ORCID Profile Kathryn Step , Thiago Peixoto Leal , View ORCID Profile Emily Waldo , Lusanda Madula , View ORCID Profile Yolandi Swart , View ORCID Profile Carlos F. Hernández , View ORCID Profile Jonggeol Jeffrey Kim , View ORCID Profile Sara Bandres-Ciga , Global Parkinson’s Genetics Program (GP2) , View ORCID Profile Ignacio F. Mata , View ORCID Profile Soraya Bardien doi: https://doi.org/10.1101/2025.08.01.25331910 Kathryn Step 1 Division of Molecular Biology and Human Genetics, Faculty of Medicine and Health Sciences, Stellenbosch University , Cape Town, South Africa 2 South African Medical Research Council Centre for Tuberculosis Research, Stellenbosch University , Cape Town, South Africa Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kathryn Step Thiago Peixoto Leal 3 Genomic Medicine, Lerner Research Institute, Cleveland Clinic Foundation , Cleveland, OH, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site Emily Waldo 3 Genomic Medicine, Lerner Research Institute, Cleveland Clinic Foundation , Cleveland, OH, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Emily Waldo Lusanda Madula 1 Division of Molecular Biology and Human Genetics, Faculty of Medicine and Health Sciences, Stellenbosch University , Cape Town, South Africa 2 South African Medical Research Council Centre for Tuberculosis Research, Stellenbosch University , Cape Town, South Africa Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yolandi Swart 1 Division of Molecular Biology and Human Genetics, Faculty of Medicine and Health Sciences, Stellenbosch University , Cape Town, South Africa 2 South African Medical Research Council Centre for Tuberculosis Research, Stellenbosch University , Cape Town, South Africa Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yolandi Swart Carlos F. Hernández 4 Universidad del Desarrollo, Centro de Genética y Genómica, Facultad de Medicina Clínica Alemana , Santiago 7610658, Chile Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Carlos F. Hernández Jonggeol Jeffrey Kim 5 Department of Molecular and Human Genetics, Baylor College of Medicine , Houston, TX, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jonggeol Jeffrey Kim Sara Bandres-Ciga 6 Center for Alzheimer’s and Related Dementias (CARD), National Institute on Aging and National Institute of Neurological Disorders and Stroke, National Institutes of Health , Bethesda, MD, USA , 20814 Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sara Bandres-Ciga Ignacio F. Mata 3 Genomic Medicine, Lerner Research Institute, Cleveland Clinic Foundation , Cleveland, OH, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ignacio F. Mata Soraya Bardien 1 Division of Molecular Biology and Human Genetics, Faculty of Medicine and Health Sciences, Stellenbosch University , Cape Town, South Africa 2 South African Medical Research Council Centre for Tuberculosis Research, Stellenbosch University , Cape Town, South Africa 7 South African Medical Research Council/Stellenbosch University Genomics of Brain Disorders Research Unit, Stellenbosch University , Cape Town, South Africa Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Soraya Bardien For correspondence: sbardien{at}sun.ac.za Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Genome-wide association studies (GWAS) have been successful in identifying over 100 loci associated with Parkinson’s disease (PD) susceptibility. However, the majority of these studies have focused on European cohorts with few including diverse ancestries. Using genotyped and imputed data from 691 South African PD cases and 826 controls, we conducted a conventional GWAS, two local ancestry GWAS (LA-GWAS) approaches (one using local ancestry as a covariate and the other separating the dosage per ancestry), and an association analysis to identify regions of homozygosity associated with PD status. Furthermore, we replicated these findings using another admixed population, a Latin American cohort (LARGE-PD). The ancestry inference suggested that the South African cohort is admixed from five populations, including African (AFR), European (EUR), Malaysian (MAL), Nama (NAMA), and South Asian (SAS), though with varying accuracy levels. The conventional GWAS successfully identified one locus (rs17098735-T) with genome-wide significance (p-value: 1.23×10 -8 ; beta= 2.286; SE= 0.401). Within the local ancestry window of the top GWAS hit, among individuals carrying the variant, 86.7% had AFR ancestry, 11% NAMA ancestry, and 2.2% MAL ancestry, with no EUR or SAS ancestry observed, highlighting a potential ancestry-specific genetic risk factor. Three lead loci were replicated in the LARGE-PD cohort. LA-GWAS using the Cochran-Armitage trend test identified 35 lead SNPs above suggestive significance after multiple test correction. Tractor-based approaches identified three lead loci when analyzing all five ancestry components jointly, as well as three additional lead loci in the AFR-only component, highlighting ancestry-specific loci that may contribute to genetic risk in diverse populations. For the LA-GWAS, one independent locus was replicated in LARGE-PD. Our findings suggest ancestry specificity in PD risk and underscores the importance of including diverse populations in genetics research. The study contributes towards a global understanding of the genetic etiology underlying PD. Introduction Stemming from a complex etiology that includes a strong genetic component ( Trevisan et al ., 2024 ), Parkinson’s disease (PD) is a neurodegenerative disorder characterized by a wide range of both motor and non-motor symptoms ( Armstrong and Okun, 2020 ). The burden of PD in Sub-Saharan African populations is increasing rapidly within the aging populations, ranking as the 11 th most prevalent nervous system disorder in the region ( GBD 2021 Nervous System Disorders Collaborators, 2024 ). By 2040, it is predicted that there will be 12 million individuals with PD globally ( Dorsey et al ., 2018 ; Karikari et al ., 2018 ), with Africa expected to face the greatest impact ( Dorsey et al ., 2007 ). However, despite the rise in disease prevalence, the representation of African populations in PD genetic research remains limited. The South African population is genetically diverse and comprises several diverse global ancestries, including the African, Asian, and European groups ( Thami and Chimusa, 2019 ). Notably, southern Africa is home to indigenous populations, such as the Khoe-San, who possess some of the most divergent lineages with shorter linkage disequilibrium (LD) blocks ( Henn et al ., 2011 ; Schlebusch et al ., 2012 ). Ancestral allele frequencies vary across genomes due to factors such as natural selection, genetic drift, and differing exposures to environments and pathogens ( Brown et al ., 2017 ; Schubert, Andaleon and Wheeler, 2020 ). These variations in allele frequencies can help identify population-specific disease risk variants in admixed individuals while simultaneously uncovering risk variants relevant to multiple populations ( Swart et al ., 2021 ). Previous PD genome-wide association studies (GWAS) and meta-analyses have identified 90 independent risk loci in a European cohort ( Nalls et al ., 2019 ), 134 loci through a European meta-analysis (Global Parkinson’s Genetics Program (GP2) and Leonard, 2025), 11 in an Asian cohort ( Foo et al ., 2020 ), and one in a Latin American cohort ( Loesch et al ., 2021 ). Recent efforts to diversify PD genetic research include a multiple-ancestry meta-analysis, comprising ancestral groups previously underrepresented, which identified 78 independent loci with 12 potentially novel loci ( Kim et al ., 2024 ). The first PD GWAS that was focused on African and African admixed participants successfully identified a novel ancestry-specific risk variant (rs3115534-G) in the GBA1 gene ( Rizig et al ., 2023 ). Here, we aim to conduct the first GWAS for PD in a South African study cohort with computational approaches best suited for multi-way admixed populations including a GWAS incorporating local ancestry, which refers to the genetic contribution from specific ancestral populations at an individual level ( Fu and Shi, 2024 ). The inclusion of this diverse ancestral population has the potential to uncover additional genetic risk factors on a global and local ancestral scale. Methods and materials An overview of the methods is provided in Figure 1 . Download figure Open in new tab Figure 1: Overview of the methods followed in the present study. AFR, African; AV ROH , average length of ROHs; EAS, East Asian; EUR, European; F ROH , Inbreeding coefficient estimates based on ROHs; FUMA, Functional Mapping and Annotation; MAL, Malaysian; NAMA, Nama-Khoe; N ROH , number of ROHs; PCs, Principal components; PD, Parkinson’s disease; ROH, runs of homozygosity; SAIGE, Scalable and Accurate Implementation of GEneralized mixed model; SAS, South Asian; S ROH , sum or total length of ROHs Study population and ethics approval For the discovery cohort, 691 individuals living with PD, diagnosed by a neurologist specializing in movement disorders according to the Queen’s Square Brain Bank Criteria ( Hughes et al ., 1992 ), were recruited from 2002 until June 2020 as part of the South African Parkinson’s Disease Study Collection (SAPDSC) ( van Rensburg et al ., 2022 ). A total of 826 controls were recruited from 2005 until 2018 (Health Research Ethics Committee, Stellenbosch University, 2002C/059). Participants ( Supplementary Table 1 ) were submitted to the Global Parkinson’s Genetics Program (GP2) for genotyping ( Global Parkinson’s Genetics Program, 2021 ; Step, Eltaraifee, et al ., 2025 ). For replication, we used an independent cohort from the Latin American Research consortium on the Genetics of Parkinson’s Disease (LARGE-PD) Phase 1 ( Supplementary Table 2 ), as previously described ( Loesch et al ., 2021 ). Full details on the cohort recruitment (807 PD cases and 690 controls), genotyping with Illumina’s Multi-Ethnic Genotyping Array (MEGA), quality control, and imputation can be found in Loesch, et al . 2021 . This cohort was selected due to its high level of admixture, similar to the South African population, though the specific ancestral contributions differ ( Norris et al ., 2018 ; Loesch et al ., 2021 ). Both populations exhibit greater admixture compared to other global populations. Genotyping, quality control, and imputation Genotyping of the SAPDSC was performed by GP2 using the NeuroBooster Array (v1.0, Illumina, San Diego, CA), which is composed of a backbone of 1,914,934 genome-wide variants and 95,273 variants associated with neurological diseases or traits ( Bandres-Ciga et al ., 2023 ). Initial quality control (QC) was performed using PLINK v1.9 ( Purcell et al ., 2007 ) and PLINK v2.0 ( Chang et al ., 2015 ), as previously described ( Leal et al ., 2025 ). The QC steps included filtering for Hardy-Weinberg equilibrium (1×10 -10 in cases, 1×10 -6 in controls), excess heterozygosity (>3 standard deviations), removing duplicate variants, data missingness at genotype and individual level (5%), redundant variants (A/T or C/G), and sex discrepancies (Females: F 0.8). The QC also identified a list of related individuals using King v2.2.9 (Manichaikul et al ., 2010) and a kinship coefficient cutoff of 0.0884 indicating second degree relation (Manichaikul et al ., 2010). Related individuals were excluded using NAToRA (Leal et al ., 2022) ( Supplementary Figure 1 ). These individuals were excluded at the relevant steps in the downstream analysis where relatedness was not permitted, while they were retained for analyses that allowed related individuals. Overall, 1,245,359 variants and 1,461 samples passed QC (679 cases; 782 controls). The Trans-Omics for Precision Medicine (TOPMed) Imputation Server was used for imputation with panel R3 ( Das et al ., 2016 ), and a Rsq filter of 0.3 was applied. Imputation quality was assessed using the empirical dosage Rsq (EmpRsq) values, which offer a more accurate measure than model-based Rsq by directly comparing observed and imputed genotypes. Unlike Rsq, which estimates quality based on the imputation model, EmpRsq evaluates the correlation between actual genotyped variants and their imputed counterparts, ensuring a more reliable assessment ( Supplementary Figure 2 ). Ancestry inference To accurately infer the ancestral proportions of the SAPDSC, we performed ancestry inference as previously described ( Leal et al ., 2025 ). For this, a reference panel were created using the continental non-hunter-gatherer African (AFR; n=471), continental East Asian (EAS; n=316), continental European (EUR; n=136), and continental South Asian (SAS; n=67) samples from the 1000 Genomes Project Phase III ( Byrska-Bishop et al ., 2022 ; Shriner et al ., 2023 ). Furthermore, we made use of whole-genome sequencing data for 84 unrelated healthy controls from the Nama (NAMA) population, an indigenous hunter-gatherer Khoe-San population of Southern Africa, representing a distinct subcontinental genetic ancestry ( van Eeden et al ., 2022 ; Ragsdale et al ., 2023 ), as well as genotyping data for 40 unrelated, healthy Malaysian (MAL) controls, also representing a subcontinental ancestral group ( Supplementary Table 3 ), from GP2’s seventh data release ( Leonard et al ., 2024 ). These populations were included to account for the extensive genetic diversity found in the SAPDSC. A principal component analysis (PCA) was completed to identify outliers ( Supplementary Figure 3 ). The reference files were phased using the TOPMed Imputation Server ( Das et al ., 2016 ) to ensure phasing accuracy and increase comparability to imputed data, leveraging its large and diverse reference panel for improved haplotype resolution. Population substructure was investigated using ADMIXTURE v1.3.0 ( Alexander and Lange, 2011 ), using AFR, EUR, EAS, NAMA, and SAS for reference populations. For this, LD pruning was performed using a sliding window of 200 SNPs, with a step size of 50 SNPs, and an r² threshold of 0.2 to remove variants in high LD, where 508,428 variants were retained to infer ancestry in a supervised manner. The global ancestry proportions were visualised using PONG software v1.5 ( Behr et al ., 2016 ). Local ancestry (LA) was inferred using G-Nomix v1.0 ( Hilmarsson et al ., 2021 ) and the data phased using the TOPMed Imputation Server ( Das et al ., 2016 ). Conventional association analysis The conventional association was conducted using methods previously described ( Leal et al ., 2025 ). We performed a supervised principal component analysis (PCA) using the previously mentioned parental populations to access the principal components (PCs). The SAPDSC genotyped data was then projected onto these components. A Manhattan plot for the projected PCs (1-50) was created to assess if the PCs had genome-wide influence or if they were driven by a specific genomic region ( Supplementary Figure 4 ). A stepwise regression was used to identify PCs that significantly contributed to explain variance in the phenotype while minimizing redundancy. The selected PCs capture key ancestry-related variation and potential confounding effects. The analysis included the following covariates: age, sex, PC1, PC3, PC7, PC10, PC12, PC15, PC16, PC19, PC29, PC37, PC42, PC45, PC46, PC49, PC50. A regression analysis was run with Scalable and Accurate Implementation of GEneralized mixed model (SAIGE) v1.3.1 ( Zhou et al ., 2018 ). SAIGE was used for the association analysis due to its ability to infer and account for relatedness using the genetic relationship matrix as a random effect, enabling the retention of more individuals in the analysis. Furthermore, using conditional analysis, SAIGE can reduce the occurrence of type 1 errors, allowing for the identification of rare-variant associations while accounting for an unbalanced number of cases and controls ( Zhou et al ., 2018 ). An additional analysis was run using PC1-10 including a QQ plot and genomic inflation factor ( λ GC) ( Supplementary Figure 5 ). The G-Nomix output was used to infer the LA of participants carrying the top ten GWAS hits. The variant-containing window was extracted from the imputed files and matched it to the inferred ancestry in the marginal segment probability (MSP) file. Post-GWAS in silico analysis was conducted using LocusZoom v0.13.0 ( Pruim et al ., 2010 ), Functional Mapping and Annotation (FUMA) v1.3.8 ( Watanabe et al ., 2017 ), gnomAD v4.1.0 ( Chen et al ., 2024 ), Open Targets Genetics v22.10 ( Ghoussaini et al ., 2021 ; Mountjoy et al ., 2021 ), and the Genotype-Tissue Expression (GTEx) Portal v10 ( GTEx Consortium, 2013 ). To validate the top GWAS hit, we performed Sanger sequencing on a subset of samples carrying the variant. Polymerase chain reaction amplification and sequencing were conducted using standard protocols (detailed in Supplementary Methods ). Sequencing chromatograms were manually reviewed to confirm the variant’s presence ( Supplementary Figure 6 ). Local ancestry association analysis A LA genome-wide association analysis (LA-GWAS) was run on unrelated individuals using Admix-kit v0.1.3 ( Hou et al ., 2024 ) and the LA results obtained from G-Nomix v1.0 ( Hilmarsson et al ., 2021 ), following methods previously described ( Leal et al ., 2025 ). Admix-kit allows for multiple association analysis models to be run simultaneously. For this analysis we ran the following: (1) Cochran-Armitage trend test (ATT; ( Cochran, 1954 ; Zheng and Gastwirth, 2006 )) and (2) Tractor ( Atkinson et al ., 2021 ). ATT makes use of LA information as a covariate, ultimately increasing the power of the study by 25% ( Hou et al ., 2021 ). Conversely, Tractor separates the dosage information per ancestry and estimates the effect sizes contributing to trait variation, increasing the power of detecting ancestry-specific associations ( Atkinson et al ., 2021 ). Related individuals were removed from the imputed and MSP files according to the kinship estimates completed in QC. The imputed VCF files were filtered to retain variants with a minor allele frequency (MAF) >0.005, thereby removing monomorphic and very rare variants and converted to PLINK 2.0 binary format for downstream analysis. Both models were run through Admix-kit using the stepwise regression PCs from the GWAS with SAIGE. However, for the LA-GWAS, an analysis using PC1-10 was not performed as the cohort’s complex admixture led to model overfitting, preventing p-value calculations. The ATT output files contain the beta, standard error (SE), and p-value for each variant per chromosome. The Tractor output files contained association results for each variant per ancestry as well as the results for the combined dataset, allowing for cross-ancestry comparison. Following the LA-GWAS, the results were run through the above mentioned in silico approaches. Effect directions from previous Parkinson’s disease association studies Beta and p-values were obtained for Nalls et al ., 2019 and Kim et al ., 2024 from supplementary materials, Rizig et al ., 2023 from the Neurodegenerative Disease Knowledge Portal ( Dilliott et al ., 2024 ), and for Loesch et al ., 2021 from LARGE-PD. Where necessary, LiftOver v1.0 from the University of California Santa Cruz Genome Browser was used to convert to GRCh38 ( Perez et al ., 2025 ). Beta and p-values were extracted per positional coordinate and all reference and alternate alleles were verified against their respective alleles. Beta values were used to compare studies, as they provide a standardized measure of effect size and direction across different datasets. To ensure consistency, beta values from the previous GWAS were inverted when the effect and other alleles were in the opposite orientation from the SAPDSC GWAS, allowing for direct comparison across datasets. Runs of homozygosity analysis Runs of homozygosity (ROH) are regions of consecutive homozygous genotypes ( Szpiech et al ., 2013 ) which typically span more than 1.5Mb ( Moreno-Grau et al ., 2021 ). Previous research has shown an increased homozygosity in neurodegenerative diseases, such as PD ( Simón-Sánchez et al ., 2012 ) and Alzheimer’s disease ( Ghani et al ., 2015 ; Moreno-Grau et al ., 2021 ). Given that conventional GWAS primarily detects associations at the single-variant level, ROH analysis serves as a complementary approach by identifying risk loci where homozygosity may contribute to PD susceptibility. By incorporating both GWAS and ROH-based association analysis, we aimed to capture a broader spectrum of genetic risk factors, including both common and recessively inherited variants. To investigate the association of ROH with PD status, we used PLINK v1.9 ( Purcell et al ., 2007 ) and the methods previously described ( Step, Hernández, et al ., 2025 ). The analysis included assessing four ROH parameters: (1) the sum of ROHs (S ROH ), (2) the number of ROHs (N ROH ), (3) the average length of ROHs (AV ROH ), and (4) the inbreeding coefficient (F ROH ) estimates based on the ROHs. Furthermore, we investigated ROHs which overlap known PD, pallido-pyramidal syndrome, and atypical parkinsonism genes and risk loci ( Supplementary Table 4 ). For this analysis, a 1Mb window downstream and upstream from the risk loci and genes were included, encompassing the 135 lead loci identified from the standard GWAS performed on the SAPDSC. Bonferroni threshold was adjusted by dividing 0.05 by the number of enriched ROHs to account for multiple testing. Results Global and local ancestry analysis in the South African cohort The SAPDSC exhibited five-way admixture between EUR (56%), AFR (18.8%), NAMA (13%), SAS (6.9%), and MAL (5.2%) ( Supplementary Figure 7 ), which is in line with previous studies ( Swart et al ., 2021 ). The EAS contribution was minimal and the EAS individuals from the 1000 Genomes Project Phase III date were removed from the reference files for downstream analysis, however we retained the MAL individuals as they were shown to contribute notably to LA inference. LA was investigated for each individual in the study to further assess the contribution of ancestral populations at specific genomic regions ( Supplementary Table 5 ), allowing for a more precise understanding of the genetic diversity, such as the presence of LD blocks ( Supplementary Figure 8 ). It was predicted that the ancestral inference accuracy is as follows ( Supplementary Material ), AFR (∼98.8%), EUR (∼96.5%), MAL (∼82.8%), NAMA (∼86.4%), and SAS (∼86.8%). The ancestral groups with smaller sample sizes and lower-quality genotyping files (SAS, NAMA, and MALAY) have a decreased ancestral inference accuracy. This study serves as a proof of concept for the feasibility of ancestral inference in diverse populations, highlighting the challenges associated with smaller sample sizes and lower-quality data. Genome-wide association analysis identifies risk variants associated with Parkinson’s disease The top hit (rs17098735-T) on chromosome 14 achieved genome-wide significance (beta=2.286; SE=0.401; p-value: 1.23×10 -8 ) and is not in LD with any known PD signal. The variant is directly upstream of A-Kinase Anchoring Protein 6 ( AKAP6 ) gene, suggesting potential cis-regulatory effects on AKAP6 transcriptional activity. The encoded protein is highly expressed in various brain regions ( Supplementary Figure 9 ), as obtained from the GTEx Analysis Release v10 (dbGaP Accession phs000424.v10.p2) on 01/23/2025 ( GTEx Consortium, 2013 ). Rs17098735-T has a gnomAD frequency of 4.61% in AFR and 0.02% in EUR ancestries. Among the participants carrying the variant (28 cases and 15 controls), 86.7% of the variant carriers were inferred to have AFR ancestry, 11.1% NAMA, and 2.2% MAL ancestry ( Table 1 ). No carriers exhibited EUR or SAS inferred ancestry. View this table: View inline View popup Download powerpoint Table 1: Genome-wide association study top ten hits. When looking at suggestive-level variants, one lead locus (rs75842871-G; beta=-1.776; SE=0.350; p-value: 3.90×10 -7 ) was found to functionally implicate 17 genes associated with PD ( Supplementary Table 6 ) ( Simón-Sánchez et al ., 2009 ; Do et al ., 2011 ; International Parkinson Disease Genomics Consortium et al ., 2011; Nalls et al ., 2014 ; Pickrell et al ., 2016 ; Chang et al ., 2017 ; Bandres-Ciga et al ., 2019 ; Rodrigo and Nyholt, 2021 ), as identified through Open Target Genetics, which uses a combination of genetic association data, expression quantitative trait loci (eQTL) studies, and functional annotations to link genetic variants to their potential target genes ( Ghoussaini et al ., 2021 ). We identified one lead locus with a mapped eQTL. This locus, rs75842871-G, was found to be highly expressed in the brain cortex (GTEx Analysis Release v10 (dbGaP Accession phs000424.v10.p2) on 03/05/2025) and significantly associated with Corticotropin Releasing Hormone Receptor 1 ( CRHR1 ), with a known PD association ( Cheng, Zhu and Zhang, 2020 ). Additionally, the intergenic variant rs58182002-C (beta=-3.385; SE=0.690; p-value: 9.16×10 -7 ) on chromosome 15 influences two genes with an association to PD, Cholinergic Receptor Nicotinic Alpha 7 Subunit ( CHRNA7 ) and OTU Deubiquitinase 7A ( OTUD7A ). None of the other top ten lead loci have previous associations with PD or any other neurodegenerative diseases. The summary statistics and LA information for the top ten GWAS hits are presented in Table 1 . The Manhattan plot for the GWAS is presented in Figure 2 and the QQ-plot is presented in Supplementary Figure 10 . The λ GC was 1.113, indicating a slight inflation in the dataset, potentially from using the projected PCA as a covariate in the analysis. Download figure Open in new tab Figure 2: (A) Manhattan plot for SAIGE results by chromosome. The significant peak is located on chromosome 14 within the AKAP6 gene (B) LocusZoom plot of chromosome 14. The genomic inflation factor ( λ GC) for p-values ≤0.05 was 1.113. Replication and directionality of effect sizes across summary statistics for conventional association study Although the statistically significant hit (rs17098735-T) was not replicated, three of the suggestively significant lead loci (n=351) were replicated in the LARGE-PD cohort (p-value <0.05), which are located near the Cyclin Dependent Kinase 13 ( CDK13 ), Zinc Finger Protein 722 ( ZNF722 ), and Calcium And Integrin Binding Family Member 2 ( CIB2 ) genes ( Supplementary Table 7 , Supplementary Figure 11 ). Additionally, we examined the p-values for 78 PD GWAS hits identified in Kim et al ., 2023 and 90 PD GWAS hits identified in Nalls et al ., 2019 , in both the conventional GWAS and LA-GWAS with ATT. In the conventional GWAS ( Supplementary Figure 12 ), 11 variants exhibited p-value <0.05 and aligned directionality (Kim et al ., 2023: n=5, Nalls et al ., 2019 : n=6) and one with opposing directionality ( Nalls et al ., 2019 ). The LA-GWAS had 18 variants with p-value <0.05 and aligned directionality (Kim et al ., 2023: n=8, Nalls et al ., 2019 : n=10) and one variant with opposing directionality (Kim et al ., 2023: n=1). Three of the hits were shared between the conventional GWAS and LA-GWAS, with a total of 28 unique variants with p-value <0.05 between the two methods ( Supplementary Table 8 ). Local ancestry association analysis reveals additional risk variants For the LA-GWAS using ATT, there was one lead SNP (rs149066239-T) above genome-wide significance and 35 lead SNPs above the suggestive significance threshold. The top hit (rs149066239-T; beta=-1.826; SE=0.357; p-value: 1.17×10 -8 ) is an intergenic variant and eQTL for Amyloid Beta Precursor Protein Binding Family A Member 2 ( APBA2 ). Among the loci reaching suggestive significance, we highlight a subset of variants with potential biological relevance. One such variant (rs7601869-T; beta=-1.434; SE=0.295; p-value: 2.51×10 -7 ) is an intronic variant nearest to Ubiquitin Conjugating Enzyme E2 E3 ( UBE2E3 ), which has been implicated in neurodegenerative disease, including PD ( Zheng et al ., 2016 ; Mamoor, 2024 ). Additional variants of interest include rs75614986-G (beta=-1.627; SE=0.350; p-value: 3.14×10 -7 ), rs2532570-G (beta=-1.203; SE=0.252; p-value: 5.77×10 -7 ), rs139463168-C (beta=-3.120; SE=0.809; p-value: 6.05×10 -7 ), and rs10977554-C (beta=0.448; SE=0.092; p-value: 9.29×10 -7 ). These variants are associated with genes involved in neuronal pathways, neurodegenerative diseases, and synapse function, making them particularly relevant for further investigation ( Kawashima et al ., 1997 ; Suzuki et al ., 2000 ; Di Gregorio et al ., 2014 ; Joo, Hippenmeyer and Luo, 2014 ; Ishii et al ., 2022 ). The LA-GWAS hits were expressed in a range of tissues including the brain, nerve tissue, cultured fibroblasts, tibial nerve, and skeletal muscle (( GTEx Consortium, 2013 ); dbGaP Accession phs000424.v10.p2). The Manhattan plot for the LA-GWAS using the ATT model is presented in Figure 3 and the QQ-plot ( λ GC = 1.311) is presented in Supplementary Figure 10 . The summary statistics for the top ten lead loci is presented in Table 2 . One lead locus replicated in the LARGE-PD cohort with aligned directionality ( Supplementary Figure 12 ). Download figure Open in new tab Figure 3: Manhattan plot for LA-GWAS results using ATT model by chromosome. The genomic inflation factor ( λ GC) for p-values ≤0.05 was 1.311. View this table: View inline View popup Table 2: Local ancestry genome-wide association study top ten lead loci for the ATT model. For the LA-GWAS using Tractor, six lead loci, with suggestive significance, were identified using FUMA ( Supplementary Table 9 ), with three lead loci (rs2251850-T (beta=1.480; SE=0.805; p-value: 4.14×10 -7 ), rs2449208-T (beta=0.829; SE=1.483; p-value: 7.53×10 -6 ), and rs1960758-G (beta=-2.239; SE=1.277; p-value: 9.7×10 -6 )) in the AFR ancestral component and three lead loci (rs4740539-A (beta=1.066; SE=0.277; p-value: 1.31×10 -6 ), rs2148353-G (beta=0.985; SE=0.217; p-value:4.84×10 -6 ), and rs58553697-G (beta=0.929; SE=0.215; p-value: 7.53×10 -6 )) in all the ancestral components combined ( Figure 4 ). No hits reached genome-wide or suggestive significance in the EUR, MAL, NAMA, or SAS ancestries. The top LA-GWAS hit (rs2251850-T), across all ancestries, is an intronic variant which functionally implicates the Wnt Family Member 5A ( WNT5A ) and Leucine Rich Repeats And Transmembrane Domains 1 ( LRTM1 ). This lead locus had an allele frequency across the ancestries as follows: AFR (26.6%), EUR (13.6%), MAL (2.1%), NAMA (28.1%), and SAS (11.5%). The lead loci in the AFR component, rs4740539-A, is an intergenic variant for Doublesex And Mab-3 Related Transcription Factors 1, 2, and 3 ( DMRT1 , DMRT2 , and DMRT3 ). No eQTLs were identified in the LA-GWAS using Tractor. Download figure Open in new tab Figure 4: Manhattan plot for LA-GWAS results using Tractor by chromosome for ancestries with genome-wide significance: (A) All ancestries, (B) African. No loci reached genome-wide significance for the European, Nama, Malaysian, or South Asian ancestral components. Comparison of runs of homozygosity analysis between the cohorts A comparison of the ROH analysis results for the SAPDSC and LARGE-PD cohorts is presented in Table 3 . The total N ROH was 7,087 and 15,501 for the SAPDSC and LARGE-PD cohorts, respectively. The SAPDSC cohort had a lower mean N ROH and S ROH compared to the LARGE-PD cohort ( Supplementary Figure 14 ). However, the AV ROH was similar between the two groups. The LARGE-PD cohort showed a higher F ROH , indicative of a higher degree of autozygosity suggesting that a greater proportion of this cohort is made up of homozygous segments. Ultimately, there were no statistically significant differences between cases and controls for the four parameters across both cohorts. In terms of investigating ROHs overlapping with known PD, pallido-pyramidal syndrome, and atypical parkinsonism genes and risk loci, LARGE-PD has 443 ROHs overlapping these genes and loci. Among these, 337 ROHs were enriched in cases and three passed Bonferroni correction. Notably, all three ROHs are located on chromosome 7, spanning the same PD region previously identified through GWAS ( Nalls et al ., 2019 ). Conversely, SAPDSC had 300 overlapping PD genes, 163 enriched in cases, and no ROH that passed Bonferroni correction. View this table: View inline View popup Download powerpoint Table 3: Runs of homozygosity analysis results. Discussion To our knowledge, this study serves as the first PD GWAS including South African participants and the first LA-GWAS for PD in Africa. Ancestry inference revealed the five-way admixture observed in the population, allowing for PD susceptibility to be investigated using population-specific risk variants as well as risk variants for multiple global populations, simultaneously ( Swart et al ., 2021 ; Step et al ., 2024 ). Our conventional GWAS identified one novel genome-wide significant hit as well as 134 suggestive hits (p-value: 1×10 -5 ). The lead locus (rs17098735-T) is located in the AKAP6 gene and has a CADD v1.7 Phred score of 13.18 indicating it is among the top approximately 5% of most deleterious variants and is likely to have a functional or pathogenic consequence. Proteins encoded by the AKAP genes play key roles in regulating numerous brain cellular processes, such as the development, differentiation, and migration of neurons ( He et al ., 2024 ). The AKAP6 gene has previously been associated with PD ( Phung et al ., 2020 ; Pihlstrøm et al ., 2022 ; Cappelletti et al ., 2023 ). Specifically, differentially methylated loci near AKAP6 have been linked to the development of Lewy bodies ( Pihlstrøm et al ., 2022 ) in the brain, a hallmark of PD physiopathology, where reduced AKAP6 expression has been identified in individuals with PD ( Phung et al ., 2020 ). Moreover, previous GWAS have identified risk loci in AKAP6 linked to Alzheimer’s disease ( Adewuyi et al ., 2022 ), further emphasizing its involvement in the pathogenesis of neurodegenerative diseases. There were numerous hits that reached suggestive significance with potential biological relevance which warrant further study. Many of these are intergenic and intronic variants in genes implicated in various neurological and neurodegenerative disorders, including Alzheimer’s disease ( Counts et al ., 2007 ) and Parkinson’s disease ( Quik, Campos and Grady, 2013 ). Of the 135 lead SNPs above the suggestive-significance threshold, three loci replicated in the LARGE-PD cohort, however, this did not include the top GWAS hit. While this number is low, it could be due to population specific risk variants. Additionally, the use of different genotyping arrays between the two cohorts may have contributed to the low replication rate, as differences in array design can affect variant coverage and detection. This is especially true since the MEGA array, used to genotype LARGE-PD Phase 1, did not contain custom content, limiting the ability to identify PD-associated SNPs unless they were common or imputed. Interestingly, the multi-ancestry meta-analysis ( Kim et al ., 2024 ) consistently showed more variants with aligned directionality. This consistency in effect direction across populations suggests a shared genetic contribution to disease risk, reinforcing the robustness of the association signal across diverse ancestries. The top hit (rs149066239-T) identified using the LA-GWAS with ATT model is associated with APBA2 as both an intronic variant and an eQTL. This gene has previously been linked to Alzheimer’s disease ( Arnold et al ., 2013 ), where the encoded protein acts as a neuronal stabilizer to the amyloid precursor protein, involved in synaptic vesicle exocytosis ( Berson et al ., 2023 ). One of the lead loci (rs7601869-T) is associated with the protein coding gene, UBE2E3 , which plays a role in the pathogenesis of neurodegenerative diseases ( Zheng et al ., 2016 ; Plafker et al ., 2018 ). Furthermore, in silico prediction shows that this loci has a functional impact on NCK Associated Protein 1 ( NCKAP1 ), where the apoptosis of neuronal cells is induced with reduced gene expression ( Suzuki et al ., 2000 ), and CERK Like Autophagy Regulator ( CERKL ), which plays an essential role in neuronal cell viability ( Tuson, Marfany and Gonzàlez-Duarte, 2004 ). The LA-GWAS with Tractor identified rs2251850-T, an intergenic variant in WNT5A and LRTM1 , as the top hit across all ancestral components. These proteins are surface markers for midbrain dopaminergic neurons and play a role in their maintenance ( Arenas, 2014 ; Samata et al ., 2016 ). Both genes have shown to be potential therapeutic targets for PD treatment ( Parish et al ., 2008 ; Samata et al ., 2016 ). The second (rs2449208-T) and third locus (rs1960758-G) functionally implicate CUB And Sushi Multiple Domains 1 ( CSMD1 ), which is highly expressed in the brain ( Nagase, Kikuno and Ohara, 2001 ; Kraus et al ., 2006 ). The encoded protein is localised to the synapse ( Baum et al ., 2024 ) and acts as a regulator for inflammation in the central nervous system ( Kraus et al ., 2006 ). This gene has previously been implicated in both idiopathic PD ( Bai et al ., 2021 ) and familial PD ( Ruiz-Martínez et al ., 2017 ), where pathogenic variants have been absent in neurologically healthy controls. For the LA-GWAS with Tractor assessing the AFR component, three significant lead loci were identified (rs4740539-A, rs2148353-G, and rs58553697-G). The first and third loci are not in LD and are intergenic variants with DMRT1 , DMRT2 , and DMRT3 respectively. Previous associations with sporadic PD and DMRT2 have been made in participants of Han Chinese ancestry ( Wang et al ., 2020 ). Moreover, while DMRT3 has not been linked directly to PD, it is recognised for its role in gait disturbances ( Grillner and El Manira, 2020 ; Huang et al ., 2021 ). The second locus (rs2148353-G) is an intergenic variant for Distal Membrane Arm Assembly Component 1 ( DMAC1 ) which is required for a functional accessory subunit required for mitochondrial complex I ( Stroud et al ., 2016 ). There is significant evidence for the influence of mitochondrial dysfunction, particularly of complex I, in the pathogenesis of PD ( Henchcliffe and Beal, 2008 ; Kamienieva et al ., 2023 ; Buck et al ., 2025 ). Although no significance was observed in the MAL, NAMA, or SAS ancestry components, this was expected given their minimal representation in the cohort’s inferred ancestry. The lack of significant associations in the EUR group may be due to Tractor distributing genetic risk across multiple ancestry components, reducing the strength of EUR-specific signals in an admixed cohort. One of the most consistently reported genes in PD GWAS is alpha-synuclein ( SNCA ), as highlighted in previous studies by Nalls et al . (2019) , Loesch et al . (2021) , Kim et al . (2024) , and Foo et al . (2020). However, the present study showed no significant peak over the SNCA region in the Manhattan plots for any of the GWAS approaches utilized. The SNCA SNP closest to significance was rs12648141-G (p-value: 4.4×10 -4 ). These observations may reflect a population-specific pattern, suggesting that the role of SNCA in PD susceptibility might vary across different ancestral groups. This warrants further investigation, particularly to understand how SNCA prevalence and its genetic variations may influence PD risk in AFR and AFR admixed populations. The ROH analysis found no significant differences between cases and controls in the two study cohorts. However, the mean and SE observed are comparable to previous research investigating the association of PD and ROHs in diverse ancestries, including African and American Admixed cohorts ( Step, Hernández, et al ., 2025 ). Additionally, the SAPDSC had lower values across all four of the ROH parameters investigated, with shorter and fewer ROHs, suggesting greater admixture in comparison to the LARGE-PD cohort ( Ceballos et al ., 2018 ). This aligns with previous research showing that while both populations are multi-way admixed, the South African population is five-way admixed ( Chimusa et al ., 2013 ; Swart et al ., 2021 ), while the LARGE-PD cohort is viewed as primarily three-way admixed ( Norris et al ., 2018 ; Loesch et al ., 2021 ). While we successfully conducted various association analyses, this study has several limitations. The most notable is the small sample size of the SAPDSC. Furthermore, there is no replication dataset for the South African cohort, and we acknowledge two key shortfalls in using LARGE-PD Phase 1 as a replication cohort. First, the datasets were genotyped using different arrays, SAPDSC with the NeuroBooster Array and LARGE-PD Phase 1 using MEGA. Although both arrays provide suitable coverage for imputation, there are differences between them that could impact comparability and coverage. This issue can be rectified using LARGE-PD Phase 2 which uses the NeuroBooster Array, however, data on the complete cohort is not available yet. The second shortfall is the ancestral composition of the replication cohort which differs from the discovery cohort. In LARGE-PD, the proportion of continental AFR ancestry accounts for less than 6% ( Loesch et al ., 2021 ), making it underpowered for the detection of risk variants specific to participants of AFR ancestry, a major component (18%) of the SAPDSC. Notably, the AFR proportion in LARGE-PD may be even smaller if some individuals have Nama ancestry, a distinct sub-continental population within AFR. Also, both cohorts lack whole-genome sequencing data, limiting further investigation and validation for the ROH analysis. In terms of eQTL mapping, the results generated from FUMA serve as a proof of concept that the GWAS loci function as eQTLs for known PD genes. However, future studies should incorporate RNA sequencing data and a more refined eQTL analysis pipeline to achieve more accurate validation. Finally, while we performed in silico validation using public databases, additional in vitro functional validation is required to verify the impact of the associated variants on biological pathways. In conclusion, we provide novel genetic insights into PD pathogenicity using a cohort of highly admixed and genetically diverse South African individuals, highlighting the importance of including genetically diverse and underrepresented populations in PD genetics research. This research aligns with emerging studies demonstrating the value of investigating PD genetics in underrepresented populations ( Loesch et al ., 2021 ; Rizig et al ., 2023 ). These studies have uncovered population-specific variants and mechanisms, collectively underscoring how genetic diversity can reveal previously undetected pathogenic pathways even in modest-sized cohorts. Data and code availability Data used in the preparation of this article were obtained from the Global Parkinson’s Genetics Program (GP2; https://gp2.org ). Specifically, we used Tier 2 data from GP2 release 7 [DOI: 10.5281/zenodo.10962119]. Tier 1 data can be accessed by completing a form on the Accelerating Medicines Partnership in Parkinson’s Disease (AMP®-PD) website ( https://amp-pd.org/register-for-amp-pd ). Tier 2 data access requires approval and a Data Use Agreement signed by your institution. The data analyzed in this study is subject to the following licenses/restrictions: No new genetic data was generated for this study however, summary statistics for the quality and accuracy assessment of the genetic data for the NAMA participants will be made available to researchers who meet the criteria for access after application to the Health Research Ethics Committee of Stellenbosch University. Requests to access the Nama datasets should be directed to Prof. Marlo Moller ( marlom{at}sun.ac.za ). Additionally, LARGE-PD summary statistics can be found in the PD GWAS Browser ( https://pdgenetics.shinyapps.io/GWASBrowser ) and from the Neurodegenerative Disease Knowledge Portal ( https://ndkp.hugeamp.org/phenotype.html?phenotype=Parkinsons ). The pipelines were developed and are maintained by Dr. Thiago Peixoto Leal ( peixott{at}ccf.org ) and are available at https://github.com/MataLabCCF . Additionally, an overview of the analysis and any additional scripts not available through the Mata Lab GitHub, and the identifiers for all software programs and packages used, are available on GitHub [ https://github.com/GP2code/SouthAfrican_PD_GWAS ] and were given a persistent identifier via Zenodo [DOI: 10.5281/zenodo.15442537]. Author contributions K.S., S.B., and I.M. conceptualized the manuscript. K.S. and T.P.L. wrote the first manuscript draft. T.P.L. compiled the scripts used for the data analysis. T.P.L., E.W., L.M., C.F.H., J.J.F., S.B.-C., and Y.S. assisted with the bioinformatics analysis. All authors reviewed, edited, and approved the final version of the manuscript for submission. Declaration of interests I.F.M has received honorarium from the Parkinson’s Foundation PD GENEration Steering Committee and Aligning Science Across Parkinson’s Global Parkinson Genetic Program (ASAP-GP2). Data Availability Data used in the preparation of this article were obtained from the Global Parkinson's Genetics Program (GP2; https://gp2.org ). Specifically, we used Tier 2 data from GP2 release 7 [DOI: 10.5281/zenodo.10962119]. Tier 1 data can be accessed by completing a form on the Accelerating Medicines Partnership in Parkinson's Disease (AMP®-PD) website ( https://amp-pd.org/register-for-amp-pd ). Tier 2 data access requires approval and a Data Use Agreement signed by your institution. The data analyzed in this study is subject to the following licenses/restrictions: No new genetic data was generated for this study however, summary statistics for the quality and accuracy assessment of the genetic data for the NAMA participants will be made available to researchers who meet the criteria for access after application to the Health Research Ethics Committee of Stellenbosch University. Requests to access the Nama datasets should be directed to Prof. Marlo Moller ( marlom{at}sun.ac.za ). Additionally, LARGE-PD summary statistics can be found in the PD GWAS Browser ( https://pdgenetics.shinyapps.io/GWASBrowser ) and from the Neurodegenerative Disease Knowledge Portal ( https://ndkp.hugeamp.org/phenotype.html?phenotype=Parkinsons ). The pipelines were developed and are maintained by Dr. Thiago Peixoto Leal ( peixott{at}ccf.org ) and are available at https://github.com/MataLabCCF . Additionally, an overview of the analysis and any additional scripts not available through the Mata Lab GitHub, and the identifiers for all software programs and packages used, are available on GitHub [ https://github.com/GP2code/SouthAfrican_PD_GWAS ] and were given a persistent identifier via Zenodo [DOI: 10.5281/zenodo.15442537]. https://amp-pd.org/register-for-amp-pd https://pdgenetics.shinyapps.io/GWASBrowser https://ndkp.hugeamp.org/phenotype.html?phenotype=Parkinsons Funding K.S. is supported by The Michael J. Fox Foundation and Aligning Sciences Across Parkinson’s Disease Global Parkinson Genetic Program. L.M. is funded by the National Research Foundation and Harry Crossley Foundation. I.F.M. is supported by the National Institutes of Health (1R01NS112499, U01AG076482, R01NS132437), The Michael J. Fox Foundation and the Aligning Science Across Parkinson’s Global Parkinson Genetic Program (ASAP-GP2), American Parkinson’s Disease Association (APDA) and Department of Veterans Affairs (I01BX005978-01A1). He also receives honorarium for his participation in Parkinson’s Foundation PD GENEration Steering Committee and Aligning Science Across Parkinson’s Global Parkinson Genetic Program (ASAP-GP2) Operations Committee. The Michael J. Fox Foundation (MJFF-026283 for E.W. and I.F.M.) and Alzheimer’s Disease Sequencing Project (ADSP) (5U01AG076482-03 for E.W.) Acknowledgements We would like to acknowledge and thank the study participants for their contribution. Data used in the preparation of this article were obtained from Global Parkinson’s Genetics Program (GP2). GP2 is funded by the Aligning Science Across Parkinson’s (ASAP) initiative and implemented by The Michael J. Fox Foundation for Parkinson’s Research ( https://gp2.org ). For a complete list of GP2 members see https://gp2.org . All figures were created using BioRender ( https://www.biorender.com/ ). We would like to acknowledge Lim Shen-Yang, Tan Ai-Huey, and Azlina Ahmad-Annuar for their efforts in recruiting study participants from Malaysia. We thank Kate Andersh for her work as scientific project manager for this project. We thank Dr. Thiago Peixoto Leal for his contributions to script development used in the analysis. A detailed description of the methods can be found at https://doi.org/10.1101/2025.07.18.25331793 . We also acknowledge the Centre for High Performance Computing (CHPC), South Africa, for providing computational resources. We acknowledge the support of the Centre for Tuberculosis Research (CTR) of the South African Medical Research Council. All figures were created using BioRender.com . For open access, the author has applied a CC BY public copyright license to all Author Accepted Manuscripts arising from this submission . References ↵ Adewuyi , E.O. et al. ( 2022 ) ‘A large-scale genome-wide cross-trait analysis reveals shared genetic architecture between Alzheimer’s disease and gastrointestinal tract disorders’ , Communications biology , 5 ( 1 ), p. 691 . OpenUrl PubMed ↵ Alexander , D.H. and Lange , K . ( 2011 ) ‘ Enhancements to the ADMIXTURE algorithm for individual ancestry estimation ’, BMC bioinformatics , 12 ( 1 ), p. 246 . OpenUrl CrossRef PubMed Web of Science ↵ Arenas , E . ( 2014 ) ‘Wnt signaling in midbrain dopaminergic neuron development and regenerative medicine for Parkinson’s disease’ , Journal of molecular cell biology , 6 ( 1 ), pp. 42 – 53 . OpenUrl CrossRef PubMed ↵ Armstrong , M.J. and Okun , M.S . ( 2020 ) ‘ Diagnosis and treatment of Parkinson disease: A review: A review ’, JAMA: the journal of the American Medical Association , 323 ( 6 ), pp. 548 – 560 . OpenUrl CrossRef PubMed ↵ Arnold , S.E. et al. ( 2013 ) ‘Cellular, synaptic, and biochemical features of resilient cognition in Alzheimer’s disease’ , Neurobiology of aging , 34 ( 1 ), pp. 157 – 168 . OpenUrl CrossRef PubMed Web of Science ↵ Atkinson , E.G. et al. ( 2021 ) ‘ Tractor uses local ancestry to enable the inclusion of admixed individuals in GWAS and to boost power ’, Nature genetics , 53 ( 2 ), pp. 195 – 204 . OpenUrl CrossRef PubMed ↵ Bai , X. et al. ( 2021 ) ‘CUB and Sushi Multiple Domains (CSMD1) gene polymorphisms and susceptibilities to idiopathic Parkinson’s disease in northern Chinese Han population: A case-control study’ , Parkinson’s disease , 2021 , p. 6661162 . OpenUrl ↵ Bandres-Ciga , S. et al. ( 2019 ) ‘ The genetic architecture of Parkinson disease in Spain: Characterizing population-specific risk, differential haplotype structures, and providing etiologic insight ’, Movement disorders: official journal of the Movement Disorder Society , 34 ( 12 ), pp. 1851 – 1863 . OpenUrl PubMed ↵ Bandres-Ciga , S. et al. ( 2023 ) ‘ NeuroBooster Array: A Genome-Wide Genotyping Platform to Study Neurological Disorders Across Diverse Populations ’, medRxiv: the preprint server for health sciences [Preprint] . Available at : doi: 10.1101/2023.11.06.23298176 . OpenUrl Abstract / FREE Full Text ↵ Baum , M.L. et al. ( 2024 ) ‘ CSMD1 regulates brain complement activity and circuit development ’, Brain, behavior, and immunity , 119 , pp. 317 – 332 . OpenUrl CrossRef PubMed ↵ Behr , A.A. et al. ( 2016 ) ‘ Pong: Fast analysis and visualization of latent clusters in population genetic data ’, Bioinformatics , 32 ( 18 ), pp. 2817 – 2823 . OpenUrl CrossRef PubMed ↵ Berson , E. et al. ( 2023 ) ‘Whole genome deconvolution unveils Alzheimer’s resilient epigenetic signature’ , Nature communications , 14 ( 1 ), p. 4947 . OpenUrl PubMed ↵ Brown , L.A. et al. ( 2017 ) ‘ Admixture mapping identifies an Amerindian ancestry locus associated with albuminuria in Hispanics in the United States ’, Journal of the American Society of Nephrology: JASN , 28 ( 7 ), pp. 2211 – 2220 . OpenUrl PubMed ↵ Buck , A.C. et al. ( 2025 ) ‘ Mitochondria targeted nanoparticles for the treatment of mitochondrial dysfunction-associated brain disorders ’, Frontiers in bioengineering and biotechnology , 13 , p. 1563701 . OpenUrl ↵ Byrska-Bishop , M. et al. ( 2022 ) ‘ High-coverage whole-genome sequencing of the expanded 1000 Genomes Project cohort including 602 trios ’, Cell , 185 ( 18 ), pp. 3426 – 3440.e19 . OpenUrl CrossRef PubMed ↵ Cappelletti , C. et al. ( 2023 ) ‘Transcriptomic profiling of Parkinson’s disease brains reveals disease stage specific gene expression changes’ , Acta neuropathologica , 146 ( 2 ), pp. 227 – 244 . OpenUrl CrossRef PubMed ↵ Ceballos , F.C. et al. ( 2018 ) ‘ Runs of homozygosity: windows into population history and trait architecture ’, Nature reviews. Genetics , 19 ( 4 ), pp. 220 – 234 . OpenUrl CrossRef PubMed ↵ Chang , C.C. et al. ( 2015 ) ‘ Second-generation PLINK: rising to the challenge of larger and richer datasets ’, GigaScience , 4 , p. 7 . OpenUrl CrossRef PubMed ↵ Chang , D. et al. ( 2017 ) ‘A meta-analysis of genome-wide association studies identifies 17 new Parkinson’s disease risk loci’ , Nature genetics , 49 ( 10 ), pp. 1511 – 1516 . OpenUrl CrossRef PubMed ↵ Cheng , W.-W. , Zhu , Q. and Zhang , H.-Y . ( 2020 ) ‘Identifying risk genes and interpreting pathogenesis for Parkinson’s disease by a multiomics analysis’ , Genes , 11 ( 9 ), p. 1100 . OpenUrl ↵ Chen , S. et al. ( 2024 ) ‘ A genomic mutational constraint map using variation in 76,156 human genomes ’, Nature , 625 ( 7993 ), pp. 92 – 100 . OpenUrl CrossRef PubMed ↵ Chimusa , E.R. et al. ( 2013 ) ‘ Determining ancestry proportions in complex admixture scenarios in South Africa using a novel proxy ancestry selection method ’, PloS one , 8 ( 9 ), p. e73971 . OpenUrl CrossRef PubMed ↵ Cochran , W.G . ( 1954 ) ‘Some methods for strengthening the common χ 2 tests’ , Biometrics , 10 ( 4 ), p. 417 . OpenUrl CrossRef PubMed Web of Science ↵ Counts , S.E. et al. ( 2007 ) ‘ Alpha7 nicotinic receptor up-regulation in cholinergic basal forebrain neurons in Alzheimer disease ’, Archives of neurology , 64 ( 12 ), pp. 1771 – 1776 . OpenUrl CrossRef PubMed Web of Science ↵ Das , S. et al. ( 2016 ) ‘ Next-generation genotype imputation service and methods ’, Nature genetics , 48 ( 10 ), pp. 1284 – 1287 . OpenUrl CrossRef PubMed ↵ Di Gregorio , E. et al. ( 2014 ) ‘ ELOVL5 mutations cause spinocerebellar ataxia 38 ’, The American Journal of Human Genetics , 95 ( 2 ), pp. 209 – 217 . OpenUrl CrossRef PubMed ↵ Dilliott , A.A. et al. ( 2024 ) ‘ The Neurodegenerative Disease Knowledge Portal: Propelling discovery through the sharing of neurodegenerative disease genomic resources ’, medRxiv. Available at : doi: 10.1101/2024.05.27.24307990 . OpenUrl Abstract / FREE Full Text ↵ Do , C.B. et al. ( 2011 ) ‘Web-based genome-wide association study identifies two novel loci and a substantial genetic component for Parkinson’s disease’ , PLoS genetics , 7 ( 6 ), p. e1002141 . OpenUrl ↵ Dorsey , E.R. et al. ( 2007 ) ‘ Projected number of people with Parkinson disease in the most populous nations, 2005 through 2030 ’, Neurology , 68 ( 5 ), pp. 384 – 386 . OpenUrl CrossRef PubMed ↵ Dorsey , E.R. et al. ( 2018 ) ‘ The emerging evidence of the Parkinson pandemic ’, Journal of Parkinson’s disease , 8 ( s1 ), pp. S3 – S8 . OpenUrl ↵ van Eeden , G. et al. ( 2022 ) ‘ The recombination landscape of the Khoe-San likely represents the upper limits of recombination divergence in humans ’, Genome biology , 23 ( 1 ), p. 172 . OpenUrl CrossRef PubMed ↵ Foo , J.N. et al. ( 2020 ) ‘ Identification of Risk Loci for Parkinson Disease in Asians and Comparison of Risk Between Asians and Europeans: A Genome-Wide Association Study ’, JAMA neurology , 77 ( 6 ), pp. 746 – 754 . OpenUrl PubMed ↵ Fu , H. and Shi , G . ( 2024 ) ‘ Local ancestry inference based on population-specific single-nucleotide polymorphisms-A study of admixed populations in the 1000 Genomes Project ’, Genes , 15 ( 8 ), p. 1099 . OpenUrl ↵ GBD 2021 Nervous System Disorders Collaborators ( 2024 ) ‘Global, regional, and national burden of disorders affecting the nervous system, 1990-2021: a systematic analysis for the Global Burden of Disease Study 2021’ , Lancet neurology , 23 ( 4 ), pp. 344 – 381 . OpenUrl CrossRef PubMed ↵ Ghani , M. et al. ( 2015 ) ‘ Association of Long Runs of Homozygosity With Alzheimer Disease Among African American Individuals ’, JAMA neurology , 72 ( 11 ), pp. 1313 – 1323 . OpenUrl PubMed ↵ Ghoussaini , M. et al. ( 2021 ) ‘ Open Targets Genetics: systematic identification of trait-associated genes using large-scale genetics and functional genomics ’, Nucleic acids research , 49 ( D1 ), pp. D1311 – D1320 . OpenUrl CrossRef PubMed ↵ Global Parkinson’s Genetics Program ( 2021 ) ‘ GP2: The global Parkinson’s genetics program ’, Movement disorders: official journal of the Movement Disorder Society , 36 ( 4 ), pp. 842 – 851 . OpenUrl PubMed Global Parkinson’s Genetics Program (GP2) and Leonard , H.L. ( 2025 ) ‘ Novel Parkinson’s disease genetic risk factors within and across European populations ’, medRxiv . Available at : doi: 10.1101/2025.03.14.24319455 . OpenUrl Abstract / FREE Full Text ↵ Grillner , S. and El Manira , A . ( 2020 ) ‘ Current principles of motor control, with special reference to vertebrate locomotion ’, Physiological reviews , 100 ( 1 ), pp. 271 – 320 . OpenUrl CrossRef PubMed ↵ GTEx Consortium ( 2013 ) ‘ The Genotype-Tissue Expression (GTEx) project ’, Nature genetics , 45 ( 6 ), pp. 580 – 585 . OpenUrl CrossRef PubMed ↵ Henchcliffe , C. and Beal , M.F . ( 2008 ) ‘ Mitochondrial biology and oxidative stress in Parkinson disease pathogenesis ’, Nature clinical practice. Neurology , 4 ( 11 ), pp. 600 – 609 . OpenUrl PubMed ↵ Henn , B.M. et al. ( 2011 ) ‘ Hunter-gatherer genomic diversity suggests a southern African origin for modern humans ’, Proceedings of the National Academy of Sciences of the United States of America , 108 ( 13 ), pp. 5154 – 5162 . OpenUrl Abstract / FREE Full Text ↵ He , Z. et al. ( 2024 ) ‘Novel insight into the role of A-kinase anchoring proteins (AKAPs) in ischemic stroke and therapeutic potentials’ , Biomedecine & pharmacotherapie [Biomedicine & pharmacotherapy] , 175 ( 116715 ), p. 116715 . OpenUrl PubMed ↵ Hilmarsson , H. et al. ( 2021 ) ‘ High resolution ancestry deconvolution for next generation genomic data ’, bioRxiv. Available at : doi: 10.1101/2021.09.19.460980 . OpenUrl Abstract / FREE Full Text ↵ Hou , K. et al. ( 2021 ) ‘ On powerful GWAS in admixed populations ’, Nature genetics , 53 ( 12 ), pp. 1631 – 1633 . OpenUrl CrossRef PubMed ↵ Hou , K. et al. ( 2024 ) ‘Admix-kit: an integrated toolkit and pipeline for genetic analyses of admixed populations’ , Bioinformatics (Oxford, England) , 40 ( 4 ). Available at : doi: 10.1093/bioinformatics/btae148 . OpenUrl CrossRef ↵ Huang , M. et al. ( 2021 ) ‘Mitochondrial dysfunction-induced H3K27 hyperacetylation perturbs enhancers in Parkinson’s disease’ , JCI insight , 6 ( 17 ). Available at : doi: 10.1172/jci.insight.138088 . OpenUrl CrossRef ↵ Hughes , A.J. et al. ( 1992 ) ‘Accuracy of clinical diagnosis of idiopathic Parkinson’s disease: a clinico-pathological study of 100 cases’ , Journal of neurology, neurosurgery, and psychiatry , 55 ( 3 ), pp. 181 – 184 . OpenUrl Abstract / FREE Full Text International Parkinson Disease Genomics Consortium et al. ( 2011 ) ‘ Imputation of sequence variants for identification of genetic risks for Parkinson’s disease: a meta-analysis of genome-wide association studies ’, Lancet , 377 ( 9766 ), pp. 641 – 649 . OpenUrl CrossRef PubMed Web of Science ↵ Ishii , A. et al. ( 2022 ) ‘ Tau-binding protein PRMT8 facilitates vacuole degeneration in the brain ’, The journal of biochemistry , 172 ( 4 ), pp. 233 – 243 . OpenUrl PubMed ↵ Joo , W. , Hippenmeyer , S. and Luo , L . ( 2014 ) ‘Neurodevelopment. Dendrite morphogenesis depends on relative levels of NT-3/TrkC signaling’ , Science (New York, N.Y.) , 346 ( 6209 ), pp. 626 – 629 . OpenUrl ↵ Kamienieva , I. et al. ( 2023 ) ‘In search for mitochondrial biomarkers of Parkinson’s disease: Findings in parkin-mutant human fibroblasts’ , Biochimica et biophysica acta. Molecular basis of disease , 1869 ( 7 ), p. 166787 . OpenUrl PubMed ↵ Karikari , T.K. et al. ( 2018 ) ‘ Commentary: Global, regional, and national burden of neurological disorders during 1990–2015: a systematic analysis for the Global Burden of Disease Study 2015 ’, Frontiers in neurology , 9 . Available at : doi: 10.3389/fneur.2018.00201 . OpenUrl CrossRef ↵ Kawashima , N. et al. ( 1997 ) ‘ Differential expression of isoforms of PSD-95 binding protein (GKAP/SAPAP1) during rat brain development ’, FEBS letters , 418 ( 3 ), pp. 301 – 304 . OpenUrl CrossRef PubMed Web of Science ↵ Kim , J.J. et al. ( 2024 ) ‘Multi-ancestry genome-wide association meta-analysis of Parkinson’s disease’ , Nature genetics , 56 ( 1 ), pp. 27 – 36 . OpenUrl CrossRef PubMed ↵ Kraus , D.M. et al. ( 2006 ) ‘ CSMD1 is a novel multiple domain complement-regulatory protein highly expressed in the central nervous system and epithelial tissues ’, The journal of immunology , 176 ( 7 ), pp. 4419 – 4430 . OpenUrl PubMed ↵ Leal , T.P. et al. ( 2025 ) ‘Genotype-phenotype association study conducted on LARGE-PD reveals novel loci associated with Parkinson’s Disease’ , medRxiv , p. 2025.07.18.25331793 . ↵ Leonard , H. et al. ( 2024 ) ‘Global Parkinson’s Genetics Program data release 7’ . Zenodo. Available at : doi: 10.5281/ZENODO.10962119 . OpenUrl CrossRef ↵ Loesch , D.P. et al. ( 2021 ) ‘Characterizing the Genetic Architecture of Parkinson’s Disease in Latinos’ , Annals of neurology , 90 ( 3 ), pp. 353 – 365 . OpenUrl CrossRef PubMed ↵ Mamoor , S . ( 2024 ) ‘Dysregulation of UBE2E3 expression in Parkinson’s Disease’ . Available at : doi: 10.13140/RG.2.2.10262.97603 . OpenUrl CrossRef ↵ Moreno-Grau , S. et al. ( 2021 ) ‘Long runs of homozygosity are associated with Alzheimer’s disease’ , Translational psychiatry , 11 ( 1 ), p. 142 . OpenUrl PubMed ↵ Mountjoy , E. et al. ( 2021 ) ‘ An open approach to systematically prioritize causal variants and genes at all published human GWAS trait-associated loci ’, Nature genetics , 53 ( 11 ), pp. 1527 – 1533 . OpenUrl CrossRef PubMed ↵ Nagase , T. , Kikuno , R. and Ohara , O . ( 2001 ) ‘ Prediction of the coding sequences of unidentified human genes. XXI. The complete sequences of 60 new cDNA clones from brain which code for large proteins ’, DNA research: an international journal for rapid publication of reports on genes and genomes , 8 ( 4 ), pp. 179 – 187 . OpenUrl ↵ Nalls , M.A. et al. ( 2014 ) ‘Large-scale meta-analysis of genome-wide association data identifies six new risk loci for Parkinson’s disease’ , Nature genetics , 46 ( 9 ), pp. 989 – 993 . OpenUrl CrossRef PubMed ↵ Nalls , M.A. et al. ( 2019 ) ‘Identification of novel risk loci, causal insights, and heritable risk for Parkinson’s disease: a meta-analysis of genome-wide association studies’ , Lancet neurology , 18 ( 12 ), pp. 1091 – 1102 . OpenUrl CrossRef PubMed ↵ Norris , E.T. et al. ( 2018 ) ‘ Genetic ancestry, admixture and health determinants in Latin America ’, BMC genomics , 19 ( Suppl 8 ), p. 861 . OpenUrl CrossRef PubMed ↵ Parish , C.L. et al. ( 2008 ) ‘ Wnt5a-treated midbrain neural stem cells improve dopamine cell replacement therapy in parkinsonian mice ’, The journal of clinical investigation , 118 ( 1 ), pp. 149 – 160 . OpenUrl CrossRef PubMed Web of Science ↵ Perez , G. et al. ( 2025 ) ‘ The UCSC Genome Browser database: 2025 update ’, Nucleic acids research , 53 ( D1 ), pp. D1243 – D1249 . OpenUrl CrossRef PubMed ↵ Phung , D.M. et al. ( 2020 ) ‘Meta-analysis of differentially expressed genes in the substantia nigra in Parkinson’s disease supports phenotype-specific transcriptome changes’ , Frontiers in neuroscience , 14 , p. 596105 . OpenUrl PubMed ↵ Pickrell , J.K. et al. ( 2016 ) ‘ Detection and interpretation of shared genetic influences on 42 human traits ’, Nature genetics , 48 ( 7 ), pp. 709 – 717 . OpenUrl CrossRef PubMed ↵ Pihlstrøm , L. et al. ( 2022 ) ‘ Epigenome-wide association study of human frontal cortex identifies differential methylation in Lewy body pathology ’, Nature communications , 13 ( 1 ), p. 4932 . OpenUrl PubMed ↵ Plafker , K.S. et al. ( 2018 ) ‘ Loss of the ubiquitin conjugating enzyme UBE2E3 induces cellular senescence ’, Redox biology , 17 , pp. 411 – 422 . OpenUrl PubMed ↵ Pruim , R.J. et al. ( 2010 ) ‘LocusZoom: regional visualization of genome-wide association scan results’ , Bioinformatics (Oxford, England) , 26 ( 18 ), pp. 2336 – 2337 . OpenUrl CrossRef PubMed Web of Science ↵ Purcell , S. et al. ( 2007 ) ‘ PLINK: a tool set for whole-genome association and population-based linkage analyses ’, American journal of human genetics , 81 ( 3 ), pp. 559 – 575 . OpenUrl CrossRef PubMed ↵ Quik , M. , Campos , C. and Grady , S.R . ( 2013 ) ‘ Multiple CNS nicotinic receptors mediate L-dopa-induced dyskinesias: studies with parkinsonian nicotinic receptor knockout mice ’, Biochemical pharmacology , 86 ( 8 ), pp. 1153 – 1162 . OpenUrl CrossRef PubMed ↵ Ragsdale , A.P. , et al. ( 2023 ) ‘ Publisher Correction: A weakly structured stem for human origins in Africa ’, Nature , 620 ( 7972 ), p. E11. ↵ van Rensburg , Z.J. et al. ( 2022 ) ‘The South African Parkinson’s Disease Study Collection’ , Movement disorders: official journal of the Movement Disorder Society , 37 ( 1 ), pp. 230 – 232 . OpenUrl PubMed ↵ Rizig , M. et al. ( 2023 ) ‘Identification of genetic risk loci and causal insights associated with Parkinson’s disease in African and African admixed populations: a genome-wide association study’ , Lancet neurology , 22 ( 11 ), pp. 1015 – 1025 . OpenUrl CrossRef PubMed ↵ Rodrigo , L.M. and Nyholt , D.R . ( 2021 ) ‘Imputation and reanalysis of ExomeChip data identifies novel, conditional and joint genetic effects on Parkinson’s disease risk’ , Genes , 12 ( 5 ), p. 689 . OpenUrl ↵ Ruiz-Martínez , J. et al. ( 2017 ) ‘ Whole-exome sequencing associates novel CSMD1 gene mutations with familial Parkinson disease ’, Neurology. Genetics , 3 ( 5 ), p. e177 . OpenUrl ↵ Samata , B. et al. ( 2016 ) ‘ Purification of functional human ES and iPSC-derived midbrain dopaminergic progenitors using LRTM1 ’, Nature communications , 7 , p. 13097 . OpenUrl PubMed ↵ Schlebusch , C.M. et al. ( 2012 ) ‘Genomic variation in seven Khoe-San groups reveals adaptation and complex African history’ , Science (New York, N.Y.) , 338 ( 6105 ), pp. 374 – 379 . OpenUrl ↵ Schubert , R. , Andaleon , A. and Wheeler , H.E . ( 2020 ) ‘ Comparing local ancestry inference models in populations of two- and three-way admixture ’, PeerJ , 8 ( e10090 ), p. e10090 . OpenUrl CrossRef PubMed ↵ Shriner , D. et al. ( 2023 ) ‘ Universal genome-wide association studies: Powerful joint ancestry and association testing ’, HGG advances , 4 ( 4 ), p. 100235 . OpenUrl PubMed ↵ Simón-Sánchez , J. et al. ( 2009 ) ‘Genome-wide association study reveals genetic risk underlying Parkinson’s disease’ , Nature genetics , 41 ( 12 ), pp. 1308 – 1312 . OpenUrl CrossRef PubMed Web of Science ↵ Simón-Sánchez , J. et al. ( 2012 ) ‘Cooperative genome-wide analysis shows increased homozygosity in early onset Parkinson’s disease’ , PloS one , 7 ( 3 ), p. e28787 . OpenUrl CrossRef PubMed ↵ Step , K. et al. ( 2024 ) ‘ Exploring the role of underrepresented populations in polygenic risk scores for neurodegenerative disease risk prediction ’, Frontiers in neuroscience , 18 . Available at : doi: 10.3389/fnins.2024.1380860 . OpenUrl CrossRef ↵ Step , K. , Eltaraifee , E. , et al. ( 2025 ) ‘Advancing Parkinson’s disease research in Africa: A strategic training framework of the global Parkinson’s genetics program’ , Movement disorders: official journal of the Movement Disorder Society , 40 ( 1 ), pp. 51 – 56 . OpenUrl PubMed ↵ Step , K. , Hernández , C.F. , et al. ( 2025 ) ‘Genome-wide assessment identifies novel runs of homozygosity linked to Parkinson’s disease etiology across diverse ancestral populations’ , medRxiv. Available at : doi: 10.1101/2025.05.21.25328076 . OpenUrl Abstract / FREE Full Text ↵ Stroud , D.A. et al. ( 2016 ) ‘ Accessory subunits are integral for assembly and function of human mitochondrial complex I ’, Nature , 538 ( 7623 ), pp. 123 – 126 . OpenUrl CrossRef PubMed ↵ Suzuki , T. et al. ( 2000 ) ‘ Molecular cloning of a novel apoptosis-related gene, human Nap1 (NCKAP1), and its possible relation to Alzheimer disease ’, Genomics , 63 ( 2 ), pp. 246 – 254 . OpenUrl CrossRef PubMed ↵ Swart , Y. et al. ( 2021 ) ‘ Local ancestry adjusted Allelic association analysis robustly captures tuberculosis susceptibility loci ’, Frontiers in genetics , 12 , p. 716558 . OpenUrl PubMed ↵ Szpiech , Z.A. et al. ( 2013 ) ‘ Long runs of homozygosity are enriched for deleterious variation ’, American journal of human genetics , 93 ( 1 ), pp. 90 – 102 . OpenUrl CrossRef PubMed ↵ Thami , P.K. and Chimusa , E.R . ( 2019 ) ‘ Population structure and implications on the genetic architecture of HIV-1 phenotypes within Southern Africa ’, Frontiers in genetics , 10 , p. 905 . OpenUrl PubMed ↵ Trevisan , L. et al. ( 2024 ) ‘Genetics in Parkinson’s disease, state-of-the-art and future perspectives’ , British medical bulletin , 149 ( 1 ), pp. 60 – 71 . OpenUrl CrossRef PubMed ↵ Tuson , M. , Marfany , G. and Gonzàlez-Duarte , R . ( 2004 ) ‘ Mutation of CERKL, a novel human ceramide kinase gene, causes autosomal recessive retinitis pigmentosa (RP26) ’, The American Journal of Human Genetics , 74 ( 1 ), pp. 128 – 138 . OpenUrl CrossRef PubMed Web of Science ↵ Wang , B. et al. ( 2020 ) ‘Sporadic Parkinson’s disease potential risk loci identified in Han ancestry of Chinese mainland’ , Frontiers in aging neuroscience , 12 , p. 603793 . OpenUrl PubMed ↵ Watanabe , K. et al. ( 2017 ) ‘ Functional mapping and annotation of genetic associations with FUMA ’, Nature communications , 8 ( 1 ), p. 1826 . OpenUrl PubMed ↵ Zheng , G. and Gastwirth , J.L . ( 2006 ) ‘ On estimation of the variance in Cochran-Armitage trend tests for genetic association using case-control studies ’, Statistics in medicine , 25 ( 18 ), pp. 3150 – 3159 . OpenUrl CrossRef PubMed Web of Science ↵ Zheng , Q. et al. ( 2016 ) ‘ Dysregulation of ubiquitin-proteasome system in neurodegenerative diseases ’, Frontiers in aging neuroscience , 8 , p. 303 . OpenUrl PubMed ↵ Zhou , W. et al. ( 2018 ) ‘ Efficiently controlling for case-control imbalance and sample relatedness in large-scale genetic association studies ’, Nature genetics , 50 ( 9 ), pp. 1335 – 1341 . OpenUrl CrossRef PubMed Bandres-Ciga , S. et al. ( 2024 ) ‘ NeuroBooster Array: A Genome-Wide Genotyping Platform to Study Neurological Disorders Across Diverse Populations ’, Movement disorders: official journal of the Movement Disorder Society. Available at : doi: 10.1002/mds.29902 OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted August 02, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Genome-wide association analyses reveal susceptibility variants linked to Parkinson’s disease in the South African population using inferred global and local ancestry Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Genome-wide association analyses reveal susceptibility variants linked to Parkinson’s disease in the South African population using inferred global and local ancestry Kathryn Step , Thiago Peixoto Leal , Emily Waldo , Lusanda Madula , Yolandi Swart , Carlos F. Hernández , Jonggeol Jeffrey Kim , Sara Bandres-Ciga , Global Parkinson’s Genetics Program (GP2) , Ignacio F. Mata , Soraya Bardien medRxiv 2025.08.01.25331910; doi: https://doi.org/10.1101/2025.08.01.25331910 Share This Article: Copy Citation Tools Genome-wide association analyses reveal susceptibility variants linked to Parkinson’s disease in the South African population using inferred global and local ancestry Kathryn Step , Thiago Peixoto Leal , Emily Waldo , Lusanda Madula , Yolandi Swart , Carlos F. Hernández , Jonggeol Jeffrey Kim , Sara Bandres-Ciga , Global Parkinson’s Genetics Program (GP2) , Ignacio F. Mata , Soraya Bardien medRxiv 2025.08.01.25331910; doi: https://doi.org/10.1101/2025.08.01.25331910 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4538) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (541) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3333) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9232) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00d6b7a4da758d3',t:'MTc3OTYzNzQ0Ng=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.