Mixed-effects polygenic risk score Phenome-wide association study detects genetic correlation between colorectal cancer risk and phenotype data extracted from the electronic health record

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 50,391 characters · extracted from preprint-html · click to expand
Mixed-effects polygenic risk score Phenome-wide association study detects genetic correlation between colorectal cancer risk and phenotype data extracted from the electronic health record | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Mixed-effects polygenic risk score Phenome-wide association study detects genetic correlation between colorectal cancer risk and phenotype data extracted from the electronic health record View ORCID Profile Elisabeth A. Rosenthal , Wei-Qi Wei , Yuan Luo , Bahram Namjou-Khales , Daniel J. Schaid , Edward D. Esplin , Michael Lape , Leah Kottyan , Jennifer Allen Pacheco , Chunhua Weng , Adam Samuel Gordon , Iftikhar J. Kullo , David R. Crosslin , William M. 
Grady , Li Hsu , Ulrike Peters , Gail P. Jarvik doi: https://doi.org/10.1101/2025.02.26.25322864 Elisabeth A. Rosenthal 1 Department of Medicine, Division Medical Genetics, University of Washington Medical Center , Seattle, WA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Elisabeth A. Rosenthal For correspondence: erosen{at}uw.edu Wei-Qi Wei 2 Department of Biomedical Informatics, Vanderbilt University Medical Center , Nashville, TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yuan Luo 3 Feinberg School of Medicine, Northwestern University , Chicago, IL Find this author on Google Scholar Find this author on PubMed Search for this author on this site Bahram Namjou-Khales 4 Center for Autoimmune Genomics and Etiology, Cincinnati Children’s Hospital Medical Center , Cincinnati, OH Find this author on Google Scholar Find this author on PubMed Search for this author on this site Daniel J. Schaid 5 Department of Quantitative Health Sciences, Mayo Clinic , Rochester, MN Find this author on Google Scholar Find this author on PubMed Search for this author on this site Edward D. 
Esplin 6 Labcorp Genetics , San Francisco, CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Michael Lape 7 Division of Allergy & Immunology, Cincinnati Children’s Hospital Medical Center , Cincinnati, OH Find this author on Google Scholar Find this author on PubMed Search for this author on this site Leah Kottyan 7 Division of Allergy & Immunology, Cincinnati Children’s Hospital Medical Center , Cincinnati, OH Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jennifer Allen Pacheco 8 Center for Genetic Medicine, Feinberg School of Medicine, Northwestern University , Chicago, IL Find this author on Google Scholar Find this author on PubMed Search for this author on this site Chunhua Weng 9 Department of Biomedical Informatics , Columbia, New York, NY Find this author on Google Scholar Find this author on PubMed Search for this author on this site Adam Samuel Gordon 8 Center for Genetic Medicine, Feinberg School of Medicine, Northwestern University , Chicago, IL Find this author on Google Scholar Find this author on PubMed Search for this author on this site Iftikhar J. Kullo 10 Department of Cardiovascular Medicine, Mayo Clinic , Rochester, MN Find this author on Google Scholar Find this author on PubMed Search for this author on this site David R. Crosslin 11 Division of Biomedical Informatics and Genomics, School of Medicine , Tulane, New Orleans, LA Find this author on Google Scholar Find this author on PubMed Search for this author on this site William M. 
Grady 12 Translational Sciences and Therapeutics Division and Public Health Sciences Division, Fred Hutchinson Cancer Center , Seattle, WA 13 Division of Gastroenterology, University of Washington School of Medicine , Seattle, WA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Li Hsu 14 Public Health Sciences Division, Fred Hutchinson Cancer Center , Seattle, WA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ulrike Peters 14 Public Health Sciences Division, Fred Hutchinson Cancer Center , Seattle, WA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Gail P. Jarvik 1 Department of Medicine, Division Medical Genetics, University of Washington Medical Center , Seattle, WA 15 Department of Genome Sciences, University of Washington Medical Center , Seattle, WA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Many factors, including environmental and genetic variables, contribute to Colorectal Cancer (CRC) risk. Some of these risk factors may share underlying genetics with CRC. We investigated potential shared genetics by performing a Phenome-wide association study (PheWAS) with a multi-ancestry CRC polygenic risk score (PRS). The discovery cohort (N=426,464) consisted of ancestrally diverse participants from the United Kingdom Biobank. The replication cohort (N=87,271) consisted of ancestrally diverse participants from the electronic Medical Records and Genomics Network. We used a mixed-effects model to adjust for the presence of related individuals in both datasets. To preserve power, we limited testing to ancestor phecodes derived from the electronic health record (EHR), which were not likely to be a result of CRC or its treatment. 
We discovered and replicated associations between the CRC PRS and breast cancer, prostate cancer, obesity, smoking and alcohol use (discovery p< 1.1e-4; replication p<0.0019). As these results corroborate findings from other studies using orthogonal methods, we demonstrate that a CRC PRS can be used as a proxy for genetic risk for CRC when investigating shared genetics between CRC and other phenotypes. Further study of the relationship between PRS from multiple traits with EHR data may reveal additional shared genetic factors. Introduction Colorectal cancer (CRC) is the second most deadly cancer in the United States, with an average mortality of 13%, and increasing incidence among younger individuals ( Siegel et al. 2023 ). Risk of CRC is complex and known to be associated with both environmental and genetic risk factors. Associated environmental factors for CRC risk include smoking, diet, alcohol intake, and physical activity, as well as the comorbidities obesity and diabetes ( Sawicki et al. 2021 ; Jiang et al. 2011 ; Pearson-Stuttard et al. 2021 ; Kyrgiou et al. 2017 ). Genetic heritability of CRC is estimated to be between 12 and 40% ( Jiao et al. 2014 ; Graff et al. 2017). Several genes are known to underlie high monogenic risk, but these account for only ~20% of familial cancers ( Lowery et al. 2016 ; Patel and Ahnen 2012 ; Rosenthal et al. 2018 ). Polygenic risk scores (PRSs), which aggregate the effects of multiple small effects across the genome, may capture some of this missing heritability ( Hatchell et al. 2019 ; Manolio et al. 2009 ). The heritability explained by common SNPs for CRC has been estimated to be between 7 and 11% accounting for ~73% of familial risk. ( Fernandez-Rozadilla et al. 2023 ; Zhang et al. 2020 ; Jiao et al. 2014 ). In addition, PRS have been shown to predict CRC risk, independently of family history, indicating that they may also explain some portion of non-familial CRC ( Mars et al. 2022 ; Tada et al. 2016 ; Archambault et al. 
2020 ). Moreover, CRC PRS have been shown to be more predictive for younger adults than older adults, indicating that they may be helpful in identifying individuals who would benefit from increased or earlier screening ( Archambault et al. 2020 ). Mendelian randomization (MR) and cross-trait linkage disequilibrium (LD) score regression provide evidence of shared genetics underlying risk for CRC and other traits. MR is a technique that can determine if risk factors are merely associated with an outcome of interest, or if they are causal for that outcome by using underlying genetics of the risk factor as instrumental variables ( Burgess and Thompson 2013 ; Burgess, Dudbridge, and Thompson 2016 ). For example, although it is well known that body mass index (BMI) is positively associated with CRC risk, MR studies indicate that genetic predictors of BMI may be causally associated with CRC risk ( Renehan et al. 2008 ; Kyrgiou et al. 2017 ; Bouras et al. 2024 ). Similarly, MR has been used to show a genetic correlation between early-onset CRC and alcohol consumption ( Laskar et al. 2024 ). Traditional MR studies focus on a single gene, as increasing the number of SNPs involved can lead to an increased chance of violating instrumental variable assumptions, reducing the reliability of MR. Additionally, pleiotropy influencing multiple biological pathways (i.e., non-horizontal pleiotropy) violates the instrumental variable assumptions and can lead to misinterpretations of causal relationships ( Burgess et al. 2017 ; Bowden et al. 2017 ). Alternatively, cross-trait LD score regression measures genetic correlation genome-wide between two traits ( Lee et al. 2012 ; Bulik-Sullivan et al. 2015 ). For example, CRC has been shown to be genetically correlated with breast, lung, and esophageal cancers in individuals of European ancestry ( Lindström et al. 2023 ). 
Similarly, CRC has been shown to be genetically correlated with fasting insulin, BMI, and smoking using LD-score regression ( Fernandez-Rozadilla et al. 2023 ). Here, we investigate the genetic correlation underlying CRC risk and other phenotypes by performing PheWAS with a CRC PRS ( Carroll, Bastarache, and Denny 2014 ). The CRC PRS used here has been developed in individuals of European and Asian ancestry, and validated in multiple datasets ( Thomas et al. 2023 ). We used participant data from the United Kingdom Biobank (UKBB) as a discovery cohort and participant data from the electronic Medical Records and Genomics (eMERGE) study as a replication cohort. As biobanks often include related individuals, we used a mixed model analysis, accounting for possible phenotype correlation among related individuals, allowing efficient use of all data. We corroborate the shared genetic relationship between CRC and multiple traits, such as breast cancer, obesity, smoking and alcohol use disorder. We also find evidence for potential shared genetics underlying CRC risk and prostate cancer. Methods Participants The discovery cohort was derived from the UKBB ( Bycroft et al. 2018 ), extracted on September 18, 2023. We included participants for whom electronic health record (EHR) data was available and for whom the PRS could be calculated. In addition to the EHR data, we extracted cancer registry and death registry data. We then excluded any participants who developed cancer before age 18, resulting in a sample size of N=426,464 (N female = 232,979, N male = 193,485) ( Table 1 ). View this table: View inline View popup Download powerpoint Table 1: Percent female (%F), Age (mean and range), and total sample count by cohort. The replication cohort is also broken down by site and sorted by the number of adult participants at each site. 
UKBB=United Kingdom Biobank; eMERGE=electronic Medical Records and Genomics Network; CHOP=Children’s Hospital of Philadelphia; CCHMC=Cincinnati Children’s Hospital Medical Center; UW=University of Washington. The replication cohort was derived from adult participants in the eMERGE phase 3 study (N = 87,271; N female = 48,737, N male = 38,534), as described previously ( Stanaway et al. 2019 ) ( Table 1 ). The eMERGE phase 3 network consisted of 12 sites across the continental United States. Data for these participants were extracted from the EHR through January 2022. The 12 sites in the eMERGE cohort included the following clinical site types: hospital or primary care, adult or pediatric, and specialty clinics. Genotype data and PRS calculation Methods for genotype data collection for both UKBB and eMERGE participants have been published previously ( Stanaway et al. 2019 ; Bycroft et al. 2018 ). SNP data was extracted from the imputed genotypes in UKBB (Resource 530; Category 100319) and eMERGE. The genotype data in eMERGE was harmonized and combined over several genotype arrays and then imputed genome-wide. We used the principal components of ancestry (UKBB Data-Field 22009) and kinship estimates (UKBB Data-Field 22021) provided by the UKBB. We calculated principal components of ancestry and estimated kinship in eMERGE, adjusting for recent family history, using the R package GENESIS v2.20.1 ( Gogarten et al. 2019 ; R Core Team 2024 ) (see Supplemental Material ). The allele effects for each SNP in the PRS were obtained from ( Thomas et al. 2023 ) and can also be found in the PGS catalog (PGS003852). The PRS was calculated using the R package bigsnpr v1.10.8 ( Privé et al. 2018 ), which accounts for potential strand reversal and allele mismatches. 
PheWAS Discovery We transformed the International Classification of Diseases (ICD) 9/10 data from the medical records to phecodes using the R package PheWAS v0.99.6-1 with phecode map v1.2024 ( Carroll, Bastarache, and Denny 2014 ; Wei et al. 2017 ; Wu et al. 2019 ). We defined the medical record to be the compilation of data from the EHR, death records (UKBB participants only), and cancer registries. In the situation where the death record contained more than one ICD code, we kept only the ICD code for the primary cause of death. For each phecode, a participant was assigned case status if that phecode was observed at least twice and was assigned control status if that phecode was never observed. Participants with exactly one observation of the phecode were excluded from analysis for that phecode. Therefore, the sample size varies by phecode. The phecode system is hierarchical, similar to the ICD system, so that ancestor codes define a broad phenotype, and child codes define increasingly specific phenotypes related to the ancestor. Therefore, a participant who is a case for a child code would also be a case for all of its ancestor codes. Due to this hierarchical nature, the number of cases is always highest for the ancestor codes relative to the child codes. We performed a mixed model PheWAS, which accounts for kinship among the participants. We used the mixed model analysis from the R package GENESIS v2.32 to evaluate the association between the PRS and case/control status for each phecode, adjusting for sex, age, and the first four principal components of ancestry ( Gogarten et al. 2019 ). Case/control status was encoded as a factor (1, 0 respectively) and stored in VCF format in order to use the functionality of the R package GENESIS. Age was defined as the oldest age recorded in the medical record or age at death, when available. 
We excluded males from the analysis of breast cancer as it is rare in males, and we excluded females in the analysis of prostate cancer. As many phecodes are correlated (e.g., ancestor phecodes and their child phecodes), using a Bonferroni p-value correction adjusting for all phecodes would be too conservative. Therefore, we focused our analysis on the ancestor codes (N = 510). As the PRS was derived with UKBB data as part of the training data, we expect significance for CRC ancestor phecodes (153, 565) and do not count them toward the number of tests. In addition, we removed all phecodes from the discovery analysis which could be a consequence of CRC or its treatment (N = 53) (e.g., Chemotherapy, Ileostomy status) (see Supplemental Table 1). Therefore, the p-value cutoff used in the initial association was 0.05/455=1.1e-4. Replication We counted the number of significant tests from the discovery analysis as the total number of tests for the replication analysis. Therefore, the p-value cutoff was determined as 0.05/N, where N is the number of significant ancestor phecodes, from UKBB, that are not likely to be a consequence of CRC or its treatment. We performed a mixed model PheWAS adjusting for age, sex, site, and the first four principal components of ancestry. We included site because ICD code usage varied across the twelve sites, depending on the type of clinic (hospital vs. primary care; adult vs. pediatric; general vs. specialty). As a supplementary analysis, we performed a mixed model PheWAS in the UKBB cohort, excluding participants who were coded as cases for CRC (phecode 153) for all phecodes that replicated in the eMERGE cohort. The purpose of this analysis was to detect possible bias in effect size estimates due to the presence of CRC cases in the initial discovery (see Supplemental Material). Results Demographics Females represented 55% and 56% of the participants in UKBB and eMERGE, respectively ( Table 1 ). 
The average age was slightly higher in UKBB (65 ± 10.5 years) than eMERGE (63 ± 18 years). On average, in UKBB and eMERGE, females were two and four years younger than males, respectively. Additionally, the age distribution varied across sites in eMERGE. Younger participants were enrolled at Boston Children’s and Children’s Hospital of Philadelphia (CHOP), but the total sample size of adults (age >= 18) at these sites made up only < 1% of the total analyzed here. In eMERGE, the largest portion of participants (35%) were from Harvard, followed by Vanderbilt (25%). The majority of participants identified as White in both UKBB (94%) and eMERGE (80%). In UKBB 2.2% self-identified as Asian, 0.6% self-identified as Black, 2.3% self-identified as Other or Mixed ancestry, and 0.5% did not indicate a race. Similar to age, the self-identified race varied across eMERGE sites, with a low of 11% self-identifying as White at Mt. Sinai and up to 99.6% self-identifying as White at Geisinger. Overall, 8.1% self-identified as Black, with a range between 0.3% (Geisinger) and 65% (Mt. Sinai). Overall, 1% self-identified as Asian, with a range between 0% (Mt. Sinai) and 3.9% (Columbia). Only 0.1% self-identified as Native American or Pacific Islander, with the largest proportion (0.8%) at Kaiser/University of Washington (UW). Self-identified race was missing for 11% overall in eMERGE. Four percent of participants in eMERGE identified as Hispanic, with the majority at Harvard (1.8%) and Mt. Sinai (1.5%). There was no mechanism at eMERGE to self-identify as multiple races or ethnicities. However, there is genotypic variation and admixture among the participants in both cohorts ( Figure 1 ). Download figure Open in new tab Figure 1: The first two principal components (PC) of ancestry in the A) Discovery and B) Replication cohorts. 
In both plots, the three points of the triangle, clockwise from the left-most point, represent European genetic ancestry, African genetic ancestry, and East Asian genetic ancestry. PRS The overall mean and standard deviation (s.d.) of the PRS is 0.34 ±0.47 in UKBB and 0.61 ±0.47 in eMERGE. After adjusting for the first four PCs of ancestry, the mean was zero for both cohorts. The distribution of the ancestry adjusted PRS is not significantly different from a standard Normal distribution in both cohorts using a Kolmogorov-Smirnov test (p > 0.17), as expected. Discovery and Replication The PRS was highly significantly associated with CRC in UKBB (p<2.2e-308), as expected ( Supplemental Table 1). In addition, the PRS was associated with 22 ancestor phecodes that are likely a consequence of CRC or its treatment, such as Secondary malignant neoplasm and Chemotherapy ( Supplemental Table 1). Excluding these phecodes, the PRS was associated with 26 ancestor phecodes at an alpha level of 1.1e-4 in UKBB ( Table 2 ). View this table: View inline View popup Download powerpoint Table 2: Effect estimates (β) and p-values (p) for phecodes that are significantly associated with the CRC PRS in the discovery cohort (p< 1.1e-4) and the replication cohort (p<0.0019). The PRS was significantly associated with CRC in eMERGE (p=7.8e-65), as expected. Given that there were 26 significant ancestor phecodes that are not necessarily a consequence of CRC or its treatment in the UKBB data, we set the p-value cutoff to 0.05/26= 0.0019 for the replication. Ten of the ancestor phecodes replicated in eMERGE ( Table 2 ): Septicemia; Family history; Breast Cancer; Acquired absence of breast; Cancer of prostate; Benign neoplasm of colon; Benign neoplasm of other parts of digestive system; Overweight, obesity and other hyperalimentation; Alcohol-related disorders; and Tobacco use disorder. 
Additionally, the direction of effect is the same across cohorts and the estimated effect sizes are similar across the cohorts. However, when we removed the CRC cases from the discovery UKBB cohort, the effect estimates for Septicemia and Acquired absence of breast reduced by approximately half (see Supplemental Material). Discussion We provide evidence for genetic correlation between CRC and several traits, using the CRC PRS as a genetic score for CRC risk. The estimated effect size in the discovery and replication cohorts for these phecodes was similar across cohorts, indicating that the overall genetic correlation structure may be similar across the cohorts. Among non-CRC related cancers, only breast cancer in females and prostate cancer in males were found to be associated with the CRC PRS. Additionally, benign neoplasms of the colon and of other parts of the digestive system, which can become cancerous if not removed, were associated with the PRS. Unsurprisingly, we detected correlation between the PRS and Family History. Family History is defined as family history for any disorder, including neoplasms and cancer. It is likely that patients with CRC, prostate cancer, or breast cancer are asked for their family history in the clinic, which would explain this association. We also detected an association between the CRC PRS and modifiable environmental phecodes believed to be risk factors for CRC. These include obesity, smoking, and alcohol use. Although we replicated the association with obesity, we did not replicate the association with diabetes as the p-value for diabetes (0.003) in the eMERGE replication cohort was slightly higher than the cutoff for replication (1.9e-3). We did not observe bias in the estimated effect sizes for these phecodes due to the presence of the CRC cases, indicating that these behavioral risk factors may share underlying genetics with CRC risk. 
Conversely, although we detected an association with septicemia, there is evidence that the presence of CRC cases in the initial UKBB cohort may be biasing the estimated effect as the exclusion of these cases resulted in a change in effect size by half. Our results replicate genetic correlations with CRC that were found in other studies using orthogonal methods. These include correlations with breast cancer, obesity, alcohol use, and smoking ( Renehan et al. 2008 ; Kyrgiou et al. 2017 ; Bouras et al. 2024 ; Lindström et al. 2023 ; Fernandez-Rozadilla et al. 2023 ; Laskar et al. 2024 ). This indicates that using PRS to test for genetic correlations may be a complementary method to explore the joint genetic architecture of multiple traits, including traits that are thought to be behavioral, but have evidence of genetic causes ( Polderman et al. 2015 ). We did not replicate the genetic correlation between CRC and lung (cancer within the respiratory system) or esophageal cancers that were previously reported ( Lindström et al. 2023 ). It is possible that these were false positives, or that using a PRS is not as sensitive as cross-trait LD score regression, or there is lower power in the eMERGE dataset to detect an effect. We also detected association between CRC risk and prostate cancer, which has not been detected by the other methods, to our knowledge. Interpretation of these results is limited by a few factors. First, the phecodes are built from ICD9/10 billing codes. Billing codes do not necessarily indicate the presence of a phenotype but rather that a clinical encounter related to that phenotype occurred ( Wei et al. 2016 ). To reduce noise due to this issue, we considered participants as cases if they had the same phecode twice on separate clinical visits and as controls if there was no occurrence of that phecode. In addition, we used cancer registry data, which further improved the case definition for cancers. 
Moreover, we focused on the ancestor codes, which encompass a broader phenotype definition and therefore may be more coherent. For example, both diabetes I and II billing codes were observed for the same individuals (data not shown). Rather than focus on the descendant diabetes codes, we looked at the association between the CRC PRS and diabetes overall. Furthermore, billing code use is not consistent across clinics, which can result in a loss of power or increase type I error ( Sulieman et al. 2022 ; Pendergrass and Crawford 2019 ). We observed this phenomenon in our data for vision phenotypes, necessitating adjustment by site (data not shown). Finally, behavioral conditions are not reliably recorded in EHR as billing codes ( Pendergrass and Crawford 2019 ). Therefore, we may be undercounting the number of participants who smoke or use alcohol. However, smoking and alcohol behavior are typically assessed in the clinic ( Adler and Stead 2015 ). As we observe an association between the PRS and both smoking and alcohol use in both cohorts, we believe this provides evidence for further investigation of a potential genetic correlation between CRC and smoking and alcohol consumption behaviors. Another limitation is that the PRS was developed including data from the UKBB. Therefore, the PRS may be confounded with some phenotypes and behaviors within the UKBB. However, participants from the UKBB made up only 13.5% of the training data (4.8% of the CRC cases), reducing potential confounding ( Thomas et al. 2023 ; Fernandez-Rozadilla et al. 2023 ). Similarly, participants from eMERGE were used to validate the PRS (27% of the total validation cohort). However, as part of the validation dataset, no adjustments were made to the PRS model. Therefore, it is unlikely that unintentional confounding between the PRS and phenotypes of interest is present due to the eMERGE participants. 
Further studies assessing PheWAS for other trait PRS may provide evidence in support of or against the correlations observed here. For example, PRSs exist for obesity and several cancers ( Lennon et al. 2024 ). A multiple PRS PheWAS may reveal shared genetics across traits. Correlation of shared loci across trait specific PRS may also reveal specific regions of the genome that exhibit pleiotropy for those traits, and potentially improve the localization of causal variants within these loci. Furthermore, genetics of behavioral traits could be explored with the use of multiple PRS, and possibly lead to detangling the environmental contribution of these behavioral traits on phenotypes of interest from the shared underlying genetics. Further methods development will be required to achieve this goal. We have demonstrated that PheWAS, using a CRC PRS as a genetic score, can aid in detecting genetic correlation between CRC risk and other traits recorded as ICD codes in the medical record. This provides an orthogonal method to MR and LD-score regression. MR assumes all the SNPs have effect sizes in the same direction and that any direct pleiotropic effects of the SNPs on the outcome are distributed independently of the genetic associations with the risk factors. Neither of these assumptions is necessary for PheWAS with a PRS. LD-score regression typically uses summary statistics and requires harmonization of phenotypes and statistics across datasets. PheWAS with a PRS allows for the use of individual-level data that can be found in large biobanks as well as local clinics. Further analysis with other PRS is necessary to confirm the utility of PRS PheWAS as a reliable orthogonal method to detect genetic correlation between other traits. Web Resources Phecodes: http://www.phecode.org Statements and Declarations Competing Interests The authors have no relevant financial or non-financial interests to disclose. 
Author Contributions All authors contributed to writing and editing this manuscript. Data analysis and the first draft were done by Elisabeth A Rosenthal. All authors commented on previous versions of the manuscript. Study conception and design were by EAR, DRC, UP, and GPJ. Data curation was done by W-QW, YL, BN-K, ML, LK, JAP, DRC, and GPJ. Funding, resources and supervision were provided by DRC and GPJ. All authors read and approved the final manuscript. Data Availability eMERGE stage 3 phenotype and genotype data are available in dbGAP under accession number phs001584.v2.p2. UKBB data is available at the UK Biobank Resource. Ethics approval Ethics approval for the eMERGE stage 3 study was provided by each participating institution’s institutional review board. Consent to participate Informed consent was obtained from all individual participants included in the study. Acknowledgments This research has been conducted using the UK Biobank Resource under Application Number 47377. This work uses data provided by patients and collected by the NHS as part of their care and support. eMERGE Network (Phase III): This phase of the eMERGE Network was initiated and funded by the NHGRI through the following grants: U01HG008657 (Group Health Cooperative/University of Washington); U01HG008685 (Brigham and Women’s Hospital); U01HG008672 (Vanderbilt University Medical Center); U01HG008666 (Cincinnati Children’s Hospital Medical Center); U01HG006379 (Mayo Clinic); U01HG008679 (Geisinger Clinic); U01HG008680 (Columbia University Health Sciences); U01HG008684 (Children’s Hospital of Philadelphia); U01HG008673 (Northwestern University); U01HG008701 (Vanderbilt University Medical Center serving as the Coordinating Center); U01HG008676 (Partners Healthcare/Broad Institute); U01HG008664 (Baylor College of Medicine); and U54MD007593 (Meharry Medical College). References ↵ Adler , Nancy E. , and William W. Stead . 2015 . 
“ Patients in Context--EHR Capture of Social and Behavioral Determinants of Health .” The New England Journal of Medicine 372 ( 8 ): 698 – 701 . OpenUrl CrossRef PubMed ↵ Archambault , Alexi N. , Yu-Ru Su , Jihyoun Jeon , Minta Thomas , Yi Lin , David V. Conti , Aung Ko Win , et al. 2020 . “ Cumulative Burden of Colorectal Cancer-Associated Genetic Variants Is More Strongly Associated With Early-Onset vs Late-Onset Cancer .” Gastroenterology 158 ( 5 ): 1274 – 86 .e12. OpenUrl CrossRef PubMed ↵ Bouras , Emmanouil , Dipender Gill , Verena Zuber , Neil Murphy , Niki Dimou , Krasimira Aleksandrova , Sarah J. Lewis , et al. 2024 . “ Identification of Potential Mediators of the Relationship between Body Mass Index and Colorectal Cancer: A Mendelian Randomization Analysis .” International Journal of Epidemiology 53 ( 3 ). doi: 10.1093/ije/dyae067 . OpenUrl CrossRef ↵ Bowden , Jack , Fabiola Del Greco M , Cosetta Minelli , George Davey Smith , Nuala Sheehan , and John Thompson . 2017 . “ A Framework for the Investigation of Pleiotropy in Two-Sample Summary Data Mendelian Randomization .” Statistics in Medicine 36 ( 11 ): 1783 – 1802 . OpenUrl CrossRef PubMed ↵ Bulik-Sullivan , Brendan , Hilary K. Finucane , Verneri Anttila , Alexander Gusev , Felix R. Day , Po-Ru Loh , ReproGen Consortium , et al. 2015 . “ An Atlas of Genetic Correlations across Human Diseases and Traits .” Nature Genetics 47 ( 11 ): 1236 – 41 . OpenUrl CrossRef PubMed ↵ Burgess , Stephen , Jack Bowden , Tove Fall , Erik Ingelsson , and Simon G. Thompson . 2017 . “ Sensitivity Analyses for Robust Causal Inference from Mendelian Randomization Analyses with Multiple Genetic Variants .” Epidemiology 28 ( 1 ): 30 – 42 . OpenUrl CrossRef PubMed ↵ Burgess , Stephen , Frank Dudbridge , and Simon G. Thompson . 2016 . “ Combining Information on Multiple Instrumental Variables in Mendelian Randomization: Comparison of Allele Score and Summarized Data Methods .” Statistics in Medicine 35 ( 11 ): 1880 – 1906 . 
OpenUrl CrossRef PubMed ↵ Burgess , Stephen , and Simon G. Thompson . 2013 . “ Use of Allele Scores as Instrumental Variables for Mendelian Randomization .” International Journal of Epidemiology 42 ( 4 ): 1134 – 44 . OpenUrl CrossRef PubMed Web of Science ↵ Bycroft , Clare , Colin Freeman , Desislava Petkova , Gavin Band , Lloyd T. Elliott , Kevin Sharp , Allan Motyer , et al. 2018 . “ The UK Biobank Resource with Deep Phenotyping and Genomic Data .” Nature 562 ( 7726 ): 203 – 9 . OpenUrl CrossRef PubMed ↵ Carroll , Robert J. , Lisa Bastarache , and Joshua C. Denny . 2014 . “ R PheWAS: Data Analysis and Plotting Tools for Phenome-Wide Association Studies in the R Environment .” Bioinformatics 30 ( 16 ): 2375 – 76 . OpenUrl CrossRef PubMed Web of Science ↵ Fernandez-Rozadilla , Ceres , Maria Timofeeva , Zhishan Chen , Philip Law , Minta Thomas , Stephanie Schmit , Virginia Díez-Obrero , et al. 2023 . “ Deciphering Colorectal Cancer Genetics through Multi-Omic Analysis of 100,204 Cases and 154,587 Controls of European and East Asian Ancestries .” Nature Genetics 55 ( 1 ): 89 – 99 . OpenUrl CrossRef PubMed ↵ Gogarten , Stephanie M. , Tamar Sofer , Han Chen , Chaoyu Yu , Jennifer A. Brody , Timothy A. Thornton , Kenneth M. Rice , and Matthew P. Conomos . 2019 . “ Genetic Association Testing Using the GENESIS R/Bioconductor Package .” Bioinformatics 35 ( 24 ). doi: 10.1093/bioinformatics/btz567 . OpenUrl CrossRef PubMed ↵ Hatchell , Kathryn E. , Qiongshi Lu , Scott J. Hebbring , Erin D. Michos , Alexis C. Wood , and Corinne D. Engelman . 2019 . “ Ancestry-Specific Polygenic Scores and SNP Heritability of 25(OH)D in African- and European-Ancestry Populations .” Human Genetics 138 ( 10 ): 1155 – 69 . OpenUrl PubMed ↵ Jiang , Ying , Qiwen Ben , Hong Shen , Weiqi Lu , Yong Zhang , and Jun Zhu . 2011 . 
“ Diabetes Mellitus and Incidence and Mortality of Colorectal Cancer: A Systematic Review and Meta-Analysis of Cohort Studies .” European Journal of Epidemiology 26 ( 11 ): 863 – 76 . OpenUrl CrossRef PubMed ↵ Jiao , Shuo , Ulrike Peters , Sonja Berndt , Hermann Brenner , Katja Butterbach , Bette J. Caan , Christopher S. Carlson , et al. 2014 . “ Estimating the Heritability of Colorectal Cancer .” Human Molecular Genetics 23 ( 14 ): 3898 – 3905 . OpenUrl CrossRef PubMed ↵ Kyrgiou , Maria , Ilkka Kalliala , Georgios Markozannes , Marc J. Gunter , Evangelos Paraskevaidis , Hani Gabra , Pierre Martin-Hirsch , and Konstantinos K. Tsilidis . 2017 . “ Adiposity and Cancer at Major Anatomical Sites: Umbrella Review of the Literature .” BMJ 356 ( February ): j477 . OpenUrl Abstract / FREE Full Text ↵ Laskar , R. S. , C. Qu , J. R. Huyghe , T. Harrison , R. B. Hayes , Y. Cao , P. T. Campbell , et al. 2024 . “ Genome-Wide Association Studies and Mendelian Randomization Analyses Provide Insights into the Causes of Early-Onset Colorectal Cancer .” Annals of Oncology 35 ( 6 ): 523 – 36 . OpenUrl PubMed ↵ Lee , S. H. , J. Yang , M. E. Goddard , P. M. Visscher , and N. R. Wray . 2012 . “ Estimation of Pleiotropy between Complex Diseases Using Single-Nucleotide Polymorphism-Derived Genomic Relationships and Restricted Maximum Likelihood .” Bioinformatics 28 ( 19 ): 2540 – 42 . OpenUrl CrossRef PubMed Web of Science ↵ Lennon , Niall J. , Leah C. Kottyan , Christopher Kachulis , Noura S. Abul-Husn , Josh Arias , Gillian Belbin , Jennifer E. Below , et al. 2024 . “ Selection, Optimization and Validation of Ten Chronic Disease Polygenic Risk Scores for Clinical Implementation in Diverse US Populations .” Nature Medicine 30 ( 2 ): 480 – 87 . OpenUrl CrossRef PubMed ↵ Lindström , Sara , Lu Wang , Helian Feng , Arunabha Majumdar , Sijia Huo , James Macdonald , Tabitha Harrison , et al. 2023 . 
“ Genome-Wide Analyses Characterize Shared Heritability among Cancers and Identify Novel Cancer Susceptibility Regions .” Journal of the National Cancer Institute 115 ( 6 ): 712 – 32 . OpenUrl CrossRef PubMed ↵ Lowery , Jan T. , Dennis J. Ahnen , Paul C. Schroy 3rd . , Heather Hampel , Nancy Baxter , C. Richard Boland , Randall W. Burt , et al. 2016 . “ Understanding the Contribution of Family History to Colorectal Cancer Risk and Its Clinical Implications: A State-of-the-Science Review .” Cancer 122 ( 17 ): 2633 – 45 . OpenUrl CrossRef PubMed ↵ Manolio , T. A. , F. S. Collins , N. J. Cox , D. B. Goldstein , L. A. Hindorff , D. J. Hunter , M. I. McCarthy , et al. 2009 . “ Finding the Missing Heritability of Complex Diseases .” Nature 461 ( 7265 ): 747 – 53 . OpenUrl CrossRef PubMed Web of Science ↵ Mars , Nina , Joni V. Lindbohm , Pietro Della Briotta Parolo , Elisabeth Widén , Jaakko Kaprio , Aarno Palotie , FinnGen , and Samuli Ripatti . 2022 . “ Systematic Comparison of Family History and Polygenic Risk across 24 Common Diseases .” American Journal of Human Genetics 109 ( 12 ): 2152 – 62 . OpenUrl PubMed ↵ Patel , Swati G. , and Dennis J. Ahnen . 2012 . “ Familial Colon Cancer Syndromes: An Update of a Rapidly Evolving Field .” Current Gastroenterology Reports 14 ( 5 ): 428 – 38 . OpenUrl CrossRef PubMed ↵ Pearson-Stuttard , Jonathan , Nikos Papadimitriou , Georgios Markozannes , Sofia Cividini , Artemisia Kakourou , Dipender Gill , Evangelos C. Rizos , et al. 2021 . “ Type 2 Diabetes and Cancer: An Umbrella Review of Observational and Mendelian Randomization Studies .” Cancer Epidemiology, Biomarkers & Prevention: A Publication of the American Association for Cancer Research, Cosponsored by the American Society of Preventive Oncology 30 ( 6 ): 1218 – 28 . OpenUrl PubMed ↵ Pendergrass , Sarah A. , and Dana C. Crawford . 2019 . 
“ Using Electronic Health Records To Generate Phenotypes For Research .” Current Protocols in Human Genetics / Editorial Board, Jonathan L. Haines [et Al.] 100 ( 1 ): e80 . OpenUrl ↵ Polderman , Tinca J. C. , Beben Benyamin , Christiaan A. de Leeuw , Patrick F. Sullivan , Arjen van Bochoven , Peter M. Visscher , and Danielle Posthuma . 2015 . “ Meta-Analysis of the Heritability of Human Traits Based on Fifty Years of Twin Studies .” Nature Genetics 47 ( 7 ): 702 – 9 . OpenUrl CrossRef PubMed ↵ Privé , Florian , Hugues Aschard , Andrey Ziyatdinov , and Michael G. B. Blum . 2018 . “ Efficient Analysis of Large-Scale Genome-Wide Data with Two R Packages: Bigstatsr and Bigsnpr .” Bioinformatics 34 ( 16 ): 2781 – 87 . OpenUrl CrossRef PubMed ↵ R Core Team . 2024 . R: A Language and Environment for Statistical Computing . Vienna, Austria: R Foundation for Statistical Computing . https://www.R-project.org/ . Versions 4.3.2 and 4.4.0. ↵ Renehan , Andrew G. , Margaret Tyson , Matthias Egger , Richard F. Heller , and Marcel Zwahlen . 2008 . “ Body-Mass Index and Incidence of Cancer: A Systematic Review and Meta-Analysis of Prospective Observational Studies .” The Lancet 371 ( 9612 ): 569 – 78 . OpenUrl ↵ Rosenthal , Elisabeth A. , Brian H. Shirts , Laura M. Amendola , Martha Horike-Pyne , Peggy D. Robertson , Fuki M. Hisama , Robin L. Bennett , et al. 2018 . “ Rare Loss of Function Variants in Candidate Genes and Risk of Colorectal Cancer .” Human Genetics 137 ( 10 ): 795 – 806 . OpenUrl PubMed ↵ Sawicki , Tomasz , Monika Ruszkowska , Anna Danielewicz , Ewa Niedźwiedzka , Tomasz Arłukowicz , and Katarzyna E. Przybyłowicz . 2021 . “ A Review of Colorectal Cancer in Terms of Epidemiology, Risk Factors, Development, Symptoms and Diagnosis .” Cancers 13 ( 9 ). doi: 10.3390/cancers13092025 . OpenUrl CrossRef PubMed ↵ Siegel , Rebecca L. , Nikita Sandeep Wagle , Andrea Cercek , Robert A. Smith , and Ahmedin Jemal . 2023 . 
“ Colorectal Cancer Statistics, 2023 .” CA: A Cancer Journal for Clinicians 73 ( 3 ): 233 – 54 . OpenUrl CrossRef PubMed ↵ Stanaway , Ian B. , Taryn O. Hall , Elisabeth A. Rosenthal , Melody Palmer , Vivek Naranbhai , Rachel Knevel , Bahram Namjou-Khales , et al. 2019 . “ The eMERGE Genotype Set of 83,717 Subjects Imputed to ~40 Million Variants Genome Wide and Association with the Herpes Zoster Medical Record Phenotype .” Genetic Epidemiology 43 ( 1 ): 63 – 81 . OpenUrl CrossRef PubMed ↵ Sulieman , Lina , Robert M. Cronin , Robert J. Carroll , Karthik Natarajan , Kayla Marginean , Brandy Mapes , Dan Roden , Paul Harris , and Andrea Ramirez . 2022 . “ Comparing Medical History Data Derived from Electronic Health Records and Survey Answers in the All of Us Research Program .” Journal of the American Medical Informatics Association: JAMIA 29 ( 7 ): 1131 – 41 . OpenUrl PubMed ↵ Tada , Hayato , Olle Melander , Judy Z. Louie , Joseph J. Catanese , Charles M. Rowland , James J. Devlin , Sekar Kathiresan , and Dov Shiffman . 2016 . “ Risk Prediction by Genetic Risk Scores for Coronary Heart Disease Is Independent of Self-Reported Family History .” European Heart Journal 37 ( 6 ): 561 – 67 . OpenUrl CrossRef PubMed ↵ Thomas , Minta , Yu-Ru Su , Elisabeth A. Rosenthal , Lori C. Sakoda , Stephanie L. Schmit , Maria N. Timofeeva , Zhishan Chen , et al. 2023 . “ Combining Asian and European Genome-Wide Association Studies of Colorectal Cancer Improves Risk Prediction across Racial and Ethnic Populations .” Nature Communications 14 ( 1 ): 6147 . OpenUrl PubMed ↵ Wei , Wei-Qi , Lisa A. Bastarache , Robert J. Carroll , Joy E. Marlo , Travis J. Osterman , Eric R. Gamazon , Nancy J. Cox , Dan M. Roden , and Joshua C. Denny . 2017 . “ Evaluating Phecodes, Clinical Classification Software, and ICD-9-CM Codes for Phenome-Wide Association Studies in the Electronic Health Record .” PloS One 12 ( 7 ): e0175508 . OpenUrl CrossRef PubMed ↵ Wei , Wei-Qi , Pedro L. 
Teixeira , Huan Mo , Robert M. Cronin , Jeremy L. Warner , and Joshua C. Denny . 2016 . “ Combining Billing Codes, Clinical Notes, and Medications from Electronic Health Records Provides Superior Phenotyping Performance .” Journal of the American Medical Informatics Association: JAMIA 23 ( e1 ): e20 – 27 . OpenUrl CrossRef PubMed ↵ Wu , Patrick , Aliya Gifford , Xiangrui Meng , Xue Li , Harry Campbell , Tim Varley , Juan Zhao , et al. 2019 . “ Mapping ICD-10 and ICD-10-CM Codes to Phecodes: Workflow Development and Initial Evaluation .” JMIR Medical Informatics 7 ( 4 ): e14325 . OpenUrl ↵ Zhang , Yan Dora , Amber N. Hurson , Haoyu Zhang , Parichoy Pal Choudhury , Douglas F. Easton , Roger L. Milne , Jacques Simard , et al. 2020 . “ Assessment of Polygenic Architecture and Risk Prediction Based on Common Variants across Fourteen Cancers .” Nature Communications 11 ( 1 ): 3353 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted February 28, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Mixed-effects polygenic risk score Phenome-wide association study detects genetic correlation between colorectal cancer risk and phenotype data extracted from the electronic health record Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Mixed-effects polygenic risk score Phenome-wide association study detects genetic correlation between colorectal cancer risk and phenotype data extracted from the electronic health record Elisabeth A. Rosenthal , Wei-Qi Wei , Yuan Luo , Bahram Namjou-Khales , Daniel J. Schaid , Edward D. Esplin , Michael Lape , Leah Kottyan , Jennifer Allen Pacheco , Chunhua Weng , Adam Samuel Gordon , Iftikhar J. Kullo , David R. Crosslin , William M. Grady , Li Hsu , Ulrike Peters , Gail P. Jarvik medRxiv 2025.02.26.25322864; doi: https://doi.org/10.1101/2025.02.26.25322864 Share This Article: Copy Citation Tools Mixed-effects polygenic risk score Phenome-wide association study detects genetic correlation between colorectal cancer risk and phenotype data extracted from the electronic health record Elisabeth A. Rosenthal , Wei-Qi Wei , Yuan Luo , Bahram Namjou-Khales , Daniel J. Schaid , Edward D. Esplin , Michael Lape , Leah Kottyan , Jennifer Allen Pacheco , Chunhua Weng , Adam Samuel Gordon , Iftikhar J. Kullo , David R. Crosslin , William M. Grady , Li Hsu , Ulrike Peters , Gail P. 
Jarvik medRxiv 2025.02.26.25322864; doi: https://doi.org/10.1101/2025.02.26.25322864 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4440) Dentistry and Oral Medicine (444) Dermatology (383) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1510) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1126) Genetic and Genomic Medicine (6605) Geriatric Medicine (668) Health Economics (998) Health Informatics (4541) Health Policy (1369) Health Systems and Quality Improvement (1613) Hematology (543) HIV/AIDS (1265) Infectious Diseases (except HIV/AIDS) (15921) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (147) Nephrology (668) Neurology (6604) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1145) Occupational and Environmental Health (957) Oncology (3334) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (692) Primary Care Research (711) Psychiatry and Clinical Psychology (5448) Public and Global Health (9234) Radiology and Imaging (2199) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (594) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a0153238d8d62fcb',t:'MTc3OTcxODk4Nw=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-06-19T06:35:33.578913+00:00