Formal Statistical Replication Analysis in Lung Cancer Genome-Wide Association Studies

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 93,007 characters · extracted from preprint-html · click to expand
Formal Statistical Replication Analysis in Lung Cancer Genome-Wide Association Studies | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Formal Statistical Replication Analysis in Lung Cancer Genome-Wide Association Studies View ORCID Profile Yung-Han Chang , View ORCID Profile Jinyoung Byun , View ORCID Profile Bryan R. Gorman , View ORCID Profile Rayjean J. Hung , View ORCID Profile James D. McKay , View ORCID Profile Christopher I. 
Amos , View ORCID Profile Saiju Pyarajan , View ORCID Profile Arjun Bhattacharya , View ORCID Profile Ryan Sun doi: https://doi.org/10.1101/2025.10.02.25337130 Yung-Han Chang 1 Department of Biostatistics, University of Texas MD Anderson Cancer Center UTHealth Houston Graduate School of Biomedical Sciences , Houston, TX, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yung-Han Chang Jinyoung Byun 2 Institute for Clinical and Translational Research, Baylor College of Medicine , Houston, TX, USA 3 Department of Medicine, Section of Epidemiology and Population Sciences, Baylor College of Medicine , Houston, TX, USA 4 University of New Mexico Comprehensive Cancer Center , Albuquerque, NM, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jinyoung Byun Bryan R. Gorman 5 Center for Data and Computational Sciences (C-DACS), VA Cooperative Studies Program , VA Boston Healthcare System, Boston, MA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Bryan R. Gorman Rayjean J. Hung 6 Dalla Lana School of Public Health, University of Toronto , Toronto, Ontario, Canada 7 Prosserman Centre for Population Health Research, Lunenfeld-Tanenbaum Research Institute , Sinai Health System, Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rayjean J. Hung James D. McKay 8 Section of Genetics, International Agency for Research on Cancer , World Health Organization, Lyon, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for James D. McKay Christopher I. 
Amos 2 Institute for Clinical and Translational Research, Baylor College of Medicine , Houston, TX, USA 3 Department of Medicine, Section of Epidemiology and Population Sciences, Baylor College of Medicine , Houston, TX, USA 4 University of New Mexico Comprehensive Cancer Center , Albuquerque, NM, USA 9 Dan L Duncan Comprehensive Cancer Center, Baylor College of Medicine , Houston, TX, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Christopher I. Amos Saiju Pyarajan 5 Center for Data and Computational Sciences (C-DACS), VA Cooperative Studies Program , VA Boston Healthcare System, Boston, MA, USA 10 Department of Medicine, Brigham and Women’s Hospital, Harvard Medical School , Boston, MA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Saiju Pyarajan Arjun Bhattacharya 11 Department of Epidemiology, University of Texas MD Anderson Cancer Center , Houston, TX, USA 12 Institute for Data Science in Oncology, University of Texas MD Anderson Cancer Center , Houston, TX, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Arjun Bhattacharya For correspondence: rsun3{at}mdanderson.org Ryan Sun 1 Department of Biostatistics, University of Texas MD Anderson Cancer Center UTHealth Houston Graduate School of Biomedical Sciences , Houston, TX, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ryan Sun For correspondence: rsun3{at}mdanderson.org Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Dozens of genome-wide association studies (GWAS) have identified thousands of single nucleotide polymor-phisms (SNPs) associated with lung cancer risk. However, it remains challenging to translate these findings to clinical insights. 
One well-known obstacle is the large amount of type I error attached to GWAS; attempted solutions such as setting a p-value threshold across multiple cohorts or looking for small meta-analysis p-values have only somewhat reduced false positive findings. In contrast, here we advocate for a statistical model-based replication analysis. We first demonstrate that a formal statistical test of the replication composite null hypothesis - which is rejected only when the regression coefficient of a SNP is non-zero and falls in the same direction in multiple cohorts simultaneously - can curate a smaller, higher-quality list of significant SNPs than common alternatives. In two-way simulations, the false discovery rate (FDR) of model-based replication analysis is 6.4 times lower than that of meta-analysis with a $p < 10^{-8}$ threshold. In three-way replication analysis, 9.8% of the International Lung Cancer Consortium GWAS significant SNPs are replicated for squamous cell lung cancer while 33.8% are replicated for lung adenocarcinoma. Finally, we construct polygenic risk scores (PRSs) and find the replication-based PRS achieves virtually identical performance to a GWAS-significant PRS while using 87.3% fewer variants. Thus, formal model-based replication analysis can greatly reduce spurious findings while still identifying important variants, allowing for more robust and more efficient translation of GWAS results. Introduction Lung cancer is the leading cause of cancer-related mortality in both men and women [ 1 , 2 , 3 , 4 ]. While tobacco use is the primary risk factor, individuals who have never smoked or had significant environmental exposures may still face an increased risk due to inherited genetic factors [ 1 , 2 , 5 , 6 ]. The heritability of lung cancer has been estimated at 15-18% [ 7 ]. 
Dozens of lung cancer genome-wide association studies (GWAS) have been performed in varied settings and diverse populations, and this approach still remains highly popular, with many substantial recent and ongoing efforts [ 1 , 8 , 9 ]. However, despite researchers identifying thousands of genetic variants associated with lung cancer risk over the past two decades [ 8 , 10 , 11 , 12 ], there still exists a significant portion of missing heritability, and clinical applications of GWAS findings remain limited. Such a trend suggests that the continued unearthing of new associations may not be producing actionable insights. One substantial, well-understood obstacle of the GWAS approach is the large amount of type I error it produces. Because many non-functional single nucleotide polymorphisms (SNPs) lie in linkage disequilibrium (LD) with true causal variants, GWAS highlights many unimportant variants. While some highly significant variants are consistently identified across studies, a much larger number are often falsely emphasized as well, as we will show. Such unimportant noise greatly hinders downstream translational efforts such as building polygenic risk scores (PRSs) or developing targeted therapies [ 1 , 8 , 9 , 13 , 14 , 15 ]. Efforts to prune inconsequential SNPs, such as requiring a certain p -value threshold in multiple datasets or using significant meta-analysis results that integrate different cohorts, have only somewhat improved the quality of findings [ 16 ]. Although the value of replication is well-recognized, the lack of replicability has been consistently cited as a major challenge facing genetics research. The number of lung cancer replication studies remains very small in proportion to the number of new GWAS or GWAS meta-analysis investigations [ 2 ]. One possible reason for the scarcity of replication literature is the lack of robust, systematic, and interpretable methods for testing the replication null hypothesis [ 17 , 18 ]. 
Here, we advocate for and perform formal statistical model-based replication analysis in lung cancer GWAS cohorts. Our overall goal is to provide an interpretable and reliable framework for reducing false positives while still highlighting important variants. This framework can be straightforwardly extended to other phenotypes. We first demonstrate how to utilize a version of the popular empirical Bayes two-group model to test the replication composite null hypothesis [ 19 ]. This null hypothesis is rarely explicitly stated in GWAS studies. However, the definition of the replication composite null - which is rejected only when a SNP regression coefficient is non-zero and falls in the same direction across multiple cohorts simultaneously - truly matches the scientific purpose of a replication study (see Figure 1 ) [ 18 ]. Download figure Open in new tab Figure 1: Conceptual comparison of replication analysis, meta-analysis, and p-value threshold approach. Replication analysis tests the composite null, requiring SNPs to show consistent associations across all cohorts (e.g., SNPs 2 and 3). Meta-analysis instead tests the global null, so it can reject based on signal in a single cohort (e.g., SNP 1). The p-value threshold method requires a SNP to pass a fixed cutoff in every cohort, which can miss true signals (e.g., SNP 3 in Cohort C with a p-value barely greater than $5 \times 10^{-8}$). The three-dimensional schematics illustrate that under meta-analysis the global null is the single point at the origin (0,0,0), whereas a composite null (shaded planes) includes all points where at least one parameter is zero; in three dimensions this composite null region is much larger than the global null point. This geometric interpretation explains why the empirical Bayes replication test offers stronger protection against false discoveries as the number of cohorts (dimensions) increases. We further construct PRSs and conduct functional annotation analysis to test the quality of selected SNPs. 
In realistic simulations, model-based replication shows a 6.4-fold lower false discovery rate (FDR) compared to meta-analysis (using a significance level of $p < 10^{-8}$) and improved power by 19.1%. The proposed method also has the added advantage of adaptivity to different strengths of signals across different datasets. Additionally, model-based replication exhibits increased relative power and FDR calibration as the number of cohorts increases. Across three large lung cancer GWAS cohorts [ 8 , 20 , 21 ], we find that a PRS based on replication-based SNPs shows virtually identical performance to a GWAS-significant PRS while using 87.3% fewer SNPs. Our findings indicate that model-based replication analysis can robustly filter out false positives and curate a more concise list of important SNPs from multiple GWAS. This filtering of false positives, and retention of true positives, provides higher-quality findings for follow-up translational research and improves the reproducibility of GWAS investigations. In certain cases, model-based replication testing can reduce the number of putatively significant findings by an order of magnitude. Additionally, while this work focuses on lung cancer, the approaches are broadly generalizable to other phenotypes. Results Overview of model-based replication analysis To assess replicability across cohorts, we employed model-based replication analysis, which tests the composite null hypothesis [ 19 , 22 ]. In this framework, a SNP is considered null if it shows no effect in at least one cohort or if effect directions differ across cohorts, making rejection more stringent than the meta-analysis approach for integrating multiple GWAS [ 1 , 9 , 13 , 14 ]. Let $\theta_{jk}$ denote the cohort-specific regression coefficient for SNP $j$ in cohort $k$, for $k = 1, \ldots, K$. 
We define the replication composite null as $$H_{0,j}^{\mathrm{rep}}:\ \theta_{jk} = 0 \ \text{for at least one } k, \ \text{ or } \ \operatorname{sign}(\theta_{jk}) \neq \operatorname{sign}(\theta_{jk'}) \ \text{for some } k \neq k',$$ so that the alternative is $\theta_{jk} > 0$ for all $k$ or $\theta_{jk} < 0$ for all $k$. Replication testing was implemented using a multidimensional empirical Bayes two-group approach [ 18 ], with density estimation based on the conditionally symmetric Gaussian mixture model (csmGmm) [ 19 ]. This density model has been shown to provide robust operating characteristics and, crucially, offers interpretability guarantees by preventing frequentist-Bayesian contradictions. Within this framework, replication testing calculates the empirical Bayes local false discovery rate (lfdr) [ 23 ] for each SNP and uses it as a decision rule to identify variants that replicate across cohorts. See Figure 1 and Methods for further details. To demonstrate the benefits of model-based replication analysis, we first conducted simulations varying mean effect sizes and proportions of causal SNPs in two- and three-cohort settings, and we then applied the method to three large cancer GWAS summary datasets, comparing its performance against the p-value threshold approach (selecting SNPs by a fixed threshold across all cohorts) and meta-analysis. We further assessed the quality of findings by constructing PRSs and conducting functional annotation analyses. Realistic simulations show that replication analysis properly calibrates FDR while maintaining power The simulation studies show that model-based replication analysis offers (i) robust control of false discoveries, (ii) improved statistical power, and (iii) adaptivity to different signal strengths across datasets. Additionally, (iv) its advantages become greater as the number of datasets increases. The first set of simulations, shown in Figures 2A, 2C, and 2E, demonstrates (i) that model-based replication analysis can protect nominal FDRs across a range of sparsity and effect size settings. When the proportion of causal variants is 0.02%, there are roughly 1,400 causal SNPs ( Figure 2A ). When the proportion is 1%, there are 700,000 causal SNPs ( Figure 2E ). 
In each case, the model-based approach robustly constrains the number of false discoveries. This robust performance holds when the signals are very weak (causal summary statistic means of 2) and very strong (causal summary statistic means of 5+). Download figure Open in new tab Figure 2: Two-way replication analysis simulation evaluating the impact of varying mean effect sizes and proportions of causal variants. Each line corresponds to a different analysis method. For the meta-analysis, significance was defined at $p < 1 \times 10^{-8}$. Each p-value threshold line corresponds to the approach of selecting only SNPs passing the given significance cutoff ( p < threshold) in both datasets. The x-axis shows the mean effect size, while the y-axis displays either FDR (left panels) or statistical power (right panels). The gray horizontal line in the left panels marks the nominal FDR = 0.1. In contrast, meta-analysis demonstrates extremely large numbers of false discoveries, as would be expected, given that it does not test the replication null hypothesis. The meta-analysis significance level of $p < 1 \times 10^{-8}$ is chosen in part to keep its empirical FDR reasonable. A less stringent significance level brings the FDR to near 1. On the other hand, the p-value threshold approach of looking for SNPs under a threshold in both datasets is extremely conservative, with observed FDR near 0 in all settings. This poor performance holds regardless of the threshold chosen (from $p < 10^{-8}$ up to $p < 10^{-5}$). Figures 2B , 2D , and 2F further show (ii) that model-based replication analysis offers good power to detect truly replicated SNPs. The model-based replication approach detects substantially more signals than the threshold approaches, which is expected given the extreme conservativeness of the threshold methods. 
The empirical Bayes model also shows slightly more power than meta-analysis, even though meta-analysis makes many more false discoveries (see Supplementary Appendix A for more FDR and power simulations across different settings). Additionally, the suggested model shows larger power advantages when mean effect sizes are small to moderate, e.g. below 5 on the x-axis. This behavior is partly due to (iii) the adaptiveness of the test. Methods such as the threshold approach or meta-analysis rely on a fixed significance level such as 1 × 10 −8 , and if the signals are not sufficiently strong, power will be very low. However, the empirical Bayes model can adapt to low overall signal strength and still identify SNPs with relatively strong associations across multiple datasets. Figure 3 displays key differences between two-way and three-way replication analysis, especially (iv), with many advantages of model-based replication testing increasing in the three dimensional setting. In particular, the empirical Bayes model shows much better protection of false discoveries than meta-analysis, as expected since the composite null expands relative to the global null in three dimensions compared with two ( Figure 1 ). The model-based replication test also continues to show much better adherence to the nominal FDR than the threshold approaches, which make almost no discoveries when signals are small to moderate. Download figure Open in new tab Figure 3: Three-way replication analysis simulation evaluating the impact of varying mean effect sizes and proportions of causal variants. The x-axis is the mean effect size, while the y-axis is either statistical power or the FDR, depending on the panel. The advantages of model-based replication analysis are more extreme in three dimensions than in two dimensions. The gray horizontal line in the left panels marks the nominal FDR = 0.1. 
The model-based replication approach also achieves greater power than the threshold approach in three dimensions. This is expected, since reaching a significance threshold across three datasets is more difficult than across two, even for true signals. Meta-analysis can show slightly more power in three-way settings, but it is clearly an invalid test given the extremely large numbers of false discoveries. Figure 3 succinctly illustrates the dangers of using meta-analysis or the threshold approach to identify high-quality SNPs with large numbers of datasets. As additional cohorts are included, meta-analysis identifies an increasing number of false positives, because the global null becomes more different from the composite null as the number of dimensions increases. Additionally, as the number of datasets grows, the threshold approaches impose increasingly stringent criteria, yielding only the most obvious findings. Further simulation settings are available in the Supplementary Materials. Two-way replication analysis curates higher-quality SNPs than meta-analysis in real data While our primary focus is on the three-way replication analysis of the International Lung Cancer Consortium (ILCCO), Million Veteran Program (MVP), and UK Biobank (UKB) datasets, we first briefly present two-way replication results, which follow patterns similar to the simulation studies above. Specifically, Extended Data Figure 1 presents Manhattan plots of the replication analysis results on the two largest datasets (ILCCO and MVP) for overall lung cancer, LUAD, and LUSC. Additional two-way results with the UK Biobank can be found in Supplementary Appendix B. In total, there are 3,736 SNPs with standard GWAS p < 5 × 10 −8 for overall lung cancer in the ILCCO dataset. Of the 3,736 SNPs, 12.8% are identified as significant in the model-based replication analysis, and 9.9% are identified as significant under the p -value threshold approach at 1 × 10 −8 . 
However, 81.5% are declared significant in meta-analysis; many of these SNPs correspond to the green dots at the bottom of Extended Data Figure 1A. Thus, meta-analysis is ineffective for curating higher-quality SNPs in overall lung cancer using these two datasets, as it largely reproduces the set of originally significant ILCCO SNPs. For LUAD, there are 1,160 SNPs with standard GWAS $p < 5 \times 10^{-8}$ in the ILCCO data (Extended Data Figure 1B). Of the 1,160, 40.4% are significant in the model-based replication analysis, and 4.1% are significant under the p-value threshold approach at $1 \times 10^{-8}$. By contrast, 89.7% are significant by meta-analysis. We can see that the overall lung cancer and LUAD examples reinforce themes (i) and (ii) from the simulations. The model-based replication analysis appears to offer much better protection against false positives than meta-analysis while also offering much more power than the threshold approach. The model-based analysis can also adapt to the lower levels of signal in LUAD (where the sample size is smaller than for overall lung cancer) and identify a much larger proportion of replication SNPs. The LUSC example of Extended Data Figure 1C is noteworthy because it shows how model-based replication testing can also identify high-quality variants that may be overlooked amid thousands of significant SNPs. For example, a locus in the 9p21 region of chromosome 9 is uniquely identified by the model-based analysis. Given the simulations ( Figure 2 ) showing that model-based replication analysis can have more power than meta-analysis, it is reasonable to believe that this locus is an important finding. The ability to identify SNPs with moderate signals across multiple datasets is an important complement to filtering out spurious associations. Three-way replication analysis reveals additional loci missed by meta-analysis We next present the results of three-way replication analysis in the ILCCO, MVP, and UKB datasets. 
There are substantial differences in signal size between the three datasets, with smallest p -values for overall lung cancer being p ≈ 10 −100 in the ILCCO dataset, p ≈ 10 −25 in MVP, and p ≈ 10 −7 in the UKB. Replication results are shown overlaid on the original GWAS in Figure 4 and separately in Extended Data Figure 2, with selected summaries provided in Table 1 (additional results available in Supplementary Appendix C). Download figure Open in new tab Figure 4: Manhattan Plots of Lung Cancer GWAS Summary Statistics by Subtype and Cohort. The y-axis is the − log 10 ( P ) value from the original GWAS of each subtype in each cohort. SNPs are colored red if significant in both model-based replication analysis and meta-analysis, green if significant only in meta-analysis, and blue if significant only in model-based replication analysis. Only SNPs with original GWAS p < 10 −4 are displayed, and the exact counts of significant SNPs are provided in Table 1 . View this table: View inline View popup Download powerpoint Table 1: Summary of significant SNPs identified by GWAS, model-based replication analysis, meta-analysis, and p-value threshold approaches in ILCCO, MVP, and UKB. For ILCCO and MVP, percentages represent the proportion of GWAS-significant SNPs identified by each method. No genome-wide significant SNPs were detected in UKB. Raw counts are provided in the Total column. Figure 4A illustrates (iii), the data-adaptive behavior of the model-based replication analysis. Although there are no traditional genome-wide significant signals in the UKB dataset for overall lung cancer, the empirical Bayes model adapts to the low overall signal level and still identifies 461 SNPs with relatively small p -values that are replicated in the ILCCO and MVP datasets. These SNPs fall in the well-chronicled chromosome 5p15.33 and chromosome 15q25 regions, corresponding to the TERT [ 24 , 25 ] and nicotine receptor loci [ 26 , 27 , 28 ], respectively, as expected. 
Figures 4B , 4C , and Extended Data Figure 2 further demonstrate that model-based replication testing can identify important SNPs that meta-analysis misses. For example, the chromosome 3q28 locus is identified in LUAD only by model-based replication testing, not by meta-analysis ( Figures 4B , Extended Data Figure 2C, 2D). Additional loci uniquely identified by model-based replication testing include chromosome 5p15.33 in LUSC ( Figures 4C , Extended Data Figure 2E, 2F) and chromosome 6p21.33 for overall lung cancer and LUSC (Extended Data Figure 2A, 2B, 2E, 2F). The chromosome 6 region in particular has received heightened scrutiny lately due to immune-related processes implicated in lung cancer [ 29 , 30 ]. Table 1 provides additional evidence of (iv), illustrating how three-dimensional settings can exacerbate differences between model-based replication analysis, meta-analysis, and the threshold approaches. We see that for overall lung cancer, only 12.3% of the original ILCCO GWAS-significant SNPs are identified as significant by model-based replication testing, while 37.9% are identified as significant by meta-analysis. Thus, meta-analysis identifies approximately three times as many SNPs, and previous simulations suggest that many of these are likely false positives. For both LUAD and LUSC, model-based replication analysis and meta-analysis make similar numbers of discoveries. However, for both subtypes, the threshold approach at p < 1×10 −6 finds no significant SNPs, whereas the replication analysis finds hundreds of significant SNPs. This dramatic difference in the number of findings further emphasizes the lack of power for the threshold approach. The most significant SNPs identified in the three-way replication analysis for each lung cancer subtype are presented in Extended Data Table 1 . We can see that generally the ILCCO data provides the strongest evidence of association, and the UKB data provides the least. 
The full list of significant findings, along with all results, pipelines, and intermediate files used in this analysis, are available at locations described in the Data Availability. Replication-based PRS achieves virtually identical performance with far fewer variants We constructed two PRSs for overall lung cancer: (a) a GWAS-significant PRS based on ILCCO variants with $p < 5 \times 10^{-8}$ (3,736 SNPs), and (b) a two-way replication PRS constructed from variants significant in both ILCCO and MVP (478 SNPs) by PRSice [ 31 ] (see Methods). Replication-based PRS achieved virtually identical performance to GWAS-significant PRS while requiring 87.3% fewer genetic variants for overall lung cancer. The AUC was 0.698 for the replication-based PRS and 0.699 for the GWAS-significant PRS; both significantly outperformed the covariate-only model (DeLong Test: replication $p = 5.65 \times 10^{-7}$; GWAS $p = 3.59 \times 10^{-7}$). The adjusted odds ratio (OR) for the top 10% versus bottom 10% PRS was 1.97 for the replication-based PRS and 1.88 for the GWAS-significant PRS ( Figure 5 ). Case rates showed similar separation, with 1.00% vs. 0.51% for the replication PRS (top 10% vs. bottom 10%) and 0.98% vs. 0.53% for the GWAS PRS. Download figure Open in new tab Figure 5: PRS stratification of overall lung cancer risk in UK Biobank. Adjusted ORs with 95% CIs are shown across PRS deciles (reference = decile 1). Scores were built using PRSice. Replication analysis and meta-analysis identify comparable functionally relevant SNPs We further investigated whether the quality of findings from replication analysis is comparable to the quality from meta-analysis in a real-data application. While it is generally not possible to label the true causal SNPs in real data, we used genome-wide functional annotation data from Functional Annotation of Variants Online Resources (FAVOR) as a proxy. 
Specifically, we aligned FAVOR data with replication results at a well-studied locus on chromosome 15q25.1 harboring nicotinic acetylcholine receptor genes [ 26 , 27 , 28 ]. We focused on LUSC, given that the numbers of findings at this locus were reasonably similar between model-based replication analysis (234) and meta-analysis (191). Figure 6 shows how two FAVOR functional annotation scores range for SNPs identified as significant by model-based replication analysis and meta-analysis. Model-based replication analysis identified 13 (5.6%) SNPs with medium or higher conservation scores and 34 (14.5%) with medium or higher epigenetic activity scores, whereas meta-analysis identified 9 (4.7%) and 32 (16.8%) such SNPs, respectively. Thus, the proportion of SNPs with high functional relevance is similar between the two approaches. Download figure Open in new tab Figure 6: Functional annotation of variants identified in the three-way real data analysis for LUSC. The x-axis is genomic position and the y-axis is $-\log_{10}(\mathrm{FDR})$. The first column displays results from the replication analysis, and the second column shows results from the meta-analysis. The top row presents conservation scores, while the bottom row presents epigenetic scores. We categorize the scores as follows: scores larger than 25 are considered high, scores between 10 and 25 are medium, and scores less than 10 are low. Annotation scores represent only one approach to assessing functionality and are not specific to lung cancer. However, these results suggest that, as previously seen, model-based replication analysis offers power comparable to meta-analysis for detecting important variants. Its main advantage remains the ability to substantially reduce the number of putatively significant GWAS findings. By filtering out false positives early, model-based replication analysis ensures that subsequent annotation and translational research efforts are focused on biologically relevant variants. 
Discussion This work advocates for and conducts an interpretable and statistically principled model-based replication analysis of lung cancer GWAS results. Specifically, we have demonstrated how an empirical Bayes two-group model can be applied to formally test the replication composite null hypothesis in a straightforward manner. This approach improves control of false discoveries and offers greater statistical power compared to commonly used approaches such as the p-value threshold approach or meta-analysis. Furthermore, the model-based replication method adapts effectively to the strength of signal in the GWAS summary statistics and can show improved performance as the number of datasets increases. Simulation studies show that model-based replication analysis consistently maintains effective FDR control while preserving power across different simulation settings, whereas meta-analysis yields inflated false discoveries and the p-value threshold approach is overly conservative with little power. Applied to three large lung cancer cohorts, our method demonstrates that many genome-wide significant SNPs could not be replicated, underscoring the prevalence of false positives in many traditional GWASs, while also identifying functionally relevant loci such as the chromosome 6 region missed by meta-analysis. This locus has been linked to inflammatory processes driving lung cancer in previous clinical studies [ 29 , 30 ], reinforcing the biological plausibility of the replication-based findings. Replication-based PRS further demonstrates the efficiency of the approach, achieving nearly identical predictive performance to a GWAS-significant PRS while requiring almost 90% fewer variants. With the growing availability of large-scale biobanks and other GWAS, it is increasingly important to integrate their results in a robust manner [ 32 , 33 , 34 ]. 
For example, thousands of SNPs have been associated with different lung cancer histologies across diverse populations, yet it remains unclear which should be prioritized. The large number of false positives can substantially hinder clinical efforts such as PRS construction or development of new therapies [ 35 , 36 , 37 , 38 , 39 , 40 ]. We note that some other statistical models have been proposed for assessing replicability, although few have seen widespread adoption. For example, the repfdr, MAMBA, and SCREEN methods are similarly Bayesian frameworks; however, they do not offer the interpretability guarantees of our suggested approach [ 41 , 42 , 43 ], which can lead to discrepancies between frequentist summary statistics and Bayesian replication findings. They may also require additional preprocessing steps such as SNP pruning. We also acknowledge several limitations to our work. One limitation is that, due to the sample size of the available GWAS summary statistics, we focus only on individuals of European ancestry. Previous studies have shown that single-ancestry GWAS can be limited by population-specific LD structures [ 44 , 45 , 46 ]. However, this challenge also presents an opportunity, as an important future direction is to extend model-based replication across diverse ancestries. Utilizing the suggested framework across different ancestries may further remove noise findings that are attributable to LD, since the noise SNPs linked by LD will be different in different ancestries. Another limitation is that we did not account for differences in reference panels used across the GWAS summary statistics. Variability in reference panels can impact imputation quality, LD estimation, p-values, and downstream analyses [ 47 , 48 ]. Finally, another direction for future work is exploring the viability of testing for replication in only a subset of datasets. 
For example, if there are four datasets of interest, one might focus on identifying SNPs that replicate in at least three, which could balance stringency with greater power. Materials and Methods Lung cancer datasets The ILCCO GWAS is the largest lung cancer GWAS, with 29,266 cases and 56,450 controls of European ancestry. Subtype-specific analyses for LUAD and LUSC include 11,273 and 7,426 cases, respectively. Details about the study and its summary statistics are available in the literature [ 8 ]. The MVP GWAS is a recent study of 10,398 lung cancer cases and 62,708 controls of European ancestry that do not overlap with previous lung cancer cohorts. Subtype-specific analyses for LUAD and LUSC include 2,019 and 1,475 cases, respectively. Details about the study and its summary statistics are also available in the literature [ 20 ]. We further performed a GWAS in the UK Biobank on 2,404 cases and 330,018 controls of white British ancestry [ 21 ]. The LUSC subtype analysis included 489 cases, and the LUAD subtype analysis included 862 cases. We used the REGENIE [ 49 ] default pipeline to generate summary statistics, setting a minor allele frequency threshold of 0.00005. We corrected for the same covariates as the ILCCO analysis, using age, gender, and the first 10 ancestry principal components (PCs) in the regression model. Data harmonization was applied to keep only the variants shared in common by all three datasets. We also flipped effect directions if necessary to ensure that effect alleles and reference alleles were consistent across studies. After all data cleaning steps, there were 7,335,666 common SNPs in the analysis for overall lung cancer, 7,325,055 for LUSC, and 7,327,929 for LUAD. 
Overview of replication analysis through testing composite null hypotheses In general, GWAS summary statistics from two cohorts A and B are generated from logistic regression models of the form $$\operatorname{logit}(\mu_{ij}^{A}) = \alpha_{0j} + \alpha_j G_{ij}^{A} + \boldsymbol{\alpha}_{X,j}^{T} \mathbf{X}_{i}^{A}, \qquad \operatorname{logit}(\mu_{i'j}^{B}) = \beta_{0j} + \beta_j G_{i'j}^{B} + \boldsymbol{\beta}_{X,j}^{T} \mathbf{X}_{i'}^{B}.$$ Here, for each subject $i = 1, \ldots, n_A$ in cohort A, $G_{ij}^{A}$ is the genotype at SNP $j$, the lung cancer outcome is $Y_{i}^{A}$, and there is a vector $\mathbf{X}_{i}^{A}$ of $p$ additional covariates such as principal components of the genotype matrix. Then $\mu_{ij}^{A} = E(Y_{i}^{A} \mid G_{ij}^{A}, \mathbf{X}_{i}^{A})$. All the same definitions carry over to the $i' = 1, \ldots, n_B$ subjects in cohort B. As an example, cohort A could be ILCCO and cohort B could be MVP. For a third cohort C, e.g. the UK Biobank, we would fit a third model of the same form, $$\operatorname{logit}(\mu_{i''j}^{C}) = \gamma_{0j} + \gamma_j G_{i''j}^{C} + \boldsymbol{\gamma}_{X,j}^{T} \mathbf{X}_{i''}^{C},$$ where $\gamma_j$ is the regression coefficient for SNP $j$, and the pattern continues for more cohorts. We are interested in testing the replication null hypothesis, which is a composite null hypothesis. That is, a SNP $j$ is considered replicated only when $\alpha_j$ and $\beta_j$ are both nonzero and share the same effect direction (if there are two cohorts). In other words, the null hypothesis includes the cases where either $\alpha_j$ or $\beta_j$ is zero as well as the case where they point in different directions. More formally, for two cohorts, we have $H_{0,j}^{\mathrm{rep}}: \{\alpha_j = 0\} \cup \{\beta_j = 0\} \cup \{\operatorname{sign}(\alpha_j) \neq \operatorname{sign}(\beta_j)\}$ for SNP $j$ [ 19 , 22 ]. For three cohorts, we have $H_{0,j}^{\mathrm{rep}}: \{\alpha_j = 0\} \cup \{\beta_j = 0\} \cup \{\gamma_j = 0\} \cup \{\operatorname{sign}(\alpha_j) \neq \operatorname{sign}(\beta_j)\} \cup \{\operatorname{sign}(\alpha_j) \neq \operatorname{sign}(\gamma_j)\} \cup \{\operatorname{sign}(\beta_j) \neq \operatorname{sign}(\gamma_j)\}$. Other popular approaches to identify high-quality SNPs Another popular approach for integrating multiple GWAS to find high-quality SNPs is to perform meta-analysis [ 1 , 9 , 13 , 14 ]. However, meta-analysis tools such as METAL [ 50 ] test the global null hypothesis, which is $H_{0,j}^{\mathrm{global}}: \alpha_j = \beta_j = \gamma_j = 0$ for the three-study example. Thus, using meta-analytic type methods does not directly address the scientific question of interest in a replication study. 
For instance, meta-analysis can identify many SNPs that are highly associated with lung cancer in one study while showing little association in the other two [ 9 ]. However, intuitively, such variants would not be considered replicated (1). The ad-hoc p-value threshold approach, e.g. searching for SNPs with $p < 5 \times 10^{-8}$ in multiple cohorts, can be used to test the replication null. However, this approach is not data-adaptive and does not have good operating characteristics, as we will show. For example, in a relatively small GWAS cohort where no SNP reaches $p < 5 \times 10^{-8}$, there will automatically be no replicated findings, even if that cohort has valuable information to contribute ( Figure 1 ). Empirical Bayes replication testing We advocate for using a multidimensional empirical Bayes two-group approach [ 18 ] paired with a version of the csmGmm [ 19 ] for density estimation in replication testing. To the best of our knowledge, this approach has not been used for replication studies in the genetics literature. In the two-group model, which is popular in many other genomics settings [ 23 ], each SNP $j$ in each cohort $k$ is assumed to possess an unobserved indicator of association status, $H_{jk} \in \{-1, 0, 1\}$. Thus, there is an association group (denoted by 1 and −1) and a no association group (denoted by 0). There are $J$ total SNPs, indexed by $j = 1, 2, \ldots, J$, and $K$ total studies, indexed by $k = 1, 2, \ldots, K$, in the analysis. With two studies ($K = 2$), there are $3^K = 9$ possible effect configurations [ 18 ]. The set of all nine possible effect combinations is given by $$\mathcal{H} = \{(0,0)^T, (0,-1)^T, (0,1)^T, (-1,0)^T, (1,0)^T, (-1,-1)^T, (-1,1)^T, (1,-1)^T, (1,1)^T\}.$$ We map each of the nine possible association configurations to a vector $\mathbf{h}_l$, with $l = 0, 1, \ldots, L = 8$ in the order given above. Then the two scenarios where the SNP truly possesses a replicated effect are the alternative space, $\mathcal{H}_a = \{\mathbf{h}_5 = (-1, -1)^T, \mathbf{h}_8 = (1, 1)^T\}$. The other seven scenarios fall under the null space $\mathcal{H}_0$. 
The lfdr [ 23 ] for SNP $j$ can then be straightforwardly calculated through Bayes’ Theorem as $$\mathrm{lfdr}_j = P(\mathbf{H}_j \in \mathcal{H}_0 \mid \mathbf{Z}_j) = \frac{\sum_{l:\, \mathbf{h}_l \in \mathcal{H}_0} \pi_l \, f(\mathbf{Z}_j \mid \mathbf{H}_j = \mathbf{h}_l)}{\sum_{l=0}^{L} \pi_l \, f(\mathbf{Z}_j \mid \mathbf{H}_j = \mathbf{h}_l)}. \tag{1}$$ Here, $\mathbf{Z}_j = (Z_{j1}, \ldots, Z_{jK})^T$ are the summary statistics for SNP $j$ across the $K$ studies, $\mathbf{H}_j = (H_{j1}, \ldots, H_{jK})^T$ are the unknown true association configurations for SNP $j$ in those studies, $\mathcal{H}_0$ is the set of configurations belonging to the replication null, and $\pi_l = P(\mathbf{H}_j = \mathbf{h}_l)$ is the prior probability of configuration $\mathbf{h}_l$. There are many possible densities $f(\cdot)$ that can be used to calculate Equation (1) above [ 18 ]. We suggest using a version of the csmGmm model, which addresses the critical issue that empirical Bayes procedures may yield contradictions between Bayesian and frequentist significance rankings [ 19 ], for better interpretability. As an example, while other Bayesian methods might declare a SNP with GWAS summary statistics of $(Z_{\mathrm{MVP}} = 5.1, Z_{\mathrm{ILCCO}} = 5.2)$ to be more replicated than a SNP with summary statistics of $(Z_{\mathrm{MVP}} = 5.3, Z_{\mathrm{ILCCO}} = 5.4)$, the csmGmm provably prevents such incongruous conclusions. Meta-analysis We used METAL [ 50 ] to perform a meta-analysis on GWAS summary statistics from multiple European ancestry cohorts. A sample size-based method was applied to analyze three different cancers. SNPs with $p < 1 \times 10^{-8}$ were considered statistically significant. This threshold was selected in accordance with recent lung cancer meta-analysis studies and to maintain a reasonable false discovery rate. Simulation studies We conducted extensive simulation studies to evaluate the power and FDR of various methods in GWAS datasets with the same number of variants as our lung cancer real data analysis (approximately 7 million). Summary statistics were simulated from multivariate normal distributions, and we varied different mean effect sizes and proportions of causal SNPs. We considered both two-cohort ($K = 2$) and three-cohort ($K = 3$) settings, and we performed 100 iterations at each reported setting. In all simulations, the proportions of causal variants were determined independently. 
For example, when each dataset contained 1% causal variants, the expected overlap was 0.01% of variants that were causal in both datasets. Also, we used an equal number of positive and negative effects in all cases. A nominal FDR of 0.1 was used for lfdr inference. Polygenic risk score construction Scores were generated with PRSice [ 31 ] using clumping and thresholding (LD $r^2 = 0.2$, 250-kb window, p-value threshold = 0.05) and applied to UK Biobank genotypes. Each PRS was standardized to have mean 0 and standard deviation 1. Associations with lung cancer status were assessed via logistic regression adjusted for age, sex, and the first 10 PCs. Variant functional annotations We used genome-wide functional annotations from the FAVOR pipeline [ 51 ] to assess the importance of identified variants. Specifically, we considered annotation principal components (aPC) derived from FAVOR for categories including epigenetic function and evolutionarily conserved function. The epigenetics score includes values such as H3K27Ac peaks [ 52 , 53 ], while the conservation score includes algorithms such as phastCons [ 54 ] and PhyloP [ 55 ]. The scores are presented on a PHRED scale, so that an aPC epigenetics score of 10 indicates that a SNP ranks in the top 10% of epigenetically active SNPs across the genome. We considered scores of greater than 25 to be high values and those between 10 and 25 to be medium scores. Data Availability All data produced in the present work are contained in the manuscript. Data availability The R package csmGmm can be used to run the model-based replication analysis described in this manuscript. It is available at https://cran.r-project.org/web/packages/csmGmm/index.html . The code to reproduce all of the tables and figures in this manuscript is available at https://github.com/yhc0211/Lung-Cancer-Replication-Analysis . Intermediate files needed to reproduce this work are available at https://odin.mdacc.tmc.edu/rsun3/ . 
For example, the summary statistics from GWAS of lung cancer in the UKB using REGENIE are available here. AUTHORS’ DISCLOSURES No disclosures or conflicts of interest are reported by the authors. AUTHORS’ CONTRIBUTIONS YHC: Formal analysis, validation, investigation, visualization, methodology, writing–original draft, writing–review and editing; JB: Writing–review and editing, resources; BRG: Writing–review and editing, resources; RJH: Writing–review and editing, resources; JDM: Writing–review and editing, resources; CIA: Writing–review and editing, resources; SP: Writing–review and editing, resources; AB: Methodology, supervision, investigation, writing–review and editing; RS: Conceptualization, resources, supervision, funding acquisition, investigation, methodology, writing–review and editing. ACKNOWLEDGEMENTS This work was supported by National Institutes of Health award R35 GM154843 (R.S.). The funders had no role in study design, data collection and analysis, or preparation of the manuscript. References [1]. ↵ Bryan R. Gorman , Sun-Gou Ji , Michael Francis , Anoop K. Sendamarai , Yunling Shi , Poornima Devineni , Uma Saxena , Elizabeth Partan , Andrea K. DeVito , Jinyoung Byun , Younghun Han , Xiangjun Xiao , Don D. Sin , Wim Timens , Jennifer Moser , Sumitra Muralidhar , Rachel Ramoni , Rayjean J. Hung , James D. McKay , Yohan Bossé , Ryan Sun , Christopher I. Amos , VA Million Veteran Program, and Saiju Pyarajan. Multi-ancestry GWAS meta-analyses of lung cancer reveal susceptibility loci and elucidate smoking-independent genetic risk . Nature Communications , 15 ( 1 ): 8629 , October 2024 . OpenUrl PubMed [2]. ↵ Yohan Bossé and Christopher I. Amos . A Decade of GWAS Results in Lung Cancer . Cancer Epidemiology, Biomarkers & Prevention , 27 ( 4 ): 363 – 379 , April 2018 . OpenUrl Abstract / FREE Full Text [3]. ↵ Rebecca L. Siegel , Kimberly D. Miller , Hannah E. Fuchs , and Ahmedin Jemal . Cancer statistics, 2022 . 
CA: a cancer journal for clinicians , 72 ( 1 ): 7 – 33 , January 2022 . OpenUrl CrossRef PubMed [4]. ↵ Paul Brennan , Pierre Hainaut , and Paolo Boffetta . Genetics of lung-cancer susceptibility . The Lancet Oncology , 12 ( 4 ): 399 – 408 , April 2011 . OpenUrl CrossRef PubMed Web of Science [5]. ↵ Matthew B. Schabath and Michele L. Cote . Cancer Progress and Priorities: Lung Cancer . Cancer Epidemiology, Biomarkers & Prevention , 28 ( 10 ): 1563 – 1579 , October 2019 . OpenUrl FREE Full Text [6]. ↵ Amanda Leiter , Rajwanth R. Veluswamy , and Juan P. Wisnivesky . The global burden of lung cancer: current status and future trends . Nature Reviews. Clinical Oncology , 20 ( 9 ): 624 – 639 , September 2023 . OpenUrl PubMed [7]. ↵ Juncheng Dai , Wei Shen , Wanqing Wen , Jiang Chang , Tongmin Wang , Haitao Chen , Guangfu Jin , Hongxia Ma , Chen Wu , Lian Li , Fengju Song , YiXin Zeng , Yue Jiang , Jiaping Chen , Cheng Wang , Meng Zhu , Wen Zhou , Jiangbo Du , Yongbing Xiang , Xiao-Ou Shu , Zhibin Hu , Weiping Zhou , Kexin Chen , Jianfeng Xu , Weihua Jia , Dongxin Lin , Wei Zheng , and Hongbing Shen . Estimation of heritabil-ity for nine common cancers using data from genome-wide association studies in Chinese population . International Journal of Cancer , 140 ( 2 ): 329 – 336 , January 2017 . OpenUrl PubMed [8]. ↵ James D. McKay , Rayjean J. Hung , Younghun Han , Xuchen Zong , Robert Carreras-Torres , David C. Christiani , Neil E. Caporaso , Mattias Johansson , Xiangjun Xiao , Yafang Li , Jinyoung Byun , Alison Dun-ning , Karen A. Pooley , David C. Qian , Xuemei Ji , Geoffrey Liu , Maria N. Timofeeva , Stig E. Bojesen , Xifeng Wu , Loic Le Marchand , Demetrios Albanes , Heike Bickeböller , Melinda C. Aldrich , William S. Bush , Adonina Tardon , Gad Rennert , M. Dawn Teare , John K. Field , Lambertus A. Kiemeney , Philip Lazarus , Aage Haugen , Stephen Lam , Matthew B. Schabath , Angeline S. 
Andrew , Hongbing Shen , Yun-Chul Hong , Jian-Min Yuan , Pier Alberto Bertazzi , Angela C. Pesatori , Yuanqing Ye , Nancy Diao , Li Su , Ruyang Zhang , Yonathan Brhane , Natasha Leighl , Jakob S. Johansen , Anders Mellem-gaard , Walid Saliba , Christopher A. Haiman , Lynne R. Wilkens , Ana Fernandez-Somoano , Guillermo Fernandez-Tardon , Henricus F. M. van der Heijden , Jin Hee Kim , Juncheng Dai , Zhibin Hu , Michael P. A. Davies , Michael W. Marcus , Hans Brunnström , Jonas Manjer , Olle Melander , David C. Muller , Kim Overvad , Antonia Trichopoulou , Rosario Tumino , Jennifer A. Doherty , Matt P. Barnett , Chu Chen , Gary E. Goodman , Angela Cox , Fiona Taylor , Penella Woll , Irene Brüske , H.-Erich Wichmann , Judith Manz , Thomas R. Muley , Angela Risch , Albert Rosenberger , Kjell Grankvist , Mikael Johans-son , Frances A. Shepherd , Ming-Sound Tsao , Susanne M. Arnold , Eric B. Haura , Ciprian Bolca , Ivana Holcatova , Vladimir Janout , Milica Kontic , Jolanta Lissowska , Anush Mukeria , Simona Ognjanovic , Tadeusz M. Orlowski , Ghislaine Scelo , Beata Swiatkowska , David Zaridze , Per Bakke , Vidar Skaug , Shanbeh Zienolddiny , Eric J. Duell , Lesley M. Butler , Woon-Puay Koh , Yu-Tang Gao , Richard S. Houl-ston , John McLaughlin , Victoria L. Stevens , Philippe Joubert , Maxime Lamontagne , David C. Nickle , Ma’en Obeidat , Wim Timens , Bin Zhu , Lei Song , Linda Kachuri , María Soler Artigas , Martin D. To-bin , Louise V. Wain , SpiroMeta Consortium , Thorunn Rafnar , Thorgeir E. Thorgeirsson , Gunnar W. Reginsson , Kari Stefansson , Dana B. Hancock , Laura J. Bierut , Margaret R. Spitz , Nathan C. Gad-dis , Sharon M. Lutz , Fangyi Gu , Eric O. Johnson , Ahsan Kamal , Claudio Pikielny , Dakai Zhu , Sara Lindströem , Xia Jiang , Rachel F. Tyndale , Georgia Chenevix-Trench , Jonathan Beesley , Yohan Bossé , Stephen Chanock , Paul Brennan , Maria Teresa Landi , and Christopher I. Amos . 
Large-scale association analysis identifies new lung cancer susceptibility loci and heterogeneity in genetic susceptibility across histological subtypes . Nature Genetics , 49 ( 7 ): 1126 – 1132 , July 2017 . OpenUrl CrossRef PubMed [9]. ↵ Jinyoung Byun , Younghun Han , Yafang Li , Jun Xia , Erping Long , Jiyeon Choi , Xiangjun Xiao , Meng Zhu , Wen Zhou , Ryan Sun , Yohan Bossé , Zhuoyi Song , Ann Schwartz , Christine Lusk , Thorunn Rafnar , Kari Stefansson , Tongwu Zhang , Wei Zhao , Rowland W. Pettit , Yanhong Liu , Xihao Li , Hufeng Zhou , Kyle M. Walsh , Ivan Gorlov , Olga Gorlova , Dakai Zhu , Susan M. Rosenberg , Susan Pinney , Joan E. Bailey-Wilson , Diptasri Mandal , Mariza de Andrade , Colette Gaba , James C. Willey , Ming You , Mar-shall Anderson , John K. Wiencke , Demetrius Albanes , Stephan Lam , Adonina Tardon , Chu Chen , Gary Goodman , Stig Bojeson , Hermann Brenner , Maria Teresa Landi , Stephen J. Chanock , Mattias Johansson , Thomas Muley , Angela Risch , H.-Erich Wichmann , Heike Bickeböller , David C. Christiani , Gad Rennert , Susanne Arnold , John K. Field , Sanjay Shete , Loic Le Marchand , Olle Melander , Hans Brunnstrom , Geoffrey Liu , Angeline S. Andrew , Lambertus A. Kiemeney , Hongbing Shen , Shanbeh Zienolddiny , Kjell Grankvist , Mikael Johansson , Neil Caporaso , Angela Cox , Yun-Chul Hong , Jian-Min Yuan , Philip Lazarus , Matthew B. Schabath , Melinda C. Aldrich , Alpa Patel , Qing Lan , Nathaniel Rothman , Fiona Taylor , Linda Kachuri , John S. Witte , Lori C. Sakoda , Margaret Spitz , Paul Brennan , Xihong Lin , James McKay , Rayjean J. Hung , and Christopher I. Amos . Cross-ancestry genome-wide meta-analysis of 61,047 cases and 947,237 controls identifies new susceptibility loci contributing to lung cancer . Nature Genetics , 54 ( 8 ): 1167 – 1177 , August 2022 . OpenUrl CrossRef PubMed [10]. ↵ Erping Long , Harsh Patel , Jinyoung Byun , Christopher I Amos , and Jiyeon Choi . 
Functional studies of lung cancer GWAS beyond association . Human molecular genetics , 31 ( R1 ): R22 – R36 , October 2022 . OpenUrl CrossRef PubMed [11]. ↵ Teri A. Manolio , Francis S. Collins , Nancy J. Cox , David B. Goldstein , Lucia A. Hindorff , David J. Hunter , Mark I. McCarthy , Erin M. Ramos , Lon R. Cardon , Aravinda Chakravarti , Judy H. Cho , Alan E. Guttmacher , Augustine Kong , Leonid Kruglyak , Elaine Mardis , Charles N. Rotimi , Montgomery Slatkin , David Valle , Alice S. Whittemore , Michael Boehnke , Andrew G. Clark , Evan E. Eichler , Greg Gibson , Jonathan L. Haines , Trudy F. C. Mackay , Steven A. McCarroll , and Peter M. Visscher . Finding the missing heritability of complex diseases . Nature , 461 ( 7265 ): 747 – 753 , October 2009 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed Web of Science [12]. ↵ Ian A. Yang , John W. Holloway , and Kwun M. Fong . Genetic susceptibility to lung cancer and co-morbidities . Journal of Thoracic Disease , 5 ( Suppl 5 ), October 2013 . Publisher: AME Publishing Company . [13]. ↵ Zhaoming Wang , Wei Jie Seow , Kouya Shiraishi , Chao A. Hsiung , Keitaro Matsuo , Jie Liu , Kexin Chen , Taiki Yamji , Yang Yang , I-Shou Chang , Chen Wu , Yun-Chul Hong , Laurie Burdett , Kathleen Wyatt , Charles C. Chung , Shengchao A. Li , Meredith Yeager , Amy Hutchinson , Wei Hu , Neil Caporaso , Maria T. Landi , Nilanjan Chatterjee , Minsun Song , Joseph F. 
Fraumeni , Jr , Takashi Kohno , Jun Yokota , Hideo Kunitoh , Kyota Ashikawa , Yukihide Momozawa , Yataro Daigo , Tetsuya Mitsudomi , Yasushi Yatabe , Toyoaki Hida , Zhibin Hu , Juncheng Dai , Hongxia Ma , Guangfu Jin , Bao Song , Zhehai Wang , Sensen Cheng , Zhihua Yin , Xuelian Li , Yangwu Ren , Peng Guan , Jiang Chang , Wen Tan , Chien-Jen Chen , Gee-Chen Chang , Ying-Huang Tsai , Wu-Chou Su , Kuan-Yu Chen , Ming-Shyan Huang , Yuh-Min Chen , Hong Zheng , Haixin Li , Ping Cui , Huan Guo , Ping Xu , Li Liu , Motoki Iwasaki , Taichi Shimazu , Shoichiro Tsugane , Junjie Zhu , Gening Jiang , Ke Fei , Jae Yong Park , Yeul Hong Kim , Jae Sook Sung , Kyong Hwa Park , Young Tae Kim , Yoo Jin Jung , Chang Hyun Kang , In Kyu Park , Hee Nam Kim , Hyo-Sung Jeon , Jin Eun Choi , Yi Young Choi , Jin Hee Kim , In-Jae Oh , Young-Chul Kim , Sook Whan Sung , Jun Suk Kim , Ho-Il Yoon , Sun-Seog Kweon , Min-Ho Shin , Adeline Seow , Ying Chen , Wei-Yen Lim , Jianjun Liu , Maria Pik Wong , Victor Ho Fun Lee , Bryan A. Bassig , Margaret Tucker , Sonja I. Berndt , Wong-Ho Chow , Bu-Tian Ji , Junwen Wang , Jun Xu , Alan Dart Loon Sihoe , James C.M. Ho , John K.C. Chan , Jiu-Cun Wang , Daru Lu , Xueying Zhao , Zhenhong Zhao , Junjie Wu , Hongyan Chen , Li Jin , Fusheng Wei , Guoping Wu , She-Juan An , Xu-Chao Zhang , Jian Su , Yi-Long Wu , Yu-Tang Gao , Yong-Bing Xiang , Xingzhou He , Jihua Li , Wei Zheng , Xiao-Ou Shu , Qiuyin Cai , Robert Klein , William Pao , Charles Lawrence , H. Dean Hosgood , III , Chin-Fu Hsiao , Li-Hsin Chien , Ying-Hsiang Chen , Chung-Hsing Chen , Wen-Chang Wang , Chih-Yi Chen , Chih-Liang Wang , Chong-Jen Yu , Hui-Ling Chen , Yu-Chun Su , Fang-Yu Tsai , Yi-Song Chen , Yao-Jen Li , Tsung-Ying Yang , Chien-Chung Lin , Pan-Chyr Yang , Tangchun Wu , Dongxin Lin , Baosen Zhou , Jinming Yu , Hongbing Shen , Michiaki Kubo , Stephen J. Chanock , Nathaniel Rothman , and Qing Lan . 
Meta-analysis of genome-wide association studies identifies multiple lung cancer susceptibility loci in never-smoking Asian women . Human Molecular Genetics , 25 ( 3 ): 620 – 629 , February 2016 . OpenUrl CrossRef PubMed [14]. ↵ Jianxin Shi , Kouya Shiraishi , Jiyeon Choi , Keitaro Matsuo , Tzu-Yu Chen , Juncheng Dai , Rayjean J. Hung , Kexin Chen , Xiao-Ou Shu , Young Tae Kim , Maria Teresa Landi , Dongxin Lin , Wei Zheng , Zhihua Yin , Baosen Zhou , Bao Song , Jiucun Wang , Wei Jie Seow , Lei Song , I.-Shou Chang , Wei Hu , Li-Hsin Chien , Qiuyin Cai , Yun-Chul Hong , Hee Nam Kim , Yi-Long Wu , Maria Pik Wong , Brian Dou-glas Richardson , Karen M. Funderburk , Shilan Li , Tongwu Zhang , Charles Breeze , Zhaoming Wang , Batel Blechter , Bryan A. Bassig , Jin Hee Kim , Demetrius Albanes , Jason Y. Y. Wong , Min-Ho Shin , Lap Ping Chung , Yang Yang , She-Juan An , Hong Zheng , Yasushi Yatabe , Xu-Chao Zhang , Young-Chul Kim , Neil E. Caporaso , Jiang Chang , James Chung Man Ho , Michiaki Kubo , Yataro Daigo , Minsun Song , Yukihide Momozawa , Yoichiro Kamatani , Masashi Kobayashi , Kenichi Okubo , Takayuki Honda , Dean H. Hosgood , Hideo Kunitoh , Harsh Patel , Shun-ichi Watanabe , Yohei Miyagi , Haruhiko Nakayama , Shingo Matsumoto , Hidehito Horinouchi , Masahiro Tsuboi , Ryuji Hamamoto , Koichi Goto , Yuichiro Ohe , Atsushi Takahashi , Akiteru Goto , Yoshihiro Minamiya , Megumi Hara , Yuichiro Nishida , Kenji Takeuchi , Kenji Wakai , Koichi Matsuda , Yoshinori Murakami , Kimihiro Shimizu , Hiroyuki Suzuki , Motonobu Saito , Yoichi Ohtaki , Kazumi Tanaka , Tangchun Wu , Fusheng Wei , Hongji Dai , Mitchell J. Machiela , Jian Su , Yeul Hong Kim , In-Jae Oh , Victor Ho Fun Lee , Gee-Chen Chang , Ying-Huang Tsai , Kuan-Yu Chen , Ming-Shyan Huang , Wu-Chou Su , Yuh-Min Chen , Adeline Seow , Jae Yong Park , Sun-Seog Kweon , Kun-Chieh Chen , Yu-Tang Gao , Biyun Qian , Chen Wu , Daru Lu , Jianjun Liu , Ann G. Schwartz , Richard Houlston , Margaret R. Spitz , Ivan P. 
Gorlov , Xifeng Wu , Ping Yang , Stephen Lam , Adonina Tardon , Chu Chen , Stig E. Bojesen , Mattias Johansson , Angela Risch , Heike Bickeböller , Bu-Tian Ji , H.-Erich Wichmann , David C. Christiani , Gadi Rennert , Susanne Arnold , Paul Brennan , James McKay , John K. Field , Sanjay S. Shete , Loic Le Marchand , Geoffrey Liu , Angeline Andrew , Lam-bertus A. Kiemeney , Shan Zienolddiny-Narui , Kjell Grankvist , Mikael Johansson , Angela Cox , Fiona Taylor , Jian-Min Yuan , Philip Lazarus , Matthew B. Schabath , Melinda C. Aldrich , Hyo-Sung Jeon , Shih Sheng Jiang , Jae Sook Sung , Chung-Hsing Chen , Chin-Fu Hsiao , Yoo Jin Jung , Huan Guo , Zhibin Hu , Laurie Burdett , Meredith Yeager , Amy Hutchinson , Belynda Hicks , Jia Liu , Bin Zhu , Sonja I. Berndt , Wei Wu , Junwen Wang , Yuqing Li , Jin Eun Choi , Kyong Hwa Park , Sook Whan Sung , Li Liu , Chang Hyun Kang , Wen-Chang Wang , Jun Xu , Peng Guan , Wen Tan , Chong-Jen Yu , Gong Yang , Alan Dart Loon Sihoe , Ying Chen , Yi Young Choi , Jun Suk Kim , Ho-Il Yoon , In Kyu Park , Ping Xu , Qincheng He , Chih-Liang Wang , Hsiao-Han Hung , Roel C. H. Vermeulen , Iona Cheng , Junjie Wu , Wei-Yen Lim , Fang-Yu Tsai , John K. C. Chan , Jihua Li , Hongyan Chen , Hsien-Chih Lin , Li Jin , Jie Liu , Norie Sawada , Taiki Yamaji , Kathleen Wyatt , Shengchao A. Li , Hongxia Ma , Meng Zhu , Zhehai Wang , Sensen Cheng , Xuelian Li , Yangwu Ren , Ann Chao , Motoki Iwasaki , Junjie Zhu , Gening Jiang , Ke Fei , Guoping Wu , Chih-Yi Chen , Chien-Jen Chen , Pan-Chyr Yang , Jinming Yu , Victoria L. Stevens , Joseph F. Fraumeni , Nilanjan Chatterjee , Olga Y. Gorlova , Chao Agnes Hsiung , Christopher I. Amos , Hongbing Shen , Stephen J. Chanock , Nathaniel Rothman , Takashi Kohno , and Qing Lan . Genome-wide association study of lung adenocarcinoma in East Asia and comparison with a European population . Nature Communications , 14 ( 1 ): 3043 , May 2023 . Publisher: Nature Publishing Group . OpenUrl PubMed [15]. ↵ Maria N. 
Timofeeva , Rayjean J. Hung , Thorunn Rafnar , David C. Christiani , John K. Field , Heike Bickeböller , Angela Risch , James D. McKay , Yufei Wang , Juncheng Dai , Valerie Gaborieau , John McLaughlin , Darren Brenner , Steven A. Narod , Neil E. Caporaso , Demetrius Albanes , Michael Thun , Timothy Eisen , H.-Erich Wichmann , Albert Rosenberger , Younghun Han , Wei Chen , Dakai Zhu , Mar-garet Spitz , Xifeng Wu , Mala Pande , Yang Zhao , David Zaridze , Neonilia Szeszenia-Dabrowska , Jolanta Lissowska , Peter Rudnai , Eleonora Fabianova , Dana Mates , Vladimir Bencko , Lenka Foretova , Vladimir Janout , Hans E. Krokan , Maiken Elvestad Gabrielsen , Frank Skorpen , Lars Vatten , Inger Njølstad , Chu Chen , Gary Goodman , Mark Lathrop , Simone Benhamou , Tõnu Vooder , Kristjan Välk , Mari Nelis , Andres Metspalu , Olaide Raji , Ying Chen , John Gosney , Triantafillos Liloglou , Thomas Muley , Hen-drik Dienemann , Gudmar Thorleifsson , Hongbing Shen , Kari Stefansson , Paul Brennan , Christopher I. Amos , Richard Houlston , Maria Teresa Landi , and Transdisciplinary Research in Cancer of the Lung (TRICL) Research Team . Influence of common genetic variation on lung cancer risk: meta-analysis of 14 900 cases and 29 485 controls . Human Molecular Genetics , 21 ( 22 ): 4980 – 4995 , November 2012 . OpenUrl CrossRef PubMed Web of Science [16]. ↵ Wei Zhou , Masahiro Kanai , Kuan-Han H. Wu , Humaira Rasheed , Kristin Tsuo , Jibril B. Hirbo , Ying Wang , Arjun Bhattacharya , Huiling Zhao , Shinichi Namba , Ida Surakka , Brooke N. Wolford , Valeria Lo Faro , Esteban A. Lopera-Maya , Kristi Läll , Marie-Julie Favé , Juulia J. Partanen , Sinéad B. Chapman , Juha Karjalainen , Mitja Kurki , Mutaamba Maasha , Ben M. Brumpton , Sameer Chavan , Tzu-Ting Chen , Michelle Daya , Yi Ding , Yen-Chen A. Feng , Lindsay A. Guare , Christopher R. Gignoux , Sarah E. Gra-ham , Whitney E. Hornsby , Nathan Ingold , Said I. Ismail , Ruth Johnson , Triin Laisk , Kuang Lin , Jun Lv , Iona Y. 
Millwood , Sonia Moreno-Grau , Kisung Nam , Priit Palta , Anita Pandit , Michael H. Preuss , Chadi Saad , Shefali Setia-Verma , Unnur Thorsteinsdottir , Jasmina Uzunovic , Anurag Verma , Matthew Zawistowski , Xue Zhong , Nahla Afifi , Kawthar M. Al-Dabhani , Asma Al Thani , Yuki Bradford , Archie Campbell , Kristy Crooks , Geertruida H. de Bock , Scott M. Damrauer , Nicholas J. Douville , Sarah Finer , Lars G. Fritsche , Eleni Fthenou , Gilberto Gonzalez-Arroyo , Christopher J. Griffiths , Yu Guo , Karen A. Hunt , Alexander Ioannidis , Nomdo M. Jansonius , Takahiro Konuma , Ming Ta Michael Lee , Arturo Lopez-Pineda , Yuta Matsuda , Riccardo E. Marioni , Babak Moatamed , Marco A. Nava-Aguilar , Ken-suke Numakura , Snehal Patil , Nicholas Rafaels , Anne Richmond , Agustin Rojas-Muñoz , Jonathan A. Shortt , Peter Straub , Ran Tao , Brett Vanderwerff , Manvi Vernekar , Yogasudha Veturi , Kathleen C. Barnes , Marike Boezen , Zhengming Chen , Chia-Yen Chen , Judy Cho , George Davey Smith , Hilary K. Finucane , Lude Franke , Eric R. Gamazon , Andrea Ganna , Tom R. Gaunt , Tian Ge , Hailiang Huang , Jennifer Huffman , Nicholas Katsanis , Jukka T. Koskela , Clara Lajonchere , Matthew H. Law , Liming Li , Cecilia M. Lindgren , Ruth J. F. Loos , Stuart MacGregor , Koichi Matsuda , Catherine M. Olsen , David J. Porteous , Jordan A. Shavit , Harold Snieder , Tomohiro Takano , Richard C. Trembath , Judith M. Vonk , David C. Whiteman , Stephen J. Wicks , Cisca Wijmenga , John Wright , Jie Zheng , Xiang Zhou , Philip Awadalla , Michael Boehnke , Carlos D. Bustamante , Nancy J. Cox , Segun Fatumo , Daniel H. Geschwind , Caroline Hayward , Kristian Hveem , Eimear E. Kenny , Seunggeun Lee , Yen-Feng Lin , Hamdi Mbarek , Reedik Mägi , Hilary C. Martin , Sarah E. Medland , Yukinori Okada , Aarno V. Palotie , Bogdan Pasa-niuc , Daniel J. Rader , Marylyn D. Ritchie , Serena Sanna , Jordan W. Smoller , Kari Stefansson , David A. van Heel , Robin G. Walters , Sebastian Zöllner , Alicia R. 
Martin , Cristen J. Willer , Mark J. Daly , and Benjamin M. Neale . Global Biobank Meta-analysis Initiative: Powering genetic discovery across human disease . Cell Genomics , 2 ( 10 ), October 2022 . Publisher: Elsevier . [17]. ↵ Peter Kraft , Eleftheria Zeggini , and John P. A. Ioannidis . Replication in Genome-Wide Association Studies . Statistical Science , 24 ( 4 ): 561 – 573 , November 2009 . Publisher: Institute of Mathematical Statistics . OpenUrl CrossRef PubMed Web of Science [18]. ↵ Ruth Heller and Daniel Yekutieli . Replicability analysis for genome-wide association studies . The Annals of Applied Statistics , 8 ( 1 ): 481 – 498 , March 2014 . Publisher: Institute of Mathematical Statistics . OpenUrl [19]. ↵ Ryan Sun , Zachary R. McCaw , and Xihong Lin . Testing a Large Number of Composite Null Hypotheses Using Conditionally Symmetric Multidimensional Gaussian Mixtures in Genome-Wide Studies . Journal of the American Statistical Association , 0 ( 0 ): 1 – 13 . Publisher: ASA Website eprint : doi: 10.1080/01621459.2024.2422124 . OpenUrl CrossRef [20]. ↵ John Michael Gaziano , John Concato , Mary Brophy , Louis Fiore , Saiju Pyarajan , James Breeling , Stacey Whitbourne , Jennifer Deen , Colleen Shannon , Donald Humphries , Peter Guarino , Mihaela Aslan , Daniel Anderson , Rene LaFleur , Timothy Hammond , Kendra Schaa , Jennifer Moser , Grant Huang , Sumitra Muralidhar , Ronald Przygodzki , and Timothy J. O’Leary . Million Veteran Program: A mega-biobank to study genetic influences on health and disease . Journal of Clinical Epidemiology , 70 : 214 – 223 , February 2016 . OpenUrl CrossRef PubMed [21]. ↵ Clare Bycroft , Colin Freeman , Desislava Petkova , Gavin Band , Lloyd T. Elliott , Kevin Sharp , Allan Motyer , Damjan Vukcevic , Olivier Delaneau , Jared O’Connell , Adrian Cortes , Samantha Welsh , Alan Young , Mark Effingham , Gil McVean , Stephen Leslie , Naomi Allen , Peter Donnelly , and Jonathan Marchini . 
The UK Biobank resource with deep phenotyping and genomic data . Nature , 562 ( 7726 ): 203 – 209 , October 2018 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed [22]. ↵ Yen-Tsung Huang . Genome-wide analyses of sparse mediation effects under composite null hypotheses . The Annals of Applied Statistics , 13 ( 1 ): 60 – 84 , March 2019 . Publisher: Institute of Mathematical Statistics . OpenUrl [23]. ↵ Bradley Efron . Microarrays, Empirical Bayes and the Two-Groups Model . Statistical Science , 23 ( 1 ): 1 – 22 , February 2008 . Publisher: Institute of Mathematical Statistics . OpenUrl CrossRef Web of Science [24]. ↵ Reinier Snetselaar , Matthijs F. M. van Oosterhout , Jan C. Grutters , and Coline H. M. van Moorsel . Telomerase Reverse Transcriptase Polymorphism rs2736100: A Balancing Act between Cancer and Non-Cancer Disease, a Meta-Analysis . Frontiers in Medicine , 5 , February 2018 . Publisher: Frontiers . [25]. ↵ Krista A. Zanetti , Zhaoming Wang , Melinda Aldrich , Christopher I. Amos , William J. Blot , Elise D. Bowman , Laurie Burdette , Qiuyin Cai , Neil Caporaso , Charles C. Chung , Elizabeth M. Gillanders , Christopher A. Haiman , Helen M. Hansen , Brian E. Henderson , Laurence N. Kolonel , Loic Le Marchand , Shengchao Li , Lorna Haughton McNeill , Bríd M. Ryan , Ann G. Schwartz , Jennette D. Sison , Margaret R. Spitz , Margaret Tucker , Angela S. Wenzlaff , John K. Wiencke , Lynne Wilkens , Margaret R. Wrensch , Xifeng Wu , Wei Zheng , Weiyin Zhou , David Christiani , Julie R. Palmer , Trevor M. Penning , Alyssa G. Rieber , Lynn Rosenberg , Edward A. Ruiz-Narvaez , Li Su , Anil Vachani , Yongyue Wei , Alexander S. Whitehead , Stephen J. Chanock , and Curtis C. Harris . Genome-wide association study confirms lung cancer susceptibility loci on chromosomes 5p15 and 15q25 in an African-American population . Lung Cancer (Amsterdam, Netherlands) , 98 : 33 – 42 , August 2016 . OpenUrl CrossRef PubMed [26]. ↵ Porat M. Erlich , Stuart N. 
Hoffman , Margaret Rukstalis , John J. Han , Xin Chu , W. H. Linda Kao , Glenn S. Gerhard , Walter F. Stewart , and Joseph A. Boscarino . Nicotinic acetylcholine receptor genes on chromosome 15q25.1 are associated with nicotine and opioid dependence severity . Human Genetics , 128 ( 5 ): 491 – 499 , November 2010 . OpenUrl CrossRef PubMed [27]. ↵ Nancy L. Saccone , Robert C. Culverhouse , Tae-Hwi Schwantes-An , Dale S. Cannon , Xiangning Chen , Sven Cichon , Ina Giegling , Shizhong Han , Younghun Han , Kaisu Keskitalo-Vuokko , Xiangyang Kong , Maria Teresa Landi , Jennie Z. Ma , Susan E. Short , Sarah H. Stephens , Victoria L. Stevens , Lingwei Sun , Yufei Wang , Angela S. Wenzlaff , Steven H. Aggen , Naomi Breslau , Peter Broderick , Nilanjan Chatterjee , Jingchun Chen , Andrew C. Heath , Markku Heliövaara , Nicole R. Hoft , David J. Hunter , Majken K. Jensen , Nicholas G. Martin , Grant W. Montgomery , Tianhua Niu , Thomas J. Payne , Leena Peltonen , Michele L. Pergadia , John P. Rice , Richard Sherva , Margaret R. Spitz , Juzhong Sun , Jen C. Wang , Robert B. Weiss , William Wheeler , Stephanie H. Witt , Bao-Zhu Yang , Neil E. Caporaso , Marissa A. Ehringer , Tim Eisen , Susan M. Gapstur , Joel Gelernter , Richard Houlston , Jaakko Kaprio , Kenneth S. Kendler , Peter Kraft , Mark F. Leppert , Ming D. Li , Pamela A. F. Madden , Markus M. Nöthen , Sreekumar Pillai , Marcella Rietschel , Dan Rujescu , Ann Schwartz , Christopher I. Amos , and Laura J. Bierut . Multiple Independent Loci at Chromosome 15q25.1 Affect Smoking Quantity: a Meta-Analysis and Comparison with Lung Cancer and COPD . PLoS Genetics , 6 ( 8 ): e1001053 , August 2010 . OpenUrl [28]. ↵ Margaret R. Spitz , Christopher I. Amos , Qiong Dong , Jie Lin , and Xifeng Wu . The CHRNA5-A3 region on chromosome 15q24-25.1 is a risk factor both for nicotine dependence and for lung cancer . Journal of the National Cancer Institute , 100 ( 21 ): 1552 – 1556 , November 2008 . OpenUrl CrossRef PubMed [29]. 
↵ Guangfu Jin , Meng Zhu , Rong Yin , Wei Shen , Jia Liu , Jie Sun , Cheng Wang , Juncheng Dai , Hongxia Ma , Chen Wu , Zhihua Yin , Jiaqi Huang , Brandon W. Higgs , Lin Xu , Yihong Yao , David C. Christiani , Christopher I. Amos , Zhibin Hu , Baosen Zhou , Yongyong Shi , Dongxin Lin , and Hongbing Shen . Low-Frequency Coding Variants at 6p21.33 and 20q11.21 Are Associated with Lung Cancer Risk in Chinese Populations . American Journal of Human Genetics , 96 ( 5 ): 832 – 840 , May 2015 . OpenUrl CrossRef PubMed [30]. ↵ Nicholas McGranahan , Rachel Rosenthal , Crispin T. Hiley , Andrew J. Rowan , Thomas B. K. Watkins , Gareth A. Wilson , Nicolai J. Birkbak , Selvaraju Veeriah , Peter Van Loo , Javier Herrero , Charles Swan-ton , Mariam Jamal-Hanjani , Selvaraju Veeriah , Seema Shafi , Justyna Czyzewska-Khan , Diana Johnson , Joanne Laycock , Leticia Bosshard-Carter , Rachel Rosenthal , Pat Gorman , Robert E. Hynds , Gareth Wilson , Nicolai J. Birkbak , Thomas B. K. Watkins , Nicholas McGranahan , Stuart Horswell , Richard Mitter , Mickael Escudero , Aengus Stewart , Peter Van Loo , Andrew Rowan , Hang Xu , Samra Tura-jlic , Crispin Hiley , Christopher Abbosh , Jacki Goldman , Richard Kevin Stone , Tamara Denner , Nik Matthews , Greg Elgar , Sophia Ward , Marta Costa , Sharmin Begum , Ben Phillimore , Tim Chambers , Emma Nye , Sofia Graca , Maise Al Bakir , Kroopa Joshi , Andrew Furness , Assma Ben Aissa , Yien Ning Sophia Wong , Andy Georgiou , Sergio Quezada , John A. Hartley , Helen L. Lowe , Javier Herrero , David Lawrence , Martin Hayward , Nikolaos Panagiotopoulos , Shyam Kolvekar , Mary Falzon , Elaine Borg , Teresa Marafioti , Celia Simeon , Gemma Hector , Amy Smith , Marie Aranda , Marco Novelli , Dah-mane Oukrif , Sam M. 
Janes , Ricky Thakrar , Martin Forster , Tanya Ahmad , Siow Ming Lee , Dionysis Papadatos-Pastos , Dawn Carnell , Ruheena Mendes , Jeremy George , Neal Navani , Asia Ahmed , Mag-ali Taylor , Junaid Choudhary , Yvonne Summers , Raffaele Califano , Paul Taylor , Rajesh Shah , Piotr Krysiak , Kendadai Rammohan , Eustace Fontaine , Richard Booton , Matthew Evison , Phil Crosbie , Stu-art Moss , Faiza Idries , Leena Joseph , Paul Bishop , Anshuman Chaturved , Anne Marie Quinn , Helen Doran , Angela Leek , Phil Harrison , Katrina Moore , Rachael Waddington , Juliette Novasio , Fiona Black-hall , Jane Rogan , Elaine Smith , Caroline Dive , Jonathan Tugwood , Ged Brady , Dominic G. Rothwell , Francesca Chemi , Jackie Pierce , Sakshi Gulati , Babu Naidu , Gerald Langman , Simon Trotter , Mary Bellamy , Hollie Bancroft , Amy Kerr , Salma Kadiri , Joanne Webb , Gary Middleton , Madava Djearaman , Dean Fennell , Jacqui A. Shaw , John Le Quesne , David Moore , Apostolos Nakas , Sridhar Rathinam , William Monteiro , Hilary Marshall , Louise Nelson , Jonathan Bennett , Joan Riley , Lindsay Primrose , Luke Martinson , Girija Anand , Sajid Khan , Anita Amadi , Marianne Nicolson , Keith Kerr , Shirley Palmer , Hardy Remmen , Joy Miller , Keith Buchan , Mahendran Chetty , Lesley Gomersall , Jason Lester , Alison Edwards , Fiona Morgan , Haydn Adams , Helen Davies , Malgorzata Kornaszewska , Richard At-tanoos , Sara Lock , Azmina Verjee , Mairead MacKenzie , Maggie Wilcox , Harriet Bell , Allan Hackshaw , Yenting Ngai , Sean Smith , Nicole Gower , Christian Ottensmeier , Serena Chee , Benjamin Johnson , Aiman Alzetani , Emily Shaw , Eric Lim , Paulo De Sousa , Monica Tavares Barbosa , Alex Bowman , Si-mon Jordan , Alexandra Rice , Hilgardt Raubenheimer , Chiara Proli , Maria Elena Cufari , John Carlo Ronquillo , Angela Kwayie , Harshil Bhayani , Morag Hamilton , Yusura Bakar , Natalie Mensah , Lyn Am-brose , Anand Devaraj , Silviu Buderi , Jonathan Finch , Leire Azcarate , Hema Chavan 
, Sophie Green , Hillaria Mashinga , Andrew G. Nicholson , Kelvin Lau , Michael Sheaff , Peter Schmid , John Conibear , Veni Ezhil , Babikir Ismail , Melanie Irvin-sellers , Vineet Prakash , Peter Russell , Teresa Light , Tracey Horey , Sarah Danson , Jonathan Bury , John Edwards , Jennifer Hill , Sue Matthews , Yota Kitsanta , Kim Suvarna , Patricia Fisher , Allah Dino Keerio , Michael Shackcloth , John Gosney , Pieter Postmus , Sarah Feeney , Julius Asante-Siaw , Hugo J. W. L. Aerts , Stefan Dentro , and Christophe Dessimoz . Allele-Specific HLA Loss and Immune Escape in Lung Cancer Evolution . Cell , 171 ( 6 ): 1259 – 1271 .e11, November 2017 . Publisher: Elsevier . OpenUrl CrossRef PubMed [31]. ↵ Jack Euesden , Cathryn M. Lewis , and Paul F. O’Reilly . PRSice: Polygenic Risk Score software . Bioinformatics , 31 ( 9 ): 1466 – 1468 , May 2015 . OpenUrl CrossRef PubMed [32]. ↵ All of Us Research Program Investigators , Joshua C. Denny , Joni L. Rutter , David B. Goldstein , Anthony Philippakis , Jordan W. Smoller , Gwynne Jenkins , and Eric Dishman . The “All of Us” Research Program . The New England Journal of Medicine , 381 ( 7 ): 668 – 676 , August 2019 . OpenUrl CrossRef PubMed [33]. ↵ Samuel Khodursky , Nour Mimouni , and Michael G Levin . Recent developments in population biobanks and the genetic architecture of complex disease . Human Molecular Genetics , page ddaf036 , April 2025 . [34]. ↵ Monkol Lek , Konrad J. Karczewski , Eric V. Minikel , Kaitlin E. Samocha , Eric Banks , Timothy Fennell , Anne H. O’Donnell-Luria , James S. Ware , Andrew J. Hill , Beryl B. Cummings , Taru Tukiainen , Daniel P. Birnbaum , Jack A. Kosmicki , Laramie E. Duncan , Karol Estrada , Fengmei Zhao , James Zou , Emma Pierce-Hoffman , Joanne Berghout , David N. Cooper , Nicole Deflaux , Mark DePristo , Ron Do , Jason Flannick , Menachem Fromer , Laura Gauthier , Jackie Goldstein , Namrata Gupta , Daniel Howrigan , Adam Kiezun , Mitja I. 
Kurki , Ami Levy Moonshine , Pradeep Natarajan , Lorena Orozco , Gina M. Peloso , Ryan Poplin , Manuel A. Rivas , Valentin Ruano-Rubio , Samuel A. Rose , Douglas M. Ruderfer , Khalid Shakir , Peter D. Stenson , Christine Stevens , Brett P. Thomas , Grace Tiao , Maria T. Tusie-Luna , Ben Weisburd , Hong-Hee Won , Dongmei Yu , David M. Altshuler , Diego Ardissino , Michael Boehnke , John Danesh , Stacey Donnelly , Roberto Elosua , Jose C. Florez , Stacey B. Gabriel , Gad Getz , Stephen J. Glatt , Christina M. Hultman , Sekar Kathiresan , Markku Laakso , Steven McCarroll , Mark I. McCarthy , Dermot McGovern , Ruth McPherson , Benjamin M. Neale , Aarno Palotie , Shaun M. Purcell , Danish Saleheen , Jeremiah M. Scharf , Pamela Sklar , Patrick F. Sullivan , Jaakko Tuomilehto , Ming T. Tsuang , Hugh C. Watkins , James G. Wilson , Mark J. Daly , and Daniel G. MacArthur . Analysis of protein-coding genetic variation in 60,706 humans . Nature , 536 ( 7616 ): 285 – 291 , August 2016 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed Web of Science [35]. ↵ Leonard P. Freedman , Iain M. Cockburn , and Timothy S. Simcoe . The Economics of Reproducibility in Preclinical Research . PLOS Biology , 13 ( 6 ): e1002165 , June 2015 . Publisher: Public Library of Science . OpenUrl CrossRef PubMed [36]. ↵ Aroon D. Hingorani , Jasmine Gratton , Chris Finan , A. Floriaan Schmidt , Riyaz Patel , Reecha Sofat , Valerie Kuan , Claudia Langenberg , Harry Hemingway , Joan K. Morris , and Nicholas J. Wald . Performance of polygenic risk scores in screening, prediction, and risk stratification: secondary analysis of data in the Polygenic Score Catalog . BMJ Medicine , 2 ( 1 ), October 2023 . Publisher: BMJ Publishing Group Ltd . [37]. ↵ C. Glenn Begley and John P.A. Ioannidis . Reproducibility in Science . Circulation Research , 116 ( 1 ): 116 – 126 , January 2015 . Publisher: American Heart Association . OpenUrl Abstract / FREE Full Text [38]. ↵ Francis S. Collins and Lawrence A. 
Tabak . Policy: NIH plans to enhance reproducibility . Nature , 505 ( 7485 ): 612 – 613 , January 2014 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed Web of Science [39]. ↵ Nicholas J. Wald and Robert Old . The illusion of polygenic disease risk prediction . Genetics in Medicine: Official Journal of the American College of Medical Genetics , 21 ( 8 ): 1705 – 1707 , August 2019 . OpenUrl PubMed [40]. ↵ Alicia R. Martin , Masahiro Kanai , Yoichiro Kamatani , Yukinori Okada , Benjamin M. Neale , and Mark J. Daly . Clinical use of current polygenic risk scores may exacerbate health disparities . Nature Genetics , 51 ( 4 ): 584 – 591 , April 2019 . OpenUrl CrossRef PubMed [41]. ↵ Daniel McGuire , Yu Jiang , Mengzhen Liu , J. Dylan Weissenkampen , Scott Eckert , Lina Yang , Fang Chen , Arthur Berg , Scott Vrieze , Bibo Jiang , Robbee Wedow , Qunhua Li , and Dajiang J. Liu . Model-based assessment of replicability for genome-wide association meta-analysis . Nature Communications , 12 ( 1 ): 1964 , March 2021 . Publisher: Nature Publishing Group . OpenUrl PubMed [42]. ↵ Ruth Heller , Shay Yaacoby , and Daniel Yekutieli. repfdr: a tool for replicability analysis for genome-wide association studies . Bioinformatics , 30 ( 20 ): 2971 – 2972 , October 2014 . OpenUrl CrossRef PubMed [43]. ↵ David Amar , Ron Shamir , and Daniel Yekutieli . Extracting replicable associations across multiple studies: Empirical Bayes algorithms for controlling the false discovery rate . PLOS Computational Biology , 13 ( 8 ): e1005700 , August 2017 . Publisher: Public Library of Science . OpenUrl [44]. ↵ Kazuyoshi Ishigaki , Saori Sakaue , Chikashi Terao , Yang Luo , Kyuto Sonehara , Kensuke Yamaguchi , Tiffany Amariuta , Chun Lai Too , Vincent A. Laufer , Ian C. Scott , Sebastien Viatte , Meiko Takahashi , Koichiro Ohmura , Akira Murasawa , Motomu Hashimoto , Hiromu Ito , Mohammed Hammoudeh , Samar Al Emadi , Basel K. Masri , Hussein Halabi , Humeira Badsha , Imad W. 
Uthman , Xin Wu , Li Lin , Ting Li , Darren Plant , Anne Barton , Gisela Orozco , Suzanne M. M. Verstappen , John Bowes , Alexander J. MacGregor , Suguru Honda , Masaru Koido , Kohei Tomizuka , Yoichiro Kamatani , Hiroaki Tanaka , Eiichi Tanaka , Akari Suzuki , Yuichi Maeda , Kenichi Yamamoto , Satoru Miyawaki , Gang Xie , Jinyi Zhang , Christopher I. Amos , Edward Keystone , Gertjan Wolbink , Irene van der Horst-Bruinsma , Jing Cui , Katherine P. Liao , Robert J. Carroll , Hye-Soon Lee , So-Young Bang , Katherine A. Siminovitch , Niek de Vries , Lars Alfredsson , Solbritt Rantapää-Dahlqvist , Elizabeth W. Karlson , Sang-Cheol Bae , Robert P. Kimberly , Jeffrey C. Edberg , Xavier Mariette , Tom Huizinga , Philippe Dieudé , Matthias Schneider , Martin Kerick , Joshua C. Denny , Koichi Matsuda , Keitaro Matsuo , Tsuneyo Mimori , Fumihiko Matsuda , Keishi Fujio , Yoshiya Tanaka , Atsushi Kumanogoh , Matthew Traylor , Cathryn M. Lewis , Stephen Eyre , Huji Xu , Richa Saxena , Thurayya Arayssi , Yuta Kochi , Katsunori Ikari , Masayoshi Harigai , Peter K. Gregersen , Kazuhiko Yamamoto , S. Louis Bridges , Leonid Padyukov , Javier Martin , Lars Klareskog , Yukinori Okada , and Soumya Raychaudhuri . Multi-ancestry genome-wide association analyses identify novel genetic mechanisms in rheumatoid arthritis . Nature Genetics , 54 ( 11 ): 1640 – 1651 , November 2022 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed [45]. 
↵ Satoshi Koyama , Kaoru Ito , Chikashi Terao , Masato Akiyama , Momoko Horikoshi , Yukihide Momozawa , Hiroshi Matsunaga , Hirotaka Ieki , Kouichi Ozaki , Yoshihiro Onouchi , Atsushi Takahashi , Seitaro No-mura , Hiroyuki Morita , Hiroshi Akazawa , Changhoon Kim , Jeong-sun Seo , Koichiro Higasa , Motoki Iwasaki , Taiki Yamaji , Norie Sawada , Shoichiro Tsugane , Teruhide Koyama , Hiroaki Ikezaki , Naoyuki Takashima , Keitaro Tanaka , Kokichi Arisawa , Kiyonori Kuriki , Mariko Naito , Kenji Wakai , Shinichiro Suna , Yasuhiko Sakata , Hiroshi Sato , Masatsugu Hori , Yasushi Sakata , Koichi Matsuda , Yoshinori Mu-rakami , Hiroyuki Aburatani , Michiaki Kubo , Fumihiko Matsuda , Yoichiro Kamatani , and Issei Komuro . Population-specific and trans-ancestry genome-wide analyses identify distinct and shared genetic risk loci for coronary artery disease . Nature Genetics , 52 ( 11 ): 1169 – 1177 , November 2020 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed [46]. ↵ Ming-Huei Chen , Laura M. Raffield , Abdou Mousas , Saori Sakaue , Jennifer E. Huffman , Arden Moscati , Bhavi Trivedi , Tao Jiang , Parsa Akbari , Dragana Vuckovic , Erik L. Bao , Xue Zhong , Regina Manansala , Véronique Laplante , Minhui Chen , Ken Sin Lo , Huijun Qian , Caleb A. Lareau , Mélissa Beaudoin , Karen A. Hunt , Masato Akiyama , Traci M. Bartz , Yoav Ben-Shlomo , Andrew Beswick , Jette Bork- Jensen , Erwin P. Bottinger , Jennifer A. Brody , Frank J. A. van Rooij , Kumaraswamynaidu Chitrala , Kelly Cho , Hélène Choquet , Adolfo Correa , John Danesh , Emanuele Di Angelantonio , Niki Dimou , Jingzhong Ding , Paul Elliott , Tõnu Esko , Michele K. Evans , James S. Floyd , Linda Broer , Niels Grarup , Michael H. Guo , Andreas Greinacher , Jeff Haessler , Torben Hansen , Joanna M. M. Howson , Qin Qin Huang , Wei Huang , Eric Jorgenson , Tim Kacprowski , Mika Kähönen , Yoichiro Kamatani , Masahiro Kanai , Savita Karthikeyan , Fotis Koskeridis , Leslie A. Lange , Terho Lehtimäki , Markus M. 
Lerch , Allan Linneberg , Yongmei Liu , Leo-Pekka Lyytikäinen , Ani Manichaikul , Hilary C. Martin , Koichi Matsuda , Karen L. Mohlke , Nina Mononen , Yoshinori Murakami , Girish N. Nadkarni , Matthias Nauck , Kjell Nikus , Willem H. Ouwehand , Nathan Pankratz , Oluf Pedersen , Michael Preuss , Bruce M. Psaty , Olli T. Raitakari , David J. Roberts , Stephen S. Rich , Benjamin A. T. Rodriguez , Jonathan D. Rosen , Jerome I. Rotter , Petra Schubert , Cassandra N. Spracklen , Praveen Surendran , Hua Tang , Jean-Claude Tardif , Richard C. Trembath , Mohsen Ghanbari , Uwe Völker , Henry Völzke , Nicholas A. Watkins , Alan B. Zonderman , Peter W. F. Wilson , Yun Li , Adam S. Butterworth , Jean-François Gauchat , Charleston W. K. Chiang , Bingshan Li , Ruth J. F. Loos , William J. Astle , Evangelos Evangelou , David A. van Heel , Vijay G. Sankaran , Yukinori Okada , Nicole Soranzo , Andrew D. Johnson , Alexander P. Reiner , Paul L. Auer , and Guillaume Lettre . Trans-ethnic and Ancestry-Specific Blood-Cell Genetics in 746,667 Individuals from 5 Global Populations . Cell , 182 ( 5 ): 1198 – 1213 .e14, September 2020 . OpenUrl CrossRef PubMed [47]. ↵ Corbin Quick , Pramod Anugu , Solomon Musani , Scott T. Weiss , Esteban G. Burchard , Marquitta J. White , Kevin L. Keys , Francesco Cucca , Carlo Sidore , Michael Boehnke , and Christian Fuchsberger . Sequencing and imputation in GWAS: Cost-effective strategies to increase power and genomic coverage across diverse populations . Genetic Epidemiology , 44 ( 6 ): 537 – 549 , 2020 . eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/gepi.22326 . OpenUrl CrossRef PubMed [48]. ↵ Dominic M. H. Tong and Ryan D. Hernandez . Population genetic simulation study of power in association testing across genetic architectures and study designs . Genetic Epidemiology , 44 ( 1 ): 90 – 103 , January 2020 . OpenUrl PubMed [49]. ↵ Joelle Mbatchou , Leland Barnard , Joshua Backman , Anthony Marcketta , Jack A. 
Kosmicki , Andrey Ziyatdinov , Christian Benner , Colm O’Dushlaine , Mathew Barber , Boris Boutkov , Lukas Habegger , Manuel Ferreira , Aris Baras , Jeffrey Reid , Goncalo Abecasis , Evan Maxwell , and Jonathan Marchini . Computationally efficient whole-genome regression for quantitative and binary traits . Nature Genetics , 53 ( 7 ): 1097 – 1103 , July 2021 . OpenUrl CrossRef PubMed [50]. ↵ Cristen J. Willer , Yun Li , and Gonçalo R. Abecasis . METAL: fast and efficient meta-analysis of genomewide association scans . Bioinformatics , 26 ( 17 ): 2190 – 2191 , September 2010 . OpenUrl CrossRef PubMed Web of Science [51]. ↵ Hufeng Zhou , Theodore Arapoglou , Xihao Li , Zilin Li , Xiuwen Zheng , Jill Moore , Abhijith Asok , Sushant Kumar , Elizabeth E Blue , Steven Buyske , Nancy Cox , Adam Felsenfeld , Mark Gerstein , Eimear Kenny , Bingshan Li , Tara Matise , Anthony Philippakis , Heidi L Rehm , Heidi J Sofia , Grace Snyder , NHGRI Genome Sequencing Program Variant Functional Annotation Working Group , Zhiping Weng , Benjamin Neale , Shamil R Sunyaev , and Xihong Lin . FAVOR: functional annotation of variants online resource and annotator for variation across the human genome . Nucleic Acids Research , 51 ( D1 ): D1300 – D1311 , January 2023 . OpenUrl CrossRef PubMed [52]. ↵ Menno P. Creyghton , Albert W. Cheng , G. Grant Welstead , Tristan Kooistra , Bryce W. Carey , Eve-line J. Steine , Jacob Hanna , Michael A. Lodato , Garrett M. Frampton , Phillip A. Sharp , Laurie A. Boyer , Richard A. Young , and Rudolf Jaenisch . Histone H3K27ac separates active from poised enhancers and predicts developmental state . Proceedings of the National Academy of Sciences , 107 ( 50 ): 21931 – 21936 , December 2010 . Publisher: Proceedings of the National Academy of Sciences . OpenUrl Abstract / FREE Full Text [53]. ↵ Alvaro Rada-Iglesias , Ruchi Bajpai , Tomek Swigut , Samantha A. Brugmann , Ryan A. Flynn , and Joanna Wysocka . 
A unique chromatin signature uncovers early developmental enhancers in humans . Nature , 470 ( 7333 ): 279 – 283 , February 2011 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed Web of Science [54]. ↵ Adam Siepel , Gill Bejerano , Jakob S. Pedersen , Angie S. Hinrichs , Minmei Hou , Kate Rosenbloom , Hiram Clawson , John Spieth , LaDeana W. Hillier , Stephen Richards , George M. Weinstock , Richard K. Wilson , Richard A. Gibbs , W. James Kent , Webb Miller , and David Haussler . Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes . Genome Research , 15 ( 8 ): 1034 – 1050 , August 2005 . Company: Cold Spring Harbor Laboratory Press Distributor: Cold Spring Harbor Laboratory Press Institution: Cold Spring Harbor Laboratory Press Label: Cold Spring Harbor Laboratory Press Publisher: Cold Spring Harbor Lab . OpenUrl Abstract / FREE Full Text [55]. ↵ Katherine S. Pollard , Melissa J. Hubisz , Kate R. Rosenbloom , and Adam Siepel . Detection of non-neutral substitution rates on mammalian phylogenies . Genome Research , 20 ( 1 ): 110 – 121 , January 2010 . Company: Cold Spring Harbor Laboratory Press Distributor: Cold Spring Harbor Laboratory Press In-stitution: Cold Spring Harbor Laboratory Press Label: Cold Spring Harbor Laboratory Press Publisher: Cold Spring Harbor Lab . OpenUrl Abstract / FREE Full Text View the discussion thread. Back to top Previous Next Posted October 03, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following Formal Statistical Replication Analysis in Lung Cancer Genome-Wide Association Studies Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Formal Statistical Replication Analysis in Lung Cancer Genome-Wide Association Studies Yung-Han Chang , Jinyoung Byun , Bryan R. Gorman , Rayjean J. Hung , James D. McKay , Christopher I. Amos , Saiju Pyarajan , Arjun Bhattacharya , Ryan Sun medRxiv 2025.10.02.25337130; doi: https://doi.org/10.1101/2025.10.02.25337130 Share This Article: Copy Citation Tools Formal Statistical Replication Analysis in Lung Cancer Genome-Wide Association Studies Yung-Han Chang , Jinyoung Byun , Bryan R. Gorman , Rayjean J. Hung , James D. McKay , Christopher I. Amos , Saiju Pyarajan , Arjun Bhattacharya , Ryan Sun medRxiv 2025.10.02.25337130; doi: https://doi.org/10.1101/2025.10.02.25337130 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) 
Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffc24dc394941e2',t:'MTc3OTQ1NjI5Ng=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00