Multi-omics integration predicts 17 disease incidences in the UK Biobank

doi:10.1101/2025.08.01.25332841

Multi-omics integration predicts 17 disease incidences in the UK Biobank

2025 · doi:10.1101/2025.08.01.25332841

preprint OA: closed CC-BY-NC-4.0

📄 Open PDF Full text JSON View at publisher

Full text 53,482 characters · extracted from preprint-html · click to expand

Multi-omics integration predicts 17 disease incidences in the UK Biobank | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Multi-omics integration predicts 17 disease incidences in the UK Biobank View ORCID Profile Jiawen Du , Muqing Zhou , View ORCID Profile Laura M. 
Raffield , Ruihai Zhou , Yun Li , Can Chen , View ORCID Profile Quan Sun doi: https://doi.org/10.1101/2025.08.01.25332841 Jiawen Du 1 Department of Biostatistics, University of North Carolina at Chapel Hill , Chapel Hill, NC, USA MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jiawen Du Muqing Zhou 2 Department of Genetics, University of North Carolina at Chapel Hill , Chapel Hill, NC, USA MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Laura M. Raffield 2 Department of Genetics, University of North Carolina at Chapel Hill , Chapel Hill, NC, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Laura M. Raffield Ruihai Zhou 3 Division of Cardiology, University of North Carolina at Chapel Hill , Chapel Hill, NC, USA MSc, MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yun Li 1 Department of Biostatistics, University of North Carolina at Chapel Hill , Chapel Hill, NC, USA 2 Department of Genetics, University of North Carolina at Chapel Hill , Chapel Hill, NC, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Can Chen 1 Department of Biostatistics, University of North Carolina at Chapel Hill , Chapel Hill, NC, USA 4 Carolina Health Informatics Program, University of North Carolina , Chapel Hill, NC, USA 5 School of Data Science and Society, University of North Carolina , Chapel Hill, NC, USA 6 Department of Mathematics, University of North Carolina , Chapel Hill, NC, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Quan Sun 1 Department of Biostatistics, University of North Carolina at Chapel Hill , Chapel Hill, NC, USA 7 Department of Pathology and Laboratory Medicine, University of Pennsylvania , Philadelphia, PA, 
USA 8 Center for Computational and Genomic Medicine, Children’s Hospital of Philadelphia , Philadelphia, PA, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Quan Sun For correspondence: sunq{at}chop.edu Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Importance Traditional clinical predictors for disease risks have limitations in capturing underlying disease complexity. Multi-omics technologies, such as metabolomics and proteomics, offer deeper molecular perspectives that could enhance risk prediction, but large-scale studies integrating the two omics are scarce. Objectives The primary objective is to systematically evaluate whether adding metabolomics and/or proteomics data to traditional clinical predictors improves risk prediction for 17 common incident diseases. A secondary objective is to identify key disease-related omics features. Data Sources and Participants Our study incorporated 23,776 UK Biobank participants who had complete baseline omics data for 159 NMR-based metabolites and 2,923 Olink affinity-based proteins. Main Outcomes and Measures We evaluated the model prediction of 17 incident diseases by fitting Cox proportional hazard models and obtaining Harrell’s C-index. Feature importance scores were calculated to identify key molecules contributing to each disease risk prediction. Results Adding omics data significantly improved risk prediction for all 17 diseases compared to models with clinical predictors alone (p-value < 2E-4). Proteomics-only models generally demonstrated superior predictive performance over metabolomics-only models for 14 of the 17 endpoints. We also identified key proteins, including established biomarkers like KLK3 (PSA) for prostate cancer and CRYBB2 for cataracts. 
Conclusion and Relevance Integration of Olink proteomics, and to a lesser extent Nightingale metabolomics, substantially improves risk prediction for a wide range of common diseases beyond established clinical factors. These findings highlight the clinical utility of proteomics for enhancing individual risk prediction and provide molecular insights into disease mechanisms, which may potentially guide future therapeutic development. Question Do multi-omics profiles improve disease risk prediction compared to models using only traditional clinical risk factors and what is the best strategy to integrate metabolomics and proteomics in disease prediction? Findings In this study, we investigated 17 incident diseases across 23,776 UK Biobank individuals with complete records of both Nightingale metabolomics and Olink proteomics profiles, and found that integrating omics data significantly enhanced disease prediction over traditional approaches, with Olink proteomics consistently providing more predictive power than Nightingale metabolomics for most diseases. We also identified key proteins, including both well-established ones like KLK3 (PSA) for prostate cancer and potential novel ones like PRG3 for skin cancer. We also connected diseases with medication, socioeconomic, demographic, and lifestyle risk factors through these key proteins. Meaning Our findings suggest the potential clinical utility of integrating multi-omics in risk prediction and biomedical discoveries. To the best of our knowledge, our study is currently the largest to systematically evaluate contributions of both metabolomics and proteomics profiles to the prediction of various incident clinical endpoints. Introduction Effective risk stratification is fundamental to the prevention, early detection, and management of various diseases 1 . 
Clinical risk assessment of disease outcome has primarily relied on established risk factors 2 , including demographic attributes, routine laboratory measurements, and sometimes lifestyle behaviors. These traditional predictors, while valuable, may not capture the underlying complex disease mechanisms. Recent advances in high-throughput omics technologies have significantly enhanced our ability to characterize biological processes at the molecular level, thereby holding the potential for improving risk assessment, disease prediction, and personalized interventions 3 – 5 . Metabolomics has emerged as a robust and cost-effective method to quantify metabolites circulating in the blood. Previous studies have demonstrated the potential clinical utility of nuclear magnetic resonance (NMR)-based metabolomics, establishing their additive values beyond conventional predictors in predicting various clinical endpoints 6 – 9 . Similarly, proteomics, measuring proteins in various sample types including biofluids and tissues, has also been utilized to understand disease mechanisms and improve clinical risk predictions 10 – 12 . The UK Biobank has played a key role in exploring such disease-omics relationships at a large scale 9 , 10 . However, studies to comprehensively investigate the prediction power integrating these two omics remain scarce. Leveraging these complementary omics profiles holds significant promise to enhance disease prediction in clinical practice, potentially leading to more accurate diagnoses, refined prognostic prediction, and ultimately more personalized treatment strategies 13 . To fill in the gap, we leveraged 159 metabolites (NMR based Nightingale platform) and 2,923 proteins (Affinity-based Olink 3k proteomics) measured in 23,776 individuals from the UK Biobank (UKB) to systematically evaluate whether incorporating metabolomics and proteomics data enhances prediction of 17 incident disease endpoints compared to traditional clinical predictors alone. 
Observing that proteomics provided robust risk prediction across most of the diseases, we further investigated top contributing proteins and identified enriched demographic, medical and socioeconomic protein correlates for each disease, leveraging external results 14 . Our study not only develops better risk prediction models, but also provides insights into a wide range of disease risk factors. Methods Omics Data and Covariates Imputation The UKB metabolomics data, collected from the targeted Nightingale NMR based platform, were measured in 275,241 participants 9 . Proteomics profiling was conducted using Olink on blood plasma samples from 54,219 individuals 15 . We performed quality control (QC) on both omics ( Supplemental Methods ) and included 23,776 individuals as our study participants who had complete records of 159 metabolites and 2,923 proteins available ( eTables 1-2 ). We considered three nested baseline clinical predictor sets following a previous study 7 : (1) age+sex; (2) ASCVD, a set of cardiovascular-related predictors, and (3) PANEL, a more comprehensive set of clinical predictors including demographic, lifestyle, and selected laboratory measurements ( Figure 1a , eTable 3 ). We assumed missing covariates were occurring at random within each recruitment center, and performed multiple imputations using chained equations (MICE) 16 , 17 with random forest separately for each recruitment center ( Supplemental Methods ). Download figure Open in new tab Figure 1. Study overview. a. Overview of the three baseline predictor sets. Three nested baseline clinical predictor sets were considered in this study: (1) age+sex; (2) ASCVD, a set of commonly used cardiovascular-related predictors, and (3) PANEL, a more comprehensive set of clinical predictors including demographic, lifestyle, and selected laboratory measurements. Details of (2) and (3) are included in eTable 3 . b. Overview of the study design. 
This study included 23,776 UKB individuals with complete records of 159 metabolites and 2,923 proteins. Within this cohort, we first adjusted for one baseline clinical predictor set (X) and obtained martingale residuals using CoxPH models for each endpoint (step 1). We did both inner and outer five-fold stratified CV. For the inner CV, we split the entire cohort into five folds separately for each trait to ensure an equal number of cases in each fold, yielding the prediction across all individuals using mixOmics. Models were evaluated by adding the predicted residuals back to the CoxPH model using Harrell’s C-index (step 2). The outer CV repeated this procedure five times with different random splits to assess variations in model performance (step 3). We also identified key omics features (i.e. metabolites and proteins) for each endpoint by estimating the feature importance scores (step 4). Survival Endpoints Definition and Covariate Effect Adjustment We investigated 17 endpoints, encompassing cancers, cardiovascular disorders, neurological disorders, systemic diseases, respiratory disorders, ophthalmic conditions, and musculoskeletal conditions, which were classified into 3 categories – cardiometabolic, cancers, and others ( eTable 4 ). These clinical endpoints were defined by ICD10 codes following a previous publication 7 . Participants diagnosed with the specific condition prior to the baseline assessment were excluded. We confirmed that each endpoint has an incidence rate exceeding 3% in the study cohort. Analysis for highly sex-differentiated endpoints (i.e., breast cancer and prostate cancer) was restricted to the respective populations. To control for covariates, we fitted Cox proportional hazard (PH) models for each outcome adjusting for the recruitment center and covariates. 
We calculated martingale residuals 18 ( Figure 1b , step 1 ), which represented the survival outcome after accounting for baseline predictor effects and were subsequently used as labels (prediction outcomes) for downstream modeling. Model Fitting, Evaluation, and Feature Importance Assessment We utilized the R package mixOmics 19 (the “block.spls” function) to perform a multivariate model training with omics data for outcome (martingale residuals) prediction. We specified 10 latent components while maintaining the default settings for the remaining parameters, and performed both inner and outer five-fold stratified cross-validation (CV). For the inner CV, we split the entire cohort into five folds separately for each trait to ensure an equal number of cases in each fold, yielding predictions across all individuals ( Figure 1b , step 2 ). The outer CV repeated this procedure five times with different random splits ( Figure 1b , step 3 ). We evaluated the prediction performance of three omics models, namely metabolomics-only, proteomics-only, and combined-omics, by fitting the Cox PH models again with the predicted residuals added to the model (i.e. endpoint ∼ baseline clinical predictor set + recruitment center + predicted residual). Finally, Harrell’s C-index was calculated and used as the evaluation metric ( Figure 1b , step 3 ). We also assessed important omics features contributing most to omics prediction models ( Figure 1b , step 4; Supplemental Methods ). Focusing on proteomics, we ranked all proteins by their normalized feature importance scores, separately for each endpoint and each baseline adjustment. Factors enriched with top contributing proteins To further explore the potential known drivers of variance in the identified disease-associated proteins, we queried enrichment results from an external study 14 linking proteins with multiple categories of factors, including basic demographics, medication usage, and socioeconomic factors. 
For each disease, we selected key proteins with an absolute normalized importance score (z-score) > 1.96 in our models, corresponding to a two-tailed p-value < 0.05, from models with the comprehensive PANEL baseline adjustment. For our enrichment queries, significance was defined as false discovery rate (FDR) < 0.05 and odds ratio (OR) > 1. Results Characteristics of the study cohort The final study cohort comprises 23,776 UKB participants with complete metabolomics and proteomics profiles. The mean age at enrollment was 56.9 years, ranging from 39 to 70, with 10,870 (45.7%) males. Observed incidence for the 17 investigated diseases varied, ranging from 3.0% for Dementia to 14.6% for Cataracts ( Table 1 ). View this table: View inline View popup Table 1. Characteristics of the study cohort. Enhanced prediction accuracy including omics profiles We first evaluated whether integrating metabolomics and/or proteomics offered improved prediction power over baseline models based solely on one of the three clinical predictor sets ( Methods ). Across all the 17 traits, adding omics data (metabolomics, proteomics, or their combination) to any baseline consistently yielded statistically significant improvement in predictive performance ( Figure 2 , eTable 5 ), with mean C-index increment of 0.063, 0.034 and 0.030 compared to the three baseline models, respectively (p-values ranging from 3.5E-18 to 2.1E-4). For example, compared to the baseline PANEL model, adding proteomics alone resulted in a C-index increment from 0.660 to 0.750 for prostate cancer (p-value = 1.36E-7, two-sided t-test), indicating the value of omics data beyond clinical risk factors. Download figure Open in new tab Figure 2. Absolute C-index for all 17 investigated endpoints. Each dot represents the mean C-index in the outer 5-fold CV with error bars indicating one standard deviation (SD). For baseline models, C-index was calculated from CoxPH models only including baseline predictors. 
For omics models, C-index was calculated by adding the predicted martingale residuals back to the baseline CoxPH model. Comparison between proteomics, metabolomics, and combined models Comparing the individual contributions of these two omics, proteomics profiles generally showed superior predictive capability. Specifically, with PANEL baseline predictors, for 14 out of 17 traits (except for glaucoma, type 2 diabetes and breast cancer), proteomics-only models achieved significantly higher C-index values than metabolomics-only models (mean C-index difference 0.019, p-value = 1.22E-3, Figure 2 ). As an example, asthma consistently showed the greatest advantages of proteomics-only models over metabolomics-only models, with C-index differences of 0.081, 0.078 and 0.060 (p-values = 1.72E-12, 8.07E-14 and 2.90E-8) across age+sex, ASCVD and PANEL clinical predictors, respectively. Note that the number of measured proteins (n = 2,923) is substantially larger than metabolites (n = 159), which likely explains the larger contribution of proteomics models to risk predictions. Caution is needed to generalize this conclusion which may be specific to the diseases under study, and to the specific proteomics and metabolomics platforms used. Compared to single-omics models, performance of the combined models showed variations across traits and baseline predictors, with only renal disease and chronic obstructive pulmonary disease (COPD) demonstrating consistently significant improvements across all three baseline sets ( Figure 3 ). For other diseases, including both omics outperforms single-omics models for 9 of 15 traits in at least one baseline predictor set, with a trend showing less pronounced advantages of combined-omics models when including more comprehensive clinical predictors in PANEL models ( Supplemental Results, eFigures 1-2 ). Download figure Open in new tab Figure 3. Key proteins for cardiometabolic diseases adjusting for PANEL baseline predictors. 
This heatmap shows identified key proteins associated with cardiometabolic disease (MACE, heart failure, CHD, PAD, AFib, and type 2 diabetes), after adjusting for the PANEL baseline clinical predictors. We selected the top 5 important proteins (regardless of direction) for each disease and displayed the union of them. Color indicates the direction of association, with intensity showing the association strength. The normalized importance scores are displayed inside each grid. On the other hand, we observed only one trait, glaucoma, for which including metabolomics alone (C-index 0.716) showed significant, albeit small, improvements compared to either proteomics-only (C-index 0.712, p-value = 5.5E-4) or combined-omics (C-index 0.712, p-value = 2.2E-4) models with PANEL baseline predictors. Known disease associations confirmed by top contributing proteins and metabolites As proteomics-only models demonstrated robust superior performance across various diseases, we then sought to identify key proteins contributing most to predictions, especially those that remained significant even after accounting for the extensive PANEL predictors ( Methods, eTable 6, eFigures 3-22 ). We first highlight some notable examples with established protein-disease relationships. For example, we identified kallikrein-related peptidase 3/prostate-specific antigen (KLK3/PSA), a well-established prognostic marker, for prostate cancer 20 , which showed the strongest association among all the protein-disease pairs (z-score = 19.2, eFigure 7 ). Additionally, we confirmed a key determinant protein, Crystallin Beta B2 (CRYBB2), for cataracts 21 , 22 ( eFigure 10 , z-score = 17.8); as well as Glial Fibrillary Acidic Protein (GFAP) 23 , Apolipoprotein E (APOE) 24 and VGF 25 for dementia ( eFigure 22 ). We also observed a consistent pattern of top-ranking proteins across cardiovascular traits ( Figure 3 ). 
Even after adjusting for PANEL baseline, several well-established biomarkers were found prominent across atrial fibrillation (AFib), heart failure, coronary heart disease (CHD) and major adverse cardiac events (MACE). For example, brain natriuretic peptide (BNP) and N-terminal pro-brain natriuretic peptide (NT-proBNP), widely used as indicators for clinical diagnosis of cardiac dysfunction 26 – 29 , showed strong positive associations with all the four cardiovascular conditions ( Figure 3 , eTable 6 ). Moreover, a significant negative association with Protein S (PROS1) was also consistently observed, aligning with previous studies linking PROS1 deficiency to increased risks of CHD 30 , 31 . At a more granular level, within these four cardiometabolic traits, we observed sub-patterns. MACE and CHD tend to have more similar proteomic signatures, sharing a significant positive association with lipoprotein(a) and proprotein convertase subtilisin/kexin type 9 (PCSK9) 32 . This sub-pattern was further confirmed when calculating correlations of protein importance scores for all proteins across traits ( Figure 4 ), where we observed that cardiovascular traits formed a cluster with high correlations among each other, particularly for MACE and CHD (correlation = 0.606). Likewise, heart failure and AFib shared Coagulation Factor X (F10) which is less significant in MACE or CHD. PAD has the most distinct patterns among these conditions. While it shared the strong positive associations with BNP and NT-proBNP, PAD was also uniquely characterized by strong negative associations with cytidine deaminase (CDA), NUBP1, and MAPK13, and a positive association with ICAM4. Although these are not yet well-established biomarkers for PAD, emerging evidence suggests a plausible role for the ICAM family in the inflammatory processes that increase PAD risk 33 , 34 , highlighting potential avenues for further investigation. 
These observations were largely consistent using age+sex and ASCVD baseline sets ( eFigure 3 and 4, eTable 6 ). Several other previously reported protein-disease links are summarized in Supplemental Results . Download figure Open in new tab Figure 4. Correlations of protein importance scores for all proteins across 17 investigated endpoints. This correlation heatmap shows Pearson correlations of protein importance scores for all involved proteins across each endpoint. There are some obvious clusters across traits. For example, cardiovascular traits (i.e. MACE, heart failure, CHD, atrial fibrillation and PAD) formed a cluster showing high protein importance correlations, with the strongest relationship observed between MACE and CHD (correlation = 0.606). Other notable sub-patterns include a significant correlation between the two ophthalmologic conditions, cataracts and glaucoma (correlation = 0.305), and between the two respiratory diseases, COPD and asthma (correlation = 0.256). Similarly, liver disease and renal disease, two conditions affecting major metabolic and excretory organs, demonstrate correlation in their protein importance scores (correlation = 0.274). For glaucoma, the only trait for which the metabolomics-only model performed best, we identified lactate as the top metabolite showing a protective effect after adjusting for the PANEL predictors ( eTable 7 ), which has been confirmed by previous studies 35 , 36 . Novel protein-disease relationships from top proteins Besides well-established protein evidence, our analysis also uncovered several novel relationships in models adjusting for PANEL predictors. For example, we identified two related novel proteins for skin cancer, Proteoglycan 3 (PRG3) and gamma-glutamyltransferase 5 (GGT5). 
Further Mendelian Randomization (MR) analysis ( Supplemental Methods ) revealed that there is a potential causal relationship between PRG3 protein level and skin cancer (inverse-variance weighted 37 OR=0.91, 95% CI: 0.89-0.93, p-value = 1.04E-16), with increased protein expression leading to reduced cancer risk. Sensitivity analysis using MR-Egger 38 to correct for potential horizontal pleiotropy also supported this relationship (MR-Egger intercept p-value = 0.07; MR-Egger OR = 0.94 (0.90-0.99), p-value = 0.019). Enrichment of top proteins in medication usage Leveraging our identified top contributing proteins, we queried results from an external study of UKB proteomics 14 and found significant enrichment for proteins associated with particular medications for 11 of the 17 investigated traits, resulting in 25 distinct drug-disease associations ( eTable 8 ). Six of these involved drugs uniquely associated with a single disease, while the remaining drugs were shared across multiple traits. We confirmed some expected drug-disease associations, including the widely prescribed anticoagulant Warfarin and the loop diuretic Furosemide for treating cardiovascular diseases (MACE, CHD, and AFib) ( eTable 8 ). Additionally, top proteins of COPD and asthma showed enrichment for common respiratory medications, including Salbutamol (OR = 10.24, 36.32; FDR = 3.82E-8, 3.34E-26), and the combination of Salmeterol and Fluticasone Propionate (OR = 7.85, 11.7, 36.32; FDR = 3.93E-3, 2.05E-5), while asthma was also uniquely associated with Prednisolone (OR = 3.49, FDR = 2.17E-4). We note that there are few current studies exploring the medication correlates of Olink-measured proteins in true external data (i.e. not in UKB itself). As such resources become available, it would be interesting to further contextualize the putative drivers of our top proteins contributing to disease prediction. We also uncovered new evidence supporting some existing but less well-established associations. 
For example, the lipid-lowering agents, Fenofibrate and Simvastatin, were associated with key proteins associated with breast cancer (OR=4.4, 2.78; FDR=2.21E-2, 9.57E-4). While some studies suggest that these two drugs tend to reduce the risk of breast cancer recurrence 39 , 40 , their mechanism remains unexplored. Furthermore, we found an association between Warfarin and top proteins for fracture risk (OR=3.16, FDR=1.04E-3), adding evidence to an early study suggesting the association between long-term Warfarin usage and increased osteoporotic fracture 41 . Future studies are needed to better understand these relationships. Enrichment of top proteins in Socioeconomics, lifestyle, and demographic factors In addition to the enriched medications, we also identified several other factors associated with various diseases, especially for cardiometabolic traits which had the most associations with socioeconomic and lifestyle factors (including qualification, average total household income, leisure/social activities, and physical activity frequency) ( eTable 9 ), consistent with previous findings that socioeconomic factors are a major determinant in cardiovascular disease risk 42 . Besides, a strong enrichment in average total household income was observed for COPD-associated proteins, which remained highly significant even after considering top proteins in the PANEL-adjusted model (OR=9.02, FDR=5.31E-7). This molecular-level finding is consistent with epidemiological data, which indicates a higher prevalence of COPD among adults with lower income levels 43 . 
We also confirmed expected relationships between diseases and basic demographic factors, including a strong association between incident COPD-associated proteins and smoking status (current, ever or never smoker) (OR=5.17, FDR=2.95E-19) ( eTable 10 ), reinforcing the central role of smoking in COPD pathophysiology even after accounting for extensive clinical risk factors including smoking status, possibly due to the misreported smoking status or smoking intensity information (for example pack-years, which is not included in our PANEL predictor set) that is captured by top proteins. Discussion In this study, we evaluated the incremental predictive performance of metabolomics and proteomics profiles, individually and in combination, upon three traditional clinical predictor sets for 17 incident diseases in 23,776 UKB participants. Compared to previous studies using metabolomics 7 or top 5 proteins for risk prediction 11 , we achieved further enhanced predictive power for most diseases by including comprehensive proteomics and demonstrated that proteomics generally contributed more than metabolomics under these specific platforms. We also found that including both omics may not necessarily achieve better prediction under the same training sample sizes, suggesting potential model over-fitting. Our results were based on a multi-omics integration method, mixOmics 19 , while we also performed sensitivity analyses using a different method, MOGONET 44 , which led to highly similar patterns of results ( Supplemental Methods, Supplemental Results, eFigure 23 ). To the best of our knowledge, our study is currently the largest to systematically evaluate contributions of both metabolomics and proteomics profiles in incident disease prediction. We identified top contributing proteins in disease prediction models, which reinforced discoveries from previous studies focusing on proteomics-only associations and predictions 10 , 11 . 
We also uncovered some novel protein-disease associations that were to our knowledge not reported before, including a potential causal relationship of PRG3 for skin cancer. Beyond these underlying important proteins, we further identified protein-associated medication, socioeconomic and demographic factors for each disease. Our results revealed known drug-disease treatment effects, for example, the association between Warfarin and multiple disease categories covering cardiovascular, respiratory, and metabolic domains ( eTable 8 ). Focusing on its pleiotropic effects, a study demonstrated that high dosage of warfarin was associated with increased expression of IL-6, COX-2, and TNF-α proteins 45 , where IL-6 genetic variants were associated with various biomarkers and diseases (e.g., blood pressure 46 , C-reactive protein 47 , cystatin C 48 , asthma 49 , COVID-19 50 , etc.), providing a possible explanation of the multiple disease domain enrichment for warfarin-associated proteins we observed here. We also identified some less documented drug-disease relationships, suggesting potential novel repurposing drug candidates or unexpected side effects, which warrant closer future investigation. Despite providing important insights, our study also has some limitations, most of which are due to data availability issues and point to potential future directions. First, we only included 159 metabolites measured on the NMR platform and 2,923 proteins on the Olink platform. Future studies are warranted to investigate whether our conclusions can be generalized to omics data from other platforms, for example untargeted metabolomics platforms, and to other cohorts and biobanks with differing recruitment strategies. Second, our omics profiles were collected at baseline recruitment, precluding capturing the information on disease progression. 
Another interesting direction is to leverage longitudinal measurements, or omics trajectories, which are not currently available, to predict disease incidence. Third, we only included two omics types in this study. Future studies may integrate other omics, for example, genomics through polygenic risk scores or epigenomic markers. Fourth, we imputed the missing covariates assuming randomness within each recruitment center, which may not be true in reality. We might also have missed some cases using hospitalization records only, without incorporation of death or primary care records. Lastly, given the observation that proteomics models showed robust performance, we largely focused on top proteins for various diseases, without regard to whether top proteins were disease-shared or specific. A previous study discussed alternative strategies using disease-specific proteins (i.e., unique to each disease) to separate out proteins that have broad influences on a wide spectrum of traits 11 , which deserves further investigation of a large number of diseases at the same time. Future studies could also explore the interactive effects between metabolomics and proteomics, which is beyond the scope of this study. In summary, we leveraged metabolomics and proteomics profiles in 23,776 UKB participants to investigate their predictive ability for 17 common incident diseases. Our models improved risk prediction even after accounting for the comprehensive PANEL clinical predictors, suggesting the value of omics profiles. Our identified key proteins and their enriched factors provide insights into potential novel disease mechanisms, repurposing drugs or unknown side effects, and a broad category of risk factors, holding promising clinical utility. Data Availability UKB data are available upon request from UK Biobank (https://www.ukbiobank.ac.uk/) with approval required. 
https://www.ukbiobank.ac.uk/ Data Sharing Statement UKB data are available upon request from UK Biobank ( https://www.ukbiobank.ac.uk/ ) with approval required. Declaration of interests The authors declare no competing interests. Acknowledgments This research has been conducted using the UK Biobank Resource under Application Number 25953. We thank the UKB participants and research team for enabling this study. This study was supported by NIH funding R01AR083790, U01HG011720, R01HL146500. Footnotes Emails: Jiawen Du, jiawen15{at}email.unc.edu ; Muqing Zhou, muqingz{at}unc.edu ; Laura M. Raffield, laura_raffield{at}unc.edu ; Ruihai Zhou, ruihai_zhou{at}med.unc.edu ; Can Chen, canc{at}unc.edu ; Yun Li, yun_li{at}med.unc.edu ; Quan Sun, sunq{at}chop.edu Abbreviation used AFib Atrial Fibrillation APOE Apolipoprotein E ASCVD a set of cardiovascular-related predictors BNP brain natriuretic peptide CDA cytidine deaminase CHD Coronary Heart Disease CI Confidence Interval COPD Chronic Obstructive Pulmonary Disease COX-2 Cyclooxygenase-2 CRYBB2 Crystallin Beta B2 CV cross-validation EOS eosinophil count FDR false discovery rate F10 Coagulation Factor X GFAP Glial Fibrillary Acidic Protein GGT5 gamma-glutamyltransferase 5 ICAM Intercellular Adhesion Molecule ICD International Classification of Diseases IL-6 Interleukin-6 IVW inverse-variance weighted KLK3 kallikrein-related peptidase 3 MACE Major Adverse Cardiac Events MAPK13 Mitogen-activated protein kinase 13 MICE multiple imputations using chained equations MR Mendelian Randomization NMR nuclear magnetic resonance NT-proBNP N-terminal pro-brain natriuretic peptide NUBP1 Nucleotide-binding protein 1 OR odds ratio PAD Peripheral Artery Disease PANEL a comprehensive set of clinical predictors PCSK9 proprotein convertase subtilisin/kexin type 9 PH proportional hazard PRG3 Proteoglycan 3 PROS1 Protein S PSA prostate-specific antigen QC quality control SD standard deviation TNF-α Tumor necrosis factor alpha T2 Diabetes Type 2 
Diabetes UKB UK Biobank References 1. ↵ WHO CVD Risk Chart Working Group . World Health Organization cardiovascular disease risk charts: revised models to estimate risk in 21 global regions . Lancet Glob Health . 2019 ; 7 ( 10 ): e1332 – e1345 . doi: 10.1016/S2214-109X(19)30318-3 OpenUrl CrossRef 2. ↵ Goff DC , Lloyd-Jones DM , Bennett G , et al. 2013 ACC/AHA guideline on the assessment of cardiovascular risk: a report of the American College of Cardiology/American Heart Association Task Force on Practice Guidelines . Circulation . 2014 ; 129 ( 25 Suppl 2 ): S49 – 73 . doi: 10.1161/01.cir.0000437741.48606.98 OpenUrl FREE Full Text 3. ↵ Chen C , Wang J , Pan D , et al. Applications of multi-omics analysis in human diseases . MedComm . 2023 ; 4 ( 4 ): e315 . doi: 10.1002/mco2.315 OpenUrl CrossRef 4. Garg M , Karpinski M , Matelska D , et al. Disease prediction with multi-omics and biomarkers empowers case-control genetic discoveries in the UK Biobank . Nat Genet . 2024 ; 56 ( 9 ): 1821 – 1831 . doi: 10.1038/s41588-024-01898-1 OpenUrl CrossRef PubMed 5. ↵ Collins FS , Varmus H . A new initiative on precision medicine . N Engl J Med . 2015 ; 372 ( 9 ): 793 – 795 . doi: 10.1056/NEJMp1500523 OpenUrl CrossRef PubMed Web of Science 6. ↵ Galal A , Talal M , Moustafa A . Applications of machine learning in metabolomics: Disease modeling and classification . Front Genet . 2022 ; 13 : 1017340 . doi: 10.3389/fgene.2022.1017340 OpenUrl CrossRef PubMed 7. ↵ Buergel T , Steinfeldt J , Ruyoga G , et al. Metabolomic profiles predict individual multidisease outcomes . Nat Med . 2022 ; 28 ( 11 ): 2309 – 2320 . doi: 10.1038/s41591-022-01980-3 OpenUrl CrossRef PubMed 8. Würtz P , Havulinna AS , Soininen P , et al. Metabolite profiling and cardiovascular event risk: a prospective study of 3 population-based cohorts . Circulation . 2015 ; 131 ( 9 ): 774 – 785 . doi: 10.1161/CIRCULATIONAHA.114.013116 OpenUrl Abstract / FREE Full Text 9. ↵ Julkunen H , Cichońska A , Tiainen M , et al. 
Atlas of plasma NMR biomarkers for health and disease in 118,461 individuals from the UK Biobank . Nat Commun . 2023 ; 14 ( 1 ): 604 . doi: 10.1038/s41467-023-36231-7 OpenUrl CrossRef PubMed 10. ↵ Deng YT , You J , He Y , et al. Atlas of the plasma proteome in health and disease in 53,026 adults . Cell . 2025 ; 188 ( 1 ): 253 – 271 .e7. doi: 10.1016/j.cell.2024.10.045 OpenUrl CrossRef 11. ↵ Carrasco-Zanini J , Pietzner M , Davitte J , et al. Proteomic signatures improve risk prediction for common and rare diseases . Nat Med . 2024 ; 30 ( 9 ): 2489 – 2498 . doi: 10.1038/s41591-024-03142-z OpenUrl CrossRef PubMed 12. ↵ Suhre K , Arnold M , Bhagwat AM , et al. Connecting genetic risk to disease end points through the human blood plasma proteome . Nat Commun . 2017 ; 8 : 14357 . doi: 10.1038/ncomms14357 OpenUrl CrossRef PubMed 13. ↵ Hasin Y , Seldin M , Lusis A . Multi-omics approaches to disease . Genome Biol . 2017 ; 18 ( 1 ): 83 . doi: 10.1186/s13059-017-1215-1 OpenUrl CrossRef PubMed 14. ↵ Pietzner M , Beuchel C , Demircan K , et al. Machine learning-guided deconvolution of plasma protein levels . Published online January 9, 2025 . doi: 10.1101/2025.01.09.25320257 OpenUrl Abstract / FREE Full Text 15. ↵ Sun BB , Chiou J , Traylor M , et al. Plasma proteomic associations with genetics and health in the UK Biobank . Nature . 2023 ; 622 ( 7982 ): 329 – 338 . doi: 10.1038/s41586-023-06592-6 OpenUrl CrossRef 16. ↵ Buuren SV , Groothuis-Oudshoorn K. mice : Multivariate Imputation by Chained Equations in R . J Stat Softw . 2011 ; 45 ( 3 ). doi: 10.18637/jss.v045.i03 OpenUrl CrossRef 17. ↵ Azur MJ , Stuart EA , Frangakis C , Leaf PJ . Multiple imputation by chained equations: what is it and how does it work? Int J Methods Psychiatr Res . 2011 ; 20 ( 1 ): 40 – 49 . doi: 10.1002/mpr.329 OpenUrl CrossRef PubMed 18. ↵ Therneau TM , Grambsch PM , Fleming TR . Martingale-based residuals for survival models . Biometrika . 1990 ; 77 ( 1 ): 147 – 160 . 
doi: 10.1093/biomet/77.1.147 OpenUrl CrossRef Web of Science 19. ↵ Rohart F , Gautier B , Singh A , Lê Cao KA . mixOmics: An R package for ’omics feature selection and multiple data integration . PLoS Comput Biol . 2017 ; 13 ( 11 ): e1005752 . doi: 10.1371/journal.pcbi.1005752 OpenUrl CrossRef PubMed 20. ↵ Stamey TA , Yang N , Hay AR , McNeal JE , Freiha FS , Redwine E . Prostate-specific antigen as a serum marker for adenocarcinoma of the prostate . N Engl J Med . 1987 ; 317 ( 15 ): 909 – 916 . doi: 10.1056/NEJM198710083171501 OpenUrl CrossRef PubMed Web of Science 21. ↵ Yao K , Li J , Jin C , et al. Characterization of a novel mutation in the CRYBB2 gene associated with autosomal dominant congenital posterior subcapsular cataract in a Chinese family . Mol Vis . 2011 ; 17 : 144 – 152 . OpenUrl PubMed 22. ↵ Zhou Y , Zhai Y , Huang L , et al. A Novel CRYBB2 Stopgain Mutation Causing Congenital Autosomal Dominant Cataract in a Chinese Family . J Ophthalmol . 2016 ; 2016 : 4353957 . doi: 10.1155/2016/4353957 OpenUrl CrossRef PubMed 23. ↵ Kim KY , Shin KY , Chang KA . GFAP as a Potential Biomarker for Alzheimer’s Disease: A Systematic Review and Meta-Analysis . Cells . 2023 ; 12 ( 9 ): 1309 . doi: 10.3390/cells12091309 OpenUrl CrossRef PubMed 24. ↵ Raulin AC , Doss SV , Trottier ZA , Ikezu TC , Bu G , Liu CC . ApoE in Alzheimer’s disease: pathophysiology and therapeutic strategies . Mol Neurodegener . 2022 ; 17 ( 1 ): 72 . doi: 10.1186/s13024-022-00574-4 OpenUrl CrossRef PubMed 25. ↵ Quinn JP , Kandigian SE , Trombetta BA , Arnold SE , Carlyle BC . VGF as a biomarker and therapeutic target in neurodegenerative and psychiatric diseases . Brain Commun . 2021 ; 3 ( 4 ): fcab261 . doi: 10.1093/braincomms/fcab261 OpenUrl CrossRef 26. ↵ Hijazi Z , Oldgren J , Siegbahn A , Granger CB , Wallentin L . Biomarkers in atrial fibrillation: a clinical review . Eur Heart J . 2013 ; 34 ( 20 ): 1475 – 1480 . doi: 10.1093/eurheartj/eht024 OpenUrl CrossRef PubMed Web of Science 27. 
Maalouf R , Bailey S . A review on B-type natriuretic peptide monitoring: assays and biosensors . Heart Fail Rev . 2016 ; 21 ( 5 ): 567 – 578 . doi: 10.1007/s10741-016-9544-9 OpenUrl CrossRef PubMed 28. Cao Z , Jia Y , Zhu B . BNP and NT-proBNP as Diagnostic Biomarkers for Cardiac Dysfunction in Both Clinical and Forensic Medicine . Int J Mol Sci . 2019 ; 20 ( 8 ): 1820 . doi: 10.3390/ijms20081820 OpenUrl CrossRef 29. ↵ Yuan S , Chen J , Ruan X , et al. Cross-population GWAS and proteomics improve risk prediction and reveal mechanisms in atrial fibrillation . Nat Commun . 2025 ; 16 ( 1 ): 6426 . doi: 10.1038/s41467-025-61720-2 OpenUrl CrossRef PubMed 30. ↵ ten Kate MK , van der Meer J . Protein S deficiency: a clinical perspective . Haemoph Off J World Fed Hemoph . 2008 ; 14 ( 6 ): 1222 – 1228 . doi: 10.1111/j.1365-2516.2008.01775.x OpenUrl CrossRef 31. ↵ Ken-Dror G , Cooper JA , Humphries SE , Drenos F , Ireland HA . Free protein S level as a risk factor for coronary heart disease and stroke in a prospective cohort study of healthy United Kingdom men . Am J Epidemiol . 2011 ; 174 ( 8 ): 958 – 968 . doi: 10.1093/aje/kwr203 OpenUrl CrossRef PubMed 32. ↵ Sabatine MS , Giugliano RP , Keech AC , et al. Evolocumab and Clinical Outcomes in Patients with Cardiovascular Disease . N Engl J Med . 2017 ; 376 ( 18 ): 1713 – 1722 . doi: 10.1056/NEJMoa1615664 OpenUrl CrossRef PubMed 33. ↵ Hazarika S , Annex BH . Biomarkers and Genetics in Peripheral Artery Disease . Clin Chem . 2017 ; 63 ( 1 ): 236 – 244 . doi: 10.1373/clinchem.2016.263798 OpenUrl Abstract / FREE Full Text 34. ↵ Mankelow TJ , Spring FA , Parsons SF , et al. Identification of critical amino-acid residues on the erythroid intercellular adhesion molecule-4 (ICAM-4) mediating adhesion to alpha V integrins . Blood . 2004 ; 103 ( 4 ): 1503 – 1508 . doi: 10.1182/blood-2003-08-2792 OpenUrl Abstract / FREE Full Text 35. ↵ Vohra R , Aldana BI , Bulli G , et al. Lactate-Mediated Protection of Retinal Ganglion Cells . 
J Mol Biol . 2019 ; 431 ( 9 ): 1878 – 1888 . doi: 10.1016/j.jmb.2019.03.005 OpenUrl CrossRef PubMed 36. ↵ Rombaut A , Brautaset R , Williams PA , Tribble JR . Glial metabolic alterations during glaucoma pathogenesis . Front Ophthalmol . 2023 ; 3 : 1290465 . doi: 10.3389/fopht.2023.1290465 OpenUrl CrossRef 37. ↵ Bowden J , Davey Smith G , Burgess S . Mendelian randomization with invalid instruments: effect estimation and bias detection through Egger regression . Int J Epidemiol . 2015 ; 44 ( 2 ): 512 – 525 . doi: 10.1093/ije/dyv080 OpenUrl CrossRef PubMed 38. ↵ Minelli C , Del Greco M. F , Van Der Plaat DA , Bowden J , Sheehan NA , Thompson J. The use of two-sample methods for Mendelian randomization analyses on single large datasets . Int J Epidemiol . 2021 ; 50 ( 5 ): 1651 – 1659 . doi: 10.1093/ije/dyab084 OpenUrl CrossRef 39. ↵ Chen YC , Chen JH , Tsai CF , Wu CT , Chang PC , Yeh WL . Inhibition of tumor migration and invasion by fenofibrate via suppressing epithelial-mesenchymal transition in breast cancers . Toxicol Appl Pharmacol . 2024 ; 483 : 116818 . doi: 10.1016/j.taap.2024.116818 OpenUrl CrossRef PubMed 40. ↵ Liu S , Uppal H , Demaria M , Desprez PY , Campisi J , Kapahi P . Simvastatin suppresses breast cancer cell proliferation induced by senescent cells . Sci Rep . 2015 ; 5 : 17895 . doi: 10.1038/srep17895 OpenUrl CrossRef PubMed 41. ↵ Gage BF , Birman-Deych E , Radford MJ , Nilasena DS , Binder EF . Risk of osteoporotic fracture in elderly patients taking warfarin: results from the National Registry of Atrial Fibrillation 2 . Arch Intern Med . 2006 ; 166 ( 2 ): 241 – 246 . doi: 10.1001/archinte.166.2.241 OpenUrl CrossRef PubMed Web of Science 42. ↵ Argentieri MA , Amin N , Nevado-Holgado AJ , et al. Integrating the environmental and genetic architectures of aging and mortality . Nat Med . 2025 ; 31 ( 3 ): 1016 – 1025 . doi: 10.1038/s41591-024-03483-9 OpenUrl CrossRef PubMed 43. ↵ Weeks J , Elgaddal N. 
Chronic Obstructive Pulmonary Disease Among Adults Aged 18 Years and Older: United States, 2023 . Centers for Disease Control and Prevention ; 2025 . doi: 10.15620/cdc/174596 OpenUrl CrossRef 44. ↵ Wang T , Shao W , Huang Z , et al. MOGONET integrates multi-omics data using graph convolutional networks allowing patient classification and biomarker identification . Nat Commun . 2021 ; 12 ( 1 ): 3445 . doi: 10.1038/s41467-021-23774-w OpenUrl CrossRef PubMed 45. ↵ Shafique H , Ashraf NM , Rashid A , et al. Determination of Pleiotropic Effect of Warfarin in VKORC1 and CYP2C9 Genotypes in Patients With Heart Valve Replacement . Front Cardiovasc Med . 2022 ; 9 : 895169 . doi: 10.3389/fcvm.2022.895169 OpenUrl CrossRef PubMed 46. ↵ Giri A , Hellwege JN , Keaton JM , et al. Trans-ethnic association study of blood pressure determinants in over 750,000 individuals . Nat Genet . 2019 ; 51 ( 1 ): 51 – 62 . doi: 10.1038/s41588-018-0303-9 OpenUrl CrossRef PubMed 47. ↵ Oh K , Yuk M , Yang S , et al. A genome-wide association study of high-sensitivity C-reactive protein in a large Korean population highlights its genetic relationship with cholesterol metabolism . Sci Rep . 2025 ; 15 ( 1 ): 189 . doi: 10.1038/s41598-024-84466-1 OpenUrl CrossRef PubMed 48. ↵ Sinnott-Armstrong N , Tanigawa Y , Amar D , et al. Genetics of 35 blood and urine biomarkers in the UK Biobank . Nat Genet . 2021 ; 53 ( 2 ): 185 – 194 . doi: 10.1038/s41588-020-00757-z OpenUrl CrossRef PubMed 49. ↵ Olafsdottir TA , Theodors F , Bjarnadottir K , et al. Eighty-eight variants highlight the role of T cell regulation and airway remodeling in asthma pathogenesis . Nat Commun . 2020 ; 11 ( 1 ): 393 . doi: 10.1038/s41467-019-14144-8 OpenUrl CrossRef PubMed 50. ↵ Gong B , Huang L , He Y , et al. A genetic variant in IL-6 lowering its expression is protective for critical patients with COVID-19 . Signal Transduct Target Ther . 2022 ; 7 ( 1 ): 112 . 
doi: 10.1038/s41392-022-00923-1 OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted August 05, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Multi-omics integration predicts 17 disease incidences in the UK Biobank Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Multi-omics integration predicts 17 disease incidences in the UK Biobank Jiawen Du , Muqing Zhou , Laura M. Raffield , Ruihai Zhou , Yun Li , Can Chen , Quan Sun medRxiv 2025.08.01.25332841; doi: https://doi.org/10.1101/2025.08.01.25332841 Share This Article: Copy Citation Tools Multi-omics integration predicts 17 disease incidences in the UK Biobank Jiawen Du , Muqing Zhou , Laura M. 
Raffield , Ruihai Zhou , Yun Li , Can Chen , Quan Sun medRxiv 2025.08.01.25332841; doi: https://doi.org/10.1101/2025.08.01.25332841 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4536) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (541) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9232) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00b2aa61aa45f95',t:'MTc3OTYxMzgxOQ=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-24T02:00:01.246996+00:00

License: CC-BY-NC-4.0