Predicting Total Knee Replacement in Knee Osteoarthritis Using a Machine-Learning–Guided Approach in patients of the Osteoarthritis Initiative (OAI)

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 60,878 characters · extracted from preprint-html · click to expand
Predicting Total Knee Replacement in Knee Osteoarthritis Using a Machine-Learning–Guided Approach in patients of the Osteoarthritis Initiative (OAI) | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Predicting Total Knee Replacement in Knee Osteoarthritis Using a Machine-Learning–Guided Approach in patients of the Osteoarthritis Initiative (OAI) View ORCID Profile Francisco J. Blanco , View ORCID Profile Natividad Oreiro , View ORCID Profile Jorge Vázquez-García , View ORCID Profile Antonio Morano-Torres , View ORCID Profile Vanesa Balboa-Barreiro , Isabel Rodríguez-Valle , Sara Relaño , View ORCID Profile Nicola Veronese , View ORCID Profile María C. de Andrés , View ORCID Profile Ignacio Rego-Pérez doi: https://doi.org/10.1101/2025.10.28.25338966 Francisco J. Blanco 1 Grupo de Investigación en Reumatología (GIR). Instituto de Investigación Biomédica de A Coruña (INIBIC), Complexo Hospitalario Universitario de A Coruña (CHUAC), Sergas. Universidade da Coruña (UDC). C/ As Xubias de Arriba 84 , 15006, A Coruña, España 2 Universidade da Coruña (UDC), Centro de Investigación de ciencias Avanzadas (CICA), Grupo de Investigación en Reumatología y Salud (GIR-S). Departamento de Fisioterapia, Medicina y Ciencias Biomédicas, Facultad de Fisioterapia , Campus de Oza, 15008, A Coruña, España Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Francisco J. Blanco For correspondence: ignacio.rego.perez{at}sergas.es f.blanco1{at}udc.es Natividad Oreiro 1 Grupo de Investigación en Reumatología (GIR). Instituto de Investigación Biomédica de A Coruña (INIBIC), Complexo Hospitalario Universitario de A Coruña (CHUAC), Sergas. Universidade da Coruña (UDC). C/ As Xubias de Arriba 84 , 15006, A Coruña, España Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Natividad Oreiro Jorge Vázquez-García 1 Grupo de Investigación en Reumatología (GIR). Instituto de Investigación Biomédica de A Coruña (INIBIC), Complexo Hospitalario Universitario de A Coruña (CHUAC), Sergas. Universidade da Coruña (UDC). C/ As Xubias de Arriba 84 , 15006, A Coruña, España Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jorge Vázquez-García Antonio Morano-Torres 1 Grupo de Investigación en Reumatología (GIR). Instituto de Investigación Biomédica de A Coruña (INIBIC), Complexo Hospitalario Universitario de A Coruña (CHUAC), Sergas. Universidade da Coruña (UDC). C/ As Xubias de Arriba 84 , 15006, A Coruña, España Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Antonio Morano-Torres Vanesa Balboa-Barreiro 3 Unidad de apoyo a la investigación. Grupo de Investigación en Enfermería y Cuidados en Salud. Grupo de Investigación en Reumatología y Salud (GIR-S). Instituto de Investigación Biomédica de A Coruña (INIBIC), Complexo Hospitalario Universitario de A Coruña (CHUAC), Sergas. Universidade da Coruña (UDC). As Xubias , 15006. A Coruña, España Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Vanesa Balboa-Barreiro Isabel Rodríguez-Valle 1 Grupo de Investigación en Reumatología (GIR). Instituto de Investigación Biomédica de A Coruña (INIBIC), Complexo Hospitalario Universitario de A Coruña (CHUAC), Sergas. Universidade da Coruña (UDC). C/ As Xubias de Arriba 84 , 15006, A Coruña, España Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sara Relaño 1 Grupo de Investigación en Reumatología (GIR). Instituto de Investigación Biomédica de A Coruña (INIBIC), Complexo Hospitalario Universitario de A Coruña (CHUAC), Sergas. Universidade da Coruña (UDC). C/ As Xubias de Arriba 84 , 15006, A Coruña, España Find this author on Google Scholar Find this author on PubMed Search for this author on this site Nicola Veronese 4 Saint Camillus International University of Health Sciences, Faculty of Medicine , Rome, Italy Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Nicola Veronese María C. de Andrés 1 Grupo de Investigación en Reumatología (GIR). Instituto de Investigación Biomédica de A Coruña (INIBIC), Complexo Hospitalario Universitario de A Coruña (CHUAC), Sergas. Universidade da Coruña (UDC). C/ As Xubias de Arriba 84 , 15006, A Coruña, España Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for María C. de Andrés Ignacio Rego-Pérez 1 Grupo de Investigación en Reumatología (GIR). Instituto de Investigación Biomédica de A Coruña (INIBIC), Complexo Hospitalario Universitario de A Coruña (CHUAC), Sergas. Universidade da Coruña (UDC). C/ As Xubias de Arriba 84 , 15006, A Coruña, España Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ignacio Rego-Pérez For correspondence: ignacio.rego.perez{at}sergas.es f.blanco1{at}udc.es Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF ABSTRACT Objective To develop a pragmatic model to predict total knee replacement (TKR) in knee osteoarthritis (OA) using non-imaging clinical, genetic, and lifestyle data with machine-learning (ML)–guided feature selection. Methods We analyzed 3,790 Osteoarthritis Initiative (OAI) participants. Nested ML feature selection on the training set identified 15 informative variables. Classifiers were benchmarked, then a multivariable logistic regression was fit on the full cohort. Performance was summarized by discrimination (AUC with 95% CI) and calibration (Brier score). To assess the incremental value of genetics, we refit an otherwise identical Clinical model excluding the polygenic risk score (PRS) and compared specificity at fixed sensitivities using Bonferroni-adjusted McNemar tests. A pre-specified analysis examined performance by baseline Kellgren–Lawrence (KL) grade (KL 0–1 vs KL ≥2). Results On the test set, classifier AUCs ranged 0.716–0.748, with Elastic Net and XGBoost performing best. The final logistic model fit on the full cohort achieved AUC 0.765 (95% CI 0.736–0.793) with acceptable calibration (Brier 0.097). Performance remained robust by disease stage, with higher discrimination in pre-radiographic knees (KL 0–1: AUC 0.827) and moderate discrimination in KL ≥2 (AUC 0.720); decile plots indicated broadly aligned observed vs predicted risks. PRS added modest, statistically significant gains in specificity at several fixed sensitivities without materially changing AUC. Conclusions We present a pragmatic, non-imaging, ML-informed model that predicts TKR with clinically acceptable discrimination and calibration using routinely collected data. This framework provides a practical basis for individualized risk stratification and decision support without reliance on imaging. What is already known on this topic Risk of total knee replacement (TKR) in knee osteoarthritis (OA) is multifactorial and many existing models depend on imaging markers such as Kellgren–Lawrence grade or MRI findings. Established non-imaging predictors include symptoms and function (WOMAC), age, BMI, knee alignment or prior injury. Genetic scores have been explored in OA but, to date, have shown limited standalone utility compared with routine clinical factors. What this study adds This study presents a clinic-friendly, non-imaging prediction model guided by a transparent ML pipeline—nested random-forest feature selection with in-fold preprocessing and SMOTE, repeated cross-validation, and SHAP-based interpretation—that achieves acceptable discrimination and calibration in the OAI cohort. It reinforces the relevance of routine clinical factors, identifies an inverse association between Mediterranean-diet adherence and TKR risk, and evaluates the incremental—though limited—contribution of genetic risk via a polygenic risk score (PRS), with a signal that persists in pre-radiographic knees despite few events. How this study might affect research, practice or policy The model offers a practical pathway for risk stratification where imaging is unavailable or costly, supporting shared decision-making and prioritization of follow-up. It encourages precision-medicine workflows that integrate clinical and genetic information cautiously and transparently, and it sets clear directions for future work: external validation across settings, assessment in early-stage OA populations, and refinement of genetic predictors before any policy or guideline incorporation. INTRODUCTION Osteoarthritis (OA) is a chronic musculoskeletal disease of polygenic and heterogeneous nature, primarily affecting movable joints. Globally, over 500 million people have OA [ 1 ], with the knee being the most commonly affected joint [ 2 – 4 ]. Symptoms include activity-related pain, often worsened by weight-bearing and relieved by rest. While age is a major risk factor, other contributors include genetics, joint trauma, malalignment, muscle weakness, joint overload, and female sex [ 5 ]. Modifiable risk factors such as mechanical stress, obesity, and overweight are especially relevant [ 6 ]. OA is traditionally considered slowly progressive, with many patients worsening over decades. Around 20% reach end-stage disease requiring joint replacement, a major surgery with notable morbidity and mortality [ 7 ]. Total knee replacement (TKR) should only be considered when other treatments fail [ 5 ]. However, TKR rates are rising—from 120 surgeries per 100,000 people in 2014 to 165 in 2022—and are expected to increase substantially with aging populations [ 8 – 10 ]. This trend poses a growing burden on healthcare systems. Additionally, up to 20% of TKR patients experience persistent postsurgical pain due to various surgical, prosthetic, or patient-specific causes [ 11 ]. Genetic heritability of knee OA is estimated at 45–60% [ 12 ]. With advances in genome-wide association studies (GWAS) [ 13 , 14 ], polygenic risk scores (PRS) have emerged as tools with potential clinical value [ 15 ]. PRS condense genetic susceptibility to a trait by combining weighted risk alleles from GWAS summary statistics. Several studies have assessed PRS for TKR risk in European [ 16 , 17 ] and Japanese cohorts [ 18 ] using one of the largest OA GWAS to date [ 13 ], though, to our knowledge, none of these studies used the Osteoarthritis Initiative (OAI) as the target cohort. Developing predictive models to identify patients at high TKR risk is crucial for enabling lifestyle interventions and personalized care. Machine learning (ML) models are particularly suited to capture complex interactions among clinical, molecular, and genetic variables. In TKR prediction, they handle class imbalance and improve model accuracy while also aiding interpretation by selecting the most relevant predictors. This supports both risk prediction and data-driven preventive strategies. Application of ML methods is an emerging field in OA and particularly, in knee OA [ 19 – 21 ], including for TKR risk prediction [ 22 – 24 ]. Most of these rely on imaging data, such as X-rays or MRIs, which involve cost and, in the case of X-rays, repeated radiation exposure. This study sought to build and internally evaluate a non-imaging, ML-guided clinical model for predicting TKR in the OAI cohort by quantifying discrimination and calibration after ML-based feature selection, benchmarking against alternative classifiers, performing pre-specified KL-stratified analyses, and, secondarily, exploring the incremental contribution of genetic information via a PRS. METHODS An overview of the study methodology is shown in Figure 1 . All analyses were conducted using R v4.4.1. Download figure Open in new tab Figure 1. Scheme and flow chart of the methodology used in this study. OAI: Osteoarthritis Initiative; TKR: total knee replacement; PRS: polygenic risk score; SMOTE: synthetic minority oversampling technique; GeCKO: Genetic components of knee Osteoarthritis. Cohort description We used data from the Osteoarthritis Initiative (OAI), a well-characterized prospective cohort of knee OA patients with publicly available data and biospecimens ( https://nda.nih.gov/oai ) [ 25 ]. Participants were followed annually for 108 months to track symptom progression, disability, structural changes, and biomarker levels. All study procedures complied with the Declaration of Helsinki, and participants gave informed consent. Machine learning approach All ML analyses and their reporting followed the Transparent Reporting of a multivariable prediction model for Individual Prognosis Or Diagnosis with Artificial Intelligence (TRIPOD+AI) guidelines [ 26 ]. A completed checklist is provided in the Supplementary Material. We used ML to identify (non-imaging) predictors and construct an optimized model for TKR risk. Data source and pre-processing We obtained all non-imaging variables from the OAI and randomly split the data into a training set (70%) and a test set (30%). First, we retained only expert-curated and literature-supported predictors [ 27 ], then removed highly collinear features (Pearson correlation >0.7). To reduce dimensionality without information leakage, we applied a nested Random Forest (rf) feature-selection framework (5 outer folds x 5 inner folds) on the training data. In each outer fold, the top-15 variables by averaged importance were recorded, and the final predictor set was the consensus of variables appearing in ≥3/5 outer folds. Class imbalance during feature selection was handled via class weighting, and all preprocessing steps (including centering/scaling) were fit within training folds and then applied to the corresponding validation/test partitions. Additional data source used in the process We additionally considered three variables as candidate predictors: (i) adherence to the Mediterranean diet (MD), ii) a published genetic interaction between the GG genotype of SNP rs12107036 in TP63 and mtDNA haplogroup UK [ 28 ], and iii) genetic OAI data used to construct a PRS for TKR risk. All three were added via expert curation and processed identically to the other candidates within the nested rf feature-selection framework, without being forced into the models. Mediterranean diet (MD) MD adherence, provided by Veronese et al. [ 29 ], was assessed using the alternate Mediterranean diet (aMED) score [ 30 ]. This score is based on a food frequency questionnaire, which was recorded during the baseline OAI visit. The aMED takes into consideration several foods commonly consumed within the MD, including: cereals, potatoes, fruits, vegetables, legumes, fish, red meat, poultry, full fat dairy products, olive oil and consumption of alcoholic beverages. The intake of each food was categorized on the basis of monthly, weekly, or daily consumption, as appropriate, ranging from a score of 0 (less adherent) to 5 (better adherence); the total score ranges from 0 to 55, with higher values indicating higher adherence to a MD. Since there are no agreed cut-off scores for higher aMED adherence, the population was divided into quartiles: aMED <25, 26-28, 29-32, and ≥33. Genetic interaction An interaction between the TP63 rs12107036 GG genotype and mtDNA haplogroup UK has been reported as a significant risk factor for rapidly progressive knee OA in OAI participants [ 28 ]. Accordingly, we included this interaction as a candidate predictor of TKR risk. PRS construction Genotype data from 4,219 OAI participants were obtained from dbGAP (phs000955.v1.p1), as part of the genetic components of knee osteoarthritis (GeCKO) study [ 31 ], on the Illumina HumanOmni2.5 BeadChip. Variant-level QC excluded markers failing call rate, duplication or monomorphism, with MAF < 0.05, or Hardy–Weinberg equilibrium p-value 3% missing genotypes, outlier heterozygosity (±4 SD), duplicates/relateds, and ancestry outliers (>8 SD from the European 1000 Genomes PCA cluster). Post-QC VCFs were imputed on the Michigan Imputation Server (Minimac4), retaining variants with Rsq ≥ 0.8 for high-confidence analyses (≈8 million variants). To verify allele alignment, population structure control and genomic inflation, we ran a screening association scan using rvtests [ 32 ] with the score model, including as covariates the individual’s age, sex, body mass index (BMI), as well as principal components (PCs) derived from a pruned hard-call SNP set under stricter missingness/relatedness thresholds. This step was not intended for SNP discovery and no SNP-level results are reported. The final dataset included 3,085 individuals. PRS was derived from published OA GWAS summary statistics [ 13 ]. We selected 25 significantly associated with TKR risk (p-value < 1.3e -8 ) as the base set of instruments and computed a weighted score in PRSice using the reported log(OR) for the effect alleles. Effect/other alleles were aligned to the base GWAS; palindromic SNPs at ambiguous MAF were excluded or resolved using reference frequencies. Using OAI imputed data as the target, clumping in PRSice (r² = 0.1) retained 12 independent SNPs after LD pruning, with age, sex, BMI and the aforementioned PCs entered as covariates ( Supplementary Table 1 ). The PRS was standardized (z-score) and categorized by quintiles into low (Q1, 0–20%), medium (Q2–Q4, 21–80%) and high (Q5, 81–100%) risk groups [ 16 ]. Construction of ML models We trained six supervised models using caret: Random Forest (rf), XGBoost (xgbTree), Logistic Regression (glm), Gradient Boosting Machine (gbm), Elastic Net (glmnet), and Support Vector Machine (svmRadial). Hyperparameters were tuned via 10-fold cross-validation with centering/scaling and Synthetic Minority Over-sampling Technique (SMOTE; k = 5) applied within each fold. Using the 15 predictors selected in the nested feature-selection step, we then ran a repeated 5-fold cross-validation (5 repeats) with the same in-fold preprocessing and SMOTE to obtain stable performance estimates; we report mean ± SD AUC, considering AUC > 0.70 as clinically acceptable [ 33 ]. Finally, each model was refit on the full training data and evaluated once on the independent test set, reporting unbiased AUC, sensitivity, specificity, Kappa, and F1-score. The operating threshold was chosen by Youden’s index on the training set and then applied to the test set. In addition, we used SHAP (SHapley Additive exPlanations) to quantify the contribution of each predictor to individual model outputs. This approach, assigns an importance value to each feature based on its impact on individual predictions. Violin plots (summary plots) showed the distribution of feature importance across samples. Statistical analyses After the subsequent process of ML to select the most informative non-imaging predictors, a multivariable logistic regression model was developed on the entire cohort. Predictive performance of the model was summarized by discrimination (area under the ROC curve, AUC, with 95% confidence interval (CI)) and calibration (Brier score and decile-based calibration plots with 95% CIs). As a secondary analysis, we examined whether genetic information (in terms of PRS) adds value beyond clinical predictors. To do this, we fitted a Clinical model identical to the Complete model but excluding the PRS. The AUC values of these two models were compared using the DeLonǵs test. Calibration was also evaluated via the Brier score and decile-based calibration plots with 95% CI. In addition, McNemar tests with Bonferroni correction evaluated specificity differences between models at fixed sensitivity levels (0.400–0.900). Confusion matrices were also constructed. KL-stratified analysis To explore stage-related performance, the Complete model was re-estimated within baseline KL 0–1 and KL ≥2 strata. Within each stratum, we reported AUC, Brier score, decile-based calibration plots, and coefficient estimates for all predictors, following the same procedures as in the primary analysis. RESULTS Machine learning approach. Data distribution and model performance Fifteen final predictors were selected ( Table 1 ). After removing NAs, the training set included 1,658 individuals (737 males, 921 females), with 202 (12.2%) undergoing TKR. The test set included 710 individuals (314 males, 396 females), of whom 89 (12.5%) had TKR. Feature distributions did not significantly differ between sets. View this table: View inline View popup Table 1. Feature selection process in training and test sets Table 2 shows model performance. In the test set, AUC values ranged from 0.716 (Logistic Regression) to 0.748 (XGBoost). The highest F1-score was for XGBoost (0.378), followed by Elastic Net (0.376). Based on AUC and F1-score, Elastic Net (a regularized linear model) and XGBoost (a nonlinear model based on assemblies of decision trees) were the top-performing models. Confusion matrices are shown in Figure 2a . Download figure Open in new tab Download figure Open in new tab Figure 2. Confusion matrices and variable importance graphics. A) Confusion matrices of the XGBoost and Elastic net models. B) SHAP summary plot showing the impact of the most influential variables on the XGBoost and Elastic net model’s predictions. Each row represents a variable, with SHAP values on the x-axis indicating the magnitude and direction of its effect. Red denotes a positive contribution to the prediction, while blue indicates a negative one. Variables are ordered from top to bottom according to their mean absolute SHAP value. NSAID.exposure.rec: non-steroid anti-inflammatory drugs exposure; P01.INJ_R: previous injury in right knee; P01.INJ_L: previous injury in left knee; PRS.TKR.Z: standardized polygenic risk score; V00Type.alter: knee alignment (varum/valgus/normal); V00WOMKP_RL: WOMAC pain in right and/or left knee; P02SEX: sex; TP63.GGxUk: genetic interaction between SNP rs12107036 at TP63 and mitochondrial DNA (mtDNA) haplogroup Uk; P01BMI: body mass index; V00AGE: age; md: adherence to Mediterranean diet; V00WOMADL_RL: WOMAC physical function; V00BPSYS: systolic pressure; V00BPDIAS; diastolic pressure; V00SUPVITC: Vitamin C supplementation. View this table: View inline View popup Download powerpoint Table 2. Metrics of the different machine learning models in the test set of the OAI Model predictors The magnitude and direction of the effect of each variable on the prediction was determined by plotting the SHAP values on violin plots, where variables are ranked from top to bottom according to their overall importance, calculated as the mean absolute SHAP value across all observations ( Figure 2b ). In the case of XGBoost, visualization highlights that variables such as The Western Ontario and McMaster Universities Osteoarthritis Index (WOMAC) pain, non-steroid anti-inflammatory drugs (NSAIDs) exposure, WOMAC physical function, age and previous knee injury had the greatest influence on model output. For Elastic net, the most decisive features in this model was the NSAIDs exposure, followed by WOMAC pain, age, WOMAC physical function and standardized PRS. Final predictive model We used a complete-case approach for the final logistic regression, including only participants with non-missing data for all model variables; no imputation was performed. Following ML-guided feature selection, the final Complete model was fitted in 2,414 participants and comprised 11 predictors ( Table 3 ). For transparency, univariate associations for each predictor (ORs, 95% CIs, and p-values) are also reported: sex (OR=1.151; 95%CI=0.923– 1.436; p-value=0.211), age (OR=1.038; 95%CI=1.026–1.051; p-value <0.001), BMI (OR=1.074; 95%CI=1.050–1.098; p-value <0.001), GG genotype of SNP rs12107036 at TP63 and mtDNA haplogroup Uk (GG.no-Uk vs GG.Uk: OR=0.551; 95%CI=0.352–0.862; p-value=0.009; no-GG.Uk vs GG.Uk: OR=0.509; 95%CI=0.319–0.812; p-value=0.005; no-GG.no-Uk vs GG.Uk: OR=0.624; 95%CI=0.418–0.932; p-value=0.021), standardized PRS (OR=1.198; 95%CI=1.069–1.344; p-value=0.002), systolic pressure (OR=1.696; 95%CI=1.360-2.114; p-value<0.001), NSAID exposure (OR=2.885; 95%CI=2.179–3.822; p-value<0.001), previous knee injury (OR=1.496; 95%CI=1.202–1.864; p-value<0.001), knee malalignment (varus vs normal: OR=1.491; 95%CI=1.067–2.084; p-value=0.019 and valgus vs normal: OR=1.429; 95%CI=1.045–1.956; p-value=0.026) WOMAC pain (OR=1.124; 95%CI=1.103–1.145; p-value<0.001) and aMED score (OR=0.974; 95%CI=0.953–0.996; p-value=0.021) ( Table 3 ). View this table: View inline View popup Download powerpoint Table 3. Uni and multivariate regression analyses to predict the risk of TKR in 2,414 OAI patients Analysis of the incremental value of the PRS Of the 3,790 OAI subjects of Caucasian ancestry with available genetic data, PRS was calculated for 3,085 participants with knee prosthesis information (Yes/No). Based on the PRS distribution, 608 (19.7%) were categorized as low-risk, 1,859 (60.3%) as medium-risk, and 618 (20%) as high-risk. Table 4 summarizes sex, age, BMI, and knee replacement across PRS groups, with TKR frequency rising from 8.2% in low-risk to 10.7% in moderate-risk and 13.4% in high-risk (p-value=0.013). No significant interaction was found between PRS and sex or MD (data not shown). View this table: View inline View popup Download powerpoint Table 4. Baseline characteristics of the OAI population based on PRS To assess the incremental value of the PRS, we removed this predictor from the regression model and compared the metrics between both models (with and without PRS). The complete model reached an AUC of 0.765 (0.736 – 0.793), with a sensitivity of 0.770 and a specificity of 0.668. The clinical model (without PRS) showed a slightly lower AUC of 0.761 (0.733 – 0.789), with a sensitivity of 0.808 and a specificity of 0.604. However, despite the slight improvement in the AUC value, no significant differences were detected between the AUC of the clinical model (without PRS) and the AUC of the complete model (p=0.385). Calibration was similar for the complete and clinical models (Brier score 0.097 for both), showing broadly acceptable agreement between predicted probabilities and observed outcomes, with minor deviations at the extremes as suggested by the decile-based calibration plots ( Supplementary Figures 1a and 1b). In view of the positive impact that the addition of PRS has on specificity (from 0.604 to 0.668), we subsequently analyzed if the improvement provided by this score in terms of specificity was significantly different between the clinical model and the complete model. McNemar test (Bonferroni-adjusted) showed significantly improved specificity in the complete vs. clinical model at several fixed sensitivities: 0.400 (p-value=0.018), 0.450 (p-value=0.028), 0.550 (p-value=0.015), 0.700 (p-value=3.12e -05 ), and 0.750 (p-value=5.94e -09 ) ( Supplementary Figures 2). By selecting the highest statistically significant sensitivity of 0.750 to construct the confusion matrices, the specificity increased significantly from 0.640 for the clinical model to 0.676 for the complete model ( Supplementary Figure 3). KL-stratified analysis We re-estimated the Complete model within baseline strata KL 0–1 (n=1,071; 16 TKR events/1,055 non-TKR events) and KL ≥2 (n=1,336; 275 TKR events/1,061 non-TKR events). In KL 0–1, only age (p-value=0.035) and the standardized PRS (p-value=0.002) remained significant; performance was good (AUC 0.827, 95% CI 0.715–0.940) with acceptable calibration (Brier 0.014),. In KL ≥2, several clinical predictors were significant and the PRS retained an independent association (p-value=0.028); overall performance was moderate (AUC 0.720, 95% CI 0.687–0.753; Brier 0.149). In both strata, calibration was overall adequate, with small deviations at the extremes for KL ≥2, and greater imprecision in KL 0–1 probably due to the low number of TKR events in that stratum ( Table 5 and Supplementary Figures 1c and 1d). View this table: View inline View popup Download powerpoint Table 5. Multivariate regression analyses to predict the risk of TKR KL 0-1 and KL ≥2 patients DISCUSSION Our primary aim was to build a pragmatic prediction model for TKR in knee OA patients from the OAI using routinely available, non-imaging variables, guided by ML feature selection. This strategy identified a compact set of clinically interpretable predictors, some of them novel (i.e MD), and delivered overall performance that meets commonly accepted clinical thresholds. Notably, calibration was broadly acceptable, supporting the model’s potential utility for risk communication and patient stratification in settings where imaging is not readily available or cost-effective. The constructed model incorporated variables previously linked to OA and TKR. WOMAC pain and physical function, for instance, are well-established predictors [ 23 ]. A novel finding was the inverse relationship between MD adherence (aMED score) and TKR risk. Although not previously shown in this context, MD adherence has been associated with lower OA prevalence, less pain worsening, and improved cartilage quality on MRI in OAI participants [ 29 , 34 , 35 ]. MD’s anti-inflammatory and antioxidant properties likely mediate these protective effects [ 36 ]. Another relevant factor was systolic blood pressure. Patients with higher values had greater TKR risk. This aligns with prior evidence linking hypertension to incident knee OA, even after adjusting for BMI [ 37 ]. Similarly, other components of metabolic syndrome (MetS) and hypertension have been associated with TKR risk [ 38 ]. The effects of hypertension, as well as of other components of MetS, could result in vessel narrowing and reduced blood flow, leading to cartilage degeneration due to reduced nutrient from the subchondral bone [ 38 ]. In this sense, NSAID use, another top predictor, may increase blood pressure but may also reflect more severe disease or symptomatic burden [ 39 , 40 ]. In agreement with this, prior studies have also linked prolonged NSAID use to joint replacement [ 41 ]. Interestingly, in our cohort, a higher aMED score was correlated with lower diastolic pressure (r = –0.104; p-value < 0.001), reinforcing potential dietary effects on vascular health. We also identified a significant gene–mitochondrial DNA interaction: the GG genotype of rs12107036 in TP63 combined with haplogroup UK was associated with increased TKR risk. This interaction was previously linked to rapid knee OA progression in the OAI cohort [ 28 ], consistent with the fact that accelerated OA increases the likelihood of needing a prosthesis [ 42 ]. Other key predictors, such as knee malalignment, prior knee injury, age, and BMI, are well-documented TKR risk factors [ 43 , 44 ]. Our results reinforce their continued importance, even when genetic and nutritional data are included. As a secondary analysis, we examined whether genetic information adds value beyond clinical predictors. To do this, we constructed a TKR-related PRS derived from one of the largest GWAS meta-analysis in OA [ 13 ], being the first study to replicate this score in the OAI cohort. The predictive performance of PRS alone was modest (AUC < 0.6) (data not shown), consistent with prior reports [ 17 , 18 ]. This limited performance likely reflects the multifactorial nature of TKR risk, where clinical and lifestyle factors may overshadow genetic contributions, particularly in older individuals [ 45 ]. In this sense, in the pre-radiographic/early stratum (baseline KL 0–1), the standardized PRS remained independently associated with TKR risk and was among the very few variables maintaining statistical significance, whereas many clinical predictors weakened. This pattern is biologically plausible since genetic signal may be relatively more informative before structural disease and symptom burden fully emerge; yet it must be interpreted cautiously given the small number of events (16 TKRs) in this stratum. Interestingly, among PRS variants, rs34195470 ( WWP2 ) and rs34616559 ( SOX5 ) stood out. Both genes are OA-related and considered promising therapeutic targets. WWP2 is the host gene of the microRNA-140 ( miR140 ) involved in TGF- β signaling and cartilage maintenance [ 46 , 47 ]. In addition to its association with TKR in the base GWAS used in this study [ 13 ], this SNP was also previously associated with knee OA risk in a large GWAS meta-analysis [ 48 ]. On the other hand, SOX5 is a transcription factor crucial for cartilage development in terms of chondrogenesis and the maintenance of cartilage homeostasis [ 49 ], along with SOX6 and SOX9 , and its decreased expression is linked to advanced OA [ 50 ]. This study has some limitations. The lack of external validation restricts generalizability, as few cohorts combine deep genetic, nutritional, and clinical data. To strengthen internal reliability, we used a nested 5×5-fold cross-validation for feature selection, 10-fold cross-validation with SMOTE for hyperparameter tuning, and repeated 5-fold cross-validation (5 repeats) to stabilize AUC estimates, all within the training set before a final hold-out evaluation. In addition, a notable strength of our study is the dual use of linear (Elastic Net) and nonlinear (XGBoost) ML approaches, which revealed complementary patterns: Elastic Net emphasized variables with linear effects (e.g., PRS), while XGBoost captured complex, nonlinear relationships. In addition, a key strength of our work is adherence to the TRIPOD+AI reporting guidelines, which promote transparency and reproducibility in prediction model studies. While not all items are applicable or fully addressed in our context, we provide a detailed checklist in Supplementary Table 2 . In summary, we used ML to develop a practical, non-imaging model that predicts TKR with clinically acceptable performance. Adding the PRS modestly improved specificity, useful for reducing false positives and refining stratification, while having minimal impact on AUC, so its standalone clinical utility remains uncertain. Looking forward, as larger sequencing efforts yield more informative PRS, genetic risk could help triage early OA (KL 0–1) toward targeted MRI precisely where clinical signals are weak and imaging can be inconclusive. We deliberately excluded imaging here because MRI, though informative, is costly and not routinely available, and KL grade, often the strongest imaging-derived predictor, mainly reflects advanced disease (KL 3–4), limiting its relevance in early-stage OA (KL 0–1) [ 51 ]. A tiered pathway may enhance cost-efficiency and enable earlier intervention, as others have proposed [ 22 , 52 ]. On the other hand, the results of this study also show the impact of a series of factors, some of which are either preventable (blood pressure, diet, previous injury) or indicative of personalized care for OA patients (knee alignment, PRS, genetic interaction). Conclusions Our study shows that a ML–guided, non-imaging approach can produce a practical and transparent model for predicting TKR risk in knee OA. This framework advances precision medicine by turning routinely collected clinical and lifestyle data, augmented where appropriate by genetics, into individualized risk estimates that can guide prevention, targeted imaging, and referral pathways. Such models offer a scalable route to more timely, patient-specific decisions. Next steps are rigorous external validation and implementation research to ensure reliable performance and equitable impact across settings. Data Availability Data are available upon reasonable request Authoŕs contribution FJB and IRP contributed equally in the design and coordination of the study; both conceived the study, participated in its design and helped to draft the final version of the manuscript; NO coordinated the access to the biological samples and the clinical information used in this study; VBB and IRV contributed equally in the statistical analysis of data; NV and MCA helped provide the data on the Mediterranean diet; JVG and AMT helped in the analysis of the GWAS data for PRS calculation; SR performed the genetic assignments of mtDNA haplogroups and the TP63 genotypes. Funding This study has been funded by Instituto de Salud Carlos III (ISCIII) through the projects RD21/0002/0009, RD24/0007/0026, PMP22/00101, PMPTA22/00115, PI17/00210, PI22/01165, PI22/01155 and PI23/00913 and co-founded by the European Union. This work was also funded by grants IN607A 2021/07 and IN607D 2021/13 from Axencia Galega de Innovación-Xunta de Galicia. IRP is supported by Contrato Miguel Servet-II Fondo de Investigación Sanitaria (CPII17/00026) SERGAS-stabilized. JVG is supported by grant IN606A 2022/048 from Xunta de Galicia, Spain. Competing interests We declare no competing interests Ethical approval information This study has received the favorable opinion of the ethics committee of XUNTA de Galicia, registration number 2024/074 Data sharing statement Data are available upon reasonable request Patient and Public Involvement To the best of our knowledge, patients OAI cohort are informed about the goal of their enrollment in these prospective studies. The entity responsible of this cohort will be the first to receive a copy of this manuscript once it is published. In addition, all the clinical centers involved in the recruitment of the patients have made provisions to ensure the safety, confidentiality and ethical treatment of study participants according to the Declaration of Helsinki. In this sense, all the participants signed an informed consent Declaration of generative AI and AI-assisted technologies in the writing process During the preparation of this work the author(s) used ChatGPT in order to adjust the different R scripts. After using this tool, the author(s) reviewed and edited the content as needed and take(s) full responsibility for the content of the publication. Supplementary Figure 1. Decil-based calibration plots to compare the models. Predictions were split into ten equal groups; for each, we plotted the average predicted probability against the observed event rate, adding binomial-test confidence bounds and the 45° reference line. A) Complete model; B) Complete model without PRS; C) Complete model for KL 0-1 stratum; D) Complete model for KL ≥2 stratum. Supplementary Figure 2. p-values of the McNemar test to compare the specificity values between the clinical model and the complete model with PRS. Supplementary Figure 3. Confusion matrices for a fixed sensitivity of 0.75 for the clinical model and the complete model with PRS. Acknowledgements The Osteoarthritis Initiative (OAI) is a public-private partnership comprised of five contracts (N01-AR-2-2258; N01-AR-2-2259; N01-AR-2-2260; N01-AR-2-2261; N01-AR-2-2262) funded by the National Institutes of Health, a branch of the Department of Health and Human Services, and conducted by the OAI Study Investigators. Private funding partners include Pfizer, Inc.; Novartis Pharmaceuticals Corporation; Merck Research Laboratories; and GlaxoSmithKline. Private sector funding for the OAI is managed by the Foundation for the National Institutes of Health. This manuscript was prepared using an OAI public use data set and does not necessarily reflect the opinions or views of the OAI investigators, the NIH, or the private funding partners. Footnotes E-mails (and ORCID iD) and academic degrees of the co-authors: Natividad Oreiro (MD, PhD): natividad.oreiro.villar{at}sergas.es , Jorge Vázquez-García (MSc): jorge.vazquez.garcia{at}sergas.es , Antonio Morano-Torres (MSc): antonio.morano.torres{at}sergas.es , Vanesa Balboa-Barreiro (MSc): vanesa.balboa.barreiro{at}sergas.es , Isabel Rodríguez-Valle (MSc): irgzvalle{at}gmail.com Sara Relaño: sara.ma.relano.fernandez{at}sergas.es , Nicola Veronese (MD): nicola.veronese{at}unipa.it , María C. de Andrés (PhD): ma.carmen.de.andres.gonzalez{at}sergas.es Abbreviations AUC Area Under the Curve aMED Alternate Mediterranean Diet Score BMI Body Mass Index CI Confidence Interval dbGAP Database of Genotypes and Phenotypes GBM Gradient Boosting Machine GeCKO Genetic Components of Knee Osteoarthritis glmnet Generalized Linear Model Net GWAS Genome-Wide Association Study KL Kellgren & Lawrence MAF Minor Allele Frequency MD Mediterranean Diet ML Machine Learning MRI Magnetic Resonance Imaging mtDNA Mitochondrial Deoxyribonucleic Acid NSAIDs Non-Steroidal Anti-Inflammatory Drugs OA Osteoarthritis OAI Osteoarthritis Initiative OR Odds Ratio PCA Principal Component Analysis PRS Polygenic Risk Score QC Quality Control RF Random Forest ROC Receiver Operating Characteristic SHAP SHapley Additive exPlanations SNP Single Nucleotide Polymorphism SMOTE Synthetic Minority Oversampling Technique SVM Support Vector Machine TKR Total Knee Replacement VCF Variant Call Format WOMAC Western Ontario and McMaster Universities Osteoarthritis Index XGBoost Extreme Gradient Boosting References 1. ↵ Hunter DJ , March L , Chew M. Osteoarthritis in 2020 and beyond: a Lancet Commission . Lancet . 2020 Nov 28; 396 ( 10264 ): 1711 – 1712 . OpenUrl CrossRef PubMed 2. ↵ Seoane-Mato D , Sanchez-Piedra C , Silva-Fernandez L , Sivera F , Blanco FJ , Perez Ruiz F , et al. Prevalence of rheumatic diseases in adult population in Spain (EPISER 2016 study): Aims and methodology . Reumatol Clin . 2019 Mar - Apr; 15 ( 2 ): 90 – 96 . OpenUrl PubMed 3. Hunter DJ , Bierma-Zeinstra S . Osteoarthritis . Lancet . 2019 Apr 27; 393 ( 10182 ): 1745 – 1759 . OpenUrl CrossRef PubMed 4. ↵ Sharma L . Osteoarthritis of the Knee . N Engl J Med . 2021 Jan 07; 384 ( 1 ): 51 – 59 . OpenUrl CrossRef PubMed 5. ↵ Gelber AC . Knee Osteoarthritis . Ann Intern Med . 2024 Sep ; 177 ( 9 ):ITC129-ITC144. 6. ↵ Zhou ZY , Liu YK , Chen HL , Liu F . Body mass index and knee osteoarthritis risk: a dose-response meta-analysis . Obesity (Silver Spring ). 2014 Oct ; 22 ( 10 ): 2180 – 2185 . OpenUrl PubMed 7. ↵ Felson D , Niu J , Sack B , Aliabadi P , McCullough C , Nevitt MC . Progression of osteoarthritis as a state of inertia . Ann Rheum Dis . 2013 Jun ; 72 ( 6 ): 924 – 929 . OpenUrl Abstract / FREE Full Text 8. ↵ Culliford D , Maskell J , Judge A , Cooper C , Prieto-Alhambra D , Arden NK , et al. Future projections of total hip and knee arthroplasty in the UK: results from the UK Clinical Practice Research Datalink . Osteoarthritis Cartilage . 2015 Apr ; 23 ( 4 ): 594 – 600 . OpenUrl CrossRef PubMed 9. Ackerman IN , Bohensky MA , Zomer E , Tacey M , Gorelik A , Brand CA , et al. The projected burden of primary total knee and hip replacement for osteoarthritis in Australia to the year 2030 . BMC Musculoskelet Disord . 2019 Feb 23; 20 ( 1 ): 90 . OpenUrl CrossRef PubMed 10. ↵ Liu Q , Chu H , LaValley MP , Hunter DJ , Zhang H , Tao L , et al. Prediction models for the risk of total knee replacement: development and validation using data from multicentre cohort studies . Lancet Rheumatol . 2022 Feb ; 4 ( 2 ): e125 – e134 . OpenUrl 11. ↵ Luo D , Fan Z , Yin W . Chronic post-surgical pain after total knee arthroplasty: a narrative review . Perioper Med (Lond ). 2024 Nov 05; 13 ( 1 ): 108 . OpenUrl PubMed 12. ↵ Spector TD , MacGregor AJ . Risk factors for osteoarthritis: genetics . Osteoarthritis Cartilage . 2004 ; 12 Suppl A:S39-44. 13. ↵ Boer CG , Hatzikotoulas K , Southam L , Stefánsdóttir L , Zhang Y , Coutinho de Almeida R , et al. Deciphering osteoarthritis genetics across 826,690 individuals from 9 populations . Cell . 2021 Nov 24; 184 ( 24 ): 6003 – 6005 . OpenUrl PubMed 14. ↵ Hatzikotoulas K , Southam L , Stefansdottir L , Boer CG , McDonald ML , Pett JP , et al. Translational genomics of osteoarthritis in 1,962,069 individuals . Nature . 2025 May ; 641 (8065): 1217 – 1224 . OpenUrl CrossRef PubMed 15. ↵ Torkamani A , Wineinger NE , Topol EJ . The personal and clinical utility of polygenic risk scores . Nat Rev Genet . 2018 Sep ; 19 ( 9 ): 581 – 590 . OpenUrl CrossRef PubMed 16. ↵ Lacaze P , Wang Y , Polekhina G , Bakshi A , Riaz M , Owen A , et al. Genomic Risk Score for Advanced Osteoarthritis in Older Adults . Arthritis Rheumatol . 2022 Sep ; 74 ( 9 ): 1480 – 1487 . OpenUrl PubMed 17. ↵ Sedaghati-Khayat B , Boer CG , Runhaar J , Bierma-Zeinstra SMA , Broer L , Ikram MA , et al. Risk Assessment for Hip and Knee Osteoarthritis Using Polygenic Risk Scores . Arthritis Rheumatol . 2022 Sep ; 74 ( 9 ): 1488 – 1496 . OpenUrl PubMed 18. ↵ Morita Y , Kamatani Y , Ito H , Ikegawa S , Kawaguchi T , Kawaguchi S , et al. Improved genetic prediction of the risk of knee osteoarthritis using the risk factor-based polygenic score . Arthritis Res Ther . 2023 Jun 12; 25 ( 1 ): 103 . OpenUrl PubMed 19. ↵ Bonakdari H , Pelletier JP , Abram F , Martel-Pelletier J . A Machine Learning Model to Predict Knee Osteoarthritis Cartilage Volume Changes over Time Using Baseline Bone Curvature . Biomedicines . 2022 May 26; 10 ( 6 ). 20. Bonakdari H , Pelletier JP , Blanco FJ , Rego-Pérez I , Durán-Sotuela A , Aitken D , et al. Single nucleotide polymorphism genes and mitochondrial DNA haplogroups as biomarkers for early prediction of knee osteoarthritis structural progressors: use of supervised machine learning classifiers . BMC Med . 2022 Sep 12; 20 ( 1 ): 316 . OpenUrl PubMed 21. ↵ Castagno S , Gompels B , Strangmark E , Robertson-Waters E , Birch M , van der Schaar M , et al. Understanding the role of machine learning in predicting progression of osteoarthritis . Bone Joint J . 2024 Nov 01; 106 -B(11): 1216 – 1222 . OpenUrl PubMed 22. ↵ Jamshidi A , Pelletier JP , Labbe A , Abram F , Martel-Pelletier J , Droit A . Machine Learning-Based Individualized Survival Prediction Model for Total Knee Replacement in Osteoarthritis: Data From the Osteoarthritis Initiative . Arthritis Care Res (Hoboken ). 2021 Oct ; 73 ( 10 ): 1518 – 1527 . OpenUrl PubMed 23. ↵ Mahmoud K , Alagha MA , Nowinka Z , Jones G . Predicting total knee replacement at 2 and 5 years in osteoarthritis patients using machine learning . BMJ Surg Interv Health Technol . 2023 ; 5 ( 1 ): e000141 . OpenUrl 24. ↵ Li HHT , Chan LC , Chan PK , Wen C . An interpretable knee replacement risk assessment system for osteoarthritis patients . Osteoarthr Cartil Open . 2024 Jun ; 6 ( 2 ): 100440 . OpenUrl PubMed 25. ↵ Wesseling J , Dekker J , van den Berg WB , Bierma-Zeinstra SM , Boers M , Cats HA , et al. CHECK (Cohort Hip and Cohort Knee): similarities and differences with the Osteoarthritis Initiative . Ann Rheum Dis . 2009 Sep ; 68 ( 9 ): 1413 – 1419 . OpenUrl Abstract / FREE Full Text 26. ↵ Collins GS , Moons KGM , Dhiman P , Riley RD , Beam AL , Van Calster B , et al. TRIPOD+AI statement: updated guidance for reporting clinical prediction models that use regression or machine learning methods . BMJ . 2024 Apr 16; 385 : e078378 . OpenUrl FREE Full Text 27. ↵ Hawker GA , Guan J , Croxford R , Coyte PC , Glazier RH , Harvey BJ , et al. A prospective population-based study of the predictors of undergoing total joint arthroplasty . Arthritis Rheum . 2006 Oct ; 54 ( 10 ): 3212 – 3220 . OpenUrl CrossRef PubMed Web of Science 28. ↵ Durán-Sotuela A , Oreiro N , Fernández-Moreno M , Vázquez-García J , Relaño-Fernández S , Balboa-Barreiro V , et al. Mitonuclear epistasis involving TP63 and haplogroup Uk: Risk of rapid progression of knee OA in patients from the OAI . Osteoarthritis Cartilage . 2024 May ; 32 ( 5 ): 526 – 534 . OpenUrl PubMed 29. ↵ Veronese N , Stubbs B , Noale M , Solmi M , Luchini C , Smith TO , et al. Adherence to a Mediterranean diet is associated with lower prevalence of osteoarthritis: Data from the osteoarthritis initiative . Clin Nutr . 2017 Dec ; 36 ( 6 ): 1609 – 1614 . OpenUrl PubMed 30. ↵ Panagiotakos DB , Pitsavos C , Stefanadis C . Dietary patterns: a Mediterranean diet score and its relation to clinical and biological markers of cardiovascular disease risk . Nutr Metab Cardiovasc Dis . 2006 Dec ; 16 ( 8 ): 559 – 568 . OpenUrl CrossRef PubMed Web of Science 31. ↵ Yerges-Armstrong LM , Yau MS , Liu Y , Krishnan S , Renner JB , Eaton CB , et al. Association analysis of BMD-associated SNPs with knee osteoarthritis . J Bone Miner Res . 2014 Jun ; 29 ( 6 ): 1373 – 1379 . OpenUrl CrossRef PubMed 32. ↵ Zhan X , Hu Y , Li B , Abecasis GR , Liu DJ . RVTESTS: an efficient and comprehensive tool for rare variant association analysis using sequence data . Bioinformatics . 2016 May 01; 32 ( 9 ): 1423 – 1426 . OpenUrl CrossRef PubMed 33. ↵ Hosmer Jr D , Lemeshow S R. S. , Applied logistic regression. USA: John Wiley & Sons, Inc 2013 . 34. ↵ Veronese N , La Tegola L , Crepaldi G , Maggi S , Rogoli D , Guglielmi G . The association between the Mediterranean diet and magnetic resonance parameters for knee osteoarthritis: data from the Osteoarthritis Initiative . Clin Rheumatol . 2018 Aug ; 37 ( 8 ): 2187 – 2193 . OpenUrl PubMed 35. ↵ Veronese N , Koyanagi A , Stubbs B , Cooper C , Guglielmi G , Rizzoli R , et al. Mediterranean diet and knee osteoarthritis outcomes: A longitudinal cohort study . Clin Nutr . 2019 Dec ; 38 ( 6 ): 2735 – 2739 . OpenUrl PubMed 36. ↵ Veronese N , Ragusa FS , Dominguez LJ , Cusumano C , Barbagallo M . Mediterranean diet and osteoarthritis: an update . Aging Clin Exp Res . 2024 Dec 03; 36 ( 1 ): 231 . OpenUrl PubMed 37. ↵ Niu J , Clancy M , Aliabadi P , Vasan R , Felson DT. Metabolic Syndrome, Its Components, and Knee Osteoarthritis: The Framingham Osteoarthritis Study . Arthritis Rheumatol . 2017 Jun ; 69 ( 6 ): 1194 – 1203 . OpenUrl PubMed 38. ↵ Monira Hussain S , Wang Y , Cicuttini FM , Simpson JA , Giles GG , Graves S , et al. Incidence of total knee and hip replacement for osteoarthritis in relation to the metabolic syndrome and its components: a prospective cohort study . Semin Arthritis Rheum . 2014 Feb ; 43 ( 4 ): 429 – 436 . OpenUrl CrossRef PubMed 39. ↵ Dougados M . The role of anti-inflammatory drugs in the treatment of osteoarthritis: a European viewpoint . Clin Exp Rheumatol . 2001 ; 19 ( 6 Suppl 25): S9 – 14 . OpenUrl PubMed Web of Science 40. ↵ Birrell F , Afzal C , Nahit E , Lunt M , Macfarlane GJ , Cooper C , et al. Predictors of hip joint replacement in new attenders in primary care with hip pain . Br J Gen Pract . 2003 Jan ; 53 ( 486 ): 26 – 30 . OpenUrl Abstract / FREE Full Text 41. ↵ Gossec L , Tubach F , Baron G , Ravaud P , Logeart I , Dougados M . Predictive factors of total hip replacement due to primary osteoarthritis: a prospective 2 year study of 505 patients . Ann Rheum Dis . 2005 Jul ; 64 ( 7 ): 1028 – 1032 . OpenUrl Abstract / FREE Full Text 42. ↵ Driban JB , Harkey MS , Barbe MF , Ward RJ , MacKay JW , Davis JE , et al. Risk factors and the natural history of accelerated knee osteoarthritis: a narrative review . BMC Musculoskelet Disord . 2020 May 29; 21 ( 1 ): 332 . OpenUrl PubMed 43. ↵ Turcotte JJ , Kelly ME , Aja JM , King PJ , MacDonald JH . Risk factors for progression to total knee arthroplasty within two years of presentation for knee osteoarthritis . J Clin Orthop Trauma . 2021 May ; 16 : 257 – 263 . OpenUrl PubMed 44. ↵ Pelletier JP , Dorais M , Paiement P , Raynauld JP , Martel-Pelletier J . Risk factors associated with the occurrence of total knee arthroplasty in patients with knee osteoarthritis: a nested case-control study . Ther Adv Musculoskelet Dis . 2022 ; 14 : 1759720X221091359 . OpenUrl PubMed 45. ↵ Kulm S , Kolin DA , Langhans MT , Kaidi AC , Elemento O , Bostrom MP , et al. Characterization of Genetic Risk of End-Stage Knee Osteoarthritis Treated with Total Knee Arthroplasty: A Genome-Wide Association Study . J Bone Joint Surg Am . 2022 Oct 19; 104 ( 20 ): 1814 – 1820 . OpenUrl PubMed 46. ↵ Mokuda S , Nakamichi R , Matsuzaki T , Ito Y , Sato T , Miyata K , et al. Wwp2 maintains cartilage homeostasis through regulation of Adamts5 . Nat Commun . 2019 Jun 03; 10 ( 1 ): 2429 . OpenUrl CrossRef PubMed 47. ↵ Roberts JB , Rice SJ . Osteoarthritis as an Enhanceropathy: Gene Regulation in Complex Musculoskeletal Disease . Curr Rheumatol Rep . 2024 Jun ; 26 ( 6 ): 222 – 234 . OpenUrl PubMed 48. ↵ Styrkarsdottir U , Lund SH , Thorleifsson G , Zink F , Stefansson OA , Sigurdsson JK , et al. Meta-analysis of Icelandic and UK data sets identifies missense variants in SMO, IL11, COL11A1 and 13 more new loci associated with osteoarthritis . Nat Genet . 2018 Dec; 50 ( 12 ): 1681 – 1687 . OpenUrl CrossRef PubMed 49. ↵ Huang Y , Wang Z . Therapeutic potential of SOX family transcription factors in osteoarthritis . Ann Med . 2025 Dec ; 57 ( 1 ): 2457520 . OpenUrl PubMed 50. ↵ Lee JS , Im GI . SOX trio decrease in the articular cartilage with the advancement of osteoarthritis . Connect Tissue Res . 2011 ; 52 ( 6 ): 496 – 502 . OpenUrl CrossRef PubMed 51. ↵ Guermazi A , Hayashi D , Roemer F , Felson DT , Wang K , Lynch J , et al. Severe radiographic knee osteoarthritis--does Kellgren and Lawrence grade 4 represent end stage disease?--the MOST study . Osteoarthritis Cartilage . 2015 Sep ; 23 ( 9 ): 1499 – 1505 . OpenUrl CrossRef PubMed 52. ↵ Rajamohan HR , Wang T , Leung K , Chang G , Cho K , Kijowski R , et al. Prediction of total knee replacement using deep learning analysis of knee MRI . Sci Rep . 2023 Apr 28; 13 ( 1 ): 6922 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted October 29, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Predicting Total Knee Replacement in Knee Osteoarthritis Using a Machine-Learning–Guided Approach in patients of the Osteoarthritis Initiative (OAI) Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Predicting Total Knee Replacement in Knee Osteoarthritis Using a Machine-Learning–Guided Approach in patients of the Osteoarthritis Initiative (OAI) Francisco J. Blanco , Natividad Oreiro , Jorge Vázquez-García , Antonio Morano-Torres , Vanesa Balboa-Barreiro , Isabel Rodríguez-Valle , Sara Relaño , Nicola Veronese , María C. de Andrés , Ignacio Rego-Pérez medRxiv 2025.10.28.25338966; doi: https://doi.org/10.1101/2025.10.28.25338966 Share This Article: Copy Citation Tools Predicting Total Knee Replacement in Knee Osteoarthritis Using a Machine-Learning–Guided Approach in patients of the Osteoarthritis Initiative (OAI) Francisco J. Blanco , Natividad Oreiro , Jorge Vázquez-García , Antonio Morano-Torres , Vanesa Balboa-Barreiro , Isabel Rodríguez-Valle , Sara Relaño , Nicola Veronese , María C. de Andrés , Ignacio Rego-Pérez medRxiv 2025.10.28.25338966; doi: https://doi.org/10.1101/2025.10.28.25338966 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Rheumatology Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (971) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9220) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffe25b6fd0806f7',t:'MTc3OTQ3NzMwMg=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-06-16T06:25:30.133384+00:00