Machine Learning Fairness in Predicting Underweight, Overweight and Adiposity Across Socioeconomic and Caste Group in India: Evidence from the Longitudinal Ageing Study in India

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 52,561 characters · extracted from preprint-html · click to expand
Machine Learning Fairness in Predicting Underweight, Overweight and Adiposity Across Socioeconomic and Caste Group in India: Evidence from the Longitudinal Ageing Study in India | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Machine Learning Fairness in Predicting Underweight, Overweight and Adiposity Across Socioeconomic and Caste Group in India: Evidence from the Longitudinal Ageing Study in India John Tayu Lee , Sheng Hui Hsu , Vincent Cheng-Sheng Li , Kanya Anindya , View ORCID Profile Meng-Huan Chen , Charlotte Wang , Toby Kai-Bo Shen , Valerie Tzu-Ning Liu , Hsiao-Hui Chen , Rifat Atun doi: https://doi.org/10.1101/2025.07.06.25330978 John Tayu Lee 1 Department of Global Health and Population, Harvard T.H. Chan School of Public Health, Harvard University , Boston, Massach us etts, USA 2 Master Program in Statistics of National Taiwan University , Taiwan 3 Institute of Health Policy and Management, College of Public Health, National Taiwan University , Taiwan Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: johntayulee{at}ntu.edu.tw Sheng Hui Hsu 2 Master Program in Statistics of National Taiwan University , Taiwan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Vincent Cheng-Sheng Li 3 Institute of Health Policy and Management, College of Public Health, National Taiwan University , Taiwan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kanya Anindya 4 School of Public Health and Community Medicine, University of Gothenburg Find this author on Google Scholar Find this author on PubMed Search for this author on this site Meng-Huan Chen 3 Institute of Health Policy and Management, College of Public Health, National Taiwan University , Taiwan Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Meng-Huan Chen Charlotte Wang 5 Institute of Health Data Analytics and Statistics, College of Public Health, National Taiwan University , No. 17, Xu-Zhou Road, Taipei, 100025, Taiwan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Toby Kai-Bo Shen 3 Institute of Health Policy and Management, College of Public Health, National Taiwan University , Taiwan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Valerie Tzu-Ning Liu 3 Institute of Health Policy and Management, College of Public Health, National Taiwan University , Taiwan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hsiao-Hui Chen 3 Institute of Health Policy and Management, College of Public Health, National Taiwan University , Taiwan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Rifat Atun 1 Department of Global Health and Population, Harvard T.H. Chan School of Public Health, Harvard University , Boston, Massach us etts, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Data/Code Preview PDF ABSTRACT Background Machine learning (ML) models are widely used to predict body mass index (BMI), yet their fairness across socioeconomic and caste groups remains uncertain, especially in countries with structure inequalities. This study evaluated the accuracy and fairness of ML models in predicting underweight, overweight, and central adiposity, examined the impact of socioeconomic and household factors, identified key predictive features, and assessed the effect of bias mitigation techniques on model performance. Methods This study analysed data from the nationally representative Longitudinal Ageing Study in India (LASI) with over 55,000 individuals aged 45 and older. We applied ML models (Random Forest, XGBoost, Gradient Boosting, LightGBM, DNN, DCN) alongside logistic regression. Model were trained (80%) and tested (20%), evaluated using AUROC, accuracy, sensitivity, specificity, and precision. Fairness assessment included subgroup analyses across socioeconomic and caste groups, equity-based fairness (e.g. Equalized Odds, Demographic Parity). Feature importance was examined using SHAP values. Bias mitigation techniques were applied at three stages: pre-processing (Disparate Impact Remover, Reweighting), in-processing (Exponential Gradient Reduction), and post- processing (Calibrated Equalized Odds, Reject Option Classification). Prediction density analysis assessed class separability across subgroup. Results Tree-based models—especially LightGBM and Gradient Boosting—along with Logistic Regression, consistently delivered the highest AUROC scores in predicting underweight, overweight, and high waist circumference outcomes (AUROC= 0.79-0.84). Incorporating socioeconomic and health-related variables gradually enhanced model performance; for example, the AUROC for underweight prediction increased from 0.74 to 0.78. However, our analysis revealed notable fairness issues: models performed worse for scheduled tribes and lower socioeconomic groups, as evidenced by reduced sensitivity and specificity in these subgroups. Feature importance analysis using SHAP values indicated that variables such as grip strength, gender, and residence were the key drivers of prediction differences; specifically, lower grip strength and rural residence were linked to underweight, whereas higher grip strength, urban residence, and female gender were associated with overweight and central adiposity. Regarding bias mitigation, techniques like Reject Option Classification and Equalized Odds Postprocessing showed some potential for reducing subgroup disparities by aligning the performance of low- and high-performing groups. Nevertheless, these adjustments sometimes came with trade-offs, and other methods—such as Exponentiated Gradient Reduction and Adversarial Debiasing—resulted in substantial declines in overall performance. While approaches like Disparate Impact Remover, Reweighting, and the Stratified Subgroup Best Model produced only modest changes relative to the unmitigated model, our findings highlight persistent fairness challenges. Conclusions ML models can effectively predict obesity and adiposity risks in India, but addressing biases is critical for equitable application. There are needs to further refinement of fairness awareness ML approaches in public health, particularly in the context of India’s diverse population for more inclusive and effective policy decisions. AUTHOR SUMMARY India now faces the paradox of widespread under-nutrition alongside a rising tide of obesity among its older population. We asked whether state-of-the-art machine-learning models could accurately identify individuals at highest risk of under-weight, overweight–obesity, and central adiposity while treating all social groups equitably. Using nationally representative data on more than 55,000 adults aged 45 years and above, we compared gradient-boosted decision trees, random forests, logistic regression, and other approaches with conventional regression techniques. Overall, the modern algorithms produced the strongest predictions. Yet a closer look revealed systematic shortfalls for scheduled tribes, scheduled castes, and the lowest income quintile—even when the models achieved excellent accuracy in the population as a whole. We then applied several well-established bias-mitigation strategies, such as re-weighting the training data and post-processing the decision thresholds. These interventions reduced the performance gap for disadvantaged groups, albeit at a modest cost to overall accuracy. By combining careful fairness audits with Shapley-based interpretation of feature importance, we illuminate how socioeconomic and caste-related factors shape both nutritional risk and prediction error. Our findings underscore that fair, trustworthy decision support systems in public health must be designed explicitly with equity objectives, rather than assuming that technical excellence alone will guarantee just outcomes. INTRODUCTION India, the most populous country in the world, faces significant health disparities across its diverse populations and geographic regions 1 , 2 . The prevalent issues of underweight, overweight, obesity, and adiposity are major contributors to these health inequalities and closed linked to country’s rapid demographic and epidemiological transitions 3 – 5 . These conditions range from malnutrition to overnutrition and mirror shifts in lifestyle, economic development, and societal changes throughout the nation 1 , 6 , 7 . This complex nutritional landscape imposes a significant burden of premature mortality and hinders progress toward multiple Sustainable Development Goals (SDGs), including those targeting good health and well-being (SDG 3), zero hunger (SDG 2), and reduced inequalities (SDG 10), as well as other interconnected goals such as quality education (SDG 4) 8 . Early and accurate prediction of underweight, overweight, obesity, and adiposity is crucial for the effective implementation of public health interventions 9 . Extensive research has shown that the factors influencing these conditions vary significantly across India’s diverse populations 10 – 12 . This variability poses a challenge for traditional epidemiological methods, which often struggle to capture the complex interactions among sociodemographic, household, environmental, and lifestyle factors in detail. Machine learning approaches offer new solution with its advanced algorithms capable of adapting to and analyzing datasets with a wide range of interconnected variables. These methods not only improve our ability to characterize and predict body mass index (BMI) 13 and anthropometric outcomes 14 but do so with exceptional precision. Moreover, in a variety of clinical domains, we can see that machine learning (ML) have led to remarkable improvements in predicting health outcomes—from diagnosis to risk stratification. For example, Rajkomar et al. (2018) 15 argued that ML systems have the potential to advance health equity if fairness is incorporated from design through deployment. Obermeyer et al. (2019) 16 demonstrated that widely used healthcare algorithms could exhibit substantial racial bias, leading to the under-identification of high-risk patients among marginalized groups. Despite these early insights, many subsequent studies have predominantly focused on overall predictive performance rather than rigorously assessing and mitigating bias across sensitive subgroups. A systematic review by Chen et al. (2024) 17 highlighted that although numerous methods exist to detect and reduce bias in machine learning models. It has been suggested that bias in machine learning models can arise from various sources, including the training data, the design of the algorithms, and the methods used to interpret data 15 , 17 . Such biases could potentially exacerbate existing disparities in health outcomes across different demographic groups, including variations across castes, and socioeconomic statuses 18 – 20 . This study aims to evaluate the fairness in machine learning algorithm in predicting these BMI and central adiposity in India adult population. Specifically, the objectives are to (1) assess the accuracy and fairness of ML models across different socioeconomic and caste groups, (2) compare ML performance with traditional regression methods, (3) examine the impact of incorporating socioeconomic and household variables on model accuracy and fairness, (4) identify key predictive features influencing NML and central adiposity, (5) investigate the extent and source of bias in ML predictions across socioeconomic and caste groups, and (6) evaluate the effectiveness of various bias mitigation techniques in improving fairness without compromising model performance. RESULTS Descriptive Summary The study included a total of 55,647 participants, of whom 46.67% were male and 53.33% were female. The participants’ ages ranged from 45 to 116 years, with a mean (SD) age of 59.4 (10.4) years. A demographic breakdown by caste indicated that 27.58% of participants belonged to the General category, 17.64% to the Scheduled Caste, 16.88% to the Scheduled Tribe, and 37.9% to Other Backward Classes. Another key variable, monthly per capita expenditure (MPCE), was categorized into five groups of approximately equal size: Lowest (20.28%), Lower Middle (20.33%), Middle (19.99%), Upper Middle (19.81%), and Highest (19.59%). In terms of health indicators, 18.5% (10,315 participants) were classified as underweight, 44.2% (24,570 participants) as overweight/obesity, and 45.9% (25,515 participants) exhibited a high waist circumference. Predictive Performance of Machine Learning Models The predictive performance of various machine learning models for identifying underweight, overweight/obesity, and high waist circumference among the Indian population is summarized in Supplementary Table 3 . Figure 1 presents the ROC curves and corresponding AUROC values for these models. Notably, the ROC curve visualization and AUROC in the plot are based on the overall model applied to the test data without bootstrap resampling. Download figure Open in new tab Figure 1. Receiver Operating Characteristic (ROC) Curve with Area Under the Curve (AUROC) for Model Performance Evaluation Underweight Prediction For underweight prediction, the AUROC values ranged from 0.72 to 0.80, with the highest performance observed in Logistic Regression, Gradient Boosting, and LightGBM, all achieving an AUROC of 0.80. While these models exhibited strong specificity, sensitivity remained consistently low across all groups. This discrepancy may be attributed to the imbalance in the data, as the prevalence of underweight cases was lower compared to other health outcomes. Additionally, models achieved higher accuracy for individuals with high MPCE compared to those with low MPCE; however, no notable difference was observed in AUROC. Overweight/Obesity Prediction For overweight/obesity prediction, the AUROC values ranged from 0.70 to 0.80, with LightGBM and Logistic Regression outperforming other models. However, the model’s performance varied across subgroups based on MPCE. In the lowest MPCE subgroup, LightGBM achieved a sensitivity of 0.37 (95% CI: 0.33–0.41) and a specificity of 0.92 (95% CI: 0.91–0.94), indicating limited ability to identify overweight/obesity cases despite strong non-case classification. Conversely, in the highest MPCE subgroup, sensitivity improved to 0.84 (95% CI: 0.82–0.86), but specificity dropped to 0.51 (95% CI: 0.47–0.54), highlighting a trade-off between case detection and non-case accuracy. High Waist Circumference Prediction For high waist circumference prediction, AUROC ranged from 0.74 to 0.84, with LightGBM, Gradient Boosting, and Logistic Regression again performing best. Similar to overweight and obesity prediction, the performance of LightGBM varied considerably across MPCE subgroups in terms of sensitivity and specificity, while accuracy remained relatively consistent. In the lowest MPCE subgroup, the model achieved a sensitivity of 0.52 (95% CI: 0.48–0.56) and a specificity of 0.89 (95% CI: 0.88–0.91). However, in the highest MPCE subgroup, sensitivity increased to 0.83 (95% CI: 0.80– 0.85), while specificity declined to 0.63 (95% CI: 0.60–0.66), reflecting a trade-off between improved case detection and reduced non-case classification accuracy. Model Assessment Across Outcomes Across all outcomes, Logistic Regression and LightGBM were identified as the most robust model overall, while neural network models consistently demonstrated lower performance. Due to its superior predictive capability across all outcomes and its ability to capture complex nonlinear relationships, LightGBM was selected as the primary model for further analysis. Prediction Density Analysis Figure 2 shows the density plots of predicted probabilities for the positive (blue) and negative (orange) classes, stratified by caste and MPCE. Well-separated curves—often appearing as distinct double peaks—signal strong discrimination (high sensitivity and specificity), whereas overlapping curves indicate poorer performance. Download figure Open in new tab Download figure Open in new tab Download figure Open in new tab Figure 2. Density Distribution of Model Predictions by Subgroups Stratified by Caste and Monthly Per Capita Expenditure (MPCE) Using LightGBM For underweight predictions, the positive and negative curves largely overlap across all subgroups, with only a slight left skew in the positive distribution, indicating weak discrimination. This is consistent with the lower sensitivity and specificity reported in Supplementary Table 2 for underweight predictions. In contrast, the overweight/obesity plots reveal more pronounced double peaks—especially in higher MPCE subgroups and among Other Backward Classes and General caste—though Scheduled Caste, Scheduled Tribe, and the lowest MPCE subgroup exhibit more overlap. A similar trend is observed for high waist circumference, where moderate to high MPCE subgroups show distinct double peaks, while the lowest MPCE subgroup displays considerable overlap; notably, the middle MPCE subgroup presents the clearest double peaks, suggesting the best balance between sensitivity and specificity. Taken together, these density plots confirm that LightGBM discriminates overweight/obesity and high waist circumference more effectively than underweight, although performance varies by caste and socioeconomic status, with the poorest separation observed in the lowest MPCE and marginalized caste groups. Incorporating Socioeconomic and Health Features Table 1 presents the performance metrics of the predictive models after incorporating both socioeconomic and health-related variables. For the underweight prediction model, the inclusion of these variables resulted in an increase in the AUROC from 0.74 to 0.75, and further to 0.78 after adding health-related variables. Similarly, for the overweight prediction, the AUROC increased from 0.75 to 0.76 with the addition of socioeconomic variables, and further to 0.78 after incorporating health-related variables. In the case of waist circumference, the AUROC improved from 0.80 to 0.81 with the inclusion of socioeconomic variables, and further to 0.82 with the addition of health-related data. These findings underscore the critical role of integrating socioeconomic and health-related factors to enhance the predictive performance of the models. View this table: View inline View popup Download powerpoint Table 1. Model Performance Metrics After Integrating Socioeconomic and Health-Related Variables Using LightGBM Feature Importance Analysis Figure 3 presents a summary plot of Shapley values for feature importance analysis. The most influential features were grip strength, gender, and residence, as indicated by the highest Shapley values. Additionally, hypertension, diabetes, and education level were also found to be significant contributors to the model predictions. These features played a crucial role in the overall performance of the predictive models. Download figure Open in new tab Figure 3. Feature Importance Analysis and Summary Plot of Shapley Values for Model Interpretability Focusing on the top three features, we observed that grip strength had a negative impact on underweight individuals and a positive impact on overweight individuals and those with high waist circumference. Residence in rural areas was associated with a higher likelihood of underweight, while urban residents were more likely to be overweight or have a high waist circumference. Lastly, males were more likely to be underweight, while females were more likely to be overweight, with females also showing a higher likelihood of having a high waist circumference. Bias mitigation in Socioeconomic and Caste Subgroup Figure 4 , Supplementary Table 4 , and Supplementary Table 5 compared multiple bias mitigation strategies for the three target outcomes—underweight ( Figure 4A 1/A2), overweight/obesity ( Figure 4B 1/B2), and high waist circumference ( Figure 4C 1/C2)—by presenting box plots for three key performance metrics (AUROC, sensitivity, and specificity) across caste and MPCE subgroups. In these plots, the grey boxes represent the original best-performing model (LightGBM), while the colored boxes correspond to various bias mitigation techniques. Narrower boxes indicate smaller variability among subgroups (i.e., less disparity), and higher median values reflect stronger overall performance. Notably, Reject Option Classification and Equalized Odds Postprocessing (as seen in Figures 4A 1 and 4A2) tend to “pull up” the low-performing subgroups and “pull down” the high- performing ones, which narrows the boxes and results in more uniform performance across caste and MPCE; however, this uniformity is sometimes achieved at the expense of lower median values, highlighting a trade-off between fairness and predictive power. In contrast, while Exponentiated Gradient Reduction and Adversarial Debiasing also reduce subgroup variability—especially in sensitivity—their median AUROC and sensitivity values drop substantially, indicating a more pronounced degradation in overall performance. Meanwhile, Disparate Impact Remover, Reweighting, and the Stratified Subgroup Best Model produce relatively modest changes, with box plots that remain close to the original grey distribution. Overall, Figure 4 underscores that although some methods can significantly narrow subgroup gaps—particularly for sensitivity and specificity— others introduce marked trade-offs in overall AUROC, demonstrating that different bias mitigation techniques vary widely in both their fairness benefits and their impact on predictive accuracy. Download figure Open in new tab Figure 4. Comparative Impact of Bias Mitigation Techniques on Subgroup Performance for Underweight, Overweight, and High Waist Circumference DISCUSSION This study is the first to evaluate the machine learning techniques for predicting underweight, overweight, obesity, and central adiposity within the Indian context, while also addressing biases that related to socioeconomic and demographic disparities. Our findings demonstrate that tree-based algorithms, particularly LightGBM and Gradient Boosting, outperform others in predicting anthropometric measures, including underweight, overweight, and waist measurements. Incorporating socioeconomic and health-related significantly enhanced model performance, highlighting their importance in improving model accuracy. Key predictors identified, such as grip strength, gender, residence, and hypertension, emphasize the role of socioeconomic and health variables in shaping anthropometric outcomes. Despite overall strong predictive capability, performance varied across demographic groups, with marginalized populations, such as lower socioeconomic groups and scheduled tribes, experiencing lower accuracy. These findings underscore the need to address biases in ML models to ensure fair and equitable health predictions across diverse demographic groups. Bias mitigation strategies, including Reject Option Remover and Equalized Odds Postprocessing, improved fairness, whereas Exponentiated Gradient Reduction and Adversarial Debiasing reduced disparities but at the cost of overall accuracy. Methods like Disparate Impact Remover, Reweighting, and the Stratified Subgroup Best Model yielded minimal improvements. Comparison with Existing Literature Our study advances research by using machine learning to predict nutritional outcomes— underweight, overweight, obesity, and adiposity—in a large, nationally representative sample of older Indian adults. While previous studies focused on obesity prediction in specialized datasets, such as using deep learning on environmental factors 25 or combining neuroimaging with metabolomics 26 , our work uniquely addresses nutritional risk in later life, where both under- and overnutrition coexist. Additionally, by incorporating culturally specific factors like socioeconomic status and caste, we provide a level of contextual nuance often overlooked in prior studies 27 , 28 , 29 . Our approach is further distinguished by its rigorous evaluation and mitigation of algorithmic bias. Whereas prior studies using adversarial training have focused on reducing bias in models predicting outcomes like foregone care or cost-based risk among clinical populations 30 and analyses of widely deployed risk prediction tools have revealed significant racial biases arising from label choice 16 our work systematically implements a suite of debiasing techniques—including Disparate Impact Remover, Exponentiated Gradient Reduction, Re-sampling/Re-weighting, and Adversarial Training—complemented by stratified modeling. These measures help ensure that predictive accuracy is maintained equitably across marginalized subgroups (e.g., females, scheduled tribes, lower socioeconomic strata). In addition, although obesity risk prediction models developed in Bangladesh using relatively small datasets have shown high accuracy via logistic regression 31 our study not only achieves competitive performance but does so while addressing fairness in a heterogeneous, underexplored older population. Collectively, our integrated framework of high predictive performance and robust fairness adjustment fills an important gap in the literature, offering actionable insights for equitable public health interventions in low- and middle-income settings. Policy Implications Our findings highlight the potential of ML models to inform national health policies by predicting underweight, obesity, and adiposity with high accuracy. Algorithms like LightGBM show promise in predicting health outcomes such as underweight, obesity, and adiposity by incorporating socioeconomic and demographic variables. Key predictive features—grip strength, gender, residence, education, and chronic conditions—enable targeted, population-specific interventions. However, ML models often struggle struggle with performance disparities in marginalized groups, necessitating fairness-enhancing techniques to ensure equitable predictions. Addressing these gaps requires fairness-enhancing techniques, such as the Disparate Impact Remover, to ensure consistent model accuracy across diverse groups 15 , 17 . This study has two main limitations. First, Our study did not use survey weights, as it is still unclear how to properly include them in machine learning studies 32 . Second, the use of self-reported data introduces potential biases, such as recall bias or social desirability bias, which could impact the accuracy of key variables. Future research should explore improved weighting methods and validate findings using additional datasets for broader generalizability. Conclusion ML models offer significant promise in predicting obesity and related conditions in India, but addressing biases in predictions is crucial for equitable health outcomes. Our study underscores the need for fairness-driven ML approaches in public health, particularly in the context of India’s diverse population, providing actionable insights for more inclusive and effective policy decisions. MATERIALS and METHODS Data This study uses a cross-sectional analysis of the Longitudinal Ageing Study in India (LASI) for the wave 2017-2018. LASI is a nationally representative survey of individuals aged 45 and above from all Indian states and union territories. The survey examines the health, social, and economic well-being of older adults. Harmonized with datasets like NHANES, ELSA, and CHARLS, LASI facilitates cross-population comparisons. Initially, the survey included 72,250 older adults and their spouses, enrolled through multistage probability sampling. After excluding 6,350 cases due to proxy interviews and participants younger than 45, the dataset was reduced to 64,867 individuals. Further data cleaning and processing to address incomplete records resulted in a final analytical sample of 55,647 participants. The dataset provides detailed demographic, socioeconomic, and health-related information, including anthropometric measures such as Body Mass Index (BMI) and waist circumference (WC), collected through direct physical examinations. These data are critical for developing and validating machine learning models to identify obesity and related health risks. Data collection involved computer- assisted personal interviews, molecular biomarkers (e.g., dried blood spots), and non-molecular measures at the individual, household, and community levels. Variables used in this study are detailed in Supplementary Table 2 . Measurements The primary outcome measures include BMI and WC categories based on Asian standards for the Indian population: (1) BMI categories: underweight (90 cm; Women: >80 cm. Pre-processing of Independent Variables The independent variables underwent systematic pre-processing to improve data quality and facilitate model interpretability. All pre-processing procedures, including handling missing data, variable encoding, standardization, and feature engineering, were documented and are accessible on GitHub ( https://github.com/johntayuleeHEPI/LASI-BMI.git ). More detail descriptions of all the variables can be found in Supplementary Table 2 . After feature derivation, categorical variables were encoded using one-hot encoding, and rows containing missing values were excluded from the dataset. Machine Learning Algorithms We employed multiple supervised machine learning (ML) models, including Random Forest, eXtreme Gradient Boosting (XGBoost), Gradient Boosting, Light Gradient Boosting Machine (LightGBM), Deep Neural Networks (DNN), and Fully Convolutional Networks (FCN). The dataset was divided into training (80%) and testing (20%) subsets, with the models trained on the training set and evaluated on the testing set. Features were standardized by calculating the z-score for each data point within the respective feature. Model Evaluation The evaluation of model performance was conducted using the receiver operating characteristic (ROC) curve, a graphical representation of the trade-off between true positive rates (TPR) and false positive rates (FPR) across varying classification thresholds. The area under the ROC curve (AUROC) served as the primary metric to quantify the models’ discriminative ability, indicating their effectiveness in distinguishing between classes. The model with the highest AUROC was selected for further in-depth analysis. To ensure a comprehensive evaluation, additional performance metrics, including accuracy, sensitivity, specificity, and precision, were computed. Notably, sensitivity and specificity were emphasized for their importance in identifying true positive and true negative cases relative to the ground truth. These metrics were further stratified and analyzed across subgroups to assess variations in model performance under different conditions. All performance and fairness metrics were estimated using 1,000 bootstrap iterations to derive confidence intervals (CIs), ensuring robust and reliable estimates of the models’ capabilities and fairness across subgroups. Prediction Density Analysis To evaluate the models’ discriminative power, prediction density analysis was conducted using density plots for positive and negative classes. These plots provide insights into the model’s sensitivity and specificity across subgroups. A left-skewed distribution for the positive class indicates high sensitivity, reflecting strong confidence in positive predictions, while a right-skewed distribution for the negative class signifies high specificity. In contrast, overlapping or flat distributions indicate poor class separation and limited predictive performance. Feature Importance Analysis Feature importance was analysed using impurity-based feature importance and SHAP (SHapley Additive exPlanations) values. Impurity-based importance, common in tree-based algorithms, measures the reduction in impurity achieved by each feature. SHAP values, grounded in game theory, provide a detailed attribution of each feature’s contribution to individual predictions. These methods enhanced model interpretability by identifying key factors influencing predictions. Additionally, features were categorized into three groups: demographic, socioeconomic, and health- related variables. Three models were constructed, each incorporating a different set of features: demographic-only, demographic + socioeconomic, and demographic + socioeconomic + health. This approach enabled the examination of how the inclusion of different feature categories impacted model performance and interpretability. Debias and fairness in Socioeconomic and Caste Subgroup To address the biases identified during the validation phase, we modified the training data to ensure balanced representation across all groups. The bias mitigation techniques were applied in three distinct stages: pre-processing, in-processing, and post-processing. In the pre-processing stage, we employed Disparate Impact Remover 21 , and Reweighting 22 methods to adjust the dataset. In the in-processing stage, Exponential Gradient Reduction 23 was applied to reduce bias during model training. For post-processing, we implemented fairness adjustments using Calibrated Equalized Odds Postprocessing 24 , Reject Option Classification, and Equalized Odds Postprocessing. These methods were implemented using IBM’s AI Fairness 360 (AIF360). Additionally, we incorporated a stratified machine learning model to address potential arithmetic bias. The performance of this model was compared across various fairness adjustments to evaluate their overall impact on predictive accuracy. Notably, most bias mitigation approaches in our study relied on LightGBM following data pre-processing, with the exceptions of the Prejudice Remover—which defaults to logistic regression—and Adversarial Debiasing, which employs a deep learning framework. This integrated analysis enabled us to systematically assess the trade-offs between fairness and overall model performance. Patient and Public Involvement Patients and/or the public were not involved in the design, conduct, reporting, or dissemination plans of this research. Ethics The study was approved by the NTU Ethical Review Board (NTU REC-No.: 202407HS010). Ethical approval for LASI data collection was granted by the Indian Council of Medical Research and the Institutional Review Board (IRB) at IIPS, Mumbai. Informed consent was obtained at household and individual levels. AUTHOR CONTRIBUTIONS JTL contributed the conception and design of this research, while SHH performed data extraction, model training, analytical methods, and results interpretation. KA assisted with data cleaning and extraction, and VCSL contributed to the literature review as well as revisions to the methods, results, and analysis code. MHC further supported the model training process and conducted the fairness analysis. Additionally, TKBS, VTL, and HHC collaboratively edited the manuscript, managed citations, and prepared the remaining sections, whereas RA and CW provided expert guidance on both the manuscript and the methodological design. Data Availability The data that support the findings of this study are publicly available from https://github.com/johntayuleeHEPI/LASI-BMI.git SUPPLIMENTARY FILES APPENDIX TABLE View this table: View inline View popup Download powerpoint Supplementary Table 1. Descriptive Statistics of Key Health Outcomes: Prevalence of Underweight, Overweight, and High Waist Circumference Based on BMI and Waist Measurements View this table: View inline View popup Supplementary Table 2. Definitions and Descriptions of Variables Used in the Analysis View this table: View inline View popup Supplementary Table 3. Comparison of Evaluation Metrics Across Different Machine Learning Models View this table: View inline View popup Supplementary Table 4. Performance of the Subgroup-Stratified models View this table: View inline View popup Download powerpoint Supplementary Table 5. Comparison of Fairness Metrics (Equalized odds) Among Different Bias Mitigation Methods REFERENCE 1. ↵ Balarajan Y , Selvaraj S , Subramanian SV . Health care and equity in India . Lancet 2011 ; 377 ( 9764 ): 505 – 15 . doi: 10.1016/s0140-6736(10)61894-6 [published Online First: 20110110] OpenUrl CrossRef PubMed Web of Science 2. ↵ Banerjee S , Roy Chowdhury I . Inequities in curative health-care utilization among the adult population (20-59 years) in India: A comparative analysis of NSS 71st (2014) and 75th (2017-18) rounds . PLoS One 2020 ; 15 ( 11 ): e0241994 . doi: 10.1371/journal.pone.0241994 [published Online First: 20201125] OpenUrl CrossRef PubMed 3. ↵ Khandelwal S , Reddy KS . Eliciting a policy response for the rising epidemic of overweight-obesity in India . Obes Rev 2013 ; 14 Suppl 2 : 114 – 25 . doi: 10.1111/obr.12097 OpenUrl CrossRef PubMed 4. Corsi DJ , Subramanian SV . Socioeconomic Gradients and Distribution of Diabetes, Hypertension, and Obesity in India . JAMA Netw Open 2019 ; 2 ( 4 ): e190411 . doi: 10.1001/jamanetworkopen.2019.0411 [published Online First: 20190405] OpenUrl CrossRef 5. ↵ Sengupta A , Angeli F , Syamala TS , et al. Overweight and obesity prevalence among Indian women by place of residence and socio-economic status: Contrasting patterns from ’underweight states’ and ’overweight states’ of India . Soc Sci Med 2015 ; 138 : 161 – 9 . doi: 10.1016/j.socscimed.2015.06.004 [published Online First: 20150609] OpenUrl CrossRef PubMed 6. ↵ Nguyen PH , Scott S , Headey D , et al. The double burden of malnutrition in India: Trends and inequalities (2006-2016) . PLoS One 2021 ; 16 ( 2 ): e0247856 . doi: 10.1371/journal.pone.0247856 [published Online First: 20210225] OpenUrl CrossRef PubMed 7. ↵ Subramanian SV , Davey Smith G , Subramanyam M . Indigenous health and socioeconomic status in India . PLoS Med 2006 ; 3 ( 10 ): e421 . doi: 10.1371/journal.pmed.0030421 OpenUrl CrossRef PubMed 8. ↵ Measuring progress from 1990 to 2017 and projecting attainment to 2030 of the health- related Sustainable Development Goals for 195 countries and territories: a systematic analysis for the Global Burden of Disease Study 2017 . Lancet 2018 ; 392 ( 10159 ): 2091 – 138 . doi: 10.1016/s0140-6736(18)32281-5 [published Online First: 20181108] OpenUrl CrossRef PubMed 9. ↵ An R , Shen J , Xiao Y . Applications of Artificial Intelligence to Obesity Research: Scoping Review of Methodologies . J Med Internet Res 2022 ; 24 ( 12 ): e40589 . doi: 10.2196/40589 [published Online First: 20221207] OpenUrl CrossRef PubMed 10. ↵ Kim R , Bijral AS , Xu Y , et al. Precision mapping child undernutrition for nearly 600,000 inhabited census villages in India . Proc Natl Acad Sci U S A 2021 ; 118 ( 18 ) doi: 10.1073/pnas.2025865118 OpenUrl Abstract / FREE Full Text 11. Chakrabarty M , Let S . Spatial clustering of overweight/obesity among women in India: Insights from the latest National Family Health Survey . PLoS One 2024 ; 19 ( 7 ): e0305205 . doi: 10.1371/journal.pone.0305205 [published Online First: 20240724] OpenUrl CrossRef PubMed 12. ↵ Sung M , Kumar A , Mishra R , et al. Temporal change in prevalence of BMI categories in India: patterns across States and Union territories of India, 1999-2021 . BMC Public Health 2024 ; 24 ( 1 ): 1322 . doi: 10.1186/s12889-024-18784-4 [published Online First: 20240516] OpenUrl CrossRef PubMed 13. ↵ DeGregory KW , Kuiper P , DeSilvio T , et al. A review of machine learning in obesity . Obes Rev 2018 ; 19 ( 5 ): 668 – 85 . doi: 10.1111/obr.12667 [published Online First: 20180209] OpenUrl CrossRef 14. ↵ Colmenarejo G . Machine Learning Models to Predict Childhood and Adolescent Obesity: A Review . Nutrients 2020 ; 12 ( 8 ) doi: 10.3390/nu12082466 [published Online First: 20200816] OpenUrl CrossRef 15. ↵ Rajkomar A , Hardt M , Howell MD , et al. Ensuring Fairness in Machine Learning to Advance Health Equity . Ann Intern Med 2018 ; 169 ( 12 ): 866 – 72 . doi: 10.7326/m18-1990 [published Online First: 20181204] OpenUrl CrossRef PubMed 16. ↵ Obermeyer Z , Powers B , Vogeli C , et al. Dissecting racial bias in an algorithm used to manage the health of populations . Science 2019 ; 366 ( 6464 ): 447 – 53 . doi: 10.1126/science.aax2342 OpenUrl Abstract / FREE Full Text 17. ↵ Chen F , Wang L , Hong J , et al. Unmasking bias in artificial intelligence: a systematic review of bias detection and mitigation strategies in electronic health record-based models . J Am Med Inform Assoc 2024 ; 31 ( 5 ): 1172 – 83 . doi: 10.1093/jamia/ocae060 OpenUrl CrossRef PubMed 18. ↵ Juhn YJ , Ryu E , Wi CI , et al. Assessing socioeconomic bias in machine learning algorithms in health care: a case study of the HOUSES index . J Am Med Inform Assoc 2022 ; 29 ( 7 ): 1142 – 51 . doi: 10.1093/jamia/ocac052 OpenUrl CrossRef PubMed 19. Hanna M , Pantanowitz L , Jackson B , et al. Ethical and Bias Considerations in Artificial Intelligence (AI)/Machine Learning . Mod Pathol 2024 : 100686 . doi: 10.1016/j.modpat.2024.100686 [published Online First: 20241216] OpenUrl CrossRef 20. ↵ Mulcahy P , Mahal A , McPake B , et al. Is there an association between public spending on health and choice of healthcare providers across socioeconomic groups in India? - Evidence from a national sample . Soc Sci Med 2021 ; 285 : 114149 . doi: 10.1016/j.socscimed.2021.114149 [published Online First: 20210626] OpenUrl CrossRef 21. ↵ Feldman M , Friedler SA , Moeller J , et al. Certifying and Removing Disparate Impact. Proceedings of the 21th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining . Sydney, NSW, Australia: Association for Computing Machinery , 2015 : 259 – 68 . 22. ↵ Kamiran F , Calders T . Data preprocessing techniques for classification without discrimination . Knowledge and Information Systems 2011 ; 33 ( 1 ): 1 – 33 . doi: 10.1007/s10115-011-0463-8 OpenUrl CrossRef 23. ↵ Agarwal A , Beygelzimer A , Dudik M , et al. A Reductions Approach to Fair Classification. In: Jennifer D, Andreas K, eds. Proceedings of the 35th International Conference on Machine Learning . Proceedings of Machine Learning Research : PMLR , 2018 : 60 -- 69 . 24. ↵ Pleiss G , Raghavan M , Wu F , et al. On fairness and calibration . Proceedings of the 31st International Conference on Neural Information Processing Systems . Long Beach, California, USA : Curran Associates Inc., 2017 : 5684 – 93 . 25. ↵ Maharana A , Nsoesie EO . Use of Deep Learning to Examine the Association of the Built Environment With Prevalence of Neighborhood Adult Obesity . JAMA Netw Open 2018 ; 1 ( 4 ): e181535 . doi: 10.1001/jamanetworkopen.2018.1535 [published Online First: 20180803] OpenUrl CrossRef 26. ↵ Osadchiy V , Bal R , Mayer EA , et al. Machine learning model to predict obesity using gut metabolite and brain microstructure data . Sci Rep 2023 ; 13 ( 1 ): 5488 . doi: 10.1038/s41598-023-32713-2 [published Online First: 20230404] OpenUrl CrossRef PubMed 27. ↵ Schuch HS , Furtado M , Silva G , et al. Fairness of Machine Learning Algorithms for Predicting Foregone Preventive Dental Care for Adults . JAMA Netw Open 2023 ; 6 ( 11 ): e2341625 . doi: 10.1001/jamanetworkopen.2023.41625 [published Online First: 20231101] OpenUrl CrossRef PubMed 28. ↵ Rahman SMJ , Ahmed N , Abedin MM , et al. Investigate the risk factors of stunting, wasting, and underweight among under-five Bangladeshi children and its prediction based on machine learning approach . PLoS One 2021 ; 16 ( 6 ): e0253172 . doi: 10.1371/journal.pone.0253172 [published Online First: 20210617] OpenUrl CrossRef PubMed 29. ↵ Dugan TM , Mukhopadhyay S , Carroll A , et al. Machine Learning Techniques for Prediction of Early Childhood Obesity . Appl Clin Inform 2015 ; 6 ( 3 ): 506 – 20 . doi: 10.4338/aci-2015-03-ra-0036 [published Online First: 20150812] OpenUrl CrossRef PubMed 30. ↵ Yang J , Soltan AAS , Eyre DW , et al. An adversarial training framework for mitigating algorithmic biases in clinical machine learning . NPJ Digit Med 2023 ; 6 ( 1 ): 55 . doi: 10.1038/s41746-023-00805-y [published Online First: 20230329] OpenUrl CrossRef PubMed 31. ↵ Ferdowsy F , Rahi KSA , Jabiullah MI , et al. A machine learning approach for obesity risk prediction . Current Research in Behavioral Sciences 2021 ; 2 doi: 10.1016/j.crbeha.2021.100053 OpenUrl CrossRef 32. ↵ Ferri-García R , Rueda-Sánchez JL , Rueda MdM , et al. Estimating response propensities in nonprobability surveys using machine learning weighted models . Mathematics and Computers in Simulation 2024 ; 225 : 779 – 93 . doi: 10.1016/j.matcom.2024.06.012 OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted July 07, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Machine Learning Fairness in Predicting Underweight, Overweight and Adiposity Across Socioeconomic and Caste Group in India: Evidence from the Longitudinal Ageing Study in India Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Machine Learning Fairness in Predicting Underweight, Overweight and Adiposity Across Socioeconomic and Caste Group in India: Evidence from the Longitudinal Ageing Study in India John Tayu Lee , Sheng Hui Hsu , Vincent Cheng-Sheng Li , Kanya Anindya , Meng-Huan Chen , Charlotte Wang , Toby Kai-Bo Shen , Valerie Tzu-Ning Liu , Hsiao-Hui Chen , Rifat Atun medRxiv 2025.07.06.25330978; doi: https://doi.org/10.1101/2025.07.06.25330978 Share This Article: Copy Citation Tools Machine Learning Fairness in Predicting Underweight, Overweight and Adiposity Across Socioeconomic and Caste Group in India: Evidence from the Longitudinal Ageing Study in India John Tayu Lee , Sheng Hui Hsu , Vincent Cheng-Sheng Li , Kanya Anindya , Meng-Huan Chen , Charlotte Wang , Toby Kai-Bo Shen , Valerie Tzu-Ning Liu , Hsiao-Hui Chen , Rifat Atun medRxiv 2025.07.06.25330978; doi: https://doi.org/10.1101/2025.07.06.25330978 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffca7ab78bbad07',t:'MTc3OTQ2MTY1NA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00