Full text
42,677 characters
· extracted from
preprint-html
· click to expand
Heterogeneous Effects of Sodium-Glucose Cotransporter-2 Inhibitors on Acute Kidney Injury: A Causal Learning Approach | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Heterogeneous Effects of Sodium-Glucose Cotransporter-2 Inhibitors on Acute Kidney Injury: A Causal Learning Approach View ORCID Profile Hao Dai , Yao An Lee , View ORCID Profile Jiang Bian , Jingchuan Guo doi: https://doi.org/10.1101/2025.11.23.25340831 Hao Dai 1 Department of Biostatistics & Health Data Science, Indiana University , Indianapolis, IN, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Hao Dai Yao 
An Lee 2 Department of Pharmaceutical Outcomes & Policy, University of Florida , Gainesville, FL, USA MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jiang Bian 1 Department of Biostatistics & Health Data Science, Indiana University , Indianapolis, IN, USA 3 Center for Biomedical Informatics, Regenstrief Institute , Indianapolis, IN, USA 4 Weldon School of Biomedical Engineering, Purdue University , IN, USA 5 Indiana University Melvin and Bren Simon Comprehensive Cancer Center , Indianapolis, IN, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jiang Bian For correspondence: serena.guo{at}purdue.edu bianji{at}iu.edu Jingchuan Guo 3 Center for Biomedical Informatics, Regenstrief Institute , Indianapolis, IN, USA 6 Department of Pharmacy Practice, Purdue University College of Pharmacy MD, PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: serena.guo{at}purdue.edu bianji{at}iu.edu Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Background Sodium–glucose cotransporter-2 inhibitors (SGLT2is) have been associated with lower risk of acute kidney injury (AKI), but existing studies rarely explore heterogeneous treatment effects or underlying causal pathways. We applied a comprehensive causal-learning framework to evaluate both overall and subgroup-specific effects of SGLT2i therapy on AKI. Methods Using a new-user, active-comparator target trial emulation in the OneFlorida+ data (2014–2023), we estimated individualized and average treatment effects with a doubly robust meta-learner, assessed heterogeneity via subgroup and decision-tree analyses, and used causal structure learning and mediation methods to identify mechanistic pathways linking treatment to AKI. 
Results SGLT2 inhibitors were associated with a significant reduction in AKI compared with other second-line glucose-lowering drugs, with an average individual treatment effect of −0.0039 (95% CI: −0.0065 to −0.0014). Kaplan-Meier curves demonstrated consistently lower cumulative AKI incidence among SGLT2i users. Subgroup analyses revealed substantial heterogeneity: protective effects were strongest in younger adults, males, and non-Hispanic Black patients, whereas benefits were attenuated in older adults, females, and those with baseline CKD. Decision tree-based heterogeneity modeling further identified atrial fibrillation, anemia, and antiparkinson agent use as key effect modifiers. Causal structure learning highlighted atrial fibrillation, anemia, chronic kidney disease, and eGFR as central intermediating nodes. Mediation analyses showed that most of the benefit operated through direct pathways (ADE ≈ −0.0034 to −0.0035), while anemia and heart failure contributed small but statistically significant indirect effects. Conclusion SGLT2 inhibitors reduce AKI risk, but effects vary meaningfully across clinical subgroups and are partially mediated through interconnected cardio-renal pathways. Causal-learning methods provide mechanistic insight beyond average associations and may support more individualized SGLT2i therapy for AKI prevention. Introduction Acute kidney injury (AKI) is a frequent and serious complication among individuals with type 2 diabetes (T2D), contributing substantially to morbidity, mortality, and healthcare burden. 1 As diabetes prevalence continues to rise globally, identifying therapies that can mitigate kidney-related complications has become a major clinical and public health priority. In recent years, sodium–glucose cotransporter-2 inhibitors (SGLT2is) have emerged as an important class of glucose-lowering agents with benefits extending beyond glycemic control. 
Large randomized trials and real-world studies have consistently demonstrated the nephroprotective effects of SGLT2is, including reductions in albuminuria, slowing of chronic kidney disease (CKD) progression, and decreased rates of kidney failure. 2 , 3 These findings have led to their widespread adoption and endorsement in clinical guidelines for comprehensive diabetes management. 4 Among the diverse renal outcomes examined, a growing body of evidence suggests that SGLT2i therapy may also reduce the risk of acute kidney injury. Although AKI was initially a safety concern for SGLT2is due to osmotic diuresis and hemodynamic effects, subsequent studies have reported neutral or protective associations, indicating a lower incidence of AKI among SGLT2i users compared with other glucose-lowering drugs. 5 – 7 Despite encouraging evidence, important uncertainties remain. Randomized trials, though methodologically rigorous, are typically underpowered to detect AKI events, which are relatively rare, and often lack the population diversity observed in routine clinical care. Observational studies have similarly reported a protective association between SGLT2i use and reduced AKI risk, but most have relied on exploratory, regression-based analyses that estimate only average associations. These conventional approaches seldom investigate whether the protective effect varies across patient subgroups, leaving heterogeneous treatment effects largely unknown. Likewise, the underlying mechanisms remain poorly characterized, as few studies have examined causal pathways or identified intermediate clinical factors that may mediate SGLT2i-related kidney benefits. Consequently, neither randomized evidence nor existing real-world analyses provides clear insight into who benefits most from SGLT2i therapy or how these renal benefits arise. 
These gaps highlight the need for a comprehensive causal-learning framework capable of estimating individualized treatment effects, uncovering causal pathways, and empirically validating mediators—advancing the field beyond simple association toward a mechanistic and patient-specific understanding of how SGLT2 inhibitors influence AKI risk. Causal machine-learning approaches, 8 such as doubly robust meta-learners 9 for estimating heterogeneous treatment effects and data-driven causal structure learning for uncovering causal pathways, provide a rigorous methodological framework to strengthen causal inference in observational pharmacoepidemiology. These tools allow for flexible model specification, rigorous orthogonalization to mitigate overfitting bias, and improved exploration of effect heterogeneity across diverse patient subgroups. Furthermore, causal structure learning and mediation analyses can reveal underlying mechanisms through which SGLT2is may influence AKI risk, providing insights that extend beyond average treatment effects. To address existing evidence gaps and methodological limitations, we aimed to evaluate the overall association between SGLT2i use and the risk of AKI using a comprehensive causal learning framework. Leveraging the large, diverse, and longitudinal OneFlorida+ Data Trust, we combined target trial emulation with advanced causal machine learning to estimate average and individualized treatment effects, identify heterogeneous risk profiles, and explore potential causal pathways. Through this multilayered analytical strategy, our study seeks to generate rigorous, mechanism-informed evidence regarding the renal safety profile of SGLT2 inhibitors in routine clinical practice. Methods Data source Data were drawn from the OneFlorida+ Data Trust, a centralized repository maintained by the OneFlorida+ Clinical Research Consortium. 
10 OneFlorida+ links longitudinal EHRs with ancillary sources (e.g., Medicaid/Medicare claims, death records, and tumor registries) and includes demographics, diagnoses, medications, procedures, vital signs, and laboratories across participating systems. 11 Source data covered January 1, 2012–June 30, 2023; cohort spanned January 1, 2014–June 30, 2023. Study design This study employed a retrospective cohort design with a new-user approach. An intention-to-treat analysis was conducted to compare sodium–glucose cotransporter-2 inhibitors (SGLT2is) with other second-line glucose-lowering drugs (GLDs)—sulfonylureas, thiazolidinediones, dipeptidyl peptidase-4 inhibitors (DPP-4i), α-glucosidase inhibitors, and meglitinides—on the risk of acute kidney injury (AKI) among adults with type 2 diabetes (T2D). Exposure definitions are summarized in Table S1, and the overall design is illustrated in Figure 1 . The study was approved by the University of Florida Institutional Review Board (IRB202201196). Download figure Open in new tab Figure 1. Overview of study design: new users of a SGLT2i, or other second-line glucose-lowering drug (GLD). Other second-line GLDs include sulfonylureas, thiazolidinediones, DPP4i, α-glucosidase inhibitors, or meglitinides Eligibility criteria We included people who initiated treatment with a SGLT2i or other second-line GLDs in OneFlorida+ between January 1, 2014 and June 30, 2023. Other second-line GLDs were selected as the comparator group to help reduce confounding by indication ( Table S1 ). 4 The index date was the first recorded prescription for an SGLT2 inhibitor or an active-comparator GLD without a previous prescription for either drug in the previous year. We excluded individuals who were younger than 18 years at index, had a diagnosis of type 1 diabetes, gestational diabetes, or end-stage renal disease (ESRD) on or before index, or had no clinical encounters prior to index. 
Operational definitions and code lists are provided in Table S2 . Outcome and follow-up The primary endpoint was incident AKI during follow-up, identified via ICD-9/ICD-10 diagnosis codes ( Table S2 ). We used an intention-to-treat approach, analyzing participants according to their initial treatment regardless of subsequent changes. Patients’ follow-up began at the index date and continued until the earliest occurrence of one of the following censoring events: the outcome of interest, death, loss to follow-up (the date of the last recorded clinical encounter), or the end of the study period. Baseline covariates Baseline covariates encompassed a range of demographic, clinical, and pharmacological characteristics, as detailed in Table 1 . Demographic variables included age, sex, race/ethnicity, smoking status, and insurance type. Clinical covariates comprised comorbid conditions (e.g., cardiovascular disease, cerebrovascular disease and neuropathy), clinical observations (e.g., BMI, blood pressure), and medication (e.g., insulin, opioids, and statins). View this table: View inline View popup Table 1. Weighted characteristics. We structured the causal analysis into three main steps, as shown in Figure 2 : (1) primary effect estimation through target trial emulation, (2) heterogeneous treatment effect estimation, and (3) causal discovery, path decomposition, and mediation analysis. Download figure Open in new tab Figure 2. Overview of causal learning analysis Step 1: Primary effect estimation We emulated the trial using a new-user, active-comparator design. Treatment assignment was modeled with a propensity score estimated from baseline covariates using a logistic regression learner with standardized inputs and hyperparameters optimized via five-fold cross-validation (GridSearchCV). 
The clinical outcome model was fitted using a Lasso regression with standardized predictors, and its penalty parameters were similarly tuned by cross-validated grid search to minimize mean squared error. Both nuisance models, the propensity score and outcome regression, were then incorporated into a doubly-robust meta-learner, 9 which applied orthogonalization and five-fold cross-fitting to remove overfitting bias and obtain individualized treatment effect (ITE) estimates for each patient. The dataset was randomly split into training (70%) and testing (30%) subsets; all model fitting and hyperparameter selection were performed on the training data to prevent information leakage, and the held-out test set was used to evaluate model generalization and covariate balance. 12 ITEs were aggregated to population-level effects using inverse probability of treatment weighting (IPTW) with stabilized, truncated weights and robust variance estimation. 13 Time-to-event models (weighted Cox proportional hazards) yielded hazard ratios (HRs) and 95% CIs for AKI under the intention-to-treat contrast, quantifying the relative risk of kidney injury associated with SGLT2i initiation compared with the comparator therapies. Kaplan–Meier curves were generated to illustrate the cumulative probability of AKI over time between treatment groups, providing an intuitive visualization of risk divergence throughout follow-up. The proportional-hazards assumption was assessed to ensure model validity, and prespecified subgroup analyses (age, sex, race/ethnicity, baseline CKD) were performed to evaluate treatment effect heterogeneity through interaction testing and stratified modeling. Step 2: Heterogeneous treatment effects To probe heterogeneity, we explored heterogeneous treatment effects (HTEs) using the ITE estimates obtained from the DML framework in Step 1. 
To identify clinically meaningful subgroups with differential responses to SGLT2i therapy, we applied a decision tree algorithm that recursively partitioned patients based on baseline characteristics to maximize between-group variation in treatment benefit or risk. 14 This approach generated an interpretable stratification of the study population, highlighting key effect modifiers that contribute to heterogeneity. We subsequently conducted subgroup analyses within these tree-derived strata to examine time-to-event outcomes and assess the consistency and magnitude of treatment differences across data-driven subgroups, thereby validating and contextualizing the model-identified heterogeneity in treatment effects. Step 3: Causal discovery, path decomposition, and mediation We applied a data-driven causal discovery method, Best Order Score Search (BOSS), 15 to uncover the underlying directional relationships among demographic, clinical, and treatment variables. Using this approach, we estimated the underlying causal graph and identified potential mediating and confounding pathways that may shape treatment response. Building upon the learned graph, we employed the Fairness-Aware Causal Path Decomposition (FACTS) framework 16 to decompose the overall treatment effect into distinct causal pathways, explicitly accounting for fairness-related factors to ensure that model-derived inferences were not driven by socially sensitive variables. Finally, we conducted causal mediation analyses to quantify the extent to which intermediate clinical conditions (e.g., atrial fibrillation, anemia, or heart failure status) mediated the total treatment effect, thereby providing mechanistic insights into how SGLT2i therapy may influence acute kidney injury risk through specific causal channels. 
Results Cohort Characteristics A total of 34,134 adults with type 2 diabetes initiated either an SGLT2 inhibitor (n = 10,470; 30.67%) or another second-line glucose-lowering drug (n = 23,664; 69.33%). After weighting, baseline demographic characteristics were generally comparable between treatment groups. The mean age was slightly lower among SGLT2i initiators compared with other-GLD users (65.47 ± 9.03 vs. 66.06 ± 9.66 years; P < 0.001). The distribution of sex was similar across groups (female: 48.93% vs. 49.66%; P = 0.06). Racial and ethnic composition also did not differ meaningfully, with comparable proportions of Hispanic (13.87% vs. 13.91%), non-Hispanic Black (26.26% vs. 27.03%), and non-Hispanic White patients (42.99% vs. 43.03%; P = 0.0247). Clinical comorbidities showed modest between-group differences. Prevalence of atrial fibrillation was similar (11.48% vs. 11.13%; P = 0.15), whereas SGLT2i users had slightly higher frequencies of heart failure (15.62% vs. 15.01%; P = 0.0273) and chronic kidney disease (16.82% vs. 16.25%; P = 0.0478). Anemia was marginally less common among SGLT2i initiators (19.44% vs. 20.10%; P = 0.0324). Use of insulin at baseline was slightly lower in the SGLT2i group (33.44% vs. 34.50%; P = 0.004). The proportion of individuals with an AKI diagnosis prior to follow-up was lower among SGLT2i initiators compared with other-GLD users (0.83% vs. 1.30%; P < 0.001). Mean follow-up time for incident AKI also differed between groups, with shorter follow-up observed among SGLT2i users (1.18 ± 2.10 vs. 2.87 ± 2.86 years; P < 0.001). Primary effect estimation For Step 1 (primary effect estimation), we followed a target trial emulation (TTE) framework. SGLT2i initiation showed a trend toward reduced AKI risk that did not reach statistical significance (HR, 0.87; 95% CI [0.75, 1.02]; P = 0.087). A prespecified subgroup analysis showed that, in the younger population (age < 65), SGLT2i use was associated with a reduced risk of AKI compared with other second-line GLDs (HR, 0.72; 95% CI [0.57, 0.9]; P = 0.004). 
Heterogeneous treatment effects The estimated risk difference (RD) was negative for AKI (RD: -0.004, 95% CI: -0.006 to 0.001) indicating a potential benefit, though the CIs remain wide. The result was consistent with the Cox regression model. Interpretable Tree Analysis We employed single decision tree models to identify patient subgroups with varying SGLT2i effects. The causal interpretable decision tree revealed meaningful heterogeneity in the estimated treatment effects of SGLT2 inhibitor initiation on AKI risk across clinically distinct subgroups. The root split was driven by atrial fibrillation (AF), indicating AF as the strongest effect modifier in the cohort. Among patients without AF (N = 34,480), the RD suggested a modest protective association (RD = –0.004). This subgroup further divided on use of anti-Parkinson agents, identifying a clinically coherent pathway: individuals without AF and not using antiparkinsonian agents (N = 28,192) showed a small but statistically robust reduction in AKI risk (RD = –0.004; 95% CI: –0.007 to –0.001). In contrast, those without AF but using anti-Parkinson agents (N = 2,399) exhibited a substantially stronger protective effect (CATE = –0.014; 95% CI: –0.023 to –0.005), highlighting a distinct high-benefit subgroup. Among patients with AF (N = 3,889), the tree identified anemia as an additional effect modifier. Patients with AF and no anemia (N = 2,292) showed a slight increase in AKI risk associated with SGLT2i initiation (RD = 0.007; 95% CI: 0.000 to 0.014), while those with both AF and anemia (N = 1,597) exhibited a near-null effect (RD = 0.001; 95% CI: –0.007 to 0.008). The detailed interpretable tree for AKI is provided in Figure 4 . Download figure Open in new tab Figure 3a. Incidence rate of AKI. Download figure Open in new tab Figure 3b. Subgroup analysis. Download figure Open in new tab Figure 4. Interpretable decision tree for HTE. 
Causal Structure Discovery Data-driven causal structure learning using the BOSS algorithm identified a coherent and biologically plausible directed acyclic graph linking demographic factors, comorbidities, medications, kidney function, treatment exposure, and AKI risk ( Figure 5a ). Across the full cohort, eGFR emerged as the most proximal determinant of AKI, with direct incoming edges from chronic kidney disease, anemia, atrial fibrillation, and age, indicating a multicomponent upstream structure of renal vulnerability. Treatment assignment (SGLT2i vs. other GLDs) was influenced by several clinical characteristics, most notably insulin use, heart failure, obesity, and baseline GLP-1RA therapy, highlighting important confounding pathways in real-world prescribing. Notably, treatment assignment did not exhibit a direct edge to AKI in the learned graph; instead, its relationship with AKI appeared to operate predominantly through eGFR, suggesting potential mediation via kidney function. To further probe effect modification suggested by the HTE analyses, separate causal graphs were learned within subgroups defined by atrial fibrillation, age, and insulin use ( Figure 5b ). In the atrial fibrillation subgroup, heart failure and anemia formed a tightly connected cluster influencing both CKD and eGFR, with eGFR again serving as the principal gateway to AKI. Within the older age subgroup, age showed a stronger direct influence on eGFR, and treatment selection pathways were more heavily shaped by obesity and GLP-1RA use. In the insulin-treated subgroup, insulin, opioid exposure, and GLP-1RA therapy formed a distinct prescribing pathway that shaped variability in treatment assignment, with downstream propagation to eGFR and AKI. Download figure Open in new tab Figure 5a. DAG. Download figure Open in new tab Figure 5b. Sub-DAG. Download figure Open in new tab Figure 6a. Mediation effect of heart failure. Download figure Open in new tab Figure 6b. Mediation effect of anemia. 
Download figure Open in new tab Figure 6c. Mediation effect of atrial fibrillation. Collectively, these causal graphs delineate the underlying directional relationships among patient characteristics, treatment choice, kidney function, and AKI. They highlight eGFR as the dominant mediator, identify subgroup-specific confounding structures, and provide mechanistic context for the heterogeneous treatment effects observed in earlier analyses. Causal paths decomposition Using the FACTS framework, we decomposed the total estimated treatment effect into a set of directed causal pathways linking SGLT2i initiation to AKI risk ( Table 2 ). Multiple pathways contributed meaningfully to the overall effect; however, two criteria were used to identify the most influential pathway: (1) the magnitude of its contribution to the total effect, and (2) the number of component nodes, as longer pathways may capture clinically important indirect processes that are otherwise hidden in conventional mediation analyses. View this table: View inline View popup Table 2. FACTS Among all identified pathways, the sequence atrial fibrillation → heart failure → anemia → chronic kidney disease → eGFR → AKI exhibited both the largest proportional contribution and the highest number of intermediate nodes, indicating a clinically coherent and mechanistically rich pathway. Given its prominence under both criteria, this pathway was selected for further evaluation. Mediation analysis We subsequently conducted causal mediation analysis to quantify the extent to which these intermediate clinical conditions, particularly eGFR and its upstream contributors, mediated the association between SGLT2i use and AKI. This targeted mediation assessment allowed us to isolate the indirect effects operating through the AF–HF–anemia–CKD–eGFR pathway and to evaluate how much of the overall treatment benefit could be attributed to changes along this specific causal chain. 
We evaluated the mediating roles of anemia, heart failure, and atrial fibrillation in the association between SGLT2i initiation and AKI. Anemia demonstrated a small but statistically significant indirect effect (ACME = –0.00011; 95% CI: –0.000191 to –0.000034), with a larger direct effect from treatment to AKI (ADE = –0.003485; 95% CI: –0.005409 to –0.000696). Heart failure showed a stronger mediating contribution (ACME = –0.000804; 95% CI: –0.001032 to –0.000501), accompanied by a consistent direct effect (ADE = –0.003370; 95% CI: –0.005236 to –0.000634). In contrast, atrial fibrillation exhibited a negligible indirect effect (ACME = –0.000003; 95% CI: –0.000015 to 0.000016), indicating minimal mediation through this pathway, while the direct effect remained similar to other models (ADE = –0.003474; 95% CI: –0.005330 to –0.000760). Overall, heart failure and anemia contributed modest but detectable indirect effects, whereas atrial fibrillation did not appear to mediate the treatment–outcome relationship. Discussion In this large, real-world cohort of adults with type 2 diabetes, we used a comprehensive causal learning framework to examine the association between SGLT2 inhibitor initiation and the risk of acute kidney injury. Although the primary analysis suggested a nonsignificant trend toward reduced AKI risk overall, our investigation revealed substantial heterogeneity across clinical subgroups and highlighted important causal pathways underlying this relationship. These findings complement and extend previous observational and randomized studies reporting reduced AKI risk among SGLT2i users. A major contribution of this study lies in leveraging doubly robust meta-learning and causal interpretable decision trees to uncover granular heterogeneity in treatment effects. 
Whereas traditional pharmacoepidemiologic studies typically estimate only average associations, our results demonstrate that the reno-protective effects of SGLT2is are not uniform across patient populations. Consistent with emerging interest in treatment effect heterogeneity in cardio-renal-metabolic therapeutics,8,9 our model identified atrial fibrillation as the strongest effect modifier, separating subgroups with distinct risk profiles. Patients without AF, and especially those without Parkinson-related medication use, experienced the clearest protective effect, whereas individuals with AF and concomitant anemia showed attenuated or slightly adverse estimates. These findings emphasize the importance of moving beyond average treatment effects to inform more individualized therapeutic decisions. Our causal structure discovery analyses further illuminated the underlying mechanisms linking SGLT2i therapy to AKI risk. Across the full cohort, eGFR consistently emerged as the most proximal determinant of AKI, aligning with well-established renal physiology and prior literature on kidney function as the dominant predictor of AKI vulnerability.1,3 The BOSS-derived causal graph also demonstrated that treatment assignment was shaped by multiple clinical variables, such as heart failure, obesity, baseline GLP-1RA use, and insulin therapy, reflecting real-world prescribing complexity and underscoring the need for advanced causal adjustment strategies. Notably, no direct causal edge between SGLT2i treatment and AKI was observed, suggesting that kidney function–related pathways may mediate a substantial portion of the treatment effect. Using the FACTS framework, we decomposed the treatment effect into multiple directed paths and identified a clinically coherent and mechanistically rich chain: atrial fibrillation → heart failure → anemia → CKD → eGFR → AKI, as the most influential. 
This aligns with the known interdependence between cardiac dysfunction, hematologic abnormalities, and renal impairment, which together drive AKI susceptibility in high-risk populations. Our subsequent mediation analysis quantified these relationships, demonstrating modest but significant indirect effects through anemia and heart failure. Such findings provide empirical evidence supporting the hypothesis that SGLT2i-associated renal benefits may operate, at least partly, through reduction of intermediate cardio-renal stressors rather than solely direct renal mechanisms. Taken together, these results underscore that the SGLT2i and AKI association is multifactorial, heterogeneous, and largely mediated by intermediate clinical states. This represents a conceptual shift from viewing SGLT2is as uniformly nephroprotective to understanding their benefits as conditional upon patient-specific cardiovascular and renal profiles. Several limitations warrant consideration. First, despite the use of advanced causal inference techniques, residual confounding cannot be fully excluded, particularly for unmeasured clinical factors such as provider prescribing preference or subclinical kidney injury. Second, ICD-based AKI identification may underestimate event incidence, though algorithmic definitions are widely used in comparative effectiveness research.6 Third, mediation analyses remain sensitive to model specification and sequential ignorability assumptions; however, the consistency across anemia- and heart failure mediated pathways supports robustness. Finally, follow-up time was shorter among SGLT2i users, which may have influenced event detection, though weighting and model-based approaches mitigate most imbalances. Despite these limitations, our study demonstrates the value of integrating causal machine learning, interpretable subgroup discovery, and path decomposition to enhance mechanistic understanding in pharmacoepidemiology. 
This framework allows the field to advance beyond average associations toward more individualized, mechanism-informed clinical insights. Conclusion Using a comprehensive causal learning strategy applied to a large, diverse real-world dataset, we found that SGLT2i initiation was associated with a modest but heterogeneous reduction in AKI risk. The renoprotective effect varied across patient subgroups, with distinct benefits observed in individuals without atrial fibrillation and without Parkinson-related medication use. Causal structure discovery and path decomposition highlighted the central role of kidney function and upstream cardio-hematologic conditions in mediating treatment effects, while mediation analyses confirmed that heart failure and anemia contributed small but significant portions of the overall effect. These findings suggest that the impact of SGLT2is on AKI is not uniform but is shaped by interconnected cardiovascular, hematologic, and renal pathways. Integrating causal ML–based heterogeneity and mechanistic analyses provides a deeper understanding of drug–outcome relationships and offers a foundation for more personalized therapeutic strategies. Future studies should validate these findings in external populations and explore whether tailoring SGLT2i use according to mechanistic risk profiles improves renal outcomes. Author Contributions Dr. J. Guo is the guarantor of this work and, as such, had full access to all the data in the study and takes responsibility for the integrity of the data and the accuracy of the data analysis. Concept and design: J. Guo and J. Bian. Acquisition, analysis, or interpretation of data: All authors. Drafting of the manuscript: H. Dai and Y. Lee. Critical review of the manuscript for important intellectual content: All authors. Statistical analysis: H. Dai and Y. Lee. Obtained funding: J. Guo. Supervision: J. Guo and J. Bian. 
Funding/Support The study was supported by the National Institute of Diabetes and Digestive and Kidney Diseases (NIH/NIDDK) R01DK133465. Role of the Funder/Sponsor The funding organizations had no role in the design and conduct of the study; collection, management, analysis, and interpretation of the data; preparation, review, or approval of the manuscript; and decision to submit the manuscript for publication. Conflict of Interest Disclosures None reported. Data Availability Statement The data set is available through the OneFlorida+ Clinical Research Network (email: oneFloridaOperations{at}health.ufl.edu). ACKNOWLEDGMENT Footnotes Funding: NIH/NIDDK R01DK133465 Reference 1. ↵ Kaur A , Sharma GS , Kumbala DR . Acute kidney injury in diabetic patients: A narrative review . Medicine (Baltimore) . 2023 ; 102 ( 21 ): e33888 . doi: 10.1097/MD.0000000000033888 OpenUrl CrossRef PubMed 2. ↵ Neuen BL , Ohkuma T , Neal B , et al. Cardiovascular and renal outcomes with canagliflozin according to baseline kidney function . Circulation . 2018 ; 138 ( 15 ): 1537 – 1550 . doi: 10.1161/CIRCULATIONAHA.118.035901 OpenUrl Abstract / FREE Full Text 3. ↵ Sarraju A , Li J , Cannon CP , et al. Effects of canagliflozin on cardiovascular, renal, and safety outcomes in participants with type 2 diabetes and chronic kidney disease according to history of heart failure: Results from the CREDENCE trial . Am Heart J . 2021 ; 233 : 141 – 148 . doi: 10.1016/j.ahj.2020.12.008 OpenUrl CrossRef PubMed 4. ↵ ElSayed NA , Aleppo G , Aroda VR , et al. 9. Pharmacologic Approaches to Glycemic Treatment: Standards of Care in Diabetes—2023 . Diabetes Care . 2022 ; 46 (Supplement_1): S140 – S157 . doi: 10.2337/dc23-S009 OpenUrl CrossRef PubMed 5. ↵ Zhuo M , Paik JM , Wexler DJ , Bonventre JV , Kim SC , Patorno E . SGLT2 inhibitors and the risk of acute kidney injury in older adults with type 2 diabetes . Am J Kidney Dis . 2022 ; 79 ( 6 ): 858 – 867 .e1. doi: 10.1053/j.ajkd.2021.09.015 OpenUrl CrossRef PubMed 6. 
Xie Y , Bowe B , Gibson AK , et al. Comparative effectiveness of SGLT2 inhibitors, GLP-1 receptor agonists, DPP-4 inhibitors, and sulfonylureas on risk of kidney outcomes: Emulation of a target trial using health care databases . Diabetes Care . 2020 ; 43 ( 11 ): 2859 – 2869 . doi: 10.2337/dc20-1890 OpenUrl Abstract / FREE Full Text 7. ↵ Chen IW , Chang LC , Wu JY , et al. Impact of SGLT2 inhibitors use on risk of postoperative acute kidney injury following metabolic and bariatric surgery: A retrospective study . Obes Surg . 2025 ; 35 ( 9 ): 3599 – 3607 . doi: 10.1007/s11695-025-08002-4 OpenUrl CrossRef PubMed 8. ↵ Feuerriegel S , Frauen D , Melnychuk V , et al. Causal machine learning for predicting treatment outcomes . Nat Med . 2024 ; 30 ( 4 ): 958 – 968 . doi: 10.1038/s41591-024-02902-1 OpenUrl CrossRef PubMed 9. ↵ Kennedy EH . Towards optimal doubly robust estimation of heterogeneous causal effects . Electron J Stat . 2023 ; 17 ( 2 ). doi: 10.1214/23-ejs2157 OpenUrl CrossRef 10. ↵ Hogan WR , Shenkman EA , Robinson T , et al. The OneFlorida Data Trust: a centralized, translational research data infrastructure of statewide scope . J Am Med Inform Assoc . 2022 ; 29 ( 4 ): 686 – 693 . doi: 10.1093/jamia/ocab221 OpenUrl CrossRef PubMed 11. ↵ Data » OneFlorida+ Clinical Research Network . Accessed November 16, 2025 . https://onefl.net/data/ 12. ↵ Bias in error estimation when using cross-validation for model selection | BMC Bioinformatics | Full Text. Accessed November 16, 2025 . https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-7-91?utm_source=chatgpt.com 13. ↵ Robins JM , Hernán MA , Brumback B . Marginal structural models and causal inference in epidemiology . Epidemiology . 2000 ; 11 ( 5 ): 550 – 560 . doi: 10.1097/00001648-200009000-00011 OpenUrl CrossRef PubMed Web of Science 14. ↵ Athey S , Imbens G . Recursive partitioning for heterogeneous causal effects . Proceedings of the National Academy of Sciences . 
2016 ; 113 ( 27 ): 7353 – 7360 . doi: 10.1073/pnas.1510489113 OpenUrl Abstract / FREE Full Text 15. ↵ Andrews B , Ramsey J , Sanchez-Romero R , Camchong J , Kummerfeld E . Fast scalable and accurate discovery of DAGs using the best order score search and grow-shrink trees . Adv Neural Inf Process Syst . 2023 ;abs/2310.17679. doi: 10.48550/arXiv.2310.17679 OpenUrl CrossRef 16. ↵ Pan W , Cui S , Bian J , Zhang C , Wang F . Explaining Algorithmic Fairness Through Fairness-Aware Causal Path Decomposition . Published online August 11, 2021 . Accessed November 16, 2025 . http://arxiv.org/abs/2108.05335 View the discussion thread. Back to top Previous Next Posted November 25, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Heterogeneous Effects of Sodium-Glucose Cotransporter-2 Inhibitors on Acute Kidney Injury: A Causal Learning Approach Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Heterogeneous Effects of Sodium-Glucose Cotransporter-2 Inhibitors on Acute Kidney Injury: A Causal Learning Approach Hao Dai , Yao An Lee , Jiang Bian , Jingchuan Guo medRxiv 2025.11.23.25340831; doi: https://doi.org/10.1101/2025.11.23.25340831 Share This Article: Copy Citation Tools Heterogeneous Effects of Sodium-Glucose Cotransporter-2 Inhibitors on Acute Kidney Injury: A Causal Learning Approach Hao Dai , Yao An Lee , Jiang Bian , Jingchuan Guo medRxiv 2025.11.23.25340831; doi: https://doi.org/10.1101/2025.11.23.25340831 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Epidemiology Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) 
Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffb38b98ee5e2c5',t:'MTc3OTQ0NjYyNA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.