Personalized Hemodynamic Management Using Reinforcement Learning to Prevent Persistent Acute Kidney Injury After Cardiac Surgery

doi:10.1101/2025.10.23.25338698

Personalized Hemodynamic Management Using Reinforcement Learning to Prevent Persistent Acute Kidney Injury After Cardiac Surgery

2025 · doi:10.1101/2025.10.23.25338698

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 56,188 characters · extracted from preprint-html · click to expand

Personalized Hemodynamic Management Using Reinforcement Learning to Prevent Persistent Acute Kidney Injury After Cardiac Surgery | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Personalized Hemodynamic Management Using Reinforcement Learning to Prevent Persistent Acute Kidney Injury After Cardiac Surgery Moein Sabounchi , Jacob M Desman , Idan Shenfeld Amit , Wonsuk Oh , Chris Capone , Pushkala Jayaraman , Gagan Kumar , Michelle Campoli , Mahima Vijayaraghavan , Prem Timsina , Paul McCarthy , Anthony Manasia , John Oropello , Robin Varghese , Ksenia Gorbenko , Hernando Gomez-Danies , Patricia Kovatch , Gordon Smith , Avniel Shetreat-Klein , Ashita Tolwani , 
Mayte Suárez-Fariñas , Kianoush Kashani , Ashish Khanna , Azra Bihorac , Jolion McGreevy , Lisa Stump , John Kellum , David Reich , Pulkit Agrawal , Girish N Nadkarni , Ankit Sakhuja doi: https://doi.org/10.1101/2025.10.23.25338698 Moein Sabounchi 1 Charles Bronfman Institute for Personalized Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA 2 Windreich Department of Artificial Intelligence and Human Health, Icahn School of Medicine at Mount Sinai , New York, New York, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jacob M Desman 1 Charles Bronfman Institute for Personalized Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA 2 Windreich Department of Artificial Intelligence and Human Health, Icahn School of Medicine at Mount Sinai , New York, New York, USA MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Idan Shenfeld Amit 3 Improbable AI Lab, Massachusetts Institute of Technology , Cambridge, Massachusetts, USA MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Wonsuk Oh 1 Charles Bronfman Institute for Personalized Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA 2 Windreich Department of Artificial Intelligence and Human Health, Icahn School of Medicine at Mount Sinai , New York, New York, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Chris Capone 2 Windreich Department of Artificial Intelligence and Human Health, Icahn School of Medicine at Mount Sinai , New York, New York, USA BS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Pushkala Jayaraman 1 Charles Bronfman Institute for Personalized Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA 2 Windreich Department of Artificial Intelligence and 
Human Health, Icahn School of Medicine at Mount Sinai , New York, New York, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Gagan Kumar 4 Department of Pulmonary and Critical Care Medicine, Northeast Georgia Medical Center , Gainesville, Georgia, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Michelle Campoli 1 Charles Bronfman Institute for Personalized Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA 2 Windreich Department of Artificial Intelligence and Human Health, Icahn School of Medicine at Mount Sinai , New York, New York, USA MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mahima Vijayaraghavan 5 Division of Hospital Medicine, Department of Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Prem Timsina 2 Windreich Department of Artificial Intelligence and Human Health, Icahn School of Medicine at Mount Sinai , New York, New York, USA 6 Clinical Data Science, Mount Sinai Hospital , New York, New York, USA ScD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Paul McCarthy 7 Section of Cardiovascular Critical Care, Department of Cardiovascular and Thoracic Surgery, West Virginia University , Morgantown, West Virginia, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Anthony Manasia 8 Institute for Critical Care Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site John Oropello 8 Institute for Critical Care Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA MD Find this author on Google Scholar 
Find this author on PubMed Search for this author on this site Robin Varghese 8 Institute for Critical Care Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA 9 Department of Cardiothoracic Surgery, Icahn School of Medicine at Mount Sinai , New York, New York, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ksenia Gorbenko 10 Department of Population Health Science and Policy, Icahn School of Medicine at Mount Sinai , New York, New York, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hernando Gomez-Danies 11 Department of Emergency Medicine, University of Pittsburgh School of Medicine , Pittsburgh, Pennsylvania, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Patricia Kovatch 12 Scientific Computing, Icahn School of Medicine at Mount Sinai , New York, New York, USA BS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Gordon Smith 13 Department of Epidemiology and Biostatistics, West Virginia University , Morgantown, West Virginia, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Avniel Shetreat-Klein 14 Department of Rehabilitation and Physical Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA MD, PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ashita Tolwani 15 Division of Nephrology, Department of Medicine, University of Alabama at Birmingham , Birmingham, Alabama, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mayte Suárez-Fariñas 10 Department of Population Health Science and Policy, Icahn School of Medicine at Mount Sinai , New York, New York, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this 
author on this site Kianoush Kashani 16 Division of Nephrology and Hypertension, Division of Pulmonary and Critical Care Medicine, Department of Medicine, Mayo Clinic , Rochester, Minnesota, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ashish Khanna 17 Department of Anesthesiology, Division of Critical Care Medicine, Atrium Health Wake Forest Baptist Medical Center, Wake Forest School of Medicine , Winston-Salem, North Carolina, USA 18 Outcomes Research Consortium , Houston, Texas, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Azra Bihorac 19 Division of Nephrology, Hypertension, and Renal Transplantation, Department of Medicine, College of Medicine, University of Florida , Gainesville, Florida, USA 20 Intelligent Clinical Care Center, The University of Florida College of Medicine , Gainesville, Florida, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jolion McGreevy 21 Department of Emergency Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Lisa Stump 22 Mount Sinai Health System and Icahn School of Medicine at Mount Sinai , New York, New York, USA MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site John Kellum 23 Department of Critical Care Medicine, University of Pittsburgh School of Medicine , Pittsburgh, Pennsylvania, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site David Reich 2 Windreich Department of Artificial Intelligence and Human Health, Icahn School of Medicine at Mount Sinai , New York, New York, USA 6 Clinical Data Science, Mount Sinai Hospital , New York, New York, USA MD Find this author on Google Scholar Find this author on PubMed Search for this 
author on this site Pulkit Agrawal 3 Improbable AI Lab, Massachusetts Institute of Technology , Cambridge, Massachusetts, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Girish N Nadkarni 1 Charles Bronfman Institute for Personalized Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA 2 Windreich Department of Artificial Intelligence and Human Health, Icahn School of Medicine at Mount Sinai , New York, New York, USA 24 The Hasso Plattner Institute for Digital Health at Mount Sinai, Icahn School of Medicine at Mount Sinai , New York, New York, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ankit Sakhuja 1 Charles Bronfman Institute for Personalized Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA 2 Windreich Department of Artificial Intelligence and Human Health, Icahn School of Medicine at Mount Sinai , New York, New York, USA 8 Institute for Critical Care Medicine, Icahn School of Medicine at Mount Sinai , New York, New York, USA MBBS Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: ankit.sakhuja{at}mssm.edu Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Importance Acute kidney injury (AKI) affects one-third of patients after cardiac surgery and increases morbidity and mortality. AKI lasting over 48 hours, known as persistent AKI (pAKI), has much worse outcomes. Hemodynamic optimization is cornerstone of AKI management, however, current strategies rely on bundled care interventions that are inconsistently implemented, underscoring the need for personalized hemodynamic optimization. Objective To develop and validate a reinforcement learning (RL) model to guide individualized dosing of intravenous (IV) fluids, vasopressors, and inotropes for prevention of pAKI after cardiac surgery. 
Design Cohort study. Model development and internal validation were performed retrospectively in MIMIC-IV, with external validation in SICdb, a European database (retrospective), and then in the Mount Sinai Health System (MSHS) cohort (using data from Jan 1–Aug 18, 2025). Setting Multicenter retrospective cohort study. Participants Admissions to ICU after cardiac surgery. Exposures Postoperative hemodynamic management during first 72 hours of ICU stay using IV fluids, vasopressors, and inotropes. Main Outcomes and Measures Primary outcome was pAKI within 5 days after surgery. The RL model optimized treatment policies through reward-based learning, where higher rewards reflected improved outcome. We assessed model performance relative to clinicians using Fitted Q Evaluation and adjusted weighted pooled logistic regression. Results There were 6,643 adult ICU admissions following cardiac surgery in MIMIC-IV, 2,254 in SICdb, and 846 in MSHS. Median age was 70 years in MIMIC-IV, 70.0 years in SICdb, and 64 years in MSHS cohort with 72%, 73%, and 70% males respectively. AKI occurred in 41.4%, 19.7%, and 22.5% of admissions, with pAKI in 30.5%, 43.0%, and 33.7% of AKI cases, respectively. RL model achieved higher cumulative rewards than clinicians across all cohorts. Concordance between clinician actions and RL model’s recommendations was associated with lower adjusted odds of pAKI (OR, 0.92 [0.89–0.96] in SICdb; 0.91 [0.86–0.96] in MSHS). RL model favored smaller IV fluid volumes, moderate vasopressor dosing, and greater inotrope use. Conclusions and Relevance In this study, personalization of early postoperative hemodynamic management using an RL model was associated with decreased risk of pAKI. These findings suggest that AI guided hemodynamic strategies may enhance postoperative care after cardiac surgery. Question Can reinforcement learning (RL) personalize early postoperative hemodynamic management to prevent persistent AKI (pAKI) after cardiac surgery? 
Findings In 9,743 postoperative cardiac surgery ICU admissions across 3 cohorts (MIMIC-IV, SICdb, and Mount Sinai Health System), the RL model achieved higher cumulative rewards than clinician policies and was associated with lower adjusted odds of developing pAKI when clinician actions aligned with model recommendations. The RL model favored smaller intravenous fluid volumes and earlier, graded adjustments in vasopressor and inotrope dosing compared with standard practice. Meaning RL guided individualized hemodynamic management after cardiac surgery shows promise in reducing the risk of persistent AKI and should be tested in randomized clinical trials. INTRODUCTION Cardiac surgery is common and involves significant physiological stress and hemodynamic instability in the early postoperative period. Acute kidney injury (AKI), 1 defined by a rise in serum creatinine, occurs in over one-third of patients following cardiac surgery and is associated with a four-fold increase in mortality and doubling of health-care costs. 2 – 4 Outcomes are substantially worse when AKI persists for over 48 hours 5 , also known as persistent AKI (pAKI). 6 The management of AKI is largely supportive with hemodynamic optimization as its cornerstone. This is especially important in the early post-operative period after cardiac surgery when patients have a dynamic hemodynamic profile that requires simultaneous use of intravenous (IV) fluids, vasopressors and inotropes. Although these interventions when used as part of a clinical bundle have shown benefit, 7 their uptake in routine practice remains very poor. 8 , 9 pAKI itself is a heterogeneous syndrome, with distinct phenotypes that exhibit variable responses to therapy. 10 Even when phenotypes are defined by widely available clinical data rather than specialized biomarkers, differences in disease trajectory and treatment response persist. 
11 These findings highlight the need for personalized strategies to guide AKI management, particularly among high-risk cardiac surgery patients. Reinforcement learning 12 (RL), an AI approach that optimizes sequential decision-making, offers a promising framework for this challenge. In RL, the model learns to make decisions by taking actions in an environment and receiving feedback in the form of rewards or penalties. 13 This iterative learning process enables the agent to develop policies that maximize cumulative rewards over time. In healthcare, where clinical trajectories evolve dynamically, RL is particularly well-suited for guiding time-sensitive interventions such as titration of IV fluids, vasopressors, and inotropes in response to continuously changing physiological states. Prior applications of RL in critical care have shown promise 14 , 15 , including recent work demonstrating its utility for postoperative glucose control in cardiac surgery patients. 16 Building on this foundation, we introduce an RL model designed to guide early postoperative hemodynamic management in order to prevent pAKI following cardiac surgery. METHODS Study Databases and Population We used data from adult (≥18 years) patients who had postoperative cardiac surgery intensive care unit (ICU) admissions in the Medical Information Mart for Intensive Care (MIMIC-IV) database as the development cohort and performed external validation in the retrospective cohort from the Salzburg Intensive Care Database (SICdb) 17 and data from Jan 1, 2025 to Aug 18, 2025 from the Mount Sinai Health System (MSHS). Ethical approval was obtained from the Icahn School of Medicine at Mount Sinai Institutional Review Board (study number 20-00338). The overall structure of the study is shown in Figure 1 . We excluded admissions with end-stage kidney disease and those with missing data for serum creatinine, weight, or IV fluids. 
As the model was trained on data from the first 72 hours of ICU stay, patients who did not survive beyond this period were excluded from retrospective cohorts to avoid bias from nonrepresentative clinical trajectories. We have shared the details of the datasets and inclusion/exclusion criteria in e-Methods, e-Table 1 and e-Figure 1 in the Supplement. Download figure Open in new tab Fig. 1. Overview of the Study Abbreviations: IV Fluids, Intravenous Fluids; MAP, Mean Arterial Pressure; AKI, Acute Kidney Injury; MIMIC IV, Medical Information Mart for Intensive Care (MIMIC)-IV; SICdb, Salzburg Intensive Care database; MSHS: Mount Sinai Hospital System. Feature Extraction and Preprocessing We extracted features routinely available to clinicians at bedside for development of the RL model. These included demographics (age, sex), anthropometrics (height, weight, BMI), vital signs, laboratory values, fluid balance, medications (vasopressors, inotropes, IV fluids including crystalloids and colloids, and nephrotoxins), and SOFA Score. We converted the vasopressor and inotrope doses into their norepinephrine (NEE) and dobutamine equivalent (DE) doses 18 , 19 respectively. Data were extracted from ICU admission up to the earlier of either 72 hours or ICU discharge. We removed biologically implausible values. 20 The resulting time-series data were segmented into hourly bins, with features either summed or averaged as clinically appropriate. We excluded features with more than 30% missingness. We imputed missing values using established practices, using a combination of forward filling and k-nearest neighbor imputation. 16 Outcome The primary outcome of our study was pAKI within 5 days after cardiac surgery. We defined pAKI as AKI lasting for 48 hours or longer. 
6 We defined AKI as per the Kidney Disease Improving Global Outcomes (KDIGO) guidelines as an increase in serum creatinine by 0.3mg/dL or more within 48 hours or an increase by at least 1.5 times the baseline serum creatinine within 7 days. 21 We used the last available pre-operative creatinine within the five days before surgery as the baseline creatinine. 22 If no pre-operative value was available, we used the first post-operative creatinine instead. 22 Model Training We used conservative Q-learning (CQL) 23 as the RL algorithm. We specified states, actions and rewards for the RL model on an hourly basis. States included laboratory values, vital signs, medications (IV fluids, vasopressors, inotropes, nephrotoxins), demographics, anthropometrics (height, weight, BMI), and SOFA score. 24 We defined actions as recommended doses of IV fluids, inotropes and vasopressors discretized into clinically meaningful categories (e-Table 2). We calculated rewards for each epoch as a weighted combination of pAKI, AKI and MAP < 65mmHg. We have provided details of computational modeling, state, actions and reward formulations in e-Methods in the Supplement. We developed the RL model using data from MIMIC-IV which was split into three subsets: 70% training, 15% validation, and 15% internal testing. We then externally validated it in the retrospective SICdb and prospective MSHS cohorts. During training we used a discount factor rate of 0.99 which emphasizes long-term rewards and reflects the clinical importance of avoiding adverse outcomes over time. We set the conservativeness coefficient (alpha) to 0.25 which controls the degree to which the learned policy diverges from the observed clinician behavior, encouraging safe exploration without straying too far from established practices. We trained the model using a batch size of 512, an initial learning rate of 0.001 with a learning rate scheduler that progressively reduced the rate to 1e-8. 
Statistical Analysis We described continuous variables as median (inter quartile range; IQR) and categorical variables as counts (percentages). We used Kruskal-Wallis test 25 for continuous variables and Chi-squared test 26 for categorical variables. We set the significance level at 0.05 without adjusting for multiple comparisons. We first evaluated the model’s performance using Fitted Q evaluation (FQE) 27 , an established off-policy evaluation (OPE) method to compare total rewards of an RL model to that of clinicians in the internal test set. To assess generalizability, we then used FQE to assess the performance of the model in retrospective SICdb and prospective MSHS cohorts. Next, we conducted a clinical evaluation comparing model’s recommendations with clinician actions to assess how the model’s learned policy differed from observed clinical practice. We examined how recommendations evolved over time and across mean arterial pressure (MAP) thresholds. To assess patient-level patterns, we randomly sampled 30 admissions from each external cohort, including 10 with pAKI, 10 with AKI only, and 10 without AKI, and generated heatmaps of mean arterial pressure trajectories, clinician actions, and model’s hourly recommendations. Finally, we used a weighted pooled logistic regression 28 , 29 to estimate the odds of development of pAKI when clinician actions were concordant with model’s recommendations. To account for time-varying confounding, we applied inverse probability of treatment weighting (IPTW) using the same baseline and time-dependent covariates as in model training 28 , 29 . As standard error estimates can be anti-conservative when using IPTW, we also report results using robust sandwich variance estimator 29 – 31 . Interpretability Interpretability is essential in RL for clinical decision-making to ensure transparency and clinician trust, particularly when recommending high-risk interventions. 
We used SHAP (SHapley Additive exPlanations) to assess both global and local interpretability of the RL model 32 . We selected SHAP for its strong theoretical foundation and ability to quantify the average contribution of each input feature to the model’s overall predictions. Shapley analysis stems from cooperative games in game theory and it is used to fairly distribute the payoffs in a cooperative game to each contributing player. In RL, SHAP values represent the relative influence of each feature on the model’s recommended actions. 16 We computed SHAP values across the entire test set to identify the most influential clinical variables and evaluate their consistency with clinical expectations. We also computed hourly, patient-specific SHAP values to visualize how feature importance evolved over time. Model Fairness To evaluate model robustness across demographic subgroups, we assessed model’s performance using off-policy evaluation and concordance analyses stratified by sex and race. RESULTS Patient Characteristics We included 6,643 postoperative cardiac surgery ICU admissions from MIMIC-IV database which served as the development cohort. We externally validated the performance of the RL model on 2,254 admissions from the retrospective SICdb dataset and 846 admissions from the prospective MSHS cohort. The median age was 69.6 (61.6, 77.1) years in MIMIC-IV, 70.0 (60.0, 75.0) years in SICdb and 64.3 (56.1, 72.2) years in MSHS, with 71.9%, 72.9% and 70.6% males respectively. AKI occurred in 2750 (41.4%) of MIMIC-IV, 444 (19.7%) of SICdb and 190 (22.45%) of MSHS admissions, while pAKI developed among 839 (30.5%), 191 (43.0%) and 64 (33.7%) of AKI cases, respectively ( Table 1 and e-Table 3). View this table: View inline View popup Table 1: Characteristics of ICU admissions after Cardiac Surgery Model Performance Using FQE 33 the model achieved higher cumulative rewards than clinician policies across all datasets, indicating consistently superior performance. 
The RL model achieved a mean overall reward of 53.86 (95% CI: 52.25 – 55.45) vs. 17.49 (95% CI: 16.28 – 18.68) for clinicians in MIMIC-IV internal test set, 77.36 (95% CI: 76.39 – 78.30) vs. 21.33 (95% CI: 20.27 – 22.38) in SICdb and 73.93 (95% CI: 72.39 – 75.42) vs. 29.47 (95% CI: 28.26 – 30.66) in MSHS ( Figure 2 ). Download figure Open in new tab Fig. 2. FQE results (a) and Effect of Concordance between Recommendations of the Reinforcement Learning Model and Clinician Actions on Adjusted Odds for Development of Persistent AKI (b) Abbreviations: OPE, Off Policy Evaluation; AKI, Acute Kidney Injury; CI: Confidence Interval; MIMIC IV, Medical Information Mart for Intensive Care (MIMIC)-IV; SICdb, Salzburg Intensive Care database; MSHS: Mount Sinai Hospital System. Clinical Evaluation of RL Model Across cohorts, the RL model favored smaller IV fluid volumes, moderate vasopressor doses, and higher inotrope use than clinicians (e-Figure 2). In the SICdb cohort, 0–50 mL IV fluids were recommended by the model in 67.7% of cases vs 30.7% by clinicians (p < .001), and vasopressor dose 0.05–0.1 µg/kg/min NEE in 10.1% vs 7.8% (p < .001) whereas 0.1-0.2 µg/kg/min NEE in 7.9% vs 6.4% (p<0.001). Similar trends were observed in MSHS (IV fluids: 64.9% vs 52.6%, p<0.001; vasopressors: 0.05–0.1 µg/kg/min NEE in 12.3% vs 6.8%, p < .001; 0.1-0.2 µg/kg/min NEE in 6.3% vs 3.5%, p<0.001). The RL model also recommended higher inotrope doses more frequently than clinicians, particularly 5–7.5 µg/kg/min DE (1.4% vs 0.4% in SICdb, p < 0.001; 9.7% vs 1.8% in MSHS, p<0.001). We then evaluated how the model’s dosing policy evolved over time in comparison with clinician actions (e-Figure 2-a). Both clinicians and the model demonstrated temporal shifts in dosing patterns during the first 72 hours after ICU admission. 
Early in the ICU course, clinicians administered larger volumes of IV fluids with relatively lower doses of vasopressors and inotropes, whereas the model recommended smaller IV fluid volumes but relatively higher doses of vasopressors and inotropes during the same period. In later hours, dosing activity for both clinicians and the model decreased, reflecting a general reduction in therapeutic intensity as patients stabilized. At MAP < 65 mm Hg, the RL model continued to recommend lower IV fluid volumes (e-Figure 2-b). In the SICdb cohort, the model most frequently recommended 0–50 mL of IV fluids (55.9% vs 28.6% for clinicians, P < .001) and less frequently recommended > 500 mL (1.35% vs 10.3%, P < .001). It recommended vasopressor doses 0.05–0.1 µg/kg/min NEE in 16.1% vs 14.1% of cases (P < .001), vasopressor doses 0.1-0.2 µg/kg/min NEE in 15.7% vs 12.7% of cases (P < 0.001) and inotropes of 5–7.5 µg/kg/min DE in 2.8% vs 0.7% (P < .001) cases. In the MSHS cohort, the RL model showed a similar pattern, favoring smaller IV fluid volumes (0–50 mL in 60.9% vs 56.0%, P < .001) and less frequently recommending > 500 mL (1.7% vs 4.6%, p < 0.001). Vasopressor use followed the same trend, with 0.05 -0.1 µg/kg/min NEE dose in 14.8% vs 8.9% of cases (P < 0.001) and 0.1-0.2 µg/kg/min NEE dose in 7.3% vs 4.2% of cases (P < 0.001). It also more often recommended inotrope doses of 5–7.5 µg/kg/min DE (9.1% vs 0.9% cases, p < .001). Among patients who later developed pAKI, the same overall pattern was observed. To examine patient-level patterns, we randomly sampled 30 admissions from each external cohort (10 with pAKI, 10 with AKI only, and 10 without AKI) and generated hour-by-hour heatmaps of mean arterial pressure (MAP), clinician actions, and model’s recommendations ( Figure 3 ). Visual comparison of these heatmaps revealed that the RL model generally provided earlier and more graded adjustments in vasoactive support in response to declining MAP. 
Clinician actions, by contrast, often involved larger, intermittent fluid administrations and delayed or smaller vasoactive adjustments. Download figure Open in new tab Fig. 3. Comparison of Clinician Actions with Recommendations of the Reinforcement Learning Model Abbreviations: IV Fluids, Intravenous Fluids; SICdb, Salzburg Intensive Care database; MSHS: Mount Sinai Hospital System; AKI, Acute Kidney Injury; pAKI, persistent Acute Kidney Injury; MAP, Mean Arterial Pressure. Using weighted pooled multivariable logistic regression with IPTW, concordance between clinician actions and model recommendations was associated with lower odds of pAKI across all cohorts ( Figure 2 ). Odds ratios (95% CI) were 0.84 (95% CI: 0.81–0.86) in MIMIC-IV, 0.92 (95% CI: 0.89–0.96) in SICdb, and 0.91 (95% CI: 0.86–0.96) in MSHS using standard variance estimation, and 0.84 (95% CI: 0.76–0.92), 0.92 (95% CI: 0.79–1.07), 0.91 (95% CI: 0.80–1.03) using robust sandwich variance estimator respectively. Model Interpretability To understand the factors driving model’s recommendations, we examined feature importance using SHAP analysis. Global SHAP analysis showed that prior vasoactive actions, SOFA score, creatinine, and fluid balance were the main drivers of model’s dosing recommendations ( Figure 4 ). In SICdb, prior vasopressor actions had the strongest influence, whereas in MSHS, inotrope actions dominated, reflecting cohort-specific treatment patterns. This indicates that the RL model adapts its weighting of features to local practice contexts while preserving consistent physiologic logic across datasets. Local SHAP analyses (e-Figures 3a–j) further demonstrated temporal variation in feature importance within patients, with early decisions influenced by prior vasoactive use and illness severity and later actions shaped by fluid balance and kidney indices. Download figure Open in new tab Fig. 4. 
Feature Importance for the Reinforcement Learning Model Abbreviations: SHAP, SHapley Additive exPlanations; SICdb, Salzburg Intensive Care database; MSHS, Mount Sinai Health System; SOFA, Sequential Organ Failure Assessment; ALP, Alkaline Phosphatase; BUN, Blood Urea Nitrogen; RDW, Red Cell Distribution Width; FiO₂, Fraction of Inspired Oxygen; BMI, Body Mass Index; MCV, Mean Corpuscular Volume; RBC, Red Blood Cell; PTT, Partial Thromboplastin Time; A-a Gradient, Alveolar-arterial oxygen gradient; PO₂, Partial pressure of Oxygen; MCH, Mean corpuscular hemoglobin; PaO₂/FiO₂, Arterial Oxygen Partial Pressure to Fraction of Inspired Oxygen Ratio; CO₂, Carbon Dioxide; Arterial pH, Potential of Hydrogen; INR, International Normalized Ratio; MCHC, Mean Corpuscular Hemoglobin Concentration; IV Fluids, Intravenous Fluids; MAP, Mean Arterial Pressure; SO₂, Oxygen Saturation; PCO₂, Partial Pressure of Carbon Dioxide; PT, Prothrombin Time; SpO₂, Peripheral Capillary Oxygen Saturation; WBC, White Blood Cell; ALT, Alanine Aminotransferase; AST, Aspartate Aminotransferase; t-1, value one hour earlier; t-2, value two hours earlier; t-3, value three hours earlier. Model Fairness We evaluated the model’s robustness across demographic subgroups using FQE and adjusted concordance analyses stratified by sex and race ( Figure 2 ). FQE showed that the RL model achieved higher cumulative rewards than clinicians across all subgroups, indicating superior policy performance regardless of sex or race. In multivariable concordance analyses, concordance between clinician actions and model recommendations was associated with lower adjusted odds of developing pAKI. No subgroup showed evidence of harm with concordance, demonstrating equitable model performance across demographic groups. DISCUSSION In this multicohort study, we developed and validated an RL model for personalized hemodynamic management in the early post-operative period after cardiac surgery. 
Across two heterogeneous retrospective cohorts (MIMIC-IV, North America; SICdb, Europe) and a prospective cohort from the Mount Sinai Health System (North America), the RL model consistently achieved higher cumulative rewards than clinician policies and was associated with lower adjusted odds of pAKI when clinician actions aligned with its recommendations. Cardiac surgery–associated AKI remains a leading cause of postoperative morbidity, mortality, and long-term kidney impairment. 2 – 4 Outcomes are much worse for patients who develop pAKI. 5 , 34 Management of hemodynamics with IV fluids, vasopressors and inotropes is the cornerstone of AKI treatment. However, there is a large variation in their utilization among cardiac surgery patients. 35 This reflects the underlying heterogeneity of patients, heterogeneity of AKI 10 , 36 and differing treatment protocols across institutions. While vasopressors and inotropes are frequently required to maintain perfusion in hemodynamically unstable patients, their use has been associated with higher rates of AKI 35 , an association that likely reflects underlying disease severity and compromised circulatory status rather than direct nephrotoxicity. Our RL model directly addresses this challenge by personalizing the dosing of IV fluids, vasopressors and inotropes in the early post-operative phase in these critically ill patients. Moreover, rather than reactively responding to hypotensive events, as is common in the current standard of care, our RL model recommends proactive, graded adjustments in IV fluids and vasoactive medications aimed at maintaining perfusion and preventing progression to pAKI. This represents a paradigm shift from the reactive actions of the current standard of care to continuous, physiology-informed, proactive management of hemodynamics. Our results align with the PrevAKI trial, which demonstrated that cardiac surgery patients who received the KDIGO bundle had reduced AKI frequency and severity. 
37 Importantly, the intervention arm in that trial received more inotropic support compared to usual care. Our model independently converged on a similar pattern, recommending earlier and higher inotrope dosing, particularly when MAPs were declining. This convergence of data-driven strategy and trial-based results strengthens the plausibility of our findings. Our results also align with emerging data that suggest that restrictive fluid strategies after AKI onset are associated with higher rates of kidney recovery. 38 , 39 The fact that our RL model prioritized smaller IV fluid volumes supports this evolving paradigm and suggests that earlier hemodynamic optimization may not equate to aggressive IV fluid administration. Our study has several strengths. We developed an RL model using a large public ICU database (MIMIC-IV) and externally validated it in two independent cohorts, one retrospective (SICdb) and one prospective (MSHS), supporting generalizability. We used both Fitted Q Evaluation and adjusted concordance analyses to assess policy performance and clinical alignment. Model interpretability analyses identified physiologic variables driving the model’s recommendations, reinforcing their clinical plausibility. Subgroup analyses showed generally similar effects across sex and race, with no subgroup demonstrating evidence of harm or systematic bias, addressing a central concern for AI in clinical care. Our study has some limitations. First, this was an observational study, and only a randomized controlled trial can confirm causal benefit from RL-guided therapy. Second, due to lack of consistent availability, we did not include invasive hemodynamic data such as pulmonary artery catheter measurements. Incorporating these data in future work may further improve the model’s performance but may decrease generalizability. Finally, we developed this RL model to reduce the development of pAKI within five days after cardiac surgery. 
While this is a clinically meaningful outcome, future studies should examine longer-term outcomes such as Major Adverse Kidney Events by 30 days. In summary, we provide a data-driven framework for personalized hemodynamic management after cardiac surgery. By integrating routinely available clinical data, RL can identify personalized strategies that favor IV fluid, vasopressor and inotrope support to prevent the progression to pAKI. Its consistent performance across diverse cohorts highlights the potential for RL to enhance clinician judgment in complex critical care decisions. Future randomized studies are warranted to determine whether implementation of RL guided therapy can improve kidney and patient-centered outcomes. Data Availability The datasets utilized in development and retrospective validation of this RL model can be accessed after completion of necessary training and execution of data use agreement at https://physionet.org/content/mimiciv/2.2/ And: https://www.sicdb.com/ Data from Mount Sinai Health System used for prospective validation is not publicly available. Funding This study was supported by National Institutes of Health (NIH) grant K08DK131286. This work was supported in part through the computational and data resources and staff expertise provided by Scientific Computing and Data at the Icahn School of Medicine at Mount Sinai and supported by the Clinical and Translational Science Awards (CTSA) grant UL1TR004419 from the National Center for Advancing Translational Sciences. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. COI Statement This RL model is the subject of a patent cooperation treaty (PCT) application (International Application No. PCT/US2025/047727) in which MS, JMD, GNN, and AS are named inventors. 
GNN is a founder of Renalytix, Pensieve, Verici and provides consultancy services to AstraZeneca, Reata, Renalytix, Siemens Healthineer and Variant Bio, serves a scientific advisory board member for Renalytix and Pensieve. He also has equity in Renalytix, Pensieve and Verici. AS is a consultant for Roche Diagnostics Corporation. JAK reports receiving consulting fees from Astute Medical/bioMerieux, Astellas, Alexion, Chugai Pharma, Novartis, Mitsubishi Tenabe and GE Healthcare and is a Full-time employee of Spectral Medical. All remaining authors have declared no conflicts of interest. Code availability Code for this study may be shared upon reasonable requests to the corresponding author. Author Contributions Study concept and design: MS, JMD, AS; Acquisition of data: MS, JMD, WO, CC, AS; Analysis and interpretation of data: MS, JMD, ISA, WO, PJ, AS; Drafting of the manuscript: MS, JMD, WO, PJ, AS. Critical revision of the manuscript for important intellectual content: all authors. Statistical analysis: MS, WO; Obtained funding: AS, GNN. Administrative, technical, or material support: AS, GNN; Study supervision: AS ACKNOWLEDGEMENT REFERENCES 1. ↵ Bellomo R , Kellum JA , Ronco C. Acute kidney injury . Lancet . Aug 25 2012 ; 380 ( 9843 ): 756 - 66 . doi: 10.1016/S0140-6736(11)61454-2 OpenUrl CrossRef PubMed Web of Science 2. ↵ Karkouti K , Wijeysundera DN , Yau TM , et al. Acute kidney injury after cardiac surgery: focus on modifiable risk factors . Circulation. Feb 3 2009 ; 119 ( 4 ): 495 – 502 . doi: 10.1161/CIRCULATIONAHA.108.786913 OpenUrl CrossRef 3. Zeng J , Su X , Lin S , Li Z , Zhao Y , Zheng Z . Cardiac Surgery-Specific Subtle Perioperative Serum Creatinine Change in Defining Acute Kidney Injury After Coronary Surgery . JACC Adv . Nov 2024 ; 3 ( 11 ): 101326 . doi: 10.1016/j.jacadv.2024.101326 OpenUrl CrossRef PubMed 4. ↵ Alshaikh HN , Katz NM , Gani F , et al. Financial Impact of Acute Kidney Injury After Cardiac Operations in the United States . 
Ann Thorac Surg . Feb 2018 ; 105 ( 2 ): 469 – 475 . doi: 10.1016/j.athoracsur.2017.10.053 OpenUrl CrossRef PubMed 5. ↵ Brown JR , Kramer RS , Coca SG , Parikh CR . Duration of acute kidney injury impacts long-term survival after cardiac surgery . Ann Thorac Surg . Oct 2010 ; 90 ( 4 ): 1142 – 8 . doi: 10.1016/j.athoracsur.2010.04.039 OpenUrl CrossRef PubMed Web of Science 6. ↵ Chawla LS , Bellomo R , Bihorac A , et al. Acute kidney disease and renal recovery: consensus report of the Acute Disease Quality Initiative (ADQI) 16 Workgroup . Nat Rev Nephrol . Apr 2017 ; 13 ( 4 ): 241 – 257 . doi: 10.1038/nrneph.2017.2 OpenUrl CrossRef PubMed 7. ↵ Ahmed FR , Al-Yateem N , Nejadghaderi SA , Gamil R , AbuRuz ME . Effect of acute kidney injury care bundle on kidney outcomes in cardiac patients receiving critical care: a systematic review and meta-analysis . BMC Nephrol. Jan 10 2025 ; 26 ( 1 ): 17 . doi: 10.1186/s12882-025-03955-1 OpenUrl CrossRef PubMed 8. ↵ Kullmar M , Weiss R , Ostermann M , et al. A Multinational Observational Study Exploring Adherence With the Kidney Disease: Improving Global Outcomes Recommendations for Prevention of Acute Kidney Injury After Cardiac Surgery . Anesth Analg . Apr 2020 ; 130 ( 4 ): 910 – 916 . doi: 10.1213/ANE.0000000000004642 OpenUrl CrossRef PubMed 9. ↵ Kolhe NV , Staples D , Reilly T , et al. Impact of Compliance with a Care Bundle on Acute Kidney Injury Outcomes: A Prospective Observational Study . PLoS One . 2015 ; 10 ( 7 ): e0132279 . doi: 10.1371/journal.pone.0132279 OpenUrl CrossRef PubMed 10. ↵ Vaara ST , Bhatraju PK , Stanski NL , et al. Subphenotypes in acute kidney injury: a narrative review . Crit Care. Aug 19 2022 ; 26 ( 1 ): 251 . doi: 10.1186/s13054-022-04121-x OpenUrl CrossRef 11. ↵ Bhatraju PK , Mukherjee P , Robinson-Cohen C , et al. Acute kidney injury subphenotypes based on creatinine trajectory identifies patients at increased risk of death . Crit Care. Nov 17 2016 ; 20 ( 1 ): 372 . 
doi: 10.1186/s13054-016-1546-4 OpenUrl CrossRef 12. ↵ Barto AG . Reinforcement learning: An introduction. by richard’s sutton . SIAM Rev . 2021 ; 6 ( 2 ): 423 . OpenUrl 13. ↵ Jayaraman P , Desman J , Sabounchi M , Nadkarni GN , Sakhuja A . A Primer on Reinforcement Learning in Medicine for Clinicians . NPJ Digit Med. Nov 26 2024 ; 7 ( 1 ): 337 . doi: 10.1038/s41746-024-01316-0 OpenUrl CrossRef 14. ↵ Komorowski M , Celi LA , Badawi O , Gordon AC , Faisal AA . The Artificial Intelligence Clinician learns optimal treatment strategies for sepsis in intensive care . Nat Med . Nov 2018 ; 24 ( 11 ): 1716 – 1720 . doi: 10.1038/s41591-018-0213-5 OpenUrl CrossRef PubMed 15. ↵ Lee H , Yoon HK , Kim J , et al. Development and validation of a reinforcement learning model for ventilation control during emergence from general anesthesia . NPJ Digit Med. Aug 14 2023 ; 6 ( 1 ): 145 . doi: 10.1038/s41746-023-00893-w OpenUrl CrossRef 16. ↵ Desman JM , Hong ZW , Sabounchi M , et al. A distributional reinforcement learning model for optimal glucose control after cardiac surgery . NPJ Digit Med . May 27 2025 ; 8 ( 1 ): 313 . doi: 10.1038/s41746-025-01709-9 OpenUrl CrossRef PubMed 17. ↵ Rodemund N , Wernly B , Jung C , Cozowicz C , Kokofer A. The Salzburg Intensive Care database (SICdb): an openly available critical care dataset . Intensive Care Med . Jun 2023 ; 49 ( 6 ): 700 – 702 . doi: 10.1007/s00134-023-07046-3 OpenUrl CrossRef PubMed 18. ↵ Goradia S , Sardaneh AA , Narayan SW , Penm J , Patanwala AE . Vasopressor dose equivalence: A scoping review and suggested formula . J Crit Care . Feb 2021 ; 61 : 233 – 240 . doi: 10.1016/j.jcrc.2020.11.002 OpenUrl CrossRef PubMed 19. ↵ Mathew R , Di Santo P , Jung RG , et al. Milrinone as Compared with Dobutamine in the Treatment of Cardiogenic Shock . N Engl J Med. Aug 5 2021 ; 385 ( 6 ): 516 – 525 . doi: 10.1056/NEJMoa2026845 OpenUrl CrossRef 20. ↵ Oh W , Veshtaj M , Sawant A , et al. 
ORAKLE: Optimal Risk prediction for mAke30 in patients with sepsis associated AKI using deep LEarning . Crit Care. May 26 2025 ; 29 ( 1 ): 212 . doi: 10.1186/s13054-025-05457-w OpenUrl CrossRef 21. ↵ Okusa MD , Davenport A . Reading between the (guide)lines--the KDIGO practice guideline on acute kidney injury in the individual patient . Kidney Int . Jan 2014 ; 85 ( 1 ): 39 – 48 . doi: 10.1038/ki.2013.378 OpenUrl CrossRef PubMed 22. ↵ Rank N , Pfahringer B , Kempfert J , et al. Deep-learning-based real-time prediction of acute kidney injury outperforms human predictive performance . NPJ Digit Med . 2020 ; 3 : 139 . doi: 10.1038/s41746-020-00346-8 OpenUrl CrossRef PubMed 23. ↵ Kumar A , Zhou A , Tucker G , Levine S . Conservative q-learning for offline reinforcement learning . Advances in neural information processing systems . 2020 ; 33 : 1179 – 1191 . OpenUrl 24. ↵ Vincent JL , Moreno R , Takala J , et al. The SOFA (Sepsis-related Organ Failure Assessment) score to describe organ dysfunction/failure. On behalf of the Working Group on Sepsis-Related Problems of the European Society of Intensive Care Medicine . Intensive Care Med . Jul 1996 ; 22 ( 7 ): 707 – 10 . doi: 10.1007/BF01709751 OpenUrl CrossRef PubMed Web of Science 25. ↵ McKight PE , Najab J . Kruskal-wallis test . The corsini encyclopedia of psychology . 2010 : 1 – 1 . 26. ↵ Pearson K. X . On the criterion that a given system of deviations from the probable in the case of a correlated system of variables is such that it can be reasonably supposed to have arisen from random sampling. The London , Edinburgh, and Dublin Philosophical Magazine and Journal of Science . 1900 ; 50 ( 302 ): 157 – 175 . OpenUrl 27. ↵ Le H , Voloshin C , Yue Y. Batch policy learning under constraints . PMLR ; 2019 : 3703 - 3712 . 28. ↵ Robins JM , Hernan MA , Brumback B . Marginal structural models and causal inference in epidemiology . Epidemiology . Sep 2000 ; 11 ( 5 ): 550 – 60 . 
doi: 10.1097/00001648-200009000-00011 OpenUrl CrossRef PubMed Web of Science 29. ↵ Kalimouttou A , Kennedy JN , Feng J , et al. Optimal Vasopressin Initiation in Septic Shock: The OVISS Reinforcement Learning Study . JAMA. May 20 2025 ; 333 ( 19 ): 1688 – 1698 . doi: 10.1001/jama.2025.3046 OpenUrl CrossRef 30. White H . A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity . Econometrica: journal of the Econometric Society . 1980 : 817 – 838 . 31. ↵ Liang K-Y , Zeger SL . Longitudinal data analysis using generalized linear models . Biometrika . 1986 ; 73 ( 1 ): 13 – 22 . doi: 10.1093/biomet/73.1.13 OpenUrl CrossRef Web of Science 32. ↵ Lundberg SM , Lee S-I . A unified approach to interpreting model predictions . Advances in neural information processing systems . 2017 ; 30 33. ↵ Thomas P , Brunskill E . Data-efficient off-policy policy evaluation for reinforcement learning . PMLR ; 2016 : 2139 – 2148 . 34. ↵ Filiberto AC , Adiyeke E , Ozrazgat-Baslanti T , et al. Persistent Acute Kidney Injury is Associated with Poor Outcomes and Increased Hospital Cost in Vascular Surgery . Ann Vasc Surg . Jan 2024 ; 98 : 342 – 349 . doi: 10.1016/j.avsg.2023.06.023 OpenUrl CrossRef PubMed 35. ↵ Mathis MR , Mentz GB , Cao J , et al. Hospital and Clinician Practice Variation in Cardiac Surgery and Postoperative Acute Kidney Injury . JAMA Netw Open. May 1 2025 ; 8 ( 5 ): e258342 . doi: 10.1001/jamanetworkopen.2025.8342 OpenUrl CrossRef 36. ↵ Takkavatakarn K , Oh W , Chan L , et al. Machine learning derived serum creatinine trajectories in acute kidney injury in critically ill patients with sepsis . Crit Care. May 10 2024 ; 28 ( 1 ): 156 . doi: 10.1186/s13054-024-04935-x OpenUrl CrossRef 37. ↵ Zarbock A , Kullmar M , Ostermann M , et al. Prevention of Cardiac Surgery-Associated Acute Kidney Injury by Implementing the KDIGO Guidelines in High-Risk Patients Identified by Biomarkers: The PrevAKI-Multicenter Randomized Controlled Trial . 
Anesth Analg. Aug 1 2021 ; 133 ( 2 ): 292 – 302 . doi: 10.1213/ANE.0000000000005458 OpenUrl CrossRef 38. ↵ Oh W , Takkavatakarn K , Al-taie Z , et al. Personalized Fluid Management in Patients with Sepsis and AKI: A Casual Machine Learning Approach . medRxiv . 2025 : 2024.08.06.24311556 . doi: 10.1101/2024.08.06.24311556 OpenUrl Abstract / FREE Full Text 39. ↵ Vaara ST , Ostermann M , Bitker L , et al. Restrictive fluid management versus usual care in acute kidney injury (REVERSE-AKI): a pilot randomized controlled feasibility trial . Intensive Care Med . Jun 2021 ; 47 ( 6 ): 665 – 673 . doi: 10.1007/s00134-021-06401-6 OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted October 25, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Personalized Hemodynamic Management Using Reinforcement Learning to Prevent Persistent Acute Kidney Injury After Cardiac Surgery Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Personalized Hemodynamic Management Using Reinforcement Learning to Prevent Persistent Acute Kidney Injury After Cardiac Surgery Moein Sabounchi , Jacob M Desman , Idan Shenfeld Amit , Wonsuk Oh , Chris Capone , Pushkala Jayaraman , Gagan Kumar , Michelle Campoli , Mahima Vijayaraghavan , Prem Timsina , Paul McCarthy , Anthony Manasia , John Oropello , Robin Varghese , Ksenia Gorbenko , Hernando Gomez-Danies , Patricia Kovatch , Gordon Smith , Avniel Shetreat-Klein , Ashita Tolwani , Mayte Suárez-Fariñas , Kianoush Kashani , Ashish Khanna , Azra Bihorac , Jolion McGreevy , Lisa Stump , John Kellum , David Reich , Pulkit Agrawal , Girish N Nadkarni , Ankit Sakhuja medRxiv 2025.10.23.25338698; doi: https://doi.org/10.1101/2025.10.23.25338698 Share This Article: Copy Citation Tools Personalized Hemodynamic Management Using Reinforcement Learning to Prevent Persistent Acute Kidney Injury After Cardiac Surgery Moein Sabounchi , Jacob M Desman , Idan Shenfeld Amit , Wonsuk Oh , Chris Capone , Pushkala Jayaraman , Gagan Kumar , Michelle Campoli , Mahima Vijayaraghavan , Prem Timsina , Paul McCarthy , Anthony Manasia , John Oropello , Robin Varghese , Ksenia Gorbenko , Hernando Gomez-Danies , Patricia Kovatch , Gordon Smith , Avniel Shetreat-Klein , Ashita Tolwani , Mayte Suárez-Fariñas , Kianoush Kashani , Ashish Khanna , Azra Bihorac , Jolion McGreevy , Lisa Stump , John Kellum , David Reich , Pulkit Agrawal , Girish N Nadkarni , Ankit Sakhuja medRxiv 2025.10.23.25338698; doi: https://doi.org/10.1101/2025.10.23.25338698 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Intensive Care and Critical Care Medicine Subject Areas All Articles Addiction Medicine (567) Allergy and Immunology (863) Anesthesia (297) Cardiovascular Medicine (4411) Dentistry and Oral Medicine (443) Dermatology (380) Emergency Medicine (606) 
Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1505) Epidemiology (15205) Forensic Medicine (30) Gastroenterology (1119) Genetic and Genomic Medicine (6574) Geriatric Medicine (666) Health Economics (994) Health Informatics (4511) Health Policy (1365) Health Systems and Quality Improvement (1608) Hematology (537) HIV/AIDS (1263) Infectious Diseases (except HIV/AIDS) (15903) Intensive Care and Critical Care Medicine (1103) Medical Education (620) Medical Ethics (144) Nephrology (666) Neurology (6573) Nursing (345) Nutrition (998) Obstetrics and Gynecology (1139) Occupational and Environmental Health (954) Oncology (3319) Ophthalmology (968) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (662) Pediatrics (1689) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5422) Public and Global Health (9205) Radiology and Imaging (2191) Rehabilitation Medicine and Physical Therapy (1367) Respiratory Medicine (1191) Rheumatology (593) Sexual and Reproductive Health (709) Sports Medicine (529) Surgery (709) Toxicology (99) Transplantation (288) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9feb0b3cbf8752ad',t:'MTc3OTI3Njk4OA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var 
e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00