Systematic Exploration of Hospital Cost Variability: A Conformal Prediction-Based Outlier Detection Method for Electronic Health Records

doi:10.1101/2025.01.10.25320349

Systematic Exploration of Hospital Cost Variability: A Conformal Prediction-Based Outlier Detection Method for Electronic Health Records

2025 · doi:10.1101/2025.01.10.25320349

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 36,752 characters · extracted from preprint-html · click to expand

Systematic Exploration of Hospital Cost Variability: A Conformal Prediction-Based Outlier Detection Method for Electronic Health Records | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Systematic Exploration of Hospital Cost Variability: A Conformal Prediction-Based Outlier Detection Method for Electronic Health Records François Grolleau , Ethan Goh , Stephen P. Ma , Jonathan Masterson , Ted Ross , Arnold Milstein , Jonathan H. 
Chen doi: https://doi.org/10.1101/2025.01.10.25320349 François Grolleau 1 Stanford Center for Biomedical Informatics Research, Stanford University , Stanford, CA MD, PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ethan Goh 1 Stanford Center for Biomedical Informatics Research, Stanford University , Stanford, CA 2 Stanford Clinical Excellence Research Center, Stanford University , Stanford, CA MD, MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Stephen P. Ma 3 Department of Medicine, Stanford University , Stanford, CA MD, PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jonathan Masterson 4 Stanford Health Care , Menlo Park, CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ted Ross 4 Stanford Health Care , Menlo Park, CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Arnold Milstein 2 Stanford Clinical Excellence Research Center, Stanford University , Stanford, CA MD, MPH Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jonathan H. Chen 1 Stanford Center for Biomedical Informatics Research, Stanford University , Stanford, CA 2 Stanford Clinical Excellence Research Center, Stanford University , Stanford, CA 5 Division of Hospital Medicine, Stanford University , Stanford, CA MD, PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: jonc101{at}stanford.edu Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Marked variability in inpatient hospitalization costs poses significant challenges to healthcare quality, resource allocation, and patient outcomes. 
Traditional methods like Diagnosis-Related Groups (DRGs) aid in cost management but lack practical solutions for enhancing hospital care value. We introduce a novel methodology for outlier detection in Electronic Health Records (EHRs) using Conformal Prediction. This approach identifies and prioritizes areas for optimizing high-value care processes. Unlike conventional predictive models that neglect uncertainty, our method employs Conformal Quantile Regression (CQR) to generate robust prediction intervals, offering a comprehensive view of cost variability. By integrating Conformal Prediction with machine learning models, healthcare professionals can more accurately pinpoint opportunities for quality and efficiency improvements. Our framework systematically evaluates unexplained hospital cost variations and generates interpretable hypotheses for refining clinical practices associated with atypical costs. This data-driven approach offers a systematic method to generate clinically sound hypotheses that may inform processes to enhance care quality and optimize resource utilization. Introduction Marked variance amongst inpatient hospitalization costs impedes the efficient allocation of scarce medical resources and adversely affects healthcare quality, patient outcomes, and medical charges. In the United States, hospital costs remain significantly high, posing a substantial financial burden on both healthcare institutions and patients. The widespread use of Diagnosis-Related Groups (DRGs) has provided a foundational methodology for categorizing hospital cases to support cost management ( 1 ). However, accurately predicting and modulating hospital costs remains challenging. This challenge is further complicated by variations attributable to uncontrollable patient factors, such as illness severity or comorbidities present at admission, as well as deviations in medical practice and care delivery. 
A significant amount of current research and operational efforts, by organizations such as Vizient, have focused on predictive modeling techniques. These techniques identify risk factors associated with adverse outcomes, like increased mortality or length of stay, and play a critical role in calculating observed-to-expected cost ratios, thereby informing reimbursement strategies ( 2 – 5 ). Despite these advancements, there is a dearth of literature addressing the modifiability of hospital costs ( 6 ). Much work has illuminated predictability, but the modifiability of medical costs due to care delivery practices remains underexplored. Estimating causal effects using Electronic Health Records (EHRs) is particularly challenging due to the reliance on observational data and the dynamic nature of care delivery. Each potential medical intervention would typically require expert knowledge to specify the variables causing each intervention at different points in a patient’s trajectory ( 7 ), which is impractical given the vast number of interventions to evaluate. Without a systematic approach, numerous plausible areas for quality and efficiency improvements in healthcare—such as the early discharge process, nursing home placement accessibility, goals of care conversations, deterioration detection, nutrition, physical therapy early mobility, weekend discharge practice, bowel regimen, adequate hydration and electrolyte management, and transfusion practices—remain speculative. In this paper, we introduce a novel methodology using Conformal Prediction-based outlier detection for EHRs to systematically generate and prioritize hypotheses about high-value care processes and areas for improvement in hospital practice. Similar to Phenome-Wide Association Studies ( 8 ) (PheWAS), which associate genetic variants with a wide range of phenotypic traits, our approach systematically identifies and prioritizes care processes. 
While most predictive modeling studies do not quantify uncertainty in their predictions, we leverage Conformal Quantile Regression (CQR), a state-of-the-art framework, to provide valid prediction intervals for any machine learning model ( 9 ). To the best of our knowledge, this study is the first to apply conformal inference as an outlier detection method for EHR data, despite its prior use in other domains ( 10 ). Additionally, integrating Conformal Prediction in cost variability analysis facilitates the re-engagement of human experts in the loop ( 11 ). This method enables healthcare professionals to uncover insights into specific care processes that can be optimized, facilitating a more data-informed and expert-guided approach to enhancing hospital cost efficiency and patient care outcomes. Our objective is to develop a method for systematically evaluating unexplained hospital cost variability and to generate plausible, interpretable hypotheses for clinical care practice patterns associated with higher or lower-than-expected costs. This approach aims to provide actionable insights that can improve care quality and efficiency in clinical operations. Methods An overview of our workflow for systematic exploration of hospital cost variability is shown in Figure 1 . Download figure Open in new tab Figure 1. The conformal prediction-based outlier detection workflow for systematic exploration of hospital cost variability. Methodological details are provided in the body text. Data source and database preparations The data consists of deidentified Electronic Health Records (EHR) for 33 077 inpatients treated at a large academic medical center, Stanford Health Care in Palo Alto, CA, between March 2019 and August 2021. These records are obtained by merging the cost and Diagnosis-Related Group (DRG) databases, incorporating both MS-DRG and APR-DRG classification systems. The merging process utilizes patient identifiers along with admission and discharge dates. 
Unless otherwise mentioned, all following steps are repeated separately for each DRG. For each patient-hospital encounter, we link the cost and DRG databases. Using the Deployr cohort builder ( 12 ) on Stanford Health Care EHR, we create DRG-specific cohorts. We subsequently apply the Deployr feature extractor pipeline ( 12 ) onto these cohorts to extract clinical features within 48 hours of hospitalization. Given that this automated feature extraction pipeline produces sparse, high-dimensional feature matrices, we concentrate on the two hundred features with the least amount of missing data. The resulting DRG-specific datasets include clinical features such as diagnosis (ICD-10) codes, medication orders, demographic variables, and laboratory results. Model fitting In our approach, for each DRG-specific dataset, we build a model to predict the direct inpatient costs (scaled relative to the average inpatient admission cost) from the features available within 48 hours of hospitalization. For scalability without hurting models’ predictive abilities ( 13 ), missing data is managed through imputation by the mode. Indicator variables denoting missingness are concatenated to the imputed feature matrices. The datasets are randomly split into training, calibration (for conformalization, see below ), and test sets with a 65/10/25% ratio. All models are gradient-boosting decision trees ( 14 , 15 ), which have proven highly effective for tasks involving tabular data ( 16 , 17 ). Models’ hyperparameters are determined by minimizing the 5-fold cross-validated mean absolute error through randomized search ( 18 ). Models’ predictive performance is evaluated on held-out test sets by calibration plots including a smoothed (spline-based) calibration curve as well as the R², Root Mean Squared Error (RMSE), intercept, and slope metrics ( 19 ). 
Model explainability To gain a first line of insights into the determinants driving high (and low) hospital costs, we calculate SHapley Additive exPlanations (SHAP) values ( 20 ) from our models for all observations in a DRG-specific dataset. Feature importance is systematically assessed via Beeswarm plots. To explore uncertainty in predicted costs, we calculate prediction intervals (PIs) for all predictions from our models using conformalized quantile regression (CQR) ( 9 , 21 ). Conformal prediction is a distribution-free uncertainty quantification method that has finite-sample guarantees applying to any non-parametric machine learning model, including gradient-boosting decision trees. In practice, we rely on the MAPIE library ( 22 ) and the held-out calibration sets to conformalize the standard (i.e., biased) PIs from quantile regression. To investigate cost variability, for each DRG, we calculate the mean for CQR PIs’ (i) lower bound, (ii) upper bound, and (iii) widths, all categorized by fifths of predicted costs. Outlier identification We characterize as high- (low-) cost outliers the patients whose observed cost was above (below) the upper (lower) bound of their 50% CQR PI. The nominal coverage of the PIs can be adjusted to vary the outlier group size. In our experiments, 50% PIs provide a reasonable balance between outlier group size and mean difference in costs for outliers vs other patients. Systematic identification of high- (low-) cost determinants For each DRG, we use Stanford Health Care EHR to compare the medicines, procedures, order sets, and diagnoses associated with high-cost outliers vs other patients. These comparisons are repeated for low-cost outliers vs other patients. More precisely, we query the medicine, procedure, order set, and diagnosis tables from Stanford Health Care EHR for outliers and nonoutlier patients, for all DRGs. 
The exposure of outliers to various medicines, procedures, or order sets, is compared to the corresponding exposure in nonoutliers. We build contingency tables for all comparisons, and the differences in exposures are quantified through odds ratios and p-values from Fisher exact tests. To facilitate prioritization, we merge results across all DRGs, for each exposure comparison (medicines, procedures, order sets, and diagnoses) and remove duplicates. Lastly, mimicking the methodology from PheWAS studies ( 8 ), we rank all comparisons by ascending p-values to highlight the most promising hypotheses for cost reduction. Since p-values are used exclusively for ranking associations, we do not apply corrections for multiple comparisons. Therefore, the magnitude of the p-values should be interpreted as exploratory and hypothesis-generating. Reflection of the findings In the final step, we engage domain experts—physicians, internal medicine specialists, hospitalists, and hospital finance specialists (specifically FG, EG, JHC, and JM in this study)—to evaluate the prioritized cost association hypotheses. The subject experts then review these to assess which medicines, procedures, and order sets could imply plausible interventions/scenarios/modifiers for improving value without compromising health outcomes. Results Model evaluation and explainability Our workflow produces calibration plots, evaluation metrics, SHAP values, and cost variability assessment for each DRG-specific prediction model. As an example, in Figure 2 , we show the output of our pipeline for the prediction model corresponding to the DRG “open craniotomy except trauma.” Download figure Open in new tab Figure 2. Example output from our systematic exploration pipeline for model fitting and explainability and the DRG “open craniotomy except trauma.” Panel A: Evaluation of the DRG-specific gradient-boosted tree model on the held-out test set. 
On Panel A, each dot represents a patient and the dot’s color depicts the uncertainty (50% CQR PI width) in the prediction for that patient. A positive value of R² indicates that the model, using features collected within 48 hours of hospitalization, outperforms a model that would predict the mean cost for the relevant DRG. Panel B: Feature importance for SHAP values. For a better understanding of our machine learning models, our workflow systematically showcases SHAP values in a Beeswarm plot. Panel B illustrates that the absence of COVID-19 PCR testing correlates with higher hospital costs. Additionally, missing other medical orders, including complete blood count, ABO/Rh testing, and metabolic panels, are also correlated with increased costs. Severity indicators such as respiratory rate, SpO2, and temperature exhibit complex associations with costs, which might not be easily discernible when considering these variables individually. Panel C: Investigation of DRG-specific cost variability by fifths of the predicted costs. To investigate whether cost variability was greater in high or low-cost patients, we systematically represent uncertainty (50% CQR PI width) in the prediction categorized by fifths of predicted costs. The vertical intervals are ± standard errors for the means. Panel C shows that the uncertainty in the predicted cost was greatest in the patients with the highest predicted cost. In all panels, costs are scaled relative to the average inpatient admission cost. Systematic identification of high- (low-) cost determinants Our framework prioritizes hypotheses to improve the value of care by enabling comparisons of procedures, medications, order sets, and diagnoses associated with high- and low-cost outliers relative to other patients across all DRGs. For instance, in Table 1 we present comparisons for medical order sets where all top ten comparisons with the lowest p-values are related to high-cost outliers. 
Comparisons for low-cost outliers can be readily explored by conditioning on these comparisons. Table 2 provides an example of comparisons involving low-cost outliers. In both cases, we offer interpretations based on the review of clinical notes and transfer summaries of a selection of outlier patients. View this table: View inline View popup Download powerpoint Table 1. Top ten medical order sets entries with outliers vs nonoutliers comparisons ranked by p-values for prioritization. The outliers versus nonoutliers patients are identified by using conformal inference. The “Exposure” and “No Exposure” columns denote the number of patients within a specific DRG who received or did not receive the corresponding medical order set, respectively. The proportions in each category are calculated as $\frac{\text{Exposure}}{\text{Exposure} + \text{No Exposure}}$. The odds ratios are computed as $\frac{\text{Exposure}_{\text{outliers}} / \text{No Exposure}_{\text{outliers}}}{\text{Exposure}_{\text{nonoutliers}} / \text{No Exposure}_{\text{nonoutliers}}}$. An odds ratio greater than 1 indicates a positive association between the medical order set and the presence of high-cost outliers. Patients undergoing “Open Craniotomy Except Trauma” who receive the “Inpatient Tube Feeding Protocol” incur higher than expected costs, likely because those patients needing specialized nutritional support during recovery require more extensive and lengthy care. Similarly, those with “Septicemia and Disseminated Infections” needing “Inpatient Vascular Access (PICCs, EDPIVs, and USGPIVs)” likely require extended intravenous therapy, indicating severe illness and higher costs to administer treatment. Procedures like “Major Pancreas, Liver, and Shunt Procedures” and “Major Small Bowel Procedures” using the “Radiology CT Contrast Order Set” necessitate detailed imaging, indicating greater complexity and expense in their management. The “Inpatient General Electrolyte Replacement Protocol” for conditions like “Septicemia and Disseminated Infections” and “Open Craniotomy Except Trauma” suggests correction of critical imbalances, reflecting higher illness severity. 
The “Inpatient ICU/CCU General Admission” and “Inpatient Insulin Transition From IV Infusion” for critically ill patients point to extensive care and resource use, leading to higher costs. These findings provide assurance that we are not identifying false correlations while gaining deeper insights into how tube feeding, vascular access needs, and insulin drip requirements stand out as being more strongly associated with higher-than-expected costs. CT=Computed Tomography. DRG=Diagnosis-Related Group. EDPIV=Emergency Department Peripheral Intravenous Line. PICC=Peripherally Inserted Central Catheter. USGPIV=Ultrasound-Guided Peripheral Intravenous Line. View this table: View inline View popup Download powerpoint Table 2. Top ten medical order sets entries for low-cost outliers. The comparisons between low-cost outliers vs nonoutliers are ranked by p-values. The outliers versus nonoutliers patients are identified by using conformal inference. The “Exposure” and “No Exposure” columns denote the number of patients within a specific DRG who received or did not receive the corresponding medical order set, respectively. The proportions in each category are calculated as $\frac{\text{Exposure}}{\text{Exposure} + \text{No Exposure}}$. The odds ratios are computed as $\frac{\text{Exposure}_{\text{outliers}} / \text{No Exposure}_{\text{outliers}}}{\text{Exposure}_{\text{nonoutliers}} / \text{No Exposure}_{\text{nonoutliers}}}$. An odds ratio lower than 1 indicates a positive association between the medical order set not being received and the presence of low-cost outliers. Patients with “Septicemia Or Severe Sepsis Without MV >96 Hours With MCC” who do not receive treatments such as “Physical Therapy,” “Occupational Therapy,” and various diagnostic tests have lower-than-expected hospital costs, suggesting less severe conditions. Similarly, those undergoing “Dorsal and Lumbar Fusion” without the need for “Red Blood Cell Transfusion” or “Arterial Blood Gas Analysis,” as well as “Major Pancreas, Liver, and Shunt Procedures” patients not on an “NPO Diet Order,” also incur lower-than-expected costs, indicating simpler clinical situations. 
These findings confirm that we are not detecting spurious correlations and provide deeper insights into how physical therapy and parenteral nutrition, rather than other order sets, are more negatively associated with lower-than-expected costs. DRG=Diagnosis-Related Group. MV=Mechanical Ventilation. MCC=Major Complications or Comorbidities. NPO= Nil Per Os . Discussion In this analysis of Stanford EHR data, we have applied a systematic method to generate clinically sound hypotheses aimed at enhancing care quality and optimizing resource utilization. The originality of our approach lies in the integration of conformal inference with traditional machine learning techniques to identify high-cost and low-cost outliers. Our analysis revealed that interventions such as tube feeding, vascular access needs, insulin drip, physical therapy, and parenteral nutrition stand out as being more strongly associated with higher-than-expected costs. If validated, these findings suggest that focusing quality and efficiency improvements on these interventions may enhance the value of inpatient care. Beyond the specific results observed at Stanford, our approach holds promise for broader applications. The systematic integration of conformal inference with machine learning can be implemented in other healthcare institutions to uncover and prioritize areas where care processes have higher-than-expected costs. By identifying cost outliers, these institutions can pinpoint specific procedures or diagnoses that require targeted interventions to improve efficiency. This flexible and adaptable approach provides a robust framework for continuous improvement across a variety of clinical settings, ensuring that health care systems can optimize costs while maintaining or enhancing care quality. However, our analysis had several limitations. Within certain DRGs, such as “heart failure” and “respiratory infections,” cost prediction models demonstrated limited performance on left-out test sets. 
This limitation is mitigated by the primary objective of our method, which is to retrospectively identify high-cost and low-cost outlier patients rather than develop models for prospective deployment. Additionally, the data was collected between March 2019 and August 2021, during the COVID-19 pandemic. Rapidly evolving clinical practices during this period may have impacted our hypothesis generation and prioritization. Despite this, we successfully generated clinically sound hypotheses, validated by experts. We anticipate our methodology to be more robust when clinical practices are stable over time. As in PheWAS studies, our method ranks associations rather than evaluating causal effects. This scalability requires human expertise at the final stage to confirm or refute the causal likelihood of the generated hypotheses. Enhancing this step with language models to extract relevant information from clinical notes and discharge summaries could improve efficiency. We are currently working on solutions to facilitate this process. The American public and policymakers have made significant investments in digitizing health records. We have demonstrated proof of concept for our systematic method to generate clinically sound hypotheses that may inform processes to enhance care quality and optimize resource utilization. Other institutions can adopt our workflow to quantify cost inefficiencies associated with specific diagnoses or procedures and prioritize cost-reduction efforts by their potential impact. Conclusion We present a proof of concept for integrating conformal prediction with machine learning to systematically explore hospital cost variability. By incorporating human experts back into the decision-making loop, this approach not only identifies areas for potential cost savings but also aims to maintain or enhance the quality of care. This methodology offers a valuable tool for healthcare systems seeking to optimize resource utilization and improve patient outcomes. 
Our findings highlight the potential for data-driven strategies to inform clinical practice and drive efficiency improvements in hospital settings. Data Availability All data produced in the present study are available upon reasonable request to the authors. Contributions FG: study ideation and design, analysis, interpretation of results, manuscript first draft; EG: study ideation and design, clinical interpretation; SPM: study ideation and design, clinical interpretation; JM: study ideation and design, operational interpretation; TR: provided the cost data, operational interpretation; AM: study ideation, curated and provided data; JHC: study ideation and design, interpretation of results, manuscript first draft. All authors reviewed and provided feedback on manuscript. Acknowledgments We thank Selina Pi and Conor K. Corbin for their helpful feedback on the hospital cost database and the Deployr feature extraction pipeline respectively. The authors report no disclosures or conflicts of interest. References 1. ↵ Pi S , Masterson J , Ma SP , Corbin CK , Milstein A , Chen JH . Using Case Mix Index within Diagnosis-Related Groups to Evaluate Variation in Hospitalization Costs at a Large Academic Medical Center . AMIA Annu Symp Proc . 2024 Jan 11; 2023 : 1201 – 8 . OpenUrl PubMed 2. ↵ Thomas JW , Holloway JJ , Guire KE . Validating risk-adjusted mortality as an indicator for quality of care . Inq J Med Care Organ Provis Financ . 1993 ; 30 ( 1 ): 6 – 22 . OpenUrl 3. Werner RM , Bradlow ET . Relationship between Medicare’s hospital compare performance measures and mortality rates . JAMA. 2006 Dec 13 ; 296 ( 22 ): 2694 – 702 . 4. Werner RM , Bradlow ET , Asch DA . Does hospital performance on process measures directly measure high quality care or is it a marker of unmeasured care? Health Serv Res . 2008 Oct ; 43 ( 5 Pt 1 ): 1464 – 84 . OpenUrl PubMed Web of Science 5. ↵ Hanna Y , Nandra K , Kustera C , Smith J , Metzinger M , Patel K , et al. 
Debility Risk Model as a Predictor for Postsurgical Outcomes . Am Surg . 2021 Sep ; 87 ( 9 ): 1457 – 62 . OpenUrl PubMed 6. ↵ Rudy MD , Bentley J , Ahuja N , Rohatgi N. Determinants of Cost Variation in Total Hip and Knee Arthroplasty: Implications for Alternative Payment Models . J Am Acad Orthop Surg . 2020 Mar 15; 28 ( 6 ): e245 – 54 . OpenUrl PubMed 7. ↵ Grolleau F , Petit F , Gaudry S , Diard É , Quenot JP , Dreyfuss D , et al. Personalizing renal replacement therapy initiation in the intensive care unit: a reinforcement learning-based strategy with external validation on the AKIKI randomized controlled trials . J Am Med Inform Assoc . 2024 May 1; 31 ( 5 ): 1074 – 83 . OpenUrl PubMed 8. ↵ Bastarache L , Denny JC , Roden DM . Phenome-Wide Association Studies . JAMA . 2022 Jan 4; 327 ( 1 ): 75 – 6 . OpenUrl CrossRef PubMed 9. ↵ Romano Y , Patterson E , Candes E. Conformalized Quantile Regression . In: Advances in Neural Information Processing Systems . Curran Associates, Inc .; 2019 . Available from: https://proceedings.neurips.cc/paper/2019/hash/5103c3584b063c431bd1268e9b5e76fb-Abstract.html 10. ↵ Bates S , Candès E , Lei L , Romano Y , Sesia M. Testing for outliers with conformal p-values . Ann Stat . 2023 Feb ; 51 ( 1 ): 149 – 78 . OpenUrl 11. ↵ Nguyen M , Eulalio T , Marafino BJ , Rose C , Chen JH , Baiocchi M. Thick Data Analytics (TDA): An Iterative and Inductive Framework for Algorithmic Improvement . Am Stat . 0 ( 0 ): 1 – 9 . 12. ↵ Corbin CK , Maclay R , Acharya A , Mony S , Punnathanam S , Thapa R , et al. DEPLOYR: a technical framework for deploying custom real-time machine learning models into the electronic medical record . J Am Med Inform Assoc . 2023 Sep 1; 30 ( 9 ): 1532 – 42 . OpenUrl PubMed 13. ↵ Josse J , Chen JM , Prost N , Varoquaux G , Scornet E. On the consistency of supervised learning with missing values . Stat Pap . 2024 Dec 1; 65 ( 9 ): 5447 – 79 . OpenUrl 14. ↵ Friedman JH . Greedy function approximation: A gradient boosting machine . 
Ann Stat . 2001 Oct ; 29 ( 5 ): 1189 – 232 . OpenUrl CrossRef Web of Science 15. ↵ Ke G , Meng Q , Finley T , Wang T , Chen W , Ma W , et al. LightGBM: A Highly Efficient Gradient Boosting Decision Tree . In: Advances in Neural Information Processing Systems . Curran Associates, Inc .; 2017 . Available from: https://papers.nips.cc/paper_files/paper/2017/hash/6449f44a102fde848669bdd9eb6b76fa-Abstract.html 16. ↵ Shwartz-Ziv R , Armon A. Tabular data: Deep learning is not all you need . Inf Fusion . 2022 May 1; 81 : 84 – 90 . OpenUrl 17. ↵ Grinsztajn L , Oyallon E , Varoquaux G. Why do tree-based models still outperform deep learning on typical tabular data? Adv Neural Inf Process Syst . 2022 Dec 6; 35 : 507 – 20 . OpenUrl 18. ↵ Pedregosa F , Varoquaux G , Gramfort A , Michel V , Thirion B , Grisel O , et al. Scikit-learn: Machine learning in Python . J Mach Learn Res . 2011 ; 12 : 2825 – 30 . OpenUrl CrossRef PubMed 19. ↵ Riley RD , Archer L , Snell KIE , Ensor J , Dhiman P , Martin GP , et al. Evaluation of clinical prediction models (part 2): how to undertake an external validation study . BMJ . 2024 Jan 15; 384 : e074820 . OpenUrl FREE Full Text 20. ↵ Lundberg SM , Lee SI . A Unified Approach to Interpreting Model Predictions . In: Advances in Neural Information Processing Systems . Curran Associates, Inc .; 2017 . Available from: https://proceedings.neurips.cc/paper/2017/hash/8a20a8621978632d76c43dfd28b67767-Abstract.html 21. ↵ Angelopoulos AN , Bates S. A Gentle Introduction to Conformal Prediction and Distribution-Free Uncertainty Quantification . arXiv ; 2022 . Available from: http://arxiv.org/abs/2107.07511 22. ↵ Taquet V , Blot V , Morzadec T , Lacombe L , Brunel N. MAPIE: an open-source library for distribution-free uncertainty quantification . arXiv ; 2022 . Available from: http://arxiv.org/abs/2207.12274 View the discussion thread. Back to top Previous Next Posted January 12, 2025. 
Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Systematic Exploration of Hospital Cost Variability: A Conformal Prediction-Based Outlier Detection Method for Electronic Health Records Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Systematic Exploration of Hospital Cost Variability: A Conformal Prediction-Based Outlier Detection Method for Electronic Health Records François Grolleau , Ethan Goh , Stephen P. Ma , Jonathan Masterson , Ted Ross , Arnold Milstein , Jonathan H. Chen medRxiv 2025.01.10.25320349; doi: https://doi.org/10.1101/2025.01.10.25320349 Share This Article: Copy Citation Tools Systematic Exploration of Hospital Cost Variability: A Conformal Prediction-Based Outlier Detection Method for Electronic Health Records François Grolleau , Ethan Goh , Stephen P. Ma , Jonathan Masterson , Ted Ross , Arnold Milstein , Jonathan H. 
Chen medRxiv 2025.01.10.25320349; doi: https://doi.org/10.1101/2025.01.10.25320349 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4436) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4538) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (542) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3333) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9232) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00e37fc586d0db4',t:'MTc3OTY0NTgyMw=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citing papers typically take a year or two to appear, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00