Full text
32,888 characters
· extracted from
preprint-html
· click to expand
Resampling Methods for Class Imbalance in Clinical Prediction Models: A Systematic Review and Meta-Regression Protocol | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Resampling Methods for Class Imbalance in Clinical Prediction Models: A Systematic Review and Meta-Regression Protocol View ORCID Profile Osama Abdelhay , Adam Shatnawi , Hassan Najadat , Taghreed Altamimi doi: https://doi.org/10.1101/2025.05.19.25327868 Osama Abdelhay 1 Department of Data Science and Artificial Intelligence, Princess Sumaya University for Technology , Amman, Jordan Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID 
record for Osama Abdelhay For correspondence: o.abdelhay{at}psut.edu.jo Adam Shatnawi 2 Department of Computer Science, Jordan University of Science and Technology , Irbid, Jordan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hassan Najadat 2 Department of Computer Science, Jordan University of Science and Technology , Irbid, Jordan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Taghreed Altamimi 3 College of Engineering and Advanced Computing, Alfaisal University , Riyadh, Saudi Arabia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Introduction Class imbalance—situations where clinically important “positive” cases form <30 % of the dataset—systematically degrades the sensitivity and fairness of medical prediction models. Although data-level techniques such as random oversampling, random undersampling and SMOTE, and algorithm-level approaches like cost-sensitive learning, are widely used, the empirical evidence describing when these corrections improve model performance remains fragmented across diseases and modelling frameworks. This protocol outlines a scoping systematic review with meta-regression that will map and quantitatively summarise 15 years of research on resampling strategies in imbalanced clinical datasets, addressing a critical methodological gap in trustworthy medical AI. Methods and analysis We will search MEDLINE, EMBASE, Scopus, Web of Science Core Collection and IEEE Xplore, plus grey-literature sources (medRxiv, arXiv, bioRxiv) for primary studies (2009 – 31 Dec 2024) that apply at least one resampling or cost-sensitive method to binary clinical prediction tasks with a minority-class prevalence <30 %. No language restrictions will be applied. 
Two reviewers will screen records, extract data with a piloted form and document the process in a PRISMA flow diagram. A descriptive synthesis will catalogue clinical domain, sample size, imbalance ratio, resampling technique, model type and performance metrics. Where ≥10 studies report compatible AUCs, a mixed-effects meta-regression (logit-transformed AUC) will examine moderators including imbalance ratio, resampling class, model family and sample size. Small-study effects will be probed with funnel plots, Egger’s test, trim-and-fill and weight-function models; influence diagnostics and leave-one-out analyses will assess robustness. Because this is a methodological review, formal clinical risk-of-bias tools are optional; instead, design-level screening, influence diagnostics and sensitivity analyses will ensure transparency. Discussion By combining a broad conceptual map with quantitative estimates, this review will establish when data-level versus algorithm-level balancing yields genuine improvements in discrimination, calibration and cost-sensitive metrics across diverse medical domains. The findings will guide researchers in choosing parsimonious, evidence-based imbalance corrections, inform journal and regulatory reporting standards, and highlight research gaps, such as the under-reporting of calibration and misclassification costs, that must be addressed before balanced models can be trusted in clinical practice. Systematic review registration INPLASY202550026 Introduction Medical prediction datasets are frequently imbalanced, with the clinically important “positive” class representing fewer than 30 % of observations. Such skew systematically biases classical (e.g., logistic regression) and modern machine-learning classifiers toward the majority class, eroding sensitivity for the minority group [ 1 – 3 ]. 
To mitigate this threat, a family of data-level resampling techniques—random oversampling (ROS), random undersampling (RUS), and the Synthetic Minority Oversampling Technique (SMOTE)—adjust the training data before modelling [ 1 , 4 , 5 ]. While widely adopted, ROS can overfit duplicates, RUS discards potentially informative cases, and SMOTE or its variants may inject unrealistic synthetic examples [ 6 – 9 ]. Evidence comparing resampling to alternative strategies remains inconclusive. An extensive systematic review showed no consistent performance advantage of machine learning over logistic regression when event-per-variable ratios were adequate [ 10 ]. Moreover, simulation and empirical studies demonstrate that adequate sample size planning, rather than aggressive post-hoc balancing, often eliminates the need for resampling [ 11 – 16 ]. At the algorithm level, cost-sensitive learning penalises minority-class errors directly and can outperform data-level methods, yet it is rarely reported in medical AI research [ 4 , 17 ]. Developments in binary classification theory—from early statistical formulations to perceptrons, support vector machines, and boosted ensembles—underline that model choice interacts with class distribution and cost structure [ 18 – 21 ]. Against this backdrop, we will conduct a scoping systematic review with meta-regression to (i) map resampling and cost-sensitive strategies used in imbalanced medical datasets, (ii) quantify their impact on discrimination and calibration, and (iii) identify methodological moderators and research gaps. This protocol describes the planned methods. Objectives Primary objective To determine, across clinical prediction studies with binary outcomes and minority-class prevalence < 30 %, whether applying data-level resampling or algorithm-level cost-sensitive strategies measurably improves model performance relative to training on the original imbalanced data. 
Specific objectives Evidence mapping – Catalogue the full range of imbalance-correction strategies (oversampling, undersampling, hybrids, weighted or focal-loss models) reported between 2009 and 2024, together with the clinical domains, dataset sizes, imbalance ratios, and modelling frameworks in which they are used. Comparative effectiveness – Quantify and compare discrimination (e.g., AUC, sensitivity, specificity) and, where available, calibration metrics achieved by ∘ oversampling, ∘ undersampling, ∘ hybrid pipelines, and ∘ cost-sensitive algorithms, against models trained without any balancing. Moderator analysis – Using mixed-effects meta-regression, evaluate how study-level characteristics (imbalance ratio, sample size, number of predictors, model family, and clinical domain) modify the effectiveness of each imbalance-correction strategy. Bias and robustness assessment – Probe small-study effects, publication bias, and influential outliers through funnel-plot diagnostics, trim-and-fill, weight-function models, and leave-one-out analyses; gauge how these factors affect pooled estimates. Methodological gap identification – Highlight recurrent pitfalls, such as neglecting calibration, misclassification costs, or external validation, and formulate evidence-based recommendations for future research and reporting. Methods This protocol adheres to the PRISMA-P [ 22 ] and PRISMA-ScR [ 23 ] guidelines and has been registered with INPLASY (ID INPLASY202550026) (S1 File). Any amendments will be documented in the INPLASY record. Eligibility criteria (PICOTS) Population: Clinical prediction studies that analyse binary outcomes with an explicit minority-class prevalence < 30 %. For this review, a binary outcome is restricted to diagnostic, prognostic, or treatment-response predictions in which the dependent variable has exactly two mutually exclusive states (e.g., disease present/absent). 
Interventions: Data-level resampling (random oversampling, random undersampling, SMOTE or variants, hybrid pipelines) and algorithm-level cost-sensitive techniques (weighted losses, focal loss). Comparators: Models trained on the original imbalanced data and/or alternative resampling or weighting strategies. Outcomes: Primary—AUC; secondary—sensitivity, F1-score, specificity, balanced accuracy, calibration metrics, and reported mis-classification costs. Timing: Publications from 1 Jan 2009 to 31 Dec 2024. Study design: Retrospective or prospective primary studies (including model-development and validation papers) and systematic reviews that re-analyse primary data. Simulation-only papers, non-binary tasks, or abstracts lacking methods are excluded. Studies focused exclusively on radiomics, image-segmentation pipelines, or pixel-level classification tasks will be excluded because these do not output patient-level binary predictions. Information sources and search strategy Searches were executed in MEDLINE (PubMed), EMBASE, Scopus, Web of Science Core Collection and IEEE Xplore. Grey-literature repositories (medRxiv, arXiv, bioRxiv) and code platforms (GitHub) were also screened. A peer-reviewed strategy combined controlled vocabulary and free-text terms for class imbalance, resampling , and clinical prediction ; an example MEDLINE string is provided in S2 File. No language limits were applied, but non-English full texts had to be translatable. Study selection Search results will be imported into Zotero for deduplication [ 24 ] and prioritised with ASReview [ 25 ]. Two reviewers will independently screen titles/abstracts, followed by full texts, resolving conflicts by consensus or third adjudication. Reasons for exclusion will be recorded and displayed in a PRISMA flow diagram [ 26 ]. Data missing from the full text will be requested from authors (two-week window). 
Data extraction A piloted, standardised form will capture bibliometrics, clinical domain, sample size, imbalance ratio, resampling strategy, model family, performance metrics, calibration statistics, and cost-sensitive measures. Two independent reviewers will extract all items in duplicate into a REDCap (v14.0.19) database. A third reviewer will run the built-in comparison report, reconcile discrepancies, and export a single verified dataset. Statistical analyses will be performed in R (v4.4.0) using the metafor package (v4.8-0), dplyr (v1.1.4) and ggplot2 (v3.5.2). All code and a session-info file will be deposited in the OSF repository on publication. Outcomes and effect measures The primary effect size will be the logit-transformed AUC. When compatible statistics are reported, we will also collect sensitivity, F1-score, calibration slope, Brier score, and cost-based metrics. External validation results will be catalogued separately. Risk-of-bias and methodological quality Because the review focuses on methodological interventions rather than clinical effects, formal study-level tools (e.g., PROBAST) will be optional [ 27 ]. Instead, we will apply design-level screening for reproducibility, influence diagnostics (Cook’s distance [ 28 ], studentised residuals [ 31 ]), and small-study-effect tests (funnel plot [ 31 ], Egger’s regression [ 29 ], Vevea–Hedges weight-function model [ 30 ]) to inform sensitivity analyses. We will still chart whether studies report blinding, missing-data handling, and external validation; these items will be reported in a supplementary risk-of-bias table. Data synthesis Phase 1—Descriptive mapping: Tables and visualisations (e.g., heat maps, temporal plots) will summarise trends in resampling use, model type, imbalance severity, and performance. Phase 2—Quantitative synthesis: A random-effects meta-regression of logit-AUC will examine moderators (imbalance ratio, sample size, resampling class, model family). 
The REML estimator and Knapp–Hartung confidence intervals will be employed [ 31 ]. Heterogeneity will be assessed with τ² and I² [ 31 ]; leave-one-out analyses will test robustness. Analyses will be implemented in R (metafor, dplyr, ggplot2) [ 31 ]. Subgroup and sensitivity analyses Planned subgroup contrasts include oversampling vs undersampling, hybrid vs single-technique pipelines, cost-sensitive vs data-level only, high (>20 %) vs very-low (<5 %) minority prevalence, and deep-learning vs traditional models. Sensitivity analyses will exclude high-influence studies, studies without external validation, and those lacking calibration reporting. Imbalance ratio (IR) will be stratified a priori into four bins: very rare < 5 %, rare 5–10 %, moderate 10–20 %, and mild 20–30 % [ 6 ]. If any bin has < 10 studies, it will be merged with the next wider bin. For meta-regression, these bins will be dummy-coded (reference = mild), and IR will also be modelled as a restricted cubic spline to test linearity. Sensitivity analyses will repeat the model using two dichotomies (< 10 % vs ≥ 10 %; < 20 % vs ≥ 20 %). Living review plan Given rapid methodological advances, automated database alerts will rerun the search annually; new eligible studies will be screened and, where appropriate, integrated into updated meta-analyses, with version history transparently logged. Discussion Class imbalance remains one of the most stubborn threats to safe clinical prediction: skewed data encourage algorithms to optimise overall accuracy at the expense of rare—but clinically essential—events. Algorithm-level approaches that embed explicit mis-classification penalties can theoretically offset this bias [ 32 , 33 ]. At the same time, recent deep-learning innovations such as deep belief nets and focal-loss functions promise further gains in high-dimensional settings [ 34 , 35 ]. Yet the empirical value of these strategies has never been synthesised systematically across the medical spectrum. 
Our planned scoping review with meta-regression addresses a critical methodological gap. Anticipated challenges Extreme heterogeneity : preliminary scoping suggests wide dispersion in clinical domains, imbalance ratios, sample sizes, and metrics. Even when studies report AUC, transformation to a common logit scale may not fully harmonise differences in test–set construction and cross-validation folds. Inconsistent reporting : fewer than one in ten studies publish calibration indices, and details of cost-sensitive losses are often relegated to supplementary code or omitted entirely. Sparse external validation : most papers evaluate performance on random internal splits; true generalisability remains unknown. Publication and small-study effects: funnel-plot asymmetry is expected because smaller datasets often adopt aggressive oversampling that inflates apparent discrimination. Metric multiplicity : sensitivity, specificity, F-score, precision-recall AUC and balanced accuracy are reported idiosyncratically, complicating quantitative synthesis. Strengths Breadth of evidence : five bibliographic databases plus grey-literature repositories capture 15 years of work, producing the most extensive curated corpus of imbalance-related prediction studies. Dual synthesis : A descriptive map is paired with a random-effects meta-regression that tests moderators such as imbalance severity, sample size, and model family, offering granular insight unavailable in narrative reviews. Rigorous bias diagnostics : influence statistics, funnel-plot tests, trim-and-fill, and Vevea–Hedges models will quantify the robustness of pooled estimates, mitigating the optimism plaguing model-development literature. Technology-enabled workflows : machine learning–assisted screening via ASReview accelerates and transparently documents selection decisions [ 25 ]. 
Alignment with contemporary guidance : search, extraction, and reporting follow PRISMA 2020 extensions to enhance reproducibility and uptake [ 26 ]. Limitations Despite these safeguards, several constraints remain. First, residual heterogeneity is inevitable: even a comprehensive meta-regression may explain only a modest fraction of between-study variance. Second, the decision to use AUC as the primary effect size risks overlooking threshold-dependent performance and real-world utilities. Third, cost-sensitive studies may still be too few or inconsistently reported to support quantitative pooling, forcing a descriptive treatment that limits formal comparisons with resampling. Fourth, living-review updates will depend on the speed at which newly published work reports compatible statistics—the review could lag very recent methodological advances. Potential impact and influence on practice By establishing when and for whom resampling or weighting truly adds value, the review will help data scientists avoid reflexive oversampling that can obscure calibration or foster over-fitting. Evidence that cost-sensitive losses rival data-level balancing could shift practice toward simpler, loss-function–centric pipelines already available in mainstream frameworks [ 32 – 35 ]. Clinicians and journal editors could use the findings to demand fuller reporting of calibration, confusion matrices and mis-classification costs, accelerating the adoption of emerging AI reporting extensions (e.g., TRIPOD-AI), see also [ 26 ]. Regulators may likewise reference our recommendations when assessing the fairness of deployed diagnostic or prognostic models. Future directions The mapped gaps suggest four priorities: Prospective, multi-centre cohorts with rare outcomes to test whether cost-sensitive and focal-loss networks outperform oversampling in truly out-of-sample settings. 
Standardised reporting templates that mandate disclosure of class distribution, sampling strategy, calibration and decision-curve analysis; our findings can feed directly into upcoming guideline revisions. Generative augmentation and domain-adapted GANs : Early evidence (e.g., synthetic EEG and radiology data) hints at privacy-preserving promise but requires rigorous external validation [ 36 ]. Continuous evidence surveillance through annual database alerts and semi-automated screening pipelines aligns with the living-review paradigm and ensures the conclusions remain current as new imbalance-handling techniques emerge [ 25 , 26 ]. The planned review will quantify the performance lift (or degradation) attributable to balancing strategies and frame a research agenda aimed at more reproducible, cost-aware and clinically grounded predictive modelling. Data Availability No datasets were generated or analysed during the current study. All relevant data from this study will be made available upon study completion. References 1. ↵ Mena LJ , Gonzalez JA , editors. Machine Learning for Imbalanced Datasets: Application in Medical Diagnostic . FLAIRS ; 2006 . 2. Li D-C , Liu C-W , Hu SC . A learning method for the class imbalance problem with medical data sets . Computers in Biology and Medicine . 2010 ; 40 : 509 – 18 . OpenUrl PubMed 3. ↵ Rahman MM , Davis DN . Addressing the class imbalance problem in medical datasets . Int J Mach Learn Comput . 2013 ; 3 : 224 . OpenUrl 4. ↵ Mienye ID , Sun Y. Performance analysis of cost-sensitive learning methods with application to imbalanced medical data . Informatics Med Unlocked . 2021 ; 25 : 100690 . OpenUrl 5. ↵ Alahmari F. A comparison of resampling techniques for medical data using machine learning . J Inf Knowl Manag . 2020 ; 19 : 2040016 . OpenUrl 6. ↵ Carvalho M , Pinho AJ , Brás S. Resampling approaches to handle class imbalance: a review from a data perspective . J Big Data . 2025 ; 12 : 71 . OpenUrl 7. 
Panjainam P , Kanjanawattana S , editors. A Comparison of the Hybrid Resampling Techniques for Imbalanced Medical Data . Proc 7th Int Conf Robot Systems & Applications ; 2024 . 8. Jo T , Japkowicz N. Class imbalances versus small disjuncts . SIGKDD Explor Newsl . 2004 ; 6 : 40 – 9 . OpenUrl 9. ↵ van den Goorbergh R , van Smeden M , Timmerman D , Van Calster B. The harm of class-imbalance corrections for risk prediction models: illustration and simulation using logistic regression . J Am Med Inform Assoc . 2022 ; 29 : 1525 – 34 . OpenUrl CrossRef PubMed 10. ↵ Christodoulou E , Ma J , Collins GS , Steyerberg EW , Verbakel JY , Van Calster B. A systematic review shows no performance benefit of machine learning over logistic regression for clinical prediction models . J Clin Epidemiol . 2019 ; 110 : 12 – 22 . OpenUrl CrossRef PubMed 11. ↵ Demidenko E. Sample size determination for logistic regression revisited . Stat Med . 2007 ; 26 : 3385 – 97 . OpenUrl CrossRef PubMed Web of Science 12. Yenipinar A , Koç Ş , Çanga D , Kaya F. Determining sample size in logistic regression with G-Power . Black Sea J Eng Sci . 2019 ; 2 : 16 – 22 . OpenUrl 13. Charan J , Kaur R , Bhardwaj P , et al. Sample size calculation in medical research: a primer . Ann Natl Acad Med Sci (India) . 2021 ; 57 : 74 – 80 . OpenUrl 14. Balki I , Amirabadi A , Levman J , et al. Sample-size determination methodologies for machine learning in medical imaging research: a systematic review . Can Assoc Radiol J . 2019 ; 70 : 344 – 53 . OpenUrl CrossRef PubMed 15. Vabalas A , Gowen E , Poliakoff E , Casson AJ . Machine learning algorithm validation with a limited sample size . PLoS One . 2019 ; 14 : e0224365 . OpenUrl CrossRef PubMed 16. ↵ Figueroa RL , Zeng-Treitler Q , Kandula S , Ngo LH . Predicting sample size required for classification performance . BMC Med Inform Decis Mak . 2012 ; 12 : 1 – 10 . OpenUrl CrossRef PubMed 17. ↵ Araf I , Idri A , Chairi I. 
Cost-sensitive learning for imbalanced medical data: a review . Artif Intell Rev . 2024 ; 57 : 80 . OpenUrl 18. ↵ Cox DR . The regression analysis of binary sequences . J R Stat Soc B . 1958 ; 20 : 215 – 32 . OpenUrl 19. Block H-D. The perceptron: A model for brain functioning I . Rev Mod Phys . 1962 ; 34 : 123 . OpenUrl CrossRef Web of Science 20. Stitson M , Weston J , Gammerman A , et al. Theory of support vector machines . Univ London . 1996 ; 117 : 188 – 91 . OpenUrl 21. ↵ Hastie T , Tibshirani R , Friedman J. Boosting and additive trees . In: The Elements of Statistical Learning . 2nd ed. 2009 . p. 337 – 87 . 22. ↵ Moher D , Shamseer L , Clarke M , Ghersi D , Liberati A , Petticrew M , Shekelle P , Stewart LA ; PRISMA-P Group . Preferred reporting items for systematic review and meta-analysis protocols (PRISMA-P) 2015 statement . Syst Rev . 2015 Jan 1; 4 ( 1 ): 1 . doi: 10.1186/2046-4053-4-1 . PMID: 25554246 ; PMCID: PMC4320440 . OpenUrl CrossRef PubMed 23. ↵ Tricco AC , Lillie E , Zarin W , O’Brien KK , Colquhoun H , Levac D , Moher D , Peters MDJ , Horsley T , Weeks L , Hempel S , Akl EA , Chang C , McGowan J , Stewart L , Hartling L , Aldcroft A , Wilson MG , Garritty C , Lewin S , Godfrey CM , Macdonald MT , Langlois EV , Soares-Weiser K , Moriarty J , Clifford T , Tunçalp Ö , Straus SE . PRISMA Extension for Scoping Reviews (PRISMA-ScR): Checklist and Explanation . Ann Intern Med . 2018 Oct 2; 169 ( 7 ): 467 – 473 . doi: 10.7326/M18-0850 . Epub 2018 Sep 4. PMID: 30178033 . OpenUrl CrossRef PubMed 24. ↵ https://www.zotero.org/support/credits_and_acknowledgments . Zotero . 7.0.15 ed. Vienna, VA USA : Corporation for Digital Scholarship ; 2025 . 25. ↵ van de Schoot R , de Bruin J , Schram R , Zahedi P , de Boer J , Weijdema F , et al. An open source machine learning framework for efficient and transparent systematic reviews . Nature Machine Intelligence . 2021 ; 3 ( 2 ): 125 – 33 . OpenUrl 26. 
↵ Page MJ , McKenzie JE , Bossuyt PM , Boutron I , Hoffmann TC , Mulrow CD , et al. The PRISMA 2020 statement: an updated guideline for reporting systematic reviews . BMJ . 2021 ; 372 : 71 . 27. ↵ Wolff RF , Moons KGM , Riley RD , Whiting PF , Westwood M , Collins GS , et al. PROBAST: A Tool to Assess the Risk of Bias and Applicability of Prediction-Model Studies . Ann Intern Med . 2019 ; 170 ( 1 ): 51 – 58 . OpenUrl CrossRef PubMed 28. ↵ Cook RD . Detection of Influential Observation in Linear Regression . Technometrics . 1977 ; 19 ( 1 ): 15 – 18 . doi: 10.1080/00401706.1977.10489493 . OpenUrl CrossRef 29. ↵ Egger M , Davey Smith G , Schneider M , Minder C. Bias in meta-analysis detected by a simple graphical test . BMJ . 1997 ; 315 : 629 – 634 . doi: 10.1136/bmj.315.7109.629 . OpenUrl Abstract / FREE Full Text 30. ↵ Vevea JL , Hedges LV . A general linear model for estimating effect size in the presence of publication bias . Psychol Methods . 1995 ; 1 ( 3 ): 241 – 255 . doi: 10.1037/1082-989X.1.3.241 . OpenUrl CrossRef 31. ↵ Harrer M , Cuijpers P , Furukawa TA , Ebert DD . Doing Meta-Analysis with R: A Hands-On Guide . Boca Raton (FL ): Chapman & Hall/CRC ; 2021 . 32. ↵ Elkan C. The foundations of cost-sensitive learning . IJCAI ; 2001 . 33. ↵ Zhou Z-H , Liu X-Y. Training cost-sensitive neural networks with methods addressing the class imbalance problem . IEEE Trans Knowl Data Eng . 2005 ; 18 : 63 – 77 . OpenUrl 34. ↵ Hinton GE , Osindero S , Teh Y-W. A fast learning algorithm for deep belief nets . Neural Comput . 2006 ; 18 : 1527 – 54 . OpenUrl CrossRef PubMed Web of Science 35. ↵ Lin T-Y , Goyal P , Girshick R , He K , Dollár P. Focal loss for dense object detection . In: Proc IEEE Int Conf Computer Vision ; 2017 . 36. ↵ Shickel B , Tighe PJ , Bihorac A , Rashidi P. Deep EHR: recent advances in deep learning for electronic health record analysis . IEEE J Biomed Health Inform . 2017 ; 22 : 1589 – 604 . OpenUrl PubMed View the discussion thread. 
Back to top Previous Next Posted May 20, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Resampling Methods for Class Imbalance in Clinical Prediction Models: A Systematic Review and Meta-Regression Protocol Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Resampling Methods for Class Imbalance in Clinical Prediction Models: A Systematic Review and Meta-Regression Protocol Osama Abdelhay , Adam Shatnawi , Hassan Najadat , Taghreed Altamimi medRxiv 2025.05.19.25327868; doi: https://doi.org/10.1101/2025.05.19.25327868 Share This Article: Copy Citation Tools Resampling Methods for Class Imbalance in Clinical Prediction Models: A Systematic Review and Meta-Regression Protocol Osama Abdelhay , Adam Shatnawi , Hassan Najadat , Taghreed Altamimi medRxiv 2025.05.19.25327868; doi: https://doi.org/10.1101/2025.05.19.25327868 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) 
Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffc76087b4f3fe2',t:'MTc3OTQ1OTYyMQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.