Artificial Intelligence for Significant Mitral Regurgitation Screening and Diagnosis: A Systematic Review and Meta-analysis

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 42,838 characters · extracted from preprint-html · click to expand
Artificial Intelligence for Significant Mitral Regurgitation Screening and Diagnosis: A Systematic Review and Meta-analysis | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Artificial Intelligence for Significant Mitral Regurgitation Screening and Diagnosis: A Systematic Review and Meta-analysis View ORCID Profile Udochukwu Godswill Anosike , Luena Seferasi , Marian Abedua Harrison , Ramiro Julian Nin Albonico , View ORCID Profile Sonia Ijeoma Etumudon , Leonardo Antunes Mesquita doi: https://doi.org/10.1101/2025.11.16.25340343 Udochukwu Godswill Anosike 1 Faculty of Medicine, Nnamdi Azikiwe University College of Health Sciences , Nigeria MBBS Find this 
author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Udochukwu Godswill Anosike For correspondence: ug.anosike{at}stu.unizik.edu.ng Luena Seferasi 2 Faculty of Medicine, University of Medicine Tirana , Tirana, Albania Find this author on Google Scholar Find this author on PubMed Search for this author on this site Marian Abedua Harrison 3 Sakumono Specialist Hospital , Accra, Ghana MBChB, BSc Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ramiro Julian Nin Albonico 4 Centro Cardiológico Americano, Sanatorio Americano , Montevideo, Uruguay MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sonia Ijeoma Etumudon 5 Saratov State Medical University , Russia MBBS Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sonia Ijeoma Etumudon Leonardo Antunes Mesquita 6 Department of Electrophysiology, Hospital Madre Teresa , Belo Horizonte, Brazil MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Objectives To evaluate performance of artificial intelligence (AI) models using electrocardiogram (ECG) and echocardiogram (ECHO) for predicting significant mitral regurgitation (MR). Materials and methods We performed a systematic review and meta-analysis of studies assessing AI models based on ECG or ECHO for detection of significant MR. Search was conducted in PubMed, Scopus, and Cochrane. Endpoints included: sensitivity, specificity of the models. Area under the summary receiver-operating characteristic curve (AUC) was calculated using a bivariate random-effects model. Results Fifteen studies (n = 2,470,826) were included: seven using ECG (n = 2,467,390) and eight using ECHO (n = 3,436). 
For AI-ECG models validated on external datasets, pooled sensitivity was 87.7% (95% CI 80.4 to 92.5) and specificity was 54.0% (95% CI 40.3 to 67.1), with an AUC of 81%. For AI-ECHO, sensitivity was 89.7% (95% CI 78.2 to 95.5) and specificity was 92.8% (95% CI 81.8 to 97.4), with an AUC of 96%. Conclusion AI models applied to ECG and ECHO demonstrate strong performance for detecting significant MR and may support clinicians’ diagnosis of MR. Clinical implementation, however, requires further validation and external testing across diverse populations. Introduction Mitral regurgitation (MR) is one of the most prevalent valvular heart diseases globally - affecting up to 2-3% of general population, and increases with age, reaching approximately 10% in individuals over 75 years old. 1 , 2 Early identification of MR by imaging is critical to prevent irreversible progression to heart failure, reducing morbidity and mortality, and enabling timely therapeutic intervention. The European Society of Cardiology (ESC) and the European Association for Cardio-Thoracic Surgery (EACTS) guideline for management of valvular heart disease recommend timely intervention for symptomatic severe MR due to association with worse outcomes. 3 Conventional imaging (electrocardiogram) is not sensitive or specific for MR and has no role in MR screening due to poor sensitivity. Increasing efforts have been made by medical ultrasound experts, mathematicians, and computer scientists to promote the integration of ultrasound, medicine, and AI for mitral valve analysis, thereby improving the accuracy of ultrasonic diagnosis, reducing the misdiagnosis rate, shortening the reporting time, and meeting growing clinical needs. 4 Artificial intelligence (AI), a technological driving force at present, has emerged as a promising strategy for scalable and accessible MR screening and diagnosis using electrocardiogram (ECG) and echocardiogram (ECHO). 
AI-ECG can screen for moderate-to-severe valvular disease (including MR) with good discrimination, and modest positive predictive value in low-prevalence settings - so it is best used to prioritize ECHO. 5 AI models are utilizing conventional 12-lead ECGs to detect early signs of significant MR - QRS complex alterations, abnormal P-wave and T-wave morphology, improving identification of undetected moderate-to-severe MR, predicting progression to atrial fibrillation or left ventricular dysfunction and accelerating patient referral for confirmatory echocardiography. 6 , 7 , 8 AI-ECHO has the potential to automate view selection and quantification, decrease operator and inter-observer variability in image acquisition, measurements, and interpretation. 9 For assessing mitral regurgitation (MR) severity, it has demonstrated feasibility, speed, and high diagnostic accuracy, with predictive capability for 1-year mortality. 10 Clinical integration may enhance patient outcomes by streamlining referrals to specialized care and improving access to evidence-based treatments, while also optimizing workflow and diagnostic efficiency in echocardiography labs. Therefore, we conducted a systematic review and meta-analysis to evaluate the diagnostic accuracy of AI-based models for non-invasive detection of moderate-to-severe or severe MR through ECG and ECHO, synthesize evidence across various modalities, algorithms, and identify key sources of heterogeneity, and validation quality. Findings from this study may help to translate AI to bedside and to enhance clinician screening and diagnosis of MR. Methods Search Strategy This systematic review and meta-analysis were performed and reported in accordance with the Preferred Reporting Items for Systematic Reviews and Meta-Analyses of Diagnostic Test Accuracy (PRISMA-DTA) guidelines. 11 The protocol was registered in the International Prospective Register of Systematic Reviews (PROSPERO) under CRD420251090360. 
PubMed, SCOPUS, and Cochrane Library were systematically searched from inception to June 2025. Boolean operators (AND, OR) and the following terms were used in the search strategy: “mitral regurgitation”, “mitral valve”, “artificial intelligence”, “AI”, “machine learning”, “deep learning”, “electrocardiogram”, “ECG”, “echocardiogram”, “ECHO”. References from all included studies were also manually searched for any additional eligible studies (backward snowballing). Inclusion and exclusion criteria Studies were included if they met all the following criteria: (1) evaluated the use of AI models based on ECG or ECHO for detection of significant MR; (2) in pediatric and adult patients; and (3) reported any outcomes of interest. We excluded studies if they (1) involved overlapping patient populations; (2) were case reports, reviews, letters, or abstracts. Two separate reviewers (U.G.A. and L.S.) conducted screening of titles and abstracts, and selected studies underwent full-text assessment based on inclusion and exclusion criteria. Discrepancies were resolved by a third reviewer (L.A.M.) in consensus. Significant MR represents patients with moderate or severe MR. Data extraction Two reviewers (U.G.A. and L.S.) independently extracted data from the selected studies into a standardized form, and discrepancies were resolved by consensus with a third reviewer (L.A.M.). Following study characteristics were collected: (1) authors; (2) year of publication; (3) study design; (4) country, (5) imaging technique, (6) AI model, (7) Diagnostic performance of AI-ECG and AI-ECHO, including the number of true positives (TP), false positives (FP), false negatives (FN), and true negatives (TN), sensitivity and specificity. 
We extracted the following baseline patient data: (1) mean/median age; (2) percentage of male/female; (3) mean/median body mass index (BMI); (4) mean/median left ventricular ejection fraction (LVEF); (5) mean/median left atrial diameter (LAd); (6) percentage of patients with comorbidities (hypertension, diabetes, coronary artery disease, heart failure, atrial fibrillation). Quality assessment To assess bias, each study included in the analysis was independently evaluated by two reviewers (L.A.M. and U.G.A.) using Quality Assessment of Diagnostic Accuracy Studies (QUADAS-2) tool (Supplementary Figure 1 ). 12 This framework consists of four key domains: patient selection (D1), index test (D2), reference standard (D3), and flow and timing (D4). Each domain is analyzed for potential sources of bias and rated as low, high or unclear risk. Studies are categorized into one of two groups: “low risk of bias” if they received a low risk rating across all domains, or “at risk of bias” if any domain was rated as high or unclear risk. Discrepancies between the two reviewers were resolved by consulting a third reviewer (L.S.). Statistical analysis The pooled diagnostic sensitivity and specificity with corresponding 95% confidence intervals (CIs) were obtained using a bivariate random-effects model. Estimates of sensitivity and specificity were summarized in a summary receiver-operating characteristic curve (SROC) and forest plots to provide a graphical overview of the outcomes and potential sources of heterogeneity. Area under the curve (AUC) for the SROC was estimated using parametric bootstrapping. 13 Heterogeneity of studies was qualitatively assessed through visual examination of forest plots and quantitatively evaluated using the Zhou and Dendukuri bivariate Bayesian approach for diagnostic meta-analysis (I² > 50% was considered significant heterogeneity). 
14 Correlation between logit sensitivity and 1-specificity was calculated to assess the potential for a threshold effect, and a coefficient (ρ) ≥ 0.6 was interpreted as significant. 15 Subgroup analysis was performed based on pre-specified covariate (AI model). Including year as a covariate allows evaluation of temporal trends in diagnostic performance, reflecting advances in AI methods and datasets over time. Sensitivity analyses explored sources of heterogeneity, and Deeks’ test for publication bias was planned if ≥10 studies were included. Analyses were conducted in R (version 4.5.1). Results Study selection and characteristics The initial search returned 930 studies, of which 94 were duplicates. We screened titles and abstracts of the remaining studies (n = 837). Of these, 30 studies were full text assessed for eligibility, and we excluded 15 studies. We included 15 studies, comprising 340,893 patients with significant MR and 2,126,497 with no MR or MR but mild [ Fig. 1 ]. Download figure Open in new tab Fig 1. PRISMA FLOW DIAGRAM OF STUDY SCREENING AND SELECTION All studies included were retrospective cohort. The median age of patients ranged from 64 to 71 years, 57% were male, and median LVEF ranged from 45% to 63%. A total of seven studies employed AI to predict MR using ECG 5 , 7 , 8 , 16 , 17 – 19 , while eight studies used ECHO 20 – 27 . Baseline characteristics were similar across significant MR and control group. Refer to Table 1 for baseline characteristics of the included studies and supplementary table 1 for the key findings of each study. View this table: View inline View popup Download powerpoint TABLE 1. BASELINE CHARACTERISTICS OF STUDY Quality appraisal Study’s quality assessed by QUADAS-2 tool is illustrated in supplementary Figure 1 . One study was classified as low risk, while the remaining 14 were unclear risk. 
Most studies were downgraded as unclear risk in “index test” domain due to unclear blinding between AI-based ECG interpretation and echocardiographic reference standard. Diagnostic accuracy of significant MR Pooled analysis The forest plots demonstrating the sensitivity and specificity of the included studies are shown in Figs 2 and 4. The correlations between logit-sensitivity and 1-specificity for AI-ECG and AI-ECHO were 0.93 and −1.00, respectively, indicating presence of a threshold effect. Due to inconsistent reporting of the diagnostic thresholds among the included studies, meta-regression analysis to explore the source of the threshold effect could not be performed. Consequently, pooled sensitivity and specificity were interpreted cautiously, and the hierarchical summary receiver operating characteristic (HSROC) model and area under the curve (AUC), which account for the trade-off between sensitivity and specificity, were used to summarize diagnostic performance. Download figure Open in new tab Fig 2. Forest plot of AI-ECG model external validation set (EVS) demonstrating sensitivity and specificity. Using a bivariate approach, the pooled sensitivity and specificity of AI-ECG models were 87.7% (95% CI 80.4 to 92.5) and 54.0% (95% CI 40.3 to 67.1), respectively ( Fig 3 ). AUC for the pooled sROC curve was 81% (95% CI 76 to 85). There was substantial heterogeneity using the bivariate model (I² = 93.5%), reflecting varied architectures and datasets. Download figure Open in new tab Fig 3. Summary Receiver-Operating Characteristic Curve (sROC) for ECG detecting significant mitral regurgitation. The filled dark circle represents the summary point of sensitivity and specificity. Each circle surrounding the sROC curve reflects one of the included studies with a pair of sensitivity and 1-specificity. The 95% confidence interval around the summary estimates represents a 95% probability the true average sensitivity and specificity of AI-ECG lies within that region. 
The area under the curve (AUC) was 0.81 (95% CI 0.76–0.85). Using a bivariate approach, pooled sensitivity and specificity of AI-ECHO models were 89.7% (95% CI 78.2 to 95.5) and 92.8% (95% CI 81.8 to 97.4), respectively ( Fig 5 ). AUC for the pooled sROC curve was 96% (95% CI 85 to 98). There was no heterogeneity using the bivariate model (I² = 0%) - this likely reflects the narrow internal test design rather than absence of heterogeneity. Endpoints for AI-ECHO external validation set were not available in studies. Dhingra et al. reported individual endpoints of sites where AI-ECG model was internally tested and externally validated ( Fig 2 ). B, G, L, W, P represent external validation hospital sites. B - Bridgeport hospital; G - Greenwich hospital; L - Lawrence + Memorial hospital; W - Westerly hospital; P - Population-based cohort (ELSA-Brazil). Vrudhala et al. and Moghaddasi et al. reported individual endpoints of MR severity and machine learning models respectively ( Fig 4 ). M - Moderate MR; S - Severe MR. MS - Support vector machine model; ML - Linear discriminant analysis model + convolutional neural network. Download figure Open in new tab Fig 4. Forest plot of AI-ECHO models internal test set (ITS) demonstrating sensitivity and specificity. Download figure Open in new tab Fig 5. Summary Receiver-Operating Characteristic Curve (sROC) for ECHO detecting significant mitral regurgitation. The filled dark circle represents the summary point of sensitivity and specificity. Each circle surrounding the sROC curve reflects one of the included studies with a pair of sensitivity and 1-specificity. The 95% confidence interval around the summary estimates represents a 95% probability the true average sensitivity and specificity of AI-ECHO lies within that region. The area under the curve (AUC) was 0.96 (95% CI 0.85–0.98). Subgroup analysis Subgroup analysis compared the performance of AI models applied to ECHO ( Fig 6 ). 
Machine learning (ML) showed better performance than deep learning for diagnosing significant MR - though only very few studies were involved. 17 , 20 , 21 Deep learning was the major AI model used in ECG studies - hence the absence of subgroup analysis. Download figure Open in new tab Fig 6. Subgroup analysis of AI-ECHO model demonstrating sensitivity and specificity. Sensitivity analyses The leave-one-out analysis (supplementary Table 3) showed consistent pooled estimates for both sensitivity and specificity, indicating the robustness of the findings. Notably, exclusion of Shiraga et al. (2023) led to the largest increase in pooled sensitivity, indicating that this study contributed a lower-than-average sensitivity. Exclusion of Dhingra et al. (2025) and Kwon et al. (2020) resulted in a modest increase in specificity. No individual study significantly altered the overall estimates, suggesting minimal influence from outliers and stable diagnostic performance of the AI-ECG models. Discussion This systematic review and meta-analysis included 15 studies with a total of 2,470,826 patients. Of these, seven studies were based on ECG (n = 2,467,390) and eight were based on ECHO (n = 3,436). Our analysis demonstrated that AI models based on ECG showed high sensitivity (88%) but limited specificity (54%) for detecting significant MR. In contrast, models based on ECHO showed excellent specificity (93%) and a similarly high sensitivity (90%). A sensitivity of ≥87% suggests that AI-ECG models could be clinically useful as screening tools for significant MR. This could help triage patients for further echocardiographic evaluation, especially where access to imaging is limited. On the other hand, the low specificity of AI-ECG may lead to increased false positives, with unnecessary downstream testing if used in isolation, and serves as a barrier to clinical implementation as a screening tool. 
Furthermore, the introduction of a standardized flowchart (for concomitant assessment of symptoms or comorbidities that increase the pre-test probability of MR diagnosis) to support AI-ECG at the initial stage of screening should be considered as this may reduce the rate of false positives as well as unwarranted echocardiogram. As shown in supplementary table 2 , convolutional neural network was the prominent architecture employed by studies to aid AI models process data from ECGs with accuracy and precision. There were differences in the sensitivity and specificity among the included studies. Heterogeneity of estimates using the bivariate model was also significant. Upon further exploration, studies by Lin et al., and Vaid et al., contributed the most to the heterogeneity based on inspection of EVS forest plots ( Fig 2 ) with relatively lower sensitivity and higher specificity than others. 16 , 18 Variability in comorbidities, and clinical setting likely contributed to the observed heterogeneity. Notably, Lin et al.’s external cohort included older patients with more comorbidities and combined data from an academic medical center and a community hospital, introducing heterogeneity in population characteristics and clinical practice. Likewise, Vaid et al. reported significant demographic differences in external cohorts. These findings underscore the importance of contextual model validation and the need for external datasets that reflect real-world diversity to ensure robust and generalizable AI performance. The implementation of noninvasive and scalable AI-based ECG as a screening tool could improve early detection of mitral regurgitation and facilitate timely referral and management. While cardiologist interpretation of echocardiography is essential, the low resolution and artifacts in images can hinder decision-making, making computer-aided diagnosis (CAD) particularly valuable. 
21 AI-ECHO automates quantification of MR severity, provides mechanistic classification and informs treatment - which AI-ECG cannot offer. Our study highlights the high sensitivity and specificity observed in AI-ECHO models signaling their clinical viability and ability to support clinicians as a tool for accurate diagnosis of significant MR. Several architectures (video-based convolutional neural network, self-supervised learning algorithm, support vector machine, fully convolutional neural network and mask region with convolutional neural network) were employed by studies to help AI models process data from ECHOs with accuracy and precision ( Supplementary table 2 ). According to our subgroup analyses ( Fig 6 ), machine learning models outperformed deep learning models. This contrasts with a study assumption that deep learning approaches have shown superior performance compared to machine-learning (ML) approaches when applied to ECHO. 9 This apparent superiority of ML over DL in ECHO-based studies should be interpreted cautiously, given the very small number of ML studies and potential differences in dataset size and quality. Though focused cardiac ultrasound (FCU), a form of POCUS, can provide reasonably accurate assessments of chamber size, ventricular function, pericardial effusion, and central venous volume, it is suboptimal for evaluating more complex cardiac pathology such as valvular disease and diastolic dysfunction. 28 The accuracy of fully automated ML-enabled POCUS devices for quantification of LVEF was comparable to reference echocardiograms interpreted by expert cardiologists. 29 Beyond quantification of LVEF, ML-enabled POCUS and DL-enabled POCUS can become useful diagnostic tools for accurate detection of significant MR. Therefore, the integration of artificial intelligence with POCUS can move cardiac ultrasound beyond the echo lab, improving access to efficient, high-quality care. 
Based on the total sample, the database size of AI-based ECG studies was evidently larger than AI-based ECHO studies ( supplementary table 1 ). The larger database size of AI-based ECG studies likely reflects practical, logistical, and technical factors - ECGs are cheaper, standardized, widely available, easy to digitize, and easier to share than ECHOs. The implication is that ECG AI models are more likely to reach real-world clinical deployment faster because large datasets support robust validation whereas ECHO AI models may lag behind, needing multicenter collaborations to achieve sufficient data volume. Also, majority of AI-ECG studies utilized complex deep learning model architecture such as convolutional neural networks while AI-ECHO studies had variations in the model architecture used ( supplementary table 2 ). Despite the variations in size of datasets and model architecture, included studies reported strong performance across diverse patient demographics including those at risk of developing moderate-to-severe or severe MR (pediatric patients with rheumatic heart disease and adult patients with chronic atrial fibrillation). Quality assessment using the QUADAS-2 tool ( supplementary figure 1 ) revealed that most included studies were at unclear risk of bias, primarily due to insufficient reporting of blinding between the AI model output and the reference standard. Only one study was rated as low risk across all domains. The most frequent concern was in the index test domain, where lack of clarity regarding interpretation independence was common. These findings highlight the methodological limitations in the current literature and reinforce the need for standardized reporting and rigorous design, including prospective validation and clear blinding protocols, in future AI diagnostic studies. Our study has several limitations. 
We included few studies: only seven AI-ECG and eight AI-ECHO studies met the eligibility criteria, which may limit the overall generalizability of our findings. Existing studies vary widely in AI architecture used and range of performance ( Supplementary table 2 ). Meta-regression based on age group, disease etiology (degenerative/functional vs rheumatic), and MR grading could not be performed due to limited data. Also, the inability to conduct meta-regression due to insufficient threshold data restricted the evaluation of threshold-related heterogeneity. This limitation may affect the precision of pooled estimates and underscores the importance of consistent threshold reporting in diagnostic research. Notably, none of the AI-ECHO studies reported external validation datasets, restricting the ability to assess model performance across independent populations and real-world settings. Among the AI-ECG studies, only four included external validation cohorts, which limits the strength of conclusions. Fewer studies were available for the estimation of specificity compared to sensitivity in analyses of AI-ECHO models. This imbalance may introduce bias in the pooled specificity estimate and limit its precision. Additionally, studies that reported specificity may differ methodologically or clinically from those that did not, potentially affecting the representativeness of the findings. As a result, conclusions drawn regarding the ability of AI-augmented ECHO to correctly identify patients without significant MR should be interpreted with caution. Conclusion In this systematic review and meta-analysis, we found that AI-based tools leveraging ECG and echocardiographic data show promise in predicting significant mitral regurgitation (MR). AI-ECG models offer high sensitivity but limited specificity, suggesting potential as a screening tool, while AI-ECHO demonstrates both high sensitivity and specificity, supporting its use in diagnostic workflows. 
Despite encouraging results, limited generalizability and low specificity of AI-ECG pose challenges for clinical implementation. Standardized datasets and external validation are needed to advance clinical adoption and integration into guidelines for diagnosis of MR. Data Availability All data produced in the present study are available upon reasonable request to the authors Footnotes Disclosures: All authors report no relationships that could be construed as a conflict of interest. All authors take responsibility for all aspects of the reliability and freedom from bias of the data presented and their discussed interpretation. ABBREVIATIONS AI Artificial Intelligence AUC Area under curve CMR Cardiac Magnetic Resonance DL Deep learning ECG Electrocardiogram ECHO Echocardiography EVS External validation set FCU Focused cardiac ultrasound ITS Internal test set ML Machine learning MR Mitral regurgitation POCUS Point-of-care-ultrasound PRISMA Preferred Reporting Items for Systematic Review and Meta-Analyses PROSPERO International Prospective Register of Systematic Reviews QUADAS Quality Assessment of Diagnostic Accuracy Studies sROC Summary receiver-operating characteristic curve References 1. ↵ Wu S , Chai A , Arimie S , Mehra A , Clavijo L , Matthews RV , Shavelle DM . Incidence and treatment of severe primary mitral regurgitation in contemporary clinical practice . Cardiovascular Revascularization Medicine . 2018 Dec 1 ; 19 ( 8 ): 960 – 3 . OpenUrl CrossRef PubMed 2. ↵ Douedi S , Douedi H. Mitral regurgitation . In StatPearls [Internet] 2024 Apr 30 . StatPearls Publishing . 3. ↵ Praz F , Borger MA , Lanz J , Marin-Cuartas M , Abreu A , Adamo M , Marsan NA , Barili F , Bonaros N , Cosyns B , De Paulis R . 2025 ESC/EACTS Guidelines for the management of valvular heart disease: Developed by the task force for the management of valvular heart disease of the European Society of Cardiology (ESC) and the European Association for Cardio-Thoracic Surgery (EACTS) . 
European Journal of Cardio-Thoracic Surgery . 2025 Aug ; 67 ( 8 ): ezaf276 . OpenUrl PubMed 4. ↵ Jeganathan J , Knio Z , Amador Y , Hai T , Khamooshian A , Matyal R , Khabbaz KR , Mahmood F . Artificial intelligence in mitral valve analysis . Annals of cardiac anaesthesia . 2017 Apr 1 ; 20 ( 2 ): 129 – 34 . OpenUrl PubMed 5. ↵ Lin YT , Lin CS , Tsai CS , Tsai DJ , Lou YS , Fang WH , Lee YT , Lin C . Comprehensive clinical application analysis of artificial intelligence-enabled electrocardiograms for screening multiple valvular heart diseases . Aging (Albany NY ). 2024 May 16 ; 16 ( 10 ): 8717 . OpenUrl PubMed 6. ↵ Dhingra LS , Aminorroaya A , Sangha V , Pedroso AF , Shankar SV , Coppi A , Foppa M , Brant LC , Barreto SM , Ribeiro AL , Krumholz HM . Ensemble deep learning algorithm for structural heart disease screening using electrocardiographic images: PRESENT SHD . Journal of the American College of Cardiology . 2025 Apr 1 ; 85 ( 12 ): 1302 – 13 . OpenUrl PubMed 7. ↵ Attia ZI , Kapa S , Lopez-Jimenez F , McKie PM , Ladewig DJ , Satam G , Pellikka PA , Enriquez-Sarano M , Noseworthy PA , Munger TM , Asirvatham SJ . Screening for cardiac contractile dysfunction using an artificial intelligence–enabled electrocardiogram . Nature medicine . 2019 Jan ; 25 ( 1 ): 70 – 4 . OpenUrl CrossRef PubMed 8. ↵ Kwon JM , Kim KH , Akkus Z , Jeon KH , Park J , Oh BH . Artificial intelligence for detecting mitral regurgitation using electrocardiography . Journal of electrocardiology . 2020 Mar 1 ; 59 : 151 – 7 . OpenUrl PubMed 9. ↵ Akkus Z , Aly YH , Attia IZ , Lopez-Jimenez F , Arruda-Olson AM , Pellikka PA , Pislaru SV , Kane GC , Friedman PA , Oh JK . Artificial intelligence (AI)-empowered echocardiography interpretation: a state-of-the-art review . Journal of clinical medicine . 2021 Mar 30 ; 10 ( 7 ): 1391 . OpenUrl PubMed 10. ↵ Sadeghpour A , Jiang Z , Hummel YM , Frost M , Lam CS , Shah SJ , Lund LH , Stone GW , Swaminathan M , Weissman NJ , Asch FM . 
An Automated Machine Learning–Based Quantitative Multiparametric Approach for Mitral Regurgitation Severity Grading . Cardiovascular Imaging . 2025 Jan 1 ; 18 ( 1 ): 1 – 2 . OpenUrl PubMed 11. ↵ Salameh JP , Bossuyt PM , McGrath TA , Thombs BD , Hyde CJ , Macaskill P , Deeks JJ , Leeflang M , Korevaar DA , Whiting P , Takwoingi Y. Preferred reporting items for systematic review and meta-analysis of diagnostic test accuracy studies (PRISMA-DTA): explanation, elaboration, and checklist . bmj . 2020 Aug 14 ; 370 . 12. ↵ Whiting PF , Rutjes AW , Westwood ME , Mallett S , Deeks JJ , Reitsma JB , Leeflang MM , Sterne JA , Bossuyt PM , QUADAS-2 Group*. QUADAS-2: a revised tool for the quality assessment of diagnostic accuracy studies . Annals of internal medicine. 2011 Oct 18 ; 155 ( 8 ): 529 – 36 . OpenUrl CrossRef PubMed Web of Science 13. ↵ Reitsma JB , Glas AS , Rutjes AW , Scholten RJ , Bossuyt PM , Zwinderman AH . Bivariate analysis of sensitivity and specificity produces informative summary measures in diagnostic reviews . Journal of clinical epidemiology . 2005 Oct 1 ; 58 ( 10 ): 982 – 90 . OpenUrl CrossRef PubMed Web of Science 14. ↵ Zhou Y , Dendukuri N . Statistics for quantifying heterogeneity in univariate and bivariate meta-analyses of binary data: the case of meta-analyses of diagnostic accuracy . Statistics in medicine . 2014 Jul 20 ; 33 ( 16 ): 2701 – 17 . OpenUrl CrossRef PubMed 15. ↵ Devillé WL , Buntinx F , Bouter LM , Montori VM , De Vet HC , Van der Windt DA , Bezemer PD . Conducting systematic reviews of diagnostic studies: didactic guidelines . BMC medical research methodology . 2002 Dec ; 2 : 1 – 3 . OpenUrl PubMed 16. ↵ Vaid A , Argulian E , Lerakis S , Beaulieu-Jones BK , Krittanawong C , Klang E , Lampert J , Reddy VY , Narula J , Nadkarni GN , Glicksberg BS . Multi-center retrospective cohort study applying deep learning to electrocardiograms to identify left heart valvular dysfunction . Communications Medicine . 2023 Feb 14 ; 3 ( 1 ): 24 . 
OpenUrl PubMed 17. ↵ Ulloa-Cerna AE , Jing L , Pfeifer JM , Raghunath S , Ruhl JA , Rocha DB , Leader JB , Zimmerman N , Lee G , Steinhubl SR , Good CW . rECHOmmend: an ECG-based machine learning approach for identifying patients at increased risk of undiagnosed structural heart disease detectable by echocardiography . Circulation . 2022 Jul 5 ; 146 ( 1 ): 36 – 47 . OpenUrl CrossRef PubMed 18. ↵ Shiraga T , Makimoto H , Kohlmann B , Magnisali CE , Imai Y , Itani Y , Makimoto A , Schölzel F , Bejinariu A , Kelm M , Rana O . Improving valvular pathologies and ventricular dysfunction diagnostic efficiency using combined auscultation and electrocardiography data: A multimodal AI approach . Sensors . 2023 Dec 14 ; 23 ( 24 ): 9834 . OpenUrl PubMed 19. ↵ Sakuma M , Suzuki S , Hirota N , Motogi J , Umemoto T , Nakai H , Matsuzawa W , Takayanagi T , Hyodo A , Satoh K , Arita T . Utility of convolutional neural network-enhanced electrocardiogram to diagnose and predict mitral regurgitation in patients with chronic atrial fibrillation . Heart and Vessels . 2025 May 15 : 1 – 2 . 20. ↵ Edwards LA , Feng F , Iqbal M , Fu Y , Sanyahumbi A , Hao S , McElhinney DB , Ling XB , Sable C , Luo J . Machine learning for pediatric echocardiographic mitral regurgitation detection . Journal of the American Society of Echocardiography . 2023 Jan 1 ; 36 ( 1 ): 96 – 104 . OpenUrl CrossRef PubMed 21. ↵ Moghaddasi H , Nourian S . Automatic assessment of mitral regurgitation severity based on extensive textural features on 2D echocardiography videos . Computers in biology and medicine . 2016 Jun 1 ; 73 : 47 – 55 . OpenUrl PubMed 22. Brown K , Roshanitabrizi P , Rwebembera J , Okello E , Beaton A , Linguraru MG , Sable CA . Using artificial intelligence for rheumatic heart disease detection by echocardiography: Focus on mitral regurgitation . Journal of the American Heart Association . 2024 Jan 16 ; 13 ( 2 ): e031257 . OpenUrl PubMed 23. 
Vrudhula A , Duffy G , Vukadinovic M , Liang D , Cheng S , Ouyang D . High-throughput deep learning detection of mitral regurgitation . Circulation . 2024 Sep 17 ; 150 ( 12 ): 923 – 33 . OpenUrl PubMed 24. Yang F , Zhu J , Wang J , Zhang L , Wang W , Chen X , Lin X , Wang Q , Burkhoff D , Zhou SK , He K . Self-supervised learning assisted diagnosis for mitral regurgitation severity classification based on color Doppler echocardiography . Annals of Translational Medicine . 2022 Jan ; 10 ( 1 ): 3 . OpenUrl PubMed 25. Zhang Q , Liu Y , Mi J , Wang X , Liu X , Zhao F , Xie C , Cui P , Zhang Q , Zhu X . Automatic Assessment of Mitral Regurgitation Severity Using the Mask R-CNN Algorithm with Color Doppler Echocardiography Images . Computational and Mathematical Methods in Medicine . 2021 ; 2021 ( 1 ): 2602688 . OpenUrl 26. Yang F , Chen X , Lin X , Chen X , Wang W , Liu B , Li Y , Pu H , Zhang L , Huang D , Zhang M . Automated analysis of Doppler echocardiographic videos as a screening tool for valvular heart diseases . Cardiovascular Imaging . 2022 Apr 1 ; 15 ( 4 ): 551 – 63 . OpenUrl PubMed 27. ↵ Zhong L , Deng Q , Wang Y , Song H , Chen J , Zhou Q , Xiao J , Cao S . A fully convolutional neural network for the quantification of mitral regurgitation in echocardiography . Quantitative Imaging in Medicine and Surgery . 2024 Nov 11 ; 14 ( 12 ): 8707 . OpenUrl 28. ↵ East SA , Wang Y , Yanamala N , Maganti K , Sengupta PP . Artificial Intelligence-Enabled Point-of-Care Echocardiography: Bringing Precision Imaging to the Bedside . Current Atherosclerosis Reports . 2025 Dec ; 27 ( 1 ): 70 . OpenUrl CrossRef PubMed 29. ↵ Asch FM , Poilvert N , Abraham T , Jankowski M , Cleve J , Adams M , Romano N , Hong H , Mor-Avi V , Martin RP , Lang RM . Automated echocardiographic quantification of left ventricular ejection fraction without volume measurements using a machine learning algorithm mimicking a human expert . Circulation: Cardiovascular Imaging . 
2019 Sep ; 12 ( 9 ): e009303 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted November 17, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Artificial Intelligence for Significant Mitral Regurgitation Screening and Diagnosis: A Systematic Review and Meta-analysis Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Artificial Intelligence for Significant Mitral Regurgitation Screening and Diagnosis: A Systematic Review and Meta-analysis Udochukwu Godswill Anosike , Luena Seferasi , Marian Abedua Harrison , Ramiro Julian Nin Albonico , Sonia Ijeoma Etumudon , Leonardo Antunes Mesquita medRxiv 2025.11.16.25340343; doi: https://doi.org/10.1101/2025.11.16.25340343 Share This Article: Copy Citation Tools Artificial Intelligence for Significant Mitral Regurgitation Screening and Diagnosis: A Systematic Review and Meta-analysis Udochukwu Godswill Anosike , Luena Seferasi , Marian Abedua Harrison , Ramiro Julian Nin Albonico , Sonia Ijeoma Etumudon , Leonardo Antunes Mesquita medRxiv 2025.11.16.25340343; doi: https://doi.org/10.1101/2025.11.16.25340343 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Cardiovascular Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular 
Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffb82776ee858d3',t:'MTc3OTQ0OTY0NA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else 
if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00