Machine learning to classify left ventricular hypertrophy using ECG feature extraction by variational autoencoder

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 44,940 characters · extracted from preprint-html · click to expand
Machine learning to classify left ventricular hypertrophy using ECG feature extraction by variational autoencoder | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Machine learning to classify left ventricular hypertrophy using ECG feature extraction by variational autoencoder View ORCID Profile Amulya Gupta , Christopher J. 
Harvey , Ashley DeBauge , Sumaiya Shomaji , Zijun Yao , Yongkuk Lee , View ORCID Profile Amit Noheria doi: https://doi.org/10.1101/2024.10.14.24315460 Amulya Gupta 1 Department of Cardiovascular Medicine, The University of Kansas Medical Center , Kansas City, Kansas MBBS Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Amulya Gupta Christopher J. Harvey 1 Department of Cardiovascular Medicine, The University of Kansas Medical Center , Kansas City, Kansas MBBS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ashley DeBauge 2 Department of Internal Medicine, Washington University School of Medicine , St. Louis, Missouri, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sumaiya Shomaji 3 Department of Electrical Engineering and Computer Science, The University of Kansas , Lawrence, Kansas PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Zijun Yao 3 Department of Electrical Engineering and Computer Science, The University of Kansas , Lawrence, Kansas PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yongkuk Lee 4 Department of Biomedical Engineering, Wichita State University , Wichita, Kansas PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Amit Noheria 1 Department of Cardiovascular Medicine, The University of Kansas Medical Center , Kansas City, Kansas MBBS, SM Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Amit Noheria For correspondence: noheriaa{at}gmail.com Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF ABSTRACT Background Traditional ECG criteria for left ventricular hypertrophy (LVH) have modest diagnostic yield. 
Objective Develop and validate machine learning models for LVH diagnosis from ECG. Methods ECG summary features (rate, intervals, axis), R-wave, S-wave and overall-QRS amplitudes, and QRS voltage-time integrals (VTI QRS ) were extracted from 12-lead, vectorcardiographic X-Y-Z-lead, and 3D (L2 norm) representative-beat ECGs. Latent features (30 per ECG) were extracted using a variational autoencoder (trained on unselected >1 million ECGs) from X-Y-Z-lead representative-beat ECG signals. Logistic regression, random forest, light gradient boosted machine (LGBM), residual network (ResNet) and multilayer perceptron network (MLP) models using ECG features and sex, and a convolutional neural network (CNN) using ECG signals alone, were trained to predict LVH (left ventricular mass indexed in women >95 g/m 2 , men >115 g/m 2 ) on 482,734 adult ECG-echocardiogram (within 45 days) pairs. ROC-AUCs for LVH classification are reported from a separate hold-out test set. Results In the test set (n=54,984), AUC for LVH classification was higher for ML models using ECG features (LGBM 0.794, MLP 0.793, ResNet 0.795) compared with the best individual ECG variable (VTI QRS-Z 0.707), the best traditional criterion (Cornell voltage-duration product 0.716), and the CNN using ECG signals (0.788). Among patients without LVH who had a follow-up echocardiogram >1 (closest to 5) year later, LGBM false positives, compared to true negatives, had a 3.07 (95% CI 2.44, 3.86)-fold higher odds of developing future LVH (p<0.0001). Conclusions ML models are superior to traditional ECG criteria to classify LVH. Models trained on extracted ECG features, including latent variational autoencoder representations, can outperform CNN models directly trained on ECG signals. INTRODUCTION Left ventricular hypertrophy (LVH) refers to increased left ventricular mass, characterized by an increase in left ventricular wall thickness and/or enlargement of the left ventricular cavity. 
This is often secondary to pathological or physiological stressors such as chronic hypertension, valvular heart disease, athletic training, or genetic conditions. LVH is associated with over a two-fold increase in cardiovascular morbidity and all-cause mortality ( 1 ). Early detection and initiation of pharmacological treatment and lifestyle modifications can improve outcomes ( 2 ). Transthoracic echocardiography is the standard-of-care for the diagnosis of LVH. While non-invasive and widely available, echocardiography-based universal screening—even among high-risk populations, such as those with hypertension—is not cost-effective ( 3 , 4 ). Electrocardiography (ECG) is an affordable, widely accessible, and frequently used diagnostic tool for cardiovascular screening. Often considered an extension of the cardiovascular physical examination, it is estimated that over 100-300 million ECGs are performed annually in the United States ( 5 ). Traditionally, LVH diagnosis via 12-lead ECG relies on voltage-based criteria, yet these show poor sensitivity, limiting their utility as standalone screening tools ( 6 – 8 ). Machine learning (ML) can reduce reliance on human interpretation and yet increase the diagnostic accuracy of ECG ( 9 , 10 ). Several ECG-based ML models have been developed for detecting LVH, with varying sensitivities and specificities ( 11 ). Many of these studies use convolutional neural network (CNN) deep learning architectures to train models using ECG signals often with fewer than 10,000 training ECGs. Given that each 12-lead 10-second ECG signal at 500 Hz consists of 60,000 data points, using such a high-dimensionality input for ML training with a limited number of samples can result in overfitting and reduced generalizability ( 12 – 14 ). 
On the other hand, non-neural network ML architectures, such as logistic regression, random forest, gradient boosted machine, are not suited to use high-dimensional ECG signal data as input and are usually limited to using extracted ECG features, which may result in loss of diagnostic information ( 14 ). To mitigate these limitations, while preserving the advantages of deep learning, we developed a variational autoencoder (VAE) that can encode 0.75-sec-representative-beat from either X-Y-Z-lead or root-mean-squared ECG into 30 variables ( 14 – 16 ). These VAE latent encodings retain the ECG morphological information and can reconstruct back the ECG signal with high fidelity. In this study, we aimed to train and test different ML models using extracted ECG features including the latent encodings or the ECG signal to classify LVH from the representative-beat ECG. METHODS Patient selection and data retrieval An automated retrospective retrieval of records was performed from our clinical database at the University of Kansas Medical Center between May 2010 and January 2022 to search for ECG and echocardiogram performed on the same patient within 45 days of each other. Echocardiograms-ECG pairs with echocardiographic left ventricular mass index (LVMi) >95 g/m 2 for females and >115 g/m 2 for males were labelled as ‘LVH’ while rest of the pairs were assigned to the ‘no LVH’ group ( 14 ). The study was conducted with approval from the Institutional Review Board. Data extraction ECGs were acquired from the Philips 12-lead ECG system. The 12-lead 1200-ms representative-beat signals along with standard features like heart rate, PR interval, etc. were extracted. Echocardiograms were standard clinical studies performed for clinical indications both as outpatient and inpatient evaluations. 
Individual echocardiogram numeric variables including diastolic measurements of left ventricular internal diameter (LVIDd), interventricular septum (IVSd) and posterior wall (PWd) from 2D parasternal long-axis view were extracted using a backend query in HERON (Healthcare Enterprise Repository for Ontological Narration), a search discovery tool that facilitates searches on various hospital electronic data sources ( 17 , 18 ). Left ventricular mass was calculated using the American Society of Echocardiography recommended formula, $\text{LV mass (g)} = 0.8 \times 1.04 \times [(\text{LVIDd} + \text{IVSd} + \text{PWd})^{3} - \text{LVIDd}^{3}] + 0.6$, and indexed to body surface area ( 19 ). ECG processing The details of ECG processing performed using Python are provided in prior publications ( 14 , 20 , 21 ). In summary, vectorcardiographic X-Y-Z-lead ECGs were constructed from 12-lead ECGs using Kors’ matrix ( 22 ). The L2 norm of these orthogonal lead signals was used to derive the 3D-ECG lead. Voltage-time integrals of QRS (VTI QRS ) were obtained by the integration of the instantaneous voltage over the duration of QRS. R and S amplitudes were calculated as the maximum positive and negative deflections respectively of the QRS complex from the isoelectric line. Overall QRS amplitudes were calculated as the maximum absolute difference between the R and S deflections. Traditional Criteria and Univariable Models Based on review of literature, we selected 5 widely used ECG-based LVH diagnostic criteria for comparison, i.e. Peguero-Lo Presti criteria (max S + S v4 ), Cornell voltage (R avL + S v3 ), Cornell voltage-duration product (VDP), Sokolow-Lyon criteria (S V1 + max R (V5 or V6) ), and Gubner-Ungerleider criteria (R I + S III ) ( 23 ). We also selected ECG features for comparison namely QRS duration, Amplitude QRS-3D , and VTI QRS-3D ( 20 , 21 , 23 ). The latter 2 were calculated off the QRS from the L2 norm/3D ECG. In addition, Amplitude QRS and VTI QRS in the X, Y, and Z projections were included. 
Variational Autoencoder We trained a variational autoencoder (VAE) on 1.18 million unlabeled ECG signals to encode a 0.75-sec segment centered on the 1.2-sec representative beat ECG signal into 30 variables. The VAE has a dual neural network architecture with the encoder taking the ECG input and outputting 30 latent variables, and the decoder inputting the 30 latent variables and outputting the ECG signal. The network was rewarded in training to encode the signal such as to learn accurate reconstruction of the original signal from the latent variables alone. Our VAEs were able to reconstruct the original signal back from the latent variables with high fidelity ( 15 , 16 , 24 ). The X-Y-Z-lead representative-beat ECGs included in this study were processed using this VAE to generate latent encodings or variables. Model Input For LVH diagnosis, these features were available for model training: Summary features like heart rate, PR interval, QRS duration, corrected QT interval ( 25 ), frontal plane QRS axis, etc. From 16 leads, each of 12-leads, 3 X-Y-Z-leads and 1 L2 norm/3D ECG, we obtained QRS amplitudes, VTI QRS , VTI QRST , R-wave amplitudes, S-wave amplitudes. 30 latent variables each from VAEs trained to reconstruct the X-Y-Z-lead and L2 norm/3D representative-lead ECGs. Sex Model Training and Testing Approximately 10% of the medical record numbers in the dataset were withheld as the testing set, and the remainder were used for model training. We trained the following ML architectures on the training set: logistic regression, random forests, light gradient boosted machine (LGBM), residual neural network (ResNet), multilayered perceptron (MLP) and CNN. All the models were trained on the above mentioned features except for CNN, which was trained directly on the representative-beat X-Y-Z-lead ECG signal. Sex was provided to the models as the definition of LVH is sex specific. 
The results are reported from the performance of the trained models in the holdout test set. We also report the models’ performance in 4 subgroups based on intraventricular conduction: QRS duration ≤120 ms, typical right bundle branch block (RBBB, QRS duration >120 ms), typical left bundle branch block (LBBB, QRS duration >120 ms), and intraventricular conduction delay (IVCD, QRS duration >120 ms but not meeting either RBBB or LBBB criteria). The American Heart Association-American College of Cardiology Foundation-Heart Rhythm Society criteria were used for classifying bundle branch blocks ( 26 ). Statistical analysis Continuous variables are reported as mean ± standard deviation, and categorical variables as percentages. Comparisons were made using Student’s t-test for continuous variables and $\chi^2$-test for categorical variables. ECG processing and model training were performed in Python (version 3.12.7). Statistical analysis was conducted in R (version 4.4.1), and a two-tailed p-value of less than 0.05 was considered statistically significant. RESULTS Patient characteristics A total of 537,718 ECG-echocardiogram pairs from 89,145 patients were included, with 237,592 (44.2%) pairs belonging to females. The mean age of the overall population of ECG-echocardiogram samples was 65.0 ± 15.4 years. This dataset was split into 90% training ( n = 482,734) and 10% testing ( n = 54,984) sets. In the training set, 70,659 (33.1%) of the female samples and 77,777 (28.9%) male samples had echocardiographic LVH. In the testing set, 8,051 (33.6%) female samples and 8,793 (28.4%) male samples had LVH. The detailed distributions of the ECG and echocardiographic variables in the testing set are shown in Table 1 and for the training set in Supplementary Table 1 . As shown in Figure 1 , the testing samples were divided into 4 subgroups, i.e. narrow QRS ≤120 ms ( n = 39,936; 72.6%), typical RBBB ( n = 6,731; 12.2%), typical LBBB ( n = 6,372; 11.6%), and IVCD ( n = 1,945; 3.5%). 
Download figure Open in new tab Figure 1. Data pipeline for model development and testing. View this table: View inline View popup Download powerpoint Table 1. Patient characteristics of the validation set. LVH classification models The testing set performance of the univariable models, traditional criteria and ML models is summarized in Table 2 . Model performance statistics in QRS morphological subgroups are available in Supplementary Tables 2A-D . View this table: View inline View popup Download powerpoint Table 2. Model performance for LVH prediction in the entire validation set. Area under receiver-operating characteristic curve (AUC) and sensitivity at specificity fixed at 0.75 are provided. Univariable models Amongst the linear univariable models, VTI QRS-Z was the best predictor of LVH in the overall population, with an AUC 0.707 which was closely followed by VTI QRS-3D with AUC 0.704. Furthermore, VTI QRS-Z demonstrated the highest performance in the narrow QRS subgroup (AUC 0.704), whereas VTI QRS-3D performed best in the typical RBBB subgroup (AUC 0.683). The best performing criterion in typical LBBB was Amplitude QRS-3D (0.655) and in IVCD was QRS duration (0.677). Traditional criteria Overall, the performance of traditional ECG criteria for predicting LVH was fair, with AUCs ranging from 0.545 to 0.716. Cornell VDP was the best performing criterion in all groups except typical LBBB (AUC overall 0.716; narrow QRS 0.713, typical RBBB 0.678, IVCD 0.694). In the typical LBBB subgroup, Peguero-Lo Presti criteria (0.651) performed the best. In general, these criteria performed better in females as compared to males. ML Models All ML models outperformed the traditional criteria and univariate models. LGBM (AUC 0.794), MLP (0.793) and ResNet (0.795), which were trained on ECG features including VAE latent encodings and sex, were the best performing models in the overall population. 
The CNN model, which was trained on the raw ECG signal alone, demonstrated an AUC 0.788. The ROC curves, separately for females and males, for the top 4 ML models vis-à-vis the best univariable and best traditional criteria are plotted in Figure 2 . Download figure Open in new tab Figure 2. ROC curves for machine learning models from the entire testing set for males (left panel) and females (right panel). ROC curves for VTI QRS-3D , Cornell VDP, and LGBM for overall test set and each of the 4 subgroups separately for females and males are shown in Figures 3 and 4 . Download figure Open in new tab Figure 3. ROC curves for best performing LVH models – univariate (VTI QRS-Z ), traditional criteria (Cornell VDP), and machine learning (LGBM) from the entire testing set for females (left) and males (right). Download figure Open in new tab Figure 4. ROC curves for best performing LVH models – univariate (VTI QRS-Z ), traditional criteria (Cornell VDP), and machine learning (LGBM) from the wide QRS subgroups (> 120 ms) for females (left) and males (right). Linear analysis of LGBM prediction probabilities LVMi was plotted against the prediction probabilities output generated by LGBM model for females and males as shown in Figure 5 . A strong linear trend between prediction probabilities and LVMi can be noted for both females and males (respectively R 2 0.34 and 0.26; correlation coefficient 0.582 and 0.508). Download figure Open in new tab Figure 5. Scatterplots of echocardiographic left ventricular mass indexed (LVMi) plotted against prediction probabilities from the LGBM model for females (left panel) and males (right panel). Longitudinal analysis of LVH negatives Among false positives and true negatives produced by the LGBM model in the validation set, we searched for the ECG-echocardiogram pairs where a follow-up echocardiogram >1 year and closest to 5 years later was available for further analysis. 
We used a 2×2 table to compare the development of LVH in 612 false-positive as compared to the 1,543 true-negative samples. On mean follow-up of 3.3 ± 1.7 years, 189/612 (30.9%) patients in false-positive group, and 196/1543 (12.7%) patients in true-negative group developed LVH. The odds ratio for development of LVH was 3.07 (95% CI 2.44, 3.86, p<0.0001) in false-positives compared to true-negatives from the LGBM model ( Table 3 ). View this table: View inline View popup Download powerpoint Table 3. Comparison between presence of LVH on subsequent echocardiogram (>1 year and closest to 5 years after index echocardiogram) in false positives versus true negatives of LVH LGBM model in testing set. DISCUSSION To the best of our knowledge, this study represents the largest evaluation of ECG criteria and ML models for LVH prediction to date. We have applied the innovative framework of using DL-based latent space ECG encodings for building ML models, which allows simpler models to make accurate predictions without overfitting. Salient findings First, traditional ECG-based criteria demonstrated moderate utility in diagnosing LVH, with the Cornell VDP showing the highest discrimination among them (AUC 0.716). Second, univariable criteria including VTI QRS-Z and VTI QRS-3D were at par with traditional criteria for the diagnosis of LVH, with VTI QRS-Z achieving the best overall results (AUC 0.707). Third, our ML models outperform both traditional and univariable models, with LGBM, ResNet, and MLP demonstrating the highest performance (AUC 0.793-0.795). Last, the performance of traditional, univariable, and ML models vary across sex and QRS morphologies. Furthermore, the LGBM model trained on ECG latent encodings and features captured the underlying trend of cardiac mass, not just the LVH labels, showing strong correlation with LVMi and predicting future development of LVH. 
Univariable models Previous studies have demonstrated the utility of linear univariable predictors of LVH, such as QRS duration and area ( 21 , 27 ). In our analysis, we evaluated QRS duration and vectorcardiographic amplitudes/voltage-time integrals for predicting LVH across various subgroups. Among them, VTI QRS-Z and VTI QRS-3D emerged as the best overall criteria. Similar to Cornell VDP, both voltage and duration components of the QRS complex are incorporated in these criteria. Prolongation of the QRS duration in LVH is likely attributable to delays in ventricular activation associated with increased wall thickness and frequent conduction abnormalities. Furthermore, since VTI QRS-3D is calculated from the reconstructed 3D-orthogonal leads, it ostensibly captures the sum of net surface potentials produced by the ventricular depolarization wavefront in a cardiac cycle. The absolute value of surface potentials, especially in Z-axis (sagittal axis) are likely to be increased due to LVH as the mean depolarization vector shifts leftwards and posteriorly ( 28 ). Consequently, VTI QRS-Z and VTI QRS-3D are likely to be increased in LVH and could be explored as a simple univariable predictor of LVH. Traditional ECG criteria As demonstrated in previous studies, our analysis reaffirmed the modest-to-fair discrimination of LVH offered by standard electrocardiographic criteria using a large dataset ( 29 , 30 ). Unlike other voltage-based rules, Cornell VDP, which emerged as the best overall criterion, accounts for both QRS voltage and duration in its calculation, both of which are affected in LVH ( 31 ). In the subset of ECGs with LBBB, Peguero-Lo Presti criteria performed better than Cornell VDP. Although the difference in performance was marginal, if this trend is physiological, it could be explained by obfuscation of LVH-related changes in QRS duration due to QRS prolongation inherent to conduction delays in LBBB. However, this cannot be verified in our study. 
Additionally, compared to the combined population, individual criteria generally performed better in females and males separately. This underscores the importance of using different cut-off values for females and males, recognizing the sex-based differences in ECGs and definition of LVH ( 29 , 30 ). ML models We tested several ML architectures for LVH prediction, including simple models (LR), tree-based models (RF, LGBM), and neural networks (ResNet, MLP, and CNN). The best overall performance was shown by LGBM, ResNet, and MLP with AUCs ∼0.79. Performance declined across all models in the subgroups with conduction abnormalities. Interestingly, these models, despite only using the summary ECG features and VAE encodings, performed slightly better than the CNN model, which had access to the raw signal data. The use of the LGBM architecture for such classification tasks is pragmatic for two main reasons. First, as an implementation of gradient boosting decision trees, it enables efficient model training with relatively low computational resources. Second, its decision-making process is inherently interpretable, as feature importance can be evaluated using metrics like split count and gain. We further evaluated the interpretability and physiological relevance of the LGBM model. First, we plotted the prediction probabilities from this model against LVMi, which showed a strong linear positive correlation, suggesting that the model captures meaningful physiological patterns rather than artificial class boundaries. Second, we analyzed the false positives produced by this model for future development of LVH, finding that the false positives had 3 times the odds of developing LVH in the future compared to true negatives. This indicates that the model may capture underlying ECG abnormalities even before patients meet the criteria for overt LVH diagnosis. Previous literature In a recently published study from China, Zhu et al. 
used a large dataset comprising over 90,000 ECGs to create deep learning multilabel classifier algorithms. They achieved AUCs ranging from 0.78-0.92 using their 12-lead model and showed that a reduced 4-lead model using lead I, aVR, V1 and V5 had equivalent performance ( 32 ). In a Taiwanese study, Liu et al. developed a deep learning model for predicting LVH using approximately 23,000 training samples ( 33 ). They achieved high AUCs ranging from 0.83-0.89 across different validation sets. However, the definition of LVH used in this study was different, using LV mass >186 g for females and >258 g for males. In a South Korean study, Kwon et al. developed an ensemble deep neural network + CNN model using approximately 36,000 training samples, combining information from ECG signal, ECG features, and patient demographics ( 34 ). While using higher cut-off values for LVMi (109 g/m 2 females and 132 g/m 2 males), their model achieved AUCs ranging from 0.87-0.88 in validation sets. In a study from Massachusetts General Hospital, Haimovich et al. created ML models for predicting LVH in specific disease populations like cardiac amyloidosis, hypertrophic cardiomyopathy, aortic stenosis, and others using a total of 34,258 training samples ( 35 ). Similar to our approach, they used a pretrained deep learning model to produce latent encodings and trained a simpler classifier for LVH classification although they used full 10-second ECG signal instead of representative beat ECG. Their model achieved AUCs ranging from 0.69 to 0.96 in various subgroups. Khurshid et al. used data from the UK Biobank to create a CNN model trained on 32,000 samples and achieved AUCs ranging from 0.62 to 0.65 in predicting LVH. Owing to heterogeneity in study populations, data structures, and labels for LVH, it is difficult to evaluate the performance of models across studies. Nonetheless, the AUCs attained by ML models in our study are comparable to previous work. 
Limitations Our work is best understood in the context of its limitations. Both training and testing sets for the models were from a single center, and these models might have sub-optimal performance when generalized to other datasets. The inclusion of in-hospital ECGs may have biased our dataset towards sicker patients, who often get multiple ECGs in each hospital stay. Further, since the median beat ECGs were derived from a proprietary system, additional steps may be required in processing ECGs from other systems. Additionally, to calculate ECG parameters for traditional criteria and univariate models, automated feature extraction was done, which might not be as accurate as expert-created labels. CONCLUSIONS Traditional voltage-based ECG criteria demonstrate only modest performance in detecting LVH. Simple univariable models, especially VTI QRS-Z , may perform at par with traditional criteria. Regardless, ML techniques can significantly enhance the accuracy of ECG-based diagnosis of LVH over both traditional voltage-based criteria and univariable models. Our findings highlight the utility of dimensionality reduction via variational autoencoders, which enables the application of non-deep learning ML models to high-dimensional ECG data without compromising performance. These approaches offer both interpretability and scalability, suggesting potential for future clinical integration. Further external validation and testing is needed for clinical utilization of these ML models. Data Availability The data supporting findings of this study were obtained from our institutional database that contains identifiable patient information. Access to the data is restricted and subject to approval by the institutional review board. Researchers interested in accessing the data may contact the corresponding author for information about the necessary procedures and approvals required. 
Footnotes Conflict of interest: None Funding acknowledgment: This work was supported by the Department of Cardiovascular Medicine at The University of Kansas Medical Center, the American Heart Association (AHA) Transformational Project Award ( https://doi.org/10.58275/AHA.24TPA1291852.pc.gr.196660 ) granted to Amit Noheria, and Clinical and Translational Science Award (CTSA) grant from National Center for Advancing Translational Sciences (NCATS) awarded to The University of Kansas for Frontiers: University of Kansas Clinical and Translational Science Institute (UL1TR002366). The content is solely the responsibility of the authors and does not necessarily represent the official views of the AHA, NCATS or National Institutes of Health (NIH). Data availability: The data used in this study is part of an institutional research database with access restricted to institutional review board-approved researchers. The data cannot be shared publicly or made available to researchers at other institutions without a data use agreement. Updated the data and ECG processing pipeline, leading to small changes in results. Added evaluation of X,Y,and Z QRS-VTI and QRS-Amplitudes for detecting LVH. Abbreviations ECG electrocardiogram LVH left ventricular hypertrophy ML machine learning AI artificial intelligence MLP multilayered perceptron LGBM light gradient-boosting machine AUC area under the receiver operator characteristic curve VAE variational Autoencoder LVMi left ventricular mass indexed REFERENCES 1. ↵ Vakili BA , Okin PM , Devereux RB . Prognostic implications of left ventricular hypertrophy . Am Heart J 2001 ; 141 : 334 – 41 . OpenUrl CrossRef PubMed Web of Science 2. ↵ Sayin BY , Oto A . Left Ventricular Hypertrophy: Etiology-Based Therapeutic Options . Cardiol Ther 2022 ; 11 : 203 – 230 . OpenUrl PubMed 3. ↵ Cuspidi C , Meani S , Valerio C , Fusi V , Sala C , Zanchetti A . 
Left ventricular hypertrophy and cardiovascular risk stratification: impact and cost-effectiveness of echocardiography in recently diagnosed essential hypertensives . Journal of Hypertension 2006 ; 24 . 4. ↵ Whelton PK , Carey RM , Aronow WS et al. 2017 ACC/AHA/AAPA/ABC/ACPM/AGS/APhA/ASH/ASPC/NMA/PCNA Guideline for the Prevention, Detection, Evaluation, and Management of High Blood Pressure in Adults: A Report of the American College of Cardiology/American Heart Association Task Force on Clinical Practice Guidelines . Hypertension 2018 ; 71 : e13 – e115 . OpenUrl CrossRef 5. ↵ Tison GH , Zhang J , Delling FN , Deo RC . Automated and Interpretable Patient ECG Profiles for Disease Detection , Tracking, and Discovery. Circulation: Cardiovascular Quality and Outcomes 2019 ; 12 : e005289 . OpenUrl 6. ↵ Ricciardi D , Vetta G , Nenna A et al. Current diagnostic ECG criteria for left ventricular hypertrophy: is it time to change paradigm in the analysis of data? Journal of Cardiovascular Medicine 2020 ; 21 . 7. Leese PJ , Viera AJ , Hinderliter AL , Stearns SC . Cost-Effectiveness of Electrocardiography vs. Electrocardiography Plus Limited Echocardiography to Diagnose LVH in Young, Newly Identified, Hypertensives . American Journal of Hypertension 2010 ; 23 : 592 – 598 . OpenUrl CrossRef PubMed 8. ↵ Hancock EW , Deal BJ , Mirvis DM et al. AHA/ACCF/HRS recommendations for the standardization and interpretation of the electrocardiogram: part V: electrocardiogram changes associated with cardiac chamber hypertrophy: a scientific statement from the American Heart Association Electrocardiography and Arrhythmias Committee, Council on Clinical Cardiology; the American College of Cardiology Foundation; and the Heart Rhythm Society: endorsed by the International Society for Computerized Electrocardiology . Circulation 2009 ; 119 : e251 – 61 . OpenUrl FREE Full Text 9. ↵ Ose B , Sattar Z , Gupta A , Toquica C , Harvey C , Noheria A . 
Artificial Intelligence Interpretation of the Electrocardiogram: A State-of-the-Art Review . Curr Cardiol Rep 2024 ; 26 : 561 – 580 . OpenUrl CrossRef PubMed 10. ↵ Ranka S , Reddy M , Noheria A . Artificial intelligence in cardiovascular medicine . Curr Opin Cardiol 2021 ; 36 : 26 – 35 . OpenUrl CrossRef PubMed 11. ↵ Siranart N , Deepan N , Techasatian W et al. Diagnostic accuracy of artificial intelligence in detecting left ventricular hypertrophy by electrocardiograph: a systematic review and meta-analysis . Scientific Reports 2024 ; 14 : 15882 . OpenUrl PubMed 12. ↵ Ying X . An Overview of Overfitting and its Solutions . Journal of Physics: Conference Series 2019 ; 1168 : 022022 . OpenUrl 13. Kligfield P , Gettes LS , Bailey JJ et al. Recommendations for the standardization and interpretation of the electrocardiogram: part I: the electrocardiogram and its technology a scientific statement from the American Heart Association Electrocardiography and Arrhythmias Committee, Council on Clinical Cardiology; the American College of Cardiology Foundation; and the Heart Rhythm Society endorsed by the International Society for Computerized Electrocardiology . J Am Coll Cardiol 2007 ; 49 : 1109 – 27 . OpenUrl FREE Full Text 14. ↵ Harvey CJ , Shomaji S , Yao Z , Noheria A . Comparison of Autoencoder Encodings for ECG Representation in Downstream Prediction Tasks . arXiv preprint 2024 :2410.02937. 15. ↵ Harvey C , Noheria A . DEEP LEARNING ENCODED ECG – AVOIDING OVERFITTING IN ECG MACHINE LEARNING . Journal of the American College of Cardiology 2024 ; 83 : 172 – 172 . OpenUrl 16. ↵ Harvey C , Noheria A . REDUCING DATA DIMENSIONALITY OF ECG SIGNAL USING DEEP LEARNING . Journal of the American College of Cardiology 2024 ; 83 : 26 – 26 . OpenUrl 17. ↵ Murphy SN , Weber G , Mendis M et al. Serving the enterprise and beyond with informatics for integrating biology and the bedside (i2b2) . J Am Med Inform Assoc 2010 ; 17 : 124 – 30 . OpenUrl CrossRef PubMed 18. 
↵ Waitman LR , Warren JJ , Manos EL , Connolly DW . Expressing observations from electronic medical record flowsheets in an i2b2 based clinical data repository to support research and quality improvement . AMIA Annu Symp Proc 2011; 2011 : 1454 – 63 . 19. ↵ Lang RM , Badano LP , Mor-Avi V et al. Recommendations for cardiac chamber quantification by echocardiography in adults: an update from the American Society of Echocardiography and the European Association of Cardiovascular Imaging . J Am Soc Echocardiogr 2015 ; 28 : 1 – 39 e14. OpenUrl CrossRef PubMed 20. ↵ Fairbank T , DeBauge A , Harvey CJ et al. Electrocardiographic Z-axis QRS-T voltage-time-integral in patients with typical right bundle branch block - Correlation with echocardiographic right ventricular size and function . J Electrocardiol 2024 ; 82 : 73 – 79 . OpenUrl PubMed 21. ↵ DeBauge A , Fairbank T , Harvey CJ et al. Electrocardiographic prediction of left ventricular hypertrophy in women and men with left bundle branch block - Comparison of QRS duration, amplitude and voltage-time-integral . J Electrocardiol 2023 ; 80 : 34 – 39 . OpenUrl CrossRef PubMed 22. ↵ Kors JA , van Herpen G , Sittig AC , van Bemmel JH . Reconstruction of the Frank vectorcardiogram from standard electrocardiographic leads: diagnostic comparison of different methods . Eur Heart J 1990 ; 11 : 1083 – 92 . OpenUrl CrossRef PubMed Web of Science 23. ↵ DeBauge A , Harvey CJ , Gupta A et al. Evaluation of electrocardiographic criteria for predicting left ventricular hypertrophy and dilation in presence of left bundle branch block . Journal of Electrocardiology 2024 ; 87 : 153787 . OpenUrl CrossRef PubMed 24. ↵ Harvey CJ , Shomaji S , Yao Z , Noheria A. Comparison of Autoencoder Encodings for ECG Representation in Downstream Prediction Tasks: arXiv . 25. ↵ Fridericia LS . Die Systolendauer im Elektrokardiogramm bei normalen Menschen und bei Herzkranken . Acta Medica Scandinavica 1920 ; 53 : 469 – 486 . OpenUrl CrossRef 26. 
↵ Surawicz B , Childers R , Deal BJ et al. AHA/ACCF/HRS recommendations for the standardization and interpretation of the electrocardiogram: part III: intraventricular conduction disturbances: a scientific statement from the American Heart Association Electrocardiography and Arrhythmias Committee, Council on Clinical Cardiology; the American College of Cardiology Foundation; and the Heart Rhythm Society: endorsed by the International Society for Computerized Electrocardiology . Circulation 2009 ; 119 : e235 – 40 . OpenUrl FREE Full Text 27. ↵ Okin PM , Roman MJ , Devereux RB , Kligfield P . Time-Voltage Area of the QRS for the Identification of Left Ventricular Hypertrophy . Hypertension 1996 ; 27 : 251 – 258 . OpenUrl 28. ↵ Peguero JG , Lo Presti S , Perez J , Issa O , Brenes JC , Tolentino A . Electrocardiographic Criteria for the Diagnosis of Left Ventricular Hypertrophy . J Am Coll Cardiol 2017 ; 69 : 1694 – 1703 . OpenUrl FREE Full Text 29. ↵ Fragola PV , Autore C , Ruscitti G , Picelli A , Cannata D. Electrocardiographic diagnosis of left ventricular hypertrophy in the presence of left bundle branch block: a wasted effort . Int J Cardiol 1990 ; 28 : 215 – 21 . OpenUrl CrossRef PubMed Web of Science 30. ↵ Haskell RJ , Ginzton LE , Laks MM . Electrocardiographic diagnosis of left ventricular hypertrophy in the presence of left bundle branch block . J Electrocardiol 1987 ; 20 : 227 – 32 . OpenUrl CrossRef PubMed Web of Science 31. ↵ Molloy TJ , Okin PM , Devereux RB , Kligfield P . Electrocardiographic detection of left ventricular hypertrophy by the simple QRS voltage-duration product . J Am Coll Cardiol 1992 ; 20 : 1180 – 6 . OpenUrl FREE Full Text 32. ↵ Zhu H , Jiang Y , Cheng C et al. Four-Channel ECG as a Single Source for Early Diagnosis of Cardiac Hypertrophy and Dilation — A Deep Learning Approach . NEJM AI 2024 ; 1 : AIoa2300297 . OpenUrl 33. ↵ Liu C-M , Hsieh M-E , Hu Y-F et al. 
Artificial Intelligence–Enabled Model for Early Detection of Left Ventricular Hypertrophy and Mortality Prediction in Young to Middle-Aged Adults . Circulation: Cardiovascular Quality and Outcomes 2022 ; 15 : e008360 . OpenUrl PubMed 34. ↵ Kwon J-M , Jeon K-H , Kim HM et al. Comparing the performance of artificial intelligence and conventional diagnosis criteria for detecting left ventricular hypertrophy using electrocardiography . EP Europace 2020 ; 22 : 412 – 419 . OpenUrl 35. ↵ Haimovich JS , Diamant N , Khurshid S et al. Artificial intelligence–enabled classification of hypertrophic heart diseases using electrocardiograms . Cardiovascular Digital Health Journal 2023 ; 4 : 48 – 59 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted September 09, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Machine learning to classify left ventricular hypertrophy using ECG feature extraction by variational autoencoder Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Machine learning to classify left ventricular hypertrophy using ECG feature extraction by variational autoencoder Amulya Gupta , Christopher J. 
Harvey , Ashley DeBauge , Sumaiya Shomaji , Zijun Yao , Yongkuk Lee , Amit Noheria medRxiv 2024.10.14.24315460; doi: https://doi.org/10.1101/2024.10.14.24315460 Share This Article: Copy Citation Tools Machine learning to classify left ventricular hypertrophy using ECG feature extraction by variational autoencoder Amulya Gupta , Christopher J. Harvey , Ashley DeBauge , Sumaiya Shomaji , Zijun Yao , Yongkuk Lee , Amit Noheria medRxiv 2024.10.14.24315460; doi: https://doi.org/10.1101/2024.10.14.24315460 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Cardiovascular Medicine Subject Areas All Articles Addiction Medicine (573) Allergy and Immunology (865) Anesthesia (304) Cardiovascular Medicine (4457) Dentistry and Oral Medicine (445) Dermatology (383) Emergency Medicine (610) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1517) Epidemiology (15244) Forensic Medicine (30) Gastroenterology (1132) Genetic and Genomic Medicine (6620) Geriatric Medicine (669) Health Economics (1002) Health Informatics (4557) Health Policy (1372) Health Systems and Quality Improvement (1615) Hematology (543) HIV/AIDS (1272) Infectious Diseases (except HIV/AIDS) (15936) Intensive Care and Critical Care Medicine (1106) Medical Education (624) Medical Ethics (147) Nephrology (670) Neurology (6634) Nursing (346) Nutrition (999) Obstetrics and Gynecology (1148) Occupational and Environmental Health (957) Oncology (3348) Ophthalmology (980) Orthopedics (369) Otolaryngology (421) Pain Medicine (436) Palliative Medicine (130) Pathology (665) Pediatrics (1696) Pharmacology and Therapeutics (693) Primary Care Research (714) Psychiatry and Clinical Psychology (5463) Public and Global Health (9257) Radiology and Imaging (2210) Rehabilitation Medicine and Physical Therapy (1371) Respiratory Medicine (1198) Rheumatology (598) Sexual 
and Reproductive Health (716) Sports Medicine (532) Surgery (714) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a0364db50f5e52ad',t:'MTc4MDA2NjE0Mw=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2024) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00