The Imperative for Innovation: Gradient Boosting Capabilities in Diagnosing Ischemic Heart Disease Using Exhaled Breath Analysis

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 44,359 characters · extracted from preprint-html · click to expand
The Imperative for Innovation: Gradient Boosting Capabilities in Diagnosing Ischemic Heart Disease Using Exhaled Breath Analysis | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search The Imperative for Innovation: Gradient Boosting Capabilities in Diagnosing Ischemic Heart Disease Using Exhaled Breath Analysis View ORCID Profile Basheer Abdullah Marzoog , Anastasia Stroeva , Philipp Kopylov doi: https://doi.org/10.1101/2025.11.01.25339309 Basheer Abdullah Marzoog 1 Institute of Personalized Cardiology of The Center “Digital Biodesign and Personalized Healthcare” of Biomedical Science and Technology Park, Federal State Autonomous Educational Institution of Higher 
Education I.M. Sechenov First Moscow State Medical University of the Ministry of Health of the Russian Federation (Sechenovskiy University) , 119991 Moscow, Russia; postal address: Russia, Moscow, 8-2 Trubetskaya street, 119991 Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Basheer Abdullah Marzoog For correspondence: marzug{at}mail.ru Anastasia Stroeva 1 Institute of Personalized Cardiology of The Center “Digital Biodesign and Personalized Healthcare” of Biomedical Science and Technology Park, Federal State Autonomous Educational Institution of Higher Education I.M. Sechenov First Moscow State Medical University of the Ministry of Health of the Russian Federation (Sechenovskiy University) , 119991 Moscow, Russia; postal address: Russia, Moscow, 8-2 Trubetskaya street, 119991 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Philipp Kopylov 1 Institute of Personalized Cardiology of The Center “Digital Biodesign and Personalized Healthcare” of Biomedical Science and Technology Park, Federal State Autonomous Educational Institution of Higher Education I.M. Sechenov First Moscow State Medical University of the Ministry of Health of the Russian Federation (Sechenovskiy University) , 119991 Moscow, Russia; postal address: Russia, Moscow, 8-2 Trubetskaya street, 119991 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Background Ischemic heart disease and its related sequelae remain the leading cause of mortality and morbidity globally. Improving the poor diagnostic accuracy and limited availability of early screening methods is the leading objective. Aims To assess the diagnostic capabilities of a machine learning model in the diagnosis of ischemic heart disease using exhaled breath analysis predictors. 
Materials and methods A single-center prospective study involved participants with vs without stress-induced myocardial perfusion defects. All the participants underwent real-time breath analysis using a PTR-TOF-MS-1000, a bicycle ergometry test, and multidetector computed tomography (MDCT) of the coronary arteries with myocardial perfusion assessment. The obtained exhaled breath analysis data were analyzed using a machine learning model. Statistical analysis was performed using Statistica, SPSS, and Python. Results A total of 80 participants were divided into 31 with a positive stress-induced myocardial perfusion defect vs 49 without. The diagnostic performance of the built model compared with the reference method (MDCT) was: AUC 0.86 (95% confidence interval [0.7805-0.9338]), sensitivity 0.6129 (95% CI [0.4414-0.7844]), and specificity 0.8367 (95% CI [0.7332-0.9402]). Conclusion The Gradient Boosting model shows promising results using exhaled breath analysis in the diagnosis of ischemic heart disease. However, further investigations on a larger sample size are required to confirm and extend these findings. Introduction Ischemic heart disease (IHD), also commonly referred to as coronary artery disease (CAD), remains the preeminent global cause of morbidity and mortality, casting a long shadow over cardiovascular health worldwide [ 1 ]. Defined fundamentally as a condition characterized by an imbalance between myocardial oxygen supply and demand, IHD manifests along a spectrum from transient, reversible myocardial ischemia (angina pectoris) to irreversible myocardial infarction (MI) and heart failure [ 2 ]. This imbalance is overwhelmingly caused by atherosclerotic plaque formation, progression, and complications (rupture, erosion, thrombosis) within the epicardial coronary arteries, leading to critical luminal narrowing or occlusion [ 3 ]. 
Despite decades of research and clinical advancement, the timely and accurate diagnosis of IHD, particularly in its early or atypical presentations, continues to pose significant challenges, driving an urgent need for transformative diagnostic approaches [ 4 ]. The epidemiology of IHD underscores its devastating societal impact. According to the Global Burden of Disease Study, IHD persists as the single leading cause of death globally, responsible for approximately 9 million deaths annually [ 5 ]. Its prevalence is staggering, affecting hundreds of millions of individuals. While age-standardized mortality rates have declined in high-income countries due to improved prevention, acute management, and revascularization techniques, the absolute burden continues to rise due to aging populations and increasing prevalence of risk factors like diabetes, hypertension, dyslipidemia, obesity, and sedentary lifestyles, particularly in low- and middle-income countries [ 6 ]. Furthermore, a significant proportion of initial presentations are catastrophic events like acute MI or sudden cardiac death, highlighting the critical failure of early detection in current paradigms [ 7 ]. The pathophysiology of IHD is centered on coronary atherosclerosis, a chronic inflammatory process initiated by endothelial dysfunction and lipid accumulation within the arterial intima. Progressive plaque growth narrows the vessel lumen, restricting blood flow [ 8 ]. Crucially, acute coronary syndromes often arise not from the most severely stenotic lesions, but from plaques deemed “vulnerable” – characterized by a thin fibrous cap, large lipid core, and intense inflammatory activity – which are prone to rupture or erosion, triggering platelet adhesion, aggregation, and thrombus formation, causing sudden, often complete, vessel occlusion [ 9 ]. 
This complex interplay of structural stenosis, endothelial dysfunction (impairing vasodilation during increased demand), inflammation, and thrombosis underlies the diverse clinical manifestations. However, conventional diagnostic tools often fail to capture this dynamic complexity, particularly the functional significance of non-obstructive plaques or the inflammatory milieu preceding acute events [ 10 ]. The limitations of current diagnostic modalities are a key driver of IHD’s persistent dominance. While foundational, the resting electrocardiogram (ECG) lacks sensitivity and specificity for ischemia, especially without active symptoms [ 11 ]. Exercise stress testing, while valuable, has variable accuracy, is contraindicated in many, and requires significant patient effort and monitoring resources. Non-invasive imaging like stress echocardiography, myocardial perfusion imaging (MPI), and coronary computed tomography angiography (CCTA) offer improved accuracy but come with substantial drawbacks: high cost, limited accessibility (especially in resource-poor settings), exposure to ionizing radiation (MPI, CCTA), the need for sophisticated equipment and specialized personnel, and often, lengthy wait times. Invasive coronary angiography (ICA), the gold standard for defining coronary anatomy, is expensive, carries inherent procedural risks, and primarily identifies significant luminal narrowing, often missing the functional impact of less severe lesions or the underlying plaque vulnerability. These limitations create significant diagnostic gaps, leading to delayed diagnosis, missed opportunities for early intervention, unnecessary procedures, and ultimately, preventable adverse outcomes. 
Overcoming the predominance of IHD necessitates a paradigm shift towards accessible, non-invasive, cost-effective, and highly sensitive tools capable of detecting the disease in its nascent or subclinical stages, particularly within the high-throughput environment of the outpatient department (OPD) [ 12 ]. This imperative has catalyzed the exploration of novel diagnostic frontiers. Among the most promising is exhaled breath analysis (EBA). Human breath contains a complex mixture of volatile organic compounds (VOCs), metabolic end-products reflecting physiological and pathophysiological processes occurring throughout the body, including the heart. Research indicates that specific VOCs, or distinct patterns (breathprints), are generated during myocardial ischemia due to oxidative stress, inflammation, lipid peroxidation, and shifts in energy metabolism [ 13 – 15 ]. The profound appeal of EBA lies in its inherent characteristics: it is completely non-invasive, requiring only a simple breath sample; painless and safe for patients; rapid (results potentially available in minutes); low-cost compared to imaging; and highly accessible, requiring minimal infrastructure beyond the collection device and analyzer. This makes it uniquely suited for large-scale screening programs in the OPD setting [ 16 ]. However, interpreting the complex VOC signatures within breath presents a formidable analytical challenge. This is where the transformative power of machine learning (ML) converges with EBA. ML algorithms, particularly sophisticated pattern recognition techniques like deep learning, possess an unparalleled ability to discern subtle, multidimensional patterns within vast and complex datasets – precisely the nature of breathomics data [ 17 ]. By training ML models on breath samples from well-characterized cohorts (confirmed IHD patients vs. 
healthy controls), these algorithms can learn to identify the unique “breathprint” associated with ischemic heart disease with high sensitivity and specificity. ML can integrate EBA data with other readily available clinical parameters (age, risk factors, basic biomarkers) to create even more powerful predictive and diagnostic models [ 18 ]. The potential impact is dramatic: ML-enhanced EBA could revolutionize IHD diagnostics by enabling ultra-early detection of underlying ischemia or vulnerable plaques before significant symptoms or catastrophic events occur. Deployed in the OPD, this technology could facilitate rapid, point-of-care screening, allowing clinicians to efficiently triage patients at high risk for expedited, more targeted investigation (like CCTA or stress testing), while confidently reassuring low-risk individuals [ 19 ]. This shift towards proactive, accessible screening holds the potential to significantly reduce the diagnostic gap, mitigate the overwhelming burden of IHD, and usher in a new era of precision cardiovascular medicine focused on true prevention. The convergence of breath analysis and artificial intelligence represents not just an incremental improvement, but a potential revolution in our approach to conquering the world’s leading cause of death [ 20 ]. Material and methods Study design This prospective single-center study enrolled 80 participants: 31 exhibiting stress-induced myocardial perfusion defects and 49 without. Conducted according to Good Clinical Practice (GCP) standards and the Helsinki Declaration, the study received approval from the Sechenov University ethics committee (Protocol No. 19-23, Oct 26, 2023) and was registered on ClinicalTrials.gov ( NCT06181799 ), where inclusion/exclusion criteria are available. All participants provided written informed consent for both the study and personal data processing. 
Contrast-enhanced CT perfusion imaging results under adenosine triphosphate (ATP) stress were assessed by physicians with at least two years of clinical experience. The study design included a physical exertion test so that the dynamics of the volatile organic compounds (VOCs) could be assessed and delta values calculated, rather than assessing the VOCs only at rest. This makes the study more accurate, because exertion-induced ischemia helps identify the VOCs that are genuinely associated with ischemic heart disease rather than artifacts. Data collection Screening – Patient interviews, collection of medical and lifestyle history, review of medical records, physical examination, and documentation of height, weight, blood pressure, pulse, and heart rate. All participants underwent real-time breath analysis at rest using a PTR-TOF-MS-1000 instrument (IONICON PTR-TOF-MS-1000 Trace VOC Analyzer, Austria). Testing occurred in the hospital in the morning under fasting conditions (6–8 hours), with no toothbrushing permitted [ 21 ]. Participants used sterile disposable mouthpieces; no additional filters were required per manufacturer guidelines. Each provided a 1-minute breath sample (yielding 12–16 exhalation cycles). Molecules were ionized using H₃O⁺ primary ions, separated by their mass-to-charge ratio (m/z), and detected. Full-scan mass spectra covered m/z 10–685 (scan time: 1000 ms). The drift tube (T-Drift) and inlet (T-Inlet) temperatures were maintained at 80°C. Study participants underwent an exercise bicycle ergometry test (on SCHILLER CS 200 device; Bruce protocol) to evaluate the response to physical activity. According to the resulting metabolic equivalents (METs) and watts, the angina functional class (FC) was determined in participants with positive physical stress test results, where watts/Mets <50/100/7 FC-I. During bicycle ergometry, the participants were monitored with a 12-lead electrocardiogram (ECG) and manual blood pressure measurement, taken once at the end of every 2 minutes. 
The ergometry procedure was stopped if an increase in blood pressure >220 mmHg or horizontal or downward ST segment on the ECG ≥ 1 mm. Furthermore, stop the procedure if the target heart rate (86% of the 220-age) is reached. Prior to undergoing multidetector computed tomography (MDCT) of the coronary arteries with myocardial perfusion assessment, all patients were required to provide laboratory test results indicating serum creatinine levels, followed by calculation of the glomerular filtration rate (GFR) using the Chronic Kidney Disease Epidemiology Collaboration (CKD-EPI) formula. The GFR was required to be no lower than 30 mL/min/1.73 m² to ensure eligibility for the procedure [ 22 – 25 ]. Subsequently, radial vein catheterization was performed to administer a contrast agent and sodium adenosine triphosphate (ATP), with the objective of inducing myocardial ischemia by elevating heart rate. Multidetector computed tomography (MDCT) of the coronary arteries with myocardial perfusion assessment was performed using a Canon Aquilion One Genesis scanner (Japan; manufacturer: Canon Medical Systems Corporation; device registered with Roszdravnadzor under No. RZN 2021/16161, dated 28 December 2021). The protocol included 640 slices with a native (non-contrast) slice thickness of 0.5 mm, followed by contrast-enhanced imaging to evaluate myocardial perfusion at rest and 20 minutes after intravenous administration of Triphosadeninum (sodium adenosine triphosphate [ATP]; Russia; manufacturer: Ellara; registration certificate No. LP-004667, dated 25 January 2018). The contrast agent used was Iohexol, 50 mL (Omnipaque; Norway; manufacturer: GE Healthcare; registered with Roszdravnadzor under No. P N015799/01, linked to LP-N(008665)-(RG-RU), dated 14 May 2009). Sodium adenosine triphosphate (3 ampoules, each containing 1 mL [10 mg/mL]) was diluted in 17 mL of 0.9% sodium chloride solution. 
The resulting 20 mL solution was administered intravenously via slow bolus over 2 minutes at a dose of 300 μg/kg (adjusted for body weight: 60 kg – 12 mL, 70 kg – 14 mL, 80 kg – 16 mL, 100 kg – 20 mL). Cardiothoracic radiologists performing the CT myocardial stress perfusion analysis were blinded to the results of the stress ECG test. Image interpretation followed the standardized 16-segment myocardial model endorsed by the American Heart Association, with layered analysis of basal, mid-ventricular, and apical cardiac segments. Initial segmental evaluation was conducted in the short-axis projection to identify artifacts complicating diagnostic interpretation, followed by detailed analysis for perfusion defects [ 12 ]. The topographic distribution of coronary blood supply was aligned with the following anatomical landmarks: Segments 1, 2, 7, 8, 13, and 14 were assigned to the perfusion territory of the left anterior descending artery (LAD). Segments 5, 6, 11, 12, and 16 corresponded to the vascular territory of the circumflex branch (LCx) of the left coronary artery. Segments 3, 4, 9, 10, and 15 were associated with the perfusion territory of the right coronary artery (RCA). Myocardial perfusion analysis involved the visual identification of regions of relative hypoperfusion. Areas of reduced X-ray attenuation, visualized during the arterial phase following adenosine triphosphate (ATP) administration, were interpreted as ischemic changes. Subsequently, automated software analysis was performed using the Vitrea Advanced platform (Vitrea Workstation). The transmural perfusion ratio (TPR) was calculated, and polar maps were generated to illustrate the distribution of myocardial X-ray attenuation at rest and under stress. Global and segmental perfusion assessments were based on TPR values. 
A five-color scale was employed to visualize perfusion defect severity: Blue (TPR 2.5–0.99): Normal perfusion; Green (TPR 0.99–0.97): Minimal perfusion abnormalities; Yellow (TPR 0.97–0.94): Moderate hypoperfusion; Orange (TPR 0.94–0.60): Significant hypoperfusion; Red (TPR 0.60–0.20): Absent perfusion. Stress perfusion results were considered positive if a stress-induced perfusion defect (TPR <0.97) was observed in one or more myocardial segments. Multidetector computed tomography (MDCT) with perfusion stress testing served as the reference method for diagnosing coronary artery disease (CAD), in accordance with the 2024 guidelines of the Russian Society of Cardiology (RSC), European Society of Cardiology (ESC), and American College of Cardiology (ACC) [ 26 – 28 ]. Following completion of all study phases, all patients were referred for a follow-up cardiology consultation to adjust or initiate treatment based on the diagnostic findings. Statistical analysis Following data collection, a comprehensive database was compiled. Statistical analysis included descriptive statistics: for quantitative parameters, normality (assessed via Shapiro-Wilk test), mean, standard deviation, median, interquartile range (IQR), minimum, and maximum; for categorical variables, frequencies, and percentages. Comparative analysis used Welch’s t-test for normally distributed data and the Mann-Whitney U-test for non-normally distributed data. Analyses were performed using Statistica 12 (StatSoft, Inc., 2014) and IBM SPSS Statistics (version 29.0.1.1, IBM Corp., 2024), with statistical significance set at p < 0.05. The study design and statistical approach were rigorously aligned with the research objectives and hypotheses. 
Method of Machine learning method building The Gradient Boosting model demonstrated superior performance in predicting myocardial perfusion defects following adenosine triphosphate stress testing using breath analysis-derived metabolic features, achieving a high AUC through a rigorous methodology that encompassed comprehensive feature engineering, advanced data preprocessing, and robust validation. Delta features (Δ10 and Δ30) were calculated for each metabolite by normalizing post-stress measurements against baseline values, with interaction terms generated between top-variance delta features to capture nonlinear relationships. The preprocessing pipeline excluded features with >50% missing values, applied K-nearest neighbors imputation (k=5) for remaining missing data, addressed class imbalance via SMOTE oversampling exclusively during training folds, and encoded target classes (“Yes”/”No”). Feature selection utilized Recursive Feature Elimination with Gradient Boosting (RFE-GB) to identify the 15 most predictive features from 186 engineered delta features. The optimized Gradient Boosting classifier (n_estimators=200, subsample=0.8, max_depth=5) was evaluated against XGBoost and Random Forest algorithms using Leave-One-Out Cross-Validation (LOOCV) with embedded SMOTE to prevent data leakage, ultimately demonstrating the highest diagnostic with clinically relevant specificity and NPV, while maintaining sensitivity and PPV, with feature importance analysis identifying key metabolic delta features driving predictions. This comprehensive validation framework ensures model generalizability for clinical deployment. Results Descriptive features of the sample The study initially enrolled 101 patients. Following the application of predefined exclusion criteria, 21 patients were excluded, yielding a final cohort of 80 participants. 
Participants were stratified into two groups based on the transmural perfusion ratio (TPR): Group 1: Stress-induced perfusion defects (TPR ≤ 0.97; n=31); Group 2: No stress-induced perfusion defects (TPR > 0.97; n=49). The initial study phase lasted approximately 1.5–2 hours for all participants. The mean waiting time for myocardial perfusion CT was 24.85 days (±23.472 SD). The mean interval between the initial phase (cardiologist consultation with single-channel ECG and plethysmography) and the perfusion CT phase was 21.41 days (±24.09 SD) in Group 1 and 27.02 days (±23.053 SD) in Group 2. This inter-group difference was not statistically significant (p = 0.301). Comparative group characteristics are presented in Tables 1 and 2 . View this table: View inline View popup Download powerpoint Table 1 Comparative characteristics of the patients in both groups. View this table: View inline View popup Table 2 A comparative analysis of categorical variables was conducted between patients with stress-induced myocardial perfusion defects (Group 1) and those without such defects (Group 2). The Gradient Boosting model performance in diagnosing ischemic heart disease According to the method explained in the “Material and methods” section, the machine learning model was built and showed good, clinically acceptable performance ( Figure 1 and Table 3 ). Download figure Open in new tab Figure 1 Diagrammatic representation of the built Gradient boosting machine model based on the exhaled breath analysis predictors. The model shows clinically acceptable results in the diagnosis of ischemic heart disease, AUC 86 %. View this table: View inline View popup Download powerpoint Table 3 The diagnostic features of the built Gradient boosting machine model based on the exhaled breath analysis predictors. Feature importance: the VOCs that play an important role in the diagnosis of ischemic heart disease are presented in the table below. 
( Table 4 ) View this table: View inline View popup Download powerpoint Table 4 The feature importance of the obtained volatile organic compounds in the diagnosis of ischemic heart disease based on the built Gradient boosting machine learning model. Discussion The present study demonstrates the significant diagnostic potential of exhaled breath analysis (EBA) coupled with gradient boosting machine learning (ML) for detecting stress-induced myocardial perfusion defects—a key marker of ischemic heart disease (IHD). Our model achieved an AUC of 0.8571 (95% CI: 0.7805–0.9338) and specificity of 0.8367 (95% CI: 0.7332–0.9402), highlighting its capacity to identify high-risk individuals while minimizing false positives. These findings align with the urgent need for non-invasive, accessible screening tools in IHD diagnostics, particularly given the limitations of conventional methods like stress ECG or CT perfusion imaging, which entail radiation exposure, cost, and accessibility barriers. Compared with other studies on the diagnosis of ischemic heart disease using exhaled breath analysis with machine learning models, our model shows the best results. A recent study using exhaled breath analysis in the diagnosis of ischemic heart disease demonstrated a diagnostic accuracy of 84% [ 29 ]. In contrast, another study that aimed to assess the presence of coronary artery disease using exhaled breath analysis with a designated portable electronic nose (eNose) system showed an accuracy of 68% [ 30 ]. Conclusion The built Gradient boosting machine learning model based on the exhaled breath data analysis showed a clinically acceptable diagnostic accuracy. However, to confirm the clinical validity of the model, further investigation on a larger sample with external validation is required. 
Declarations Ethics approval and consent to participate: the study was approved by the Sechenov University, Russia, from “Ethics Committee Requirement № 19-23 from 26.10.2023”. Written informed consent was obtained from the study participants. Clinical trial registration: title; Breathome and Single Lead Electrocardiogram Optimizes Ischemic Heart Disease Diagnosis, ID number; NCT06181799 , registration date; 2023-12-13 , link to the study; https://clinicaltrials.gov/study/NCT06181799 Consent for publication: applicable on reasonable request Availability of data and materials: available on reasonable request Competing interests: The authors declare that they have no competing interests regarding publication. Funding The work of Philipp Kopylov was financed by the government assignment 1023022600020-6 «Application of mass spectrometry and exhaled air emission spectrometry for cardiovascular risk stratification». The work of Philipp Kopylov was financed by the Priority 2030 program of the Ministry of Science and Higher Education of Russia, project “Screening of cardiac pathology using telemedicine technologies and elements of artificial intelligence”, code 03.000.B.163. The work of Basheer Marzoog was financed by the Priority 2030 program of the Ministry of Science and Higher Education of Russia, project «The Digital Cardiology with Artificial Intelligence». Authors’ contributions MB wrote the paper, conducted the research, collected and analyzed the data, performed the biostatistical analysis of the sample, interpreted the results, and revised the final version of the paper; AS revised the paper; and PhK revised the final version of the manuscript. All authors have read and approved the manuscript. Data Availability All data produced in the present work are contained in the manuscript Authors’ information Basheer Abdullah Marzoog , World-Class Research Center «Digital Biodesign and Personalized Healthcare», I.M. 
Sechenov First Moscow State Medical University (Sechenov University), 119991 Moscow, Russia; postal address: Russia. Moscow, 8-2 Trubetskaya street, 119991, (marzug @mail.ru, +79969602820). ORCID: 0000-0001-5507-2413. Scopus ID: 57486338800. Anastasia Stroeva, World-Class Research Center «Digital Biodesign and Personalized Healthcare», I.M. Sechenov First Moscow State Medical University (Sechenov University), 119991 Moscow, Russia; postal address: Russia. Moscow, 8-2 Trubetskaya street, 119991, ( anstroewa{at}yandex.ru ). ORCID: 0009-0002-3694-5295. Philipp Kopylov, director of the institute of the Research Center «Digital Biodesign and Personalized Healthcare», World-Class Research Center «Digital Biodesign and Personalized Healthcare», I.M. Sechenov First Moscow State Medical University (Sechenov University), 119991 Moscow, Russia; postal address: Russia. Moscow, 8-2 Trubetskaya street, 119991. ORCID: 0000-0002-4535-8685. Scopus ID: 6507736224. email: kopylovf_yu{at}staff.sechenov.ru The paper has not been submitted elsewhere Declaration of AI use: not used STANDARDS OF REPORTING STROBE guideline has been followed. - The TRIPOD+AI standard of reporting for prediction models has been followed Competing interests No competing interests regarding the publication. Acknowledgments not applicable Reference 1. ↵ Khan , M. A. , Hashim , M. J. , Mustafa , H. , Baniyas , M. Y. , Al Suwaidi , S. K. B. M. , AlKatheeri , R. , … Lootah , S. N. A. H. ( 2020 ). Global Epidemiology of Ischemic Heart Disease: Results from the Global Burden of Disease Study . Cureus . doi: 10.7759/cureus.9349 OpenUrl CrossRef PubMed 2. ↵ Thygesen , K. , Alpert , J. S. , Jaffe , A. S. , Chaitman , B. R. , Bax , J. J. , Morrow , D. A. , … Parkhomenko , A . ( 2018 ). Fourth Universal Definition of Myocardial Infarction (2018). Circulation , 138 ( 20 ), e618 – e651 . doi: 10.1161/CIR.0000000000000617 OpenUrl CrossRef PubMed 3. ↵ Młynarska , E. , Czarnik , W. , Fularski , P. , Hajdys , J. 
, Majchrowicz , G. , Stabrawa , M. , … Franczyk , B . ( 2024 ). From Atherosclerotic Plaque to Myocardial Infarction—The Leading Cause of Coronary Artery Occlusion . International Journal of Molecular Sciences , 25 ( 13 ), 7295 . doi: 10.3390/ijms25137295 OpenUrl CrossRef PubMed 4. ↵ Qureshi , M. A. ( 2025 ). Letter to the Editor: “Accurate diagnosis of ischemic heart disease without exposure to radiation using non-stress unshielded magnetocardiography (MCG).” American Heart Journal Plus: Cardiology Research and Practice , 52 , 100519 . doi: 10.1016/j.ahjo.2025.100519 OpenUrl CrossRef 5. ↵ Wang , Y. , Li , Q. , Bi , L. , Wang , B. , Lv , T. , & Zhang , P . ( 2025 ). Global trends in the burden of ischemic heart disease based on the global burden of disease study 2021: the role of metabolic risk factors . BMC Public Health , 25 ( 1 ), 310 . doi: 10.1186/s12889-025-21588-9 OpenUrl CrossRef PubMed 6. ↵ Hasani , W. S. R. , Musa , K. I. , Cheng , K. Y. , & Dass , S. C . ( 2024 ). Exploring the trend of age-standardized mortality rates from cardiovascular disease in Malaysia: a joinpoint analysis (2010–2021) . BMC Public Health , 24 ( 1 ), 2519 . doi: 10.1186/s12889-024-19103-7 OpenUrl CrossRef PubMed 7. ↵ Bricoli , S. , Magnani , G. , Ardissino , M. , Maglietta , G. , Celli , P. , Ferrario , M. , … Ardissino , D . ( 2024 ). Sudden cardiac death after early-onset myocardial infarction: a multicentre longitudinal cohort study with a 20-year follow-up . European Heart Journal: Acute Cardiovascular Care , 13 ( 10 ), 726 – 730 . doi: 10.1093/ehjacc/zuae089 OpenUrl CrossRef PubMed 8. ↵ Jebari-Benslaiman , S. , Galicia-García , U. , Larrea-Sebal , A. , Olaetxea , J. R. , Alloza , I. , Vandenbroeck , K. , … Martín , C . ( 2022 ). Pathophysiology of Atherosclerosis . International Journal of Molecular Sciences , 23 ( 6 ), 3346 . doi: 10.3390/ijms23063346 OpenUrl CrossRef 9. ↵ Kimura , K. , Kimura , T. , Ishihara , M. , Nakagawa , Y. , Nakao , K. , Miyauchi , K. 
, … Yamazaki , T . ( 2019 ). JCS 2018 Guideline on Diagnosis and Treatment of Acute Coronary Syndrome . Circulation Journal , 83 ( 5 ), 1085 – 1196 . doi: 10.1253/circj.CJ-19-0133 OpenUrl CrossRef PubMed 10. ↵ Toya , T . ( 2025 ). Coronary Endothelial Dysfunction and Vasomotor Dysregulation in Myocardial Bridging . Journal of Cardiovascular Development and Disease , 12 ( 2 ), 54 . doi: 10.3390/jcdd12020054 OpenUrl CrossRef 11. ↵ Marzoog , B. A. , Chomakhidze , P. , Gognieva , D. , Silantyev , A. , Suvorov , A. , Abdullaev , M. , … Kopylov , P . ( 2025 ). Development and validation of a machine learning model for diagnosis of ischemic heart disease using single-lead electrocardiogram parameters . World Journal of Cardiology , 17 ( 4 ). doi: 10.4330/wjc.v17.i4.104396 OpenUrl CrossRef 12. ↵ Baggiano , A. , Italiano , G. , Guglielmo , M. , Fusini , L. , Guaricci , A. I. , Maragna , R. , … Pontone , G . ( 2022 ). Changing Paradigms in the Diagnosis of Ischemic Heart Disease by Multimodality Imaging . Journal of Clinical Medicine , 11 ( 3 ), 477 . doi: 10.3390/jcm11030477 OpenUrl CrossRef PubMed 13. ↵ Moura , P. C. , Raposo , M. , & Vassilenko , V . ( 2023 ). Breath volatile organic compounds (VOCs) as biomarkers for the diagnosis of pathological conditions: A review . Biomedical Journal , 46 ( 4 ), 100623 . doi: 10.1016/j.bj.2023.100623 OpenUrl CrossRef PubMed 14. Marzoog , B. A . ( 2024 ). Volatilome: A Novel Tool for Risk Scoring in Ischemic Heart Disease . Current cardiology reviews , 20 ( 6 ), e080724231719 . doi: 10.2174/011573403X304090240705063536 OpenUrl CrossRef 15. ↵ Marzoog , B. A. , Chomakhidze , P. , Gognieva , D. , Parunova , A. Y. , Demchuk , S. N. , Silantyev , A. , … Kopylov , P . ( 2025 ). Updates in breathomics behavior in ischemic heart disease and heart failure, mass-spectrometry . World Journal of Cardiology , 17 ( 2 ). doi: 10.4330/wjc.v17.i2.102851 OpenUrl CrossRef 16. ↵ Marzoog , B. A. , & Kopylov , P . ( 2025 ). 
Volatilome and machine learning in ischemic heart disease: Current challenges and future perspectives . World Journal of Cardiology , 17 ( 4 ). doi: 10.4330/wjc.v17.i4.106593 OpenUrl CrossRef 17. ↵ Schmitt , M. , Ahmadi , S. A. , Xu , Y. , Taşkin , G. , Verma , U. , Sica , F. , & Hänsch , R . ( 2023 ). There Are No Data Like More Data: Datasets for deep learning in Earth observation . IEEE Geoscience and Remote Sensing Magazine , 11 ( 3 ), 63 – 97 . doi: 10.1109/MGRS.2023.3293459 OpenUrl CrossRef 18. ↵ Ng , M. L. , Ang , X. , Yap , K. Y. , Ng , J. J. , Goh , E. C. H. , Khoo , B. B. J. , … Drum , C. L . ( 2023 ). Novel Oxidative Stress Biomarkers with Risk Prognosis Values in Heart Failure . Biomedicines , 11 ( 3 ), 917 . doi: 10.3390/biomedicines11030917 OpenUrl CrossRef PubMed 19. ↵ Currie , G. , & Kiat , H . ( 2025 ). Beyond the Lumen: Molecular Imaging to Unmask Vulnerable Coronary Plaques . Journal of Cardiovascular Development and Disease , 12 ( 2 ), 51 . doi: 10.3390/jcdd12020051 OpenUrl CrossRef 20. ↵ Meder , B. , Asselbergs , F. W. , & Ashley , E . ( 2025 ). Artificial intelligence to improve cardiovascular population health . European Heart Journal , 46 ( 20 ), 1907 – 1916 . doi: 10.1093/eurheartj/ehaf125 OpenUrl CrossRef PubMed 21. ↵ Horváth , I. , Barnes , P. J. , Loukides , S. , Sterk , P. J. , Högman , M. , Olin , A.-C. , … Vink , T. J . ( 2017 ). A European Respiratory Society technical standard: exhaled biomarkers in lung disease . European Respiratory Journal , 49 ( 4 ), 1600965 . doi: 10.1183/13993003.00965-2016 OpenUrl Abstract / FREE Full Text 22. ↵ Cockcroft , D. W. , & Gault , M. H . ( 1976 ). Prediction of creatinine clearance from serum creatinine . Nephron , 16 ( 1 ), 31 – 41 . doi: 10.1159/000180580 OpenUrl CrossRef PubMed Web of Science 23. Winter , M. A. , Guhr , K. N. , & Berg , G. M . ( 2012 ). Impact of various body weights and serum creatinine concentrations on the bias and accuracy of the cockcroft-gault equation . 
Pharmacotherapy , 32 ( 7 ), 604 – 612 . doi: 10.1002/J.1875-9114.2012.01098.X OpenUrl CrossRef PubMed 24. Brown , D. L. , Masselink , A. J. , & Lalla , C. D . ( 2013 ). Functional range of creatinine clearance for renal drug dosing: a practical solution to the controversy of which weight to use in the Cockcroft-Gault equation . The Annals of pharmacotherapy , 47 ( 7–8 ), 1039 – 44 . doi: 10.1345/aph.1S176 OpenUrl CrossRef PubMed 25. ↵ Delgado , C. , Baweja , M. , Crews , D. C. , Eneanya , N. D. , Gadegbeku , C. A. , Inker , L. A. , … Powe , N. R . ( 2022 ). A Unifying Approach for GFR Estimation: Recommendations of the NKF-ASN Task Force on Reassessing the Inclusion of Race in Diagnosing Kidney Disease . American Journal of Kidney Diseases , 79 ( 2 ), 268 – 288 .e1. doi: 10.1053/j.ajkd.2021.08.003 OpenUrl CrossRef PubMed 26. ↵ Knuuti , J. , Wijns , W. , Saraste , A. , Capodanno , D. , Barbato , E. , Funck-Brentano , C ., … ESC Scientific Document Group. ( 2020 ). 2019 ESC Guidelines for the diagnosis and management of chronic coronary syndromes . European heart journal , 41 ( 3 ), 407 – 477 . doi: 10.1093/eurheartj/ehz425 OpenUrl CrossRef PubMed 27. Gulati , M. , Levy , P. D. , Mukherjee , D. , Amsterdam , E. , Bhatt , D. L. , Birtcher , K. K. , … Shaw , L. J . ( 2022 ). 2021 AHA/ACC/ASE/CHEST/SAEM/SCCT/SCMR Guideline for the Evaluation and Diagnosis of Chest Pain . Journal of Cardiovascular Computed Tomography , 16 ( 1 ), 54 – 122 . doi: 10.1016/j.jcct.2021.11.009 OpenUrl CrossRef PubMed 28. ↵ Barbarash , O. L. , Karpov , Y. A. , Panov , A. V. , Akchurin , R. S. , Alekyan , B. G. , Alekhin , M. N. , … Yakushin , S. S . ( 2024 ). 2024 Clinical practice guidelines for Stable coronary artery disease . Russian Journal of Cardiology , 29 ( 9 ), 6110 . doi: 10.15829/1560-4071-2024-6110 OpenUrl CrossRef 29. ↵ Marzoog , B. A. , Chomakhidze , P. , Gognieva , D. , Gagarina , N. V. , Silantyev , A. , Suvorov , A. , … Kopylov , P . ( 2024 ). 
Machine Learning Model Discriminate Ischemic Heart Disease Using Breathome Analysis . Biomedicines , 12 ( 12 ), 2814 . doi: 10.3390/biomedicines12122814 OpenUrl CrossRef PubMed 30. ↵ Nardi Agmon , I. , Broza , Y. Y. , Alaa , G. , Eisen , A. , Hamdan , A. , Kornowski , R. , & Haick , H. ( 2022 ). Detecting Coronary Artery Disease Using Exhaled Breath Analysis . Cardiology , 147 ( 4 ), 389 – 397 . doi: 10.1159/000525688 OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted November 05, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following The Imperative for Innovation: Gradient Boosting Capabilities in Diagnosing Ischemic Heart Disease Using Exhaled Breath Analysis Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share The Imperative for Innovation: Gradient Boosting Capabilities in Diagnosing Ischemic Heart Disease Using Exhaled Breath Analysis Basheer Abdullah Marzoog , Anastasia Stroeva , Philipp Kopylov medRxiv 2025.11.01.25339309; doi: https://doi.org/10.1101/2025.11.01.25339309 Share This Article: Copy Citation Tools The Imperative for Innovation: Gradient Boosting Capabilities in Diagnosing Ischemic Heart Disease Using Exhaled Breath Analysis Basheer Abdullah Marzoog , Anastasia Stroeva , Philipp Kopylov medRxiv 2025.11.01.25339309; doi: https://doi.org/10.1101/2025.11.01.25339309 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Cardiovascular Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and 
Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffb74ada99f1640',t:'MTc3OTQ0OTA3OQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Outcome instruments

MUSA

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00