Full text
36,951 characters
· extracted from
preprint-html
· click to expand
Automated detection of bicuspid aortic valve from echocardiographic reports using natural language processing: a large-scale Veterans Affairs study | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Automated detection of bicuspid aortic valve from echocardiographic reports using natural language processing: a large-scale Veterans Affairs study Annie E. Bowles , View ORCID Profile Julie A. Lynch , Francisca Bermudez , Gabrielle E. Shakt , Tia DiNatale , Kathryn M. Pridgen , Renae L. Judy , View ORCID Profile Michael G. Levin , Katherine Hartmann , View ORCID Profile Scott M. Damrauer , Patrick R. Alba doi: https://doi.org/10.1101/2025.06.30.25330573 Annie E. 
Bowles 1 VA Informatics and Computing Infrastructure (VINCI), VA Salt Lake City Health Care System , Salt Lake City, UT, USA MA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: annie.bowles{at}va.gov Julie A. Lynch 1 VA Informatics and Computing Infrastructure (VINCI), VA Salt Lake City Health Care System , Salt Lake City, UT, USA 2 Division of Epidemiology, Department of Internal Medicine, University of Utah School of Medicine , Salt Lake City, UT, USA PhD, MBA, RN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Julie A. Lynch Francisca Bermudez 3 Georgetown University School of Medicine , Washington, DC, USA 4 Department of Surgery, Perelman School of Medicine and the University of Pennsylvania , Philadelphia, PA, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Gabrielle E. Shakt 4 Department of Surgery, Perelman School of Medicine and the University of Pennsylvania , Philadelphia, PA, USA 5 Corporal Michael J. Crescenz VA Medical Center , Philadelphia, PA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tia DiNatale 1 VA Informatics and Computing Infrastructure (VINCI), VA Salt Lake City Health Care System , Salt Lake City, UT, USA MPH Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kathryn M. Pridgen 1 VA Informatics and Computing Infrastructure (VINCI), VA Salt Lake City Health Care System , Salt Lake City, UT, USA MA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Renae L. Judy 4 Department of Surgery, Perelman School of Medicine and the University of Pennsylvania , Philadelphia, PA, USA 5 Corporal Michael J. 
Crescenz VA Medical Center , Philadelphia, PA, USA MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Michael G. Levin 5 Corporal Michael J. Crescenz VA Medical Center , Philadelphia, PA, USA 6 Department of Medicine, Perelman School of Medicine and the University of Pennsylvania , Philadelphia, PA, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Michael G. Levin Katherine Hartmann 7 Department of Radiology, Hospital of the University of Pennsylvania , Philadelphia, PA, USA MD, PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Scott M. Damrauer 4 Department of Surgery, Perelman School of Medicine and the University of Pennsylvania , Philadelphia, PA, USA 5 Corporal Michael J. Crescenz VA Medical Center , Philadelphia, PA, USA MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Scott M. Damrauer Patrick R. Alba 1 VA Informatics and Computing Infrastructure (VINCI), VA Salt Lake City Health Care System , Salt Lake City, UT, USA 2 Division of Epidemiology, Department of Internal Medicine, University of Utah School of Medicine , Salt Lake City, UT, USA MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Background Bicuspid aortic valve (BAV) is the most common congenital heart defect but often evades timely diagnosis due to variable clinical presentations. Prior to October 2024, no specific diagnosis code existed for BAV, limiting retrospective identification. Objectives To develop and validate a natural language processing (NLP) system for automated extraction of heart valve morphology from echocardiographic reports, with focus on BAV detection. 
Methods We developed a rule-based NLP system using MedSpaCy to analyze echocardiographic reports from the Veterans Affairs Corporate Data Warehouse. The system was trained on 555 manually annotated reports and validated on 170 held-out reports. Performance was measured using precision, recall, and F1-score for valve leaflet structure identification. Results The NLP system achieved excellent performance for BAV detection with precision of 0.984, recall of 0.955, and F1-score of 0.969. When applied to 14,453,591 echocardiographic documents from 3,478,658 patients, the system identified 84,019 patients (2.42%) with affirmed BAV. Among patients identified by the ICD-10 code Q23.81, NLP showed 86.1% concordance, with manual review confirming NLP accuracy in discordant cases. Conclusions This NLP approach enables large-scale retrospective identification of BAV patients from clinical text, creating the largest BAV cohort to date and facilitating future cardiovascular research and clinical decision-making. Introduction Advances in echocardiography have substantially improved screening capabilities for valvular heart disease over the past decade. However, the unstructured free-text format of echocardiographic reports creates significant barriers to systematic data analysis and large-scale research applications. 1 , 2 Lack of standardization in report formatting and time-intensive manual extraction limit our ability to analyze echocardiographic data on a larger scale. Bicuspid aortic valve (BAV) is the most common congenital cardiac malformation, affecting 0.5-2% of the general population. 3 This condition results from developmental fusion of two aortic valve cusps, creating a central raphe and eliminating a functional commissure. The resulting structural abnormalities disrupt normal aortic valve hemodynamics and predispose patients to serious cardiovascular complications including aortic stenosis, aortic regurgitation, and thoracic aortic aneurysm formation. 
4 Given the wide spectrum of clinical presentations—with many patients remaining asymptomatic for years—BAV frequently escapes early detection, 5 , 6 emphasizing the critical importance of systematic screening and identification strategies. The primary diagnostic modality for BAV is transthoracic echocardiography, with findings typically documented in clinical reports. 4 , 5 Until October 2024, the International Classification of Diseases (ICD) lacked a specific code for BAV, creating substantial challenges for retrospective patient identification and epidemiological studies. Natural language processing (NLP) has emerged as a powerful tool for extracting structured information from clinical text, with several studies successfully applying NLP to echocardiographic reports for valve assessment and quantitative measurements. 7 – 13 Recent investigations have expanded beyond single-valve analysis to comprehensive evaluation of all four cardiac valves. 1 , 14 These advances demonstrate the potential for large-scale automated analysis of cardiac valve data, creating new opportunities for studying complex conditions like BAV. This study presents a novel NLP framework specifically designed to extract heart valve morphological information from echocardiographic reports, with particular emphasis on accurate BAV identification. Using a comprehensive dataset from the U.S. Department of Veterans Affairs (VA) health system, we developed and rigorously validated an automated approach for systematic extraction of BAV-related clinical information. We examine system performance characteristics, implementation challenges, and the broader implications for improving BAV diagnosis and cardiovascular research. Methods Study Setting and Data Source The VA operates the largest integrated healthcare system in the United States, encompassing 170 medical centers and 1,380 outpatient clinics across the United States, its territories, and the Philippines. 
The Corporate Data Warehouse (CDW) maintains comprehensive electronic health record (EHR) data for over 25 million patients dating back to 1994. This retrospective study was conducted using VA CDW clinical data with institutional review board approval and waiver of informed consent and HIPAA authorization from the Philadelphia and Salt Lake City VA Medical Centers. Study Population and Data Selection The study cohort included all patients who underwent at least one echocardiography procedure within the VA healthcare system. Procedures were identified using a comprehensive list of Current Procedural Terminology (CPT) codes (see Appendix). Associated echocardiographic reports were extracted from CDW. Given the primary research focus on BAV identification, an additional keyword search was implemented to capture documents containing variants of “bicuspid” and “bileaflet” terminology (complete keyword list provided in Appendix). The final cohort comprised 14,453,591 echocardiographic documents, corresponding to 3,478,658 unique patients. For model development and validation, 850 reports were randomly selected for manual annotation using a stratified approach: 555 reports (65%) for training, 125 reports (15%) for validation, and 170 reports (20%) for final testing. Patient-level separation was maintained between training and testing cohorts to ensure unbiased performance evaluation. As additional validation, we identified VA patients who received the newly implemented BAV-specific ICD-10 code (Q23.81) following echocardiographic procedures and compared these cases with NLP-identified BAV patients. Manual Annotation Protocol Clinical reports were annotated using the eHOST annotation platform 15 by two registered nurses with extensive chart review experience. 
The annotation guideline specified identification of four cardiac valves (aortic, mitral, tricuspid, pulmonary), associated leaflet structures when present, and creation of relationships between valve and structure entities. Given the limited terminology used for cardiac valve and leaflet structure description, the eHOST pre-annotation tool was utilized to highlight known keywords prior to manual review, with annotators instructed to identify any missed entities. Context statements including uncertainty, negation, historical mentions, or non-patient experiencers were captured and linked to corresponding valve entities. Prosthetic heart valves were annotated as distinct entities. For bicuspid leaflet structures, annotators specified whether the valve was functionally or congenitally bicuspid (default: unspecified). The final annotation schema included 14 entities, 2 relationship types (valve-to-leaflet and valve-to-context), and 1 attribute classification. A total of 1,641 annotations were created, with 315 utilized in the held-out test dataset. NLP System Architecture The NLP system was designed as a rule-based framework for identifying cardiac valves and associated leaflet morphology in echocardiographic reports. Development utilized MedSpaCy, 16 a Python clinical NLP library built on the spaCy framework. Figure 1 illustrates the system architecture and processing logic. Download figure Open in new tab Figure 1. Overview of the NLP system logic with an example of the input and output structure. The system initiates document processing by identifying mentions of the four cardiac valves: aortic, tricuspid, pulmonary, and mitral. While valve acronyms were accepted, complete valve phrases were required for entity recognition (isolated terms like “aortic” were excluded). Each identified heart valve served as an anchor point for subsequent concept attachment, with valve identification representing the minimum system output. 
Following valve identification, the system searched the containing sentence for leaflet structure terms: bicuspid, tricuspid, and normal. Priority assignment favored more specific terms (bicuspid, tricuspid) based on proximity to the valve mention. Normal structure classification was assigned only when specific terms were absent. To distinguish functional BAV from congenital variants, the system searched for terms indicating functional bicuspid morphology. Prosthetic valve identification was implemented to differentiate mechanical from native BAV, as many prosthetic aortic valves exhibit bicuspid structure. The ConText algorithm 17 was then applied at the sentence level to identify uncertainty, negation, historical, and non-patient experiencer modifiers related to valve leaflet structure. To enhance BAV detection sensitivity, a custom component analyzed aortic valve instances lacking sentence-level leaflet structure assignment by creating a 100-token window extension and flagging bicuspid terminology within this expanded context. System output included the document identification, valve type and raw text, leaflet structure type and raw text, context flags, functional and prosthetic indicators, the sentence containing the heart valve, and the heart valve start and end indices. The NLP system has been made publicly available at https://github.com/VINCI-AppliedNLP/bicuspid-aortic-valve . Results NLP System Performance The system was evaluated on the 315 instances from the held-out test set where both heart valve and leaflet structure annotations were present. Table 1 presents detailed performance metrics at the instance level, stratified by valve type. Given the primary focus on BAV identification, the bicuspid aortic valve subset is presented separately within the aortic valve group. View this table: View inline View popup Download powerpoint Table 1. System performance on the identification of each heart valve and corresponding leaflet structure. 
Support is the number of annotated heart valves where the leaflet structure is given. For BAV detection specifically, the system achieved recall of 0.955 and precision of 0.984, corresponding to an F1-score of 0.969. Overall system performance across all valve types demonstrated precision of 0.899, recall of 0.937, and F1-score of 0.918. The total support count of 315 represents non-overlapping instances, with bicuspid aortic valve cases constituting a subset of the broader aortic valve category. Patient-Level BAV Classification Application of the NLP system to the complete dataset of 14,453,591 documents identified 655,762 documents (4.54%) and 84,019 patients (2.42%) with affirmed BAV. Patient-level BAV classification required at least one instance of aortic valve with bicuspid structure that was neither prosthetic nor functionally bicuspid. Given the congenital nature of BAV, instances with historical modifiers were considered affirmed cases. Patients with exclusively uncertain BAV mentions were classified as possible BAV. For patients who underwent aortic valve replacement procedures, affirmed BAV classification required evidence of congenital BAV in pre-procedural reports. Comparison with ICD-10 BAV Classification Analysis of patients assigned the BAV-specific ICD-10 code (Q23.81) identified 1,573 individuals who received this diagnosis following echocardiographic procedures. Among these, 1,355 patients (86.1%) were concordantly identified as BAV cases by the NLP system. To investigate the 218 discordant cases (ICD-10 positive, NLP negative), 50 patients were randomly selected for detailed manual review of corresponding echocardiographic reports and NLP output. The majority of these reports explicitly documented tricuspid aortic valve morphology or structurally normal valves, with the NLP system correctly extracting this information. 
Less frequently, reports indicated poor aortic valve visualization or absent morphological description, resulting in appropriate null NLP output. Importantly, no discordant cases were attributed to NLP system errors, suggesting potential issues with ICD-10 coding accuracy or documentation of BAV diagnosis in reports not captured by our selection criteria. Discussion We successfully developed and validated an NLP system capable of accurately extracting heart valve leaflet morphology from echocardiographic reports, with particular strength in BAV identification. Implementation across the VA healthcare system identified 84,019 patients with affirmed BAV, representing the largest BAV cohort assembled to date, exceeding previous studies by more than ten-fold. 18 – 21 Previous BAV research has relied on CPT codes for aortic valve replacement procedures and ICD codes related to congenital cardiac malformations for patient identification. 18 However, these approaches lack specificity for BAV and may substantially underestimate disease prevalence. Our NLP approach enables researchers to create focused BAV cohorts that extend well beyond the limitations of structured EHR data elements. The high-performance metrics achieved on the independent test set (F1-score of 0.969 for BAV) demonstrate the system’s reliability for accurate case identification. The recent introduction of the BAV-specific ICD-10 code (Q23.81) occurred near the completion of our study, resulting in limited overlap between NLP-identified and ICD-coded patients within our dataset. Manual review of discordant cases revealed that ICD-10 positive but NLP-negative patients had clear documentation of tricuspid or normal aortic valve morphology, suggesting either documentation of BAV in external reports not captured by our analysis or potential diagnostic coding errors. While this ICD-10 code provides a new avenue for BAV identification, it only became available in October 2024. 
Our NLP system enables comprehensive retrospective identification of BAV patients using historical EHR data predating this coding implementation. This work demonstrates the potential for NLP technology to unlock valuable clinical information embedded within unstructured medical text. The ability to systematically identify large BAV cohorts from historical echocardiographic data creates new opportunities for cardiovascular research, including studies of disease progression, treatment outcomes, and genetic associations. The automated approach also supports clinical decision-making by facilitating identification of patients who may benefit from specialized cardiovascular care or surveillance protocols. Error Analysis and System Limitations Analysis of NLP output errors revealed that the majority involved tricuspid and normal leaflet structure classification. The dual meaning of “tricuspid” as both a valve name and structure type created classification challenges when both references appeared in the same sentence (e.g., “TRICUSPID VALVE: tricuspid is normal in morphology”). More sophisticated disambiguation logic would be required to address these instances. Similarly, “normal” references to concepts other than leaflet structure (e.g., “Aortic valve excursion is normal”) were incorrectly classified as normal leaflet morphology. This issue particularly affected pulmonary valve classification, which had the lowest annotation support (50 instances) and frequent references to normal functional parameters. The system’s prioritization of specific terms (tricuspid, bicuspid) before general terms (normal) improved overall performance but requires refinement for comprehensive valve assessment. Additional error sources included incomplete valve phrase identification, unrecognized prosthetic valves, and spelling variants not captured during training. These issues were relatively infrequent but highlight areas for future system enhancement. 
Study Limitations Several limitations merit consideration. The NLP approach only identifies BAV in patients who underwent echocardiographic procedures within the VA system, potentially missing cases diagnosed through external healthcare providers. The system was specifically developed and validated using echocardiographic reports, and performance may decrease when applied to other clinical note types. To minimize overall errors, the system assigns each structure term to only one valve entity, though some reports use single structure terms to describe multiple valves (e.g., “AV, MV, TV, PV are normal”). More sophisticated logic would be needed to handle these instances, though their rarity in our dataset did not justify implementation complexity. The literature describes unicuspid and quadricuspid valve morphologies as extremely rare variants. No instances were identified during development, so these structure types are not included in the current system. The prosthetic valve detection rules were optimized for aortic valves given the BAV focus, and additional development would be needed for comprehensive prosthetic valve identification across all cardiac valves. Conclusion We present a validated rule-based NLP system that accurately extracts heart valve leaflet morphology from clinical echocardiographic reports. The system demonstrated excellent performance for BAV identification, enabling creation of the largest BAV patient cohort reported to date. This automated approach addresses the historical challenge of BAV identification in structured EHR data and facilitates large-scale retrospective cardiovascular research. The successful implementation of NLP for cardiac valve assessment illustrates the broader potential for automated clinical text processing to enhance both research capabilities and clinical care. 
As healthcare systems increasingly recognize the value of unstructured clinical data, sophisticated NLP approaches will play an essential role in translating documented clinical observations into actionable insights for patient care and scientific discovery. In this article, we present a custom NLP rule-based system that accurately extracts heart valve leaflet structure information from clinical notes. The initial use case for this data was the identification of BAV, which has not been available in structured data elements of the EHR until quite recently. The NLP system identified 84,019 patients with BAV from free-text echocardiography reports, making this the largest cohort of patients with BAV to our knowledge and enabling large-scale retrospective studies of this phenotype. Data Availability Patient-level data are already accessible to all VA researchers with appropriate IRB approvals. Acknowledgements This work was supported using resources and facilities of the Department of Veterans Affairs (VA) Informatics and Computing Infrastructure (VINCI), including NLP resources, which is funded under the research priority to Put VA Data to Work for Veterans (VA ORD 24-D4V-02). This publication does not represent the views of the Department of Veterans Affairs or the United States Government. Footnotes Conflicts of interest: AEB, JAL, TD, and PRA report grants from Alnylam Pharmaceuticals, Inc., AstraZeneca Pharmaceuticals LP, Biodesix, Inc, Janssen Pharmaceuticals, Inc., Novartis International AG, and Parexel International Corporation through the University of Utah or Western Institute for Veteran Research outside the submitted work. MGL reports grants from the Doris Duke Foundation (2023-2024), research funding to the institution from MyOme, and consulting fees from BridgeBio outside the submitted work. SMD reports grants from the National Heart Lung and Blood Institute, in kind support from Novo Nordisk, and consulting fees from Tourmaline Bio, outside the current work. 
Abbreviations BAV bicuspid aortic valve CDW Corporate Data Warehouse CPT Current Procedural Terminology EHR electronic health record ICD International Classification of Diseases NLP natural language processing VA U.S. Department of Veterans Affairs References 1. ↵ Xie F , Lee M sum , Allahwerdy S , Getahun D , Wessler B , Chen W. Identifying the Severity of Heart Valve Stenosis and Regurgitation Among a Diverse Population Within an Integrated Health Care System: Natural Language Processing Approach . JMIR Cardio . 2024 ; 8 : e60503 . doi: 10.2196/60503 OpenUrl CrossRef 2. ↵ Gonzalez-Hernandez G , Sarker A , O’Connor K , Savova G. Capturing the Patient’s Perspective: a Review of Advances in Natural Language Processing of Health-Related Text . Yearb Med Inform . 2017 ; 26 ( 01 ): 214 – 227 . doi: 10.15265/IY-2017-029 OpenUrl CrossRef PubMed 3. ↵ Siu SC , Silversides CK . Bicuspid Aortic Valve Disease . J Am Coll Cardiol . 2010 ; 55 ( 25 ): 2789 – 2800 . doi: 10.1016/j.jacc.2009.12.068 OpenUrl FREE Full Text 4. ↵ Tessler I , Albuisson J , Goudot G , et al. Bicuspid Aortic Valve: Genetic and Clinical Insights . AORTA . 2021 ; 09 ( 04 ): 139 – 146 . doi: 10.1055/s-0041-1730294 OpenUrl CrossRef 5. ↵ Liu T , Xie M , Lv Q , et al. Bicuspid Aortic Valve: An Update in Morphology, Genetics, Biomarker, Complications, Imaging Diagnosis and Treatment . Front Physiol . 2019 ; 9 . doi: 10.3389/fphys.2018.01921 OpenUrl CrossRef PubMed 6. ↵ Michelena HI , Desjardins VA , Avierinos JF , et al. Natural History of Asymptomatic Patients With Normally Functioning or Minimally Dysfunctional Bicuspid Aortic Valve in the Community . Circulation . 2008 ; 117 ( 21 ): 2776 – 2784 . doi: 10.1161/CIRCULATIONAHA.107.740878 OpenUrl Abstract / FREE Full Text 7. ↵ Solomon MD , Tabada G , Allen A , Sung SH , Go AS . Large-scale identification of aortic stenosis and its severity using natural language processing on electronic health records . Cardiovasc Digit Health J . 2021 ; 2 ( 3 ): 156 – 163 . 
doi: 10.1016/j.cvdhj.2021.03.003 OpenUrl CrossRef 8. Fontenla-Seco Y , Lama M , González-Salvado V , Peña-Gil C , Bugarín-Diz A. A framework for the automatic description of healthcare processes in natural language: Application in an aortic stenosis integrated care process . J Biomed Inform . 2022 ; 128 : 104033 . doi: 10.1016/j.jbi.2022.104033 OpenUrl CrossRef 9. Nath C , Albaghdadi MS , Jonnalagadda SR . A Natural Language Processing Tool for Large-Scale Data Extraction from Echocardiography Reports . PLoS One . 2016 ; 11 ( 4 ): e0153749 . doi: 10.1371/journal.pone.0153749 OpenUrl CrossRef PubMed 10. Dong T , Sunderland N , Nightingale A , et al. Development and Evaluation of a Natural Language Processing System for Curating a Trans-Thoracic Echocardiogram (TTE) Database . Bioengineering (Basel) . 2023 ; 10 ( 11 ). doi: 10.3390/bioengineering10111307 OpenUrl CrossRef PubMed 11. Vaid A , Argulian E , Lerakis S , et al. Multi-center retrospective cohort study applying deep learning to electrocardiograms to identify left heart valvular dysfunction . Communications Medicine . 2023 ; 3 ( 1 ): 24 . doi: 10.1038/s43856-023-00240-w OpenUrl CrossRef 12. Strange G , Stewart S , Watts A , Playford D. Enhanced detection of severe aortic stenosis via artificial intelligence: a clinical cohort study . Open Heart . 2023 ; 10 ( 2 ): e002265 . doi: 10.1136/openhrt-2023-002265 OpenUrl Abstract / FREE Full Text 13. ↵ Ueda D , Yamamoto A , Ehara S , et al. Artificial intelligence-based detection of aortic stenosis from chest radiographs . European Heart Journal - Digital Health . 2022 ; 3 ( 1 ): 20 – 28 . doi: 10.1093/ehjdh/ztab102 OpenUrl CrossRef 14. ↵ Patterson O V. , Freiberg MS , Skanderson M , J. Fodeh S , Brandt CA , DuVall SL . Unlocking echocardiogram measurements for heart disease research through natural language processing . BMC Cardiovasc Disord . 2017 ; 17 ( 1 ): 151 . doi: 10.1186/s12872-017-0580-8 OpenUrl CrossRef 15. ↵ Leng C. eHOST Annotation Tool . 2011 . 
Accessed January 13, 2025 . https://github.com/chrisleng/ehost 16. ↵ Eyre H , Chapman AB , Peterson KS , et al. Launching into clinical space with medspaCy: a new clinical text processing toolkit in Python . AMIA Annu Symp Proc . 2021 ; 2021 : 438 – 447 . OpenUrl 17. ↵ Harkema H , Dowling JN , Thornblade T , Chapman WW . ConText: An algorithm for determining negation, experiencer, and temporal status from clinical reports . J Biomed Inform . 2009 ; 42 ( 5 ): 839 – 851 . doi: 10.1016/j.jbi.2009.05.002 OpenUrl CrossRef PubMed Web of Science 18. ↵ Glotzbach JP , Hanson HA , Tonna JE , et al. Familial Associations of Prevalence and Cause-Specific Mortality for Thoracic Aortic Disease and Bicuspid Aortic Valve in a Large-Population Database . Circulation . 2023 ; 148 ( 8 ): 637 – 647 . doi: 10.1161/CIRCULATIONAHA.122.060439 OpenUrl CrossRef 19. Lim MS , Strange G , Playford D , Stewart S , Celermajer DS . Characteristics of Bicuspid Aortic Valve Disease and Stenosis: The National Echo Database of Australia . J Am Heart Assoc . 2021 ; 10 ( 17 ). doi: 10.1161/JAHA.121.020785 OpenUrl CrossRef 20. Yoon SH , Kim WK , Dhoble A , et al. Bicuspid Aortic Valve Morphology and Outcomes After Transcatheter Aortic Valve Replacement . J Am Coll Cardiol . 2020 ; 76 ( 9 ): 1018 – 1030 . doi: 10.1016/j.jacc.2020.07.005 OpenUrl FREE Full Text 21. ↵ Song S , Seo J , Cho I , Hong GR , Ha JW , Shim CY . Progression and Outcomes of Non-dysfunctional Bicuspid Aortic Valve: Longitudinal Data From a Large Korean Bicuspid Aortic Valve Registry . Front Cardiovasc Med . 2021 ; 7 . doi: 10.3389/fcvm.2020.603323 OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted July 02, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. 
Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Automated detection of bicuspid aortic valve from echocardiographic reports using natural language processing: a large-scale Veterans Affairs study Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Automated detection of bicuspid aortic valve from echocardiographic reports using natural language processing: a large-scale Veterans Affairs study Annie E. Bowles , Julie A. Lynch , Francisca Bermudez , Gabrielle E. Shakt , Tia DiNatale , Kathryn M. Pridgen , Renae L. Judy , Michael G. Levin , Katherine Hartmann , Scott M. Damrauer , Patrick R. Alba medRxiv 2025.06.30.25330573; doi: https://doi.org/10.1101/2025.06.30.25330573 Share This Article: Copy Citation Tools Automated detection of bicuspid aortic valve from echocardiographic reports using natural language processing: a large-scale Veterans Affairs study Annie E. Bowles , Julie A. Lynch , Francisca Bermudez , Gabrielle E. Shakt , Tia DiNatale , Kathryn M. Pridgen , Renae L. Judy , Michael G. Levin , Katherine Hartmann , Scott M. Damrauer , Patrick R. 
Alba medRxiv 2025.06.30.25330573; doi: https://doi.org/10.1101/2025.06.30.25330573 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Cardiovascular Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15228) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6597) Geriatric Medicine (668) Health Economics (997) Health Informatics (4534) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9230) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a0047ddf1d578650',t:'MTc3OTU0MzgyOA=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical
version.