Performance of a Protein Language Model for Variant Annotation in Cardiac Disease

doi:10.1101/2024.06.04.24308460

Performance of a Protein Language Model for Variant Annotation in Cardiac Disease

2024 · doi:10.1101/2024.06.04.24308460

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 34,951 characters · extracted from preprint-html · click to expand

Performance of a Protein Language Model for Variant Annotation in Cardiac Disease | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Performance of a Protein Language Model for Variant Annotation in Cardiac Disease View ORCID Profile Aviram Hochstadt , Chirag Barbhaiya , View ORCID Profile Anthony Aizer , Scott Bernstein , View ORCID Profile Marina Cerrone , Leonid Garber , Douglas Holmes , Robert J Knotts , View ORCID Profile Alex Kushnir , Jacob Martin , View ORCID Profile David Park , Michael Spinelli , Felix Yang , Larry A Chinitz , Lior Jankelson doi: https://doi.org/10.1101/2024.06.04.24308460 Aviram Hochstadt 
a NYU Langone Health and the NYU Grossman School of Medicine MD, MPH Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Aviram Hochstadt Chirag Barbhaiya a NYU Langone Health and the NYU Grossman School of Medicine MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Anthony Aizer a NYU Langone Health and the NYU Grossman School of Medicine MD, MSc Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Anthony Aizer Scott Bernstein a NYU Langone Health and the NYU Grossman School of Medicine MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Marina Cerrone a NYU Langone Health and the NYU Grossman School of Medicine MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Marina Cerrone Leonid Garber a NYU Langone Health and the NYU Grossman School of Medicine MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Douglas Holmes a NYU Langone Health and the NYU Grossman School of Medicine MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Robert J Knotts a NYU Langone Health and the NYU Grossman School of Medicine MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alex Kushnir a NYU Langone Health and the NYU Grossman School of Medicine MD, PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Alex Kushnir Jacob Martin a NYU Langone Health and the NYU Grossman School of Medicine MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site David Park a NYU Langone Health and the NYU Grossman School of Medicine MD, PhD Find this author on 
Google Scholar Find this author on PubMed Search for this author on this site ORCID record for David Park Michael Spinelli a NYU Langone Health and the NYU Grossman School of Medicine MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Felix Yang a NYU Langone Health and the NYU Grossman School of Medicine MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Larry A Chinitz a NYU Langone Health and the NYU Grossman School of Medicine MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Lior Jankelson a NYU Langone Health and the NYU Grossman School of Medicine MD, PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: Lior.Jankelson{at}nyulangone.org Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Introduction Genetic testing is a cornerstone in the assessment of many cardiac diseases. However, variants are frequently classified as Variants of Unknown Significance (VUS), limiting the utility of testing. Recently, the DeepMind group (Google, USA) developed AlphaMissense, a unique Artificial Intelligence (AI) based model, based on language model principles for the prediction of missense variant pathogenicity. Objective To report on the performance of AlphaMissense, accessed by VarCardio, an open web-based variant annotation engine, in a real-world cardiovascular genetics center. Methods All genetic variants from an inherited arrhythmia program were examined using AlphaMissense via VarCard.io and compared to the ClinVar variant classification system, as well as another variant classification platform (Franklin by Genoox). The mutation reclassification rate and genotype phenotype concordance were examined for all variants in the study. Results We included 266 patients with heritable cardiac diseases, harboring 339 missense variants. 
Of those, 230 (67.8%) were classified by ClinVar as either VUS or non-classified. Using VarCard.io, 198 VUSs (86.1%, CI 80.9-90.3%) were reclassified to either Likely Pathgenic (LP) or Likely benign (LB). The reclassification rate was significantly higher for VarCard.io than for Franklin (86.1% vs 34.8%, p<0.001). Genotype-Phenotype concordance was highly aligned using VarCard.io predictions, at 95.9% (CI 92.8-97.9%) concordance rate. For 109 variants classified as Pathogenic, LP, Benign or LB by ClinVar, concordance with VarCard.io was high (90.5%). Conclusion AlphaMissense, accessed via VarCard.io, may be a highly efficient tool for cardiac genetic variant interpretation. The engine’s notable performance in assessing variants that are classified as VUS in ClinVar, demonstrates its potential to enhance cardiac genetic testing. Introduction Genetic testing for monogenic disease has transformed the care of patients affected by a wide spectrum of cardiovascular conditions by enabling the identification of specific mutations which explain individual phenotypes 1 . The enhanced precision that comes with genetic analysis results in improved inference of risk, tailoring of gene-specific targeted therapy (i.e. Nadolol in LQT1), and upstream prevention using cascade screening in family members. In recent cohorts, as many as 50% of patients with genetically linked cardiovascular conditions are found to be positive for a reportable genetic variant. 1 – 3 A major limitation of genetic testing remains the finding of a Variant of Unknown Significance (VUS), occurring in up to 40% of genetic tests. 4 – 7 Recently, the DeepMind group (Google, USA) developed AlphaFold and AlphaMissense 8 , 9 , Artificial Intelligence (AI) based models for the prediction of protein folding and variant pathogenicity, respectively. 
Here, we report the performance of AlphaMissense, interfaced via VarCard.io 10 , an open web-based variant annotation engine allowing cDNA and protein change queries aligned with ClinGen-derived gene-disease correlation, in a real-world cardiovascular genetics center. Methods AlphaMissense and VarCard.io The development and testing of AlphaMissense have been reported elsewhere 9 .
ClinVar annotations were recorded as per ACMG guidelines to be either Pathogenic (P), Likely pathogenic (LP), Variant of unknown significance (VUS), Benign (B) or likely benign (LB). If the variant did not appear on the ClinVar database it was treated in this study as a VUS unless otherwise specified. If a variant had a mixed classification (i.e. more than one entry in ClinVar), it was treated as a VUS if at least one annotation was a VUS. Assessing clinical Genotype-Phenotype relationship All genes in the database were assessed using the ClinGen 13 framework (ClinGen, National Institutes of Health) as to their known gene-disease correlation. Patient records were reviewed, and the clinical phenotype was assessed by using all clinical data available including patient’s history, ECG tracings, provocative tests and imaging (e.g. echocardiography, cardiac MRI, Calcium Pyrophosphate imaging). The genotype-phenotype correlation was considered as positively concordant If: 1) a patient had a variant in a gene with known gene-disease correlation compatible with the designated clinical phenotype for this gene by ClinGen, and 2) the variant was classified as P/LP (positive concordance). Alternatively, negative concordance was determined if: 1) a variant was found in a gene not compatible with the patients’ clinical phenotype by ClinGen, and 2) the variant was classified as B/LB. Otherwise, the genotype-phenotype was considered as discordant (i.e. a patient with a P/LP variant in a gene not compatible with the patients’ phenotype). Patients were excluded from analysis if their phenotype was indeterminable due to missing data. Patients included in the study provided written informed consent before inclusion which was approved by the NYU Langone Health IRB committee in accordance with the Helsinki Declaration. Statistical analysis Discrete variables are reported as numbers and rates, 95% confidence intervals are calculated using the Clopper-Pearson method. 
The statistical significance of rate differences was assessed using the Chi-Square test. Results were considered significant when p-values were <0.05. All calculations were done using R version 3.3.2 (R Foundation for Statistical Computing, Vienna, Austria). Results We reviewed 623 probands in our original database, of which 266 patients were identified as meeting the inclusion criteria of having at least one genetic variant that was classified as a missense variant in a gene recognized to be associated with a cardiac phenotype by ClinGen ( Figure-1 ). These patients had a total of 339 different genetic variants. The most common phenotype was Long QT syndrome, followed by Non-ischemic Cardiomyopathy and Hypertrophic Cardiomyopathy ( Figure-1 ). The most frequently involved genes were SCN5A (implicated in Brugada syndrome, dilated cardiomyopathy and LQT syndrome), followed by KCNQ1, KCNH2 and RyR2 ( Figure-2 ). Download figure Open in new tab Figure-1 Frequency graph of patients’ most frequent phenotypes in the cohort. LQTS- Long QT syndrome, NICMP- Non-Ischemic Cardiomyopathy, HCM- Hypertrophic Cardiomyopathy. BrS-Brugada Syndrome. CPVT - Catecholaminergic Polymorphic Ventricular Tachycardia. PCCD- Progressive Cardiac Conduction Defect, SQTS- Short QT Syndrome. Download figure Open in new tab Figure-2: Variant frequency and annotation according to A. VarCard.io, B. ClinVar. P- Pathogenic, LP- Likely Pathogenic, B-Benign, LB- Likely Benign, VUS- Variant of Unknown Significance. Of the 339 variants tested in VarCard.io, 328 variants had ClinVar entries, of which 110 (33.5%) variants had a Clinically Significant Variant (CSV) annotation of pathogenicity (i.e. P, LP, B or LB without VUS entries), and the rest were classified as VUS or had mixed classification ( Table-1 ). View this table: View inline View popup Download powerpoint Table-1: Variant classification according to ClinVar. 
P- Pathogenic, LP- Likely Pathogenic, B-Benign, LB- Likely Benign, CSV- Clinically Significant Variant, VUS- Variant of Unknown Significance. Comparison of ClinVar annotation to VarCard.io For the 230 (67.8%) VUSs (i.e. variants with at least one ClinVar entry as VUS or not reported), 198 variants were reclassified by VarCard.io as either LP or LB, yielding a reclassification rate of 86.1% (CI 80.9-90.3%). These reclassifications were to LB in 111 variants (56.1%, CI 48.8% - 63.1%) and to LP in 87 variants (43.9%, CI 36.9% - 51.2%, Figure-3 ). Only 32 variants (13.9%) had an ambiguous annotation by VarCard.io. Download figure Open in new tab Figure-3: Sankey Diagram showing reclassification of Variant reclassification changes from ClinVar to VarCard.io. VUS – Variant of unknown significance. Of the 109 variants with a CSV ClinVar annotation (i.e. variants without any VUS entry), 105 remained a CSV, and the other four (3.7 %) were annotated as Ambiguous by VarCard.io. The overall agreement rate between ClinVar and VarCard.io on CSVs (i.e. same annotation in both systems) was 90.5% (CI 83.2-95.3%). This agreement rate was similar for 81 variants with ClinVar P/PL annotation and for 24 variants with ClinVar B/LB annotation (91.4% and 87.5%, respectively, p=0.865). VarCard.io genotype-phenotype concordance Of the 303 variants with a CSV annotation by VarCard.io, 269 variants were related to a patient with a defined phenotype. Of these, 258 variants (95.9%, CI 92.8-97.9%) had concordant genotype-phonotype prediction: 138 (96.5%, CI 92-98.9%) with positive concordance and 120 (95.2%, CI 89.9-98.2%) with negative concordance. There were 34 (11.2%) variants in patients without sufficient clinical data to establish a defined phenotype and thus were not included in this analysis. 
Comparison with another publicly available variant classification engine (Franklin) Using Franklin on the 230 VUS variants appearing in ClinVar resulted in reclassification to either P/LP or B/LB in only 80 variants, giving a reclassification rate of 34.8% (CI 28.6-41.3%), significantly lower than the 86.1% (CI 80.9%-90.3%) VarCard.io reclassification rate (p<0.001). For the 100 variants with CSV annotation in both ClinVar and Franklin, agreement was 100% (CI 96.4-100). Discussion In this work, we aimed to evaluate the performance of AlphaMissense, interfaced through VarCard.io, an open searchable web-based engine for variant pathogenicity prediction, on a real-world cohort of patients with heritable arrhythmia and cardiomyopathy syndromes (Central illustration). Our main findings are: Using VarCard.io, we reclassify 86.1% of VUSs to either likely pathogenic or likely benign classes. This reclassification rate was significantly greater than the reclassification rate of 35% observed in a widely used commercial variant annotation engine (Franklin). Assigned annotations by VarCard.io were highly concordant with the observed clinical phenotypes, with 95.9% genotype-phenotype concordance. We found a 90.5% agreement rate between ClinVar and VarCard.io on variants with pathogenic or benign annotations. A diagnosis of VUS in a gene with cardiac manifestations, occurring in up to 40% of genetic tests 4 – 7 , is a pressing clinical problem which may have direct implications on patients’ risk assessment, specific therapy, possible preventive strategies (such as in PKP2 mutation carriers), family screening opportunities, and patients’ anxiety. 14 , 15 As genetic panels become increasingly available and encompass more genes, the incidence of VUSs is also increasing, stressing the need for reliable and accessible variant annotation strategies. 
The AlphaMissense variant pathogenicity model has the potential to facilitate the field of genetic testing by significantly enhancing the performance of computational VUS reclassification. The use of AlphaMissense was previously shown to result in the classification of 88.8% of 69.5 million variants present on gnomAD 9 to LP or LB, compatible with our finding of 86.1% reclassification rate for ClinVar-reported VUSs. The major difference in architecture between AlphaMissesne and prevailing popular computational models such as SIFT 16 and REVEL 17 is that these models are largely based on labels of pathogenicity guided by sequence homology between species and prevalence in population databases. This introduces inherent biases, as some relatively rare variants are pathogenic, and many cardiac conditions are characterized by age-dependent penetrance. In addition, traditional supervised AI models are often trained on human-provided labels, thereby preserving a circular logic and bias. The high performance achieved by AlphaMissense is the result of the model’s unique self-supervised architecture, built and trained based on the principals of an LLM. In the case of a language model, each word is represented by a vector (embedding) which captures semantic relationships between words, allowing the model to reason about language, i.e. predict the compatibility of any word in any given context. In the case of AlphaMissense and variant prediction, vectors represent Amino Acids, and their compatibility in each genomic position is computed based on complex properties that capture their “context” within the protein. To further enhance performance, the model is then fine-tuned on a defined set of variants with well-established annotations based on extreme population frequency properties. While our results demonstrate VarCard.io’s utility, it is important to acknowledge certain limiting aspects. 
As AlphaMissense is an AI model with predictions based on complex, convoluted representations, there is no simple way to derive linear mechanistic explanations for the different predictions and their pathogenesis in the clinical phenotype. In addition, AlphaMissense does not infer on penetrance nor predict how a variant would interact with a patient’s specific multi-omic inventory to create the specific phenotype (i.e. would not predict if a specific SCN5A variant would cause Brugada Syndrome or Long QT Syndrome). Furthermore, assessing clinical concordance of genes and phenotypes might be biased as both a gene’s function and possibly pathogenicity and a patient’s phenotype may be complex and not straightforward to assess for this study’s purposes. Variants may also have incomplete penetrance and age-related penetrance, thus a patient not having a phenotype at a specific time point does not necessarily mean a specific variant is not pathogenic in nature. Finally, as suggested by ACMG, computational predictions are one component in a matrix of criteria recommended for the clinical evaluation of genetic variants. Thus, although the PP3 criterion was recently upgraded from to “supporting” to “moderate” or “strong” 18 , it should still be considered within the context of other features. In conclusion, despite the above limitations, VarCard.io, propelled by AlphaMissense, may be a highly efficient tool for cardiac genetic variant interpretation. The AlphaMissense notable performance in assessing variants that are classified as VUS in ClinVar demonstrates the potential of AI as an important step forward in the field of clinical cardiovascular genetics. Data Availability Data will be made available for researchers after IRB approval for the proposed use of the data. Central illustration Download figure Open in new tab Central Illustration Figure legend Architecture and performance of AlphaMissense and VarCard.io. Letters represent amino acid symbols. 
The AlphaMissense model is built based on principles of a large language model (LLM), aimed at predicting text. In the first step of training, the model is learning to predict amino acid sequences in a self-supervised manner. This process results in the learning of embeddings, i.e. representations of amino acids conditioned on their context (sequence), which are in turn used to infer on protein folding and amino acid compatibility. Next, the model is fine-tuned on a small subset of protein variants with known pathogenicity based on their population frequencies (i.e. extremely frequent and extremely rare). We then use AlphaMissense via the VarCard.io search engine to assess variant annotation in a cohort of patients with inherited arrhythmia and cardiomyopathy. VUS - Variant of Unknown Significance, LB - Likely Benign, LP - Likely Pathogenic.
The rationale and timing of molecular genetic testing for dilated cardiomyopathy . Can J Cardiol . 2015 ; 31 : 1309 – 1312 . OpenUrl CrossRef PubMed 4. ↵ Mazzaccara C , Lombardi R , Mirra B , Barretta F , Esposito MV , Uomo F , Caiazza M , Monda E , Losi MA , Limongelli G , D’Argenio V , Frisso G. Next-Generation Sequencing Gene Panels in Inheritable Cardiomyopathies and Channelopathies: Prevalence of Pathogenic Variants and Variants of Unknown Significance in Uncommon Genes . Biomolecules . 2022 ; 12 : 1417 . OpenUrl 5. van Lint FHM , Mook ORF , Alders M , Bikker H , Lekanne dit Deprez RH , Christiaans I. Large next-generation sequencing gene panels in genetic heart disease: yield of pathogenic variants and variants of unknown significance . Neth Heart J . 2019 ; 27 : 304 – 309 . OpenUrl CrossRef PubMed 6. Young WJ , Maung S , Ahmet S , Kirkby C , Ives C , Schilling RJ , Lowe M , Lambiase PD . The frequency of gene variant reclassification and its impact on clinical management in the inherited arrhythmia clinic . Heart Rhythm . 2024 ; 0 . doi: 10.1016/j.hrthm.2024.01.008 . OpenUrl CrossRef 7. ↵ Rehm HL , Alaimo JT , Aradhya S , Bayrak-Toydemir P , Best H , Brandon R , Buchan JG , Chao EC , Chen E , Clifford J , Cohen ASA , Conlin LK , Das S , Davis KW , Gaudio D del , Viso FD , DiVincenzo C , Eisenberg M , Guidugli L , Hammer MB , Harrison SM , Hatchell KE , Dyer LH , Hoang LU , Holt JM , Jobanputra V , Karbassi ID , Kearney HM , Kelly MA , Kelly JM , Kluge ML , Komala T , Kruszka P , Lau L , Lebo MS , Marshall CR , McKnight D , McWalter K , Meng Y , Nagan N , Neckelmann CS , Neerman N , Niu Z , Paolillo VK , Paolucci SA , Perry D , Pesaran T , Radtke K , Rasmussen KJ , Retterer K , Saunders CJ , Spiteri E , Stanley C , Szuto A , Taft RJ , Thiffault I , Thomas BC , Thomas-Wilson A , Thorpe E , Tidwell TJ , Towne MC , Zouk H , Marshall C , Meng L , Jobanputra V , Taft R , Ashley E , Nakouzi G , Shen W , Kingsmore S , Rehm H. 
The landscape of reported VUS in multi-gene panel and genomic testing: Time for a change . Genetics in Medicine . 2023 ; 25 . doi: 10.1016/j.gim.2023.100947 . OpenUrl CrossRef 8. ↵ Jumper J , Evans R , Pritzel A , Green T , Figurnov M , Ronneberger O , Tunyasuvunakool K , Bates R , Žídek A , Potapenko A , Bridgland A , Meyer C , Kohl SAA , Ballard AJ , Cowie A , Romera-Paredes B , Nikolov S , Jain R , Adler J , Back T , Petersen S , Reiman D , Clancy E , Zielinski M , Steinegger M , Pacholska M , Berghammer T , Bodenstein S , Silver D , Vinyals O , Senior AW , Kavukcuoglu K , Kohli P , Hassabis D. Highly accurate protein structure prediction with AlphaFold . Nature . 2021 ; 596 : 583 – 589 . OpenUrl CrossRef PubMed 9. ↵ Cheng J , Novati G , Pan J , Bycroft C , Ž emgulytė A , Applebaum T , Pritzel A , Wong LH , Zielinski M , Sargeant T , Schneider RG , Senior AW , Jumper J , Hassabis D , Kohli P , Avsec Ž. Accurate proteome-wide missense variant effect prediction with AlphaMissense . Science . 2023 ; 381 : eadg7492 . OpenUrl 10. ↵ VarCard.io - AI Diagnostic Tool . Available at https://www.varcard.io/ . Accessed February 14 , 2024 . 11. ↵ Landrum MJ , Chitipiralla S , Brown GR , Chen C , Gu B , Hart J , Hoffman D , Jang W , Kaur K , Liu C , Lyoshin V , Maddipatla Z , Maiti R , Mitchell J , O’Leary N , Riley GR , Shi W , Zhou G , Schneider V , Maglott D , Holmes JB , Kattman BL . ClinVar: improvements to accessing data . Nucleic Acids Res . 2020 ; 48 : D835 – D844 . OpenUrl CrossRef PubMed 12. ↵ Franklin . Available at https://franklin.genoox.com/clinical-db/home . Accessed February 14 , 2024 . 13. ↵ Rehm HL , Berg JS , Brooks LD , Bustamante CD , Evans JP , Landrum MJ , Ledbetter DH , Maglott DR , Martin CL , Nussbaum RL , Plon SE , Ramos EM , Sherry ST , Watson MS . ClinGen — The Clinical Genome Resource . New England Journal of Medicine . 2015 ; 372 : 2235 – 2242 . OpenUrl CrossRef PubMed 14. ↵ Muller RD , McDonald T , Pope K , Cragun D. 
Evaluation of Clinical Practices Related to Variants of Uncertain Significance Results in Inherited Cardiac Arrhythmia and Inherited Cardiomyopathy Genes . Circulation: Genomic and Precision Medicine . 2020 ; 13 : e002789 . OpenUrl 15. ↵ Hellwig LD , Biesecker BB , Lewis KL , Biesecker LG , James CA , Klein WMP . Ability of Patients to Distinguish Among Cardiac Genomic Variant Subclassifications . Circulation: Genomic and Precision Medicine . 2018 ; 11 : e001975 . OpenUrl 16. ↵ Ng PC , Henikoff S. SIFT: Predicting amino acid changes that affect protein function . Nucleic Acids Res . 2003 ; 31 : 3812 – 3814 . OpenUrl CrossRef PubMed Web of Science 17. ↵ Ioannidis NM , Rothstein JH , Pejaver V , Middha S , McDonnell SK , Baheti S , Musolf A , Li Q , Holzinger E , Karyadi D , Cannon-Albright LA , Teerlink CC , Stanford JL , Isaacs WB , Xu J , Cooney KA , Lange EM , Schleutker J , Carpten JD , Powell IJ , Cussenot O , Cancel-Tassin G , Giles GG , MacInnis RJ , Maier C , Hsieh C-L , Wiklund F , Catalona WJ , Foulkes WD , Mandal D , Eeles RA , Kote-Jarai Z , Bustamante CD , Schaid DJ , Hastie T , Ostrander EA , Bailey-Wilson JE , Radivojac P , Thibodeau SN , Whittemore AS , Sieh W. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants . The American Journal of Human Genetics . 2016 ; 99 : 877 – 885 . OpenUrl CrossRef PubMed 18. ↵ Pejaver V , Byrne AB , Feng B-J , Pagel KA , Mooney SD , Karchin R , O’Donnell-Luria A , Harrison SM , Tavtigian SV , Greenblatt MS , Biesecker LG , Radivojac P , Brenner SE , ClinGen Sequence Variant Interpretation Working Group. Calibration of computational tools for missense variant pathogenicity classification and ClinGen recommendations for PP3/BP4 criteria . Am J Hum Genet . 2022 ; 109 : 2163 – 2177 . OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted June 05, 2024. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. 
NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Performance of a Protein Language Model for Variant Annotation in Cardiac Disease Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Performance of a Protein Language Model for Variant Annotation in Cardiac Disease Aviram Hochstadt , Chirag Barbhaiya , Anthony Aizer , Scott Bernstein , Marina Cerrone , Leonid Garber , Douglas Holmes , Robert J Knotts , Alex Kushnir , Jacob Martin , David Park , Michael Spinelli , Felix Yang , Larry A Chinitz , Lior Jankelson medRxiv 2024.06.04.24308460; doi: https://doi.org/10.1101/2024.06.04.24308460 Share This Article: Copy Citation Tools Performance of a Protein Language Model for Variant Annotation in Cardiac Disease Aviram Hochstadt , Chirag Barbhaiya , Anthony Aizer , Scott Bernstein , Marina Cerrone , Leonid Garber , Douglas Holmes , Robert J Knotts , Alex Kushnir , Jacob Martin , David Park , Michael Spinelli , Felix Yang , Larry A Chinitz , Lior Jankelson medRxiv 2024.06.04.24308460; doi: https://doi.org/10.1101/2024.06.04.24308460 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (574) Allergy and Immunology (865) Anesthesia (304) Cardiovascular Medicine (4460) Dentistry and Oral Medicine (445) Dermatology (383) Emergency Medicine (611) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1517) 
Epidemiology (15251) Forensic Medicine (31) Gastroenterology (1132) Genetic and Genomic Medicine (6621) Geriatric Medicine (669) Health Economics (1002) Health Informatics (4564) Health Policy (1372) Health Systems and Quality Improvement (1617) Hematology (544) HIV/AIDS (1272) Infectious Diseases (except HIV/AIDS) (15938) Intensive Care and Critical Care Medicine (1107) Medical Education (624) Medical Ethics (147) Nephrology (670) Neurology (6642) Nursing (346) Nutrition (1001) Obstetrics and Gynecology (1148) Occupational and Environmental Health (957) Oncology (3350) Ophthalmology (981) Orthopedics (369) Otolaryngology (421) Pain Medicine (436) Palliative Medicine (130) Pathology (665) Pediatrics (1698) Pharmacology and Therapeutics (694) Primary Care Research (714) Psychiatry and Clinical Psychology (5464) Public and Global Health (9259) Radiology and Imaging (2212) Rehabilitation Medicine and Physical Therapy (1372) Respiratory Medicine (1198) Rheumatology (598) Sexual and Reproductive Health (716) Sports Medicine (533) Surgery (715) Toxicology (100) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a039399d6bb8300f',t:'MTc4MDA5Njc3Nw=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var 
e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2024) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00