DAVE: how to use explainable AI to interpret missense variants for genome diagnostics based on functional protein modeling

doi:10.1101/2025.11.25.25340947

DAVE: how to use explainable AI to interpret missense variants for genome diagnostics based on functional protein modeling

2025 · doi:10.1101/2025.11.25.25340947

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 52,723 characters · extracted from preprint-html · click to expand

DAVE: how to use explainable AI to interpret missense variants for genome diagnostics based on functional protein modeling | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search DAVE: how to use explainable AI to interpret missense variants for genome diagnostics based on functional protein modeling View ORCID Profile Tim Niemeijer , View ORCID Profile René Mulder , View ORCID Profile Helga Westers , View ORCID Profile Jan D.H. Jongbloed , View ORCID Profile Bart Charbon , View ORCID Profile Birgit Sikkema-Raddatz , View ORCID Profile Lennart F. Johansson , View ORCID Profile Marielle E. van Gijn , View ORCID Profile Cleo C. van Diemen , View ORCID Profile Dennis Hendriksen , View ORCID Profile Kristin M. Abbott , View ORCID Profile W.T. Kars Maassen , View ORCID Profile Morris A. Swertz , View ORCID Profile K. Joeri van der Velde doi: https://doi.org/10.1101/2025.11.25.25340947 Tim Niemeijer 1 Genomics Coordination Center, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tim Niemeijer For correspondence: t.niemeijer{at}umcg.nl René Mulder 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for René Mulder Helga Westers 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Helga Westers Jan D.H. Jongbloed 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jan D.H. Jongbloed Bart Charbon 1 Genomics Coordination Center, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Bart Charbon Birgit Sikkema-Raddatz 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Birgit Sikkema-Raddatz Lennart F. Johansson 1 Genomics Coordination Center, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Lennart F. Johansson Marielle E. van Gijn 3 Department of Human Genetics, Amsterdam University Medical Centres , Van der Boechorststraat 6a, 1081 BT, Amsterdam, the Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Marielle E. van Gijn Cleo C. van Diemen 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Cleo C. van Diemen Dennis Hendriksen 1 Genomics Coordination Center, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Dennis Hendriksen Kristin M. Abbott 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kristin M. Abbott W.T. Kars Maassen 1 Genomics Coordination Center, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for W.T. Kars Maassen Morris A. Swertz 1 Genomics Coordination Center, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Morris A. Swertz K. Joeri van der Velde 1 Genomics Coordination Center, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands 2 Department of Genetics, University of Groningen and University Medical Center Groningen , Antonius Deusinglaan 1, 9713 AV, Groningen, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for K. Joeri van der Velde Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Diagnostic yield in NGS genome diagnostics is constraint by the high fraction of variants of uncertain significance (VUS), in large part due to insufficient interpretability of missense variation. Existing pathogenicity predictors offer strong performance, but often produce an unexplainable score lacking mechanistic insight. Here, we present the Digital Approxima-tion of Variant Effects (MOLGENIS DAVE), an explainable missense variant predictor built on 12 biophysically grounded features spanning stability, hydrophobicity, electrostatics, and molecular interactions. Trained on curated Dutch diagnostic data, DAVE reliably classifies and breaks down predictions into interpretable feature contributions. With a focus on ex-plainability, this framework aims to alleviate the VUS burden, advances clinically actionable variant interpretation and enables mechanistic follow-up. All source code used to process and integrate the data, along with the data required to reproduce all annotations and anal-yses, is available at https://github.com/molgenis/dave . 1 Introduction 1.1 Missense is still a bottleneck in genome diagnostics Despite decades of active research, the diagnostic yield of next-generation sequencing (NGS) in genome diagnostic analysis rarely exceeds 35% 1 – 6 . A leading cause is the large fraction of DNA variants classified as variants of uncertain significance (VUS) 7 . Particularly missense variants are notoriously difficult to interpret as a result of their subtle and context-dependent effects on protein function. According to established clinical guidelines 8 , determining whether a missense variant is pathogenic requires a combination of multiple strong lines of evidence and supporting data. However, these criteria are often only partially met, leading to uncertainty in classification 9 , 10 . Additional strong evidence from follow-up functional studies can resolve VUS by directly assessing the mechanistic consequence of individual variants. Yet, such studies are laborious, variant specific, and given the sheer volume of variants identified in NGS genome diagnostics, often impractical 11 . 1.2 Most predictors lack interpretable insights To aid in prioritizing and classifying missense variants, a multitude of pathogenicity predictors have been developed 12 . These predictors employ a range of approaches, including evolutionary conservation 13 , allele frequency data 14 , ensemble learning methods 15 , and training labels de-rived from evolutionary 16 or clinical sources 17 . Other strategies focus on the impact of variants on protein domains 18 or structural features 19 , while some emphasize visualization to aid inter-pretation 20 . Currently, there are over 400 such predictors 21 . And while the field has started to suffer from data leakage 22 , imbalance 23 , bias, and circularity 24 , 25 , few predictors have at-tempted to combine genomics with less biased physical modeling 26 .The few methods that do, lack explainability 27 – 30 or are highly disease specific 31 , 32 . AlphaFold 33 , 34 represents a major breakthrough in our ability to simulate the molecular effects of amino acid substitutions by dramatically increasing the number of high-confidence protein structure models across the human proteome 35 . An unbiased and high-performance missense pathogenicity predictor based on AlphaFold protein models along with evolutionary data and contextual features is AlphaMissense 36 . However, like most predictors, it produces only numer-ical predictions without explanations. This lack of interpretability limits our ability to investigate the underlying causal mechanisms, which is essential for translating computational predictions into clinically actionable insights and to guide functional laboratory experiments. 1.3 Using explainable AI to interpret pathogenicity predictions We hypothesize that the gap between traditional predictive modeling and mechanistic evidence can be bridged by breaking down prediction output into comprehensible feature contributions that allow further functional interpretation. This requires selecting biologically relevant features for the predictor, derived from protein stability, hydrophobicity, electrostatics, and interaction with ligands, DNA, RNA, and other proteins, combined with insight into the model’s decision making provided by SHapley Additive exPlanations (SHAP) 37 that quantify the contribution of each feature to individual predictions. The result can guide targeted digital or experimental follow-up tests to confirm or refute pathogenicity and help prioritize missense variants of in-terest. In this manuscript, we describe how we tested this hypothesis by developing Digital Approximation of Variant Effects, part of the MOLGENIS software family (MOLGENIS DAVE) to predict pathogenicity on carefully selected functionally relevant features, trained on pathogenic and benign variants from accredited Dutch genome diagnostic laboratories. We applied this model to the VUS of this dataset and compared the predictions with a more recent release of the same dataset and classifications in ClinVar 38 . Lastly, we break down the selected predic-tions and highlight three examples in which explainability assists in their mechanistic evalua-tion. 2 Results 2.1 The DAVE prediction model Functional features were computed for a selection of missense variants from the April 2024 re-lease of fully de-identified and publicly accessible variant classifications provided by all Dutch genome diagnostic laboratories through the Datashare working group of the Dutch Clinical Ge-netics Laboratory Society (VKGL) 39 . From these, we selected 12 biophysically grounded features from five complementary sources: P2Rank 40 , FoldX 5 41 , GLM-Score 42 , Peptides R package 43 and GeoNet 44 , as summarized in table 1 . The DAVE model was trained on the feature differ-ences (i.e., delta) between wild-type and protein structures containing an amino acid substitu-tion caused by benign (LB/B) and pathogenic (LP/P) missense variants split into a train and test dataset. We determined an ROC AUC of 86% on the test set and a probability of 0.286 as the most effective binary classification threshold to identify pathogenic variants. View this table: View inline View popup Download powerpoint Table 1: The 12 features used for training the DAVE model. References to tools and structures are provided within the table for additional information. 2.2 Retrospective classification of VUS The trained DAVE model was applied on VUS missense variants from the VKGL April 2024 data release. Using the binary classification threshold, 3,801 variants were characterized as pathogenic and 7,420 as benign. To compare DAVE’s classification performance with real-world reclassification of VUS, we used a more recent release of the VKGL dataset and variant classifi-cations from ClinVar. In July 2025, compared to April 2024, only 12 VUS had been reclassified as likely pathogenic/pathogenic or likely benign/benign on the VKGL data-sharing platform. These variants are listed in table 2 . Of these, nine DAVE predictions were in concordance with the re-classification, of which four true positives (i.e., correctly identified as pathogenic) and five true negatives (i.e., correctly identified as benign). Two were false positives (i.e., incorrectly predicted as pathogenic), and one was false negative (i.e., incorrectly predicted as benign). Additionally, cross-referencing of the VKGL VUS with ClinVar revealed that 478 variants were classified as be-nign/likely benign, while 176 were considered likely pathogenic/pathogenic. Figure 1 illustrates the distribution of DAVE pathogenicity probabilities and the corresponding reclassification la-bels from VKGL and ClinVar. Download figure Open in new tab Figure 1: Box plots of DAVE pathogenicity probabilities for 654 ClinVar and 12 VKGL retrospectively classified variants. These variants were VUS in VKGL release April 2024 and have since been reclassified, categorized by classification label (LB/LP) and classification source. View this table: View inline View popup Download powerpoint Table 2: The 12 VKGL VUS missense variants that have received an updated classification in a more recent release of the VKGL dataset. These variants were VUS in VKGL release April 2024 and have since been reclassified by the VKGL in release July 2025. DAVE pathogenicity probabilities and verdict, based on a threshold of 0.286, are shown. Outcomes: TN = True Negative, FN = False Negative, TP = True Positive, FP = False Positive. 2.3 Functional interpretation To demonstrate how DAVE can guide interpretation of functional consequences, we performed a more detailed analysis of three VUS that were reclassified in the July 2025 VKGL data-sharing release compared to the April 2024 release or had a different classification in ClinVar. These analyses are enabled by DAVE’s ability to break down the final pathogenicity probability into separate feature contributions as SHAP values 37 , capturing interactions unique to each pre-diction. SHAP values of contributing features, including their delta and unit, are shown in a cumulative plot per variant. ChimeraX 47 was used to visualize the predicted effects on protein structures. 2.3.1 WNT7B T111P The variant in WNT7B NM_058238.3:c.331A>C causes the amino acid substitution Thr111Pro in the Wnt-7b protein. It was selected for having the highest absolute folding energy change of the variants with an updated classification in the VKGL dataset in table 2 ). It is predicted to have a substantial increase in ΔG folding energy, see figure 2 . A higher ΔG causes protein folding to occur less spontaneous, affecting correct folding and stability. Molecular visualization shows a change in the hydrogen bond integral in the secondary structures and their connecting elements ( figure 3 ). The impact on protein structural integrity can be a good mechanistic explanation of the effect of this missense variant. Download figure Open in new tab Figure 2: DAVE decision plot illustrating the predicted impact of the T111P amino acid substitution resulting from a variant in WNT7B . Features are on the y-axis, starting with a baseline pathogenicity probability, followed by contributing features ranked by their impact. Cumulative SHAP probability contributions are shown with arrows on the x-axis, ending on the final pathogenicity probability at the bottom. Download figure Open in new tab Figure 3: Structural comparison of wild-type protein encoded by WNT7B to the variant protein with a T111P substitution. The complete protein wild-type structure is shown on top with, indicated with the red box are the affected region of the variant. Cropped images of the indicated region are shown at the bottom left and at the bottom right. The bottom left showing the wild-type structure. At the bottom right a cropped image of wild-type is shown with the T111P structure overlay in red. Hydrogen bonds are indicated with blue dotted lines with changes highlighted with solid lines. 2.3.2 SLCO2A1 G554R The SLCO2A1 variant NM_005630.2:c.1660G>A, causes the amino acid substitution Gly554Arg in the solute carrier organic anion transporter family member 1A2 protein. It was selected for having the largest absolute change in solvent-accessible surface (SAS) points within its top-ranking ligand-binding pocket. Although currently still classified as a VUS in the VKGL dataset, it is reported as pathogenic in ClinVar 48 . DAVE predicted the variant to be pathogenic, see figure 4 ). The predicted differences in the electrostatic surface potential of the ligand-binding pocket in the simulated structure of the G554R versus wild-type protein, are shown in figure 5 . Moreover, recent functional characterization of variants in the transmembrane domain of SLCO2A1 replacing glycine with residues with larger side chains underscored this, as it revealed a loss of helical dynamics, accompanied by potential disruption of the helical packing 49 . Download figure Open in new tab Figure 4: DAVE decision plot illustrating the predicted impact of the G554R amino acid substitution resulting from a variant in SLCO2A1 . Features are on the y-axis, starting with a baseline pathogenicity probability, followed by contributing features ranked by their impact. Cumulative SHAP probability contributions are shown with arrows on the x-axis, ending on the final pathogenicity probability at the bottom. Download figure Open in new tab Figure 5: Structural comparison of wild-type protein encoded by SLCO2A1 to the protein with the G554R substitution. Top: wild-type protein is displayed in white, with pink, light-blue, orange and yellow accents given to the predicted ligand binding pockets. The region of interest is indicated with the red box. Bottom: the Coulombic electrostatic surface potential is shown for region of interest, where red indicates negative potential and blue is positive potential. 2.3.3 NKX2-5 L153P The variant NKX2-5 NM_004387.3:c.458T>C, causes the amino acid change, Leu153Pro in the homeobox Nkx-2.5 protein. It was selected for having largest absolute change in protein binding site residues. This variant is currently classified as VUS in the VKGL dataset while reported as pathogenic in ClinVar. DAVE predicts that this variant is pathogenic, see figure 6 . The protein encoded by NKX2-5 is a transcription factor, and the substitution L153P, lies in an important homeodomain (AA 138-197) responsible for the binding of DNA as well as other transcription factors 50 . Interestingly, despite the location of the variant, the most contributing feature to the predicted pathogenicity of this variant is not the change in binding site residues, but the change in folding energy ΔG. The predicted structural change of the L153P variant protein compared to the wild-type is visualized in figure 7 . Download figure Open in new tab Figure 6: DAVE decision plot illustrating the predicted impact of the L153 amino acid substitution resulting from a variant in NKX2-5 . Features are on the y-axis, starting with a baseline pathogenicity probability, followed by contributing features ranked by their impact. Cumulative SHAP probability contributions are shown with arrows on the x-axis, ending on the final pathogenicity probability at the bottom. Download figure Open in new tab Figure 7: Structural comparison of the protein with the L153P substitution to the wild-type protein encoded by the NKX2-5 gene. Left: Homeodomain (HD) interacting with ANF-242 DNA sequence. Right bottom: Highlighted in the red box, the location of the HD in the protein encoded by NKX2-5 . Right top: HD where L153P structure changes are shown in red. 3 Discussion 3.1 DAVE successfully prioritizes physical explainability In this study, we developed MOLGENIS DAVE, a supervised learning model that predicts pathogenic-ity of missense variants and provides interpretable insights via contributions of functionally relevant features based on protein modeling. Unlike methods such as AlphaMissense 7 and REVEL 15 , DAVE prioritizes physical explainability over predictive power, providing unique and complementary evidence for variant classification and guidance for targeted experimental val-idation of VUS either in dry or wet laboratory. 3.2 Evaluation of features in the DAVE model The selected features of DAVE represent multiple disease-causing mechanisms 51 and thus pro-vide a robust starting point for interpretation. However, no single model can fully capture the complexity and unique biology of all proteins. Manual follow-up will remain essential to ac-curately determine the molecular consequences of prioritized variants. By integrating more diverse and context-dependent features, future versions of DAVE could better capture the nu-anced mechanisms underlying variant effects, ultimately improving both accuracy and inter-pretability across a broader range of proteins and variant types. For instance, variants that have a gain-of-function or dominant-negative consequence have different, often milder, molec-ular mechanisms compared to loss-of-function variants 52 , 53 . Other examples of such effects include allostery, post-translational modifications (PTMs), toxicity, structural flexibility, folding rate, kinetic effects, aggregation propensity, membrane association, immunogenicity, and co-operativity. 3.3 Limitations in current protein structure Some AlphaFold structures are predicted with low confidence, particularly for disordered regions or poorly conserved domains 54 , 55 . In these regions, predictive accuracy can be significantly lower, especially if models rely heavily on structural data 56 . Another limitation of current mod-els is that they represent the protein without their biological context, and that models larger than 2700 residues (<2% of all models) are chunked into smaller fragments due to computational limitations, making them unsuitable for meaningful predictions. To increase applicability across diverse protein types and missense effect types, structure confidence metrics and more biologi-cal context should be integrated to refine functional predictions. We expect that future versions of AlphaFold and similar efforts will produce unfragmented models for all proteins. 3.4 Overcoming technical barriers for broad applicability While DAVE could certainly be configured to run as part of DNA interpretation pipelines, cur-rently it is not offered an easy to install standalone product. The reasons are the integration with external software, one of which used under academic personal license, as well as a prohibitive computational burden, averaging around 20 minutes per variant on commodity hardware. This hinders its applicability in clinical genetics workflows where rapid turnaround time is essential. For future versions, we will prioritize improving computational efficiency and ensuring permis-sive licensing of underlying methods. 3.5 Conclusion and future perspectives In summary, DAVE combines predictive accuracy with meaningful information on how its pre-dictions are made. By complementing numerical predictions with mechanistic insights, we demonstrate the potential to transform in silico variant effect predictions into interpretable molecularly grounded explanations. The insights provide a practical route to generate testable hypotheses for the resolution of VUS in functional follow-up studies, thereby bridging the gap between prediction, explanation, and clinical utility. Looking ahead, we envision a framework that integrates DAVE with complementary evidence such as population frequency, conservation, inheritance, and phenotypes as well as specialized tools that cover effects on transcription and translation to systematically classify missense variants and reduce VUS. The importance of us-ing an ensemble of dedicated tools to cover specific effects is exemplified by our false negative prediction for CACNA1A Leu622Gln, see table 2 . This variant was classified in diagnostics as likely pathogenic due to a predicted impact on splicing, which explains why DAVE did not pre-dict a significant pathogenic effect at the protein level. Integrating DAVE into pipelines such as MOLGENIS VIP 57 would enable the inclusion of these complementary tools within a scalable and modular infrastructure for variant interpretation. Such an integrative approach enables large scale reanalysis efforts, to prioritize variants for experimental validation, and ultimately accelerate the route from variant discovery to informed clinical decision making. 4 Methods 4.1 Datasets The main resource is the fully de-identified and publicly accessible DNA variant classifications dataset collected in April 2024 by the Datashare working group of the nine genome diagnos-tic labs in the Netherlands, organized in the VKGL (Vereniging Klinisch Genetische Laborato-riumdiagnostiek) 39 . We downloaded the GRCh38 liftover version from the MOLGENIS VIP 57 resources. If variants within the same gene with the same protein change had differences in classification, they were marked as conflicting. The July 2025 version of the VKGL release, was collected similarly to the April 2024 release. For protein structures, we used pdb’s from the Al-phaFold v4 release. AlphaFold structures are represented by UniProt 58 and mappings to HGNC gene symbols are provided. The GRCh38 VCF file for ClinVar 38 release 2025-09-23 was used as-is by matching on chromosome, position, reference and alternative allele. 4.2 Data processing The VKGL variant dataset was annotated with Ensembl VEP 59 version 112. Based on protein localization data from the Protein Atlas 60 we randomly selected 1007 intracellular, 1028 mem-brane, and 692 secreted proteins for which we also had genomic variants in the dataset. This selection of proteins was used to subset the VKGL variants to balance the data on localization. Additionally, we annotated whether chaperones were involved in the folding of these proteins, for this we used a dataset of 194 manually curated chaperones 61 and expanded these genes with UniProt/SwissProt IDs using Ensembl BioMart 62 . These known chaperones were connected to their interaction protein partners using BioGRID 63 . We applied the following five protein analysis tools on the selected variants, P2Rank 40 , FoldX 5 41 , GLM-Score 42 , Peptides R package 43 , and GeoNet 44 . Complete data processing was suc-cessful for 23,417 variants, of which the classifications, localizations, and chaperoned folding characteristics are shown in table 3 . View this table: View inline View popup Download powerpoint Table 3: D a ta used to train, test and apply the DAVE model. Each number represents a set of DNA variants randomly sampled from the VKGL release of April 2024, though equalized across protein localization and folding pathway. Benign includes ’likely benign’ and ’benign’, Pathogenic includes ’likely pathogenic’ and ’pathogenic’. VUS are variants of uncertain signifi-cance. 4.3 Feature engineering To obtain the most informative and independently contributing set of features, the set was min-imized by removing correlations and selecting the most informative and explainable features. Among the remaining features, pairwise correlations were minimal, with the exception of corre-lation between ’Net electric charge at pH 7’ and ’Isoelectric point’. Despite this, both features were deemed valuable to retain due to their individual relevance. The R function ’cor’ from the stats (r-core) package was used to calculate the correlation of features as the Pearson correla-tion coefficient (PCC), shown in figure 8 . Download figure Open in new tab Figure 8: Feature correlation in the DAVE training data . Correlation was calculated as the Pearson correlation coefficient. 4.4 Model training A subset of the full annotated dataset, consisting of 12,048 variants classified as likely pathogenic (LP) or likely benign (LB), was split into training and testing sets using an 80/20 ratio. The DAVE model was implemented using the randomForest package in R (version 4.7-1.1) 64 using the stan-dard Random Forest algorithm with default parameters. To determine the contribution of each selected feature to individual predictions, SHAP values were calculated using the R package fastshap 65 (version 0.1.1) with 10-fold Monte Carlo sampling. It is important to note that SHAP values do not correlate with feature values, but instead capture feature contributions based on the interactions among all features uniquely for this prediction. 4.5 Optimal classification threshold For binary classification, the VKGL April 2024 ’test’ set was used to determine the optimal threshold with the R package cutpointr 66 (version 1.1.2). From this, a threshold of 0.286 was established as the cutoff point using Youden’s J statistic 67 expressed as: 4.6 Retrospective comparison To evaluate the utility of DAVE in the reclassification of the 11,221 variants of uncertain signifi-cance (VUS), we compared variant classifications between the April 2024 and July 2025 VKGL datasets. Additionally, we compared variants classified as VUS in the VKGL that had a B/LB or LP/P classification in ClinVar (September 23, 2025, with at least a one star review status and no conflicting classifications) with the predictions of DAVE. For variant specific structural comparison, we substituted wild-type amino acid with the variant in the AlphaFold 33 structure .pdbs using FoldX5 41 with the BuildModel and RepairPDB command. These structures were then visualized with ChimeraX 47 and compared with wild-type structures. Declarations Ethics approval and consent to participate Not applicable. Consent for publication Not applicable. Availability of data and materials The April 2024 VKGL public datashare variants were downloaded from https://download.m olgeniscloud.org/downloads/vip/resources/GRCh38/vkgl_consensus_20240401.tsv the original GRCh37 version is available via https://vkgl.molgeniscloud.org . Protein localization data were downloaded from: https://www.proteinatlas.org/about/download Chaperone interaction datasets were downloaded from the supplement of Shemesh et al, from https://static-content.springer.com/esm/art%3A10.1038%2Fs41467-021-22369-9/MediaObjects/41467_2021_22369_MOESM3_ESM.xlsx and BIOGRID ORGANISM 4.4.234 tab3 from https://downloads.thebiogrid.org/BioGRID/Release-Archive/BIOGRID-4.4.234/ AlphaFold (version 4) structures were downloaded from: https://ftp.ebi.ac.uk/pub/da tabases/alphafold/v4/UP000005640_9606_HUMAN_v4.tar All training/testing data and predictions of the reinterpreted VUS are available at https://github.com/molgenis/DAVE . The DAVE results for VKGL VUS data are also available through an interactive dashboard at https://dave.molgeniscloud.org/ . Availability of source code All code is available as free and open source software under the GNU Lesser General Public License (LGPL) v3.0 at https://github.com/molgenis/dave . P2Rank 40 is available at https://github.com/rdk/p2rank . FoldX 5 41 is available under academic license at https://foldxsuite.crg.eu/ . GLM-Score 42 is available at https://github.com/Klab-Bioinfo-Tools/GLM-Score . Peptides R package 43 is available at https://cran.r-project.org/web/packages/ Peptides. GeoNet 44 , the version we used is available at https://github.com/joerivandervelde/GeoNet . Competing interests The authors declare no conflicts of interest. Funding This research was supported by the ERDERA project (grant agreement No. 101156595), which has received funding from the European Union’s Horizon Europe research and innovation pro-gramme, and The Netherlands Organisation for Scientific Research NWO under VIDI grant num-ber 917.164.455. Author contributions T.N., R.M., H.W., J.D.H.J. and K.J.V. conceived the project. T.N. performed the functional in-terpretation. K.J.V. performed the data processing. T.N. and K.J.V. wrote the manuscript with critical input and revisions from R.M., H.W., J.D.H.J., B.C., B.S.R., L.F.J., M.E.G., C.C.D., D.H., K.M.A., W.T.K.M., and M.A.S. All authors have reviewed and approved the manuscript. Data Availability All data produced are available online at https://github.com/molgenis/dave https://download.molgeniscloud.org/downloads/vip/resources/GRCh38/vkgl_consensus_20240401.tsv https://vkgl.molgeniscloud.org https://www.proteinatlas.org/about/download https://static-content.springer.com/esm/art%3A10.1038%2Fs41467-021-22369-9/MediaObjects/41467_2021_22369_MOESM3_ESM.xlsx https://downloads.thebiogrid.org/BioGRID/Release-Archive/BIOGRID-4.4.234/ https://ftp.ebi.ac.uk/pub/databases/alphafold/v4/UP000005640_9606_HUMAN_v4.tar Acknowledgements We would like to thank our colleagues at Genomics Coordination Center for engineering support and access to the UMCG high performance compute cluster. We are grateful to our colleagues from all Dutch genome diagnostic laboratories for their continuing efforts to share variant clas-sifications through the VKGL Datashare working group. We would also like to thank our previous and current heads of department Prof. Nine Knoers and Prof. Anke-Hilse Maitland-van der Zee as well as the UMCG Board of Directors, in particular Prof. Stephanie Klein Nagelvoort Schuit and Prof. Wiro Niessen for their encouragement and support to develop and use new AI meth-ods at the University Medical Center Groningen. Lastly, we would like to express our gratitude to the exquAIro AI bootcamp organization and board members Prof. Gerard Koppelman, Dr. Martin Smit, Ilya Petoukhov, Dr. Marnix Bügel, as well as Geerte Koster of REWIRE and Dr. Kai Yu Ma of UMCG for their training, coaching and valuable input. Footnotes ↵ † k.j.van.der.velde{at}umcg.nl References [1]. ↵ Abul-Husn , N. S. et al. Genetics in Medicine 2023 , [2]. Pandey , R. ; Brennan , N. F. ; Trachana , K. ; Katsandres , S. ; Bodamer , O. ; Belmont , J. ; Veen-stra , D. L. ; Peng , S . Genetics in Medicine 2025 , 27 , 101398 . [3]. Oud , M. S. ; de Leeuw , N. ; Smeets , D. F. C. M. ; Ramos , L. ; van der Heijden , G. W. ; Timmermans , R. G. J. ; van de Vorst , M. ; Hofste , T. ; Kempers , M. J. E. ; Stokman , M. F. ; D’Hauwers , K. W. M. ; Faas , B. H. W. ; Westra , D . Andrology 2024 , 13 , 1078 – 1092 . OpenUrl PubMed [4]. Henkel , J. ; Laner , A. ; Locher , M. ; Wohlfrom , T. ; Neitzel , B. ; Becker , K. ; Neuhann , T. ; Abicht , A. ; Steinke-Lange , V. ; Holinski-Feder , E . European Journal of Human Genetics 2023 , 31 , 925 – 930 . OpenUrl PubMed [5]. Neuens , S. et al. European Journal of Medical Genetics 2025 , 76 , 105030 . [6]. ↵ Earle , N. J. ; Winbo , A. ; Crawford , J. ; Wheeler , M. ; Stiles , R. ; Donoghue , T. ; Stiles , M. K. ; Hayes , I. ; Marcondes , L. ; Martin , A. ; Skinner , J. R . Circulation: Heart Failure 2024 , 17 . [7]. ↵ Chen , E. ; Facio , F. M. ; Aradhya , K. W. ; Rojahn , S. ; Hatchell , K. E. ; Aguilar , S. ; Ouyang , K. ; Saitta , S. ; Hanson-Kwan , A. K. ; Capurro , N. N. ; Takamine , E. ; Jamuar , S. S. ; McKnight , D. ; Johnson , B. ; Aradhya, S. JAMA Network Open 2023 , 6e, e2339571. 8. ↵ Richards , S. ; Aziz , N. ; Bale , S. ; Bick , D. ; Das , S. ; Gastier-Foster , J. ; Grody , W. W. ; Hegde , M. ; Lyon , E. ; Spector , E. ; Voelkerding , K. ; Rehm , H. L . Genetics in Medicine 2015 , 17 , 405 – 424 . OpenUrl CrossRef PubMed [9]. ↵ Palafox , M. F. ; Boatner , L. ; Wilde , B. R. ; Christofk , H. ; Backus , K. M. ; Arboleda , V. A . The American Journal of Human Genetics 2025 , 112 , 1649 – 1663 . OpenUrl PubMed [10]. ↵ Ramadane-Morchadi , L. ; Rotenberg , N. ; Esteban-Sánchez , A. ; Fortuno , C. ; Gómez-Sanz , A. ; Varga , M. J. ; Chamberlin , A. ; Richardson , M. E. ; Michailidou , K. ; Pérez-Segura , P. ; Spur-dle , A. B. ; de la Hoya , M . The American Journal of Human Genetics 2025 , 112 , 993 – 1002 . OpenUrl CrossRef PubMed [11]. ↵ Anderson , C. L. ; Munawar , S. ; Reilly , L. ; Kamp , T. J. ; January , C. T. ; Delisle , B. P. ; Eckhardt , L. L . Frontiers in Cardiovascular Medicine 2022 , 9 . [12]. ↵ Hu , Z. ; Yu , C. ; Furutsuki , M. ; Andreoletti , G. ; Ly , M. ; Hoskins , R. ; Adhikari , A. N. ; Brenner , S. E . Human Mutation 2019 , 40 , 1202 – 1214 . OpenUrl CrossRef PubMed [13]. ↵ Ng , P. C . Nucleic Acids Research 2003 , 31 , 3812 – 3814 . OpenUrl CrossRef PubMed Web of Science [14]. ↵ Wu , Y. ; Liu , H. ; Li , R. ; Sun , S. ; Weile , J. ; Roth , F. P . The American Journal of Human Genetics 2021 , 108 , 1891 – 1906 . OpenUrl CrossRef PubMed [15]. ↵ Ioannidis , N. M. et al. The American Journal of Human Genetics 2016 , 99 , 877 – 885 . OpenUrl CrossRef PubMed [16]. ↵ Schubach , M. ; Maass , T. ; Nazaretyan , L. ; Röner , S. ; Kircher, M. Nucleic Acids Research 2024 , 52, D1143–D1154. [17]. ↵ Rogers , M. F. ; Shihab , H. A. ; Mort , M. ; Cooper , D. N. ; Gaunt , T. R. ; Campbell , C . Bioinformatics 2017 , 34 , 511 – 513 . OpenUrl [18]. ↵ Wiel , L. ; Baakman , C. ; Gilissen , D. ; Veltman , J. A. ; Vriend , G. ; Gilissen, C . Human Mutation 2019 , [19]. ↵ Ittisoponpisan , S. ; Islam , S. A. ; Khanna , T. ; Alhuzimi , E. ; David , A. ; Sternberg , M. J . Journal of Molecular Biology 2019 , 431 , 2197 – 2212 . OpenUrl CrossRef PubMed [20]. ↵ Sierk , M. ; Ratnayake , S. ; Wagle , M. M. ; Chen , B. ; Park , B. ; Wang , J. ; Youkharibache , P. ; Meerzaman , D . BMC Bioinformatics 2023 , 24 . [21]. ↵ Lin , Y.-J. ; Menon , A. S. ; Hu , Z. ; Brenner , S. E . Human Genomics 2024 , 18 . [22]. ↵ Bernett , J. ; Blumenthal , D. B. ; Grimm , D. G. ; Haselbeck , F. ; Joeres , R. ; Kalinina , O. V. ; List , M . Nature Methods 2024 , 21 , 1444 – 1453 . OpenUrl PubMed [23]. ↵ Rastogi , R. et al. Human Genetics 2025 , 144 , 281 – 293 . OpenUrl CrossRef PubMed [24]. ↵ Grimm , D. G. ; Azencott , C. ; Aicheler , F. ; Gieraths , U. ; MacArthur , D. G. ; Samocha , K. E. ; Cooper , D. N. ; Stenson , P. D. ; Daly , M. J. ; Smoller , J. W. ; Duncan , L. E. ; Borgwardt , K. M . Human Mutation 2015 , 36 , 513 – 523 . OpenUrl CrossRef PubMed [25]. ↵ Heo , J. Y. ; Kim , J. H . BMC Genomics 2025 , 26 . [26]. ↵ Hollingsworth , S. A. ; Dror , R. O . Neuron 2018 , 99 , 1129 – 1143 . OpenUrl CrossRef PubMed [27]. ↵ Banerjee , A. ; Saha , S. ; Tvedt , N. C. ; Yang , L.-W. ; Bahar , I . Current Opinion in Structural Biology 2023 , 78 , 102517 . [28]. Banerjee , A. ; Bogetti , A. T. ; Bahar , I . Proceedings of the National Academy of Sciences 2025 , 122 . [29]. Lai , J. ; Yang , J. ; Gamsiz Uzun , E. D. ; Rubenstein , B. M. ; Sarkar , I. N . Bioinformatics Advances 2021 , 2 . [30]. ↵ Zhao , H. et al. Cell Reports Methods 2024 , 4 , 100687 . [31]. ↵ Wang , J. ; Yang , M. ; Zong , C. ; Li , Y. ; Verkhivker , G. ; Xiao , F. ; Hu , G . Journal of Chemical Information and Modeling 2025 , 65 , 8375 – 8384 . OpenUrl PubMed [32]. ↵ KA, H.; Naghinejad , M. ; Amirfiroozy , A. ; Shamsir , M. S. ; Parvizpour , S. ; Razmara , J . Journal of Human Genetics 2025 , 70 , 341 – 348 . OpenUrl PubMed [33]. ↵ Jumper , J. et al. Nature 2021 , 596 , 583 – 589 . OpenUrl CrossRef PubMed [34]. ↵ Abramson , J. et al. Nature 2024 , 630 , 493 – 500 . OpenUrl CrossRef PubMed [35]. ↵ Porta-Pardo , E. ; Ruiz-Serra , V. ; Valentini , S. ; Valencia, A. PLOS Computational Biology 2022 , 18 , e1009818. [36]. ↵ Cheng , J. et al. Science 2023 , 381 . [37]. ↵ Lundberg , S. M. ; Lee , S.-I . A unified approach to interpreting model predictions. Proceed-ings of the 31st International Conference on Neural Information Processing Systems. Red Hook, NY, USA , 2017; p 4768 – 4777 . [38]. ↵ Landrum , M. J. ; Lee , J. M. ; Riley , G. R. ; Jang , W. ; Rubinstein , W. S. ; Church , D. M. ; Ma-glott, D. R. Nucleic Acids Research 2013 , 42 , D 980 – D985 . OpenUrl [39]. ↵ Fokkema , I. F. A. C. et al. Human Mutation 2019 , 40 , 2230 – 2238 . OpenUrl PubMed [40]. ↵ Krivák , R. ; Hoksza , D . Journal of Cheminformatics 2018 , 10 . [41]. ↵ Delgado , J. ; Radusky , L. G. ; Cianferoni , D. ; Serrano , L . Bioinformatics 2019 , 35 , 4168 – 4169 . OpenUrl CrossRef PubMed [42]. ↵ Dias , R. ; Kolazckowski, B. Proteins: Structure, Function, and Bioinformatics 2015 , 83 , 2100 – 2114 . OpenUrl [43]. ↵ Osorio , D. ; Rondon-Villarreal , P. ; Torres, R . Peptides: Calculate Indices and Theoretical Physicochemical Properties of Protein Sequences . 2014 ; doi: 10.3261 4/CRAN.package.Peptides. OpenUrl CrossRef [44]. ↵ Han , J. ; Zhang , S. ; Guan , M. ; Li , Q. ; Gao , X. ; Liu , J . Structure 2024 , 32 , 2435 – 2448 .e5. OpenUrl [45]. Drew , H. R. ; Wing , R. M. ; Takano , T. ; Broka , C. ; Tanaka , S. ; Itakura , K. ; Dickerson , R. E . Pro-ceedings of the National Academy of Sciences 1981 , 78 , 2179 – 2183 . OpenUrl Abstract / FREE Full Text [46]. Safaee , N. ; Noronha , A. M. ; Rodionov , D. ; Kozlov , G. ; Wilds , C. J. ; Sheldrick , G. M. ; Gehring , K . Angewandte Chemie International Edition 2013 , 52 , 10370 – 10373 . OpenUrl PubMed [47]. ↵ Meng , E. C. ; Goddard , T. D. ; Pettersen , E. F. ; Couch , G. S. ; Pearson , Z. J. ; Morris , J. H. ; Ferrin , T. E . Protein Science 2023 , 32 . [48]. ↵ Xu , Y. ; Zhang , Z. ; Yue , H. ; Li , S. ; Zhang , Z . Journal of Bone and Mineral Research 2020 , 36 , 1459 – 1468 . OpenUrl [49]. ↵ Xia , Z. ; Lu , G. ; Wu , D. ; Zhao , J. ; Zhang , B. ; Xu , H. ; Du , Y. ; Jiang , D . Nature Communications 2025 , 16 . [50]. ↵ Pradhan , L. ; Genis , C. ; Scone , P. ; Weinberg , E. O. ; Kasahara , H. ; Nam , H.-J . Biochemistry 2012 , 51 , 6312 – 6319 . OpenUrl CrossRef PubMed Web of Science [51]. ↵ Sen , N. ; Anishchenko , I. ; Bordin , N. ; Sillitoe , I. ; Velankar , S. ; Baker , D. ; Orengo , C . Briefings in Bioinformatics 2022 , 23 . [52]. ↵ Gerasimavicius , L. ; Livesey , B. J. ; Marsh , J. A . Nature Communications 2022 , 13 . [53]. ↵ Backwell , L. ; Marsh , J. A . Annual Review of Genomics and Human Genetics 2022 , 23 , 475 – 498 . OpenUrl CrossRef PubMed [54]. ↵ Luppino , F. ; Lenz , S. ; Chow , C. F. W. ; Toth-Petroczy , A . BMC Genomics 2025 , 26 . [55]. ↵ Brotzakis , Z. F. ; Zhang , S. ; Murtada , M. H. ; Vendruscolo , M . Nature Communications 2025 , 16 . [56]. ↵ Kong , S. W. ; Lee , I.-H. ; Collen , L. V. ; Field , M. ; Manrai , A. K. ; Snapper , S. B. ; Mandl , K . D. npj Genomic Medicine 2025 , 10 . [57]. ↵ Maassen , W. T. K. ; Johansson , L. F. ; Charbon , B. ; Hendriksen , D. ; van den Hoek, S.; Slofs-tra, M. K.; Mulder, R.; Meems-Veldhuis, M. T.; Sietsma, R.; Lemmink, H. H.; van Diemen, C. C.; van Gijn, M. E.; Swertz, M. A.; van der Velde, K. J. NAR Genomics and Bioinformatics 2025 , 7. [58]. ↵ Bateman , A. , et al. Nucleic Acids Research 2022 , 51, D523–D531. [59]. ↵ McLaren , W. ; Gil , L. ; Hunt , S. E. ; Riat , H. S. ; Ritchie , G. R. S. ; Thormann , A. ; Flicek , P. ; Cun-ningham , F . Genome Biology 2016 , 17 . [60]. ↵ Uhlén , M. et al. Science 2015 , 347 . [61]. ↵ Shemesh , N. ; Jubran , J. ; Dror , S. ; Simonovsky , E. ; Basha , O. ; Argov , C. ; Hekselman , I. ; Abu-Qarn , M. ; Vinogradov , E. ; Mauer , O. ; Tiago , T. ; Carra , S. ; Ben-Zvi , A. ; Yeger-Lotem , E . Nature Communications 2021 , 12 . [62]. ↵ Durinck , S. ; Spellman , P. T. ; Birney , E. ; Huber , W . Nature Protocols 2009 , 4 , 1184 – 1191 . OpenUrl PubMed [63]. ↵ Oughtred , R. ; Rust , J. ; Chang , C. ; Breitkreutz , B. ; Stark , C. ; Willems , A. ; Boucher , L. ; Leung , G. ; Kolas , N. ; Zhang , F. ; Dolma , S. ; Coulombe-Huntington , J. ; Chatr-aryamontri , A. ; Dolinski , K. ; Tyers , M . Protein Science 2020 , 30 , 187 – 200 . OpenUrl PubMed [64]. ↵ Breiman , L . Machine Learning 2001 , 45 , 5 – 32 . OpenUrl [65]. ↵ Štrumbelj , E. ; Kononenko , I . Knowledge and Information Systems 2013 , 41 , 647 – 665 . OpenUrl [66]. ↵ Thiele , C. ; Hirschfeld , G . Journal of Statistical Software 2021 , 98 . [67]. ↵ Youden , W. J . Cancer 1950 , 3 , 32 – 35 . OpenUrl CrossRef PubMed Web of Science View the discussion thread. Back to top Previous Next Posted November 27, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following DAVE: how to use explainable AI to interpret missense variants for genome diagnostics based on functional protein modeling Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share DAVE: how to use explainable AI to interpret missense variants for genome diagnostics based on functional protein modeling Tim Niemeijer , René Mulder , Helga Westers , Jan D.H. Jongbloed , Bart Charbon , Birgit Sikkema-Raddatz , Lennart F. Johansson , Marielle E. van Gijn , Cleo C. van Diemen , Dennis Hendriksen , Kristin M. Abbott , W.T. Kars Maassen , Morris A. Swertz , K. Joeri van der Velde medRxiv 2025.11.25.25340947; doi: https://doi.org/10.1101/2025.11.25.25340947 Share This Article: Copy Citation Tools DAVE: how to use explainable AI to interpret missense variants for genome diagnostics based on functional protein modeling Tim Niemeijer , René Mulder , Helga Westers , Jan D.H. Jongbloed , Bart Charbon , Birgit Sikkema-Raddatz , Lennart F. Johansson , Marielle E. van Gijn , Cleo C. van Diemen , Dennis Hendriksen , Kristin M. Abbott , W.T. Kars Maassen , Morris A. Swertz , K. Joeri van der Velde medRxiv 2025.11.25.25340947; doi: https://doi.org/10.1101/2025.11.25.25340947 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15228) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6597) Geriatric Medicine (668) Health Economics (997) Health Informatics (4535) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9230) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a0049b764b6758f4',t:'MTc3OTU0NTA0MA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00