Availability of benign missense variant “truthsets” for validation of functional assays: current status and a novel systematic approach

doi:10.1101/2025.03.20.25324299

Availability of benign missense variant “truthsets” for validation of functional assays: current status and a novel systematic approach

2025 · doi:10.1101/2025.03.20.25324299

preprint OA: gold CC-BY-4.0

📄 Open PDF Full text JSON View at publisher

Full text 62,699 characters · extracted from preprint-html · click to expand

Availability of benign missense variant “truthsets” for validation of functional assays: current status and a novel systematic approach | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Availability of benign missense variant “truthsets” for validation of functional assays: current status and a novel systematic approach View ORCID Profile Charlie F. Rowlands , View ORCID Profile Sophie Allen , View ORCID Profile Alice Garrett , View ORCID Profile Miranda Durkie , View ORCID Profile George J. Burghel , Rachel Robinson , View ORCID Profile Alison Callaway , Joanne Field , Bethan Frugtniet , Sheila Palmer-Smith , Jonathan Grant , Judith Pagan , View ORCID Profile Trudi McDevitt , View ORCID Profile Katie Snape , View ORCID Profile Helen Hanson , View ORCID Profile Terri McVeigh , View ORCID Profile Clare Turnbull , CanVIG-UK doi: https://doi.org/10.1101/2025.03.20.25324299 Charlie F. Rowlands 1 Division of Genetics and Epidemiology, The Institute of Cancer Research , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Charlie F. Rowlands Sophie Allen 1 Division of Genetics and Epidemiology, The Institute of Cancer Research , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sophie Allen Alice Garrett 1 Division of Genetics and Epidemiology, The Institute of Cancer Research , London, UK 2 St George’s University Hospitals NHS Foundation Trust , Tooting, London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Alice Garrett Miranda Durkie 3 Sheffield Diagnostic Genetics Service, NEY Genomic Laboratory Hub, Sheffield Children’s NHS Foundation Trust , Sheffield, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Miranda Durkie George J. Burghel 4 Manchester Centre for Genomic Medicine and NW Laboratory Genetics Hub, Manchester University Hospitals NHS Foundation Trust , Manchester, UK 5 Division of Cancer Sciences, School of Medical Sciences, Faculty of Biology, Medicine and Health, The University of Manchester , Manchester, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for George J. Burghel Rachel Robinson 6 Yorkshire Regional Genetics Service, Leeds Teaching Hospitals NHS Trust , Leeds, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alison Callaway 7 Central and South Genomics Laboratory Hub, Wessex Genomics Laboratory Service, University Hospital Southampton NHS Foundation Trust , Salisbury, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Alison Callaway Joanne Field 8 Genomics and Molecular Medicine Service, Nottingham University Hospitals NHS Trust , Nottingham, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Bethan Frugtniet 2 St George’s University Hospitals NHS Foundation Trust , Tooting, London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sheila Palmer-Smith 9 Wales Genomic Health Centre, Cardiff and Vale University Health Board , Cardiff, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jonathan Grant 10 Laboratory Genetics, Queen Elizabeth University Hospital , NHS Greater Glasgow and Clyde, Glasgow, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Judith Pagan 11 South East Scotland Clinical Genetics, Western General Hospital , Edinburgh, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Trudi McDevitt 12 Department of Clinical Genetics , CHI at Crumlin, Dublin, Ireland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Trudi McDevitt Katie Snape 2 St George’s University Hospitals NHS Foundation Trust , Tooting, London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Katie Snape Helen Hanson 13 Peninsula Regional Genetics Service, Royal Devon University Healthcare NHS Foundation Trust , Exeter, UK 14 Department of Clinical and Biomedical Sciences, University of Exeter Medical School , Exeter, United Kingdom Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Helen Hanson Terri McVeigh 15 The Royal Marsden NHS Foundation Trust , Fulham Road, London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Terri McVeigh Clare Turnbull 1 Division of Genetics and Epidemiology, The Institute of Cancer Research , London, UK 15 The Royal Marsden NHS Foundation Trust , Fulham Road, London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Clare Turnbull For correspondence: Turnbull.Lab{at}icr.ac.uk Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Multiplex assays of variant effect (MAVEs) provide promising new sources of functional evidence, potentially empowering improved classification of germline genomic variants, particularly rare missense variants, which are commonly assigned as VUS (variants of uncertain significance). However, paradoxically, quantification of clinically applicable evidence strengths for MAVEs requires construction of “truthsets” comprising missense variants already robustly classified as pathogenic and benign. In this study, we demonstrate how benign truthset size is the primary driver of applicable functional evidence towards pathogenicity (PS3). We demonstrate, when using existing ClinVar classifications as a source of benign missense truthset variants, for only 19.8% (23/116) of established cancer susceptibility genes was a PS3 evidence strength of “strong” attainable when simulating validation for a hypothetical new MAVE (applying also favourable assumption of perfect concordance). We describe a “proactive-systematic” framework for benign truthset construction, in which all possible missense variants in a gene of interest are concurrently assessed for assignation of (likely) benignity via established ACMG/AMP combination rules including population frequency, in silico evidence codes and case-control signal. We apply this framework to eight hereditary breast and ovarian cancer genes, demonstrating that proactive-systematically generated benign missense truthsets allow maximum application of PS3 at greater (or equivalent) strength – reaching “moderate” for CHEK2 and “strong” for the other seven genes – than those derived from ClinVar ≥2* classifications alone. We propose, given many genes have few existing benign-classified missense variants, application of this proactive-systematic framework to disease genes more broadly will be important for leveraging full value from MAVEs. Introduction With rapid advances in high-throughput molecular and bioinformatic pipelines for genomic sequencing, the clinical interpretation of detected variants has become the primary bottleneck in clinical genetic testing 1 . Most commonly problematic for interpretation of pathogenicity are rare missense variants, which are in aggregate frequent 2 . For the majority of such missense variants encountered on clinical testing, clinical case data are unavailable or insufficient to provide informative signals. and they are ascribed as variants of uncertain significance (VUS) 2 – 5 . One attractive source of information to assist in resolving VUS classifications, and to determine if a variant may be deleterious or of neutral impact, are functional assays of variant effect. Recently, through concurrent advances in high throughput sequencing and gene-editing technologies, multiplexed assays of variant effect (MAVEs) have provided systematic data on the functional impact of many thousands of variants 4 , 6 , 7 . Such assays offer potential pre-annotation of (nearly) every possible variant that could arise ahead of it being observed clinically 8 – 10 . However, for either low- or high-throughput assays, an assay readout of neutral or deleterious function should not be automatically presumed to recapitulate clinical pathogenicity without suitably robust validation. In 2019, on behalf of the ClinGen SVI group, Brnich et al . described a methodology to quantify the PS3/BS3 functional evidence criteria, such that assays could be assigned individualised evidence strengths based on their ability to discriminate between two “truthsets” of pathogenic and benign variants 11 . This specification by Brnich et al . built upon the looser stipulation in the 2015 ACMG-AMP variant interpretation framework in which assignation of evidence towards pathogenicity (PS3) or benignity (BS3) was allowed where “well-established in vitro or in vivo studies [are] supportive of/[show] no damaging effect on the gene or gene product” 12 . The Brnich et al. methodology incorporated the subsequent evolution of the 2015 ACMG-AMP guidelines by Tavtigian et al. , which provided a Bayesian framework by which evidence items may be assigned strengths dependent on the likelihood ratio (LR) afforded towards pathogenicity or benignity 13 . Missense variants may pose particular challenges in assessment and application of functional evidence, as the mechanism of impact of missense variants on protein function may vary, with effects for both pathogenic and benign variants potentially more likely to lie towards the intermediate zone of assay dynamic range 14 . The complex spectrum of missense variant effects is thus not necessarily well-reflected in standard preliminary calibration/validation of assay performance (for loss-of-function genes) against nonsense/frameshift and synonymous variants. It is thus argued that, as a second phase of validation, assays should be clinically validated against truthsets of benign-classified and pathogenic-classified missense variants (in particular given that missense variants are the group for which functional data will more strongly drive a classification of pathogenicity) 15 . Publicly available databases, most commonly ClinVar, have typically served as the de facto sources of truthset variants 3 . However, most missense variants in ClinVar are classified as VUS 4 . Furthermore, for many missense variants in ClinVar classified as (likely) benign (B/LB) or (likely) pathogenic (P/LP), the level of detailing of evidence items underpinning these classifications may vary substantially. Accordingly, it may be unclear for many variants whether their extant classifications have been attained through inclusion of functional data (PS3/BS3 codes). This circularity renders problematic inclusion of such variants in truthsets for evaluating the evidence strength to be assigned for a new assay 16 . Limited availability of robust missense truthset variants is a primary roadblock in regard of our ability to best leverage for clinical variant interpretation the recent surge in availability of data from MAVEs 15 . This will be particularly problematic for understudied genes and genes that are rarer causes of disease (which, paradoxically, constitute the genes for which variant interpretation stands to gain most from integration of functional evidence). In recognition of the incipient emergence into the clinical diagnostic arena of many new MAVEs, we sought to better evaluate our ‘preparedness’ in regard of missense truthsets for validation. In the work presented here, we: Demonstrate that the number of benign-classified truthset variants is (paradoxically) the principal driver of the strength with which the PS3 code towards pathogenicity can be assigned. Examine the availability from ClinVar of truthset variants for 116 cancer susceptibility genes that are routinely tested for in clinical practice, considering in detail eight genes associated with hereditary breast and ovarian cancer (HBOC). Describe a “proactive-systematic” approach to construction of benign missense truthsets based on the original ACMG-AMP combination rules. We demonstrate, for the eight HBOC genes, the equal or improved power for MAVE assay validation and accordant strength for allocation of PS3 afforded through this proactive-systematic approach to construction of benign missense truthsets. Methods Extraction of ClinVar classifications and comments Counts of ClinVar missense variant classifications were retrieved for eight HBOC genes of interest ( BRCA1 [MIM: 113705], BRCA2 [MIM: 600185], PALB2 [MIM: 610355] , ATM [MIM: 607585], CHEK2 [MIM: 604373], RAD51C [MIM: 602774], RAD51D [MIM: 602954] and BRIP1 [MIM: 605882]) and 108 additional inherited cancer susceptibility genes through direct querying of the ClinVar bulk data file download, and attached comments collated for HBOC gene classifications via the ClinVar API. Extraction of gnomAD, UK Biobank and BRIDGES population variant data The coding sequence of the MANE transcript for each HBOC gene was extracted using Ensembl BioMart 17 (v111) and bespoke Python scripts used to generate all possible SNVs in these transcripts. Variants overlapping these coding regions were extracted and processed from the UK Biobank (UKB) final exome release population VCF 18 and the gnomAD v2.1.1 19 variant count bulk file download using the bcftools view command (see Supplemental Methods). Collated variant counts were stratified by non-founder ancestry grouping (in gnomAD and UKB) and case vs. control status (in UKB, see Supplemental Methods for ancestry and case cohort definitions). Counts of missense variant observations in the BRIDGES unselected breast cancer case and control series were also retrieved from the primary publication 20 . Assignation of putative pathogenicity and benignity flags We devised a “proactive-systematic” approach to construction of benign truthsets based on in silico and population frequency ACMG/AMP evidence codes. All possible missense variants in each HBOC gene were annotated with BayesDel 21 , REVEL 22 and SpliceAI 23 in silico scores using their respective flat file data downloads, ensuring matching of transcripts where scores were transcript-specific. Variants were excluded from proactive-systematically generated benign truthsets if in silico predictions indicated possible pathogenicity (BayesDel score ≥0.13 or REVEL score ≥0.644, as per previously calibrated PP3_supporting thresholds 24 ) or if their maximum SpliceAI score was ≥0.2. We further excluded from benign truthsets any missense variant with observed effect size (i.e. association with gene-associated cancer type) of odds ratio (OR) >2 and lower confidence interval (CI) limit >1 in any sub-ancestry in the BRIDGES and UKB datasets, as described above and in Supplemental Methods. Remaining missense variants with REVEL scores ≤0.29 were considered to have met the BP4_sup criterion as per the aforementioned thresholds. 24 Population frequency thresholds for application of BA1 and BS1 were identified via calculation of gene-phenotype dyad-specific maximum tolerated allele frequencies (MTAFs; Supplemental Methods), as previously described 25 , as well as BS1_supporting (BS1_sup) thresholds, as employed in several ClinGen-ratified VCEP guidelines 26 – 29 . Variants present at frequencies above these thresholds in any superpopulation or non-founder subpopulation in gnomAD or UKB were annotated with the corresponding population evidence code. Proactive-systematically generated benign truthsets of varying stringency were constructed according to different combinations of evidence codes, as detailed in the Results, Figure S1 and Table S1. Results Impact of truthset size on applicable PS3/BS3 evidence strength We first investigated the impact of varying pathogenic/benign truthset size on the applicable PS3/BS3 strengths of assays using the established Bayesian framework developed by Brnich et al . 11 ( Table 1 ). We observed that, holding the pathogenic truthset size constant for a hypothetical perfectly concordant assay (i.e. one exhibiting complete consistency between assay readout and truthset classification), an increase in benign truthset size led to an accordant increase in applicable exponent points (EPs) for the PS3 code, and thereby in applicable PS3 strength ( Table 1A ). Conversely, holding benign truthset size constant and increasing pathogenic truthset size led to an increase in the applicable BS3 strength, illustrating the inverse relationship between the two. View this table: View inline View popup Download powerpoint Table 1. Quantification of impact of truthset size on applicable evidence strengths under the Brnich framework for PS3/BS3 strength evaluation. Displayed are the assay strengths in terms of both exponent points (EPs) and equivalent 2015 ACMG/AMP evidence strengths for hypothetical MAVEs exhibiting (A) perfect discrimination between and varying levels of discordance with assay readout of (B) pathogenic, (C) benign and (D) both types of truthset variant in truthsets of varying sizes. For perfectly discriminant assays, PS3 strength is entirely dependent on benign truthset size, and BS3 strength entirely on pathogenic truthset size. In the presence of any discordance with assay readout, benign and pathogenic truthset size remain the respective drivers of PS3 and BS3 strength, but with the opposing truthset (i.e. the pathogenic truthset for PS3 and the benign for BS3) having some mild influence on applicable EPs. SUP, supporting; MOD, moderate; STR, strong PS3 strength is also wholly determined by the benign truthset size when discordance between the assay readouts and truthset classifications is restricted to deleterious readouts for benign-classified truthset variants ( Table 1B ). Likewise, applicable BS3 strength is wholly driven by the size of the pathogenic truthset when discordancy is restricted to neutral readouts for pathogenic-classified truthset variants ( Table 1C ). Current availability of benign and pathogenic missense variant truthsets for cancer susceptibility genes We next sought to quantify the number of missense variants available with existing classifications of (i) B/LB and (ii) P/LP in ClinVar ( Table 2 ). We first examined eight widely-tested genes implicated in hereditary breast and ovarian cancer (HBOC), namely BRCA1 , BRCA2 , PALB2 , ATM , CHEK2 , RAD51C , RAD51D and BRIP1 . View this table: View inline View popup Download powerpoint Table 2. Comparison of the number of missense variant ClinVar classifications for eight HBOC genes. Shown are the counts of variants (of any variant type and of missense variants alone) with ClinVar classifications of (likely) pathogenic and (likely) benign for eight selected hereditary breast and ovarian cancer (HBOC) genes. Counts are additionally stratified by review status (≥2* or ≥1*). Across all genes and review status thresholds, it was observed that missense variants constituted a minority of total (likely) benign/pathogenic classifications, disproportionate to their frequency in clinical testing. This imbalance was particularly noticeable for CHEK2 , RAD51C , RAD51D and BRIP1 . B, benign; LB, likely benign; LP, likely pathogenic; P, pathogenic. We first quantified the relative deposition of missense variant classifications in ClinVar for these eight HBOC genes ( Table 2 ). What was first notable was the modest absolute numbers of P/LP and B/LB classifications for missense variants in these widely-tested genes: the median average across the genes of 1.28/1.11% of P/LP and 1.39/1.35% of B/LB ≥1*/≥2* classifications being missense variants. Available B/LB classifications of ≥1*/≥2* varied widely between genes ( Table 2 ) but were strikingly low for CHEK2 (5/1), RAD51C (4/2) and RAD51D (4/3). By gene, of ≥1* B/LB missense variants, a median of 51.0% were “benign” (versus “likely benign”) whilst a median of 50.0% were ≥2* (versus ≥1*). These data highlight the disproportionate paucity in ClinVar of (i) missense variants and (ii) high-confidence classifications of benignity therein for many genes. We next evaluated the hypothetical evidence strengths applicable for MAVEs if using ClinVar as a source of missense variants for construction of “benignity truthsets”. Considering in the first instance a stringent inclusion criterion for prospective benign missense truthset variants of B/LB with ≥2* ClinVar review status, for perfectly concordant MAVEs, PS3 would be applicable at a maximum of “strong” for assays of ATM , BRCA1 and BRCA2 , at “moderate” for PALB2 and BRIP1 , and would not be applicable at all for CHEK2, RAD51C or RAD51D . The PS3 strengths attainable for MAVEs in these genes increase somewhat on inclusion of ClinVar B/LB variants with only 1* review status (classifications based on a single submitter, Table 2 ), with moderate/supporting strength now hypothetically attainable for CHEK2, RAD51C and RAD51D . We extended this analysis to 116 established cancer susceptibility genes (CSGs) with which are routinely tested for clinically (and are included in the NHS National Test Directory; Table 3 ). For a perfectly concordant assay, the minimum number of benign truthset variants permitting application of PS3 at strong is 19. Sufficient ClinVar B/LB missense classifications to meet this threshold were available for only 19.8% (≥2*; 23/116) and 36.2% (≥1*; 42/116) of these genes (Figure S2). These findings highlight the relative paucity of ClinVar B/LB missense classifications in this broader group of clinically-tested CSGs. View this table: View inline View popup Download powerpoint Table 3. Comparison of numbers of pathogenic/likely pathogenic and benign/likely benign ClinVar classifications for missense variants in 116 selected inherited cancer susceptibility genes. Relative numbers of ClinVar missense classifications were counted for each gene from a download of the ClinVar bulk data file. To avoid bias and circularity, truthsets for evaluation of MAVE strength should not include variants classified using any functional data 16 . For missense variants with B/LB ClinVar classifications in the eight HBOC genes selected above, we further explored the composition of the evidence contributing to their assignation as benign through analysis of the text provided against classifications in ClinVar (Supplemental Methods; Figure S3). We could ascertain explicitly or implicitly for 79.1% (239/302) of BRCA1 and 81.3% (260/320) of BRCA2 missense variants classified as B/LB in ClinVar that this classification had been achieved without application of functional evidence codes. However, for the remaining 6 genes, for only 39.8% (45/113) of variants could the B/LB classification be shown explicitly or implicitly to exclude functional evidence, highlighting the limitations of ClinVar as a source of truthset variants for these genes. As might be predicted, for 2* classifications, it was more common to identify at least one contributing classification for which application of functional data could be excluded (whether explicitly or implicitly); 1* (single-submitter) classifications were more likely to be unclear or to omit comments entirely (Figure S3). Proactive-systematic generation of benign missense truthsets for eight HBOC genes Workflow for proactive-systematic generation of benign missense truthsets We thus went on to devise a proactive approach for construction of benign missense truthsets using established ACMG-AMP criteria indicative of benignity, hereafter termed the “proactive-systematic” approach (Figure S1). We firstly applied filters to exclude from benign truthsets any missense variant with evidence of pathogenicity on the basis of existing ClinVar classifications, in silico missense or splicing predictions, or evidence of case-control signal of association (see Methods). We then used ancestry subgroup data from both UK Biobank (UKB) and gnomAD v2.1.1 to annotate remaining missense variants with codes relating to population frequency, namely the BA1 stand-alone code, the BS1 code and a BS1_sup code (see Supplemental Methods). Finally, we assigned evidence towards benignity using the BP4 criterion for in silico evidence using REVEL thresholds as previously established 24 . In accordance with the 2015 ACMG/AMP framework rules for evidence combination, we considered as the “standard” designation for variant benignity either (i) a BA1 annotation (with or without any in silico evidence), or (ii) annotation of both a BS1/BS1_sup population frequency code and a BP4 in silico code of any strength. However, in fitting with emergent v4.0 ClinGen-ACMG updates 30 , we also explored inclusion as likely benign based on the more permissive thresholds of a single evidence item towards benignity of (i) strong or greater, and (ii) supporting or greater strength (Table S1). The distinct BA1, BS1 and BS1_sup allele frequency thresholds were calculated by specifying allelic heterogeneity values of 1, 0.1 and 0.05 during MTAF construction (see Supplemental Methods). Allocation of the population codes (BA1/BS1/BS1_sup) was based on the maximum tolerated allele count (MTAC) for the given population/subpopulation, which is the number of observations reflecting the upper 95% confidence interval for the corresponding MTAF. We explored applying, as an additional level of conservatism, a minimum number of individuals in whom the variant must be observed for application of these population codes of ≥2 or ≥5 (Table S1); this would constitute an additional cautionary measure against stochastic occurrences and/or erroneous genotyping. For the primary results presented, we stipulated observation of ≥2 individuals to attain BS1/BS1_sup and ≥5 to attain BA1. Quantification of benign missense truthset sizes and applicable ACMG evidence strengths Using the proactive-systematic construction approach under “standard” ACMG/AMP combination rules and requiring ≥2/≥5 variant carriers for application of BS1/BA1 population codes, respectively, we generated benign missense truthsets of between 12 (for CHEK2 ) and 188 (for BRCA2 ) variants ( Table 4 ; Table S2). For all genes except CHEK2 , these truthsets would be of sufficient size to enable evidence points (EPs) of between ≥4 (4.69 to 7.15) towards pathogenicity, equivalent to application of PS3 at strong (assuming a perfectly concordant assay). For CHEK2 , the equivalent EPs applicable towards PS3 were 3.39, equating to moderate strength. We noted that, under the rules applied, the count of variant carriers in the given population group whereby a population evidence code was applied ranged from 2-77 for application of BS1_sup and 2-585 for application of BS1. For BA1, the range was 5-177,212, although, aside from four PALB2 variants identified in the small UKB African and Chinese sub-ancestries, for all other instances in which BA1 was awarded the number of variant carriers was ≥10 (Figure S4). View this table: View inline View popup Download powerpoint Table 4. Evidence strengths applicable for hypothetical MAVEs in eight HBOC genes using proactive-systematically constructed validation truthsets of benign missense variants. Displayed are the applicable exponent points and corresponding evidence strengths, calculated as previously described 11 for simulated MAVEs in eight HBOC genes using both ClinVar and our proactive-systematic method as sources of truthsets of benign missense truthsets. For the proactive-systematic approach, we required ≥2 variant carriers for application of BS1/BS1_sup population codes and ≥5 carriers for application of BA1. For both benign truthset construction methodologies, the pathogenic missense truthsets comprise the total number of pathogenic missense variants present in ClinVar, stratified by review status (≥2* or ≥1*); accordingly, the EPs Ben shown for the ClinVar-based analysis are also applicable to the proactive-systematic analysis. Strengths are assigned assuming all variants are covered by the MAVE, and there is perfect discrimination of the truthset variants. *For RAD51D , an arbitrary pathogenic truthset size of 1 was used for calculation of PS3 EPs and strength due to an absence of eligible pathogenic missense variants in ClinVar. EPs Ben , exponent points towards pathogenicity; EPs Path , exponent points towards pathogenicity; MOD, moderate; STR, strong; SUP, supporting. We then explored a model of benignity in which a single benign evidence item of any strength (including a single item of supporting evidence) was sufficient for truthset assignation (Table S1). Using this approach, and requiring ≥2/≥5 variant carriers for BS1/BA1, we observed that a much greater number of missense variants were eligible for truthset inclusion, ranging from 1026 (for BRCA1 ) to 14,342 (for BRCA2 ). This then meant that for all genes the size of benign missense truthset would be readily sufficient to generate PS3 strengths of very strong (applying assumption of perfect concordance). However, assigning benignity (B/LB) status on a lesser threshold of evidence is likely to incorporate larger numbers of “false positive” assignations of benignity. Improvement upon and correlation with existing clinical classifications To evaluate the concordance of our proactive-systematic approach with existing clinical classifications, we compared the specific variants comprising benign missense truthsets generated using our “standard” proactive-systematic construction methodology (as defined above, and requiring ≥2/≥5 variant carriers for application of BS1/BA1 population frequency codes) with those generated using existing ClinVar ≥2* B/LB classifications ( Table 4 ; Figure S5). Notably, for four of the surveyed HBOC genes ( PALB2 , CHEK2 , RAD51C and BRIP1 ), our proactive-systematically generated benign missense truthsets included all variants present in the respective ClinVar-derived truthset ( n = 11, 1, 2, and 5, respectively). For ATM , five ≥2* B/LB ClinVar variants were not captured by our proactive-systematic approach; for BRCA1 and BRCA2 , there were 157 and 118 such variants, respectively. As noted above, our approach implements a filter precluding any variant with an existing ClinVar classification of pathogenic (≥1*) from inclusion in a benign truthset. However, we noted that this filtering step did not trigger exclusion of any variants from “standard” proactive-systematically generated benign variant truthsets for any of the surveyed genes, including BRCA1 and BRCA2 , which have substantial numbers of missense variants with ≥1* pathogenic ClinVar classifications (172 and 69, respectively). This suggests our approach is unlikely to falsely ascribe benignity to pathogenic variants. Discussion Pathogenic variant classifications are typically deemed of greater clinical value to perform and deposit. By this corollary, counts of available benign-classified variants would be anticipated to lag behind on account of lower motivation for classification and deposition into variant databases by clinical diagnostic laboratories. However, as we demonstrate, there is a paradoxical relationship for clinical assay validation, namely that it is the number of benign-classified variants available for truthsets that most strongly determines the evidence strength towards pathogenicity applicable following clinical validation of a new functional assay. We also demonstrate the paucity of benign-classified missense variants available for the majority of 116 clinically tested cancer susceptibility genes, which is the case even for well-studied, frequently tested HBOC genes. We demonstrate even greater paucity when restricting to those missense variants classified firmly as benign (rather than likely benign) and/or with 2* review status (i.e. more than just a single submitter). In this study, we present a novel proactive-systematic approach for construction of benign missense variant truthsets that we apply to eight HBOC genes. This methodology could readily be extended to other genes: primarily limiting to this is calculation of the estimated BA1/BS1/BS1_sup thresholds via calculation of an MTAF, which requires clinical expertise in the gene-phenotype dyad (and appraisal of often-complex literature to attain estimates for phenotype/disease incidence, penetrance and/or attribution). We demonstrate the substantial uplift afforded by our proactive-systematic approach for six of eight HBOC genes widely tested clinically. BRCA1 / BRCA2 are outliers in part because of the markedly longer timecourse and higher volume of clinical testing of BRCA1 / BRCA2 compared to the other genes. Also influential is the early inception of the ENIGMA group who have instigated frameworks and variant classification activity, long predating the wider establishment of VCEPs. 31 Most impactfully, landmark multifactorial genetic epidemiologic analyses published in 2007 leveraged unique BRCA1/BRCA2 clinical datasets assembled from Myriad Genetics, who at that time held the international patent for diagnostic analysis of BRCA1 and BRCA2. 32 These and downstream multifactorial-type classifications utilised (i) family history scoring, (ii) co-occurrence in trans with a known pathogenic variant and (iii) co-segregation with disease in pedigrees to generate large numbers of classifications but did not include either population frequency or in silico predictions, thus consistent with the lower degree of overlap for these variants with our proactive-systematically generated set. 33 – 36 . Also of note is the paucity of BRCA1 missense variants meeting the criteria for benignity discrepancy during proactive-systematic truthset generation. This is a consequence of the distribution of REVEL scores for BRCA1 missense variants being skewed towards the deleterious range, thereby precluding the majority from attaining BP4 at any strength. Limitations We have sought in our proactive-systematic benignity classification approach to incorporate accessible data for filtering out variants with indicators of possible pathogenicity (namely in silico predictions suggesting protein effect or splicing impact, indicative case-control signal and/or suggestive ClinVar classifications). However, this approach does not identify/exclude existence of every type of evidence indicative of potential pathogenicity that might/should have been sought as part of a manual, bespoke clinical classification. For example, albeit highly infrequent, reports in the literature of the variant identified in biallelic configuration in cases of Fanconi anaemia. At the time of developing the proactive-systematic benignity classification approach, we continue to be in transition from the 2015 v3.0 ACMG/AMP Richards et al. framework (and various evolutions thereof by VCEPs), to the piloting of the draft v4.0 ClinGen/ACMG/AMP Framework, with accordant disparity and ambiguity in best practice for application of evidence codes, weightings and thresholds. For example, in the v3.0 2015 ACMG/AMP framework there are only two levels delineated for population codes (BA1 and BS1). As per the evolution of this by a number of the VCEPs 26 – 29 , we have employed three strengths of benignity scoring for population frequency: BA1, BS1 and BS1_supporting. Notably, there are likewise three levels described for benignity scoring for population frequency in the in the draft version of the v4.0 ACMG/AMP framework 30 . Furthermore, and as previously discussed, whilst published thresholds for REVEL and BayesDel were validated against a large dataset 24 , in silico prediction tools are recognised to perform differentially for different genes. This could be obviated by allowing multiple in silico tools within our proactive-systematic benignity classification approach, albeit this introduces additional permissiveness. Additional challenges in generation of truthsets for clinical validation of MAVES In assessing the power (evidence strength) afforded from a given number of truthset classifications available for clinical validation of a hypothetical MAVE, we assume (i) there is an assay readout for every truthset variant and (ii) perfect concordance between assay readouts and truthset-classifications (i.e. a perfectly discriminatory assay). In practice, neither of these standards is likely to be attained for a real assay: an assay will likely miss readouts for some of the variants present in the truthset and there will likely be some level of discordance. What we present are thus “best-case scenarios”. Another challenge may be domain bias: in silico tools may more easily classify variants as benign in non-functional domains compared to functional domains. Assays, in particular non-saturation MAVEs, may only target specific regions of the gene of greater interest, namely the functional domains. Accordingly, the expansion of benign missense truthsets will likely disproportionately better serve MAVEs adopting a saturation approach and/or MAVEs that include non-functional domains. There will also be considerable variation gene-by-gene in factors influencing the maximum size of benign missense truthset that can be generated even using the proactive-systematic approach, which include gene length (and, relatedly, the number of possible missense variants) and general constraint against missense variation. We demonstrate the sizeable uplift in benignity classifications attainable when applying a more permissive evidence requirement for attaining classification as LB (≥1 evidence item of supporting strength), reflecting the proposed threshold in the v4.0 ACMG/AMP Framework 30 . It is likely overall advantageous to enhance variant numbers (power) at the expense of potential minor contamination with “false positives”. The same considerations apply when mandating a number of observed variant carriers to attain evidence for population frequency. We elected to mandate ≥2/≥5 variant carriers for BS1/BA1 in our proactive-systematic approach. This prevented inclusion of variants whose benignity was predicated on a single, possibly stochastic, observation. Enforcing a higher variant carrier threshold (e.g. ≥5 throughput) would be expected to increase the precision of population frequency estimates but be punitive to attaining BS1/BS1_sup from smaller population datasets. Whilst for most genes the number of B/LB missense variant classifications is particularly low, the number of P/LP missense variant classifications is likely also limiting. More systematic approaches to construction of pathogenic truthsets are thus also urgently required: these will likely be predicated on the challenging endeavour of assembly and integration of large clinical datasets. Genes in which different variant types are associated with multiple disparate disease phenotypes require additional caution. Clinical validation of the assay should be undertaken for each disease phenotype separately, with appropriate generation of benign and pathogenic missense variant sets. Related to this, we have thus far applied the assumption that all missense variants throughout the gene will be acting by an identical mechanism to the selected missense pathogenic variant truthset; in practice, missense variants in different domains may plausibly have different mechanisms of action related to disease. Whilst there is expert consensus that best practice would be to have domain-specific validation of assays 15 , not only does this further exacerbate the problem of truthset paucity, but the definition of domains is also not straightforward. A further consideration is the likelihood that many variants, and in particular missense variants, will have effect sizes of intermediate magnitude (reduced penetrance). Whilst in concept assays might present a potential solution for providing a readout to quantify more sensitively disease effect beyond a dichotomous assignation of pathogenic/benign, to provide such a “quantitative” validation of the assays is an even more massive challenge. This would require truthsets of not just of established pathogenicity, but for which variants had been accurately quantified by orthogonal approaches as being of reduced penetrance. In conclusion, whilst MAVEs provide a highly promising approach by which to advance classification of rare variants newly identified on clinical testing, the paucity of available truthset classifications is a substantive roadblock to the robust clinical validation required for clinical adoption of these new assays. Urgently required are proactive systematic approaches by which available data are used to generate variant classifications, such as that we present here to generate truthsets of benign missense variants. Data Availability All data produced in the present work are contained in the manuscript except Table S2 (available upon reasonable request to the authors) Author contributions C.T. devised the initial proactive-systematic approach and all authors contributed to its evolution. C.F.R. and S.A. conducted all analyses and generated all figures for use in the manuscript. C.T. and C.F.R. produced the initial manuscript draft and all authors were involved in proofreading of this and all subsequent drafts of the manuscript. Declaration of interests The authors declare no competing interests. Data and code availability The published article includes all datasets generated or analyzed during this study. Acknowledgements A.G. and H.H. are supported by Cancer Research UK Catalyst award CanGene-CanVar [C61296/A27223]. S.A. and C.F.R. are supported by CG-MAVE, Cancer Research UK Programme Award [EDDPGM-Nov22/100004]. References 1. ↵ Krysiak , K. , Danos , A.M. , Kiwala , S. , McMichael , J.F. , Coffman , A.C. , Barnell , E.K. , Sheta , L. , Saliba , J. , Grisdale , C.J. , Kujan , L. , et al. ( 2022 ). A community approach to the cancer-variant-interpretation bottleneck . Nat Cancer 3 , 522 – 525 . doi: 10.1038/s43018-022-00379-w . OpenUrl CrossRef PubMed 2. ↵ Chen , E. , Facio , F.M. , Aradhya , K.W. , Rojahn , S. , Hatchell , K.E. , Aguilar , S. , Ouyang , K. , Saitta , S. , Hanson-Kwan , A.K. , Capurro , N.N. , et al. ( 2023 ). Rates and Classification of Variants of Uncertain Significance in Hereditary Disease Genetic Testing . JAMA Netw Open 6 , e2339571 . doi: 10.1001/jamanetworkopen.2023.39571 . OpenUrl CrossRef PubMed 3. ↵ Landrum , M.J. , Lee , J.M. , Benson , M. , Brown , G. , Chao , C. , Chitipiralla , S. , Gu , B. , Hart , J. , Hoffman , D. , and Hoover , J. ( 2016 ). ClinVar: public archive of interpretations of clinically relevant variants . Nucleic acids research 44 , D862 – D868 . OpenUrl CrossRef PubMed 4. ↵ Weile , J. , and Roth , F.P. ( 2018 ). Multiplexed assays of variant effects contribute to a growing genotype-phenotype atlas . Human genetics 137 , 665 – 678 . doi: 10.1007/s00439-018-1916-x . OpenUrl CrossRef PubMed 5. ↵ Rehm , H.L. , Alaimo , J.T. , Aradhya , S. , Bayrak-Toydemir , P. , Best , H. , Brandon , R. , Buchan , J.G. , Chao , E.C. , Chen , E. , Clifford , J. , et al. ( 2023 ). The landscape of reported VUS in multi-gene panel and genomic testing: Time for a change . Genetics in medicine : official journal of the American College of Medical Genetics 25 , 100947 . doi: 10.1016/j.gim.2023.100947 . OpenUrl CrossRef PubMed 6. ↵ Fowler , D.M. , Adams , D.J. , Gloyn , A.L. , Hahn , W.C. , Marks , D.S. , Muffley , L.A. , Neal , J.T. , Roth , F.P. , Rubin , A.F. , Starita , L.M. , and Hurles , M.E. ( 2023 ). An Atlas of Variant Effects to understand the genome at nucleotide resolution . Genome biology 24 , 147 . doi: 10.1186/s13059-023-02986-x . OpenUrl CrossRef PubMed 7. ↵ Rubin , A.F. , Stone , J. , Bianchi , A.H. , Capodanno , B.J. , Da , E.Y. , Dias , M. , Esposito , D. , Frazer , J. , Fu , Y. , Grindstaff , S.B. , et al. ( 2025 ). MaveDB 2024: a curated community database with over seven million variant effects from multiplexed functional assays . Genome biology 26 , 13 . doi: 10.1186/s13059-025-03476-y . OpenUrl CrossRef PubMed 8. ↵ Tabet , D. , Parikh , V. , Mali , P. , Roth , F.P. , and Claussnitzer , M. ( 2022 ). Scalable Functional Assays for the Interpretation of Human Genetic Variation . Annu Rev Genet 56 , 441 – 465 . doi: 10.1146/annurev-genet-072920-032107 . OpenUrl CrossRef PubMed 9. Fayer , S. , Horton , C. , Dines , J.N. , Rubin , A.F. , Richardson , M.E. , McGoldrick , K. , Hernandez , F. , Pesaran , T. , Karam , R. , Shirts , B.H. , et al. ( 2021 ). Closing the gap: Systematic integration of multiplexed functional data resolves variants of uncertain significance in BRCA1, TP53, and PTEN . American journal of human genetics 108 , 2248 – 2258 . doi: 10.1016/j.ajhg.2021.11.001 . OpenUrl CrossRef 10. ↵ Scott , A. , Hernandez , F. , Chamberlin , A. , Smith , C. , Karam , R. , and Kitzman , J.O. ( 2022 ). Saturation-scale functional evidence supports clinical variant interpretation in Lynch syndrome . Genome biology 23 , 266 . doi: 10.1186/s13059-022-02839-z . OpenUrl CrossRef PubMed 11. ↵ Brnich , S.E. , Abou Tayoun , A.N. , Couch , F.J. , Cutting , G. , Greenblatt , M.S. , Heinen , C.D. , Kanavy , D.M. , Luo , X. , McNulty , S.M. , Starita , L.M. , et al. ( 2019 ). Recommendations for application of the functional evidence PS3/BS3 criterion using the ACMG/AMP sequence variant interpretation framework . bioRxiv . 12. ↵ Richards , S. , Aziz , N. , Bale , S. , Bick , D. , Das , S. , Gastier-Foster , J. , Grody , W.W. , Hegde , M. , Lyon , E. , Spector , E. , et al. ( 2015 ). Standards and guidelines for the interpretation of sequence variants: a joint consensus recommendation of the American College of Medical Genetics and Genomics and the Association for Molecular Pathology . Genetics in medicine : official journal of the American College of Medical Genetics 17 , 405 – 424 . doi: 10.1038/gim.2015.30 . OpenUrl CrossRef PubMed 13. ↵ Tavtigian , S.V. , Greenblatt , M.S. , Harrison , S.M. , Nussbaum , R.L. , Prabhu , S.A. , Boucher , K.M. , and Biesecker , L.G. ( 2018 ). Modeling the ACMG/AMP variant classification guidelines as a Bayesian classification framework . Genetics in medicine : official journal of the American College of Medical Genetics 20 , 1054 – 1060 . doi: 10.1038/gim.2017.210 . OpenUrl CrossRef PubMed 14. ↵ Findlay , G.M. , Daza , R.M. , Martin , B. , Zhang , M.D. , Leith , A.P. , Gasperini , M. , Janizek , J.D. , Huang , X. , Starita , L.M. , and Shendure , J. ( 2018 ). Accurate classification of BRCA1 variants with saturation genome editing https://sge.gs.washington.edu/BRCA1/ . Nature 562 , 217 – 222 . doi: 10.1038/s41586-018-0461-z . OpenUrl CrossRef PubMed 15. ↵ Allen , S. , Garrett , A. , Muffley , L. , Fayer , S. , Foreman , J. , Adams , D.J. , Hurles , M. , Rubin , A.F. , Roth , F.P. , Starita , L.M. , et al. ( 2024 ). Workshop report: the clinical application of data from multiplex assays of variant effect (MAVEs), 12 July 2023 . European journal of human genetics : EJHG 32 , 593 – 600 . doi: 10.1038/s41431-024-01566-2 . OpenUrl CrossRef PubMed 16. ↵ Gelman , H. , Dines , J.N. , Berg , J. , Berger , A.H. , Brnich , S. , Hisama , F.M. , James , R.G. , Rubin , A.F. , Shendure , J. , Shirts , B. , et al. ( 2019 ). Recommendations for the collection and use of multiplexed functional data for clinical variant interpretation . Genome medicine 11 , 85 . doi: 10.1186/s13073-019-0698-7 . OpenUrl CrossRef PubMed 17. ↵ Dyer , S.C. , Austine-Orimoloye , O. , Azov , A.G. , Barba , M. , Barnes , I. , Barrera-Enriquez , V.P. , Becker , A. , Bennett , R. , Beracochea , M. , Berry , A. , et al. ( 2025 ). Ensembl 2025 . Nucleic acids research 53 , D948 – D957 . doi: 10.1093/nar/gkae1071 . OpenUrl CrossRef PubMed 18. ↵ Backman , J.D. , Li , A.H. , Marcketta , A. , Sun , D. , Mbatchou , J. , Kessler , M.D. , Benner , C. , Liu , D. , Locke , A.E. , Balasubramanian , S. , et al. ( 2021 ). Exome sequencing and analysis of 454,787 UK Biobank participants . Nature 599 , 628 – 634 . doi: 10.1038/s41586-021-04103-z . OpenUrl CrossRef 19. ↵ Karczewski , K.J. , Francioli , L.C. , Tiao , G. , Cummings , B.B. , Alföldi , J. , Wang , Q. , Collins , R.L. , Laricchia , K.M. , Ganna , A. , Birnbaum , D.P. , et al. ( 2020 ). The mutational constraint spectrum quantified from variation in 141,456 humans . Nature 581 , 434 – 443 . doi: 10.1038/s41586-020-2308-7 . OpenUrl CrossRef PubMed 20. ↵ Dorling , L. , Carvalho , S. , Allen , J. , González-Neira , A. , Luccarini , C. , Wahlström , C. , Pooley , K.A. , Parsons , M.T. , Fortuno , C. , Wang , Q. , et al. ( 2021 ). Breast Cancer Risk Genes - Association Analysis in More than 113,000 Women . The New England journal of medicine 384 , 428 – 439 . doi: 10.1056/NEJMoa1913948 . OpenUrl CrossRef PubMed 21. ↵ Feng , B.-J. ( 2017 ). PERCH: A Unified Framework for Disease Gene Prioritization . Human mutation 38 , 243 – 251 . doi: 10.1002/humu.23158 . OpenUrl CrossRef PubMed 22. ↵ Ioannidis , N.M. , Rothstein , J.H. , Pejaver , V. , Middha , S. , McDonnell , S.K. , Baheti , S. , Musolf , A. , Li , Q. , Holzinger , E. , Karyadi , D. , et al. ( 2016 ). REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants . American journal of human genetics 99 , 877 – 885 . doi: 10.1016/j.ajhg.2016.08.016 . OpenUrl CrossRef PubMed 23. ↵ Jaganathan , K. , Kyriazopoulou Panagiotopoulou , S. , McRae , J.F. , Darbandi , S.F. , Knowles , D. , Li , Y.I. , Kosmicki , J.A. , Arbelaez , J. , Cui , W. , Schwartz , G.B. , et al. ( 2019 ). Predicting Splicing from Primary Sequence with Deep Learning . Cell 176 , 535 – 548 e524. doi: 10.1016/j.cell.2018.12.015 . OpenUrl CrossRef PubMed 24. ↵ Pejaver , V. , Byrne , A.B. , Feng , B.J. , Pagel , K.A. , Mooney , S.D. , Karchin , R. , O’Donnell-Luria , A. , Harrison , S.M. , Tavtigian , S.V. , Greenblatt , M.S. , et al. ( 2022 ). Calibration of computational tools for missense variant pathogenicity classification and ClinGen recommendations for PP3/BP4 criteria . American journal of human genetics 109 , 2163 – 2177 . doi: 10.1016/j.ajhg.2022.10.013 . OpenUrl CrossRef PubMed 25. ↵ Whiffin , N. , Minikel , E. , Walsh , R. , O’Donnell-Luria , A.H. , Karczewski , K. , Ing , A.Y. , Barton , P.J.R. , Funke , B. , Cook , S.A. , MacArthur , D. , and Ware , J.S. ( 2017 ). Using high-resolution variant frequencies to empower clinical genome interpretation . Genetics in medicine : official journal of the American College of Medical Genetics . doi: 10.1038/gim.2017.26 . OpenUrl CrossRef PubMed 26. ↵ Oza , A.M. , DiStefano , M.T. , Hemphill , S.E. , Cushman , B.J. , Grant , A.R. , Siegert , R.K. , Shen , J. , Chapin , A. , Boczek , N.J. , Schimmenti , L.A. , et al. ( 2018 ). Expert specification of the ACMG/AMP variant interpretation guidelines for genetic hearing loss . Human mutation 39 , 1593 – 1613 . doi: 10.1002/humu.23630 . OpenUrl CrossRef PubMed 27. DeMille , D. , McDonald , J. , Bernabeu , C. , Racher , H. , Olivieri , C. , Cantarini , C. , Sbalchiero , A. , Thompson , B.A. , Jovine , L. , Shovlin , C.L. , et al. ( 2024 ). Specifications of the ACMG/AMP Variant Curation Guidelines for Hereditary Hemorrhagic Telangiectasia Genes—ENG and ACVRL1 . Human mutation 2024 , 3043736 . doi: 10.1155/2024/3043736 . OpenUrl CrossRef 28. Parsons , M.T. , de la Hoya , M. , Richardson , M.E. , Tudini , E. , Anderson , M. , Berkofsky-Fessler , W. , Caputo , S.M. , Chan , R.C. , Cline , M.S. , Feng , B.J. , et al. ( 2024 ). Evidence-based recommendations for gene-specific ACMG/AMP variant classification from the ClinGen ENIGMA BRCA1 and BRCA2 Variant Curation Expert Panel . American journal of human genetics . doi: 10.1016/j.ajhg.2024.07.013 . OpenUrl CrossRef PubMed 29. ↵ ClinGen Genome Resource ( 2024 ). ClinGen PTEN Expert Panel Specifications to the ACMG/AMP Variant Interpretation Guidelines for PTEN Version 3.1.0 . https://cspec.genome.network/cspec/ui/svi/doc/GN003?version=3.1.0 . 30. ↵ ClinGen Genome Resource ( 2023 ). ClinGen Summer Workshop Series 2023: Overview of DRAFT ACMG/AMP v4 Sequence Variant Guidelines . https://clinicalgenome.org/tools/clingen-summer-workshop-series-2023/sept-15-2023/ . 31. ↵ Spurdle , A.B. , Healey , S. , Devereau , A. , Hogervorst , F.B. , Monteiro , A.N. , Nathanson , K.L. , Radice , P. , Stoppa-Lyonnet , D. , Tavtigian , S. , Wappenschmidt , B. , et al. ( 2012 ). ENIGMA--evidence-based network for the interpretation of germline mutant alleles: an international initiative to evaluate risk and clinical significance associated with sequence variation in BRCA1 and BRCA2 genes . Human mutation 33 , 2 – 7 . doi: 10.1002/humu.21628 . OpenUrl CrossRef PubMed 32. ↵ Easton , D.F. , Deffenbaugh , A.M. , Pruss , D. , Frye , C. , Wenstrup , R.J. , Allen-Brady , K. , Tavtigian , S.V. , Monteiro , A.N. , Iversen , E.S. , Couch , F.J. , and Goldgar , D.E. ( 2007 ). A systematic genetic assessment of 1,433 sequence variants of unknown clinical significance in the BRCA1 and BRCA2 breast cancer-predisposition genes . American journal of human genetics 81 , 873 – 883 . doi: 10.1086/521032 . OpenUrl CrossRef PubMed Web of Science 33. ↵ Parsons , M.T. , Tudini , E. , Li , H. , Hahnen , E. , Wappenschmidt , B. , Feliubadalo , L. , Aalfs , C.M. , Agata , S. , Aittomaki , K. , Alducci , E. , et al. ( 2019 ). Large scale multifactorial likelihood quantitative analysis of BRCA1 and BRCA2 variants: An ENIGMA resource to support clinical variant classification . Human mutation 40 , 1557 – 1578 . doi: 10.1002/humu.23818 . OpenUrl CrossRef PubMed 34. Lindor , N.M. , Guidugli , L. , Wang , X. , Vallee , M.P. , Monteiro , A.N. , Tavtigian , S. , Goldgar , D.E. , and Couch , F.J. ( 2012 ). A review of a multifactorial probability-based model for classification of BRCA1 and BRCA2 variants of uncertain significance (VUS) . Human mutation 33 , 8 – 21 . doi: 10.1002/humu.21627 . OpenUrl CrossRef PubMed 35. Vallee , M.P. , Francy , T.C. , Judkins , M.K. , Babikyan , D. , Lesueur , F. , Gammon , A. , Goldgar , D.E. , Couch , F.J. , and Tavtigian , S.V. ( 2012 ). Classification of missense substitutions in the BRCA genes: a database dedicated to Ex-UVs . Human mutation 33 , 22 – 28 . doi: 10.1002/humu.21629 . OpenUrl CrossRef PubMed 36. ↵ Li , H. , LaDuca , H. , Pesaran , T. , Chao , E.C. , Dolinsky , J.S. , Parsons , M. , Spurdle , A.B. , Polley , E.C. , Shimelis , H. , Hart , S.N. , et al. ( 2020 ). Classification of variants of uncertain significance in BRCA1 and BRCA2 using personal and family history of cancer from individuals in a large hereditary cancer multigene panel testing cohort . Genetics in medicine : official journal of the American College of Medical Genetics 22 , 701 – 708 . doi: 10.1038/s41436-019-0729-1 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted March 20, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Availability of benign missense variant “truthsets” for validation of functional assays: current status and a novel systematic approach Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Availability of benign missense variant “truthsets” for validation of functional assays: current status and a novel systematic approach Charlie F. Rowlands , Sophie Allen , Alice Garrett , Miranda Durkie , George J. Burghel , Rachel Robinson , Alison Callaway , Joanne Field , Bethan Frugtniet , Sheila Palmer-Smith , Jonathan Grant , Judith Pagan , Trudi McDevitt , Katie Snape , Helen Hanson , Terri McVeigh , Clare Turnbull , CanVIG-UK medRxiv 2025.03.20.25324299; doi: https://doi.org/10.1101/2025.03.20.25324299 Share This Article: Copy Citation Tools Availability of benign missense variant “truthsets” for validation of functional assays: current status and a novel systematic approach Charlie F. Rowlands , Sophie Allen , Alice Garrett , Miranda Durkie , George J. Burghel , Rachel Robinson , Alison Callaway , Joanne Field , Bethan Frugtniet , Sheila Palmer-Smith , Jonathan Grant , Judith Pagan , Trudi McDevitt , Katie Snape , Helen Hanson , Terri McVeigh , Clare Turnbull , CanVIG-UK medRxiv 2025.03.20.25324299; doi: https://doi.org/10.1101/2025.03.20.25324299 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (297) Cardiovascular Medicine (4421) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (606) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15212) Forensic Medicine (30) Gastroenterology (1121) Genetic and Genomic Medicine (6581) Geriatric Medicine (667) Health Economics (996) Health Informatics (4520) Health Policy (1366) Health Systems and Quality Improvement (1611) Hematology (539) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15906) Intensive Care and Critical Care Medicine (1103) Medical Education (620) Medical Ethics (144) Nephrology (667) Neurology (6580) Nursing (345) Nutrition (998) Obstetrics and Gynecology (1141) Occupational and Environmental Health (956) Oncology (3324) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1689) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5433) Public and Global Health (9212) Radiology and Imaging (2193) Rehabilitation Medicine and Physical Therapy (1368) Respiratory Medicine (1194) Rheumatology (593) Sexual and Reproductive Health (709) Sports Medicine (529) Surgery (709) Toxicology (99) Transplantation (288) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ff54da1da7cdf88',t:'MTc3OTM4NDU2Ng=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-21T05:10:58.409756+00:00

License: CC-BY-4.0