Full text
21,493 characters
· extracted from
preprint-html
· click to expand
Detection of clonal hematopoiesis of indeterminate potential via genome or exome sequencing profoundly underestimates disease associations | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Detection of clonal hematopoiesis of indeterminate potential via genome or exome sequencing profoundly underestimates disease associations View ORCID Profile Robert Corty , View ORCID Profile Yash Pershad , View ORCID Profile Caitlyn Vlasschaert , Leo Luo , View ORCID Profile Taralynn Mack , View ORCID Profile Kaushik Amancherla , View ORCID Profile Cassianne Robinson-Cohen , View ORCID Profile Michael Savona , View ORCID Profile Alexander G. 
Bick doi: https://doi.org/10.1101/2025.08.11.25333294 Robert Corty 1 Department of Internal Medicine, Vanderbilt University Medical Center , Nashville, TN, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Robert Corty Yash Pershad 1 Department of Internal Medicine, Vanderbilt University Medical Center , Nashville, TN, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yash Pershad Caitlyn Vlasschaert 2 Department of Internal Medicine, Queen’s University , Kingston, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Caitlyn Vlasschaert Leo Luo 3 Department of Radiation Oncology, Vanderbilt University Medical Center , Nashville, TN, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Taralynn Mack 4 University of Washington , Seattle, WA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Taralynn Mack Kaushik Amancherla 1 Department of Internal Medicine, Vanderbilt University Medical Center , Nashville, TN, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kaushik Amancherla Cassianne Robinson-Cohen 1 Department of Internal Medicine, Vanderbilt University Medical Center , Nashville, TN, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Cassianne Robinson-Cohen Michael Savona 1 Department of Internal Medicine, Vanderbilt University Medical Center , Nashville, TN, USA 5 Vanderbilt-Ingram Cancer Center, Vanderbilt University Medical Center , Nashville, TN, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Michael Savona Alexander G. 
Bick 1 Department of Internal Medicine, Vanderbilt University Medical Center , Nashville, TN, USA 5 Vanderbilt-Ingram Cancer Center, Vanderbilt University Medical Center , Nashville, TN, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Alexander G. Bick For correspondence: alexander.bick{at}vumc.org Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Clonal hematopoiesis of indeterminate potential (CHIP) occurs when at least 4% of blood cells harbor somatic mutations in leukemogenic genes and is associated with increased risk for cardiovascular disease, malignancy, and mortality. While deep sequencing (>1,000x coverage) is the gold standard for CHIP detection, most large-scale studies rely on shallow genome/exome sequencing (∼35x coverage) from biobanks. However, the sensitivity and specificity of genome-based CHIP detection remain unknown, raising concerns about the accuracy of reported disease associations. We performed both deep targeted sequencing and genome sequencing on identical DNA samples from 9,925 participants to characterize genome-based CHIP detection performance. Genome sequencing showed poor sensitivity (28%) and positive predictive value (44%) compared to deep sequencing, with performance highly dependent on clone size. Simulation studies revealed that these ascertainment errors dramatically reduce statistical power and underestimate true effect sizes by >80%. These findings indicate that genome-based studies profoundly underestimate CHIP-disease associations, necessitating targeted deep sequencing for accurate clinical risk assessment. Main Clonal hematopoiesis of indeterminate potential (CHIP) occurs when ≥4% of nucleated blood cells harbor a somatic mutation in a leukemogenic gene ( 1 ). The gold standard method to detect CHIP is deep (>1,000x) sequencing of a peripheral blood sample ( 2 , 3 ). 
Researchers have also detected CHIP using shallow (∼35x) sequencing of genomes or exomes in large biobanks ( 4 ). They found that CHIP is associated with heightened risk for a wide range of diseases including cardiovascular disease, solid-organ malignancy, auto-immune disease, kidney disease, and all-cause mortality ( 5 , 6 ). A critical gap in the field is that the sensitivity and specificity of genome-sequencing-based CHIP detection are not known, nor are the effects of CHIP ascertainment errors on epidemiologic associations. Errors in CHIP ascertainment could render the estimates of CHIP-attributable risk inaccurate. Accurate estimation of the CHIP-attributable risk for disease is critical for clinical applications such as risk stratification, disease monitoring, and treatment selection. To address this gap, we performed both genome sequencing and deep sequencing on the same DNA sample from 9,925 research participants and characterized the testing properties of genome-sequencing-based CHIP detection. Using the empiric sensitivity and specificity of genome-sequencing-based CHIP detection, we simulated how ascertainment error influences the power and precision of CHIP-disease association studies. For deep sequencing, we performed error-corrected targeted sequencing of the exons of CHIP driver genes with median depth after deduplication of ∼1700x ( 2 ). Genome sequencing was performed using the Illumina DNA PCR-Free Prep targeting a median depth of 30x. To detect CHIP, Mutect2 was used to call somatic mutations in CHIP driver regions. We filtered variants based on read depth (≥100 for deep sequencing, ≥15 for genome sequencing), variant allele read depth (≥3 for deep sequencing, ≥2 for genome sequencing), double-strand support, inconsistency with germline heterozygosity, and variant allele fraction (VAF) ≥2% ( 4 ). Among 9,925 participants, we identified 1,255 (13%) with CHIP by genome sequencing and 1,509 (15%) with CHIP mutations by deep sequencing. 
We calculated the sensitivity and positive predictive value (PPV) of genome-sequencing-based CHIP calling on a per-variant level using deep-sequencing-based calls as the gold-standard and genome-sequencing-based calls as the index test. Genome-sequencing-based CHIP calling had a sensitivity of 28% (426/1,509), specificity of 94% (8,258/8,794), and PPV of 44% (426/962). These performance metrics were highly dependent on the size of the CHIP clone, as quantified by VAF. Sensitivity was 9% for VAF 2-5%, 32% for VAF 5-10%, 67% for VAF 10-20%, and 86% for VAF > 20% ( Figure 1A ). PPV was 0%, 26%, 44%, and 58% across the same VAF bins ( Figure 1B ). Download figure Open in new tab Figure 1. Performance characteristics of genome-sequencing-based CHIP calling and impact on epidemiologic associations. Empiric performance of genome-sequencing-based CHIP detection compared to the gold standard, deep sequencing, stratified by VAF for (A) sensitivity and (B) positive predictive value. Simulated CHIP-disease associations using logistic regression across odds ratios, with minimum VAF thresholds of 0.02, 0.05, and 0.1 using deep-sequencing-based CHIP detection for (C) statistical power and (D) odds ratio estimation compared to true odds ratio. As above, simulated CHIP-disease associations using logistic regression, but instead using genome-sequencing-based CHIP detection for (E) statistical power and (F) bias in odds ratio estimation compared to true odds ratio. We performed simulations to determine how ascertainment errors influence the power and precision of CHIP associations with prevalent and incident disease. For each scenario, we simulated 100,000 persons with random sex, random age (40-79 years), CHIP status and VAF calibrated to age and repeated the simulation 1,000 times. First, we tested the impact of ascertainment errors in studies of the effect of CHIP on disease prevalence. 
We simulated disease prevalence based on age, sex, and CHIP status for odds ratios (ORs) ranging from 1.0 to 3.0. Genome-sequencing-based CHIP calls were simulated based on empiric, VAF-dependent sensitivity and specificity reported above. We tested for CHIP-disease association using multivariate logistic regression. With deep-sequencing-based CHIP calls, power was 100% and OR estimation was near perfect ( Figures 1C and 1D ). Using all genome-sequencing-based CHIP calls, the power to detect the association was 5%, 25%, 46%, and 71% for a disease with CHIP-associated OR of 1.5, 2.0, 2.5, and 3.0, respectively. Power was higher when people with genome-sequencing-estimated VAF < 10% were excluded, consistent with the high rate of false positives observed in that group ( Figure 1E ). Using genome-sequencing-based CHIP calls, estimated ORs captured ∼16% of true liability ( Figure 1F ). Second, we tested the impact of ascertainment errors in studies of the effect of CHIP on disease incidence. We simulated age of disease onset and age of censoring based on age, sex, and CHIP status for hazard ratios (HRs) ranging from 1.0 to 3.0. Genome-sequencing-based CHIP calls were simulated as above. We tested for CHIP-disease association using Cox proportional hazards regression. With deep-sequencing-based CHIP calls, power was 100% and the estimated HR captured 73% of the true HR, consistent with a known downward bias in Cox proportional hazards regression ( Supplementary Figure 3 ). Using all genome-sequencing-based CHIP calls, the power to detect the association was 8%, 28%, 53%, and 70% for a disease with CHIP-associated HR of 1.5, 2.0, 2.5, and 3.0, respectively ( Supplementary Figure 3 ). As with prevalence, power was higher when people with genome-sequencing-estimated VAF < 10% were excluded. Estimated HRs using genome-sequencing-based CHIP calls captured ∼9% of the true hazard for disease ( Supplementary Figure 3 ). Our study has several implications. 
First, accurate estimation of the strength of association between CHIP and disease necessitates sensitive CHIP detection, which is not possible with genome-sequencing-based CHIP ascertainment. Even though studies of ever-larger biobanks with genome-sequencing-based CHIP calls can overcome deficits in power, ascertainment errors lead to underestimation of the strength of CHIP-disease association regardless of cohort size. Second, since most well-powered CHIP epidemiology studies reanalyze genome or exome sequencing, the widely cited associations between CHIP and disease risk, such as cardiovascular disease, are significantly underestimated. Studies with targeted CHIP sequencing are necessary to understand the true strength of association between CHIP and disease. Third, the group of people with genome-sequencing-based CHIP calls with VAF < 10% contains enough false positives that removing them from the analysis is more beneficial than including them. While it is widely reported in studies that use genome- or exome-based CHIP detection that CHIP with VAF >10% carries a higher risk for many diseases, this observation is likely due to CHIP ascertainment errors. In summary, errors in CHIP ascertainment in genome- and exome-based CHIP calling lead to ( 1 ) insensitive studies, which can be partially rescued by excluding people with observed VAF < 10%, and ( 2 ) a profound underestimation of the strength of CHIP-disease association, which can be rescued only with improved CHIP ascertainment – that is, by deep, targeted sequencing. Data Availability A table of CHIP mutations detected by deep targeted sequencing and whole-genome sequencing is available for download on github.com/bicklab/wgs_chip_is_spec_but_not_sens. https://www.github.com/bicklab/wgs_chip_is_spec_but_not_sens Footnotes ↵ * RWC and YP (second listed author) are co–first authors. The authorship order reflects that the study was initiated by RWC, who was joined by YP (second listed author) in leading the project. 
Conflict of Interest: M.R.S. has received honoraria for advisory board membership or consultancy from Bristol Myers Squibb, CTI, Forma, Geron, GlaxoSmithKline/Sierra Oncology, Karyopharm, Ryvu Therapeutics, and Taiho Pharmaceutical; has received research funding from ALX Oncology, Astex Pharmaceuticals, Incyte Corporation, Takeda, and TG Therapeutics; holds equity in Empath Biosciences, Karyopharm, and Ryvu Therapeutics; and has been reimbursed for travel expenses by Astex. References 1. ↵ Jaiswal S , et al. Age-Related Clonal Hematopoiesis Associated with Adverse Outcomes . N Engl J Med . 2014 ; 371 ( 26 ): 2488 – 2498 . OpenUrl CrossRef PubMed 2. ↵ Mack T , et al. Cost-Effective and Scalable Clonal Hematopoiesis Assay Provides Insight into Clonal Dynamics . The Journal of Molecular Diagnostics . 2024 ; 26 ( 7 ): 563 – 573 . OpenUrl PubMed 3. ↵ Stewart CM , et al. Clonal hematopoiesis detection by simultaneous assessment of peripheral blood mononuclear cells, blood plasma, and saliva . Journal of Clinical Investigation . [published online ahead of print: June 19, 2025]. doi: 10.1172/JCI191256 . OpenUrl CrossRef 4. ↵ Vlasschaert C , et al. A practical approach to curate clonal hematopoiesis of indeterminate potential in human genetic datasets . Blood . 2023;blood.2022018825 . 5. ↵ Jaiswal S , Ebert BL . Clonal hematopoiesis in human aging and disease . Science . 2019 ; 366 ( 6465 ): eaan4673 . OpenUrl Abstract / FREE Full Text 6. ↵ Walsh K. The emergence of clonal hematopoiesis as a disease determinant . Journal of Clinical Investigation . 2024 ; 134 ( 19 ): e180063 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted August 16, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. 
Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Detection of clonal hematopoiesis of indeterminate potential via genome or exome sequencing profoundly underestimates disease associations Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Detection of clonal hematopoiesis of indeterminate potential via genome or exome sequencing profoundly underestimates disease associations Robert Corty , Yash Pershad , Caitlyn Vlasschaert , Leo Luo , Taralynn Mack , Kaushik Amancherla , Cassianne Robinson-Cohen , Michael Savona , Alexander G. Bick medRxiv 2025.08.11.25333294; doi: https://doi.org/10.1101/2025.08.11.25333294 Share This Article: Copy Citation Tools Detection of clonal hematopoiesis of indeterminate potential via genome or exome sequencing profoundly underestimates disease associations Robert Corty , Yash Pershad , Caitlyn Vlasschaert , Leo Luo , Taralynn Mack , Kaushik Amancherla , Cassianne Robinson-Cohen , Michael Savona , Alexander G. 
Bick medRxiv 2025.08.11.25333294; doi: https://doi.org/10.1101/2025.08.11.25333294 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9220) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffde34f4e65dfa9',t:'MTc3OTQ3NDU4Mg=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.