Full text
81,512 characters
· extracted from
preprint-html
· click to expand
Evaluating the impact of compound heterozygosity involving microdeletions and sequence-level variants: findings in autism | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Evaluating the impact of compound heterozygosity involving microdeletions and sequence-level variants: findings in autism View ORCID Profile Worrawat Engchuan , Kara Han , View ORCID Profile Rayssa MMW Feitosa , Nelson Bautista Salazar , View ORCID Profile David J Mager , Shania Wu , Faraz Ali , Alexander Chan , View ORCID Profile Marla Mendes de Aquino , View ORCID Profile Xiaopu Zhou , View ORCID Profile Rulan Shaath , View ORCID Profile Nickie Safarian , View ORCID Profile Bhooma 
Thiruvahindrapuram , View ORCID Profile Thomas Nalpathamkalam , View ORCID Profile Giovanna Pellecchia , View ORCID Profile Jill de Rijke , Mehdi Zarrei , View ORCID Profile Elemi Breetvelt , View ORCID Profile Stephen W. Scherer , View ORCID Profile Brett Trost , View ORCID Profile Jacob Vorstman doi: https://doi.org/10.1101/2025.10.17.25338215 Worrawat Engchuan 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Worrawat Engchuan Kara Han 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Rayssa MMW Feitosa 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada 3 Institute of Medical Science, University of Toronto , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rayssa MMW Feitosa Nelson Bautista Salazar 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site David J Mager 4 Department of Pediatrics, Division of Respiratory Medicine and Allergology, Erasmus Medical Centre—Sophia Children’s Hospital , 3015 CN Rotterdam, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site 
ORCID record for David J Mager Shania Wu 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Faraz Ali 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alexander Chan 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 5 Department of Human Genetics , McGill CERC Program in Genomic Medicine, Victor Phillip Dahdaleh Institute of Genomic Medicine at McGill University Find this author on Google Scholar Find this author on PubMed Search for this author on this site Marla Mendes de Aquino 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Marla Mendes de Aquino Xiaopu Zhou 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Xiaopu Zhou Rulan Shaath 6 Laboratory of Genomic Medicine, Research Section , Sidra Medicine, Doha, Qatar 7 College of Health and Life Sciences, Hamad Bin Khalifa University , Doha, Qatar Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rulan Shaath Nickie Safarian 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada 8 Molecular Medicine Program, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar 
Find this author on PubMed Search for this author on this site ORCID record for Nickie Safarian Bhooma Thiruvahindrapuram 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Bhooma Thiruvahindrapuram Thomas Nalpathamkalam 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Thomas Nalpathamkalam Giovanna Pellecchia 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Giovanna Pellecchia Jill de Rijke 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jill de Rijke Mehdi Zarrei 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Elemi Breetvelt 9 Department of Psychiatry, Temerty Faculty of Medicine, University of Toronto , Toronto, Ontario, Canada 10 Department of Psychiatry, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for 
Elemi Breetvelt Stephen W. Scherer 1 The Centre for Applied Genomics, Hospital for Sick Children , Toronto, Ontario, Canada 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada 11 Department of Molecular Genetics, University of Toronto , Toronto, Ontario, Canada 12 McLaughlin Centre and Department of Molecular Genetics, University of Toronto , Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Stephen W. Scherer Brett Trost 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada 8 Molecular Medicine Program, The Hospital for Sick Children , Toronto, Ontario, Canada 11 Department of Molecular Genetics, University of Toronto , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Brett Trost Jacob Vorstman 2 Program in Genetics and Genome Biology, The Hospital for Sick Children , Toronto, Ontario, Canada 9 Department of Psychiatry, Temerty Faculty of Medicine, University of Toronto , Toronto, Ontario, Canada 10 Department of Psychiatry, The Hospital for Sick Children , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jacob Vorstman For correspondence: Jacob.vorstman{at}sickkids.ca Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Compound heterozygous events involving a chromosome deletion and on the remaining allele a functional DNA sequence-level variant can underpin a range of medical conditions. Most large-scale genetic studies do not include a systematic analysis of such compound heterozygous deletion (DelCH) events. 
We developed three frameworks: i) traditional burden analysis; ii) deletion-matched burden analysis; and iii) transmission disequilibrium test (TDT), to examine the possible contribution of DelCH to clinical presentations, and report results of their implementation in 9,766 families of autistic individuals. Across the three strategies, we observed enrichment of rare DelCH events in autistic individuals at a nominal significance level for individual tests. Collectively, six genes – CFHR4 , HSDL1 , MYO15A , NEFH , and two olfactory receptor genes, OR1A2 and OR4P2 – were affected by DelCH events in at least two unrelated autistic individuals (and not in unaffected family members), while the reverse analyses identified no genes ($p < 2.2 \times 10^{-16}$). Gene set enrichment analysis of the extended network of candidate genes showed a remarkable convergence to processes related to neurogenesis. Our findings suggest a modest role for DelCH events in ASD. The strategies described here are available via a GitHub repository, allowing the research community to examine the role of DelCH in other genome sequencing cohorts. 1. Introduction Many biological processes require finely tuned regulation of gene expression, in which the expression level of each gene (or gene pair) must fall within a certain range 1 , 2 . Genomic copy number variants (CNVs), which include deletions and duplications of DNA, can impact biological function by decreasing or increasing gene dosage. CNVs can be pathogenic if they affect expression of genes that are sensitive to dosage changes 3 . Genes for which just one copy is insufficient to maintain normal biological function are described as ‘haploinsufficient’. The estimated number of human protein-coding genes that are haploinsufficient is just under 3,000 4 , based on a gnomAD (v2.1) 5 loss-of-function observed/expected upper bound fraction (LOEUF) threshold of below 0.35 6 . 
Consequently, for most human genes, one copy is sufficient for normal biological function 7 – 10 . However, pathogenicity may still arise when the remaining copy of a deletion-impacted gene is also affected by functionally relevant variation 11 . The scenario in which both alleles of a given gene are impacted by different deleterious genetic variants is called compound heterozygosity. In the present work, we focus on an understudied type of compound heterozygosity, namely the co-occurrence of a deletion on one allele and a sequence-level variant (single nucleotide variant [SNV] or indel) affecting the other allele, hereafter referred to as deletion compound heterozygosity (DelCH). Examining DelCH events in disease cohorts is important for several reasons. First, DelCH events are typically under-scored in large-scale genomics studies and thus including them should contribute to achieving a more comprehensive library of disease-associated genetic variation. Second, DelCH may help explain why some individuals with a clinical phenotype have potentially clinically relevant deletions inherited from an unaffected parent. Third, including DelCH in genetic analyses may lead to the identification of recessive disease genes that would otherwise escape detection. Although less frequent than compound heterozygosity caused by sequence-level variants on both alleles, case reports of diseases caused by DelCH have been reported. These include phenotypes ranging from osteogenesis imperfecta to chylomicronemia, often involving inheritance of the deletion from one parent and the sequence-level variant from the other, with both parents phenotypically unaffected 12 – 15 . There are also several studies reporting neurodevelopmental and/or psychiatric phenotypes putatively caused by this genetic mechanism 16 – 20 . 
Beyond these case reports, systematic analyses of this phenomenon in case-control cohorts are scarce, and include two studies providing tentative evidence for a role for DelCH in schizophrenia 21 and autism spectrum disorder (ASD) 22 . ASD is a neurodevelopmental disorder whose common features include social/communication impairments, repetitive behaviors, and restricted interests 23 . The severity of these characteristics varies greatly—some autistic individuals require high levels of support in their daily lives, whereas others are entirely independent and excel in many societal domains. In addition to its phenotypic variability, studies indicate a high level of genetic heterogeneity. Over the past three decades, the proportion of autistic individuals for whom a contributing rare genetic variant can be identified has increased from approximately 2-4% to approximately 5-25%, depending on the genotyping method and the characteristics of the population examined 24 . The identification of genetic variants associated with ASD is a crucial step toward elucidating the genes and biological mechanisms underpinning this condition 25 . For these reasons, genomics research in ASD has focused on the analysis of large cohorts and the evaluation of different genetic variant types. As a result of these efforts, the number of ASD-associated genes continues to rise, with over one hundred genes now definitively implicated in ASD etiology 26 – 28 . It is also noteworthy that a subset of these genes was identified through the study of recessive mutations being found in mostly consanguineous families 29 , 30 . Variants that substantially affect ASD susceptibility are always rare and include sequence-level variants in ASD-associated genes as well as structural variants such as CNVs, which often affect more than one gene. 
Common variants individually exert small effects, but in aggregate have larger effects and account for a substantial fraction of ASD liability 31 , although ASD polygenic scores currently explain only about 2.5% of its variance 32 . Increasing efforts are being made to understand the combined influence of common and rare variants 33 – 35 , through the inclusion of the most comprehensive catalogue of variants detectable in the human genome 28 . There is an increasing realisation that an integrative approach to identifying genotype-phenotype associations, considering all classes of genetic variation, is a crucial step towards precision medicine, both in psychiatry and across all medical disciplines 36 . The reliable identification of compound heterozygous events is part of such an integrative approach to genetic analysis. Unfortunately, analytical strategies for examining DelCH on a cohort-wide scale have not yet been developed. There are several reasons for this. First, this analysis requires whole-genome sequencing (WGS) data, which only became available for researchers in recent years. Furthermore, given that the mechanism typically consists of two a priori rare events at the same locus, the role of DelCH in any given condition is likely modest, suggesting that a large sample size is necessary to achieve sufficient statistical power. In addition to this “lightning striking twice” feature, the data preparation is demanding, as each individual presents with unique deletion regions which must be examined individually for sequence-level variants on the other allele. 
In addition, the selection of variants requires careful consideration; for example, deletions or deleterious sequence-level variants that are known to affect ASD susceptibility in an autosomal dominant fashion are not candidates for DelCH, since they do not require the second hit on the other allele; consequently, patients with dominant pathogenic variants associated with ASD are a priori excluded from the analysis. Finally, defining exactly where on the non-deleted allele the second variant can occur can be challenging, as it depends on the type of data and chosen analytical approach; it can be based on the start and end positions of the deletion, or on the boundaries of any gene(s) affected by the deletion ( Fig. 1 ). The selection criteria for these variables require careful consideration as they impact the results of the analysis. Download figure Open in new tab Fig. 1 Examples of compound heterozygous events involving a deletion on one allele and a sequence-level variant on the other allele. Scenario 1: The coding region of a gene (orange) is partially deleted on one allele, with a sequence-level variant (asterisk) on the other allele that is within both the coding region and the boundaries of the deletion. Scenario 2: same as scenario 1, except the sequence-level variant is outside the boundaries of the deletion. In this study, we developed three complementary strategies for examining the role of genome-wide DelCH in large cohorts of individuals with WGS data, namely, (1) traditional burden analysis, (2) deletion-matched burden analysis and (3) transmission disequilibrium test (TDT) ( Fig. 2 ). While our approaches can be applied to any condition with a putative genetic contribution, we evaluated our methods in a large WGS dataset of 9,766 families of autistic individuals from the MSSNG 28 , 37 , Simons Simplex Collection (SSC) 28 , 37 , 38 , and SPARK 39 cohorts. 
Along with a discussion of the advantages and disadvantages of each approach, we make the analytical pipeline available to the scientific community via a GitHub repository for further improvements and application in other cohorts. Ultimately, we also analysed and discussed the significance of our results in the tested cohorts. Download figure Open in new tab Fig. 2 The three strategies used to examine the role of DelCH events in disease. Strategy 1 (“traditional burden analysis”) is a burden analysis of DelCH events, comparing children and parents, but agnostic to inheritance status of the variants involved (i.e., inherited, non-transmitted, and de novo ). Strategy 2 (“deletion-matched burden analysis”) compares the burden of DelCH events in inherited deletions between children and their deletion-transmitting parents. Strategy 3 (“TDT”) is a transmission disequilibrium test, which is calculated using the expected and observed inherited SNVs in the non-deletion-transmitting parent. Note that the Expected SNVs (EXP) is the total SNVs of non-deletion-transmitting parents divided by two (diploid genome). For TDT, children can either be disease-affected children alone, or (as implemented here) a comparison between transmission to affected children and transmission to unaffected siblings. DelCH = deletion compound heterozygosity; SNV = single nucleotide variant; Vs = versus. * For traditional burden analysis, the comparison is between children and both parents. For deletion-matched burden analysis, children are compared to the parent from whom the deletion was inherited. This does not apply to TDT, as the comparison was not done between deletion-carriers. 2. Methods 2.1. Data filtering Short-read WGS data were obtained from three large cohorts of ASD families: MSSNG 28 , 37 , SSC 38 , and SPARK 39 . A total of 9,766 families were analyzed in this study. 
SNVs were detected using the HaplotypeCaller from the Genome Analysis Toolkit (GATK 40 ), while deletions were detected using two structural variation detection pipelines: a read-depth based pipeline (CNVnator 41 and ERDS 42 ) and a paired-end mapping-based pipeline (Manta 43 and DELLY 44 ). Standard quality controls were performed as described previously 28 . We only included SNVs in coding sequence (annotated by ANNOVAR 45 as “exonic” or “exonic; splicing” in the typeseq priority column) and those affecting canonical splice sites (i.e., an SNV affecting the 2 bp at either end of an intron). Given that DelCH follows a recessive mode of action, the X chromosome was excluded from our analysis. Given that the paired-end mapping-based pipeline may also detect somatic deletions, we removed deletions encompassing at least one high-quality heterozygous SNV call, indicative of likely mosaicism. The remaining germline deletions were those detected by either of the two pipelines, with breakpoints defined by the read-depth-based pipeline if a deletion was detected by both. Gene definitions were obtained from RefSeq release 200 (GRCh38), which contains 19,433 protein-coding genes 46 . The recessive mode of action of DelCH events implies that each component (the deletion and the sequence-level variant) is not expected to be pathogenic on its own. Therefore, we excluded 1,149 families where the ASD phenotype was already attributable to recurrent CNVs, large deletions (>3Mb), or dominant loss-of-function SNVs or indels (i.e., stop gain, frameshift indels, canonical splicing variants) impacting 134 previously reported ASD candidate genes 28 . Only trio/quad families (n=6,902 families) were retained for subsequent analyses to ensure the ability to accurately infer deletion and SNV phasing. Furthermore, we excluded genes predicted to be of dominant effect/loss-of-function intolerant (i.e., those with gnomAD v2.1 LOEUF score < 0.35), retaining 16,636 genes. 
Of these, 8,036 genes were impacted by at least one deletion with a frequency of less than 10% in our dataset. This gene set was the substrate for our main analysis ( Fig. 3 ). Download figure Open in new tab Fig. 3. Filtering procedure for family, gene, and variants. Families: we retained quad or trio families where no ASD-relevant variants were identified in any of the family members. Genes: we retained only protein-coding genes not defined as LoF-intolerant (gnomAD LOEUF cut-off of <0.35). Variants: Exonic (and splicing) SNVs across the full MAF range and deletions with MAF < 10% were retained for our analyses. A) pRec and neurodevelopmental disorder (NDD) recessive gene distributions for genes involved in at least one deletion. Low, Medium, and High are based on Genomics England’s neurodevelopmental disorder recessive genes set, while Internal curation refers to our curated neurodevelopmental genes set (supplementary section 1, table S1) . B) Distribution of DelCH events within versus outside deletion boundaries and by frequency. The asterisk represents an SNV on the non-deletion-harbouring allele inside the coding region of a gene (orange bar). 2.1.1. Annotation for recessive intolerance and association with neurodevelopmental conditions We then annotated these 8,036 genes by the gnomAD v2.1 5 pRec score, which predicts the probability of intolerance to biallelic loss-of-function variants, but not to heterozygous loss-of-function variants. Of these, 7,239 genes had pRec scores available, and 2,257 had high pRec scores, the latter indicating a low tolerance to biallelic loss-of-function variants ( Fig. 3A ). 
We also annotated the genes for their association with neurodevelopmental conditions based on the gene’s inclusion in our curated neurodevelopmental genes set using information from known ASD gene lists 24 , 27 , ClinGen 47 , and OMIM 48 (supplementary section 1) and/or in the Genomics England neurodevelopmental disorder recessive genes set 49 , generating a set of 1,309 genes predicted to be associated with neurodevelopmental risk under a recessive model ( Fig. 3A ; Fig. S1; Table S1). 2.1.2. Minor allele frequency (MAF) thresholds Because DelCH involves two different variants, the individual frequency of these variants (denoted $\mathrm{MAF}_{\mathrm{Del}}$ and $\mathrm{MAF}_{\mathrm{SNV}}$) may be higher than what is typical for pathogenic variants with dominant effects. However, the frequency of their co-occurrence must be small in order for it to potentially have a large effect size. Thus, we calculated the event frequency as the product of the deletion MAF and the SNV MAF ($\mathrm{Freq}_{\mathrm{DelCH}} = \mathrm{MAF}_{\mathrm{Del}} \times \mathrm{MAF}_{\mathrm{SNV}}$). We examined DelCH events at different upper bound frequency thresholds, ranging from 0.01 to 0.00005. This approach allows for a lenient inclusion of SNVs and deletions, as long as the product of their MAFs meets the set $\mathrm{Freq}_{\mathrm{DelCH}}$ threshold for each individual DelCH event. Given the expected stronger deleteriousness of deletions compared to SNVs, $\mathrm{MAF}_{\mathrm{Del}}$ was capped in keeping with the prevalence of the studied condition. For our study, the maximum value for $\mathrm{MAF}_{\mathrm{Del}}$ was set at 0.1; a higher MAF threshold would predict homozygous deletions to occur at a frequency above 1%, which would be incompatible with ASD epidemiology 23 . 2.1.3. Identification of DelCH events A DelCH event requires one allele to be deleted and the other allele to be affected by a functional SNV. Consequently, the deletion and SNV can either be inherited separately from each parent, or one or both can exist only in the child due to de novo event(s). For example, the deletion-matched burden analysis ( Fig. 
2 ) relies on deletions inherited from one parent, in which case the SNV was either inherited from the other parent or is de novo . DelCH events in parents necessarily involve either a deletion transmitted to their children, and therefore a non-transmitted SNV on the remaining allele, or vice-versa . Using this framework, we identified a total of 147,737 DelCH events in our dataset ( Fig. 3 ). Due to phasing limitations, only certain subsets of these DelCH events were included in each strategy (supplementary section 2). 2.1.4. Within-family comparison of DelCH events and SNV stratification The majority of parental couples share the same ancestry (>80% in a 2015 US study 50 and 94% in the current study). Given that all analytical approaches proposed in this study are based on within-family comparisons, we expect minimal confounding bias conferred by population structure – which includes differences in structural variant haplotypes 51 – and therefore no correction for population structure is incorporated in our analyses. For all three approaches, statistical analyses were performed while stratifying by the functional effect of the SNV involved in the DelCH event. Specifically, SNVs were divided into five groups: i) loss-of-function variants (LoF; i.e., stop gain, frameshift indels, and canonical splicing variants), ii) missense variants (i.e., nonsynonymous SNVs), iii) damaging missense variants (i.e., missense variants predicted to be deleterious by at least four out of nine algorithms/conservation scores (SIFT 52 <0.05, PolyPhen2 53 ≥0.9, PROVEAN 0) 56 , iv) LoF and damaging missense variants combined, and v) synonymous variants. For brevity, we abbreviate the five functional variant types as LoF, Miss, DMiss, LoF_DMiss, and Syn, respectively. 2.2. DelCH event comparison strategies Following these filtering steps, the DelCH events can be compared between cases (children, which may be affected or unaffected) and controls (unaffected parents). 
We used three different strategies to examine possible association of DelCH with ASD ( Fig. 2 ): (1) “Traditional burden analysis”, (2) “Deletion-matched burden analysis”, and (3) “Transmission Disequilibrium Test” (TDT). For all three strategies, we performed analyses separately for each functional variant type described above, with Syn considered to be a negative control. 2.2.1. Traditional burden analysis: Within-family comparison of DelCH burden between children and their parents Traditional burden analysis compares the rate of DelCH events between children and their parents ( Fig. 2 , strategy 1). This strategy includes all deletions ( de novo , inherited, and non-transmitted). Its main advantage is the inclusion of a larger number of individuals. However, this comes at the expense of a loss of distinction between de novo and inherited variants, as well as of phasing information for de novo deletions (in children) and non-transmitted deletions (in parents) (see supplementary section phasing considerations). Consequently, when the SNV is outside the deletion boundaries, it cannot be assumed that the two variants are on different alleles (and thus constitute a DelCH event). False positives, i.e., the occurrence of the deletion and SNV on the same allele, could negatively affect the signal to noise ratio in the data. While we recognise such limitation of traditional burden analysis, we estimated the rate of potential false DelCH events in our data at just 4.5% of the total number of DelCH events. We used logistic regression, stratified by family, to test the hypothesis of an increased burden of DelCH in cases (autistic individuals) compared to controls (parents). 
Given that each individual presents with a unique set of deletions for this analysis, the totality of sequence on the remaining alleles of deleted regions varies per individual, which affects the a priori likelihood of the occurrence of DelCH events - the more sequence is deleted, the higher the probability of additional variation on the remaining allele. Consequently, the a priori DelCH probability under the null hypothesis cannot be assumed to be identical between the comparison groups. Thus, covariates for statistical correction in traditional burden analysis included the total length of coding sequence (bp) of all genes affected by deletions, and sex. If data are available for unaffected siblings, the same comparison can be made between parents and their unaffected children. 2.2.2. Deletion-matched burden analysis: A burden analysis of DelCH events in inherited deletions comparing children to their deletion-transmitting parents The second strategy focuses exclusively on deletions in children that are inherited. The haploid sequence of the remaining alleles of each inherited deletion in all children is queried for SNVs to identify the collective burden of DelCH events. Similarly, the collective burden of SNVs in the haploid sequence of the identical set of remaining alleles in the deletion-transmitting parents is determined ( Fig. 2 , strategy 2), allowing for a direct comparison between children and deletion-transmitting parents using conditional logistic regression stratified by the deletion. If the data include unaffected siblings, as is the case here, the same analysis can be performed starting with inherited deletions identified in unaffected children. 
Note that this approach provides unambiguous allelic phasing; given that the allele with the deletion is transmitted from the deletion-transmitting parent to the child, the remaining allele cannot be shared in this parent-child pair, allowing for the comparison of sequence-level variants in exons of any gene affected by the deletion, both within and outside deletion boundaries ( Fig. 1 , scenarios 1 and 2, see supplementary section phasing considerations). An additional advantage of this approach is that the location and amount of queried deleted sequence is identical between children and parents, thus the a priori DelCH probability under the null hypothesis can be assumed to be identical between the comparison groups (in contrast with traditional burden analysis). 2.2.3. Transmission disequilibrium test (TDT) of SNVs from non-deletion transmitting parents making up DelCH events in the children Although traditional burden analysis allows for a larger sample size compared to deletion-matched burden analysis, the potential gain of statistical power is affected by the lack of complete phasing information, which may reduce the signal to noise ratio. To overcome this, the third strategy employs a transmission disequilibrium test (TDT) to examine whether SNVs contributing to a DelCH event are transmitted to autistic individuals at a rate higher than expected ( Fig. 2 , strategy 3). The vast majority of deletions (97.6%) are inherited, in which case the SNV is transmitted by the parent who does not transmit the deletion. Using Fisher’s Exact Test, the observed transmission rate (OBS) was estimated for a specified type of SNV (“target SNVs”: LoF, DMiss, etc.), while the expected transmission rate (EXP) was estimated on the remaining variant types (“non-target SNVs”). For TDT, homozygous variants in the parents are excluded, as they violate transmission equilibrium. 
Similarly, scenarios wherein variants are identified in both parents (implying a DelCH in one parent) are also excluded, as this suggests non-pathogenicity of the DelCH event. To boost the statistical power of the TDT, we also incorporate unaffected sibling data, specifically by testing for over-transmission in autistic individuals and under-transmission in unaffected siblings (see equations below). with: 2.3. Assessing DelCH in ASD across strategies To evaluate a potential enrichment of DelCH events in ASD, we calculated the number of genes affected by DelCH in at least two unrelated autistic individuals, while not observed in any unaffected family member. We also calculated the reverse: genes involved in DelCH events in at least two unaffected siblings and not in any individual with ASD. We used permutation to assess statistical significance of any difference between the two results (supplementary section 3). 2.4 Gene set enrichment analysis on the candidate genes To explore common biological pathways among the candidate genes identified from the section 2.3 , we used GeneMania to expand the candidate gene list, by 50 genes using information such as gene co-expressions, genetic interactions, and physical interactions, etc. 57 The different interaction databases were weighted equally in the GeneMania analysis with the option of “equal by data type” selected. Using the extended gene list, we performed gene set enrichment analysis on Gene Ontology gene sets that have the size between 50-2,000 genes using g:Profiler 58 . 3. Results We developed three analytical strategies to investigate the role of DelCH events in disease etiology and implemented these methods using genomic data from ASD cohorts. After data filtering, we identified 147,737 DelCH events in 8,617 families from MSSNG, SSC, and SPARK. Of these, 30,203 events (20%) consisted of SNVs occurring within the deletion boundaries ( Fig. 1 , scenario 1; Fig. 
3B ), allowing unambiguous phasing of the SNV and the deletion. In the remaining 117,534 DelCH events (80%), sequencing data from the family members was used to inform phasing of DelCH events where possible ( Fig. 3B ). Of these, 93,910 were DelCH events with Freq DelCH <1% ( Fig. 3B ). Fourteen percent of deletions involved in DelCH events are full-gene deletions, 46% partially delete at least 10% of the gene body, while 40% overlap less than 10% of the gene body (Table S2). Given that this dataset included both affected and unaffected children, we performed two iterations of traditional and deletion-matched burden analyses. In the first iteration, we compared the DelCH burden in autistic individuals to that in their parents. As a negative control, we then repeated this approach comparing the DelCH burden in unaffected siblings to their parents. Similarly, we carried out two iterations of TDT, allowing us to compare deviations from the equilibrium in transmissions from parents to their children with ASD versus transmissions from parents to their unaffected children. An overview of our findings is presented in Fig. 4 . Download figure Open in new tab Fig. 4. Results from the three proposed strategies in A) autistic individuals vs parents, B) unaffected siblings vs parents, and C) transmission disequilibrium test where we combined the data from autistic individuals and unaffected siblings. Colors indicate the direction of effect of DelCH events, with red indicating enrichment and blue indicating depletion. A single asterisk indicates a trend toward significance (p<0.1), and a double asterisk indicates a nominal significance level (p<0.05). The size of the circles represents the effect size on a logarithmic scale as indicated in the bottom right. 3.1.
Traditional burden analysis Comparing DelCH events involving all deletions (i.e., inherited, de novo , and non-transmitted) between autistic individuals and parents (significance expressed as one-sided Wald’s p-values, with p 0.9 genes with marginal p-values at Freq DelCH < 1% (OR=1.59, p=0.050) and <0.5% (OR=1.76, p=0.060). Unexpectedly, we found a slightly higher burden of DelCH events involving Syn variants in both autistic individual-parent and unaffected sibling-parent comparisons, with ORs ranging between 1 and 1.1 in both cases. In our reiteration of the traditional burden analysis using unaffected siblings, we also found a depletion of DelCH events involving LoF and LoF_DMiss in NDD genes and genes with pRec>0.9, although not statistically significant, as well as a nominally significant depletion of DelCH events involving Miss variants at pRec≤0.9 and event frequency cut-offs of 0.1%, 0.05%, and 0.01% (ORs ranging between 0.95 and 0.96). 3.2. Deletion-matched burden analysis Again, all p-values reported in this section are from one-sided Wald’s tests, depicting nominal significance levels uncorrected for multiple testing. We observed a nominally significant enrichment of DelCH events in autistic individuals compared to deletion-transmitting parents in DelCH events involving a LoF or missense variant in genes predicted to be intolerant to biallelic LoF (pRec>0.9) with event frequency less than 0.01% (odds ratio: OR LoF =2.14, p LoF =0.048; OR Miss =1.10, p Miss =0.043). Limiting the analysis to DelCH events affecting genes in the NDD gene list showed enrichment in autistic individuals of extremely rare DelCH events (frequency < 0.01%) across variant types, but this effect did not reach statistical significance except for missense variants (OR Miss =1.33, p Miss =0.031).
Although not statistically significant, we also observed a general depletion of DelCH events in genes more tolerant to biallelic variation among autistic individuals, across most variant types, except for damaging missense variants, which showed a near-null effect (OR DMiss =1.01) ( Fig. 4A ). Repeating this analysis in unaffected siblings and their parents, we observed a significant depletion of DelCH events involving DMiss and LoF_DMiss SNVs in NDD genes at any event frequency, as well as LoF and LoF_DMiss in genes intolerant to biallelic LoF (pRec>0.9) with less stringent event frequency cut-offs ( Fig. 4B ). 3.3. TDT The p-values reported in this section are based on one-sided Fisher’s exact tests. We observed a significant over-transmission of variants in DMiss and LoF_DMiss categories involved in DelCH events affecting NDD genes, across all event frequency cut-offs (OR DMiss =[1.4-1.76], p DMiss <0.05; OR LoF_DMiss =[1.37-1.7], p LoF_DMiss 0.9), results showed a trend toward significance of over-transmission of LoF (OR=1.77, p=0.076), DMiss (OR=1.2, p=0.089), and a significant over-transmission of LoF_DMiss (OR=1.25, p=0.034) involved in DelCH events with frequency < 0.01%. As a post-hoc analysis, we repeated the TDT analysis but performed it separately for autistic individuals and unaffected siblings. We found that all significant results were not only the result of over-transmission in autistic individuals, but also of under-transmission in unaffected siblings, suggesting negative selection on the involved DelCH events (Fig. S2). 3.4. ASD candidate gene prioritization through DelCH events After identifying genes affected by DelCH events in multiple unrelated individuals, we found seven genes (Table S3) affected by DelCH in two unrelated autistic individuals and no unaffected family members. In contrast, no genes were identified when applying the same criteria to unaffected siblings (compared to all other family members).
Results from our permutation analyses indicated a very low probability of finding this case-control difference by chance (one-sided Wilcoxon rank-sum test, p<2.2 × 10^-16; supplementary section 3; Fig. S3). We examined the DelCH events in the seven genes in the Integrative Genomics Viewer (IGV) 59 to confirm the CNV and SNV calls for the entire family where the Compressed Reference-oriented Alignment Map (CRAM) files were available (Fig. S5). With IGV visualization, we found that the inherited deletion in sample SP0140142 had an inaccurate breakpoint. After breakpoint correction, the deletion no longer overlapped the OR10A7 gene. Thus, the OR10A7 gene was removed from the candidate genes, leaving only the six genes shown in Table 1 . View this table: View inline View popup Download powerpoint Table 1. List of DelCH events impacting the six genes affected by DelCH in two unrelated autistic individuals and no unaffected family members. Each unique deletion represents a deletion in one sample and the DelCH event can involve more than one SNV. Remarks: Inh = Inheritance, Freq = frequency, POS = position, M = maternal, P = paternal, D = de novo, A = ambiguous, FD=full-gene deletion, PD=partial-gene deletion (>50% of the gene body is deleted), FS=less than 50% of gene body is deleted but causes a frame-shift, SL=less than 50% of the gene body is deleted but causes a stop-loss. Of the six prioritized genes, OR1A2 and OR5P2 are olfactory receptor genes. Clinically, olfactory disturbances are frequently observed as part of broader sensory abnormalities in many autistic individuals and likely contribute to the commonly observed rigid food preferences 60 . Despite solid evidence for olfactory dysfunction in ASD 61 , the nature of this association remains largely unknown. Possibly, changes in olfactory function may influence social behaviors, and/or variation in some olfactory genes may alter aspects of brain development directly associated with the ASD phenotype.
Of note, two other olfactory receptor genes have been associated with ASD, OR52M1 62 – 64 and OR2M4 65 , both of which are strong ASD gene candidates according to SFARI Gene. Of the two olfactory receptor genes prioritized here, OR1A2 was previously associated with neuroticism 66 . MYO15A encodes myosin, a motor protein involved in actin organization and preservation of cochlear hair cells and their stereocilia 67 . Autosomal recessive variants in this gene have been associated with hearing loss 68 and Usher syndrome, while deletions of 17p11.2 involving MYO15A cause Smith-Magenis syndrome. Both Usher and Smith-Magenis syndromes are associated with increased rates of ASD and/or autistic features 69 , 70 . Interestingly, a possible role of MYO15A in ASD was previously suggested due to its involvement in a compound heterozygous event identified in monozygotic twins with ASD 71 . HSDL1 encodes an inactive enzyme of the short-chain dehydrogenase/reductase superfamily 72 . The protein predominantly localizes to mitochondria, mediated by mitochondrial localization signals encoded in both the amino and carboxyl terminals. However, its biological function in the mitochondria remains to be elucidated. Although HSDL1 lacks enzymatic activity, it is strongly conserved across vertebrate evolution, implying that the protein might perform an essential, but still uncharacterized, process. There is accumulating evidence for a role of mitochondrial dysfunction in ASD 73 . NEFH encodes the heavy chain of neurofilaments—the key cytoskeletal component that provides structural support to axons 74 , 75 . Both light and heavy chains are potential biomarkers for axonal damage 76 – 78 , and several studies have reported elevated plasma neurofilament levels in autistic individuals 79 , 80 . In our analysis, both deletions are short and mainly encompass the C-terminal region of NEFH . One of the deletions also includes the last exon of the adjacent gene THOC5 .
Although observed in unrelated individuals, both DelCH events involved the same missense variant (g.22-29490054 A > C; p.E805A), consistent with its high gnomAD allele frequency (0.154). Such a high allele frequency predicts that homozygosity of the missense variant occurs at a substantial rate in the population and is thus likely benign. We hypothesize therefore that pathogenicity of NEFH compound heterozygosity may exist when missense variation on one allele co-occurs with loss of function of the other allele (in this study, a deletion). There are precedents for this mechanism; e.g., congenital sensorineural hearing loss caused by compound heterozygosity affecting GJB2 involving a rare truncating and a relatively commonly occurring missense variant 81 . Homozygosity of the missense variant can lead to mild to moderate hearing loss but is also found in individuals with normal hearing 82 , 83 . Autosomal recessive retinal dystrophy can be caused by deep intronic variants in trans with a loss of function variant in the ABCA4 gene 84 . Similarly, compound heterozygosity underpinning oculocutaneous albinism can involve truncating variants in the gene TYR in trans with missense variants, which by themselves have ∼4% predicted homozygosity in the population 85 . Finally, CFHR4 encodes a plasma protein that regulates the complement system and is primarily expressed in the liver 86 , 87 . Although the two observed DelCH events in our analysis mainly affect CFHR4, one of the deletions also contains the adjacent gene CFHR1 .
Elevated levels of CFHR4 have been reported in individuals with Down Syndrome 88 and CFHR4 has been identified as one of several immune-related plasma proteins significantly associated with psychiatric disorders in a recent Mendelian randomization study combining proteomics and GWAS findings in schizophrenia, bipolar disorder and depression 89 . 3.5 Gene set enrichment analysis shows prominent involvement of neurogenesis We performed gene set enrichment analysis on the GeneMania-extended network of the six candidate genes and their 50 neighbouring genes (Fig. S6). We identified 32 significantly enriched gene sets (adjusted p<0.05; Table S4) showing a remarkable convergence to processes related to neurogenesis (e.g., neuron and glial cell development, neuron projection, and sensory organ development; Table S4). Out of the 32 enriched gene sets, 13 gene sets involved all candidate genes (either as a membership of the gene set or the member gene is directly connected to the candidate gene). Interestingly, there are common genes linking candidate genes in the network; GNGT1 links olfactory receptor genes and CFHR4 , CDC42 links CFHR4 and HSDL1 , AHI1 links HSDL1 and MYO15A , and NEFM and STXBP1 link MYO15A and NEFH . This finding highlights how candidate genes with their unique biological function might work closely with each other at higher level in the development of nervous system. 4. Discussion We report three analytical approaches to examining the role of DelCH in disease, and report results from their implementation in a large ASD family dataset. The strongest and most consistent signal across the three methods was observed with LoF variants affecting genes with high predicted intolerance under a recessive model (pRec>0.9). 
In deletion-matched burden analysis and TDT, these approaches yielded nominal p-values <0.01 in the lower DelCH frequency range (<0.01%), while in traditional burden analysis, similar p-values were recorded in the higher event frequency range (0.5-1%). As shown in Fig. 4 , this signal was reasonably consistent across the three methods. Intersecting DelCH events with the a priori selected NDD gene set showed enrichment in autistic individuals and depletion in unaffected siblings. While the number of observed DelCH events involving LoF sequence-level variants was too small to allow meaningful observations on individual genes, results from our permutation indicated a very low probability of finding the observed enrichment in ASD by chance (p<2.2 x 10 -16 ). Collectively, our findings indicate a modest enrichment of rare (<0.01%) DelCH events in autistic individuals. Not unexpectedly, DelCH events are rare, requiring large datasets to fully expose their impact on ASD liability. Based on our observations, we conclude that the strongest signals of DelCH events can be expected for LoF variants affecting genes with high predicted intolerance under a recessive model (pRec>0.9). Our a posteriori power analysis predicts 80% power to detect significant signals (α=0.05) for variants meeting these criteria with sample sizes of 25,943 and 41,668 for traditional and deletion-matched burden analyses, respectively, and 14,554 families for TDT (supplementary section 4; Fig. S4). Note that our power analysis is specific to ASD, and therefore its applicability to conditions with different genetic architectures may be limited. 
Several observations lend circumstantial evidence to our findings: i) genes with high pRec values generated the strongest signals, ii) the enrichment in TDT was observed in LoF and damaging missense variants, but not for missense or synonymous variants, and iii) the observed enrichment of DelCH in autistic individuals did not emerge when traditional and deletion-matched burden analyses were applied to unaffected individuals. Finally, we identified six genes ( CFHR4 , HSDL1 , MYO15A , NEFH , and two olfactory receptor genes, OR1A2 , OR4P2 ) affected by DelCH events in two unrelated autistic individuals, while not observed in non-affected family members. In contrast, applying the same criteria to unaffected siblings, no genes were identified. While evidence linking each of these candidate genes to ASD specifically is tentative, our gene set enrichment analysis strongly suggests that when considered together, they point towards neurogenesis, consistent with our current understanding of the pathophysiology of ASD 90 – 93 . Nevertheless, our study has several limitations. First, the ASD dataset examined here is reasonably large in absolute terms but limited in size due to the rarity of the event under study. As a result, our analysis was insufficiently powered to robustly reject the null hypothesis of no involvement of specific genes in ASD due to DelCH events. In light of the nominal p-values reported for individual comparisons, the prioritization of the six genes should be considered hypothesis-generating, requiring replication in other studies. Indeed, based on our power analysis, we estimate that sample sizes in the range of 26,000 – 42,000 individuals for burden analyses, and 15,000 families for TDT would be required to address this question with adequate power.
Second, it is important to note that while each of the three proposed strategies maximizes advantages of different dataset properties, they are examining the same dataset and therefore are not fully independent tests. The analytical strength of burden analyses lies in the comparison with controls. The strength of deletion-matched burden analysis is the perfect match of queried deleted sequence between cases and controls, whereas traditional burden analysis allows for a larger number of events to be analyzed. However, due to the phasing limitation of de novo and non-transmitted deletions, traditional burden analysis may introduce a degree of noise into the data for the case of SNVs outside of the deletion boundaries. To fully benefit from traditional burden analysis, phased genotype data generated through technologies like long-read sequencing is required. Additionally, implementation of deletion-matched burden analysis and TDT in other datasets requires the availability of trio data. Our present study focused exclusively on DelCH; future work could extend this approach to other types of compound heterozygous events, while taking into account the specific nature of its genomic components. For example, the impact of a duplication fully encompassing a (set of) gene(s) in combination with an SNV/indel on the other allele cannot be assumed to be obligatorily deleterious. In contrast, a duplication involving part of a gene, combined with an SNV/indel on the other allele, may be hypothesized to have deleterious effects similar to that of a DelCH. In conclusion, findings from our three strategies suggest a modest role of DelCH in ASD and highlight the potential involvement of six genes in ASD under a recessive mode of action. Despite lacking power (the number of families in our analyses was about half of the required number for adequate power), we consistently observed an enrichment of DelCH events in autistic individuals.
While individually, our observations were only nominally significant, finding a significantly different number of genes affected at least twice in autistic individuals versus those without ASD provides further statistical support for the hypothesis that DelCH contributes to the genetic etiology of ASD. The analytical pipeline implementing our three complementary strategies for analyzing this specific type of compound heterozygosity is available for application in other studies. Data Availability All data used have been accessible through the 3 databases: MSSNG, SPARK, and SSC autism cohorts. Specific genetic variant events investigated in this study are available as supplementary data also provided with this manuscript. Data availability The 147,737 DelCH events are provided in Table S2. The complete MSSNG and SFARI datasets can be obtained via data access agreements; please see https://research.mss.ng/ and https://www.sfari.org/resource/sfari-base/ for more details. Code availability All the analytical strategies were implemented in R. The scripts developed and used to generate results shown in this study are available on GitHub ( https://github.com/naibank/CHASE ). Author contributions Conceptualization: JV, BTrost, WE. Methodology: WE, EM, BTrost, JV. Software: WE, KH, RMMWF, SW, FA, AC. Investigation: WE, KH, RMMWF, NBS, DJM, SW, FA, AC, MMA, XZ, RS, NS, JR, MZ. Visualization: WE, KH, RMMWF, NBS. Data curation: WE, NBS, BThiruv, TN, GP, BTrost. Funding acquisition: SWS, JV. Supervision: WE, EM, SWS, BTrost, JV. Writing—original draft: WE, KH, RMMWF, NBS, BTrost, JV. Writing—review & editing: WE, KH, RMMWF, NBS, MMA, XZ, EM, SWS, BTrost, JV. WE and JV have accessed and verified the underlying data. All authors read and approved the final version of the manuscript. Competing interests JV serves as a consultant for NoBias Therapeutics Inc. and has received speaker fees for Henry Steward Talks Ltd. 
SWS has served on the Scientific Advisory Committee of Population Bio and has been involved in Deep Genomics. Intellectual property from aspects of his research held at the Hospital for Sick Children are licensed to Athena Diagnostics and Population Bio. These relationships did not influence content of this manuscript but are disclosed for potential future considerations. Acknowledgements Support from The University of Toronto McLaughlin Centre, the Hospital for Sick Children (SickKids) Foundation, the Ontario Brain Institute, Genome Canada/Ontario Genomics Institute, the Northbridge Chair in Paediatric Research held at the Hospital for Sick Children and University of Toronto (SWS) and the SickKids Psychiatry Associates Chair in Developmental Psychopathology (JV). References 1. ↵ Morrill , S.A. & Amon , A. Why haploinsufficiency persists . Proceedings of the National Academy of Sciences of the United States of America 116 , 11866 – 11871 ( 2019 ). OpenUrl Abstract / FREE Full Text 2. ↵ Lee , C. & Scherer , S.W . The clinical context of copy number variation in the human genome . Expert Rev Mol Med 12 , e8 ( 2010 ). OpenUrl CrossRef PubMed 3. ↵ Chiang , C. , et al. The impact of structural variation on human gene expression . Nature genetics 49 , 692 – 699 ( 2017 ). OpenUrl CrossRef PubMed 4. ↵ Karczewski , K.J. , et al. The mutational constraint spectrum quantified from variation in 141,456 humans . Nature 581 , 434 – 443 ( 2020 ). OpenUrl CrossRef PubMed 5. ↵ Singer-Berk , M. , et al. Advanced variant classification framework reduces the false positive rate of predicted loss-of-function variants in population sequencing data . American journal of human genetics 110 , 1496 – 1508 ( 2023 ). OpenUrl CrossRef PubMed 6. ↵ Gudmundsson , S. , et al. Variant interpretation using population databases: Lessons from gnomAD . Hum Mutat 43 , 1012 – 1030 ( 2022 ). OpenUrl CrossRef PubMed 7. ↵ Lek , M. , et al. Analysis of protein-coding genetic variation in 60,706 humans . 
Nature 536 , 285 – 291 ( 2016 ). OpenUrl CrossRef PubMed Web of Science 8. Johnson , A.F. , Nguyen , H.T. & Veitia , R.A . Causes and effects of haploinsufficiency . Biol Rev Camb Philos Soc 94 , 1774 – 1785 ( 2019 ). OpenUrl CrossRef PubMed 9. Wilkie , A.O . The molecular basis of genetic dominance . J Med Genet 31 , 89 – 98 ( 1994 ). OpenUrl Abstract / FREE Full Text 10. ↵ Zarrei , M. , MacDonald , J.R. , Merico , D. & Scherer , S.W . A copy number variation map of the human genome . Nat Rev Genet 16 , 172 – 183 ( 2015 ). OpenUrl CrossRef PubMed 11. ↵ Shaikh , T.H . Copy Number Variation Disorders . Curr Genet Med Rep 5 , 183 – 190 ( 2017 ). OpenUrl CrossRef PubMed 12. ↵ Igarashi , Y. , et al. A new mutation causing inherited growth hormone deficiency: a compound heterozygote of a 6.7 kb deletion and a two base deletion in the third exon of the GH-1 gene . Human molecular genetics 2 , 1073 – 1074 ( 1993 ). OpenUrl CrossRef PubMed Web of Science 13. Peces , R. , et al. Severe congenital nephrogenic diabetes insipidus in a compound heterozygote with a new large deletion of the AQP2 gene. A case report . Mol Genet Genomic Med 7 , e00568 ( 2019 ). OpenUrl 14. Schwarze , U. , Cundy , T. , Liu , Y.J. , Hofman , P.L. & Byers , P.H . Compound heterozygosity for a frameshift mutation and an upstream deletion that reduces expression of SERPINH1 in siblings with a moderate form of osteogenesis imperfecta . American journal of medical genetics. Part A 179 , 1466 – 1475 ( 2019 ). OpenUrl PubMed 15. ↵ Huang , Y. , Qin , Y. , Liao , L. & Lin , F . Familial chylomicronemia syndrome caused by compound heterozygous mutation of lipoprotein lipase gene: A case report and review of literature . Clin Chim Acta 537 , 112 – 117 ( 2022 ). OpenUrl 16. ↵ Vorstman , J.A. , et al. A double hit implicates DIAPH3 as an autism risk gene . Molecular psychiatry 16 , 442 – 451 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 17. Siu , W.K. , et al. 
Unmasking a novel disease gene NEO1 associated with autism spectrum disorders by a hemizygous deletion on chromosome 15 and a functional polymorphism . Behav Brain Res 300 , 135 – 142 ( 2016 ). OpenUrl PubMed 18. Bacchelli , E. , et al. A CTNNA3 compound heterozygous deletion implicates a role for alphaT-catenin in susceptibility to autism spectrum disorder . J Neurodev Disord 6 , 17 ( 2014 ). 19. Phetthong , T. , Khongkrapan , A. , Jinawath , N. , Seo , G.H. & Wattanasirichaigoon , D . Compound Heterozygote of Point Mutation and Chromosomal Microdeletion Involving OTUD6B Coinciding with ZMIZ1 Variant in Syndromic Intellectual Disability . Genes (Basel) 12 ( 2021 ). 20. ↵ Duong , L. , et al. Mutations in NRXN1 in a family multiply affected with brain disorders: NRXN1 mutations and brain disorders . Am J Med Genet B Neuropsychiatr Genet 159B , 354 – 358 ( 2012 ). OpenUrl CrossRef 21. ↵ Vorstman , J.A.S. , et al. Double hits in schizophrenia . Human molecular genetics 27 , 2755 – 2761 ( 2018 ). OpenUrl PubMed 22. ↵ Lin , B.D. , et al. The role of rare compound heterozygous events in autism spectrum disorder . Translational psychiatry 10 , 204 ( 2020 ). 23. ↵ Lord , C. , et al. Autism spectrum disorder . Nat Rev Dis Primers 6 , 5 ( 2020 ). OpenUrl PubMed 24. ↵ Schaaf , C.P. , et al. A framework for an evidence-based gene list relevant to autism spectrum disorder . Nat Rev Genet 21 , 367 – 376 ( 2020 ). OpenUrl CrossRef PubMed 25. ↵ Vorstman , J.A.S. , et al. Autism genetics: opportunities and challenges for clinical translation . Nat Rev Genet 18 , 362 – 376 ( 2017 ). OpenUrl CrossRef PubMed 26. ↵ Fu , J.M. , et al. Rare coding variation provides insight into the genetic architecture and phenotypic context of autism . Nature genetics 54 , 1320 – 1331 ( 2022 ). OpenUrl CrossRef PubMed 27. ↵ Banerjee-Basu , S. & Packer , A . SFARI Gene: an evolving database for the autism research community . Dis Model Mech 3 , 133 – 135 ( 2010 ). OpenUrl FREE Full Text 28. ↵ Trost , B. 
, et al. Genomic architecture of autism from comprehensive whole-genome sequence annotation . Cell 185 , 4409 – 4427 e4418 ( 2022 ). OpenUrl CrossRef PubMed 29. ↵ Morrow , E.M. , et al. Identifying autism loci and genes by tracing recent shared ancestry . Science 321 , 218 – 223 ( 2008 ). OpenUrl Abstract / FREE Full Text 30. ↵ Gupta , V. , et al. Genetic Variant Analyses Identify Novel Candidate Autism Risk Genes from a Highly Consanguineous Cohort of 104 Families from Oman . Int J Mol Sci 25 ( 2024 ). 31. ↵ Gaugler , T. , et al. Most genetic risk for autism resides with common variation . Nature genetics 46 , 881 – 885 ( 2014 ). OpenUrl CrossRef PubMed 32. ↵ Grove , J. , et al. Identification of common genetic risk variants for autism spectrum disorder . Nature genetics 51 , 431 – 444 ( 2019 ). OpenUrl CrossRef PubMed 33. ↵ Jacquemont , S. , et al. Genes To Mental Health (G2MH): A Framework to Map the Combined Effects of Rare and Common Variants on Dimensions of Cognition and Psychopathology . The American journal of psychiatry 179 , 189 – 203 ( 2022 ). OpenUrl CrossRef PubMed 34. D’Abate , L. , et al. Predictive impact of rare genomic copy number variations in siblings of individuals with autism spectrum disorders . Nat Commun 10 , 5519 ( 2019 ). OpenUrl CrossRef PubMed 35. ↵ Antaki , D. , et al. A phenotypic spectrum of autism is attributable to the combined effects of rare variants, polygenic risk and sex . Nature genetics 54 , 1284 – 1292 ( 2022 ). OpenUrl CrossRef PubMed 36. ↵ Vorstman , J. , Sebat , J. , Bourque , V.R. & Jacquemont , S . Integrative genetic analysis: cornerstone of precision psychiatry . Molecular psychiatry ( 2024 ). 37. ↵ RK, C.Y., et al. Whole genome sequencing resource identifies 18 new candidate genes for autism spectrum disorder . Nature neuroscience 20 , 602 – 611 ( 2017 ). OpenUrl CrossRef PubMed 38. ↵ Fischbach , G.D. & Lord , C . The Simons Simplex Collection: a resource for identification of autism genetic risk factors . 
Neuron 68 , 192 – 195 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 39. ↵ SPARK Consortium . SPARK: A US Cohort of 50,000 Families to Accelerate Autism Research . Neuron 97 , 488 – 493 ( 2018 ). OpenUrl CrossRef PubMed 40. ↵ McKenna , A. , et al. The Genome Analysis Toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data . Genome Res 20 , 1297 – 1303 ( 2010 ). OpenUrl Abstract / FREE Full Text 41. ↵ Abyzov , A. , Urban , A.E. , Snyder , M. & Gerstein , M . CNVnator: an approach to discover, genotype, and characterize typical and atypical CNVs from family and population genome sequencing . Genome Res 21 , 974 – 984 ( 2011 ). OpenUrl Abstract / FREE Full Text 42. ↵ Zhu , M. , et al. Using ERDS to infer copy-number variants in high-coverage genomes . American journal of human genetics 91 , 408 – 421 ( 2012 ). OpenUrl CrossRef PubMed 43. ↵ Chen , X. , et al. Manta: rapid detection of structural variants and indels for germline and cancer sequencing applications . Bioinformatics 32 , 1220 – 1222 ( 2016 ). OpenUrl CrossRef PubMed 44. ↵ Rausch , T. , et al. DELLY: structural variant discovery by integrated paired-end and split-read analysis . Bioinformatics 28 , i333 – i339 ( 2012 ). OpenUrl CrossRef PubMed Web of Science 45. ↵ Wang , K. , Li , M. & Hakonarson , H . ANNOVAR: functional annotation of genetic variants from high-throughput sequencing data . Nucleic Acids Res 38 , e164 ( 2010 ). OpenUrl CrossRef PubMed 46. ↵ O’Leary , N.A. , et al. Reference sequence (RefSeq) database at NCBI: current status, taxonomic expansion, and functional annotation . Nucleic Acids Res 44 , D733 – 745 ( 2016 ). OpenUrl CrossRef PubMed 47. ↵ Rehm , H.L. , et al. ClinGen--the Clinical Genome Resource . N Engl J Med 372 , 2235 – 2242 ( 2015 ). OpenUrl CrossRef PubMed 48. ↵ Amberger , J.S. , Bocchini , C.A. , Schiettecatte , F. , Scott , A.F. & Hamosh , A. OMIM . 
org: Online Mendelian Inheritance in Man (OMIM(R)), an online catalog of human genes and genetic disorders . Nucleic Acids Res 43 , D789 – 798 ( 2015 ). OpenUrl CrossRef PubMed 49. ↵ Martin , A.R. , et al. PanelApp crowdsources expert knowledge to establish consensus diagnostic gene panels . Nature genetics 51 , 1560 – 1565 ( 2019 ). OpenUrl CrossRef PubMed 50. ↵ Borrell , L.N. , Kodali , H. & Rodriguez-Alvarez , E . Interracial/ethnic marriage and adverse birth outcomes: The effect of neighborhood racial/ethnic composition . Soc Sci Med 270 , 113560 ( 2021 ). 51. ↵ Sudmant , P.H. , et al. An integrated map of structural variation in 2,504 human genomes . Nature 526 , 75 – 81 ( 2015 ). OpenUrl CrossRef PubMed 52. ↵ Ng , P.C. & Henikoff , S . Predicting deleterious amino acid substitutions . Genome Res 11 , 863 – 874 ( 2001 ). OpenUrl Abstract / FREE Full Text 53. ↵ Adzhubei , I.A. , et al. A method and server for predicting damaging missense mutations . Nat Methods 7 , 248 – 249 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 54. ↵ Reva , B. , Antipin , Y. & Sander , C . Predicting the functional impact of protein mutations: application to cancer genomics . Nucleic Acids Res 39 , e118 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 55. ↵ Kircher , M. , et al. A general framework for estimating the relative pathogenicity of human genetic variants . Nature genetics 46 , 310 – 315 ( 2014 ). OpenUrl CrossRef PubMed 56. ↵ Pollard , K.S. , Hubisz , M.J. , Rosenbloom , K.R. & Siepel , A . Detection of nonneutral substitution rates on mammalian phylogenies . Genome Res 20 , 110 – 121 ( 2010 ). OpenUrl Abstract / FREE Full Text 57. ↵ Warde-Farley , D. , et al. The GeneMANIA prediction server: biological network integration for gene prioritization and predicting gene function . Nucleic Acids Res 38 , W214 – 220 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 58. ↵ Reimand , J. , Kull , M. , Peterson , H. , Hansen , J. & Vilo , J. 
g:Profiler--a web-based toolset for functional profiling of gene lists from large-scale experiments . Nucleic Acids Res 35 , W193 – 200 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 59. ↵ Robinson , J.T. , et al. Integrative genomics viewer . Nat Biotechnol 29 , 24 – 26 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 60. ↵ Scheier , Z.A. , Sturm , K.L. , Colavecchio , J.A. , Pradhan , A. & Otazu , G.H . Role of Odor Novelty on Olfactory Issues in Autism Spectrum Disorder . Genes Brain Behav 23 , e70008 ( 2024 ). OpenUrl CrossRef PubMed 61. ↵ Crow , A.J.D. , et al. Olfactory Dysfunction in Neurodevelopmental Disorders: A Meta-analytic Review of Autism Spectrum Disorders, Attention Deficit/Hyperactivity Disorder and Obsessive-Compulsive Disorder . Journal of autism and developmental disorders 50 , 2685 – 2697 ( 2020 ). OpenUrl CrossRef PubMed 62. ↵ Sanders , S.J. , et al. Insights into Autism Spectrum Disorder Genomic Architecture and Biology from 71 Risk Loci . Neuron 87 , 1215 – 1233 ( 2015 ). OpenUrl CrossRef PubMed 63. Iossifov , I. , et al. The contribution of de novo coding mutations to autism spectrum disorder . Nature 515 , 216 – 221 ( 2014 ). OpenUrl CrossRef PubMed Web of Science 64. ↵ Woodbury-Smith , M. , et al. Mutational Landscape of Autism Spectrum Disorder Brain Tissue . Genes (Basel) 13 ( 2022 ). 65. ↵ Kuo , P.H. , et al. Genome-Wide Association Study for Autism Spectrum Disorder in Taiwanese Han Population . PLoS One 10 , e0138695 ( 2015 ). OpenUrl PubMed 66. ↵ Kim , H.N. , et al. Genome-wide association study of the five-factor model of personality in young Korean women . J Hum Genet 58 , 667 – 674 ( 2013 ). OpenUrl PubMed 67. ↵ Zheng , K. , et al. Novel compound heterozygous MYO15A splicing variants in autosomal recessive non-syndromic hearing loss . BMC Med Genomics 17 , 4 ( 2024 ). 68. ↵ Morovvati , S. , et al. The clinical and genetic spectrum of twenty-six individuals with hearing loss affected by MYO15A variants . 
Sci Rep 15 , 14320 ( 2025 ). 69. ↵ Korteling , D. , Musch , J.L.I. , Zinkstok , J.R. & Boot , E . Psychiatric and neurological manifestations in adults with Smith-Magenis syndrome: A scoping review . Am J Med Genet B Neuropsychiatr Genet 195 , e32956 ( 2024 ). OpenUrl PubMed 70. ↵ Dammeyer , J . Children with Usher syndrome: mental and behavioral disorders . Behav Brain Funct 8 , 16 ( 2012 ). 71. ↵ Anitha , A. , et al. Rare Pathogenic Variants Identified in Whole Exome Sequencing of Monozygotic Twins With Autism Spectrum Disorder . Pediatr Neurol 158 , 113 – 123 ( 2024 ). OpenUrl PubMed 72. ↵ Meier , M. , Tokarz , J. , Haller , F. , Mindnich , R. & Adamski , J . Human and zebrafish hydroxysteroid dehydrogenase like 1 (HSDL1) proteins are inactive enzymes but conserved among species . Chem Biol Interact 178 , 197 – 205 ( 2009 ). OpenUrl PubMed 73. ↵ Khaliulin , I. , Hamoudi , W. & Amal , H . The multifaceted role of mitochondria in autism spectrum disorder . Molecular psychiatry 30 , 629 – 650 ( 2025 ). OpenUrl CrossRef PubMed 74. ↵ Yuan , A. , Rao , M.V. , Veeranna & Nixon , R.A. Neurofilaments and Neurofilament Proteins in Health and Disease . Cold Spring Harb Perspect Biol 9 ( 2017 ). 75. ↵ Hoffman , P.N. , et al. Neurofilament gene expression: a major determinant of axonal caliber . Proceedings of the National Academy of Sciences of the United States of America 84 , 3472 – 3476 ( 1987 ). OpenUrl Abstract / FREE Full Text 76. ↵ Heckler , I. & Venkataraman , I . Phosphorylated neurofilament heavy chain: a potential diagnostic biomarker in amyotrophic lateral sclerosis . J Neurophysiol 127 , 737 – 745 ( 2022 ). OpenUrl PubMed 77. Olsson , B. , et al. Association of Cerebrospinal Fluid Neurofilament Light Protein Levels With Cognition in Patients With Dementia, Motor Neuron Disease, and Movement Disorders . JAMA Neurol 76 , 318 – 325 ( 2019 ). OpenUrl PubMed 78. ↵ Backstrom , D. , et al. NfL as a biomarker for neurodegeneration and survival in Parkinson disease . 
Neurology 95 , e827 – e838 ( 2020 ). OpenUrl CrossRef PubMed 79. ↵ Simone , M. , et al. Serum Neurofilament Light Chain and Glial Fibrillary Acidic Protein as Potential Diagnostic Biomarkers in Autism Spectrum Disorders: A Preliminary Study . Int J Mol Sci 24 ( 2023 ). 80. ↵ He , W.C. , Zhang , X.J. , Zhang , Y.Q. & Zhang , W.J . Elevated serum neurofilament light chain in children autism spectrum disorder: A case control study . Neurotoxicology 80 , 87 – 92 ( 2020 ). OpenUrl PubMed 81. ↵ Chan , D.K. & Chang , K.W . GJB2-associated hearing loss: systematic review of worldwide prevalence, genotype, and auditory phenotype . Laryngoscope 124 , E34 – 53 ( 2014 ). OpenUrl CrossRef PubMed 82. ↵ Chai , Y. , et al. The homozygous p.V37I variant of GJB2 is associated with diverse hearing phenotypes . Clin Genet 87 , 350 – 355 ( 2015 ). OpenUrl PubMed 83. ↵ Kriukelis , R. , et al. The congenital hearing phenotype in GJB2 in Queensland, Australia: V37I and mild hearing loss predominates . European journal of human genetics : EJHG 33 , 208 - 219 ( 2025 ). OpenUrl PubMed 84. ↵ Nassisi , M. , et al. Prevalence of ABCA4 Deep-Intronic Variants and Related Phenotype in An Unsolved “One-Hit” Cohort with Stargardt Disease . Int J Mol Sci 20 ( 2019 ). 85. ↵ Thuma , T.B.T. , Procopio , R.A. , Jimenez , H.J. , Gunton , K.B. & Pulido , J.S . Hypomorphic variants in inherited retinal and ocular diseases: A review of the literature with clinical cases . Surv Ophthalmol 69 , 337 – 348 ( 2024 ). OpenUrl PubMed 86. ↵ Skerka , C. , Chen , Q. , Fremeaux-Bacchi , V. & Roumenina , L.T . Complement factor H related proteins (CFHRs) . Mol Immunol 56 , 170 – 180 ( 2013 ). OpenUrl CrossRef PubMed 87. ↵ Hellwage , J. , Skerka , C. & Zipfel , P.F . Biochemical and functional characterization of the factor-H-related protein 4 (FHR-4) . Immunopharmacology 38 , 149 – 157 ( 1997 ). OpenUrl CrossRef PubMed Web of Science 88. ↵ Veteleanu , A. , et al. 
Complement dysregulation and Alzheimer’s disease in Down syndrome . Alzheimers Dement 19 , 1383 – 1392 ( 2023 ). OpenUrl PubMed 89. ↵ Dang , X. , Song , M. , Lv , L. , Yang , Y. & Luo , X.J . Proteome-wide Mendelian randomization reveals the causal effects of immune-related plasma proteins on psychiatric disorders . Human genetics 142 , 809 – 818 ( 2023 ). OpenUrl PubMed 90. ↵ Wegiel , J. , et al. The neuropathology of autism: defects of neurogenesis and neuronal migration, and dysplastic changes . Acta Neuropathol 119 , 755 – 770 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 91. Packer , A . Neocortical neurogenesis and the etiology of autism spectrum disorder . Neuroscience and biobehavioral reviews 64 , 185 – 195 ( 2016 ). OpenUrl CrossRef PubMed 92. Willsey , H.R. , Willsey , A.J. , Wang , B. & State , M.W . Genomics, convergent neuroscience and progress in understanding autism spectrum disorder . Nat Rev Neurosci 23 , 323 – 341 ( 2022 ). OpenUrl CrossRef PubMed 93. ↵ Wang , B. , et al. A foundational atlas of autism protein interactions reveals molecular convergence . bioRxiv ( 2024 ). View the discussion thread. Back to top Previous Next Posted October 21, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Evaluating the impact of compound heterozygosity involving microdeletions and sequence-level variants: findings in autism Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Evaluating the impact of compound heterozygosity involving microdeletions and sequence-level variants: findings in autism Worrawat Engchuan , Kara Han , Rayssa MMW Feitosa , Nelson Bautista Salazar , David J Mager , Shania Wu , Faraz Ali , Alexander Chan , Marla Mendes de Aquino , Xiaopu Zhou , Rulan Shaath , Nickie Safarian , Bhooma Thiruvahindrapuram , Thomas Nalpathamkalam , Giovanna Pellecchia , Jill de Rijke , Mehdi Zarrei , Elemi Breetvelt , Stephen W. Scherer , Brett Trost , Jacob Vorstman medRxiv 2025.10.17.25338215; doi: https://doi.org/10.1101/2025.10.17.25338215 Share This Article: Copy Citation Tools Evaluating the impact of compound heterozygosity involving microdeletions and sequence-level variants: findings in autism Worrawat Engchuan , Kara Han , Rayssa MMW Feitosa , Nelson Bautista Salazar , David J Mager , Shania Wu , Faraz Ali , Alexander Chan , Marla Mendes de Aquino , Xiaopu Zhou , Rulan Shaath , Nickie Safarian , Bhooma Thiruvahindrapuram , Thomas Nalpathamkalam , Giovanna Pellecchia , Jill de Rijke , Mehdi Zarrei , Elemi Breetvelt , Stephen W. 
Scherer , Brett Trost , Jacob Vorstman medRxiv 2025.10.17.25338215; doi: https://doi.org/10.1101/2025.10.17.25338215 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffb94f49ec0e2c5',t:'MTc3OTQ1MDQwMg=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.