Full text
35,931 characters
· extracted from
preprint-html
· click to expand
Segpy: a streamlined, user-friendly pipeline for variant segregation analysis | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Segpy: a streamlined, user-friendly pipeline for variant segregation analysis Michael R. Fiorini , Saeid Amiri , Allison A. Dilliott , Dan Spiegelman , Guy Rouleau , Sali M.K. Farhan doi: https://doi.org/10.1101/2024.12.26.24319616 Michael R. 
Fiorini 1 The Montreal Neurological Institute-Hospital, McGill University , Montreal, Quebec H3A 2B4, Canada 2 Department of Human Genetics, McGill University , Montreal, Quebec H3A 2B4, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Saeid Amiri 1 The Montreal Neurological Institute-Hospital, McGill University , Montreal, Quebec H3A 2B4, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Allison A. Dilliott 1 The Montreal Neurological Institute-Hospital, McGill University , Montreal, Quebec H3A 2B4, Canada 3 Department of Neurology and Neurosurgery, McGill University , Montreal, Quebec H3A 2B4, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Dan Spiegelman 1 The Montreal Neurological Institute-Hospital, McGill University , Montreal, Quebec H3A 2B4, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Guy Rouleau 1 The Montreal Neurological Institute-Hospital, McGill University , Montreal, Quebec H3A 2B4, Canada 3 Department of Neurology and Neurosurgery, McGill University , Montreal, Quebec H3A 2B4, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sali M.K. Farhan 1 The Montreal Neurological Institute-Hospital, McGill University , Montreal, Quebec H3A 2B4, Canada 2 Department of Human Genetics, McGill University , Montreal, Quebec H3A 2B4, Canada 3 Department of Neurology and Neurosurgery, McGill University , Montreal, Quebec H3A 2B4, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: sali.farhan{at}mcgill.ca Abstract Full Text Info/History Metrics Preview PDF Abstract Understanding the role of genetic variants in disease is essential for diagnostics and the advancement of genomic medicine. 
While the advent of high-throughput sequencing has been matched by the development of sophisticated genomic analysis tools, these packages often involve complex analytical procedures that can be challenging for researchers with limited computational experience. Additionally, modern genomic datasets require high-performance computing (HPC) systems, which may be difficult to implement for unfamiliar users. To address these challenges, we introduce Segpy, a streamlined, user-friendly pipeline for variant segregation analysis that integrates seamlessly with HPC environments. Segpy supports single-family, multi-family, and population-based datasets, allowing researchers to evaluate how genetic variants co-segregate with disease in pedigree-based analyses and compare allele frequencies between affected and unaffected individuals in case-control analyses. To date, the application of Segpy has facilitated the identification of genetic variants contributing to many human diseases, and Segpy is now a publicly available framework. Introduction Traditional pedigree-based analyses, including family trios, quartets, and multi-generational families, have been key to characterizing Mendelian diseases ( 1 ). Specifically, segregation analyses that examine how genetic variants co-segregate with disease can assess inheritance patterns, zygosity, potential risks for future generations, and diagnostics, while also nominating possible genetic underpinnings of disease. However, pedigree-based methods are less effective for studying complex diseases due to the potential involvement of multiple genetic factors with small effect sizes ( 2 ). In contrast, population-based case-control cohorts, which compare allele frequencies between large groups of affected and unaffected individuals, are sufficiently powered to detect common alleles of small effect through genome-wide association studies, while also identifying rare variants through gene burden analyses ( 3 ). 
Both pedigree-based and population-based studies rely on accurate imputation of allelic counts at millions of variant sites in affected and unaffected individuals, which can present significant challenges for investigators with limited computational expertise. Advancements in understanding the genetic determinants of disease have been driven largely by improvements in high-throughput DNA sequencing technologies, enabling whole-genome and whole-exome sequencing (WGS/WES). Naturally, the rise of high-throughput sequencing technologies has corresponded with the development of sophisticated genomic analysis tools ( 4 - 6 ). However, these comprehensive packages often constitute complex installation, configuration, and execution requirements, posing a steep learning curve for researchers with limited computational experience. Additionally, the size of modern genomic datasets requires high-performance computing (HPC) systems to meet the substantial data processing and storage demands, which can be challenging to implement for unfamiliar investigators. To address these issues, we introduce Segpy—a streamlined, user-friendly pipeline designed for variant segregation analysis, which integrates directly with HPC environments and facilitates the analysis of single-family, multi-family, and population-based datasets. Segpy overview The Segpy pipeline is designed for variant segregation analysis applied to both pedigree-based family cohorts — those involving single or multi-family trios, quartets, or extended families — and population-based case-control cohorts to compute allelic carrier counts at variant sites across study subjects. 
As input, users must provide a single Variant Call Format (VCF) file describing the genetic variants of all study subjects, which can be optionally annotated with Ensembl Variant Effect Predictor (VEP) ( 7 ) to prioritize genetic determinants, as well as a pedigree file describing the familial relationships among those individuals (if applicable) and their disease status. As output, Segpy computes variant counts for affected and unaffected individuals, both within and outside of families, by categorizing wild-type individuals, heterozygous carriers, and homozygous carriers at specific loci. These counts are organized into a comprehensive data frame, with each row representing a single variant and labeled with the Sample IDs of the corresponding carriers. The data frame serves as the foundation for downstream statistical analyses tailored to the user’s specific research question, including the investigation of variant inheritance patterns, variant co-segregation with disease, and the identification of de novo pathogenic variants for pedigree-based analyses, as well as the assessment of allelic frequencies in both affected and unaffected individuals for population-based case-control analyses. Packaged as a containerized pipeline, Segpy includes all necessary code and software dependencies, simplifying the installation process and standardizing analyses across independent research groups. While implemented as a command-line tool, Segpy was developed with varying levels of computational expertise in mind, prioritizing a streamlined process for investigators. Considering the scale of modern WES and WGS datasets and the computational power required for their analyses, the Segpy pipeline was designed for integration with the users’ HPC clusters, effectively eliminating size limitations on the datasets that can be analyzed. 
Underlying framework The Segpy pipeline is packaged as an Apptainer (v1.2.4) container, offering a standardized environment that encapsulates all required software and dependencies, ensuring compatibility with all Linux systems ( 8 ). The containerized framework consists of three main components: (i) analytical scripts written in Python (v3.10.2), (ii) customizable parameter and configuration files, and (iii) job submission scripts written in Bash ( Figure 1 ). Users can customize their analysis parameters using the provided configuration file, where they must specify their reference genome build, indicate whether to retain VEP annotations, and choose to utilize an HPC cluster or a local workstation. In the case where users opt to use an HPC cluster, Bash scripts are submitted to create ‘Jobs’—resource requests that depend on the parameters specified in the configuration file, such as CPUs, memory, and time. These jobs are submitted to the HPC system through the SLURM scheduler to run the analytical Python scripts ( 9 ). To meet the requirements of various study designs, the pipeline integrates the ability to analyze pedigree-based and population-based datasets by providing three distinct, yet highly comparable analysis tracks: 1) single-family, 2) multi-family, and 3) case-control. Download figure Open in new tab Figure 1. Segpy framework overview. The Segpy container encapsulates all necessary code and software dependencies and comprises three major components: 1) adjustable parameter and configuration files, 2) job submission scripts for high-performance computing (HPC) environments, and 3) analytical Python scripts. Upon installing the Segpy container, users must define their job mode, electing to use either their HPC system or local workstation. Next, users select their analysis track. To accommodate the diverse requirements of various study designs, Segpy offers three distinct, yet highly comparable analysis tracks: 1) single-family, 2) multi-family, or 3) case-control. 
All three analysis tracks follow a four-step pipeline. In Step 0, a working directory is created to store configuration files and the outputs of subsequent steps. In Step 1, the user-provided Variant Call Format (VCF) file is converted to the Hail MatrixTable format to efficiently manipulate large-scale datasets. In Step 2, variant segregation analysis is performed based on the user-provided pedigree file to generate a comprehensive data frame that details the number of variant carriers according to disease status and familial status, when applicable. In Step 3, the carrier counts data frame is parsed based on user specifications to reduce the computational burden of analyzing the final dataset. Abbreviations: VEP, Variant Effect Predictor. Each of Segpy’s three analytic tracks follows a four-step pipeline: Step 0: Pipeline Setup, Step 1: VCF to MatrixTable, Step 2: Segregation Analysis, and Step 3: Parse Output File. In Step 0, a working directory is created for the analysis, where a modifiable configuration file and a text file documenting the job settings and user-defined analytical parameters are stored to ensure replicability. The outputs from each subsequent analytical step are saved in this working directory. In Step 1, the user-provided VCF file is converted to the Hail MatrixTable format, which is optimized for efficiently storing and manipulating large-scale genomic datasets ( https://github.com/hail-is/hail ). All intermediate MatrixTable files are saved in the working directory for easy access and further analysis. In Step 2, variant segregation analysis is conducted based on the user-provided pedigree file, which defines the Sample IDs, disease status, and familial relationships (where applicable) of the study subjects, alongside the outputs from Step 1 to generate a comprehensive data frame that details the number of variant carriers stratified by disease and familial status, when applicable. 
For pedigree-based analyses, Segpy iterates through each user-defined family to compute the number of wild-type individuals, heterozygous carriers, and homozygous carriers for a given allele at a specific locus, both within the corresponding family and in the broader cohort. For the case-control analysis, the same process is applied, but without the family-wise iteration, to calculate the number of affected and unaffected allele carriers at the given locus. Importantly, the carrier counts are accompanied by the Sample IDs of heterozygous and homozygous carriers to facilitate downstream analyses. In Step 3, the carrier counts data frame is parsed based on user specifications to reduce the computational burden of analyzing the final dataset. Namely, users can eliminate duplicated variant entries from divergent VEP annotations or remove uninformative characters (e.g., quotes and brackets). The underlying code for Segpy is available on GitHub ( https://github.com/neurobioinfo/segpy ). Running Segpy A comprehensive user guide for running Segpy is available on GitHub ( neurobioinfo.github.io/segpy/ ). The containerized pipeline can be freely downloaded from Zenodo ( https://zenodo.org/records/14503733 ). After installation, users can initiate the pipeline by running Step 0, specifying their desired job mode — “slurm” for HPC clusters or “local” for local workstations — and analytical track — “single_family”, “multi_family”, or “case_control”. 
The pipeline can be initiated using the following command in the terminal: bash segpy.pip/launch_segpy.sh \ -d /path/to/working/directory \ --steps 0 \ --jobmode [slurm | local] --analysis_mode [single_family | multiple_family | case-control] Users can then execute the full segregation analysis with the following command: bash segpy.pip/launch_segpy.sh \ -d $PWD \ --steps 1-3 \ --vcf $VCF \ --ped $PED \ --parser [general | unique] Application cases Despite the vast diversity of cohort configurations and research questions that accompany genetic analyses, the generalizable framework of the Segpy pipeline can be applied to an array of study designs to enable meaningful insights into the genetic underpinnings of disease. This adaptability and capacity to streamline complex analyses has made the Segpy pipeline a cornerstone for genetic analysis among our group and others at the Montreal Neurological Institute-Hospital (The Neuro) over the past decade. Specifically, we have applied this tool in our work, which resulted in >60 manuscripts over the past decade. Within the pedigree-based analysis track, the Segpy pipeline can accommodate a wide range of inputs, including single-family trios or quartets, multigenerational families, and multi-family pedigrees. Single-family, trio-based segregation analyses have been instrumental in identifying genetic drivers of diseases ( 1 ), leading to a tenfold reduction in candidate variants and a 50% increase in diagnostic yield compared to singleton sequencing ( 10 ). Moreover, multigenerational and extended families, which include a larger number of affected and unaffected individuals, provide valuable genetic context that enhances the detection of rare causal variants and supports more robust co-segregation analyses ( 11 ). 
Indeed, single-family analyses have proven opportunistic at The Neuro as Segpy has been effectively applied to uncover genetic contributors to a range of diseases, including Pelizaeus-Merzbacher disease ( 12 ), hereditary spastic paraplegia ( 13 ), and essential tremor ( 14 ). Incorporating multiple families with probands exhibiting similar clinical presentations — whether for an unrecognized rare disease or cases of the same condition — further strengthens evidence for a variant’s involvement in disease causation through consistent co-segregation. Segpy has also been extensively applied in multi-family studies investigating a range of neurological phenotypes, such as autism spectrum disorder ( 15 ), bipolar disorder ( 16 , 17 ), schizophrenia ( 18 ), spastic paraplegia ( 19 , 20 ), and restless leg syndrome ( 21 , 22 ), among others. Beyond identifying causal variants and enhancing diagnostic yield, pedigree-based designs are vital for establishing inheritance patterns and assessing risks for current and future family members. They allow for the mapping of Mendelian inheritance patterns, including autosomal dominant, autosomal recessive, and X-linked patterns, while also enabling the filtering of variants with Mendelian inconsistencies. For example, Segpy has been applied to unravel genetic drivers of autosomal recessive hereditary spastic paraplegia ( 23 , 24 ) and has identified recessive CDK5RAP2 variants in patients with isolated agenesis of the corpus callosum ( 25 ). Additionally, pedigree-based analyses facilitated by Segpy have detected de novo variants contributing to disorders such as obsessive-compulsive disorder ( 26 ), schizophrenia ( 27 , 28 ), and developmental and epileptic encephalopathies ( 29 ). In turn, Segpy’s case-control analysis track is specifically designed for population-based cohorts, providing an efficient approach for studying well-characterized phenotypes without the need to enroll family members of affected individuals. 
While case-control studies are not intended to assess inheritance patterns, they offer a powerful framework for identifying novel disease-associated loci through a variety of downstream statistical analyses. The output from the Segpy pipeline supports multiple analytical approaches, including genome-wide and exome-wide association studies to identify common genetic variants linked to disease, rare variant burden analysis to assess the cumulative impact of rare variants across the cohort, and polygenic risk scoring to evaluate the combined effects of multiple genetic variants on disease susceptibility. Previous case-control analyses using Segpy have implicated rare variants in several diseases, including essential tremor ( 30 ), Parkinson’s disease ( 31 - 34 ), rapid eye movement (REM) sleep behavior disorder ( 35 , 36 ), and congenital hypothyroidism ( 37 ). Additionally, the pipeline has been used to evaluate polygenic risk in amyotrophic lateral sclerosis (ALS) ( 38 ). Conclusion The Segpy pipeline offers a versatile and accessible solution for variant segregation analysis across diverse genomic study designs, including both pedigree-based and population-based cohorts. Notably, its direct integration with HPC environments enhances analytical efficiency by enabling rapid data processing, effectively eliminating size limitations associated with large-scale datasets. While multiple genomic analysis packages catering primarily to expert geneticists have been developed, their complex frameworks can be challenging for labs without specialized genetic expertise looking to supplement their broad research program ( 4 - 6 ). In contrast, Segpy’s targeted focus on segregation analysis, user-friendly design, and containerized framework empowers researchers to conduct sophisticated analyses without demanding intensive learning efforts to familiarize themselves with the framework. 
To date, Segpy has proven to be exceptionally valuable at The Neuro in identifying genetic contributors to a wide range of neurologic diseases from both pedigree-based and population-based study designs. We anticipate that our publicly available framework will expand our in-house success across multiple research groups and institutions, ultimately fostering contributions from investigators with diverse research backgrounds to the growing wealth of genetic discovery to advance the promise of genomic medicine. Data availability Data sharing is not applicable to this article as no datasets were generated or analysed during the current study. The code underlying the Segpy pipeline is publicly available on GitHub ( https://github.com/neurobioinfo/segpy ). Conflicts of interest The authors declare no conflicts of interest. Funding statement This work was supported in part by grants from CIHR, Brain Canada, and ALS Canada. Acknowledgments The authors have no acknowledgements to declare. Footnotes ( michael.fiorini{at}mail.mcgill.ca ), ( saeid.amiri{at}mcgill.ca ), ( allison.dilliot{at}mcgill.ca ), ( dan.spiegelman.hsj{at}ssss.gouv.qc.ca ), ( guy.rouleau{at}mcgill.ca ) References 1. ↵ Kanzi AM , San JE , Chimukangara B , Wilkinson E , Fish M , Ramsuran V , et al. Next Generation Sequencing and Bioinformatics Analysis of Family Genetic Inheritance . Front Genet . 2020 ; 11 : 544162 . OpenUrl CrossRef PubMed 2. ↵ Kilpinen H , Barrett JC . How next-generation sequencing is transforming complex disease genetics . Trends in Genetics . 2013 ; 29 ( 1 ): 23 – 30 . OpenUrl CrossRef PubMed 3. ↵ Risch N , Merikangas K. The future of genetic studies of complex human diseases . Science . 1996 ; 273 ( 5281 ): 1516 – 7 . OpenUrl Abstract / FREE Full Text 4. ↵ Pais LS , Snow H , Weisburd B , Zhang S , Baxter SM , DiTroia S , et al. Seqr: a web-based analysis and collaboration tool for rare disease genomics . Human mutation . 2022 ; 43 ( 6 ): 698 – 707 . OpenUrl CrossRef PubMed 5. 
McKenna A , Hanna M , Banks E , Sivachenko A , Cibulskis K , Kernytsky A , et al. The Genome Analysis Toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data . Genome research . 2010 ; 20 ( 9 ): 1297 – 303 . OpenUrl Abstract / FREE Full Text 6. ↵ Purcell S , Neale B , Todd-Brown K , Thomas L , Ferreira MA , Bender D , et al. PLINK: a tool set for whole-genome association and population-based linkage analyses . The American journal of human genetics . 2007 ; 81 ( 3 ): 559 – 75 . OpenUrl CrossRef PubMed 7. ↵ McLaren W , Gil L , Hunt SE , Riat HS , Ritchie GR , Thormann A , et al. The Ensembl Variant Effect Predictor . Genome Biol . 2016 ; 17 ( 1 ): 122 . OpenUrl CrossRef PubMed 8. ↵ Kurtzer GM , Sochat V , Bauer MW . Singularity: Scientific containers for mobility of compute . PloS one . 2017 ; 12 ( 5 ): e0177459 . OpenUrl CrossRef PubMed 9. ↵ Yoo AB , Jette MA , Grondona M , editors. Slurm: Simple linux utility for resource management . Workshop on job scheduling strategies for parallel processing ; 2003 : Springer . 10. ↵ Wright CF , FitzPatrick DR , Firth HV . Paediatric genomics: diagnosing rare disease in children . Nat Rev Genet . 2018 ; 19 ( 5 ): 325 . OpenUrl CrossRef PubMed 11. ↵ Hansen RD , Christensen AF , Olesen J. Family studies to find rare high risk variants in migraine . J Headache Pain . 2017 ; 18 ( 1 ): 32 . OpenUrl PubMed 12. ↵ Lyahyai J , Bencheikh BOA , Elalaoui SC , Mansouri M , Boualla L A DI-L , et al. Correction to: Exome sequencing reveals a novel PLP1 mutation in a Moroccan family with connatal Pelizaeus-Merzbacher disease: a case report . BMC Pediatr . 2018 ; 18 ( 1 ): 138 . OpenUrl PubMed 13. ↵ Daoud H , Papadima EM , Ouled Amar Bencheikh B , Katsila T , Dionne-Laporte A , Spiegelman D , et al. Identification of a novel homozygous SPG7 mutation by whole exome sequencing in a Greek family with a complicated form of hereditary spastic paraplegia . Eur J Med Genet . 2015 ; 58 ( 11 ): 573 – 7 . OpenUrl PubMed 14. 
↵ Merner ND , Girard SL , Catoire H , Bourassa CV , Belzil VV , Rivière J-B , et al. Exome sequencing identifies FUS mutations as a cause of essential tremor . The American Journal of Human Genetics . 2012 ; 91 ( 2 ): 313 – 9 . OpenUrl CrossRef PubMed 15. ↵ Schmilovich Z , Huguet G , He Q , Musa-Johnson A , Douard E , Loum MA , et al. Copy-number variants in the contactin-5 gene are a potential risk factor for autism spectrum disorder . Research in Autism Spectrum Disorders . 2022 ; 99 : 102055 . OpenUrl 16. ↵ Cruceanu C , Schmouth JF , Torres-Platas SG , Lopez JP , Ambalavanan A , Darcq E , et al. Rare susceptibility variants for bipolar disorder suggest a role for G protein-coupled receptors . Mol Psychiatry . 2018 ; 23 ( 10 ): 2050 – 6 . OpenUrl CrossRef PubMed 17. ↵ Cruceanu C , Ambalavanan A , Spiegelman D , Gauthier J , Lafreniere RG , Dion PA , et al. Family-based exome-sequencing approach identifies rare susceptibility variants for lithium-responsive bipolar disorder . Genome . 2013 ; 56 ( 10 ): 634 – 40 . OpenUrl 18. ↵ Ambalavanan A , Chaumette B , Zhou S , Xie P , He Q , Spiegelman D , et al. Exome sequencing of sporadic childhood-onset schizophrenia suggests the contribution of X-linked genes in males . Am J Med Genet B Neuropsychiatr Genet . 2019 ; 180 ( 6 ): 335 – 40 . OpenUrl PubMed 19. ↵ Varghaei P , Estiar MA , Ashtiani S , Veyron S , Mufti K , Leveille E , et al. Genetic, structural and clinical analysis of spastic paraplegia 4 . Parkinsonism Relat Disord . 2022 ; 98 : 62 – 9 . OpenUrl PubMed 20. ↵ Estiar MA , Yu E , Haj Salem I , Ross JP , Mufti K , Akcimen F , et al. Evidence for Non-Mendelian Inheritance in Spastic Paraplegia 7 . Mov Disord . 2021 ; 36 ( 7 ): 1664 – 75 . OpenUrl CrossRef PubMed 21. ↵ Akcimen F , Spiegelman D , Dionne-Laporte A , Gan-Or Z , Dion PA , Rouleau GA . Screening of novel restless legs syndrome-associated genes in French-Canadian families . Neurol Genet . 2018 ; 4 ( 6 ): e296 . OpenUrl Abstract / FREE Full Text 22. 
↵ Gan-Or Z , Zhou S , Ambalavanan A , Leblond CS , Xie P , Johnson A , et al. Analysis of functional GLO1 variants in the BTBD9 locus and restless legs syndrome . Sleep Med . 2015 ; 16 ( 9 ): 1151 – 5 . OpenUrl CrossRef PubMed 23. ↵ Gan-Or Z , Bouslam N , Birouk N , Lissouba A , Chambers DB , Veriepe J , et al. Mutations in CAPN1 Cause Autosomal-Recessive Hereditary Spastic Paraplegia . Am J Hum Genet . 2016 ; 98 ( 6 ): 1271 . OpenUrl CrossRef 24. ↵ Leveille E , Estiar MA , Krohn L , Spiegelman D , Dionne-Laporte A , Dupré N , et al. SPTAN1 variants as a potential cause for autosomal recessive hereditary spastic paraplegia . Journal of Human Genetics . 2019 ; 64 ( 11 ): 1145 – 51 . OpenUrl PubMed 25. ↵ Jouan L , Ouled Amar Bencheikh B , Daoud H , Dionne-Laporte A , Dobrzeniecka S , Spiegelman D , et al. Exome sequencing identifies recessive CDK5RAP2 variants in patients with isolated agenesis of corpus callosum . Eur J Hum Genet . 2016 ; 24 ( 4 ): 607 – 10 . OpenUrl CrossRef PubMed 26. ↵ Bornais K , Ross J , Schmilovich Z , Medeiros M , Spiegelman D , Dion P , et al. De novo variant analysis of childhood-onset obsessive-compulsive disorder in the French-Canadian population . Journal of the Neurological Sciences . 2023 ; 455 . 27. ↵ Ambalavanan A , Girard SL , Ahn K , Zhou S , Dionne-Laporte A , Spiegelman D , et al. De novo variants in sporadic cases of childhood onset schizophrenia . Eur J Hum Genet . 2016 ; 24 ( 6 ): 944 – 8 . OpenUrl CrossRef PubMed 28. ↵ Girard SL , Gauthier J , Noreau A , Xiong L , Zhou S , Jouan L , et al. Increased exonic de novo mutation rate in individuals with schizophrenia . Nat Genet . 2011 ; 43 ( 9 ): 860 – 3 . OpenUrl CrossRef PubMed 29. ↵ Hamdan FF , Myers CT , Cossette P , Lemay P , Spiegelman D , Laporte AD , et al. High Rate of Recurrent De Novo Mutations in Developmental and Epileptic Encephalopathies . Am J Hum Genet . 2017 ; 101 ( 5 ): 664 – 85 . OpenUrl CrossRef PubMed 30. 
↵ Medeiros M , Liao C , Dilliott A , Ross J , Spiegelman D , Farhan S , et al. Rare copy number variation in TMEM50A implicated in essential tremor . Journal of the Neurological Sciences . 2023 ; 455 . 31. ↵ Senkevich K , Beletskaia M , Dworkind A , Yu E , Ahmad J , Ruskey JA , et al. Association of Rare Variants in ARSA with Parkinson’s Disease . Mov Disord . 2023 ; 38 ( 10 ): 1806 – 12 . OpenUrl PubMed 32. Hu J , Waters CH , Spiegelman D , Fon EA , Yu E , Asayesh F , et al. Gene-based burden analysis of damaging private variants in PRKN, PARK7 and PINK1 in Parkinson’s disease cohorts of European descent . Neurobiol Aging . 2022 ; 119 : 136 – 8 . OpenUrl PubMed 33. Rudakou U , Ruskey JA , Krohn L , Laurent SB , Spiegelman D , Greenbaum L , et al. Analysis of common and rare VPS13C variants in late-onset Parkinson disease . Neurol Genet . 2020 ; 6 ( 1 ): 385 . OpenUrl Abstract / FREE Full Text 34. ↵ Rudakou U , Ouled Amar Bencheikh B , Ruskey JA , Krohn L , Laurent SB , Spiegelman D , et al. Common and rare GCH1 variants are associated with Parkinson’s disease . Neurobiol Aging . 2019 ; 73 : 231 e1-e6. OpenUrl 35. ↵ Sosero YL , Yu E , Estiar MA , Krohn L , Mufti K , Rudakou U , et al. Rare PSAP Variants and Possible Interaction with GBA in REM Sleep Behavior Disorder . J Parkinsons Dis . 2022 ; 12 ( 1 ): 333 – 40 . OpenUrl PubMed 36. ↵ Mufti K , Yu E , Rudakou U , Krohn L , Ruskey JA , Asayesh F , et al. Novel Associations of BST1 and LAMP3 With REM Sleep Behavior Disorder . Neurology . 2021 ; 96 ( 10 ): e1402 – e12 . OpenUrl PubMed 37. ↵ Larrivée-Vanier S , Jean-Louis M , Magne F , Bui H , Rouleau GA , Spiegelman D , et al. Whole-exome sequencing in congenital hypothyroidism due to thyroid dysgenesis . Thyroid . 2022 ; 32 ( 5 ): 486 – 95 . OpenUrl PubMed 38. ↵ Ross JP , Akcimen F , Liao C , Kwan K , Phillips DE , Schmilovich Z , et al. Rare-variant and polygenic analyses of amyotrophic lateral sclerosis in the French-Canadian genome . Genet Med . 
2024 ; 26 ( 1 ): 100967 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted December 29, 2024. Download PDF Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Segpy: a streamlined, user-friendly pipeline for variant segregation analysis Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Segpy: a streamlined, user-friendly pipeline for variant segregation analysis Michael R. Fiorini , Saeid Amiri , Allison A. Dilliott , Dan Spiegelman , Guy Rouleau , Sali M.K. Farhan medRxiv 2024.12.26.24319616; doi: https://doi.org/10.1101/2024.12.26.24319616 Share This Article: Copy Citation Tools Segpy: a streamlined, user-friendly pipeline for variant segregation analysis Michael R. Fiorini , Saeid Amiri , Allison A. Dilliott , Dan Spiegelman , Guy Rouleau , Sali M.K. 
Farhan medRxiv 2024.12.26.24319616; doi: https://doi.org/10.1101/2024.12.26.24319616 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (574) Allergy and Immunology (866) Anesthesia (304) Cardiovascular Medicine (4463) Dentistry and Oral Medicine (445) Dermatology (383) Emergency Medicine (611) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1517) Epidemiology (15253) Forensic Medicine (31) Gastroenterology (1132) Genetic and Genomic Medicine (6622) Geriatric Medicine (669) Health Economics (1002) Health Informatics (4565) Health Policy (1372) Health Systems and Quality Improvement (1617) Hematology (544) HIV/AIDS (1272) Infectious Diseases (except HIV/AIDS) (15938) Intensive Care and Critical Care Medicine (1107) Medical Education (624) Medical Ethics (147) Nephrology (670) Neurology (6643) Nursing (346) Nutrition (1001) Obstetrics and Gynecology (1149) Occupational and Environmental Health (957) Oncology (3351) Ophthalmology (981) Orthopedics (369) Otolaryngology (421) Pain Medicine (436) Palliative Medicine (130) Pathology (665) Pediatrics (1699) Pharmacology and Therapeutics (694) Primary Care Research (714) Psychiatry and Clinical Psychology (5465) Public and Global Health (9259) Radiology and Imaging (2212) Rehabilitation Medicine and Physical Therapy (1372) Respiratory Medicine (1199) Rheumatology (598) Sexual and Reproductive Health (716) Sports Medicine (533) Surgery (715) Toxicology (100) Transplantation (289) Urology (266) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a03eca222a062bef',t:'MTc4MDE1NTEyNg=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.