Full text
126,004 characters
· extracted from
preprint-html
· click to expand
D4Z4End2End: complete genetic and epigenetic architecture of D4Z4 macrosatellites in FSHD, BAMS and reference cohorts | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search D4Z4End2End: complete genetic and epigenetic architecture of D4Z4 macrosatellites in FSHD, BAMS and reference cohorts View ORCID Profile Lucinda C. Xiao , Ayush Semwal , Brianna St John , View ORCID Profile Kathleen Zeglinski , View ORCID Profile Shian Su , View ORCID Profile James Lancaster , View ORCID Profile Shifeng Xue , View ORCID Profile Bruno Reversade , View ORCID Profile Matthew E. Ritchie , View ORCID Profile Frédérique Magdinier , View ORCID Profile Marnie E. 
Blewitt , View ORCID Profile Quentin Gouil doi: https://doi.org/10.1101/2025.04.24.25326320 Lucinda C. Xiao 1 The Walter and Eliza Hall Institute of Medical Research , Parkville, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Lucinda C. Xiao Ayush Semwal 1 The Walter and Eliza Hall Institute of Medical Research , Parkville, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Brianna St John 1 The Walter and Eliza Hall Institute of Medical Research , Parkville, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kathleen Zeglinski 1 The Walter and Eliza Hall Institute of Medical Research , Parkville, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kathleen Zeglinski Shian Su 1 The Walter and Eliza Hall Institute of Medical Research , Parkville, Victoria, Australia 2 Department of Medical Biology, The University of Melbourne , Parkville, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Shian Su James Lancaster 1 The Walter and Eliza Hall Institute of Medical Research , Parkville, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for James Lancaster Shifeng Xue 3 Department of Biological Sciences, National University of Singapore , Singapore Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Shifeng Xue Bruno Reversade 4 Laboratory of Human Genetics and Therapeutics, King Abdullah University of Science and Technology , Thuwal, Kingdom of Saudi Arabia Find this author on Google Scholar Find this author on PubMed Search for this author on 
this site ORCID record for Bruno Reversade Matthew E. Ritchie 1 The Walter and Eliza Hall Institute of Medical Research , Parkville, Victoria, Australia 2 Department of Medical Biology, The University of Melbourne , Parkville, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Matthew E. Ritchie Frédérique Magdinier 5 Aix Marseille Univ, INSERM, Marseille Medical Genetics , Marseille, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Frédérique Magdinier Marnie E. Blewitt 1 The Walter and Eliza Hall Institute of Medical Research , Parkville, Victoria, Australia 2 Department of Medical Biology, The University of Melbourne , Parkville, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Marnie E. Blewitt Quentin Gouil 1 The Walter and Eliza Hall Institute of Medical Research , Parkville, Victoria, Australia 2 Department of Medical Biology, The University of Melbourne , Parkville, Victoria, Australia 6 Olivia Newton-John Cancer Research Institute , Heidelberg, Victoria, Australia 7 School of Cancer Medicine, La Trobe University , Bundoora, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Quentin Gouil For correspondence: quentin.gouil{at}onjcri.org.au Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract The D4Z4 locus is a macrosatellite array on chromosome 4q that normally comprises 8 to >100 3.3-kb repeat units. Its size and repetitiveness render it refractory to most sequencing technologies; consequently its genetic and epigenetic architectures remain incompletely understood despite their relevance to human health, in particular facioscapulohumeral muscular dystrophy (FSHD). 
Molecular diagnosis for FSHD following clinical description currently involves complex, multi-step and low-resolution assays, aiming at identifying contractions on permissive haplotypes (FSHD type 1) or epigenetic reactivation (FSHD type 2) due to pathogenic variants in the epigenetic machinery (most often in SMCHD1 ). Here we leverage ultra-long whole-genome and Cas9-targeted sequencing to develop a fast and accurate workflow, D4Z4End2End, to comprehensively characterise the genetics and methylation of D4Z4 alleles. We apply it to samples from patients affected by FSHD1, FSHD2, and another disease caused by SMCHD1 variants, Bosma arhinia microphthalmia syndrome (BAMS), as well as publicly-available data from the 1000 Genomes Project and Human Pangenome Reference Consortium. We attain high read depth sequencing of full-length D4Z4 arrays of up to 40 repeat units (~132 kb), accurately capture contracted arrays, genetic mosaicism, and pathogenic SMCHD1 variants, and generate accurate consensus sequences of the full set of D4Z4 alleles for variant analysis. Moreover, we identify new allelic variants, analyse complex D4Z4 rearrangements including in- cis duplications, and reveal striking length- and SMCHD1 status-dependent methylation patterns across the D4Z4 array. Our findings provide new insights into human macrosatellite genetics and epigenetics, and demonstrate the potential of long-read nanopore sequencing to accelerate FSHD research and diagnostics. Introduction Facioscapulohumeral muscular dystrophy (FSHD) is a genetic disorder that causes progressive weakness of the muscles of the face, scapula and upper arms, as well as the lower legs, hip girdle and abdomen ( 1 , 2 ). It is the third most common muscular dystrophy worldwide, with an estimated prevalence of between 1/8000 and 1/20000 ( 3 – 5 ). FSHD has a complex aetiology involving genetic and epigenetic dysregulation of the D4Z4 macrosatellite array in the subtelomere of chromosome 4 (4q35). 
The D4Z4 array comprises 1 to >100 3.3-kb tandem repeats, each of which contains a partial copy of the DUX4 gene, with a full-length DUX4 gene being present at the distal end of the array ( Figure 1A ). DUX4 encodes a transcription factor that is normally expressed in a small window of embryonic development, where it plays a key role in zygotic genome activation ( 6 ), but is thereafter silenced in most somatic tissues ( 7 ). FSHD arises when loss of silencing at D4Z4 leads to aberrant expression of DUX4 in skeletal muscle, and subsequent activation of myotoxic signalling pathways ( 8 ). Download figure Open in new tab Fig. 1. D4Z4End2End: an all-in-one, long-read-based workflow for the genetic and epigenetic analysis of FSHD. (A) Schematic of the pathogenesis of FSHD. The causative locus of FSHD is the D4Z4 macrosatellite array at the chromosome 4q subtelomere, which comprises 1 to >100 ~3.3 kb tandem repeat units. FSHD occurs in the presence of i) a polyadenylation signal (PAS)-containing D4Z4 haplotype (4qA), which enables stable DUX4 expression and ii) epigenetic dysregulation of the array, which can occur via array contraction (FSHD1) or pathogenic variants in chromatin modifiers (FSHD2). Both FSHD1 and FSHD2 are associated with D4Z4 hypomethylation and ectopic DUX4 expression in skeletal muscle. A non-pathogenic, homologous D4Z4 array is located at the chromosome 10q subtelomere. A 4q-specific XapI site and a 10q-specific BlnI site are used to distinguish 4q and 10q alleles in Southern blot assays. (B) Overview of the all-in-one, long-read-based workflow for FSHD. Ultra-high-molecular-weight (UHMW) DNA is used to perform Nanopore Cas9-targeted or whole-genome sequencing (WGS). Canonical and 5mCG basecalling is performed using Dorado. Variants in reads overlapping FSHD2-associated genes are called using DeepVariant ( 23 ), and the potential pathogenicity of these variants is assessed using Variant Effect Predictor (VEP) ( 24 ). 
For assessment of the D4Z4 array, raw reads are input into a custom script which annotates and haplotypes D4Z4 reads. The output of the haplotyping pipeline can be explored using an interactive visualisation tool, which displays raw reads as individual dots grouped into their haplotypes and plotted based on their number of D4Z4 repeat units (y-axis), and highlights 4q and 10q spanning reads. Hovering over individual dots displays the annotated raw read. Accurate consensus sequences for 4q and 10q alleles can be generated from spanning reads using Racon ( 25 ), enabling analysis of allele-specific D4Z4 genetic variants. Allele-specific, array-wide methylation analysis is performed using NanoMethViz ( 26 , 27 ), allowing detection and visualisation of FSHD1- and FSHD2-specific methylation patterns. Epigenetic derepression of D4Z4 can occur through two different mechanisms, which define the two main subtypes of FSHD (FSHD1 and FSHD2) ( Figure 1A ) ( 9 ). FSHD1 accounts for the majority of cases (~95%), and is caused by contraction of the D4Z4 array to 1-10 repeat units, which can either be inherited in an autosomal dominant manner or arise de novo (10–30% of cases) ( 10 , 11 ). FSHD2 (~5% of cases) is caused by pathogenic variants in chromatin modifiers including SMCHD1 ( 12 ), DNMT3B ( 13 ) and LRIF1 ( 14 ) in the presence of an intermediate-length array of commonly 8–20 repeat units ( 15 ) or cis -duplicated arrays with short distal arrays ( 16 ). Non-contracted D4Z4 arrays generally have high levels of DNA methylation, whereas both FSHD1 and FSHD2 are associated with hypomethylation of either the contracted array (FSHD1) or all D4Z4 alleles (FSHD2) ( 2 , 17 ). Further factors complicate the genetics of FSHD ( Figure 1A ). Two main haplotypes of the 4q D4Z4 allele have been identified, designated 4qA and 4qB, based on differences in the sequence immediately distal to the D4Z4 array ( 18 ). 
Only the 4qA haplotype contains a polyadenylation signal (PAS) required for stabilisation of the DUX4 mRNA, and thus only 4qA alleles are pathogenic ( 9 ). Moreover, there is a homologous qA-type D4Z4 array on chromosome 10q that shares 98% identity with the 4q D4Z4 array ( 19 , 20 ), yet 10qA alleles contain a single nucleotide variant (SNV) within the PAS that renders them non-pathogenic ( 9 ). Further D4Z4 subhaplotypes have been identified based on other genetic variants within and adjacent to the D4Z4 array ( 21 ), including two variants of the 4qA allele (4qAS and 4qAL) that differ in the lengths of their distal D4Z4 units, but have been found to be equally pathogenic ( 22 ). These factors should be accounted for to provide accurate diagnoses for FSHD. While previous studies have done much to advance our understanding of FSHD, many aspects of D4Z4 genetics and epigenetics remain poorly understood. The exact mechanisms by which epigenetic factors normally interact with D4Z4 repeats to mediate silencing of the array are still unclear. Moreover, even amongst patients with the same number of D4Z4 repeats, there is great variability in onset, severity and penetrance of FSHD ( 28 – 30 ), suggesting the existence of as-yet unresolved modifiers of the disease. A prominent example of this is the ‘gray zone’ in the repeat number threshold for FSHD1, where 4qA alleles with 8-10 repeat units can give rise to FSHD, but are also found in unaffected control individuals ( 15 ). Additionally, Bosma arhinia microphthalmia syndrome (BAMS), another phenotypically-distinct human disease caused by pathogenic SMCHD1 variants, has also been associated with D4Z4 hypomethylation ( 31 ), yet most BAMS patients do not have the skeletal muscle manifestations of FSHD ( 32 , 33 ). These outstanding questions all highlight the need for methods that can more comprehensively characterise the genetics and epigenetics of D4Z4 regulation. 
Until recently, study of D4Z4 has been limited by the low resolution and/or low read length of prior techniques. This is reflected in the current ‘gold standard’ for FSHD diagnostics, which after clinical assessment minimally includes counting of D4Z4 repeat number via Southern blotting to detect pathogenic contractions that are consistent with FSHD1 ( 15 ). The Southern blotting assay is able to distinguish 4q/10q alleles based on haplotype-specific SNVs that create a 4q-specific XapI restriction enzyme site, and a 10q-specific BlnI restriction enzyme site, but provides no further information about the 4q haplotype or epigenetic status. Therefore, if this first test shows that the patient does not have a 4q allele of 1-7 repeat units (strongly suggestive of FSHD), additional assays may be required to determine A/B haplotype (additional Southern blotting assay), evaluate FSHD2-associated genes (exome sequencing), or assess for D4Z4 hypomethylation (bisulfite sequencing) ( 15 ). Using this traditional diagnostic approach, it can take months to years for a patient to receive a definitive FSHD diagnosis. Moreover, more complex cases involving mosaicism and complex D4Z4 alleles, including rare 4qA haplotypes, D4Z4 duplications, hybrid arrays, D4Z4 proximal extended deletion (DPED) alleles, and 4q/10q translocations, can give rise to false positives or false negatives, or are often unable to be resolved ( 15 ). Newer technologies such as molecular combing and optical genome mapping have enabled more in-depth characterisation of complex D4Z4 alleles that are missed by traditional technologies ( 34 – 36 ). Nevertheless, these assays are still limited by their inability to resolve the base-level D4Z4 sequence, and do not provide methylation information. 
Several groups have also developed bisulfite-sequencing-based methylation assays that have been shown to accurately diagnose FSHD1 and FSHD2 ( 37 – 39 ), but can still only provide the average methylation across all D4Z4 units and/or target the final D4Z4 repeat, limiting the study of array-wide D4Z4 methylation patterns. More recently, several studies have shown the potential of long-read Nanopore sequencing for the study of the D4Z4 macrosatellite array ( 40 – 46 ). In this study, we extend upon these results (Supplementary Table S1) by developing an all-in-one, long-read-based workflow for the genetic and epigenetic analysis of FSHD (D4Z4End2End). D4Z4End2End combines high-read-depth Cas9-targeted sequencing of 4q and 10q D4Z4 arrays with a custom script for annotation and haplotyping of raw D4Z4 reads, alongside targeted sequencing and variant calling for a panel of FSHD2-associated genes ( Figure 1B ). Protocol optimisations allow for the capture of genetic mosaicism and full-length arrays of up to 40 repeat units (~142 kb including flanking regions). We combine this targeted assay with whole-genome ultra-long sequencing to reveal potential biases and complex genetic rearrangements that may be missed with a targeted approach. We apply D4Z4End2End to the study of several FSHD1, FSHD2, BAMS and control samples, and demonstrate its ability to resolve the genetics and methylation of complete sets of patient 4q and 10q alleles. Accurate haplotyping enables us to perform in-depth genetic analysis of D4Z4 alleles, identify several previously-unreported D4Z4 variants, and document a variety of unique DNA methylation patterns across full-length D4Z4 arrays in both health and disease. Moreover, for the first time we characterise the base-level genetic structure and methylation of full-length D4Z4 duplication and triplication alleles, providing further clues towards the (epi)genetic machinery involved in the genesis and regulation of these complex D4Z4 rearrangements. 
These novel findings show that targeted long-read nanopore sequencing can serve as a powerful tool for the study of macrosatellite repeats, and for more informative and streamlined FSHD diagnostics. Results Haplotyping and annotation of 4q/10q long-read sequencing data to characterise D4Z4 alleles We developed a pipeline to process and visualise long-read sequencing data covering the 4q35 and 10q26 regions, including assignment to chromosome 4q/10q, determination of A/B haplotype, counting of D4Z4 repeat units, and highlighting of reads that span the entire D4Z4 array ( Figure 1B ). Reads that overlap the D4Z4 array were identified by aligning the raw reads against the D4Z4 sequence and the 4q and 10q reference sequences from CHM13v2.0 ( 47 ). XapI and BlnI restriction sites, used to identify each allele in current diagnostic approaches, were also assessed within the raw reads, to confirm correct assignment to 4q or 10q. We further assigned the reads to A and B haplotypes based on the presence of pLAM (qA-specific sequence) and qB-specific sequences, respectively. Reads that contained both p13E-11 (a region flanking the proximal end of the D4Z4 array) and either pLAM or qB-specific sequence were classified as spanning reads, and these spanning reads were used to determine the haplotypes and number of D4Z4 units for each allele. To aid in visualisation of the results, we created an interactive tool that can be used to explore individual raw 4q and 10q reads that have been processed by the pipeline and annotated with their features (p13E-11, D4Z4 units, pLAM, qB-specific sequence, and XapI and BlnI restriction sites) and haplotypes (4q/10q and A/B) ( Figure 1B , Figure 2 ). Download figure Open in new tab Fig. 2. An interactive tool for exploring annotated and haplotyped D4Z4 raw reads from long-read sequencing data. 
(A) Sequencing data obtained for the 4q35 and 10q26 D4Z4 regions using ultra-long whole-genome sequencing (WGS) and Cas9-targeted sequencing, for sample 17706 (FSHD1). (Left) Alignment of sequencing data for the D4Z4 region against a single reference genome, such as CHM13v2.0 (32 repeat units for both the 4q35 and 10q26 D4Z4 arrays), leads to large gaps in alignment and potential mis-mapping of reads, making interpretation difficult. (Right) Our pipeline processes the sequencing data to annotate raw reads with their D4Z4 features, enabling accurate repeat unit counting and assignment to 4q/10q and A/B haplotypes. (Top) WGS generates a high total sequencing output (44.86 Gb for sample 17706) but yields low coverage of the 4q35 and 10q26 regions, which can lead to pathogenic alleles being missed. (Bottom) Cas9-targeted sequencing leads to much higher coverage of the 4q35 and 10q26 regions with a much lower total sequencing output (2.31 Gb for sample 17706), which enables the capture of low-frequency pathogenic alleles, genetic mosaicism, long full-length arrays, and high numbers of spanning reads. (B) The pipeline is accompanied by an interactive visualisation tool that enables exploration of the structure of raw D4Z4 reads, revealing D4Z4 variants such as (Left) 4qAS, 4qAL and a newly-identified 4qAM allele, as well as (Right) 10qB alleles from 4q/10q translocations, and in-cis triplication alleles. We first used the pipeline to analyse publicly-available raw Nanopore data and assess assemblies published by the Human Pangenome Reference Consortium (HPRC) ( 48 ), which have previously been studied in the context of FSHD ( 49 ). The pipeline was able to successfully identify and annotate raw reads that spanned complete D4Z4 arrays, enabling accurate D4Z4 counting and resolution of 4q and 10q alleles (Supplementary Table S2). 
Despite the presence of raw spanning reads in the Nanopore data, assessment of the associated HPRC assemblies showed many of the D4Z4 arrays to be incomplete or split across multiple scaffolds (Supplementary Table S2), creating difficulties for accurate D4Z4 counting and haplotyping. Thus, based on evidence from the raw reads, we find different D4Z4 repeat numbers to those reported previously ( 49 ). Additionally, we used the pipeline to process the publicly-available raw Nanopore data used for the CHM13v2.0 assembly ( 47 ), and found the number of repeat units in CHM13v2.0’s assembly of the 10q array (33 full 3.3 kb units) to be inconsistent with the raw Nanopore reads (32 full 3.3 kb units). This shows that there are still outstanding issues in the automated assembly of macrosatellite repeat regions, and demonstrates the need for a tool to visualise the repeat structure of raw reads. We then used the pipeline to analyse Nanopore ultra-long whole-genome sequencing (WGS) data from the fibroblasts of two healthy controls, three patients diagnosed with FSHD1, three patients diagnosed with FSHD2, and two patients diagnosed with BAMS ( Table 1 ). The D4Z4 haplotypes of the FSHD1 and FSHD2 patients have previously been studied using Southern blotting and/or molecular combing ( 50 – 54 ), with one of the FSHD1 patients exhibiting somatic mosaicism for a pathogenic 4qA 2RU allele (25% based on Southern blot) and another of the FSHD1 samples harbouring a 10qA cis triplication allele ( 53 ) (Supplementary Table S3). The D4Z4 haplotypes of the controls and BAMS patients have not previously been studied. View this table: View inline View popup Download powerpoint Table 1. Haplotyping results and number of spanning reads for D4Z4 alleles from FSHD, BAMS and control fibroblasts, from whole-genome and Cas9-targeted Nanopore sequencing. 
The pipeline was able to successfully highlight and haplotype 4q and 10q reads from the patient fibroblasts ( Table 1 ), with results having variable concordance with those from Southern blot and molecular combing (Supplementary Table S3). WGS captured spanning reads for 30 out of 41 alleles, with a maximum of six spanning reads being obtained for a single allele. We captured spanning reads corresponding to the pathogenic, contracted allele for two of the FSHD1 samples, but not for the mosaic FSHD1 sample, perhaps due to the contracted allele having a lower mosaic proportion (25%) ( Figure 2A ). The interactive visualisation tool also enabled assessment of the length of the distal D4Z4 unit, and we detected both the previously-reported 4qAS (~0.3 kb) and 4qAL (~1.9 kb) haplotypes. We additionally identified a new, previously unreported 4qA haplotype with a distal D4Z4 unit of length ~0.6 kb, present in the BAMS1 sample, which we designate ‘4qAM’ ( Figure 2B ). All 10qA alleles contained a distal D4Z4 unit of length ~0.3 kb, similar to 4qAS alleles. Ultra-long read sequencing also enabled assessment of extended regions of 4q and 10q sequence upstream and downstream of the D4Z4 array ( Figure 2B ). For eight of the 4q alleles, reads were obtained that captured the inverted D4Z4 unit (D4S2463) containing DUX4c that is found ~42 kb upstream of the 4q D4Z4 array ( 55 , 56 ) ( Table 1 , Figure 2B ). For three of the samples (12566, 15166, 34140), 10q alleles were identified that belonged to B haplotypes and contained XapI restriction sites, indicating possible 4q-to-10q translocations upstream of the array. The region of homology between 4q and 10q extends ~42 kb upstream of the D4Z4 array ( 53 , 57 ), and these alleles were able to be confidently assigned to chromosome 10q as the ultra-long reads spanned >42 kb of upstream sequence ( Figure 2B ). 
Moreover, the pipeline identified reads that spanned all three arrays of the 10qA in- cis triplication allele of sample FSHD1_3 (37-I 1 in ( 53 )), revealing the allele’s complex structure with nucleotide resolution ( Figure 2B ). As has been previously suggested, translocated or duplicated alleles with intact DUX4 and PAS sequences may give rise to DUX4 expression ( 16 ), however these complex alleles cannot always be accurately resolved by Southern blotting and molecular combing ( 15 , 53 ). This demonstrates that our pipeline can comprehensively characterise 4q and 10q D4Z4 alleles, including reliable identification of contracted 4qA alleles to confirm the diagnosis of FSHD1, and identification of other structural variants which may contribute to FSHD pathogenesis. Cas9-targeted sequencing of 4q/10q D4Z4 alleles and FSHD2-associated genes While ultra-long WGS is able to capture reads that span the entire 4q and 10q D4Z4 arrays with no prior assumption of the repeat composition or structure, a single PromethION flow cell per patient only provides low coverage. This can lead to pathogenic alleles being missed ( Table 1 , Figure 2A ), difficulty in creating an accurate consensus sequence of each allele, and in exploring cell-to-cell epigenetic variability or indeed genetic mosaicism. To address this, we used Cas9-targeted sequencing to enrich for the 4q and 10q D4Z4 regions, aiming to capture the whole set of alleles in each patient. We designed guide RNAs targeting regions upstream and downstream of the D4Z4 array, for both A and B alleles (Supplementary Table S4). Additionally, we complemented this with guide RNAs designed to target a panel of FSHD2-associated genes ( SMCHD1, DNMT3B and LRIF1 ) (Supplementary Table S4), with a view to developing an all-in-one genetic and epigenetic research and diagnostic tool for FSHD. 
This enabled us to obtain high-read-depth sequencing of both chr 4q35 and 10q26 ( Table 1 , Figure 2A ) and of SMCHD1, DNMT3B and LRIF1 exons, respectively (Supplementary Figure S1). For all except one of the samples assayed using Cas9-targeted sequencing (three FSHD1 samples, one FSHD2 sample, and one BAMS sample), we were able to determine the full set of 4q and 10q haplotypes by capturing at least one spanning read from each of the patient’s 4q/10q alleles ( Table 1 ). This included the capture of all three 4q alleles from the two mosaic FSHD1 patients ( Table 1 , Figure 2A ). While the proportions of spanning 4q reads for each allele from the mosaic samples did not match the expected mosaic proportions, sample 19187’s 4qA 2RU allele was captured at a much higher frequency than sample 17706’s 4qA 1RU allele, perhaps reflecting its higher mosaic proportion (50% vs 25%). Overall, there were far more spanning reads for shorter arrays than for longer arrays, likely due to the greater potential for fragmentation of long reads during library preparation, and preference for short reads during sequencing. Nevertheless, we were able to capture multiple spanning reads for alleles of up to 40 repeat units (~142 kb), well beyond the maximum read length for Cas9-targeted sequencing recommended by Oxford Nanopore Technologies (30 kb). Genetic analysis of 4q/10q alleles using high-accuracy consensus sequences For alleles with high read depth from Cas9-targeted sequencing, we used Racon ( 25 ) to create full-length consensus sequences for the D4Z4 array. While many automated assembly tools struggle to accurately reconstruct repetitive alleles (Supplementary Table S2), our consensus-calling approach (see Methods) was able to correct errors while faithfully retaining the structure of the raw D4Z4 reads, including the number of repeat units and unusual variants such as a truncated internal D4Z4 repeat (Supplementary Figure S3C). 
This was able to be readily verified by using our interactive visualisation tool ( Figure 2 ) to compare the repeat structure of the consensus sequences to that of the raw reads. Using the consensus sequences, we were able to reliably analyse single-nucleotide variants (SNVs) and small insertions/deletions (indels) from 4qA, 4qB, and 10qA alleles. We identified 4q- and 10q-specific variants that were consistent with those seen in CHM13v2.0, including the two SNVs responsible for the 4q-specific XapI site and 10q-specific BlnI site ( Figure 3A ). Additionally, we identified qB-specific variants from the BAMS9 4qB alleles, that were consistent with those found in the 4qB allele from GRCh38 ( Figure 3A ). Several haplotype-specific variants were present in the DUX4 open reading frame (ORF), including four silent SNVs, a 4qB-specific A>G variant leading to an I229V substitution, and a 10qA-specific 6-bp deletion leading to A340_S341del. Several variants were also identified in the DUX4 promoter region, but none were found to overlap known promoter elements including the GC box, TACAA box, and initiator sequence (Supplementary Figure S2). Download figure Open in new tab Fig. 3. Array-wide and base-level analysis of D4Z4 genetic variants using consensus sequences from high-coverage long-read sequencing. (A) IGV coverage plots for alignments of D4Z4 units from the consensus sequences for patient 4q and 10q alleles, showing 4qA-, 4qB- and 10qA-specific genetic variants consistent with CHM13v2.0 and GRCh38. (B) IGV alignments of D4Z4 units from the consensus sequences for representative 4qA, 4qB and 10qA alleles from samples 17706 and BAMS3. Within each alignment track, full-length D4Z4 units are arranged from top to bottom in order of proximal to distal position within the array. Indels <4nt which were not consistent between repeat units were hidden to aid in visualisation (Supplementary Figure S3). 
Consensus sequences were generated using Racon using spanning reads from high-coverage Cas9-targeted Nanopore sequencing. SNV: single nucleotide variant. Comparison of the variants between individual D4Z4 units within each 4q and 10q array showed 4q arrays to be more heterogeneous than 10q arrays ( Figure 3B , Supplementary Figure S3), consistent with previous observations ( 49 ). For both 4qA and 4qB arrays, the proximal repeat unit contained a distinct set of variants compared to most other D4Z4 units in the array, although these variants were occasionally also found in internal repeats ( Figure 3B ). Variants in the proximal and internal repeat units have previously been used to define D4Z4 subhaplotypes ( 21 ), however these have largely been restricted to SNVs affecting restriction enzyme sites. The additional 4qA-, 4qB- and 10qA-specific genetic variants identified here provide further insight into D4Z4 subhaplotypes and the composition of the D4Z4 array. The composition of the D4Z4 array could also be used to reconstruct full-length haplotypes for alleles without spanning reads, and we demonstrated this for the BAMS1 4qAM allele (Supplementary Figure S4). Interestingly, we also identified a 4qAM allele from the Cas9-sequencing data for sample 19187 (FSHD1) ( Figure 4 ), and we found the full-length BAMS1 4qAM allele to be the same as 19187’s 4qAM allele (Supplementary Figure S3C), suggesting a common ancestral origin. Download figure Open in new tab Fig. 4. Base-level comparison of distal D4Z4 sequences from 4qAS, 4qAM, 4qAL and 10qA alleles. (A) Alignment of distal D4Z4 sequences beginning from the proximal KpnI site for the final partial D4Z4 unit, and extending into the proximal portion of the pLAM sequence containing DUX4 exon 3. Breakpoints for the final partial D4Z4 units (vertical arrows) were determined by alignment to a full-length D4Z4 sequence. The start of pLAM is marked as the first nucleotide following the final partial 4qAS D4Z4 unit. 
The 4qAM and 4qAL alleles contain an extra length of sequence between the end of their final partial D4Z4 units and the start of pLAM, which corresponds to the end of the 4qAS allele’s final partial D4Z4 unit (shown in orange boxes). Locations of 4qAS- and 4qAL-specific forward PCR primers (4AS-F, 4qAL-F) and a polyadenylation signal (PAS) reverse primer (PAS-R) used in ( 22 ) are shown above their respective sequences. Sequences are extracted from consensus sequences generated using Racon from high-coverage Cas9-targeted Nanopore sequencing. The consensus sequences also enabled more detailed assessment of the 4q subhaplotype based on sequences proximal and distal to the D4Z4 array. Alignment of the distal D4Z4 sequences of 4qAS, 4qAL and 4qAM alleles showed all three haplotypes to have identical pLAM sequences, containing an intact PAS ( Figure 4 ). The different length of the final partial repeat unit raises the possibility that 4qAM alleles give rise to different DUX4 transcripts. To further assess the potential pathogenicity of 4qAM alleles, we also assessed the simple sequence length polymorphism (SSLP) found ~3.5 kb upstream of the D4Z4 array that has been used to further delineate permissive and non-permissive D4Z4 subhaplotypes ( 58 ). We found that the SSLP for the BAMS1 4qAM allele corresponded to the 4A161 subhaplotype, which is permissive for FSHD. This suggests that 4qAM alleles may have the potential to be pathogenic, and may need to be considered in genetic testing. These findings demonstrate the utility of our approach for analysing D4Z4 alleles at array-wide, base-level resolution, including the potential to further characterise known D4Z4 subhaplotypes, discover new subhaplotypes, and resolve the composition of D4Z4 arrays. 
Detection of pathogenic variants in SMCHD1, LRIF1 and DNMT3B We next applied a variant calling and variant effect prediction pipeline to the whole-genome and Cas9-targeted Nanopore sequencing data, to assess whether our assay could obviate the need for separate genetic testing. We focused on a panel of FSHD2-associated genes, SMCHD1, LRIF1 and DNMT3B , part of the gene panel for Cas9-mediated enrichment. SMCHD1 genotyping has previously been performed for the patients included in this study (Supplementary Table S3). We identified pathogenic SMCHD1 variants for three of the four FSHD2 samples (11440, 15166, 11491) and both of the BAMS samples (BAMS1, BAMS9) that recapitulated the prior genotyping results (Supplementary Table S3). For one of the patients diagnosed as FSHD2 (34140), no pathogenic variant in SMCHD1, DNMT3B or LRIF1 was found. The presence of a synonymous variant was confirmed (p.L1031), but with no further evidence of pathogenicity, calling into question the diagnosis as FSHD2. The BAMS variants were heterozygous missense variants located in or adjacent to SMCHD1 ’s N-terminal ATPase domain (sample BAMS1: p.E136G, sample BAMS9: p.D420V). Conversely, the FSHD2 variants were heterozygous splice donor (sample 11440: c.2338+4A>G (intron 18); sample 11491: c.5547+5G>A (intron 44)) or frameshift (sample 15166: c.4608_4614dup, p.Ala1539Tyrfs*6 (exon 37)) variants found closer towards the C-terminal end of the protein, predicted to lead to SMCHD1 haploinsufficiency and consistent with an FSHD2 diagnosis. In one of the FSHD1 samples (12566), we also identified a previously unreported p.R1887L missense variant in exon 45 of SMCHD1 which was predicted to be pathogenic by AlphaMissense ( 59 ), with a pathogenicity score of 0.9642. None of the samples had pathogenic variants in LRIF1 or DNMT3B . 
Notably, whereas some FSHD2-associated genes had only low coverage with whole-genome sequencing, the read depth was significantly increased with Cas9-targeted sequencing, aiding in detection of heterozygous pathogenic variants. This confirmed the suitability of a single nanopore sequencing assay for 4q haplotyping, 4q structural characterisation and variant detection in an FSHD gene panel. Allele-specific CpG methylation patterns across the D4Z4 array Array-size-dependent methylation patterns are seen in FSHD1 and control fibroblasts To assess the methylation patterns of D4Z4 arrays of different lengths and SMCHD1 variant status, we called CpG methylation for the 4q and 10q reads, and used NanoMethViz ( 26 , 27 ) to plot the methylation of individual molecules as well as the smoothed methylation profile across the complete array for each allele. For FSHD1 and control samples, an oscillatory pattern of D4Z4 methylation was seen within each repeat unit, with low levels of methylation around the DR1 region, which corresponds to the distal portion of a proposed CTCF binding site ( 37 , 60 ), and high levels of methylation around the DUX4 transcription start site (TSS) ( Figure 5 , Supplementary Figure S6), consistent with previous studies ( 40 , 61 ). These oscillatory patterns were present in all non-contracted alleles, but absent from the hypomethylated, contracted FSHD1 alleles. Autocorrelation of the methylation status of CpGs across the D4Z4 array indicated strong correlation between sites separated by ~3.3 kbp, corresponding to the size of one D4Z4 unit, as well as between sites separated by ~180 bp (Supplementary Figure S7), which likely represents nucleosome positioning ( 40 , 62 ). 
The high read depth provided by Cas9-targeted sequencing enabled us to see that for each allele, the overall methylation patterns were highly consistent between individual molecules ( Figure 5 ), but that there was significant intramolecular heterogeneity at the level of individual CpG sites (Supplementary Figure S8). Download figure Open in new tab Fig. 5. Allele-specific, array-wide D4Z4 methylation profiles for FSHD1 fibroblasts from Cas9-targeted sequencing. Single-molecule and smoothed methylation plots for 4q and 10q D4Z4 alleles from the FSHD1 samples (A) 19187, (B) 17706 and (C) 12566, generated using NanoMethViz ( 26 , 27 ). The locations of DUX4 exons are shaded in gray on the smoothed methylation plot. Annotations for D4Z4 repeat units, CTCF insulator regions, and DUX4 exons are shown below each plot. Mean methylation rates over the DR1 region (positions 563–814 of the D4Z4 KpnI-KpnI unit ( 37 )) and DUX4 transcription start site (TSS) region (−200 to +200 with respect to the TSS at position 1688 of the D4Z4 KpnI-KpnI unit) within each D4Z4 unit were also plotted for each allele. Points for DR1 and DUX4 TSS regions from the same D4Z4 unit are connected by gray lines. Differing array-wide methylation patterns were observed for short, intermediate and long D4Z4 arrays ( Figure 5 , Supplementary Figure S6). The short, pathogenic FSHD1 arrays, which ranged from 1-3 repeat units, were severely hypomethylated across the full length of the array ( Figure 5 and Supplementary Figure S6, alleles shaded in yellow). Long arrays of >20 repeat units were much more highly methylated, and tended to display a regular, stepwise increase in D4Z4 methylation over the first ten or so repeat units before reaching a constant, high level of methylation ( Figure 5 and Supplementary Figure S6, alleles shaded in blue), similarly to what has been described previously ( 40 ). 
By contrast, short D4Z4 arrays at the upper end of the pathogenic range for FSHD1 (6-10 repeat units) and intermediate-length arrays (11-20 repeat units) were more variable in their methylation patterns ( Figure 5 and Supplementary Figure S6, alleles shaded in green). While these arrays were often more highly-methylated at their distal ends compared to their proximal ends, this proximal-to-distal increase in methylation did not always occur in a stepwise fashion, as exemplified by the ‘irregular’ methylation patterns seen in several of the alleles from sample 17706 ( Figure 5B ). These observations may partially explain why array sizes in the 1-4 repeat unit range are seen in more clinically-severe and early-onset cases of FSHD, while longer arrays are associated with variable penetrance and a broad range of clinical severity ( 29 , 64 ). The differences in D4Z4 methylation between short, intermediate and long D4Z4 arrays were also reflected in the global D4Z4 methylation level for each allele, as measured by the proportion of all methylated CpGs across the D4Z4 array (henceforth ‘global D4Z4 5mC rate’). Overall, we found a positive correlation between the global D4Z4 5mC rate and the number of repeat units for both FSHD1 samples and controls (Pearson correlation coefficient 0.844, Spearman correlation coefficient 0.826) ( Figure 6A ). As only the terminal DUX4 gives rise to stable transcripts in permissive 4qA alleles, we additionally assessed the mean methylation of the final full D4Z4 unit, which contains the promoter and exons 1 and 2 of the terminal DUX4 . Amongst FSHD1 and control samples, there was a moderate correlation between the number of repeat units and mean methylation rate of the final full D4Z4 unit (‘final D4Z4 5mC rate’) (Pearson correlation coefficient 0.701, Spearman correlation coefficient 0.732) ( Figure 6B ). As expected, the pathogenic, contracted FSHD1 alleles had very low final D4Z4 5mC rates, ranging from 0.064 to 0.097. 
However, there were two intermediate-length SMCHD1 -wildtype alleles which also had low final D4Z4 5mC rates, the FSHD1_3 4qB 17RU allele (final D4Z4 5mC rate 0.118) and the AG10803 (ag10) 4qB 18RU allele (final D4Z4 5mC rate 0.149). Interestingly, the AG10803 4qB 18RU allele appeared to be hypomethylated across the full length of the array (global D4Z4 5mC rate 0.116), although there were only two spanning reads to support the methylation signal (Supplementary Figure S6A). These findings once again suggest that in FSHD1 and control fibroblasts, while contracted D4Z4 arrays are consistently hypomethylated, and long D4Z4 arrays are consistently hypermethylated, methylation patterns for intermediate-length arrays are more variable. Download figure Open in new tab Fig. 6. Correlation of overall D4Z4 methylation levels with the number of repeat units. Mean methylation rates across (A) the full 4q/10q D4Z4 array or (B) the final D4Z4 unit against the number of D4Z4 units, plotted for alleles from SMCHD1 -wildtype fibroblasts (FSHD1 and non-FSHD/BAMS), pathogenic- SMCHD1 -variant fibroblasts (FSHD2 and BAMS), and 30 B-lymphocyte lymphoblastoid cell lines (LCLs) from the 1000 Genomes Project ( 63 ). Sample 34140, initially clinically diagnosed as FSHD2, was included in the ‘non-FSHD/BAMS’ group based on the lack of pathogenic SMCHD1, LRIF1 or DNMT3B variants and overall methylation profile. Mean methylation rate was calculated as the total number of methylated CpGs from all reads / the total number of methylated + unmethylated CpGs from all reads, across the defined regions. Regression lines were plotted for each group of samples. Alleles presumed to be the pathogenic alleles for each of the FSHD patients are indicated with red rings. (A) Pearson and Spearman correlation coefficients for FSHD1 and non-FSHD/BAMS fibroblasts, FSHD2 and BAMS fibroblasts, and B-lymphocyte LCLs were 0.844 and 0.826, 0.757 and 0.742, and 0.746 and 0.798, respectively. 
(B) Pearson and Spearman correlation coefficients for FSHD1 and non-FSHD/BAMS fibroblasts, FSHD2 and BAMS fibroblasts, and B-lymphocyte LCLs were 0.701 and 0.732, 0.583 and 0.607, and 0.531 and 0.552, respectively. B-lymphocyte lymphoblastoid cell lines show similar D4Z4 methylation patterns to FSHD1 and control fibroblasts, but have higher methylation levels To assess whether these length-dependent methylation patterns are also seen in a larger cohort and other cell types, we analysed D4Z4 methylation in 30 B-lymphocyte lymphoblastoid cell lines (LCLs) from the 1000 Genomes Project (1KGP), using publicly-available Nanopore WGS data from the 1KGP ONT Sequencing Consortium (1KGP-ONT) ( 63 ). We found 83 4q/10q alleles with reads that spanned the whole D4Z4 array, ranging in length from 6 to 43 repeat units (Supplementary Table S5). The patterns of DNA methylation across the array in the B-lymphocyte LCLs were similar to those seen in SMCHD1 -wildtype fibroblasts. All arrays had oscillatory patterns of low methylation around the DR1/CTCF insulator region and high methylation around the DUX4 TSS (Supplementary Figure S10). For long arrays (>20 repeats), methylation levels increased over the first ten or so repeat units then plateaued at a constant, high level across the remainder of the array, or were consistently high across the whole array. Short and intermediate-length arrays (6-20 repeat units) tended to display more regular, stepwise increases in methylation from the proximal to distal end of the array, contrasting with the ‘atypical’ methylation patterns seen in several of the alleles from the FSHD1 and control fibroblasts. Similarly to what we found in fibroblasts, there was a positive correlation between the global D4Z4 5mC and final D4Z4 5mC rates, and the number of D4Z4 units ( Figure 6 ). 
However, mean D4Z4 methylation rates were generally higher in the B-lymphocyte LCLs compared to the SMCHD1 -wildtype fibroblasts, suggesting that global D4Z4 methylation levels are cell-type specific. Both FSHD2 and BAMS fibroblasts have global D4Z4 hypomethylation We sought to investigate the effect of pathogenic FSHD2 and BAMS SMCHD1 variants on D4Z4 methylation. In both FSHD2 and BAMS, there was global hypomethylation of D4Z4 arrays compared to similar-length FSHD1 and control samples ( Figure 6A ). Despite the differing effects of FSHD2 and BAMS variants on the SMCHD1 protein (predicted haploinsufficiency and missense mutations in the ATPase domain, respectively), FSHD2 and BAMS samples showed similar overall methylation levels and methylation patterns to each other ( Figure 7 , Supplementary Figure S9). Short and intermediate-length arrays were generally severely hypomethylated across the full length of the array. Longer alleles had relatively higher methylation levels, yet were still markedly hypomethylated compared to similar-length SMCHD1 -wildtype samples ( Figure 6A ), and generally had lower methylation rates for their final D4Z4 units ( Figure 6B ) owing to minimal proximal-to-distal increase in array-wide methylation. This was clearly demonstrated by comparison of the 32RU 4qAM alleles from BAMS1 and 19187_FSHD1 ( Figure 5A , Figure 7B ). However, interestingly, we also identified several reads with very high methylation levels that mapped to the BAMS1 47RU 4qAS allele (Supplementary Figure S11), suggesting either the presence of a separate, highly-methylated cell population, or an unexpected and sporadic ‘rescue’ of these cells from hypomethylation. Download figure Open in new tab Fig. 7. Allele-specific, array-wide D4Z4 methylation profiles for FSHD2 and BAMS fibroblasts. Single-molecule and smoothed methylation plots for 4q and 10q D4Z4 alleles from (A) FSHD2 and (B) BAMS samples, generated using NanoMethViz ( 26 , 27 ). 
Annotations for D4Z4 repeat units, CTCF insulator regions, and DUX4 exons are shown below each plot. Mean methylation rates over the DR1 region (positions 563–814 of the D4Z4 KpnI-KpnI unit ( 37 )) and DUX4 transcription start site (TSS) region (−200 to +200 with respect to the TSS at position 1688 of the D4Z4 KpnI-KpnI unit) within each D4Z4 unit were also plotted for each allele. Points for DR1 and DUX4 TSS regions from the same D4Z4 unit are connected by gray lines. The full-length reference sequences for the BAMS1 4qAM and 4qAS alleles were determined as described in Supplementary Figures S4 and S11. In terms of intra- and inter-repeat methylation patterns, oscillations between low methylation around the DR1/CTCF insulator region and high methylation around the DUX4 TSS were variably present in hypomethylated alleles, but were prominent in more highly methylated longer alleles ( Figure 7 ). Autocorrelation of 5mC signals once again indicated methylation patterns of ~3.3 kbp and ~180 bp periodicity (Supplementary Figure S7), suggesting that the mechanisms that give rise to these patterns are retained in FSHD2 and BAMS. Complete genetic and epigenetic analysis of in- cis duplicated arrays and upstream inverted D4Z4 units While several studies have used molecular combing to study the structure of in- cis duplication alleles (16, 34, 36, 45, 53), this technique is unable to reveal the underlying genetic sequence or epigenetic status, and many aspects of the genetics and epigenetics of these complex alleles have yet to be resolved. Therefore, we sought to use long-read sequencing to further investigate the composition and methylation of in- cis duplicated arrays. Our pipeline identified in- cis duplications downstream of the main D4Z4 array in two of the FSHD1 alleles (19187 10qA, FSHD1_3 10qA). 
Sample 19187’s complex 10qA allele was composed of a 26RU array followed by a 1RU array, separated by a ~6.5 kb spacer sequence, with the proximal partial D4Z4 of the duplicated array beginning ~0.3 kb after the KpnI restriction site ( Figure 8A ). Sample FSHD1_3’s complex 10qA allele contained an in- cis triplication, comprising a 15RU array followed by a 2RU array and a 5RU array, separated by two spacer sequences of ~20 kb, with the proximal partial D4Z4s of the duplicated arrays both beginning ~1.5 kb after the KpnI restriction site ( Figure 8B ). Analysis of the genetic sequence revealed all of the arrays to be 10qA-type, as determined by the presence of BlnI sites and 10qA-type pLAM sequences lacking a functional PAS. Moreover, the spacer sequences and the sequences distal to the duplicated array all corresponded to the sub-telomeric sequence that is normally found immediately distal to the 10qA D4Z4 array. Download figure Open in new tab Fig. 8. Analysis of the structure and methylation of duplicated D4Z4 arrays and upstream inverted D4Z4 units. (A), (B), (C) Structure of 4q and 10q alleles identified by the pipeline to have in- cis duplications downstream of the D4Z4 array, alongside smoothed methylation plots showing that these duplicated arrays are hypermethylated. The overall %5mC for each D4Z4 array is shown below the annotation track for the methylation plot. (D) Representative example of the inverted D4S2463 unit located ~ 42 kb upstream of the 4q D4Z4 array, which is found to be hypomethylated in FSHD and control fibroblasts and B-lymphocyte LCL samples. (E) The HG00675 cell line from the 1000 Genomes Project ( 63 ) has a 4qB allele with an upstream inverted D4Z4 array of 2.5 repeat units in place of the D4S2463 unit. This upstream inverted array is also hypomethylated. We also identified the first reported case of a 4qB in cis duplicated array, present in the B-lymphocyte LCL sample HG00675. 
The allele contained a proximal array of at least 14RU, followed by an in- cis duplicated array of 5RU. The arrays were separated by a ~13.7 kb spacer sequence, with the proximal partial D4Z4 of the duplicated array beginning ~0.8 kb after the KpnI restriction site ( Figure 8C ). Both arrays were 4qB-type, and once again, the spacer sequence was identical to the sequence distal to the duplicated array, both corresponding to the 4qB sub-telomeric sequence. These findings mirror those in Lemmers et al. ( 45 ), where the spacer sequence for 4qA-type in- cis duplication alleles was found to correspond to the 4qA sub-telomeric sequence. The shared basic structure of these 4qA, 4qB and 10qA duplication alleles suggests that they emerged via a common underlying mechanism, likely involving intrachromatid gene conversion. We then assessed the methylation of the duplicated arrays. Interestingly, for all of these alleles, the downstream, contracted arrays were highly methylated, with average methylation levels similar to or higher than those of the non-contracted upstream arrays ( Figure 8A-C ). Additionally, for sample FSHD1_3’s in- cis triplication 10qA allele, the average methylation of the proximal 15RU array was higher than might be expected if it were an isolated 15RU array, based on comparison with the global D4Z4 5mC rate of sample FSHD1_3’s 4qB 17RU allele (0.155) and the methylation of intermediate-length arrays in other SMCHD1 -wildtype samples ( Figure 6A ). Lemmers et al. ( 45 ) found a similar phenomenon for a mosaic 17+9RU 4qA allele and 17+2RU 4qA allele, where the proximal 17RU allele for the 17+9RU array was more highly-methylated than for the 17+2RU array. This may indicate the existence of bi-directional methylation spreading mechanisms between proximal and distal arrays, as suggested by Lemmers et al. ( 45 ), or reflect an underlying chromatin state that is dependent upon the additive effect of both duplicated and non-duplicated arrays. 
To further characterise the structure and methylation of in- cis D4Z4 repeats, we also assessed the inverted D4Z4 unit (D4S2463) that is found ~42 kb upstream of the 4q D4Z4 array. Consistent with previous reports ( 55 , 56 ), we found that D4S2463 is a truncated unit of ~1.5 kb that is homologous to the distal portion of the KpnI-KpnI D4Z4 unit, and contains a homologous DUX4c gene. Contrary to the downstream in- cis duplicated arrays, the upstream inverted repeat was consistently hypomethylated in the FSHD, BAMS and control fibroblast samples and in the B-lymphocyte LCL samples ( Figure 8D ). Moreover, we identified one B-lymphocyte LCL allele (HG00099 4qB) that had an inverted D4Z4 array of 2.5 repeat units ~42 kb upstream of the main D4Z4 array, rather than a single D4S2463 unit ( Figure 8E ). Several cases of the same allelic variant have also previously been identified using molecular combing ( 16 ), where the inverted array was suggested to be an expansion of the D4S2463 repeat. Here, we were able to analyse the composition of the inverted array at base-level resolution, and we found that the first inverted D4Z4 unit was ~1.6 kb and corresponded to the distal ~1.6 kb of D4Z4, whereas the second and third inverted D4Z4 units were ~3.3 kb units containing DUX4 and DUX4c sequences, respectively. Once again, Racon ( 25 ) was used to create a consensus sequence for the upstream inverted D4Z4 array, and it was found that all three repeat units contained 4qB-type SNVs, while the sequence for the last ~1.5 kb of the final repeat corresponded to the normal sequence for the inverted D4S2463 unit (Supplementary Figure S12). 
This suggests that the inverted array may have arisen by insertion of a segment of 4qB array at the breakpoint site for the D4S2463 unit, mediated by partial homology between the D4S2463 unit and the normal D4Z4 sequence; or alternatively, that the 4qB-type inverted array is in fact the ancestral allele, and gave rise to the partial D4S2463 repeat by contraction. We then assessed the methylation of this inverted array. Similarly to the single inverted D4S2463 units, the inverted array was hypomethylated, with a mean 5mC rate of 0.160. Therefore, overall, we found that downstream, in- cis duplicated arrays were methylated at similar levels to the first D4Z4 array, whereas upstream inverted arrays were hypomethylated similar to the typical D4S2463 unit. This could suggest that methylation spreading mechanisms at the D4Z4 locus, if present, are orientation-dependent, or may provide further support for an insulator mechanism at the D4Z4 locus that separates upstream inverted and downstream non-inverted arrays into separate chromatin compartments ( 50 , 60 ). Discussion The D4Z4 macrosatellite array is one of the most difficult regions in the genome to study. Here we extend upon previous studies of D4Z4 using nanopore sequencing ( 40 – 46 ) to comprehensively characterise the array-wide genetic structure and methylation patterns of macrosatellite repeats, at base-level resolution. We have developed an optimised protocol and all-in-one analysis pipeline for FSHD, generating reads that span full-length D4Z4 arrays of up to 42 repeat units (~140 kb), as well as entire in- cis duplication and triplication alleles (~173 kb). Moreover, we designed guide RNAs (gRNAs) for Cas9-targeted sequencing that were able to enrich for both A- and B-type 4q and 10q arrays with high efficiency, identifying the full complement of patient 4q and 10q alleles, including cases of mosaicism, and to generate accurate consensus sequences for downstream analysis. 
Coupling this with gRNAs for FSHD2-associated genes ( SMCHD1, DNMT3B and LRIF1 ) allowed us to accurately detect pathogenic FSHD2 gene variants in the same assay. To aid in haplotyping and interpretation of the sequencing results, we developed a custom pipeline and interactive tool for annotation and visualisation of raw D4Z4 reads, outlining issues with current automated macrosatellite assembly. By annotating and exploring individual raw reads, we were able to more reliably resolve the structure, repeat number, and haplotype of D4Z4 alleles, which is critical for accurate downstream analyses and accurate FSHD diagnoses. We demonstrated the efficacy of our pipeline on samples from several patients diagnosed with FSHD1, FSHD2 and BAMS. This enabled us to recapitulate and refine prior genotyping and methylation results, as well as to make several novel findings. By generating accurate consensus sequences for full-length patient alleles, we were able to perform detailed analysis of D4Z4 genetic variation, including the identification of haplotype-specific SNVs, and characterisation of inter-repeat and inter-array variation. Moreover, we discovered a previously-unreported 4qA variant, ‘4qAM’, which differs from the previously-identified 4qAS and 4qAL variants by the length of its distal repeat unit, but has an identical pLAM sequence and intact polyadenylation site. Further studies could elucidate the prevalence and distribution of 4qAM alleles in the general population, whether they can serve as the causative allele for FSHD, and whether they give rise to different DUX4 transcripts, as is the case for 4qAS and 4qAL alleles ( 22 ). 
If 4qAM alleles are indeed permissive for DUX4 expression, this would need to be considered in FSHD genetic testing, including updated conversion tables for calculation of repeat unit size using Southern blotting, and potentially 4qAM-specific primers in addition to the 4qAS- and 4qAL-specific primers already used for (bisulfite)-PCR assays ( 15 , 22 , 38 ). Accurate haplotyping and the capture of full-length spanning reads also allowed us to analyse the allele-specific and array-wide methylation profiles of FSHD and BAMS D4Z4 alleles, in aggregate and at single-molecule resolution. We were able to observe broad patterns in D4Z4 methylation that correlate with D4Z4 array length and SMCHD1 status, including clear hypomethylation of contracted arrays in FSHD1, and of all 4q and 10q arrays in FSHD2. Notably, one of the samples that had been clinically-diagnosed as FSHD2 (34140) displayed comparatively high levels of methylation for all alleles, consistent with the absence of contracted arrays or pathogenic SMCHD1, DNMT3B or LRIF1 variants. This illustrates that our workflow can be used to confirm a diagnosis of FSHD in patients with clinical manifestations of the disease, or conversely, to suggest an alternative diagnosis. High-coverage and full-length methylation profiles also yielded several interesting findings. Many arrays showed a stepwise proximal-to-distal increase in methylation; however, we observed some arrays with ‘atypical’ methylation patterns, especially in intermediate-length fibroblast alleles. Of note, overall DNA methylation patterns were highly consistent between individual molecules from the same allele, even for arrays with atypical methylation patterns, although not at the level of individual CpG sites. 
This aligns with observations from previous studies, which have shown that overall DNA methylation patterns can be stably inherited in clonal populations while being heterogeneous at the per-base level, which may represent differences in nucleosome phasing between molecules, stochasticity in the de novo and maintenance activities of epigenetic modifiers, and the influence of the methylation states of neighbouring CpGs ( 62 , 65 , 66 ). Future work should reveal whether those full-length D4Z4 methylation characteristics hold in other relevant cell types, such as myocytes to investigate disease pathogenesis and primary peripheral blood cells which could be used for diagnostic testing. Previous studies have also indicated that BAMS is associated with D4Z4 hypomethylation in a similar manner to FSHD2 ( 31 ), and here we have performed the first full-length analysis of methylation at the D4Z4 array in BAMS patients. Our results confirm that, at least in primary fibroblasts, D4Z4 methylation patterns are highly similar between BAMS and FSHD2, with retention of oscillatory patterns at the ~3.3 kbp and ~180 bp level, but with overall marked hypomethylation. So far, there has only been one reported case of a patient with overlapping symptoms of BAMS and FSHD, on the background of an FSHD2-permissive D4Z4 genotype ( 31 ). Shaw et al. ( 31 ) and Mohassel et al. ( 67 ) identified several other BAMS patients without clinical FSHD who had potentially permissive D4Z4 genotypes, however array size information was not available for all patients, D4Z4 methylation values were aggregate results obtained via bisulfite sequencing, and for young patients FSHD onset might not have been reached (as it typically manifests in young adults). Further genotyping and allele-specific methylation analysis could be used to more comprehensively characterise FSHD-permissive alleles in BAMS patients, and explore the possibility of as-yet undiscovered mechanisms that protect these patients from FSHD. 
Finally, we also characterised several complex D4Z4 alleles containing duplicated D4Z4 arrays. Our results suggest that 4qA, 4qB and 10qA duplication alleles likely arise via the same mechanism, which results in in- cis duplications of the distal end of the subtelomere in the non-inverted orientation. We found that downstream in- cis duplicated arrays are highly methylated, whereas upstream inverted D4Z4 repeats are hypomethylated, which may suggest orientation-dependent long-range DNA methylation spreading mechanisms, inheritance of methylation patterns from the ancestral array, or insulator mechanisms that generate local differences in chromatin organisation. Lemmers et al. ( 45 ) found that in- cis duplication alleles may be able to give rise to FSHD-level DUX4 expression and cause clinical FSHD in the absence of D4Z4 hypomethylation, demonstrating the need to further characterise the epigenetic mechanisms mediating DUX4 repression. We anticipate that, as a combined assay for D4Z4 haplotyping, D4Z4 methylation, and FSHD2-associated gene genotyping, our D4Z4End2End workflow has the potential to greatly simplify and streamline FSHD diagnostics. Several adaptations to the protocol could optimise it even further for use in clinical testing. First, in our study we use a single flow cell per patient sample, however we envisage that real-time analysis could facilitate dynamic processing of the sequencing data using the haplotyping pipeline, so that runs can be stopped once enough spanning reads are reached. This could allow multiple samples to be run on a single flow cell after sequential washes and reloads, decreasing sequencing costs. Second, adaptive sampling has recently emerged as an alternative to Cas9 for targeted sequencing ( 68 ), and may be able to further simplify library preparation while also capturing sequences further upstream and downstream of the D4Z4 array, obviating the need for separate whole-genome sequencing. 
As adaptive sampling targets regions of interest based on an easily-editable browser extensible data (BED) file, another benefit of this approach is that, with no added cost or complexity in library preparation, it could allow additional genetic loci to be added with the discovery of new causative variants for FSHD, as well as enabling optional inclusion of loci responsible for other neuromuscular diseases (NMDs). Indeed, previous studies have demonstrated the efficacy of adaptive sampling for diagnosing short-tandem repeat (STR) expansion-associated NMDs ( 69 , 70 ), and we imagine that STRs, the D4Z4 region, FSHD2-associated genes, and causative loci for other genetic NMDs could be combined into a single neuromuscular gene panel for targeted sequencing. Moreover, there are still many unknowns surrounding D4Z4 regulation, and our approach could serve as a powerful research tool to answer fundamental questions in macrosatellite biology, such as further investigating the dynamic changes in D4Z4 methylation across different cell types, stages of development, and disease states. Indeed, in addition to embryonic development and FSHD, DUX4 expression is also seen physiologically in the human testis and thymus, and pathologically in several solid cancers, inflammatory disorders, and herpesvirus infection ( 7 ). Elucidating the epigenetic changes in these contexts may help to reveal the underlying mechanisms of DUX4 silencing and reactivation, paving the way for the identification of new therapeutic targets. Additionally, with the recent development of simultaneous profiling techniques such as DiMeLo-seq ( 71 ), nanoNOMe ( 72 ) and nanoHiMe-seq ( 73 ), the long-read, single-molecule approach presented here has the potential to be extended to the exploration of even more dimensions of D4Z4 regulation, such as SMCHD1 and CTCF binding, nucleosome occupancy, and histone modifications, alongside genetic sequence and methylation. 
Our workflow for investigating macrosatellite (epi)genetic landscapes therefore provides a means to explore alterations in the molecular machinery in functional studies, investigate the effects of emerging gene therapies, and map these dynamic and intriguing repetitive regions of the genome with much more depth, detail and precision than previously possible. Methods Patient sample collection and cell culture All participants provided signed informed consent for research and publication at the time of recruitment. The study complied with the Declaration of Helsinki. This study received ethics approval from the Walter and Eliza Hall Institute (Ethics ID: 20/16B) and the French Agency of Biomedicine (CRBAP-HM OCP05P01E003; Étude D-2023-FSHD). FSHD and BAMS patients were clinically diagnosed and have previously received clinical genetic testing for D4Z4 array size (FSHD and controls), D4Z4 methylation (FSHD) and SMCHD1 genotype (FSHD, BAMS and controls), as described in Supplementary Table S3. Controls were previously confirmed to have D4Z4 array size >10 units and to have no pathogenic SMCHD1 variant. Primary fibroblasts from FSHD and BAMS patients and controls were obtained via skin biopsies. Fibroblasts were cultured in DMEM with 4.5 g/L D-glucose, FBS South American, Penicillin-Streptomycin 5000 U/mL, and GlutaMAX™ (Thermo Fisher), and incubated at 37°C, 5% CO2 / 9% air in a humidified incubator. DNA extraction DNA was extracted from primary fibroblasts using the Monarch® high-molecular weight (HMW) DNA extraction kit for cells and blood as per the protocol (NEB T3050) using 300 rpm agitation, and eluted in elution buffer (EB) from the relevant Oxford Nanopore Technologies (ONT) kit used for library preparation (SQK-ULK001, SQK-ULK114 or SQK-CS9109). 
Nanopore library preparation and sequencing For ultra-long whole-genome sequencing, libraries were prepared using the ONT ultra-long DNA sequencing kit (either SQK-ULK001 or SQK-ULK114) according to the relevant ONT protocol. For Cas9-targeted sequencing, Cas9 crRNAs targeting 4q/10q, SMCHD1, DNMT3B and LRIF1 (Supplementary Table S3) were designed using CHOPCHOP (v3) (GRCh38 or CHM13 T2T v1.1, CRISPR/Cas9, nanopore enrichment, ‘Doench et al. 2014 – only for NGG PAM’). Cas9 RNP formation and DNA library preparation for Cas9-targeted sequencing were performed using the ONT Cas9 Sequencing Kit (SQK-CS9109) as per the ONT protocol. Libraries were loaded on R9.4.1 PromethION flow cells (FLO-PRO002) if prepared using SQK-ULK001 or SQK-CS9109, and on R10.4.1 (FLO-PRO114M) if prepared using SQK-ULK114, and sequenced to exhaustion on a PromethION P24. Basecalling Reads from fast5 files were basecalled using the latest Guppy version at the time of sequencing (v6.1.1 or v6.3.4) using the modified basecalling model dna_r9.4.1_450bps_modbases_5mc_cg_sup_prom.cfg. Reads from pod5 files from later sequencing runs were basecalled using the latest Dorado version at the time of sequencing (v0.3.2 or v0.3.3), using model [email protected] with the argument --modified-bases 5mCG_5hmCG for concurrent modified basecalling. Haplotyping pipeline Processing of fastq files to identify, annotate and haplotype 4q and 10q reads was performed using a custom script, available at https://github.com/lucindaxiao/D4Z4End2End . Briefly, reads were mapped against a D4Z4 reference sequence (the first full KpnI-KpnI D4Z4 unit from the 4q array of CHM13v1.0) using minimap2 (v2.24), to identify D4Z4-containing reads and count the number of D4Z4 units. These reads were then mapped against the 4q and 10q regions of CHM13v2.0, and secondary and supplementary alignments were filtered out using samtools view -F 2304 , to identify 4q- and 10q-specific D4Z4 reads and assign them to 4q or 10q haplotypes. 
D4Z4-containing reads were also mapped against reference sequences for p13E-11 (from ( 74 )), pLAM (sequence from end of last D4Z4 to end of DUX4 exon 3 in 4q allele of CHM13v2.0), and the 4qB distal region (from GRCh38) for read annotation. Reads containing pLAM and qB-sequence were assigned to haplotype A and B, respectively. Reads containing both p13E-11 and a D4Z4-distal feature (pLAM or qB-sequence) were classified as spanning reads, and all other reads were classified as non-spanning reads. Reads were also searched for exact matches for XapI sites, BlnI sites, and 4qA/10qA-specific poly(A)-signal sequences using regular expression matching in Python. Noise-Cancelling Repeat Finder (v1.01.00) ( 75 ) was used to search reads for beta-satellite repeats (from ( 76 )) and telomeric repeats. The interactive visualisation tool for exploration of haplotyped reads was created using D3.js . The haplotyping pipeline was used to process the ultra-long whole-genome sequencing and Cas9-targeted sequencing data from all patient samples, and publicly available ONT sequencing data for CHM13v2.0 from the Telomere to Telomere (T2T) Consortium (accessed from s3://human-pangenomics/T2T/CHM13/nanopore/rel8-guppy-5.0.7/reads.fastq.gz) and for five cell lines from the Human Pangenome Reference Consortium (HPRC) (accessed from s3://human-pangenomics/working/HPRC/). D4Z4 genetic analysis For comparison of the haplotyping pipeline results for CHM13 and the HPRC cell lines to their corresponding assemblies (CHM13v2.0 and HPRC assemblies), D4Z4 arrays in the assemblies were annotated by mapping against the D4Z4, p13E-11, pLAM and 4qB reference sequences described above, using minimap2 ( 77 ). 
Consensus sequences for patient 4q and 10q alleles were generated from spanning reads identified by the haplotyping pipeline, using a two-step process: ( 1 ) minimap2 ( 77 ) for overlap detection, followed by ( 2 ) Racon (v1.4.20) ( 25 ) for consensus calling, using the --no-trimming argument and using the longest spanning read as the target sequence for correction. Minimap2 ( 77 ) was used to extract 4q/10q D4Z4 units from the consensus sequences and from the CHM13v2.0 and GRCh38 assemblies (GRCh38 accessed from http://hgdownload.soe.ucsc.edu/goldenPath/hg38/chromosomes/ ), and to map them against a 4qA-type D4Z4 reference sequence. The 4qA-type D4Z4 reference sequence was generated by performing a MUSCLE (v3.8) ( 78 ) multiple sequence alignment of all full-length D4Z4 units from the CHM13v2.0 4q allele, and taking the most frequent value at each position (nucleotide or gap). D4Z4 alignments were visualised using Integrative Genomics Viewer (v2.17.4) ( 79 ). MUSCLE was also used for multiple sequence alignments of distal D4Z4 sequences, which were visualised using the R package ggmsa ( 80 ). For the BAMS1 sample, WhatsHap (v2.1) ( 81 ) was used to phase the non-spanning 4qA reads that overlapped the proximal end of the D4Z4 array. FSHD2 gene panel variant calling BAM files for patient samples were run through DeepVariant (v1.5.0) ( 23 ) using model type ONT_R104, to generate variant call format (VCF) files targeting SMCHD1, DNMT3B and LRIF1 . VCF files were filtered for ‘PASS’ calls using bcftools ( 82 ). VCF files were input into Variant Effect Predictor (VEP) (v107.0) ( 24 ) using the T2T-CHM13v2.0 VEP cache file, to annotate variant calls for potential functional impact. 
Variant calls overlapping SMCHD1, DNMT3B or LRIF1 with IMPACT=HIGH or IMPACT=MODERATE as output by VEP were further assessed by comparison to previously reported pathogenic SMCHD1 variants ( https://databases.lovd.nl/shared/genes/SMCHD1 ) and variants in healthy controls, as well as variant pathogenicity predictions from AlphaMissense ( 59 ). Methylation analysis To generate allele-specific methylation plots, haplotyped reads were extracted from the mod-BAM files and re-aligned using minimap2 to either the allele’s Racon consensus sequence, or a raw read from the allele if no consensus sequence was generated. For samples with high coverage Cas9-targeted sequencing (17706, 19187, BAMS9), all 4q and 10q reads were mapped against the full set of consensus sequences for the sample’s alleles, to haplotype both spanning and non-spanning reads. For samples with lower coverage, reads included for methylation analysis were either spanning reads, or reads that could be confidently assigned to an allele based on allele-specific differences in SNVs (XapI or BlnI sites for 4q vs 10q), distal sequences (AS/AM/AL/B haplotype) and/or number of repeat units. Allele-specific methylation plots were generated using NanoMethViz ( 26 , 27 ), using the plot_region() function with smoothing_window=2000 . Annotations shown below the methylation plots were generated using minimap2 , as described above. To perform allele-specific autocorrelation of 5mC signals, bedmethyl files were first produced from the re-aligned modBAM files using modkit (v0.2.5) ( https://github.com/nanoporetech/modkit ) with the --cpg and --combine-strands arguments. A vector of length n where n is the length in nucleotides of the reference sequence for the allele was populated with the %5mC (‘fraction modified’ from modkit ) for all CpG sites with coverage ≥5, and ‘NA’ otherwise. 
The acf() function within R was then used to calculate the autocorrelation between %5mC values across the length of the vector, and the results were plotted for lag values (corresponding to distance in nucleotides between CpG sites) of up to 10000 nucleotides (approximately three full D4Z4 units) and up to 500 nucleotides. Allele-specific %5mC plots were generated from the bedmethyl files, once again filtering for CpG sites with coverage ≥5, using ggplot2 (v3.5.1) ( 83 ). Smoothed lines were generated using the geom_smooth() function using the LOESS method with span = 0.2 . Mean methylation rate was calculated using the region_methy_stats() function within NanoMethViz ( 26 , 27 ), using a threshold of 0.5 to call methylated and unmethylated sites. Regression lines for the scatterplots of mean methylation prevalence vs number of repeats were produced using geom_line(stat="smooth", method = "lm") , and Pearson and Spearman correlations were calculated using the cor() function within R . For comparison of the mean methylation prevalence of the DR1 site and DUX4 transcription start sites (TSS) within each repeat unit, the DR1 site was defined as nucleotides 563–814 of the D4Z4 unit with respect to the start of the KpnI site ( 37 ), and the DUX4 TSS region was defined as −200nt to +200nt with respect to the DUX4 TSS (nucleotide 10732 in Genbank AF117653.3 ). Data Availability 1KGP-ONT data are available on https://s3.amazonaws.com/1000g-ont/index.html . Human Pangenome Consortium draft assemblies and data used are available on https://github.com/human-pangenomics/HPP_Year1_Assemblies . https://s3.amazonaws.com/1000g-ont/index.html https://github.com/human-pangenomics/HPP_Year1_Assemblies Data access 1KGP-ONT ( 63 ) data available on https://s3.amazonaws.com/1000g-ont/index.html . Human Pangenome Consortium draft assemblies and data used are available on https://github.com/human-pangenomics/HPP_Year1_Assemblies . 
Competing interests statement QG and LCX received travel support from Oxford Nanopore Technologies to attend a conference. Funding MER, MEB and QG are supported by Australian National Health and Medical Research Council (NHMRC) Investigator Grants (GNT2017257, GNT1194345 plus GNT2041117, and GNT2007996, respectively). This work was supported by an early career research grant from the Brockhoff Foundation and the Marian and E.H. Flack Trust to QG. The Walter and Eliza Hall Institute receives support from the Victorian State Government through its Operational Infrastructure Support Program. Additional support was provided by the Australian Government through the National Collaborative Research Infrastructure Strategy (NCRIS) program and an Australian National Health and Medical Research Council IRIISS grant (9000719). Acknowledgements We thank Stephen Wilcox and Sarah MacRaild from the WEHI Genomics Platform for their assistance with nanopore sequencing; and Kelsey Breslin and Hannah Vanyai for their help with cell culture. Bibliography 1. ↵ KM Flanigan . Facioscapulohumeral muscular dystrophy and scapuloperoneal disorders . Myology , 2 : 1123 – 1133 , 2004 . OpenUrl 2. ↵ JC De Greef , RJLF Lemmers , P Camano , JW Day , S Sacconi , M Dunand , BGM Van Engelen , S Kiuru-Enari , GW Padberg , Alberto Luis Rosa , et al. Clinical features of facioscapulohumeral muscular dystrophy 2 . Neurology , 75 ( 17 ): 1548 – 1554 , 2010 . OpenUrl CrossRef PubMed 3. ↵ Johanna C.W. Deenen , Hisse Arnts , Silvère M. Van Der Maarel , George W. Padberg , Jan J.G.M. Verschuuren , Egbert Bakker , Stephanie S. Weinreich , André L.M. Verbeek , and Baziel G.M. Van Engelen . Population-based incidence and prevalence of facioscapulohumeral dystrophy . Neurology , 83 ( 12 ): 1056 – 1059 , September 2014 . ISSN 0028-3878, 1526-632X . doi: 10.1212/WNL.0000000000000797 . OpenUrl CrossRef PubMed 4. 
Ml Mostacciuolo , E Pastorello , G Vazza , M Miorin , C Angelini , G Tomelleri , G Galluzzi , and Cp Trevisan . Facioscapulohumeral muscular dystrophy: epidemiological and molecular study in a north-east Italian population sample . Clinical Genetics , 75 ( 6 ): 550 – 555 , June 2009 . ISSN 0009-9163, 1399-0004 . doi: 10.1111/j.1399-0004.2009.01158.x . OpenUrl CrossRef PubMed Web of Science 5. ↵ Zhiqiang Wang , Liangliang Qiu , Minting Lin , Long Chen , Fuze Zheng , Lin Lin , Feng Lin , Zhixian Ye , Xiaodan Lin , Junjie He , Lili Wang , Xin Lin , Qifang He , Wanjin Chen , Yi Lin , Ying Fu , and Ning Wang . Prevalence and disease progression of genetically-confirmed facioscapulohumeral muscular dystrophy type 1 (FSHD1) in China between 2001 and 2020: a nationwide population-based study . The Lancet Regional Health – Western Pacific , 18 , January 2022 . ISSN 2666-6065 . doi: 10.1016/j.lanwpc.2021.100323 . Publisher: Elsevier . OpenUrl CrossRef PubMed 6. ↵ Alberto De Iaco , Evarist Planet , Andrea Coluccio , Sonia Verp , Julien Duc , and Didier Trono . DUX-family transcription factors regulate zygotic genome activation in placental mammals . Nature Genetics , 49 ( 6 ): 941 – 945 , June 2017 . ISSN 1546-1718 . doi: 10.1038/ng.3858 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed 7. ↵ Emanuele Mocciaro , Valeria Runfola , Paola Ghezzi , Maria Pannese , and Davide Gabellini . Dux4 role in normal physiology and in fshd muscular dystrophy . Cells , 10 ( 12 ): 3322 , 2021 . OpenUrl CrossRef PubMed 8. ↵ Amy E Campbell , Andrea E Belleville , Rebecca Resnick , Sean C Shadle , and Stephen J Tapscott . Facioscapulohumeral dystrophy: activating an early embryonic transcriptional program in human skeletal muscle . Human Molecular Genetics , 27 ( R2 ): R153 – R162 , 04 2018 . ISSN 0964-6906 . doi: 10.1093/hmg/ddy162 . OpenUrl CrossRef 9. ↵ R. J. L. F. Lemmers , P. J. van der Vliet , R. Klooster , S. Sacconi , P. Camano , J. G. Dauwerse , L. Snider , K. R. 
Straasheijm , G. Jan van Ommen , G. W. Padberg , D. G. Miller , S. J. Tapscott , R. Tawil , R. R. Frants , and S. M. van der Maarel . A Unifying Genetic Model for Facioscapulohumeral Muscular Dystrophy . Science , 329 ( 5999 ): 1650 – 1653 , September 2010 . ISSN 0036-8075, 1095-9203 . doi: 10.1126/science.1189044 . OpenUrl Abstract / FREE Full Text 10. ↵ Richard J. L. F. Lemmers , Petra G. M. van Overveld , Lodewijk A. Sandkuijl , Harry Vrieling , George W. Padberg , Rune R. Frants , and Silvère M. van der Maarel . Mechanism and Timing of Mitotic Rearrangements in the Subtelomeric D4Z4 Repeat Involved in Facioscapulohumeral Muscular Dystrophy . The American Journal of Human Genetics , 75 ( 1 ): 44 – 53 , July 2004 . ISSN 0002-9297, 1537-6605 . doi: 10.1086/422175 . Publisher: Elsevier . OpenUrl CrossRef PubMed 11. ↵ Silvère M. van der Maarel , Giancarlo Deidda , Richard J. L. F. Lemmers , Petra G. M. van Overveld , Michiel van der Wielen , Jane E. Hewitt , Lodewijk Sandkuijl , Bert Bakker , Gert-Jan B. van Ommen , George W. Padberg , and Rune R. Frants . De Novo Facioscapulohumeral Muscular Dystrophy: Frequent Somatic Mosaicism, Sex-Dependent Phenotype, and the Role of Mitotic Transchromosomal Repeat Interaction between Chromosomes 4 and 10 . The American Journal of Human Genetics , 66 ( 1 ): 26 – 35 , January 2000 . ISSN 0002-9297 . OpenUrl 12. ↵ Richard JLF Lemmers , Rabi Tawil , Lisa M Petek , Judit Balog , Gregory J Block , Gijs WE Santen , Amanda M Amell , Patrick J van Der Vliet , Rowida Almomani , Kirsten R Straasheijm , et al. Digenic inheritance of an smchd1 mutation and an fshd-permissive d4z4 allele causes facioscapulohumeral muscular dystrophy type 2 . Nature genetics , 44 ( 12 ): 1370 – 1374 , 2012 . OpenUrl CrossRef PubMed 13. 
↵ Marlinde L van den Boogaard , Richard JLF Lemmers , Judit Balog , Mariëlle Wohlgemuth , Mari Auranen , Satomi Mitsuhashi , Patrick J van der Vliet , Kirsten R Straasheijm , Rob FP van den Akker , Marjolein Kriek , et al. 
Mutations in dnmt3b modify epigenetic repression of the d4z4 repeat and the penetrance of facioscapulohumeral dystrophy . The American Journal of Human Genetics , 98 ( 5 ): 1020 – 1029 , 2016 . OpenUrl CrossRef PubMed 14. ↵ Kohei Hamanaka , Darina Šikrová , Satomi Mitsuhashi , Hiroki Masuda , Yukari Sekiguchi , Atsuhiko Sugiyama , Kazumoto Shibuya , Richard JLF Lemmers , Remko Goossens , Megumu Ogawa , et al. Homozygous nonsense variant in lrif1 associated with facioscapulohumeral muscular dystrophy . Neurology , 94 ( 23 ): e2441 – e2447 , 2020 . OpenUrl Abstract / FREE Full Text 15. ↵ Emiliano Giardina , Pilar Camaño , Sarah Burton-Jones , Gina Ravenscroft , Franclo Henning , Frederique Magdinier , Nienke Van Der Stoep , Patrick J. Van Der Vliet , Rafaëlle Bernard , Pedro J. Tomaselli , Mark R. Davis , Ichizo Nishino , Piraye Oflazer , Valerie Race , Venugopalan Y. Vishnu , Victoria Williams , Cláudia F. R. Sobreira , Silvere M. Van Der Maarel , Steve A. Moore , Nicol C. Voermans , and Richard J. L. F. Lemmers . Best practice guidelines on genetic diagnostics of facioscapulohumeral muscular dystrophy: Update of the 2012 guidelines . Clinical Genetics , 106 ( 1 ): 13 – 26 , July 2024 . ISSN 0009-9163, 1399-0004 . doi: 10.1111/cge.14533 . OpenUrl CrossRef PubMed 16. ↵ Richard J L F Lemmers , Patrick J van der Vliet , Jeroen P Vreijling , Don Henderson , Nienke van der Stoep , Nicol Voermans , Baziel van Engelen , Frank Baas , Sabrina Sacconi , Rabi Tawil , and Silvère M van der Maarel . Cis D4Z4 repeat duplications associated with facioscapulohumeral muscular dystrophy type 2 . Human Molecular Genetics , 27 ( 20 ): 3488 – 3497 , October 2018 . ISSN 0964-6906, 1460-2083 . doi: 10.1093/hmg/ddy236 . OpenUrl CrossRef PubMed 17. ↵ Jessica C de Greef , Richard JLF Lemmers , Baziel GM van Engelen , Sabrina Sacconi , Shannon L Venance , Rune R Frants , Rabi Tawil , and Silvère M van der Maarel . 
Common epigenetic changes of d4z4 in contraction-dependent and contraction-independent fshd . Human mutation , 30 ( 10 ): 1449 – 1459 , 2009 . OpenUrl CrossRef PubMed Web of Science 18. ↵ Richard JLF Lemmers , Peggy de Kievit , Lodewijk Sandkuijl , George W Padberg , Gert-Jan B van Ommen , Rune R Frants , and Silvere M van der Maarel . Facioscapulohumeral muscular dystrophy is uniquely associated with one of the two variants of the 4q subtelomere . Nature genetics , 32 ( 2 ): 235 – 236 , 2002 . OpenUrl CrossRef PubMed Web of Science 19. ↵ Egbert Bakker , Cisca Wijmenga , Rolf H. A. M. Vossen , George W. Padberg , Jane Hewitt , Michiel van Der Wielen , Kirsten Rasmussen , and Rune R. Frants . The FSHD-linked locus D4F104S1 (p13E-11) ON 4q35 has a homologue on 10qter . Muscle & Nerve , 18 ( S13 ): S39 – S44 , 1995 . ISSN 1097-4598 . doi: 10.1002/mus.880181309 . _eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/mus.880181309 . OpenUrl CrossRef 20. ↵ S. Cacurri , N. Piazzo , G. Deidda , E. Vigneti , G. Galluzzi , L. Colantoni , B. Merico , E. Ricci , and L. Felicetti . Sequence Homology between 4qter and 10qter Loci Facilitates the Instability of Subtelomeric KpnI Repeat Units Implicated in Facioscapulohumeral Muscular Dystrophy . The American Journal of Human Genetics , 63 ( 1 ): 181 – 190 , July 1998 . ISSN 0002-9297, 1537-6605 . doi: 10.1086/301906 . Publisher: Elsevier . OpenUrl CrossRef PubMed Web of Science 21. ↵ Richard J. L. F. Lemmers , Patrick J. van der Vliet , Kristiaan J. van der Gaag , Sofia Zuniga , Rune R. Frants , Peter de Knijff , and Silvère M. van der Maarel . Worldwide Population Analysis of the 4q and 10q Subtelomeres Identifies Only Four Discrete Interchromosomal Sequence Transfers in Human Evolution . The American Journal of Human Genetics , 86 ( 3 ): 364 – 377 , March 2010 . ISSN 0002-9297 . doi: 10.1016/j.ajhg.2010.01.035 . OpenUrl CrossRef PubMed Web of Science 22. ↵ Richard JLF Lemmers , Patrick J. 
van der Vliet , Judit Balog , Jelle J. Goeman , Wibowo Arindrarto , Yvonne D. Krom , Kirsten R. Straasheijm , Rashmie D. Debipersad , Gizem Özel , Janet Sowden , Lauren Snider , Karlien Mul , Sabrina Sacconi , Baziel van Engelen , Stephen J. Tapscott , Rabi Tawil , and Silvère M. van der Maarel . Deep characterization of a common D4Z4 variant identifies biallelic DUX4 expression as a modifier for disease penetrance in FSHD2 . European Journal of Human Genetics , 26 ( 1 ): 94 – 106 , January 2018 . ISSN 1476-5438 . doi: 10.1038/s41431-017-0015-0 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed 23. ↵ Ryan Poplin , Pi-Chuan Chang , David Alexander , Scott Schwartz , Thomas Colthurst , Alexander Ku , Dan Newburger , Jojo Dijamco , Nam Nguyen , Pegah T Afshar , et al. A universal snp and small-indel variant caller using deep neural networks . Nature biotechnology , 36 ( 10 ): 983 – 987 , 2018 . OpenUrl CrossRef PubMed 24. ↵ William McLaren , Laurent Gil , Sarah E Hunt , Harpreet Singh Riat , Graham RS Ritchie , Anja Thormann , Paul Flicek , and Fiona Cunningham . The ensembl variant effect predictor . Genome biology , 17 : 1 – 14 , 2016 . OpenUrl CrossRef PubMed 25. ↵ Robert Vaser , Ivan Sović , Niranjan Nagarajan , and Mile Šikić . Fast and accurate de novo genome assembly from long uncorrected reads . Genome Research , 27 ( 5 ): 737 – 746 , May 2017 . ISSN 1088-9051, 1549-5469 . doi: 10.1101/gr.214270.116 . OpenUrl Abstract / FREE Full Text 26. ↵ Shian Su , Quentin Gouil , Marnie E Blewitt , Dianne Cook , Peter F Hickey , and Matthew E Ritchie . Nanomethviz: An r/bioconductor package for visualizing long-read methylation data . PLOS Computational Biology , 17 ( 10 ): e1009524 , 2021 . OpenUrl 27. ↵ Shian Su , Lucinda Xiao , James Lancaster , Tamara Cameron , Kelsey Breslin , Peter F. Hickey , Marnie E. Blewitt , Quentin Gouil , and Matthew E. Ritchie . 
A streamlined work-flow for long-read DNA methylation analysis with NanoMethViz and Bioconductor , February 2025 . 28. ↵ K. Goto , I. Nishino , and Y. K. Hayashi . Very low penetrance in 85 Japanese families with facioscapulohumeral muscular dystrophy 1A . Journal of Medical Genetics , 41 ( 1 ): e12 – e12 , January 2004 . ISSN 0022-2593, 1468-6244 . doi: 10.1136/jmg.2003.008755 . Publisher: BMJ Publishing Group Ltd Section: Online mutation report . OpenUrl FREE Full Text 29. ↵ Karlien Mul , Nicol C. Voermans , Richard J.L.F. Lemmers , Marianne A. Jonker , Patrick J. van der Vliet , George W. Padberg , Baziel G.M. van Engelen , Silvère M. van der Maarel , and Corinne G.C. Horlings . Phenotype-genotype relations in facioscapulohumeral muscular dystrophy type 1 . Clinical Genetics , 94 ( 6 ): 521 – 527 , 2018 . ISSN 1399-0004 . doi: 10.1111/cge.13446 . _eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1111/cge.13446 . OpenUrl CrossRef PubMed 30. ↵ Emmanuelle Salort-Campana , Karine Nguyen , Rafaelle Bernard , Elisabeth Jouve , Guilhem Solé , Aleksandra Nadaj-Pakleza , Julien Niederhauser , Estelle Charles , Elisabeth Ollagnon , Françoise Bouhour , Sabrina Sacconi , Andoni Echaniz-Laguna , Claude Desnuelle , Christine Tranchant , Christophe Vial , Frederique Magdinier , Marc Bartoli , Marie-Christine Arne-Bes , Xavier Ferrer , Thierry Kuntzer , Nicolas Levy , Jean Pouget , and Shahram Attarian . Low penetrance in facioscapulohumeral muscular dystrophy type 1 with large pathological D4Z4 alleles: a cross-sectional multicenter study . Orphanet Journal of Rare Diseases , 10 ( 1 ): 2 , January 2015 . ISSN 1750-1172 . doi: 10.1186/s13023-014-0218-1 . OpenUrl CrossRef PubMed 31. 
↵ Natalie D Shaw , Harrison Brand , Zachary A Kupchinsky , Hemant Bengani , Lacey Plummer , Takako I Jones , Serkan Erdin , Kathleen A Williamson , Joe Rainger , Alexei Stortchevoi , Kaitlin Samocha , Benjamin B Currall , Donncha S Dunican , Ryan L Collins , Jason R Willer , Angela Lek , Monkol Lek , Malik Nassan , Shahrin Pereira , Tammy Kammin , Diane Lucente , Alexandra Silva , Catarina M Seabra , Colby Chiang , Yu An , Morad Ansari , Jacqueline K Rainger , Shelagh Joss , Jill Clayton Smith , Margaret F Lippincott , Sylvia S Singh , Nirav Patel , Jenny W Jing , Jennifer R Law , Nalton Ferraro , Alain Verloes , Anita Rauch , Katharina Steindl , Markus Zweier , Ianina Scheer , Daisuke Sato , Nobuhiko Okamoto , Christina Jacobsen , Jeanie Tryggestad , Steven Chernausek , Lisa A Schimmenti , Benjamin Brasseur , Claudia Cesaretti , Jose E García-Ortiz , Tatiana Pineda Buitrago , Orlando Perez Silva , Jodi D Hoffman , Wolfgang Mühlbauer , Klaus W Ruprecht , Bart L Loeys , Masato Shino , Angela M Kaindl , Chie-Hee Cho , Cynthia C Morton , Richard R Meehan , Veronica Van Heyningen , Eric C Liao , Ravikumar Balasubramanian , Janet E Hall , Stephanie B Seminara , Daniel Macarthur , Steven A Moore , Koh-ichiro Yoshiura , James F Gusella , Joseph A Marsh , John M Graham , Angela E Lin , Nicholas Katsanis , Peter L Jones , William F Crowley , Erica E Davis , David R FitzPatrick , and Michael E Talkowski . SMCHD1 mutations associated with a rare muscular dystrophy can also cause isolated arhinia and Bosma arhinia microphthalmia syndrome . Nature Genetics , 49 ( 2 ): 238 – 248 , February 2017 . ISSN 1061-4036, 1546-1718 . doi: 10.1038/ng.3743 . OpenUrl CrossRef PubMed 32. 
↵ Christopher T Gordon , Shifeng Xue , Gökhan Yigit , Hicham Filali , Kelan Chen , Nadine Rosin , Koh-ichiro Yoshiura , Myriam Oufadem , Tamara J Beck , Ruth McGowan , Alex C Magee , Janine Altmüller , Camille Dion , Holger Thiele , Alexandra D Gurzau , Peter Nürnberg , Dieter Meschede , Wolfgang Mühlbauer , Nobuhiko Okamoto , Vinod Varghese , Rachel Irving , Sabine Sigaudy , Denise Williams , S Faisal Ahmed , Carine Bonnard , Mung Kei Kong , Ilham Ratbi , Nawfal Fejjal , Meriem Fikri , Siham Chafai Elalaoui , Hallvard Reigstad , Christine Bole-Feysot , Patrick Nitschké , Nicola Ragge , Nicolas Lévy , Gökhan Tunçbilek , Audrey S M Teo , Michael L Cunningham , Abdelaziz Sefiani , Hülya Kayserili , James M Murphy , Chalermpong Chatdokmaiprai , Axel M Hillmer , Duangrurdee Wattanasirichaigoon , Stanislas Lyonnet , Frédérique Magdinier , Asif Javed , Marnie E Blewitt , Jeanne Amiel , Bernd Wollnik , and Bruno Reversade . De novo mutations in SMCHD1 cause Bosma arhinia microphthalmia syndrome and abrogate nasal development . Nature Genetics , 49 ( 2 ): 249 – 255 , February 2017 . ISSN 1061-4036, 1546-1718 . doi: 10.1038/ng.3765 . OpenUrl CrossRef PubMed 33. ↵ Karlien Mul , Richard J.L.F. Lemmers , Marjolein Kriek , Patrick J. van der Vliet , Marlinde L. van den Boogaard , Umesh A. Badrising , John M. Graham , Angela E. Lin , Harrison Brand , Steven A. Moore , Katherine Johnson , Teresinha Evangelista , Ana Töpf , Volker Straub , Solange Kapetanovic García , Sabrina Sacconi , Rabi Tawil , Stephen J. Tapscott , Nicol C. Voermans , Baziel G.M. van Engelen , Corinne G.C. Horlings , Natalie D. Shaw , and Silvère M. van der Maarel . FSHD type 2 and Bosma arhinia microphthalmia syndrome . Neurology , 91 ( 6 ): e562 – e570 , August 2018 . doi: 10.1212/WNL.0000000000005958 . Publisher: Wolters Kluwer . OpenUrl Abstract / FREE Full Text 34. 
↵ Jessica Vasale , Fatih Boyar , Michael Jocson , Vladimira Sulcova , Patricia Chan , Khalida Liaquat , Carol Hoffman , Marc Meservey , Isabell Chang , David Tsao , Kerri Hensley , Yan Liu , Renius Owen , Corey Braastad , Weimin Sun , Pierre Walrafen , Jun Komatsu , Jia-Chi Wang , Aaron Bensimon , Arturo Anguiano , Malgorzata Jaremko , Zhenyuan Wang , Sat Batish , Charles Strom , and Joseph Higgins . Molecular combing compared to Southern blot for measuring D4Z4 contractions in FSHD . Neuromuscular Disorders , 25 ( 12 ): 945 – 951 , December 2015 . ISSN 0960-8966 . doi: 10.1016/j.nmd.2015.08.008 . OpenUrl CrossRef PubMed 35. Karine Nguyen , Pierre Walrafen , Rafaëlle Bernard , Shahram Attarian , Charlène Chaix , Catherine Vovan , Emilie Renard , Nathalie Dufrane , Jean Pouget , Anne Vannier , Aaron Bensimon , and Nicolas Lévy . Molecular combing reveals allelic combinations in facioscapulo-humeral dystrophy . Annals of Neurology , 70 ( 4 ): 627 – 633 , 2011 . ISSN 1531-8249 . doi: 10.1002/ana.22513 . _eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/ana.22513 . OpenUrl CrossRef PubMed 36. ↵ Karine Nguyen , Francesca Puppo , Stéphane Roche , Marie-Cécile Gaillard , Charlène Chaix , Arnaud Lagarde , Marjorie Pierret , Catherine Vovan , Sylviane Olschwang , Emmanuelle Salort-Campana , Shahram Attarian , Marc Bartoli , Rafaëlle Bernard , Frédérique Magdinier , and Nicolas Levy . Molecular combing reveals complex 4q35 rearrangements in Facioscapu-lohumeral dystrophy . Human Mutation , 38 ( 10 ): 1432 – 1441 , 2017 . ISSN 1098-1004 . doi: 10.1002/humu.23304 . _eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/humu.23304 . OpenUrl CrossRef PubMed 37. ↵ Lynn M. Hartweck , Lindsey J. Anderson , Richard J. Lemmers , Abhijit Dandapat , Erik A. Toso , Joline C. Dalton , Rabi Tawil , John W. Day , Silvère M. van der Maarel , and Michael Kyba . A focal domain of extreme demethylation within D4Z4 in FSHD2 . Neurology , 80 ( 4 ): 392 – 399 , January 2013 . 
doi: 10.1212/WNL.0b013e31827f075c . Publisher: Wolters Kluwer . OpenUrl Abstract / FREE Full Text 38. ↵ Takako I. Jones , Chi Yan , Peter C. Sapp , Diane McKenna-Yasek , Peter B. Kang , Colin Quinn , Johnny S. Salameh , Oliver D. King , and Peter L. Jones . Identifying diagnostic DNA methylation profiles for facioscapulohumeral muscular dystrophy in blood and saliva using bisulfite sequencing . Clinical Epigenetics , 6 ( 1 ): 23 , October 2014 . ISSN 1868-7083 . doi: 10.1186/1868-7083-6-23 . OpenUrl CrossRef PubMed 39. ↵ Patrizia Calandra , Isabella Cascino , Richard J. L. F. Lemmers , Giuliana Galluzzi , Emanuela Teveroni , Mauro Monforte , Giorgio Tasca , Enzo Ricci , Fabiola Moretti , Silvère M. van der Maarel , and Giancarlo Deidda . Allele-specific DNA hypomethylation characterises FSHD1 and FSHD2 . Journal of Medical Genetics , 53 ( 5 ), May 2016 . ISSN 0022-2593, 1468-6244 . doi: 10.1136/jmedgenet-2015-103436 . OpenUrl Abstract / FREE Full Text 40. ↵ Russell J. Butterfield , Diane M. Dunn , Brett Duval , Sarah Moldt , and Robert B. Weiss . Deci-phering D4Z4 CpG methylation gradients in fascioscapulohumeral muscular dystrophy using nanopore sequencing . Genome Research , 33 ( 9 ): 1439 – 1454 , September 2023 . ISSN 1088-9051, 1549-5469 . doi: 10.1101/gr.277871.123 . OpenUrl Abstract / FREE Full Text 41. Yosuke Hiramuki , Yuriko Kure , Yoshihiko Saito , Megumu Ogawa , Keiko Ishikawa , Madoka Mori-Yoshimura , Yasushi Oya , Yuji Takahashi , Dae-Seong Kim , Noriko Arai , Chiaki Mori , Tsuyoshi Matsumura , Tadanori Hamano , Kenichiro Nakamura , Koji Ikezoe , Shinichiro Hayashi , Yuichi Goto , Satoru Noguchi , and Ichizo Nishino . Simultaneous measurement of the size and methylation of chromosome 4qA-D4Z4 repeats in facioscapulohumeral muscular dystrophy by long-read sequencing . Journal of Translational Medicine , 20 ( 1 ): 517 , November 2022 . ISSN 1479-5876 . doi: 10.1186/s12967-022-03743-7 . OpenUrl CrossRef PubMed 42. 
Mingtao Huang , Qinxin Zhang , Jiao Jiao , Jianquan Shi , Yiyun Xu , Cuiping Zhang , Ran Zhou , Wenwen Liu , Yixuan Liang , Hao Chen , Yan Wang , Zhengfeng Xu , and Ping Hu . Comprehensive genetic analysis of facioscapulohumeral muscular dystrophy by Nanopore long-read whole-genome sequencing . Journal of Translational Medicine , 22 ( 1 ): 451 , May 2024 . ISSN 1479-5876 . doi: 10.1186/s12967-024-05259-8 . OpenUrl CrossRef PubMed 43. Patra Yeetong , Pimchanok Kulsirichawaroj , Theerawat Kumutpongpanich , Chalurmpon Srichomthong , Phichittra Od-ek , Supphakorn Rakwongkhachon , Titaporn Thamcharoenvipas , Oranee Sanmaneechai , Monnat Pongpanich , and Vorasuk Shotelersuk . Long-read Nanopore sequencing identified D4Z4 contractions in patients with facioscapulohumeral muscular dystrophy . Neuromuscular Disorders , 33 ( 7 ): 551 – 556 , July 2023 . ISSN 09608966 . doi: 10.1016/j.nmd.2023.05.004 . OpenUrl CrossRef PubMed 44. Yanan Wang , Zhenhua Zhao , Fei Meng , and Xiangdong Kong . Accurate prenatal diagnosis of facioscapulohumeral muscular dystrophy 1 using nanopore sequencing . Journal of Medical Genetics , 2024 . ISSN 0022-2593 . doi: 10.1136/jmg-2023-109832 . OpenUrl Abstract / FREE Full Text 45. ↵ Richard J L F Lemmers , Russell Butterfield , Patrick J van der Vliet , Jan L de Bleecker , Ludo van der Pol , Diane M Dunn , Corrie E Erasmus , Marc D’Hooghe , Kristof Verhoeven , Judit Balog , Anne Bigot , Baziel van Engelen , Jeffrey Statland , Enrico Bugiardini , Nienke van der Stoep , Teresinha Evangelista , Chiara Marini-Bettolo , Peter van den Bergh , Rabi Tawil , Nicol C Voermans , John Vissing , Robert B Weiss , and Silvère M van der Maarel . Autosomal dominant in cis D4Z4 repeat array duplication alleles in facioscapulohumeral dystrophy . Brain , page awad312 , September 2023 . ISSN 0006-8950, 1460-2156 . doi: 10.1093/brain/awad312 . OpenUrl CrossRef PubMed 46. 
↵ Kun Li , Daniel Quiat , Fei She , Yuanwei Liu , Rong He , Alireza Haghighi , Fang Liu , Rui Zhang , Steven Robert DePalma , Ying Yang , Wen Wang , Christine E. Seidman , Ping Zhang , and Jonathan G. Seidman . Genetic diagnosis of facioscapulohumeral muscular dystrophy type 1 using rare-variant linkage analysis and long-read genome sequencing . Genetics in Medicine Open , 2 , January 2024 . ISSN 2949-7744 . doi: 10.1016/j.gimo.2024.101817 . Publisher: Elsevier . OpenUrl CrossRef 47. ↵ Sergey Nurk , Sergey Koren , Arang Rhie , Mikko Rautiainen , Andrey V. Bzikadze , Alla Mikheenko , Mitchell R. Vollger , Nicolas Altemose , Lev Uralsky , Ariel Gershman , Sergey Aganezov , Savannah J. Hoyt , Mark Diekhans , Glennis A. Logsdon , Michael Alonge , Stylianos E. Antonarakis , Matthew Borchers , Gerard G. Bouffard , Shelise Y. Brooks , Gina V. Caldas , Nae-Chyun Chen , Haoyu Cheng , Chen-Shan Chin , William Chow , Leonardo G. de Lima , Philip C. Dishuck , Richard Durbin , Tatiana Dvorkina , Ian T. Fiddes , Giulio Formenti , Robert S. Fulton , Arkarachai Fungtammasan , Erik Garrison , Patrick G. S. Grady , Tina A. Graves-Lindsay , Ira M. Hall , Nancy F. Hansen , Gabrielle A. Hartley , Marina Haukness , Kerstin Howe , Michael W. Hunkapiller , Chirag Jain , Miten Jain , Erich D. Jarvis , Peter Kerpedjiev , Melanie Kirsche , Mikhail Kolmogorov , Jonas Korlach , Milinn Kremitzki , Heng Li , Valerie V. Maduro , Tobias Marschall , Ann M. McCartney , Jennifer McDaniel , Danny E. Miller , James C. Mullikin , Eugene W. Myers , Nathan D. Olson , Benedict Paten , Paul Peluso , Pavel A. Pevzner , David Porubsky , Tamara Potapova , Evgeny I. Rogaev , Jeffrey A. Rosenfeld , Steven L. Salzberg , Valerie A. Schneider , Fritz J. Sedlazeck , Kishwar Shafin , Colin J. Shew , Alaina Shumate , Ying Sims , Arian F. A. Smit , Daniela C. Soto , Ivan Sović , Jessica M. Storer , Aaron Streets , Beth A. Sullivan , Françoise Thibaud-Nissen , James Torrance , Justin Wagner , Brian P. 
Walenz , Aaron Wenger , Jonathan M. D. Wood , Chunlin Xiao , Stephanie M. Yan , Alice C. Young , Samantha Zarate , Urvashi Surti , Rajiv C. McCoy , Megan Y. Dennis , Ivan A. Alexandrov , Jennifer L. Gerton , Rachel J. O’Neill , Winston Timp , Justin M. Zook , Michael C. Schatz , Evan E. Eichler , Karen H. Miga , and Adam M. Phillippy . The complete sequence of a human genome . Science , 376 ( 6588 ): 44 – 53 , April 2022 . ISSN 0036-8075, 1095-9203 . doi: 10.1126/science.abj6987 . OpenUrl CrossRef PubMed 48. ↵ Wen-Wei Liao , Mobin Asri , Jana Ebler , Daniel Doerr , Marina Haukness , Glenn Hickey , Shuangjia Lu , Julian K. Lucas , Jean Monlong , Haley J. Abel , Silvia Buonaiuto , Xian H. Chang , Haoyu Cheng , Justin Chu , Vincenza Colonna , Jordan M. Eizenga , Xiaowen Feng , Christian Fischer , Robert S. Fulton , Shilpa Garg , Cristian Groza , Andrea Guarracino , William T. Harvey , Simon Heumos , Kerstin Howe , Miten Jain , Tsung-Yu Lu , Charles Markello , Fergal J. Martin , Matthew W. Mitchell , Katherine M. Munson , Moses Njagi Mwaniki , Adam M. Novak , Hugh E. Olsen , Trevor Pesout , David Porubsky , Pjotr Prins , Jonas A. Sibbesen , Jouni Sirén , Chad Tomlinson , Flavia Villani , Mitchell R. Vollger , Lucinda L. Antonacci-Fulton , Gunjan Baid , Carl A. Baker , Anastasiya Belyaeva , Konstantinos Billis , Andrew Carroll , Pi-Chuan Chang , Sarah Cody , Daniel E. Cook , Robert M. Cook-Deegan , Omar E. Cornejo , Mark Diekhans , Peter Ebert , Susan Fairley , Olivier Fedrigo , Adam L. Felsenfeld , Giulio Formenti , Adam Frankish , Yan Gao , Nanibaa’ A. Garrison , Carlos Garcia Giron , Richard E. Green , Leanne Haggerty , Kendra Hoekzema , Thibaut Hourlier , Hanlee P. Ji , Eimear E. Kenny , Barbara A. Koenig , Alexey Kolesnikov , Jan O. Korbel , Jennifer Kordosky , Sergey Koren , HoJoon Lee , Alexandra P. Lewis , Hugo Magalhães , Santiago Marco-Sola , Pierre Marijon , Ann McCartney , Jennifer McDaniel , Jacquelyn Mountcastle , Maria Nattestad , Sergey Nurk , Nathan D. 
Olson , Alice B. Popejoy , Daniela Puiu , Mikko Rautiainen , Allison A. Regier , Arang Rhie , Samuel Sacco , Ashley D. Sanders , Valerie A. Schneider , Baergen I. Schultz , Kishwar Shafin , Michael W. Smith , Heidi J. Sofia , Ahmad N. Abou Tayoun , Françoise Thibaud-Nissen , Francesca Floriana Tricomi , Justin Wagner , Brian Walenz , Jonathan M. D. Wood , Aleksey V. Zimin , Guillaume Bourque , Mark J. P. Chaisson , Paul Flicek , Adam M. Phillippy , Justin M. Zook , Evan E. Eichler , David Haussler , Ting Wang , Erich D. Jarvis , Karen H. Miga , Erik Garrison , Tobias Marschall , Ira M. Hall , Heng Li , and Benedict Paten . A draft human pangenome reference . Nature , 617 ( 7960 ): 312 – 324 , May 2023 . ISSN 0028-0836, 1476-4687 . doi: 10.1038/s41586-023-05896-x . OpenUrl CrossRef PubMed 49. ↵ Valentina Salsi , Matteo Chiara , Sara Pini , Paweł Kuś , Lucia Ruggiero , Silvia Bonanno , Carmelo Rodolico , Stefano C. Previtali , Maria Grazia D’Angelo , Lorenzo Maggi , Diego Lopergolo , Marek Kimmel , Filippo M. Santorelli , Graziano Pesole , and Rossella G. Tupler . A human pan-genomic analysis reconfigures the genetic and epigenetic make up of facioscapulohumeral muscular dystrophy , June 2023 . 50. ↵ Marie-Cécile Gaillard , Natacha Broucqsault , Julia Morere , Camille Laberthonnière , Camille Dion , Cherif Badja , Stéphane Roche , Karine Nguyen , Frédérique Magdinier , and Jérôme D. Robin. Analysis of the 4q35 chromatin organization reveals distinct long-range interactions in patients affected with Facio-Scapulo-Humeral Dystrophy . Scientific Reports , 9 ( 1 ): 10327 , July 2019 . ISSN 2045-2322 . doi: 10.1038/s41598-019-46861-x . OpenUrl CrossRef PubMed 51. Camille Laberthonnière , Elva-Maria Novoa-del Toro , Mégane Delourme , Raphaël Chevalier , Natacha Broucqsault , Kilian Mazaleyrat , Nathalie Streichenberger , Véronique Manel , Rafaëlle Bernard , Emmanuelle Salort Campana , et al. 
Facioscapulohumeral dystrophy weakened sarcomeric contractility is mimicked in induced pluripotent stem cells-derived innervated muscle fibres . Journal of Cachexia, Sarcopenia and Muscle , 13 ( 1 ): 621 – 635 , 2022 . OpenUrl 52. Camille Dion , Stéphane Roche , Camille Laberthonnière , Natacha Broucqsault , Virginie Mariot , Shifeng Xue , Alexandra D Gurzau , Agnieszka Nowak , Christopher T Gordon , Marie-Cécile Gaillard , et al. Smchd1 is involved in de novo methylation of the dux4-encoding d4z4 macrosatellite . Nucleic acids research , 47 ( 6 ): 2822 – 2839 , 2019 . OpenUrl CrossRef PubMed 53. ↵ Megane Delourme , Chaix Charlene , Laurene Gerard , Benjamin Ganne , Pierre Perrin , Catherine Vovan , Karine Bertaux , Karine Nguyen , Rafaëlle Bernard , and Frederique Magdinier . Complex 4q35 and 10q26 Rearrangements: A Challenge for Molecular Diagnosis of Patients With Facioscapulohumeral Dystrophy . Neurology Genetics , 9 ( 3 ): e200076 , June 2023 . ISSN 2376-7839 . doi: 10.1212/NXG.0000000000200076 . OpenUrl Abstract / FREE Full Text 54. ↵ Laurène Gérard , Mégane Delourme , Charlotte Tardy , Benjamin Ganne , Pierre Perrin , Charlene Chaix , Jean Philippe Trani , Nathalie Eudes , Camille Laberthonnière , Karine Bertaux , Chantal Missirian , Guillaume Bassez , Anthony Behin , Pascal Cintas , Florent Cluse , Elisa De La Cruz , Emilien Delmont , Teresinha Evangelista , Mélanie Fradin , Nawale Hadouiri , Ludivine Kouton , Pascal Laforêt , Claire Lefeuvre , Armelle Magot , Véronique Manel , Juliette Nectoux , Antoine Pegat , Guilhem Sole , Marco Spinazzi , Tanya Stojkovic , Juliette Svahn , Celine Tard , Christel Thauvin , Camille Verebi , Emmanuelle Salort Campana , Shahram Attarian , Karine Nguyen , Ali Badache , Rafaëlle Bernard , and Frédérique Magdinier . SMCHD1 genetic variants in type 2 facioscapulohumeral dystrophy and challenges in predicting pathogenicity and disease penetrance . European Journal of Human Genetics , December 2024 . 
ISSN 1018-4813, 1476-5438 . doi: 10.1038/s41431-024-01781-x . OpenUrl CrossRef 55. ↵ Tracy J Wright , Cisca Wijmenga , Lorraine N Clark , Rune R Frants , Robert Williamson , and Jane E Hewitt . Fine mapping of the fshd gene region orientates the rearranged fragment detected by the probe p13e-11 . Human molecular genetics , 2 ( 10 ): 1673 – 1678 , 1993 . OpenUrl CrossRef PubMed Web of Science 56. ↵ Eugénie Ansseau , Dalila Laoudj-Chenivesse , Aline Marcowycz , Alexandra Tassin , Céline Vanderplanck , Sébastien Sauvage , Marietta Barro , Isabelle Mahieu , Axelle Leroy , India Leclercq , et al. Dux4c is up-regulated in fshd. it induces the myf5 protein and human myoblast proliferation . PloS one , 4 ( 10 ): e7482 , 2009 . OpenUrl CrossRef PubMed 57. ↵ Giancarlo Deidda , Stefania Cacurri , Paola Grisanti , Eliana Vigneti , Nicola Piazzo , and Luciano Felicetti . Physical mapping evidence for a duplicated region on chromosome 10qter showing high homology with the facioscapulohumeral muscular dystrophy locus on chromosome 4qter . European Journal of Human Genetics , 3 ( 3 ): 155 – 167 , 1995 . OpenUrl CrossRef PubMed Web of Science 58. ↵ Richard JLF Lemmers , Mariëlle Wohlgemuth , Kristiaan J van der Gaag , Patrick J van der Vliet , Corrie MM van Teijlingen , Peter de Knijff , George W Padberg , Rune R Frants , and Silvère M van der Maarel . Specific sequence variations within the 4q35 region are associated with facioscapulohumeral muscular dystrophy . The American Journal of Human Genetics , 81 ( 5 ): 884 – 894 , 2007 . OpenUrl CrossRef PubMed Web of Science 59. ↵ Jun Cheng , Guido Novati , Joshua Pan , Clare Bycroft , Akvilė Žemgulytė , Taylor Applebaum , Alexander Pritzel , Lai Hong Wong , Michal Zielinski , Tobias Sargeant , Rosalia G. Schneider , Andrew W. Senior , John Jumper , Demis Hassabis , Pushmeet Kohli , and Žiga Avsec . Accurate proteome-wide missense variant effect prediction with AlphaMissense . Science , 381 ( 6664 ): eadg7492 , September 2023 . 
ISSN 0036-8075, 1095-9203 . doi: 10.1126/science.adg7492 . OpenUrl CrossRef PubMed 60. ↵ Alexandre Ottaviani , Sylvie Rival-Gervier , Amina Boussouar , Andrea M Foerster , Delphine Rondier , Sabrina Sacconi , Claude Desnuelle , Eric Gilson , and Frederique Magdinier . The d4z4 macrosatellite repeat acts as a ctcf and a-type lamins-dependent insulator in facio-scapulo-humeral dystrophy . PLoS genetics , 5 ( 2 ): e1000394 , 2009 . OpenUrl 61. ↵ Claudia Huichalaf , Stefano Micheloni , Giulia Ferri , Roberta Caccia , and Davide Gabellini . DNA Methylation Analysis of the Macrosatellite Repeat Associated with FSHD Muscular Dystrophy at Single Nucleotide Level . PLoS ONE , 9 ( 12 ): e115278 , December 2014 . ISSN 1932-6203 . doi: 10.1371/journal.pone.0115278 . OpenUrl CrossRef PubMed 62. ↵ Lyndsay Kerr , Ioannis Kafetzopoulos , Ramon Grima , and Duncan Sproul . Genome-wide single-molecule analysis of long-read DNA methylation reveals heterogeneous patterns at heterochromatin that reflect nucleosome organisation . PLOS Genetics , 19 ( 10 ): e1010958 , October 2023 . ISSN 1553-7404 . doi: 10.1371/journal.pgen.1010958 . OpenUrl CrossRef PubMed 63. ↵ Jonas A. Gustafson , Sophia B. Gibson , Nikhita Damaraju , Miranda P.G. Zalusky , Kendra Hoekzema , David Twesigomwe , Lei Yang , Anthony A. Snead , Phillip A. Richmond , Wouter De Coster , Nathan D. Olson , Andrea Guarracino , Qiuhui Li , Angela L. Miller , Joy Goffena , Zachary B. Anderson , Sophie H.R. Storz , Sydney A. Ward , Maisha Sinha , Claudia Gonzaga-Jauregui , Wayne E. Clarke , Anna O. Basile , André Corvelo , Catherine Reeves , Adrienne Helland , Rajeeva Lochan Musunuri , Mahler Revsine , Karynne E. Patterson , Cate R. Paschal , Christina Zakarian , Sara Goodwin , Tanner D. 
Jensen , Esther Robb , The 1000 Genomes ONT Sequencing Consortium, University of Washington Center for Rare Disease Research (UW-CRDR), Genomics Research to Elucidate the Genetics of Rare Diseases (GREGoR) Consortium , William Richard McCombie , Fritz J. Sedlazeck , Justin M. Zook , Stephen B. Montgomery , Erik Garrison , Mikhail Kolmogorov , Michael C. Schatz , Richard N. McLaughlin , Harriet Dashnow , Michael C. Zody , Matt Loose , Miten Jain , Evan E. Eichler , and Danny E. Miller . High-coverage nanopore sequencing of samples from the 1000 Genomes Project to build a comprehensive catalog of human genetic variation . Genome Research , 34 ( 11 ): 2061 – 2073 , November 2024 . ISSN 1088-9051, 1549-5469 . doi: 10.1101/gr.279273.124 . OpenUrl Abstract / FREE Full Text 64. ↵ Rianne J.M. Goselink , Karlien Mul , Caroline R. van Kernebeek , Richard J.L.F. Lemmers , Silvère M. van der Maarel , Tim H.A. Schreuder , Corrie E. Erasmus , George W. Padberg , Jeffrey M. Statland , Nicol C. Voermans , and Baziel G.M. van Engelen . Early onset as a marker for disease severity in facioscapulohumeral muscular dystrophy . Neurology , 92 ( 4 ): e378 – e385 , January 2019 . doi: 10.1212/WNL.0000000000006819 . Publisher: Wolters Kluwer . OpenUrl Abstract / FREE Full Text 65. ↵ Qiujun Wang , Guang Yu , Xuan Ming , Weikun Xia , Xiguang Xu , Yu Zhang , Wenhao Zhang , Yuanyuan Li , Chunyi Huang , Hehuang Xie , Bing Zhu , and Wei Xie . Imprecise DNMT1 activity coupled with neighbor-guided correction enables robust yet flexible epigenetic inheritance . Nature Genetics , 52 ( 8 ): 828 – 839 , August 2020 . ISSN 1546-1718 . doi: 10.1038/s41588-020-0661-y . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed 66. ↵ Luis Busto-Moner , Julien Morival , Honglei Ren , Arjang Fahim , Zachary Reitz , Timothy L. Downing , and Elizabeth L. Read . Stochastic modeling reveals kinetic heterogeneity in post-replication DNA methylation . 
PLOS Computational Biology , 16 ( 4 ): e1007195 , April 2020 . ISSN 1553-7358 . doi: 10.1371/journal.pcbi.1007195 . OpenUrl CrossRef 67. ↵ Payam Mohassel , Ning Chang , Kaoru Inoue , Angela Delaney , Ying Hu , Sandra Donkervoort , Dimah Saade , B. Jeanne Billioux , Brooke Meader , Rita Volochayev , Chamindra G. Konersman , Angela M. Kaindl , Chie-Hee Cho , Bianca Russell , Adrian Rodriguez , K. Wade Foster , A. Reghan Foley , Steven A. Moore , Peter L. Jones , Carsten G. Bonnemann , Takako Jones , and Natalie D. Shaw . Cross-sectional Neuromuscular Phenotyping Study of Patients With Arhinia With SMCHD1 Variants . Neurology , 98 ( 13 ): e1384 – e1396 , March 2022 . ISSN 0028-3878 . doi: 10.1212/WNL.0000000000200032 . OpenUrl Abstract / FREE Full Text 68. ↵ Matthew Loose , Sunir Malla , and Michael Stout . Real-time selective sequencing using nanopore technology . Nature Methods , 13 ( 9 ): 751 – 754 , September 2016 . ISSN 1548-7105 . doi: 10.1038/nmeth.3930 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed 69. ↵ Igor Stevanovski , Sanjog R. Chintalaphani , Hasindu Gamaarachchi , James M. Ferguson , Sandy S. Pineda , Carolin K. Scriba , Michel Tchan , Victor Fung , Karl Ng , Andrea Cortese , Henry Houlden , Carol Dobson-Stone , Lauren Fitzpatrick , Glenda Halliday , Gianina Ravenscroft , Mark R. Davis , Nigel G. Laing , Avi Fellner , Marina Kennerson , Kishore R. Kumar , and Ira W. Deveson . Comprehensive genetic diagnosis of tandem repeat expansion disorders with programmable targeted nanopore sequencing . Science Advances , 8 ( 9 ): eabm5386 , March 2022 . doi: 10.1126/sciadv.abm5386 . Publisher: American Association for the Advancement of Science . OpenUrl CrossRef PubMed 70. 
↵ Satoko Miyatake , Eriko Koshimizu , Atsushi Fujita , Hiroshi Doi , Masaki Okubo , Taishi Wada , Kohei Hamanaka , Naohisa Ueda , Hitaru Kishida , Gaku Minase , Atsuhiro Matsuno , Minori Kodaira , Katsuhisa Ogata , Rumiko Kato , Atsuhiko Sugiyama , Ayako Sasaki , Takabumi Miyama , Mai Satoh , Yuri Uchiyama , Naomi Tsuchida , Haruka Hamanoue , Kazuharu Misawa , Kiyoshi Hayasaka , Yoshiki Sekijima , Hiroaki Adachi , Kunihiro Yoshida , Fumiaki Tanaka , Takeshi Mizuguchi , and Naomichi Matsumoto . Rapid and comprehensive diagnostic method for repeat expansion diseases using nanopore sequencing . NPJ Genomic Medicine , 7 : 62 , October 2022 . ISSN 2056-7944 . doi: 10.1038/s41525-022-00331-y . OpenUrl CrossRef PubMed 71. ↵ Nicolas Altemose , Annie Maslan , Owen K. Smith , Kousik Sundararajan , Rachel R. Brown , Reet Mishra , Angela M. Detweiler , Norma Neff , Karen H. Miga , Aaron F. Straight , and Aaron Streets . DiMeLo-seq: a long-read, single-molecule method for mapping protein–DNA interactions genome wide . Nature Methods , 19 ( 6 ): 711 – 723 , June 2022 . ISSN 1548-7105 . doi: 10.1038/s41592-022-01475-6 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed 72. ↵ Isac Lee , Roham Razaghi , Timothy Gilpatrick , Michael Molnar , Ariel Gershman , Norah Sadowski , Fritz J. Sedlazeck , Kasper D. Hansen , Jared T. Simpson , and Winston Timp . Simultaneous profiling of chromatin accessibility and methylation on human cell lines with nanopore sequencing . Nature Methods , 17 ( 12 ): 1191 – 1199 , December 2020 . ISSN 1548-7105 . doi: 10.1038/s41592-020-01000-7 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed 73. ↵ Xue Yue , Zhiyuan Xie , Moran Li , Kai Wang , Xiaojing Li , Xiaoqing Zhang , Jian Yan , and Yimeng Yin . Simultaneous profiling of histone modifications and DNA methylation via nanopore sequencing . Nature Communications , 13 ( 1 ): 7939 , December 2022 . ISSN 2041-1723 . doi: 10.1038/s41467-022-35650-2 . Publisher: Nature Publishing Group . 
OpenUrl CrossRef PubMed 74. ↵ Jane E Hewitt , Robert Lyle , Lorraine N Clark , Elizabeth M Valleley , Tracy J Wright , Cisca Wijmenga , Judith CT van Deutekom , Fiona Francis , Paul T Sharpe , Marten Hofker , et al. Analysis of the tandem repeat locus d4z4 associated with facioscapulohumeral muscular dystrophy . Human molecular genetics , 3 ( 8 ): 1287 – 1295 , 1994 . OpenUrl CrossRef PubMed Web of Science 75. ↵ Robert S Harris , Monika Cechova , and Kateryna D Makova . Noise-cancelling repeat finder: uncovering tandem repeats in error-prone long-read sequencing data . Bioinformatics , 35 ( 22 ): 4809 – 4811 , 2019 . OpenUrl CrossRef PubMed 76. ↵ Gillian M Greig and Huntington F Willard . β satellite dna: characterization and localization of two subfamilies from the distal and proximal short arms of the human acrocentric chromosomes . Genomics , 12 ( 3 ): 573 – 580 , 1992 . OpenUrl CrossRef PubMed Web of Science 77. ↵ Heng Li . Minimap2: pairwise alignment for nucleotide sequences . Bioinformatics , 34 ( 18 ): 3094 – 3100 , 2018 . OpenUrl CrossRef PubMed 78. ↵ Robert C Edgar . Muscle: multiple sequence alignment with high accuracy and high throughput . Nucleic acids research , 32 ( 5 ): 1792 – 1797 , 2004 . OpenUrl CrossRef PubMed Web of Science 79. ↵ James T Robinson , Helga Thorvaldsdóttir , Wendy Winckler , Mitchell Guttman , Eric S Lander , Gad Getz , and Jill P Mesirov . Integrative genomics viewer . Nature biotechnology , 29 ( 1 ): 24 – 26 , 2011 . OpenUrl CrossRef PubMed Web of Science 80. ↵ Lang Zhou , Tingze Feng , Shuangbin Xu , Fangluan Gao , Tommy T Lam , Qianwen Wang , Tianzhi Wu , Huina Huang , Li Zhan , Lin Li , Yi Guan , Zehan Dai , and Guangchuang Yu . ggmsa: a visual exploration tool for multiple sequence alignment and associated data . Briefings in Bioinformatics , 23 ( 4 ): bbac222 , 06 2022 . ISSN 1477-4054 . doi: 10.1093/bib/bbac222 . OpenUrl CrossRef 81. 
↵ Marcel Martin , Murray Patterson , Shilpa Garg , Sarah O Fischer , Nadia Pisanti , Gunnar W Klau , Alexander Schöenhuth , and Tobias Marschall . Whatshap: fast and accurate read-based phasing . BioRxiv, page 085050 , 2016 . 82. ↵ Petr Danecek , James K Bonfield , Jennifer Liddle , John Marshall , Valeriu Ohan , Martin O Pollard , Andrew Whitwham , Thomas Keane , Shane A McCarthy , Robert M Davies , et al. Twelve years of samtools and bcftools . Gigascience , 10 ( 2 ): giab008 , 2021 . OpenUrl CrossRef PubMed 83. ↵ Hadley Wickham . ggplot2 . Wiley interdisciplinary reviews: computational statistics , 3 ( 2 ): 180 – 185 , 2011 . OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted April 26, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following D4Z4End2End: complete genetic and epigenetic architecture of D4Z4 macrosatellites in FSHD, BAMS and reference cohorts Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share D4Z4End2End: complete genetic and epigenetic architecture of D4Z4 macrosatellites in FSHD, BAMS and reference cohorts Lucinda C. Xiao , Ayush Semwal , Brianna St John , Kathleen Zeglinski , Shian Su , James Lancaster , Shifeng Xue , Bruno Reversade , Matthew E. Ritchie , Frédérique Magdinier , Marnie E. 
Blewitt , Quentin Gouil medRxiv 2025.04.24.25326320; doi: https://doi.org/10.1101/2025.04.24.25326320 Share This Article: Copy Citation Tools D4Z4End2End: complete genetic and epigenetic architecture of D4Z4 macrosatellites in FSHD, BAMS and reference cohorts Lucinda C. Xiao , Ayush Semwal , Brianna St John , Kathleen Zeglinski , Shian Su , James Lancaster , Shifeng Xue , Bruno Reversade , Matthew E. Ritchie , Frédérique Magdinier , Marnie E. Blewitt , Quentin Gouil medRxiv 2025.04.24.25326320; doi: https://doi.org/10.1101/2025.04.24.25326320 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4536) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (541) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9232) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) 
Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00c83a59c1758d3',t:'MTc3OTYyNzk1MA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.