Large-scale Proteomics Profiling of Peripheral Blood of DM1 patients identifies biomarkers for disease severity and functional capacity

doi:10.1101/2025.09.05.25335077

Large-scale Proteomics Profiling of Peripheral Blood of DM1 patients identifies biomarkers for disease severity and functional capacity

2025 · doi:10.1101/2025.09.05.25335077

preprint OA: gold CC-BY-4.0

📄 Open PDF Full text JSON View at publisher

Full text 101,842 characters · extracted from preprint-html · click to expand

Large-scale Proteomics Profiling of Peripheral Blood of DM1 patients identifies biomarkers for disease severity and functional capacity | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Large-scale Proteomics Profiling of Peripheral Blood of DM1 patients identifies biomarkers for disease severity and functional capacity Daniël van As , Tine Claeys , Renee Salz , Delphi Van Haver , Sara Dufour , Amber van Deelen , Jolein Gloerich , Ralf Gabriels , Pieter Jan Volders , Vera Dobelmann , Andrea Gangfuss , Tobias Ruck , Genevieve Gourdon , Elise Duchesne , Cynthia Gagnon , Andreas Roos , Alain van Gool , Francis Impens , Lennart Martens , Hanns Lochmüller , Benedikt Schoser , Guillaume Bassez , Baziel G.M. van Engelen , View ORCID Profile Peter A.C. ’t Hoen OPTIMISTIC consortium, ReCognitION consortium doi: https://doi.org/10.1101/2025.09.05.25335077 Daniël van As a Department of Medical BioSciences, Radboud University Medical Center , Nijmegen, The Netherlands b Department of Neurology, Donders Institute for Brain , Cognition and Behaviour, Radboud University Medical Center , The Netherlands M.D Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tine Claeys c VIB-UGent Center for Medical Biotechnology , VIB, Ghent, Belgium PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Renee Salz a Department of Medical BioSciences, Radboud University Medical Center , Nijmegen, The Netherlands PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Delphi Van Haver c VIB-UGent Center for Medical Biotechnology , VIB, Ghent, Belgium d Department of Biomolecular Medicine, Faculty of Medicine and Healthy Sciences, Ghent University , Ghent, Belgium MSc Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sara Dufour c VIB-UGent Center for Medical Biotechnology , VIB, Ghent, Belgium d Department of Biomolecular Medicine, Faculty of Medicine and Healthy Sciences, Ghent University , Ghent, Belgium BSc Find this author on Google Scholar Find this author on PubMed Search for this author on this site Amber van Deelen e Translational Metabolic Laboratory, Department of Human Genetics, Radboud University Medical Center , Nijmegen, The Netherlands BSc Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jolein Gloerich e Translational Metabolic Laboratory, Department of Human Genetics, Radboud University Medical Center , Nijmegen, The Netherlands PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ralf Gabriels c VIB-UGent Center for Medical Biotechnology , VIB, Ghent, Belgium d Department of Biomolecular Medicine, Faculty of Medicine and Healthy Sciences, Ghent University , Ghent, Belgium PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Pieter Jan Volders f Limburg Clinical Research Center (LCRC) , UHasselt, Diepenbeek, Belgium g Laboratory of Molecular Diagnostics, Jessa Hospital , Hasselt, Belgium PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Vera Dobelmann h Department of Neurology, University Hospital Düsseldorf, Heinrich Heine University Düsseldorf , Düsseldorf, Germany MSc Find this author on Google Scholar Find this author on PubMed Search for this author on this site Andrea Gangfuss i Department of Pediatric Neurology, Centre for Neuromuscular Disorders, University Duisburg-Essen , Essen, Germany M.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tobias Ruck j Ruhr University Bochum, BG University Hospital Bergmannsheil, Department of Neurology , Bochum, Germany k BG University Hospital Bergmannsheil, Heimer Institute for Muscle Research , Bochum, Germany M.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site Genevieve Gourdon l Sorbonne Université, Inserm, Institut de Myologie, Centre de Recherche en Myologie , Paris, France PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Elise Duchesne m Interdisciplinary Research Group on Neuromuscular Diseases (GRIMN), Integrated University Health and Social Services Centre of Saguenay–Lac-Saint-Jean , Saguenay, QC, Canada n Interdisciplinary Research Center in Rehabilitation and Social Integration (Cirris), Integrated University Health and Social Services Centre of the Capitale-Nationale , Quebec City, QC, Canada o CHU de Québec - Université Laval Research Center , Quebec City, Quebec, Canada p School of Rehabilitation Sciences, Faculty of Medicine; CHU de Québec – Research Center, Laval University , Quebec City, QC, Canada PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Cynthia Gagnon m Interdisciplinary Research Group on Neuromuscular Diseases (GRIMN), Integrated University Health and Social Services Centre of Saguenay–Lac-Saint-Jean , Saguenay, QC, Canada q School of Rehabilitation, Faculty of Medecine and Health Sciences, CHU de Sherbrooke-Research Center, Université de Sherbrooke , Sherbrooke, Qc, Canada PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Andreas Roos h Department of Neurology, University Hospital Düsseldorf, Heinrich Heine University Düsseldorf , Düsseldorf, Germany PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alain van Gool e Translational Metabolic Laboratory, Department of Human Genetics, Radboud University Medical Center , Nijmegen, The Netherlands PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Francis Impens c VIB-UGent Center for Medical Biotechnology , VIB, Ghent, Belgium d Department of Biomolecular Medicine, Faculty of Medicine and Healthy Sciences, Ghent University , Ghent, Belgium PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Lennart Martens c VIB-UGent Center for Medical Biotechnology , VIB, Ghent, Belgium d Department of Biomolecular Medicine, Faculty of Medicine and Healthy Sciences, Ghent University , Ghent, Belgium r BioOrganic Mass Spectrometry Laboratory (LSMBO), IPHC UMR 7178, University of Strasbourg , CNRS, Strasbourg, 67000, France s Infrastructure Nationale de Protéomique ProFI − UAR 2048 , Strasbourg, 67087, France PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hanns Lochmüller t Children’s Hospital of Eastern Ontario Research Institute; Division of Neurology, Department of Medicine, The Ottawa Hospital. u Brain and Mind Research Institute, University of Ottawa , Ottawa, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Benedikt Schoser v Friedrich-Baur-Institute, Dep. of Neurology LMU Clinic , 80336 Munich, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Guillaume Bassez w Neuromuscular Reference Centre, Pitiè-Salpêtrière Hospital, Assistance Publique Hôpitaux de Paris , Paris, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site Baziel G.M. van Engelen b Department of Neurology, Donders Institute for Brain , Cognition and Behaviour, Radboud University Medical Center , The Netherlands PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Peter A.C. ’t Hoen a Department of Medical BioSciences, Radboud University Medical Center , Nijmegen, The Netherlands PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Peter A.C. ’t Hoen For correspondence: peter-bram.thoen{at}radboudumc.nl Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Background Myotonic Dystrophy Type 1 (DM1), the most common genetic neuromuscular disorder in adults, poses significant challenges for drug development due to its multisystem nature and high clinical variability in symptoms and disease progression. With a growing number of therapies entering clinical trials, this study addresses the urgent need for biomarkers that can serve as surrogate endpoints. Methods We profiled 437 serum samples from adult DM1 patients collected at two timepoints of the OPTIMISTIC trial using bottom-up mass spectrometry with data-independent acquisition. Associations between protein expression, the disease-causing CTG-repeat and 25 clinical outcome measures were studied using linear mixed-effect models. All key study findings were validated in an independent cohort of 69 DM1 patients and 10 healthy controls. Results Of the 259 identified proteins, 161 showed significant associations with the CTG-repeat length (FDR < 5%). Hypogammaglobulinemia was confirmed and shown to be worse in severely affected patients. A strong proteomic signature was associated with clinical measures of functional capacity, with the 6-Minute Walk Test showing the strongest signal (70 associations, FDR < 5%). These novel associations reveal a compelling link between chronic inflammation and reduced functional capacity. A machine learning algorithm identified a minimal set of 13 proteins robustly reflecting both the underlying genetic defect and functional capacity. Conclusions DM1 induces a broad disease fingerprint in the serum proteome, predominantly affecting proteins of the immune system. A carefully selected panel of proteins showed the greatest potential to meet the statistical criteria required for surrogate endpoints in clinical trials. Background Myotonic Dystrophy type 1 (DM1) is a progressive, multisystem disease with an estimated prevalence of 1 in 8,000, with some studies reporting a locally much higher prevalence such as 1 in 550 in the Saguenay-Lac-Saint-Jean area in Northeastern Quebec and 1 in 2,100 for the state of New York 1,2 . Various organ systems are involved in the disease, causing symptoms such as muscular weakness, myotonia, severe fatigue, apathy, cataracts, diabetes and various respiratory, cardiac and gastrointestinal problems. DM1 is caused by a trinucleotide expansion of more than 50 CTG repeats in the DM1 protein kinase ( DMPK ) gene 3–5 . While often serving as a proxy for disease severity, the magnitude of the (progenitor) CTG repeat expansion has only a moderate negative correlation with the age of disease onset and a moderate positive association with the severity of the clinical phenotype 6 . Furthermore, it has been shown that interruptions of the CTG-repeat by variant repeats, such as CCG or CGG, are associated with a milder clinical phenotype 7 . The largest randomized controlled clinical trial in DM1, OPTIMISTIC, has investigated the effect of personalized Cognitive Behavioural Therapy (CBT), with the optional addition of graded exercise therapy, in a cohort of more than 250 genetically characterized DM1 patients. While it has been shown that CBT can, on average, slightly but significantly improve the capacity for activity and participation of DM1 patients, the magnitude of the improvements and the improved disease aspects were highly heterogenous 8,9 . This result highlights an important limitation in current DM1 clinical trial research. The heterogeneity of the patient population asks for more targeted clinical trials of patient subpopulations with shared disease characteristics. However, patient subpopulations are currently not well-defined, and subgroup analyses in a classical statistical framework may suffer from reduced statistical power. In novel clinical trial designs, biomarkers are a valuable addition to the evaluation of therapy effects. Given the shared molecular dysregulations induced by the CTG expansion, one may expect more homogenous molecular responses to an intervention in comparison to clinical responses. Furthermore, these molecular changes may be more sensitive to change, preceding clinical benefits such as delayed disease progression. Since an ideal biomarker can be efficiently obtained with a low burden for patients, analyses of (peripheral) blood have sparked great interest in biomarker research. For various neurological and psychiatric disorders, including Duchenne Muscular Dystrophy, Huntington’s disease, major depressive disorder and DM1, disease-relevant findings have been reported in blood 10–13 . This finding aligns with our earlier transcriptomic research of the OPTIMISTIC cohort, where we identified significant associations between the CTG repeat and 608 genes expressed in peripheral blood, of which 97 returned to more normal expression levels in patients who clinically improved 14 . Here, we expanded on this work by performing proteomic analysis of 437 serum samples collected at two timepoints of the OPTMISTIC trial. The most considerable differences in protein expression between patients were linked to the size of the CTG-repeat and confirmed the known hypogammaglobulinemia in DM1 15–17 . A strong proteomic fingerprint was also associated with multiple measures of functional capacity, showing primarily an increase in components of the complement system in patients with reduced functional performance. Rather than relying on individual associations with disease pathology or physical activity, we demonstrate that a set of 13 proteins collectively has the strongest potential to meet the criteria for surrogate endpoints in clinical trials. All key study findings, including the set of 13 proteins, were independently validated in an extensive cohort of 69 patients with DM1 and 10 healthy controls. Methods Sample sources OPTIMISTIC clinical trial samples All primary analyses were based on the samples and metadata that were collected during the OPTIMISTIC clinical trial ( NCT02118779 ) 8,18 . In this European multi-centre clinical trial, 255 genetically confirmed adult DM1 patients across four European countries were enrolled. Patients were randomized either into the intervention arm (n=128), consisting of personalized cognitive behavioural therapy (CBT) with optionally the addition of graded exercise therapy (GET, n=32), or the control arm (n=127), consisting of standard of care. In both study arms, a rich phenotypic characterisation was obtained by assessing more than 25 outcome measures. We grouped these outcome measures into 6 disease domains (Functional capacity; Fatigue; Social Functioning, Behaviour and Impact on Caregivers; Cognitive Function, Depression, Illness Coping; Disease Impact, Severity and Pain; Apathy) and listed them together with their abbreviations in Table 1 . Details of the individual outcome measures, including scoring ranges, have been published elsewhere 8,9 . Additionally, whole blood was collected at multiple timepoints for biomarker analyses (10 ml for DNA analysis, 10ml for RNA analysis and 10ml serum). Serum was isolated from serum-specific BD Vacutainer tubes (Ref 368815) and centrally stored at -80°C at the John Walton Muscular Dystrophy Research Centre Biobank 19 . Serum samples obtained at the start of the trial (n= 252) and at the primary trial endpoint after 10 months (n= 211) were sent to the Department of Biomolecular Medicine at Ghent University, Belgium, for mass-spectrometry based proteome analyses. View this table: View inline View popup Download powerpoint Table 1: Overview of OPTIMISTIC outcome measures Canadian and German cohort samples External validation of the study findings was implemented on a cohort of Canadian (n= 56) and German (n= 13) DM1 patients. DM1 patients from the Canadian cohort were recruited between 2011-2013 as part of a larger longitudinal study, all of whom having participated in the first phase of the study between 2002 and 2004. Inclusion criteria were to have the late-onset, adult or juvenile phenotype of DM1 confirmed by genetic analysis and to be aged 18 years or older. CTG repeat length was determined using the same methodology as that used in previous studies 20,21 . All clinical assessments were performed by the same physiotherapist. The 6-Minute Walk Test (6MWT) was performed, where the maximum distance walked along a 30-meter corridor over a 6-minute period was measured (in meters). Grip strength was assessed using a Jamar digital dynamometer (Asimow Engineering Co., Los Angeles, CA). The mean of three trials was used for analyses. DM1 patients from the German cohort were recruited between 2019 and 2024 in the Department of Pediatric Neurology of the University Hospital Essen (University Duisburg-Essen, Germany). Inclusion criteria were clinical and genetically confirmed DM1. This cohort included paediatric cases (juvenile DM1 patients) as well as diseased parents (adult DM1 patients). Moreover, serum samples of 10 healthy donors were collected from the same German site (clinically unaffected family members and unrelated individuals). Mass-spectrometry based proteome analysis of the OPTIMISTIC cohort Undepleted serum samples contain proteins in a large dynamic range, making the identification of low-abundant proteins challenging. To maximize the number of quantified serum proteins, a state-of-the-art data independent acquisition (DIA) mass-spectrometry-based workflow for clinical proteomics was applied at Ghent University. The recorded spectra were analysed using the DIA-NN software employing the built-in spectral library prediction 22 . Since peptides are quantified in MS-based proteomics, proteins must be inferred by matching these peptides to known sequences. When peptides could not be uniquely assigned to a single protein, a protein group is reported instead. This group encompasses all protein matches, thereby preventing the overestimation of protein identifications. Out of the 463 samples that were processed in this workflow, 11 samples were lost due to corrupt or incorrectly recorded MS datafiles (n=452 samples remaining). For a detailed workflow of the proteome quantification analysis, including sample preparation, LC-MS/MS analysis and proteomics data analysis, please refer to the ‘Mass spectrometry-based protein quantification of the OPTIMISTIC samples’ section in the appendix. Proteomic analysis of the Canadian and German cohort The proteomes of the Canadian (n=56) and German (n=23, including n=10 healthy controls) sera samples were assessed using a (DIA) mass-spectrometry-based workflow for clinical proteomics at the Radboud University Medical Center in Nijmegen. Recorded spectra were then analysed in real-time using the Proteoscape-implemented version of DIA-NN 23 . Similar to the DIA-NN based workflow applied to the OPTIMISTIC data, peptides that could not be uniquely matched to a specific protein were instead associated with a protein group. For a detailed workflow of the proteome quantification analysis, including sample preparation, LC-MS/MS analysis and proteomics data analysis, please refer to the ‘Mass spectrometry-based protein quantification of the Canadian and German cohort samples’ section in the appendix. ELISA-based validation of ITIH3 serum levels Additionally, for the Canadian sera samples (n=56), ELISA-based ITIH3 levels were measured in using the “Human inter-alpha-trypsin inhibitor heavy chain H3 (ITIH3) ELISA kit” (CSB-EL011896HU, Cusabio). The assay was performed at the Department of Neurology of the University Hospital Düsseldorf (Germany) according to the manufacturer’s protocol. In brief, samples and standards were added to the precoated plate, followed by addition of horseradish peroxidase conjugate. The plate was then incubated for 30 min at 37°C. The wells were washed five times, and TMB substrate was added. The plate was then incubated for 20 min at 37°C. Subsequently, the stop solution was added to end the reaction, and the optical density was measured with a microplate reader (Tecan) at 450 nm. Samples were used in a dilution of 1:500 and measured in duplicate. Ethics approval and consent to participate OPTIMISTIC clinical trial samples The OPTIMISTIC clinical trial ( NCT02118779 ) was conducted in accordance with the Declaration of Helsinki and approved by the medical-ethical scientific committee for human research at each of the four participating clinical centres. Prior to the trial, all enrolled patients provided written informed consent, which included the usage of the pseudonymized blood samples for the research purposes of this study. Ethical approval for mass spectrometry-based proteomics profiling of the serum samples was obtained from the Ethics Committee of Ghent University Hospital (B670201940027). For more specific methodological details of the clinical trial, including trial protocols and an overview of all (patient-reported) outcome measures, please refer to the published trial protocol and the main study publication 8,18 . Canadian cohort samples The study was conducted at the Saguenay Neuromuscular Clinic and was approved by the Ethics Review Board of the Centre Intégré Universitaire de Santé et Services Sociaux du Saguenay–Lac-St-Jean (Chicoutimi, Québec, Canada; #2010-046). Written informed consent was obtained from all participants including biomarkers studies. German cohort samples All patients and/or caregivers, as well as healthy donors, gave written consent to donate blood samples for research-driven biomarker studies. The local ethical committee approved biomarker studies on neuromuscular patients and controls (19-9011-BO). Availability of data and materials OPTMISTIC clinical trial Researchers interested in accessing the phenotype data from the OPTIMISTIC study are invited to contact K. Mul ( karlien.mul{at}radboudumc.nl ) and complete a data access agreement. All requests will be reviewed by a panel comprising from each of the four participating clinical sites, with K. Mul serving as chair 8 . The mass spectrometry proteomics data of the OPTIMISITIC samples have been deposited to the ProteomeXchange Consortium via the PRIDE partner repository with the dataset identifier PXD067476 24 . External cohorts The phenotype data used and/or analysed from the Canadian cohort are available from the corresponding author upon reasonable request following the proper evaluation of the research protocol by the Ethics Review Board of the Centre intégré universitaire de santé et de services sociaux du Saguenay–Lac-St-Jean (Saguenay, Québec, Canada; cynthia.gagnon4{at}usherbrooke.ca ). The phenotype data used from the German cohort, including the ELISA-based quantification data of ITIH3, are stored at the Department of Neurology of the University Hospital Düsseldorf (Heinrich Heine University) and are available upon request to roos{at}andreas-roos.de.The mass spectrometry proteomics data of both the Canadian and German cohort have been deposited to the ProteomeXchange Consortium via the PRIDE partner repository with the dataset identifier PXD060035 24 . Results and Code availability For both studies, the full list of significant protein group associations with the CTG-repeat and 6MWT scores, as well as the table containing all Variable Inclusion Probabilities, will be made available on GitHub after publication. Additionally, all R scripts used in this work are available via https://github.com/cmbi/DM1_ReCognitION_Proteomics Data analysis All analyses were implemented with the statistical programming language R within the RStudio integrated development environment 25,26 . All figures have been generated using the ggplot2 package and arranged using the cowplot package 27,28 . All selected candidate protein group biomarkers have been associated with gene names using UniProt 29 . Where appropriate, data presented in tabular format has been rounded to 4 digits. Outlier handling of the phenotype data For all outcome measures, we calculated the interquartile range (IQR) and considered measurements with Q1 – 3 * IQR and Q3 + 3 * IQR as potential outliers. Subsequently, these potential outliers were visually inspected to confirm the likelihood of an unreasonable or faulty measurement that could potentially skew the results of the statistical frameworks. For the 25 outcome measures of the OPTIMISTIC data, a total of 10 outliers were set to ‘NA’ (0.09% of the screened datapoints). For the Canadian cohort, a total of two outliers were set to ‘NA’ concerning the grip strength (left and right) of one unusually strong patient. Pre-processing of raw peptide and protein group intensities For the OPTIMISTIC study, pre-processing of both the peptide and protein group data has been implemented; however, the Canadian cohort validation study focused only on the protein group data. For the OPTIMISTIC peptide samples, all identified peptides with different post-translational modifications (PTMs) were summed up and total intensity values of less than 10,000 per peptide were changed to ‘NA’. To identify samples with low/unreliable peptide detection rates, total peptide abundance per sample was calculated by summing up all detected peptides. Ten samples with total intensities lower than Q1 – 3 * IQR were removed. Five of these samples were from the same row of a well plate, indicating a possible dilution error. Since the OPTIMSTIC protein group data were inferred based on the peptide data, these samples were also removed from the protein group dataset (n=442 samples remaining). For the protein group data of both studies, intensities of less than 10,000 per protein group were set to ‘NA’. Technical replicates, which were only present in the external validation dataset, were averaged. For the OPTIMISTIC dataset, protein groups and associated peptides linked to the expected contaminants trypsin and bovine albumin were excluded. In contrast, for the Canadian cohort, the iRT-Kit protein group was removed. Next, for all the datasets, intensity values were log2 transformed and all samples were subsequently scaled using the ‘equalMedianNormalization’ function of the R package DEqMS (independently per dataset) 30 . Peptides or protein group measurements with intensities lower than Q1 – 3 * IQR for that particular peptide or protein group across samples were removed. For the OPTIMISTIC samples, 2916 peptide measurements (0.29% of screened datapoints) and 141 protein group measurements (0.13% of screened datapoints) were set to ‘NA’ because of substantially different abundances across samples. Likewise, for the external validation dataset, 29 protein groups were set to ‘NA’ (0.22% of screened datapoints). During the last filter step, peptides or protein groups identified in fewer than 100 (OPTIMISTIC data, 23%) or 20 (external validation data, 36%) samples were removed because they are neither useful as biomarkers nor for robust statistical inference. In doing so, 25 peptides (n=2670 remaining) and one protein group (n=259 remaining) of the OPTIMISTIC data and 223 protein groups (n=259 remaining) of the external validation data were removed. As a quality control procedure, mean-variance plots were generated for the protein group datasets. For the 189 paired OPTIMISTIC samples (from the same patient, at two timepoints), the Pearson correlation was calculated for each protein group between the baseline and the matched 10-month sample. This was compared with the average Pearson correlation of each protein group between the baseline and all other 10-month samples. For five baseline samples of the OPTIMISTIC datasets, no phenotype data were present, as these patients were ultimately not enrolled in the clinical trial. Accordingly, these peptide and protein group samples were removed, resulting in a final sample size of n=437. An additional dataset was analysed based on the combined set of the Canadian (n=56) and German (n=23, including 10 healthy controls) samples. The pre-processing of this combined cohort was similar to the protein group datasets above, except that no intensity minimum of 10,000 was set and no outliers were removed. This dataset of 79 samples (comprising both Canadian and German cohorts) was exclusively used for the comparison of the top identified biomarkers in DM1 patients (n=69) versus healthy controls (n=10). Principal component analyses Principal component analyses (PCA) were implemented independently for the full peptide and protein group datasets of the OPTIMISTIC study, combining samples from both timepoints. Because PCA requires complete datasets, samples within the 25 th quantile for the number of identified peptides or protein groups were filtered out (respectively 327 and 315 samples remaining for the peptide and protein group data). Subsequently, both the peptide and protein group datasets were filtered for molecules present in all filtered samples (901 peptides and 173 protein groups remaining). The PCA was implemented using the R function ‘prcomp’ with the arguments ‘center’ and ‘scale’ set to TRUE. To study the amount of variance of each principal component that can be attributed to different phenotype measures, mixed effect models were fitted for the first 10 principal components using the ‘lmer’ function of the lme4 package with default settings 31 [ 1 ]. Marginal R-squared values (R-squared values attributable to the fixed effects) were obtained using the ‘r.squaredGLMM’ function of the MuMIn package 32 and associations were visualized using the ggcorrplot package 33 . Statistical Analyses For the OPTIMISTIC protein group data, four different statistical mixed effect models were implemented for each protein group to respectively study the association with the CTG repeat [ 2 ], associations with different outcome measures [ 3 ], the effect of the cognitive behavioural therapy (CBT) [ 4 ] and the effect of the graded exercise therapy (GET) [ 5 ]. To account for proteomic differences attributable to biological sex, the covariate ‘Sex’ was included as fixed effect in all models. To account for the dependency of measures from the same patients and well plates, the variables ‘PatientID’ and ‘Plate’ were included as random effects in all models. In model [ 2 ], the same CTG repeat length was used at both timepoints, as the CTG-repeat size was not re-estimated after the primary trial endpoint. In models [ 2 ] and [ 3 ], the variable ‘Timepoint’ (before/after the trial) was included as a fixed effect to account for possible temporal changes that occurred during the trial. All models were generated using the ‘lmer’ function of the lme4 package with default settings 31 . P-values associated with the variables of interest (bold in the formulae) were generated with the lmerTest package and multiple testing corrected based on the Benjamini & Hochberg procedure using the R base ‘p.adjust’ function with method=‘fdr’ 34 . Additionally, for all associations studied with models [ 2 ] and [ 3 ], Pearson correlations were calculated between the variable of interest and the protein group using the R base ‘p.cor’ function with use= ‘pairwise.complete.obs’ and method= ‘pearson’. Fits with convergence problems were automatically removed. As a measure of statistical robustness, the number of reported significant association with the CTG-repeat [ 2 ] and the 6MWT [ 3 ] are also reported after randomization of these two measures across the patients. Due to the unequal distribution of samples across well plates with respect to study timepoint, models [ 4 ] and [ 5 ] were additionally implemented without the random plate effect as a supplemental analysis to assess the potential confounding influence of plate-specific variation on treatment effects. For the external validation study (Canadian cohort, n= 56), three linear models were fit to study the association of physical outcome measures (6MWT, grip strength left and right hand) with the ELISA-based ITIH3 levels [ 6 ]. Furthermore, for each MS-based identified protein group a linear model was fit to study the association with the CTG repeat [ 7 ] and 6MWT score [ 8 ]. All models included the covariate ‘Sex’ to account for possible differences attributable to biological sex. Plots were generated to illustrate the associations between log2 ITIH3 (and other candidate biomarkers) and the clinical scores. Additionally, for the associations between the variables of interest and the protein groups in models [ 7 ] and [ 8 ], Pearson correlations were calculated using the R base ‘p.cor’ function with use= ‘pairwise.complete.obs’ and method= ‘pearson’. Although adjusted p-values (Benjamini Hochberg procedure) for the results from models 7 and 8 were calculated, the validation analysis reported in this study focuses on the nominal p-values of the already multiple testing corrected top hits from the OPTIMISTIC study-based results. To externally validate the statistical model hits, we compared the FDR-adjusted significant protein group associations with the CTG repeat length and 6MWT from the OPTIMISTIC cohort [ 2 , 3 ] with the nominally significant associations observed in the external Canadian validation cohort [ 7 , 8 ]. However, direct comparison of protein groups between the OPTIMISTIC and Canadian cohorts was not feasible due to slight differences in their protein compositions. To address this, all protein groups were decomposed into individual proteins. Next, a non-redundant list of overlapping significant proteins was generated. For each protein, the most significantly associated protein group in the OPTIMISTIC cohort was identified. An optimal matching protein group from the Canadian cohort was then selected based on maximal overlap in constituent proteins. In cases where multiple external protein groups exhibited the same degree of overlap with a given OPTIMISTIC protein group, the group with the fewest total proteins was selected. If redundancy persisted, the most statistically significant external protein group was chosen. Finally, only matches with Pearson correlations in the same direction were retained. For the models [ 2 , 3 , 4 , 5 ], Volcano plots were generated, where for models [ 2 , 3 ] additional colour coding was used to illustrate if the results were also nominally significant in the respective models [ 7 , 8 ]. For models [ 2 , 3 ], example plots of the strongest associations that were also externally validated were generated. To visualize the possible impact of CTG repeat interruptions (repeat variants) on the protein group-CTG repeat associations, the identified top immunoglobulin and complement-associated protein groups identified in model [ 2 ] were visualized with the color-coding reflecting whether a repeat interruption was present. For the models [ 4 , 5 ] example plots were generated for the significant results (adjusted p < 0.05). For the most promising biomarkers, differences in expression (healthy vs DM1) plots were generated based on the unfiltered combined Canadian (n=56) and German samples (13 DM1 patients, 10 healthy controls). Significance labels were added using the ‘stat_compare_means function (method= “t.test”, paired= “F”) of the ggpubr package 35 . Bootstrap enhanced multivariate Elastic-Net regression To identify a minimum subset of protein groups linked with both the disease pathology and the clinical phenotype, a multivariate Elastic-Net regression framework has been implemented. First, the 47 protein groups that were significantly associated with both the CTG-repeat expansion and the 6MWT, as determined by the statistical analyses of the OPTIMISTIC cohort, were selected as candidate predictors. To validate the results of this statistical framework, the 47 protein groups were reduced to 42 protein groups that were also unambiguously identified in the Canadian cohort. Model training was exclusively done using the OPTIMISTIC (baseline) dataset. The results were internally validated using the 10-month data (internal testing) and externally validated with the completely independent Canadian cohort. Since the Elastic-Net framework necessitates a complete dataset, 10 imputed OPTIMISTIC protein group datasets were generated using Multiple Imputation Using Chained Equations (MICE) 36,37 . For the data imputation, the whole protein group dataset, spanning both timepoints, was used to allow for the most accurate results. Protein groups missing in more than 20% of the samples were excluded. For the remaining protein groups(n=239), each missing value was imputed based on the 100 protein groups with the highest associated absolute Pearson correlation, with 50 iterations per dataset. The multivariate variable selection algorithm was then implemented on the imputed baseline datasets from the OPTIMISTIC study (model training) for cases with no missing CTG-repeat of 6MWT information (n=231). Given the large distribution differences of the dependent variables (CTG-repeat, 6MWT) between the OPTIMISTIC and external validation study, as well as the multivariate Gaussian framework of the Elastic-Net regression, the dependent variables were scaled using the ‘boxcox’ function of the R MASS package with automatically derived optimal lambda values 38 . Next, for each imputed dataset, 5000 bootstrap distributions were generated using the ‘boot’ function of the package boot 39 . Subsequently, for each bootstrap distribution, the ‘cv.glmnet’ function of the glmnet package, using the parameters type.measure=“mse”, family = “mgaussian”, alpha=0.5, nfolds=10, standardize=TRUE and standardize.response=TRUE, was implemented 40 . Coefficients were selected based on the “lambda.1se” setting, allowing for a minimum number of predictors that still perform within one standard error of the model with the best performance. For each imputed dataset, it was calculated how frequently a protein group has been selected across the 5000 bootstrap distributions, which is referred to as Variable Inclusion Probability (VIP) 9,41,42 . Finally, the VIPs across all imputed datasets were averaged, yielding a single average VIP for each protein group. A set of candidate predictors could then be obtained by selecting all protein groups that have a certain number of average VIPs. The predictive power of a combined set of protein groups was systematically assessed using different average VIP thresholds (90%, 80%, 70% and 60%). For this, a regular multivariate linear regression framework was fitted on the unimputed OPTIMISTIC baseline data, where the dependent variables (CTG-repeat and 6MWT) were predicted based on the combined set of protein groups (baseline model). The baseline model was then used to predict the baseline values, the 10 month OPTIMISTIC values (unseen, internal validation), and the values from the Canadian cohort (unseen, external validation). In line with the variable selection algorithm, for all three datasets (OPTIMISTIC baseline and 10 months, external validation) the dependent variables were independently scaled using the ‘boxcox’ function of the R MASS package with automatically derived optimal lambda values and subsequently together with the independent variables (candidate protein groups) z-score transformed using the R-base ‘scale’ function with the default parameters center=True and scale=True 38 . (Adjusted) R-squared values of the baseline model are reported, as well as the Root Mean Square Error (RMSE) values for the predictions of the OPTIMISTIC baseline, OPTIMISTIC 10-month and Canadian cohorts. For the predictions of the OPTIMISTIC 10-month and Canadian cohort values using the baseline model, out-of-sample R-squared values are also reported, which were calculated based on: 𝑦 𝑖 = 𝑜𝑏𝑠𝑒𝑟𝑣𝑒𝑑 𝑡𝑒𝑠𝑡𝑖𝑛𝑔 𝑑𝑎𝑡𝑎, 𝑦^ = 𝑝𝑟𝑒𝑑𝑖𝑐𝑡𝑒𝑑 𝑤𝑖𝑡ℎ 𝑏𝑎𝑠𝑒𝑙𝑖𝑛𝑒 𝑚𝑜𝑑𝑒𝑙, 𝑦̅ = 𝑚𝑒𝑎𝑛 𝑜𝑓 𝑜𝑏𝑠𝑒𝑟𝑣𝑒𝑑 𝑡𝑒𝑠𝑡𝑖𝑛𝑔 𝑑𝑎𝑡𝑎 For the most optimal VIP value (60%), the coefficients and p-values of the associated multivariate OPTIMISTIC baseline model are reported (n=209). To assess linear model assumptions, univariate multiple regression models were generated to individually predict CTG-repeat (n=210) and 6MWT (n=212) scores based on the identified protein group set using the OPTIMISTIC baseline data. Linear model assumptions (Global Stat, Skewness, Kurtosis, Link Function, Heteroscedasticity) of these two models were checked using the gvlma package 43 . If assumptions were not met, influential datapoints were identified through visual inspection of Cook’s distances. Results Patient cohorts characteristics and comparisons This study focused on the identification of protein biomarkers in serum samples of DM1 patients. For this purpose, bottom-up mass-spectrometry (MS)-based proteomic profiles of more than 400 serum samples of the OPTIMISTIC study cohort were generated. The OPTIMISTIC samples were derived from two study timepoints, at the start of the trial (n = 235) and the primary trial endpoint after 10 months (n = 202). OPTIMISTIC was not only the largest clinical trial in DM1 to date but also characterised by rich phenotype profiling of all patients. In addition to measuring the CTG-repeat expansion in blood, which we use as a proxy for both disease severity and pathology in this study, 25 different clinical outcome measures were used. We grouped all outcome measures into 6 disease domains which, including the abbreviations used in this study and the corresponding full names, are summarized in table 1 . We refer the reader for to earlier publications for more details on how the clinical measurements were obtained and specific scoring ranges, including the calculation of interference scores such as those for the Stroop Colour-Word Test 8,9 . To externally validate the study findings, serum-based MS-based proteomic profiles were also generated for 56 Canadian DM1 patients, 13 German DM1 patients and 10 German healthy controls in an independent laboratory ( Table 2 ). For all 69 DM1 patients, information on the CTG-repeat was present, and for the 56 DM1 patients from the Canadian cohort, also information on the six-minute walk test (6MWT) and grip strength of both hands was available. View this table: View inline View popup Download powerpoint Table 2: Overview of patient cohorts, proteomic datasets and implemented analyses At both time points, the sex distribution was roughly balanced, and the age distributions were comparable. Based on the minimum, maximum and median values, the CTG-repeat length was also comparable between the two cohorts ( Table 2 ). However, the distributions of the OPTIMISTIC CTG-repeat scores resembled a slightly-right skewed normal distribution. In contrast, the distribution of the Canadian cohort-based CTG-repeat scores was more uniform with a right skew (S Figure 1 , panels A, B, C). Similar to the CTG-repeat, the minimum, maximum and median values of the 6MWT scores were also roughly comparable between the OPTIMISTIC and Canadian cohorts ( Table 2 ). The 6MWT scores of the OPTIMISTIC cohort resemble a slightly right-skewed normal distribution, whereas the 6MWT scores of the Canadian cohort showed a more uniform distribution (S Figure 2 , panels A, B, C). Download figure Open in new tab Figure 1: Drivers of protein variance in OPTIMISTIC serum samples. Results from PCA on a filtered subset of the OPTIMISTIC protein samples with complete observations. For the first ten principal components (PCs), a mixed-effect model was fitted with various phenotype measures as predictors and PatientID as a random effect (model 1). (A) Heatmap of the marginal R-squared values for each of the phenotype measures (rows) and the first 10 PCs (columns). (B) Scree plot showing the proportion of variance explained by the first 10 PCs. (C) Plot of sample scores on PC1 and PC2 coloured by CTG repeat length. (D) Plot of sample scores on PC4 and PC5 coloured by biological sex. (E) Plot of sample scores on PC2 and PC3 coloured by distance on the 6MWT. Grey dots reflect missing phenotype information. Abbreviations: See Table 1 for outcome measures; GET Graded Exercise Therapy. Download figure Open in new tab Figure 2: Protein group associations with CTG-repeat length. For each identified protein group (PG) in the OPTIMISTIC serum samples, a mixed effect model (model 2) was fitted with the CTG-repeat as predictor, the covariates sex and study timepoint as fixed and PatientID and well plate as random effects. (A) Volcano plot showing the significance (-log10 of nominal p-values, y-axis) and the effect size of the CTG-repeat (per 100 CTG repeats, x-axis) for the protein groups. Protein groups for which the CTG-repeat was significant (FDR < 0.05) are visualized in black, proteins that were also significant (p < 0.05) in the Canadian validation dataset are visualized in red. (B-C) Scatter plots of the abundance of the protein group IGHG1/IGHG3/IGHG4 and the protein IGHG3 (y-axis) plotted against the CTG-repeat in the OPTIMISTIC and external Canadian validation cohort, respectively. (D-E) Scatter plots of the abundance of the complement protein C3 (y-axis) plotted against the CTG-repeat in the OPTIMISTIC and external Canadian validation cohort, respectively. In Panels B and D, red dots represent samples taken at baseline, and blue dots represent samples taken 10 months after the start of the trial. Rho indicates Pearson’s correlation coefficient. Quality control Since not all peptides could be uniquely matched with a protein, protein groups are reported instead. This group includes all possible protein matches that are equally likely to be present in the samples. After all filtering steps, a total of 259 protein groups were identified in both the OPTIMISTIC and Canadian cohorts. For the 189 paired OPTIMISTIC samples (from the same patient, at two timepoints), the Pearson correlation was calculated for each protein group between the baseline and the matched 10-month expression. This was compared to the average protein group correlation between the baseline and all other 10-month samples. The average correlation of samples from the same patient was 0.97, whereas the average correlation across all unrelated sample combinations was 0.94 (S Figure 3A ). For 143 out of the 189 patients, the correlation of the baseline sample was higher with its matched 10-month sample than with any other 10-month sample in the dataset. Download figure Open in new tab Figure 3: Protein group associations with the 6MWT score. For each identified protein group in the OPTIMISTIC serum samples, a mixed effect model (model 3) was fitted with the 6MWT as predictor, the covariates sex and study timepoint as fixed and PatientID and well plate as random effects. (A) Volcano plot showing the significance (-log10 of nominal p-values (y-axis) and the effect size of the 6MWT for the protein groups (x-axis). Protein groups for which the 6MWT was significant (FDR < 0.05) are visualized in black, proteins that were also significant (p < 0.05) in the Canadian validation dataset are visualized in red. (B-E) Scatter plots of the abundance of Complement C3 (Panels B and C) and Complement factor I (CFI, Panels D and E) (y-axis) against the 6MWT score in metres (x-axis) in the OPTIMISTIC cohort (Panels B and D) and the external Canadian validation cohort (Panels C and E). In Panels B and D, red dots represent samples taken at baseline, and blue dots represent samples taken 10 months after the start of the trial. Rho indicates Pearson’s correlation coefficient . From this, we conclude that there were no systematic sample mix-ups. However, we cannot rule out the possibility of an occasional sample mix-up in our study. Mean-variance protein intensity plots of the OPTIMISTIC samples and the external validation samples demonstrated adequate data normalization and the expected pattern of slightly higher variation in low-abundant proteins (S Figure 3 , panels C, D). The CTG-repeat length is the strongest driver of variance in protein abundance Principal component analysis (PCA) was applied to identify which clinical variables are important drivers of differences in serum peptide and protein abundance within the OPTIMISTIC cohort. For the protein data, the first principal component (PC1) was most strongly associated with the CTG-repeat length, indicating that differences in CTG-repeat sizes can explain up to 20% of differences in the blood proteome in DM1 patients ( Figure 1 , panels A, B, C). Interestingly, PC2 was most strongly linked to the 6MWT ( Figure 1 , panels A, E). The association of PC5 with biological sex was an expected finding, as the blood proteome is known to differ between men and women ( Figure 1 , panels A, D) 44 . The PCA implemented on the peptide-level data revealed a surprising association between PC1 and the plate measured in the mass spectrometer (S Figure 4 , Panel A, E). Further analysis suggests that this is likely due to an uneven distribution of study timepoints across the well plates, with baseline samples primarily located on plates 1–3 and 10-month samples predominantly on plates 4 and 5 (S Figure 3 , panel B). Consequently, to correct for this imbalance, the plate effect was included as a random effect in the statistical analyses of the OPTIMISTIC data. Phenotype PC associations were generally weaker for the peptide data; however, the CTG-repeat was also a driver of variance here (associated with PC4 and PC5, S Figure 4 , panels A, B, C). The association of biological sex with a PC was weaker for the peptide data as well, although a clear separation along PC8 and PC9 was found (S Figure 4 , panel D). Given better interpretability and robustness, we continued with the analysis of the protein group-level data. Download figure Open in new tab Figure 4: ITIH3 associations with markers of physical performance and CTG-repeat length. Association of the MS-quantified relative abundance levels of ITIH3 abundance with the 6MWT scores for the OPTIMISTIC samples (Panel A, colour coded by study visit) and external Canadian validation cohort (Panel B). Association of the ELISA-based quantification of the absolute ITIH3 abundance (mg/ml, log-scale) with the 6MWT scores (Panel C) and grip strength measures (left (Panel D) and right (Panel E) hand) in the external Canadian validation cohort samples. Association of the MS-quantified ITIH3 relative abundance levels with the CTG-repeat length in the OPTIMISTIC (red) and external Canadian validation (blue) cohorts (Panel F). Rho indicates Pearson’s correlation coefficient. Serum protein associations with CTG-repeat length confirm hypogammaglobulinemia in DM1 Out of the 259 protein groups discovered in the OPTIMISTIC serum samples, 161 were significantly associated with CTG-repeat length (FDR < 0.05; Figure 2 , panel A). This finding is in line with the results from the PCA. The validity of this result is further supported by the finding that no significant associations remained after randomization of the CTG-repeat length values. Twelve of the identified candidate protein biomarkers were matched with protein groups that were also significantly associated with CTG-repeat length in the external Canadian validation cohort (p < 0.05; Figure 2 , panel A; Table 3 ). Among these validated findings, the absolute strongest correlations with the CTG repeat were observed for immunoglobulin heavy constant gamma chains IGHG1, IGHG3 and IGHG4 ( Table 3 , Figures 2B and 2C). All of the significant and validated immunoglobulin-related protein groups were negatively associated with the CTG-repeat length, confirming hypogammaglobulinemia in DM1. Furthermore, based on the complete external validation dataset, the four immunoglobulin biomarkers with the strongest negative Pearson Rho value show a consistent trend of lower expression in DM1 patients compared to healthy controls (SFigure 5, panels A-D). In addition to the negative association between the CTG-repeat length and extent of the hypogammaglobulinemia, it is interesting to note that the CTG-repeat length was also positively associated with several protein groups of the complement system (C3, CFI, CFH, Table 3 ). Given the milder clinical phenotype in DM1 patients with an interruption of the CTG-repeat, we expected milder protein group-CTG repeat associations in these patients. However, similar associations with both immunoglobulins and complement factors were found for patients with a CTG repeat interruption (SFigure 6, panels A-F). View this table: View inline View popup Download powerpoint Table 3: Top significant and validated protein associations with CTG-repeat and 6MWT score Increased abundance of complement factors in DM1 patients with reduced functional capacity When studying the associations of protein groups with individual clinical outcome measures, a surprising pattern emerged. In line with the PCA findings, the majority of FDR corrected significant associations were linked to measures of functional capacity (6MWT, DM1ActivC, Actometry based activity; Table 4 ). Yet virtually no other associations were found with disease domains such as fatigue, social functioning, cognitive function, disease impact or apathy. The most significant associations were found for the 6MWT score (n=70; Table 4 ; Figure 3 , panel A), encompassing most of the hits found with DM1ActivC (45 out of 61), MeanENMO (29 out of 36) and M5ENMO (28 out of 32). The finding further supports the validity of the associations with the 6MWT scores, as only one significant association was found after randomizing the 6MWT scores. View this table: View inline View popup Download powerpoint Table 4: Numbers of significant protein group associations with clinical outcome measures Out of the 70 protein groups associated with the 6MWT, 18 were matched with protein groups that were also significant in the external Canadian validation cohort ( Table 3 , Figure 3 , panel A). Interestingly, the majority of these 18 validated hits were also significantly associated with DM1ActivC (n=17, excluding LBP), MeanENMO (n=17, excluding LBP) and M5ENMO (n=15, excluding LBP, SERPINA3 and HPT;HPR) in the OPTIMISTIC cohort. Among these, the two protein groups that show the strongest absolute correlation with the 6MWT are complement C3 and complement factor I, which exhibit an increase in abundance in patients with reduced functional capacity ( Table 3 ; Figure 3 , panels B-E). This increase is also observed for all other validated complement components or factors ( Table 3 ). The abundance of the top biomarker candidates (complement C3 and C5, complement factor I) also showed a trend of higher expression in DM1 patients compared to healthy controls (SFigure 5, panels E-G). In the OPTIMISTIC cohort, one of the significant findings with the 6MWT was ITIH3 (inter-alpha-trypsin inhibitor heavy chain 3), which has previously been linked to complement modulation and disease severity in Myasthenia Gravis (FDR < 0.001; Figure 4 , panel A) 45 . While this negative association with the 6MWT could neither be validated in the MS-nor ELISA-based protein quantification of the external Canadian cohort (resp. p=0.4, p= 0.21, Figure 4 , panels B, C), external significant negative associations were found for the ELISA-based protein quantification and grip strength of both the left and right hand (both p < 0.001; Figure 4 , panels D, E). Interestingly, ITIH3 showed a significant weak positive association with the CTG-repeat size in the OPTIMISTIC cohort (FDR = 0.014), which was, however, not validated in the MS-based protein quantification of the Canadian cohort (p = 0.58) ( Figure 4 , panel F). Based on the complete external validation cohort, no difference in ITIH3 expression was found between DM1 patients and healthy controls (SFigure 5, panel H). No effect of Cognitive Behavioural Therapy or Graded Exercise on the serum blood proteome Given the significant clinical effects of Cognitive Behavioural Therapy (CBT) on the ability to perform daily tasks and to participate in social activities in the OPTIMISTIC clinical trial, we investigated whether these benefits also translated to significant changes in the serum proteome of DM1 patients. However, only eight significant changes between the CBT and standard-of-care group were identified (Vitamin D-binding protein, Complement C5 and C9, Ceruloplasmin, Immunoglobulin heavy constant gamma 3 and mu, Inter-alpha-trypsin inhibitor heavy chain H4, Carboxypeptidase B2; SFigure 7, panels A-I). Upon closer inspection, none showed a convincing link with the CBT intervention, and most were associated with changes in the standard-of-care group over the 10-month study period. Since a subgroup of patients in the intervention group also completed a graded exercise program (GET), we also investigated whether proteome changes could be linked to this intervention. Yet only two proteins were significantly linked to GET (Cadherin-5, Fibronectin; SFigure 8, panel A). Upon closer inspection, these proteins did not show convincing differences between the study groups either (SFigure 8, panels B-C). Since samples were not evenly distributed across well plates for study timepoint (S Figure 3 , panel B), the random effect associated with the ‘well plate’ variable may have confounded this finding. However, applying the same statistical frameworks without the random plate effect did not yield additional findings for either CBT or GET. Combined set of proteins associated with both CTG-repeat length and 6MWT score We subsequently tested whether a set of protein groups would show stronger associations than individual biomarkers. Since the ideal set of biomarkers is strongly associated with both the disease pathology (disease relevant) and the clinical phenotype (clinically relevant), a machine learning based algorithm (bootstrap enhanced Elastic-Net regression) was implemented to find a minimum subset of proteins that together optimally explain the variance of both the CTG-repeat length (disease pathology) and 6MWT scores (clinical phenotype). As a starting point, we considered 47 protein groups that were significantly associated with both the CTG-repeat length (model 2) and the 6MWT score (model 3). We subsequently selected the 42 protein groups that were also unambiguously identified in the external Canadian validation dataset. In an effort to harmonize the distributional differences for the CTG-repeat length and 6MWT scores between the OPTIMISTIC and external Canadian validation dataset, these two dependent variables were box-cox transformed to more closely resemble a normal distribution, and subsequently standardized (SFigure1, panels D-F; S Figure 2 , panels D-F). For each of the 42 protein groups, a Variable Inclusion Probability (VIP) score was generated based on how frequently the protein group was selected as a predictor for both CTG-repeat and 6MWT score across 50,000 statistical model fits (10 imputed datasets * 5,000 bootstrap distributions). These VIP scores were purely based on the OPTIMISTIC baseline dataset, and their predictive value was subsequently evaluated on the OPTIMISTIC 10M (internal validation, test set) and the external Canadian validation cohort (external validation set) samples ( Table 5 ). View this table: View inline View popup Download powerpoint Table 5: Statistical performance estimates of multivariate predictions We observed that up to 32% (Adjusted R-squared in the OPTIMISTIC training set) of the variance in CTG-repeat length can be explained with a set of only 13 protein groups ( Table 5 ). When evaluating the performance of these protein groups in the internal and external validation data, we see a (slightly) lower amount of variance explained (respective R-squared values of 29 and 17%). Interestingly, the same set of 13 protein groups can also explain up to 28% of the variance in the 6MWT scores of the OPTIMISTIC training set (Adjusted R-squared), while also showing similar performance for the internal and external validation data (both R-squared values are 26%). The 13 protein groups, together with the regression coefficient estimates for both the CTG-repeat and 6MWT scores of the OPTIMISTIC baseline model are summarized in Table 6 . Univariate prediction models of the CTG-repeat and 6MWT with these 13 protein groups satisfied all assumptions of linear regression as assessed by the R-package gvlma after removal of one outlier sample identified through visual inspection of Cook’s distances 43 . View this table: View inline View popup Download powerpoint Table 6: Coefficient estimates of multivariate OPTIMISTIC baseline CTG-repeat and 6MWT prediction Discussion This study identified and externally validated a large set of candidate protein biomarkers in the serum of DM1 patients. Low-invasive biomarkers to monitor the response to treatments currently in development for DM1 are much needed, as they may demonstrate a faster and more homogeneous response compared to clinical outcome measures 46 . The serum appeared to be a rich source of low-invasive disease-relevant biomarkers. This is highlighted by the finding that the strongest driver of variance in protein group expression (PC1) was linked to the CTG-repeat length, and that 161 out of the 259 protein groups were significantly associated with the CTG-repeat in the OPTIMISTIC cohort. Most importantly, twelve of these significant associations were confirmed in an independent set of 56 Canadian cohort samples measured in a different laboratory on a different MS instrument. This much smaller validated set of protein groups is most likely the result of the substantially smaller cohort size, leading to a much lower statistical power to detect and validate weaker associations. The strongest association was found with a protein group matched with IGHG1;IGHG3;IGHG4, confirming the known hypogammaglobulinemia in DM1. Although hypogammaglobulinemia in DM1 has been known for a long time, its underlying mechanism remains poorly understood. Serum comparisons of DM1 patients versus healthy controls have shown that the differences predominantly affect total IgG, IgG1, and IgG3, with no differences observed for other immunoglobulins (IgA, IgM, IgG2 and IgG4) 15,17 . Our results demonstrate that hypogammaglobulinemia is worse in more severely affected patients with longer CTG-repeats, a finding which has been reported before but was not consistently found in all studies 15,17,47 . Rather than a deficient production, hypogammaglobulinemia in DM1 appears to be caused by a more rapid breakdown of immunoglobulins, with one study also suggesting the possibility of extravascular redistribution due to increased capillary permeability 17,48,49 . While the exact mechanism of this disease-relevant finding remains unknown, the confirmed hypogammaglobulinemia strongly supports the validity of the untargeted MS-based approach used in this study for biomarker discovery. A novel finding of our biomarker study is the elevation of protein groups associated with several components of the complement system in DM1 patients. Not only was this elevation positively correlated with the CTG-repeat length, but also with clinical scores reflecting reduced functional capacity. Moreover, the top immunoglobulins and complement components or factors both exhibited consistent patterns of respective down- and up-regulation in DM1 patients compared to healthy controls. While consistent, these findings must be interpreted with caution, given the small and heterogeneous cohort of the control group, particularly regarding age. The implemented MS workflow can only measure the abundance of complement proteins but cannot distinguish between unactivated (intact) and activated (cleaved) complement components. Complement activation markers arise from proteolytic cleavage, but the resulting peptides are indistinguishable from those of unactivated proteins, since the trypsin enzyme used in sample preparation cleaves at the same sites. Therefore, higher levels of complement proteins do not necessarily indicate increased complement activity but could also reflect increased hepatic production or reduced consumption of complement proteins. In addition to central complement components (C3, C5), several important regulators of the complement system, such as Complement Factor I and H, show a similar increase in abundance. In case of complement activation, this would suggest a controlled form of chronic inflammation. Complement activation is present in many chronic conditions such as type 2 diabetes, obesity and autoimmunity, all known comorbidities for DM1 50,51 . We can only speculate on the exact causes of the potential complement activation in DM1. It may be due to a more general pro-inflammatory status in DM1 patients 52,53 . On the other hand, reduced serum levels of interleukin 6 (IL-6), a key regulator of the acute phase response and hepatic complement protein synthesis, have been previously shown to significantly correlate with muscle weakness and functional capacity limitations in DM1 54 . Further supporting the association with muscle pathology, the complement system is known to contribute to both fibrotic tissue remodelling and muscle fibre necrosis, and monitoring muscle fibrosis through serum periostin has recently been proposed as a novel stratification biomarker for DM1 55–57 . Another notable finding was the significant negative association between ITIH3 levels and 6MWT performance in the OPTIMISTIC cohort, which was, however, only partially and indirectly validated by significant negative ELISA-based correlations between ITIH3 and grip strength. ITIH3 has recently been identified as a potential biomarker for disease activity in Myasthenia Gravis (MG) 45 . Given the proposed role of ITIH3 in the early stages of complement activation, it has been hypothesized that elevated levels may be the result of an enhanced negative feedback loop in response to dysregulated complement activation. Considering the broad biological functions and disease associations of ITIHs, the authors further concluded that it is not a disease-specific marker, but rather has a potential use in already diagnosed patients to monitor disease activity. Our study in DM1 patients supports this conclusion. Furthermore, in addition to MG, dysregulation of the complement system has also been associated with other neuromuscular disorders such as Facioscapulohumeral muscular dystrophy (FSHD), Amyotrophic Lateral Sclerosis (ALS), and Duchenne Muscular Dystrophy (DMD) 58–60 . Even more so, modulators of the complement system are an active field of study and may provide valuable disease-modifying treatments within the broader neuromuscular field 59,60 . Given the independent associations of the complement proteins with markers for functional capacity, one may also hypothesize that these changes are not exclusively disease associated. Also within the healthy populations, clear associations are described between higher fitness levels and decreased C3 levels in blood, as well as increased C1q levels with reduced muscle strength 61 . As a consequence, the recently demonstrated positive clinical effects of exercise in DM1 may also act as an independent disease modifier by attenuating complement activation 62–64 . Although some patients in the OPTIMISTIC intervention group also participated in a graded exercise therapy (GET) program, this unexpectedly had no significant effect on the serum proteome. This GET program was delivered to only a small number of patients in a highly individualized manner and did not yield clinical benefits beyond those achieved with cognitive behavioural therapy (CBT) alone 8 . We therefore hypothesize that a more standardized training program in a larger cohort is necessary to detect meaningful clinical improvement that is linked to complement modulation. Despite the large number of protein groups being associated with markers of functional capacity, virtually no associations were found with other important DM1 disease-relevant domains such as apathy, cognition, fatigue, pain or social measures. While the blood-brain-barrier may prevent the detection of specific disease-associated proteins from the central nervous system (CNS) in serum, another well-studied CNS-derived biomarker, Neurofilament Light chain, is detectable in blood 65 . More sensitive and targeted detection methods quantifying absolute protein abundance may be necessary to identify molecular biomarkers associated with neurocognitive phenotypes. Considering the significant improvements of the DM1-Activ-C and the 6MWT scores in the OPTIMISTIC study, in combination with the abundant number of significant associations between protein groups and these outcome measures, it was surprising that no CBT-induced effects were observed in the blood proteome 8 . This was particularly surprising given that a very large number of genes were associated with the average intervention response in our previous transcriptomic study 14 . A confounder in the proteomic study was the uneven distribution of the OPTIMISTIC samples across the MS plates with regard to the study time point. Since this MS plate effect was regressed out, possible CBT-induced effects might have also been masked. Yet, peptide-level quantification seemed to suffer more from technical plate effects than protein-level quantification, and a supplemental analysis without regressing out the well plate effect did not reveal significant associations either. On the other hand, the general lack of overlap in identified biomarkers between our transcriptomic and proteomic study is likely the result of several biological and technical reasons. On a biological level, many serum proteins are either produced in the liver or secreted by various tissues and organs, whereas blood-based RNA-seq mostly reflects the leukocyte transcriptome. On a technical level, many of the mRNA biomarkers code for low-abundant proteins like cytokines, which are not detectable by our current MS-based methods. Given that the significant associations with both the CTG-repeat and the 6MWT were individually relatively weak, we hypothesized that the clinical utility of these candidate biomarkers could be improved by finding a minimum combined subset of protein groups. Moreover, by combining multiple proteins, random variation in individual expression levels is more likely to average out, making the biomarker set a more robust indicator of disease state than individual proteins. The implemented bootstrap-enhanced Elastic-Net algorithm has robustly led to the identification of 13 protein groups that together can explain up to 32% and 28% of the variance of the CTG-repeat and 6MWT, respectively, while also performing comparatively well on the internal validation data. It is crucial to interpret the internal validation results with caution, as true independence is not achieved because many measurements originate from the same patients at different time points. Yet, even for the external validation, up to 17% and 26% of the CTG-repeat and 6MWT variance, respectively, were explained with our OPTIMISTIC baseline model. Given the inherent variability in protein expression, as well as for the CTG-repeat and 6MWT measurements, the amount of variance explained is in line with the expectations and supports the use of a combined set of proteins over individual proteins for therapeutic biomarker discovery. To establish their utility in clinical trials, further research is needed on longitudinal and potentially non-linear relationships between changes in the blood proteome and clinical outcomes. Conclusions Our study extends the repertoire of lab-based biomarkers, including mRNA biomarkers and protein biomarkers, for potential use as surrogate endpoints in DM1 trials 14,65,66 . We have performed careful internal and external validation to confirm the robustness of the identified protein biomarkers. Going beyond individual protein associations, we demonstrated that a set of proteins is most likely to meet the statistical criteria required for surrogate clinical trial endpoints. Further longitudinal studies are needed to validate these findings, and methods that enable the absolute quantification of selected proteins are essential to advance their clinical utility. Data Availability Researchers interested in accessing the phenotype data from the OPTIMISTIC study are invited to contact K. Mul (karlien.mul-at-radboudumc.nl) and complete a data access agreement. All requests will be reviewed by a panel comprising from each of the four participating clinical sites, with K. Mul serving as chair. The mass spectrometry proteomics data of the OPTIMISITIC samples have been deposited to the ProteomeXchange Consortium via the PRIDE partner repository with the dataset identifier PXD067476. The phenotype data used and/or analysed from the Canadian cohort are available from the corresponding author upon reasonable request following the proper evaluation of the research protocol by the Ethics Review Board of the Centre intégré universitaire de santé et de services sociaux du Saguenay - Lac-St-Jean (Saguenay, Québec, Canada; cynthia.gagnon4 -at- usherbrooke.ca). The phenotype data used from the German cohort, including the ELISA-based quantification data of ITIH3, are stored at the Department of Neurology of the University Hospital Duesseldorf (Heinrich Heine University) and are available upon request to roos -at andreas-roos.de. The mass spectrometry proteomics data of both the Canadian and German cohort have been deposited to the ProteomeXchange Consortium via the PRIDE partner repository with the dataset identifier PXD060035. Author’s contributions D. van As: Methodology, Validation, Software, Data Curation, Formal analysis, Writing – Original Draft, Writing – Review & Editing, Visualization. T. Caeys, R. Salz, D. Van Haver, S. Dufour, A. van Deelen, J. Gloerich, R. Gabriels, P.J. Volders, A. van Gool, F. Impens, and L. Martens Investigation, Resources, Data Curation, Writing – Review & Editing. V. Dobelmann, A. Gangfuss, T. Ruck, G. Gourdon, E. Duchesne, C. Gagnon, A. Roos Investigation, Resources, Writing – Review & Editing, Validation. H. Lochmüller, B. Schoser, G. Bassez, B.G.M. van Engelen Conceptualization, Writing – Review & Editing. P.A.C. ‘t Hoen Conceptualization, Methodology, Writing – Original Draft, Writing – Review & Editing, Supervision, Project administration, Funding acquisition. Statements and Declarations Not applicable. Ethical considerations & Consent to participate (also mentioned in methods) OPTIMISTIC clinical trial samples The OPTIMISTIC clinical trial ( NCT02118779 ) was conducted in accordance with the Declaration of Helsinki and approved by the medical-ethical scientific committee for human research at each of the four participating clinical centres. Prior to the trial, all enrolled patients provided written informed consent, which included the usage of the pseudonymized blood samples for the research purposes of this study. Ethical approval for mass spectrometry-based proteomics profiling of the serum samples was obtained from the Ethics Committee of Ghent University Hospital (B670201940027). For more specific methodological details of the clinical trial, including trial protocols and an overview of all (patient-reported) outcome measures, please refer to the published trial protocol and the main study publication 8,18 . Canadian cohort samples The study was conducted at the Saguenay Neuromuscular Clinic and was approved by the Ethics Review Board of the Centre Intégré Universitaire de Santé et Services Sociaux du Saguenay–Lac-St-Jean (Chicoutimi, Québec, Canada; #2010-046). Written informed consent was obtained from all participants including biomarkers studies. German cohort samples All patients and/or caregivers, as well as healthy donors, gave written consent to donate blood samples for research-driven biomarker studies. The local ethical committee approved biomarker studies on neuromuscular patients and controls (19-9011-BO). Conflict of interest The authors declared no potential conflicts of interest with respect to the research, authorship, and/or publication of this article. Funding This research was partly funded by the European Union’s Horizon 2020 research and innovation program “ERA-NET rare disease research implementing IRDiRC objectives - N° 643578” via the Dutch research funding agency ZON-MW, through the E-Rare Joint Transnational Call JTC 2018 “Translational Research Projects on Rare Diseases” (ReCognitION project: Recognition and validation of druggable targets from the response to Cognitive Behavior Therapy in Myotonic Dystrophy type 1 patients from integrated -omics networks). This study was also partially funded by the European Union Seventh Framework Program, under grant agreement no. 305697 (the Observational Prolonged Trial In Myotonic dystrophy type 1 to Improve Quality of Life Standards, a Target Identification Collaboration [OPTIMISTIC] project) and by a Dutch Research Council (NWO) grant to the Netherlands X-omics Initiative (project 184.034.019). ED is supported by a Chercheur-boursier Junior 1 salary award from the Fonds de recherche du Québec-santé (FRQS-311186). AR received funding from the German Society of Muscular Diseases (DGM). TR, AR and VD acknowledge the financial support of the Myositis-Netz. HL receives support from the Canadian Institutes of Health Research (CIHR) for Foundation Grant FDN-167281 (Precision Health for Neuromuscular Diseases), Transnational Team Grant ERT-174211 (ProDGNE) and Network Grant OR2-189333 (NMD4C), from the Canada Foundation for Innovation (CFI-JELF 38412), the Canada Research Chairs program (Canada Research Chair in Neuromuscular Genomics and Health, 950-232279), the European Commission (Grant # 101080249) and the Canada Research Coordinating Committee New Frontiers in Research Fund (NFRFG-2022-00033) for SIMPATHIC, and from the Government of Canada Canada First Research Excellence Fund (CFREF) for the Brain-Heart Interconnectome (CFREF-2022-00007). Data availability (also mentioned in methods) OPTMISTIC clinical trial Researchers interested in accessing the phenotype data from the OPTIMISTIC study are invited to contact K. Mul ( karlien.mul{at}radboudumc.nl ) and complete a data access agreement. All requests will be reviewed by a panel comprising from each of the four participating clinical sites, with K. Mul serving as chair 8 . The mass spectrometry proteomics data of the OPTIMISITIC samples have been deposited to the ProteomeXchange Consortium via the PRIDE partner repository with the dataset identifier PXD067476 24 . External cohorts The phenotype data used and/or analysed from the Canadian cohort are available from the corresponding author upon reasonable request following the proper evaluation of the research protocol by the Ethics Review Board of the Centre intégré universitaire de santé et de services sociaux du Saguenay–Lac-St-Jean (Saguenay, Québec, Canada; cynthia.gagnon4{at}usherbrooke.ca ). The phenotype data used from the German cohort, including the ELISA-based quantification data of ITIH3, are stored at the Department of Neurology of the University Hospital Düsseldorf (Heinrich Heine University) and are available upon request to roos{at}andreas-roos.de.The mass spectrometry proteomics data of both the Canadian and German cohort have been deposited to the ProteomeXchange Consortium via the PRIDE partner repository with the dataset identifier PXD060035 24 . Results and Code availability For both studies, the full list of significant protein group associations with the CTG-repeat and 6MWT scores, as well as the table containing all Variable Inclusion Probabilities, will be made available on GitHub after publication. Additionally, all R scripts used in this work are available via https://github.com/cmbi/DM1_ReCognitION_Proteomics Acknowledgements We are grateful to the John Walton Muscular Dystrophy Research Centre Biobank, and in particular to Dan Cox, for the assistance in the storage and delivery of the patient samples used in this study. We would also like to thank dr. Dick Thijssen, Professor of Cardiovascular Physiology at the Radboudumc, for critically reviewing the manuscript. OPTIMISTIC consortium: K. Okkersen, C. Jimenez-Moreno, S. Wenninger, F. Daidj, J. C. Glennon, S. Cumming, R. Littleford, D.G. Monckton, H. Lochmüller, M. Catt, C.G. Faber, A. Hapca, P.T. Donnan, G. Gorman, G. Bassez, B. Schoser, H. Knoop, S. Treweek, B.G.M. van Engelen. ReCognitION consortium: D. van As, P.A. C. ‘t Hoen, D. G. Wansink, F. Impens, R. Gabriels, T. Claeys, A. Ravel-Chapuis, B.J. Jasmin, J.C. Glennon, N. Mahon, S. Nieuwenhuis, L. Martens, P. Novak, D. Furling, B.G.M. van Engelen, M. Catt, A. Baak, G. Gourdon, A. MacKenzie, C. Martinat, N. Neault, A. Roos, E. Duchesne, R. Salz, R. Thompson, S. Baghdoyan, A.M. Varghese, P. Blom, S. Spendiff, A. Manta. References 1. ↵ Johnson NE , Butterfield RJ , Mayne K , et al. Population-Based Prevalence of Myotonic Dystrophy Type 1 Using Genetic Analysis of Statewide Blood Screening Program . Neurology 2021 ; 96 : e1045 – e1053 . OpenUrl CrossRef PubMed 2. ↵ Yotova V , Labuda D , Zietkiewicz E , et al. Anatomy of a founder effect: myotonic dystrophy in Northeastern Quebec . Hum Genet 2005 ; 117 : 177 – 187 . OpenUrl CrossRef PubMed Web of Science 3. ↵ Brook JD , McCurrach ME , Harley HG , et al. Molecular basis of myotonic dystrophy: Expansion of a trinucleotide (CTG) repeat at the 3′ end of a transcript encoding a protein kinase family member . Cell 1992 ; 68 : 799 – 808 . OpenUrl CrossRef PubMed Web of Science 4. ↵ Mahadevan M , Tsilfidis C , Sabourin L , et al. Myotonic dystrophy mutation: An unstable CTG repeat in the 3′ untranslated region of the gene . Science 1992 ; 255 : 1253 – 1255 . OpenUrl Abstract / FREE Full Text 5. ↵ Fu YH , Pizzuti A , Fenwick RG , et al. An unstable triplet repeat in a gene related to myotonic muscular dystrophy . Science 1992 ; 255 : 1256 – 1258 . OpenUrl Abstract / FREE Full Text 6. ↵ Cumming SA , Jimenez-Moreno C , Okkersen K , et al. Genetic determinants of disease severity in the myotonic dystrophy type 1 OPTIMISTIC cohort . Neurology 2019 ; 93 : e995 LP-e1009. OpenUrl CrossRef PubMed 7. ↵ Cumming SA , Hamilton MJ , Robb Y , et al. De novo repeat interruptions are associated with reduced somatic instability and mild or absent clinical features in myotonic dystrophy type 1 . Eur J Hum Genet 2018 ; 26 : 1635 – 1647 . OpenUrl CrossRef PubMed 8. ↵ Okkersen K , Jimenez-Moreno C , Wenninger S , et al. Cognitive behavioural therapy with optional graded exercise therapy in patients with severe fatigue with myotonic dystrophy type 1: a multicentre, single-blind, randomised trial . Lancet Neurol 2018 ; 17 : 671 – 680 . OpenUrl PubMed 9. van As D , Okkersen K , Bassez G , et al. Clinical Outcome Evaluations and CBT Response Prediction in Myotonic Dystrophy . J Neuromuscul Dis 2021 ; 1 – 16 . 10. Signorelli M , Ebrahimpoor M , Veth O , et al. Peripheral blood transcriptome profiling enables monitoring disease progression in dystrophic mice and patients . EMBO Mol Med 2021 ; 13 : e13328 . OpenUrl CrossRef PubMed 11. Byrne LM , Rodrigues FB , Blennow K , et al. Neurofilament light protein in blood as a potential biomarker of neurodegeneration in Huntington’s disease: a retrospective cohort analysis . Lancet Neurol 2017 ; 16 : 601 . OpenUrl CrossRef PubMed 12. Spijker S , Van Zanten JS , De Jong S , et al. Stimulated gene expression profiles as a blood marker of major depressive disorder . Biol Psychiatry 2010 ; 68 : 179 – 186 . OpenUrl CrossRef PubMed 13. Sznajder ŁJ , Scotti MM , Shin J , et al. Loss of MBNL1 induces RNA misprocessing in the thymus and peripheral blood . Nat Commun 2020 ; 11 : 2022 . 14. van Cruchten RTP , van As D , Glennon JC , et al. Clinical improvement of DM1 patients reflected by reversal of disease-induced gene expression in blood . BMC Med 2022 ; 20 : 1 – 17 . OpenUrl CrossRef PubMed 15. Kaminsky P , Lesesve JF , Jonveaux P , et al. IgG deficiency and expansion of CTG repeats in myotonic dystrophy . Clin Neurol Neurosurg 2011 ; 113 : 464 – 468 . OpenUrl PubMed 16. Larsen B , Johnson G , van Loghem E , et al. Immunoglobulin concentration and Gm allotypes in a family with thirty-three cases of myotonic dystrophy . Clin Genet 1980 ; 18 : 13 – 19 . OpenUrl PubMed 17. Sasson SC , Corbett A , McLachlan AJ , et al. Enhanced serum immunoglobulin G clearance in myotonic dystrophy-associated hypogammaglobulinemia: a case series and review of the literature . J Med Case Rep ; 13 . Epub ahead of print 20 November 2019 . DOI: 10.1186/S13256-019-2285-3 . OpenUrl CrossRef 18. van Engelen B , Abghari S , Aschrafi A , et al. Cognitive behaviour therapy plus aerobic exercise training to increase activity in patients with myotonic dystrophy type 1 (DM1) compared to usual care (OPTIMISTIC): Study protocol for randomised controlled trial . Trials ; 16 . Epub ahead of print 23 May 2015 . DOI: 10.1186/s13063-015-0737-7 . OpenUrl CrossRef 19. Reza M , Cox D , Phillips L , et al. MRC Centre Neuromuscular Biobank (Newcastle and London): Supporting and facilitating rare and neuromuscular disease research worldwide . Neuromuscul Disord 2017 ; 27 : 1054 – 1064 . OpenUrl CrossRef PubMed 20. Monckton DG , Wong L-JC , Ashizawa T , et al. Somatic mosaicism, germline expansions, germline reversions and intergenerational reductions in myotonic dystrophy males: small pool PCR analyses . Hum Mol Genet 1995 ; 4 : 1 – 8 . OpenUrl CrossRef PubMed Web of Science 21. Morales F , Couto JM , Higham CF , et al. Somatic instability of the expanded CTG triplet repeat in myotonic dystrophy type 1 is a heritable quantitative trait and modifier of disease severity . Hum Mol Genet 2012 ; 21 : 3558 – 3567 . OpenUrl CrossRef PubMed Web of Science 22. Demichev V , Messner CB , Vernardis SI , et al. DIA-NN: Neural networks and interference correction enable deep proteome coverage in high throughput . Nat Methods 2020 ; 17 : 41 . OpenUrl CrossRef PubMed 23. Demichev V , Szyrwiel L , Yu F , et al. dia-PASEF data analysis using FragPipe and DIA-NN for deep proteomics of low sample amounts . Nat Commun 2022 131 2022 ; 13 : 1 – 8 . OpenUrl CrossRef PubMed 24. Perez-Riverol Y , Bandla C , Kundu DJ , et al. The PRIDE database at 20 years: 2025 update . Nucleic Acids Res 2025 ; 53 : D543 – D553 . OpenUrl CrossRef PubMed 25. R Core Team . R: A Language and Environment for Statistical Computing . 26. RStudio Team . RStudio: Integrated Development Environment for R . 27. Valero-Mora PM . ggplot2: Elegant Graphics for Data Analysis . J Stat Softw ; 35 . Epub ahead of print 2010 . DOI: 10.18637/jss.v035.b01 . OpenUrl CrossRef 28. Wilke CO . cowplot: Streamlined Plot Theme and Plot Annotations for ‘ggplot2’ . 29. Consortium TU , Bateman A , Martin M-J , et al. UniProt: the Universal Protein Knowledgebase in 2025 . Nucleic Acids Res 2025 ; 53 : D609 – D617 . OpenUrl CrossRef PubMed 30. Zhu Y , Orre LM , Tran YZ , et al. DEqMS: A Method for Accurate Variance Estimation in Differential Protein Expression Analysis . Mol Cell Proteomics 2020 ; 19 : 1047 . OpenUrl Abstract / FREE Full Text 31. Bates D , Mächler M , Bolker B , et al. Fitting Linear Mixed-Effects Models Using lme4 . J Stat Software ; Vol 1, Issue 1 . Epub ahead of print 2015 . DOI: 10.18637/jss.v067.i01 . OpenUrl CrossRef PubMed 32. Barton K . MuMIn: Multi-Model Inference . R package version 1.46.0. 33. Kassambara A. ggcorrplot: Visualization of a Correlation Matrix using ‘ggplot2’ . 34. Kuznetsova A , Brockhoff PB , Christensen RHB . lmerTest Package: Tests in Linear Mixed Effects Models . J Stat Software ; Vol 1 , Issue 13 . Epub ahead of print 2017 . DOI: 10.18637/jss.v082.i13 . OpenUrl CrossRef 35. Kassambara A. ggpubr: ‘ggplot2’ Based Publication Ready Plots . 36. van Buuren S , Groothuis-Oudshoorn K. mice: Multivariate imputation by chained equations in R . J Stat Softw 2011 ; 45 : 1 – 67 . OpenUrl CrossRef 37. Audigier V , Resche-Rigon M. micemd: Multiple Imputation by Chained Equations with Multilevel Data . 38. Venables W , Ripley BD . Modern Applied Statistics with S. Fourth Edition . Fourth Edi. New York : Springer-Verlag New York , 2002 . 39. Canty A , Ripley B. boot: Bootstrap R (S-Plus) Functions . 40. Friedman J , Hastie T , Tibshirani R . Regularization paths for generalized linear models via coordinate descent . J Stat Softw 2010 ; 33 : 1 – 22 . OpenUrl CrossRef PubMed Web of Science 41. Abram S V. , Helwig NE , Moodie CA , et al. Bootstrap enhanced penalized regression for variable selection with neuroimaging data . Front Neurosci ; 10 . Epub ahead of print 2016 . DOI: 10.3389/fnins.2016.00344 . OpenUrl CrossRef 42. Bunea F , She Y , Ombao H , et al. Penalized least squares regression methods and applications to neuroimaging . Neuroimage 2011 ; 55 : 1519 – 1527 . OpenUrl CrossRef PubMed Web of Science 43. Peña EA , Slate EH . Global validation of linear model assumptions . Journal of the American Statistical Association 2006 ; 101 : 341 – 354 . OpenUrl CrossRef PubMed 44. Ramsey JM , Schwarz E , Guest PC , et al. Molecular Sex Differences in Human Serum . PLoS One 2012 ; 7 : e51504 . OpenUrl CrossRef PubMed 45. Schroeter CB , Nelke C , Stascheit F , et al. Inter-alpha-trypsin inhibitor heavy chain H3 is a potential biomarker for disease activity in myasthenia gravis . Acta Neuropathol ; 147 . Epub ahead of print 1 June 2024 . DOI: 10.1007/S00401-024-02754-6 . OpenUrl CrossRef 46. Pascual-Gilabert M , Artero R , López-Castel A . The myotonic dystrophy type 1 drug development pipeline: 2022 edition . Drug Discov Today 2023 ; 28 : 103489 . OpenUrl PubMed 47. Pan-Hammarström Q , Wen S , Ghanaat-Pour H , et al. Lack of correlation between the reduction of serum immunoglobulin concentration and the CTG repeat expansion in patients with type 1 Dystrofia Myotonica . J Neuroimmunol 2003 ; 144 : 100 – 104 . OpenUrl CrossRef PubMed 48. Wochner RD , Drews G , Strober W , et al. Accelerated breakdown of immunoglobulin G (IgG) in myotonic dystrophy: a hereditary error of immunoglobulin catabolism . J Clin Invest 1966 ; 45 : 321 . OpenUrl CrossRef PubMed Web of Science 49. Suzumura A , Yamada H , Matsuoka Y , et al. Immunoglobulin abnormalities in patients with myotonic dystrophy . Acta Neurol Scand 1986 ; 74 : 132 – 139 . OpenUrl CrossRef PubMed 50. Shim K , Begum R , Yang C , et al. Complement activation in obesity, insulin resistance, and type 2 diabetes mellitus . World J Diabetes 2020 ; 11 : 1 . 51. Thurman JM , Frazer-Abel A , Holers VM . The Evolving Landscape for Complement Therapeutics in Rheumatic and Autoimmune Diseases . Arthritis Rheumatol 2017 ; 69 : 2102 – 2113 . OpenUrl PubMed 52. Nieuwenhuis S , Widomska J , Blom P , et al. Blood Transcriptome Profiling Links Immunity to Disease Severity in Myotonic Dystrophy Type 1 (DM1) . Int J Mol Sci ; 23 . Epub ahead of print 1 March 2022 . DOI: 10.3390/IJMS23063081 ,. OpenUrl CrossRef 53. Ozimski LL , Sabater-Arcis M , Bargiela A , et al. The hallmarks of myotonic dystrophy type 1 muscle dysfunction . Biol Rev 2021 ; 96 : 716 – 730 . OpenUrl 54. Conte TC , Duran-Bishop G , Orfi Z , et al. Clearance of defective muscle stem cells by senolytics restores myogenesis in myotonic dystrophy type 1 . Nat Commun 2023 ; 14 : 4033 . OpenUrl CrossRef PubMed 55. Llorián-Salvador M , Byrne EM , Szczepan M , et al. Complement activation contributes to subretinal fibrosis through the induction of epithelial-to-mesenchymal transition (EMT) in retinal pigment epithelial cells . J Neuroinflammation 2022 ; 19 : 182 . OpenUrl CrossRef PubMed 56. Engel AG , Biesecker G . Complement activation in muscle fiber necrosis: demonstration of the membrane attack complex of complement in necrotic fibers . Ann Neurol 1982 ; 12 : 289 – 296 . OpenUrl CrossRef PubMed Web of Science 57. Nguyen CDL , Jimenez-Moreno AC , Merker M , et al. Periostin as a blood biomarker of muscle cell fibrosis, cardiomyopathy and disease severity in myotonic dystrophy type 1 . J Neurol 2023 ; 270 : 3138 – 3158 . OpenUrl PubMed 58. Wong CJ , Wang L , Holers VM , et al. Elevated plasma complement components in facioscapulohumeral dystrophy . Hum Mol Genet 2022 ; 31 : 1821 – 1829 . OpenUrl CrossRef PubMed 59. Dalakas MC , Alexopoulos H , Spaeth PJ . Complement in neurological disorders and emerging complement-targeted therapeutics . Nat Rev Neurol 2020 ; 16 : 601 – 617 . OpenUrl PubMed 60. Lee JD , Woodruff TM . The emerging role of complement in neuromuscular disorders . Semin Immunopathol 2021 ; 43 : 817 – 828 . OpenUrl 61. Rothschild-Rodriguez D , Causer AJ , Brown FF , et al. The effects of exercise on complement system proteins in humans: a systematic scoping review . Exerc Immunol Rev 2022 ; 28 : 1 – 35 . OpenUrl PubMed 62. Girard-Côté L , Gallais B , Gagnon C , et al. Resistance training in women with myotonic dystrophy type 1: a multisystemic therapeutic avenue . Neuromuscul Disord 2024 ; 40 : 38 – 51 . OpenUrl PubMed 63. Di Leo V , Lawless C , Roussel MP , et al. Resistance Exercise Training Rescues Mitochondrial Dysfunction in Skeletal Muscle of Patients with Myotonic Dystrophy Type 1 . J Neuromuscul Dis 2023 ; 10 : 1111 – 1126 . OpenUrl PubMed 64. Mikhail AI , Nagy PL , Manta K , et al. Aerobic exercise elicits clinical adaptations in myotonic dystrophy type 1 patients independently of pathophysiological changes . J Clin Invest ; 132 . Epub ahead of print 16 May 2022 . DOI: 10.1172/JCI156125 . OpenUrl CrossRef 65. Nicoletti TF , Rossi S , Vita MG , et al. Elevated serum Neurofilament Light chain (NfL) as a potential biomarker of neurological involvement in Myotonic Dystrophy type 1 (DM1) . J Neurol 2022 ; 269 : 5085 – 5092 . OpenUrl PubMed 66. Heatwole CR , Miller J , Martens B , et al. Laboratory abnormalities in ambulatory patients with myotonic dystrophy type 1 . Arch Neurol 2006 ; 63 : 1149 – 1153 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted September 08, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Large-scale Proteomics Profiling of Peripheral Blood of DM1 patients identifies biomarkers for disease severity and functional capacity Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Large-scale Proteomics Profiling of Peripheral Blood of DM1 patients identifies biomarkers for disease severity and functional capacity Daniël van As , Tine Claeys , Renee Salz , Delphi Van Haver , Sara Dufour , Amber van Deelen , Jolein Gloerich , Ralf Gabriels , Pieter Jan Volders , Vera Dobelmann , Andrea Gangfuss , Tobias Ruck , Genevieve Gourdon , Elise Duchesne , Cynthia Gagnon , Andreas Roos , Alain van Gool , Francis Impens , Lennart Martens , Hanns Lochmüller , Benedikt Schoser , Guillaume Bassez , Baziel G.M. van Engelen , Peter A.C. ’t Hoen medRxiv 2025.09.05.25335077; doi: https://doi.org/10.1101/2025.09.05.25335077 Share This Article: Copy Citation Tools Large-scale Proteomics Profiling of Peripheral Blood of DM1 patients identifies biomarkers for disease severity and functional capacity Daniël van As , Tine Claeys , Renee Salz , Delphi Van Haver , Sara Dufour , Amber van Deelen , Jolein Gloerich , Ralf Gabriels , Pieter Jan Volders , Vera Dobelmann , Andrea Gangfuss , Tobias Ruck , Genevieve Gourdon , Elise Duchesne , Cynthia Gagnon , Andreas Roos , Alain van Gool , Francis Impens , Lennart Martens , Hanns Lochmüller , Benedikt Schoser , Guillaume Bassez , Baziel G.M. van Engelen , Peter A.C. ’t Hoen medRxiv 2025.09.05.25335077; doi: https://doi.org/10.1101/2025.09.05.25335077 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Neurology Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4428) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1508) Epidemiology (15225) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6592) Geriatric Medicine (667) Health Economics (997) Health Informatics (4530) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15914) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6593) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (971) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (129) Pathology (663) Pediatrics (1692) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5441) Public and Global Health (9223) Radiology and Imaging (2197) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9fffe97b2ef60db4',t:'MTc3OTQ5NTgwNw=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-21T05:10:58.409756+00:00

License: CC-BY-4.0