Accurate classification of CNS tumors through DNA methylation data analysis of select genomic regions

doi:10.1101/2025.10.07.25337348

Accurate classification of CNS tumors through DNA methylation data analysis of select genomic regions

2025 · doi:10.1101/2025.10.07.25337348

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 86,595 characters · extracted from preprint-html · click to expand

Accurate classification of CNS tumors through DNA methylation data analysis of select genomic regions | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Accurate classification of CNS tumors through DNA methylation data analysis of select genomic regions Elaheh Moradi , Juuso Vuorinen , Alejandra Rodriguez-Martinez , Meeri Pekkarinen , Miina Vulli , Suvi Lehtipuro , Vidal Fey , Francesco Tabaro , Anja Hartewig , Sinikka Ampuja , Andries De Koker , Ruben Van Paemel , Bram De Wilde , Nico Callewaert , Milla E.L. Kuusisto , Hanna-Riikka Teppo , Outi Kuittinen , Kristiina Nordfors , Hannu Haapasalo , Joonas Haapasalo , Matti Nykter , Juha Kesseli , Kirsi J. Rautajoki doi: https://doi.org/10.1101/2025.10.07.25337348 Elaheh Moradi 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland 2 A.I. Virtanen Institute for Molecular Sciences, University of Eastern Finland , Kuopio 70150, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Juuso Vuorinen 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alejandra Rodriguez-Martinez 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Meeri Pekkarinen 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Miina Vulli 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Suvi Lehtipuro 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Vidal Fey 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Francesco Tabaro 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Anja Hartewig 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sinikka Ampuja 16 Fimlab Laboratories ltd., Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Andries De Koker 3 Center for Medical Biotechnology, VIB , Ghent, Belgium 4 Department of Biochemistry and Microbiology, Ghent University , Ghent, Belgium Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ruben Van Paemel 5 Cancer Research Institute Ghent (CRIG) , Ghent, Belgium 6 Department of Pediatric Hematology, Oncology and Stem Cell Transplantation, Ghent University Hospital , Ghent, Belgium Find this author on Google Scholar Find this author on PubMed Search for this author on this site Bram De Wilde 5 Cancer Research Institute Ghent (CRIG) , Ghent, Belgium 6 Department of Pediatric Hematology, Oncology and Stem Cell Transplantation, Ghent University Hospital , Ghent, Belgium Find this author on Google Scholar Find this author on PubMed Search for this author on this site Nico Callewaert 3 Center for Medical Biotechnology, VIB , Ghent, Belgium 4 Department of Biochemistry and Microbiology, Ghent University , Ghent, Belgium Find this author on Google Scholar Find this author on PubMed Search for this author on this site Milla E.L. Kuusisto 7 Cancer Research and Translational Medicine Research Unit, University of Oulu , Oulu, Finland 8 Medical Research Center, Oulu University Hospital , Oulu, Finland 9 Department of Hematology, Länsi-Pohja Central Hospital , Kemi, Finland 10 Department of Oncology and Radiotherapy, Oulu University Hospital , Oulu, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hanna-Riikka Teppo 7 Cancer Research and Translational Medicine Research Unit, University of Oulu , Oulu, Finland 8 Medical Research Center, Oulu University Hospital , Oulu, Finland 11 Department of Pathology, Oulu University Hospital , Oulu, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Outi Kuittinen 10 Department of Oncology and Radiotherapy, Oulu University Hospital , Oulu, Finland 12 Faculty of Health Sciences, Institute of Clinical Medicine, University of Eastern Finland , Kuopio, Finland 13 Center of Oncology, Kuopio University Hospital , Kuopio, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kristiina Nordfors 14 Department of Pediatric Hematology and Oncology, Tampere University Hospital , Tampere Finland 15 Tays sCancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hannu Haapasalo 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland 16 Fimlab Laboratories ltd., Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Joonas Haapasalo 15 Tays sCancer Center, Tampere University Hospital , Tampere, Finland 16 Fimlab Laboratories ltd., Tampere University Hospital , Tampere, Finland 17 Department of Neurosurgery, Tampere University Hospital and Tampere University , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Matti Nykter 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Juha Kesseli 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kirsi J. Rautajoki 1 Faculty of Medicine and Health Technology, Tampere University and Tays Cancer Center, Tampere University Hospital , Tampere, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: kirsi.rautajoki{at}tuni.fi Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Background Current clinical neuropathology practice utilizing DNA methylation information to support diagnosis of central nervous system (CNS) tumors could benefit from increased interpretability and cost reductions. Methods We identified and characterized limited sets of genomic regions (i.e. features) that can be used for accurate classification of CNS tumors based on DNA methylation data. The features were selected using a hybrid strategy combining filtering and Elastic Net Logistic Regression (ENLR). A Support Vector Machine (SVM)-based classifier was trained using select 1003 informative features and an established cohort of 60 diagnostic tumor classes comprising 82 tumor DNA methylation classes and 9 control classes. Validation was performed using external microarray and targeted DNA methylation sequencing cohorts. Results Informative regions were enriched in enhancers and associated with genes involved in neural development and morphogenesis. In the microarray validation cohort of 1993 samples representing 76 DNA methylation classes, overall accuracy of our SVM classifier was 0.96, when using 1003 features and after the differences to the molecular neuropathology classifier were evaluated based on reported final tumor diagnosis and diagnostic relevance. Its performance remained similar (overall accuracy 0.95-0.96) when the number of features was further decreased, down to 163. An accuracy of 0.94 was detected in the in-house targeted sequencing cohort of 17 cases. Conclusions The classification of CNS tumors is feasible and accurate based on a very limited set of genomic regions, which facilitate further method development and the interpretation of classification results, likely benefiting CNS tumor diagnostics worldwide. Highlights Hybrid feature selection identifies 1,003 CpGs strongly linked to CNS tumors SVM model achieves 0.96 accuracy with confidence and top-3 predictions Robust across sequencing and microarray platforms for clinical use Reliable even when reduced to 163 CpG features, lowering cost and complexity Introduction CNS tumors constitute a large and heterogeneous group of entities. 1 Traditionally, histological methods have been considered the gold standard for diagnosing CNS malignancies. 2 Despite its demonstrated utility, histological diagnosis also has limitations such as inter- and intraobserver variability 3 as well as site-related discrepancies. 4 Advances in diagnostic technologies has enabled a more accurate and integrated approach that incorporates molecular methods for detecting genetic alterations and epigenetic features as part of a standard procedure. 5 DNA methylation profiles represent stable and tumor-specific profile traits, making them informative for diagnostic purposes. 7 – 9 A methylation-based classifier developed by Capper et al . 10 (hereafter referred to as the molecular neuropathology (MNP) classifier) is freely available online for comprehensive classification of CNS malignancies, and is widely used for integrated diagnosis in clinical settings. 10 Integration of histological and DNA methylation-based assessments led to diagnostic changes in 12% of the cases in the seminal study (71% involving WHO grade modifications) 10 and in 14% and 10% of cases in subsequent implementation studies, 11 , 12 with a similar fraction of WHO grade changes (71%) in the latter. The importance of methylome profiling is underscored by its integration as a diagnostic tool in the most recent WHO classification, also giving rise to new entities uncovered via DNA methylation analysis. 2 , 13 In the clinical setting, the classification is typically based on hybridization microarray data covering up to 935 000 (EPIC v2.0 with MNP v12.8) CpG DNA methylation sites in the genome. The data acquisition procedure is somewhat time-consuming, costly and tedious to customize. Additional factors, such as low tumor cell content and poor DNA quality, may lead to low confidence results. 10 Furthermore, unexpected or low-confidence classifications are difficult to interpret due to the limited explainability of the approach. The cost-effectiveness and versatility of methylation-based classification can be enhanced by analyzing a minimal set of informative features with targeted DNA methylation sequencing. Here, we developed an approach to select informative features from those extracted from the Illumina 450k methylation array, and developed a machine-learning classifier for the separation of brain tumors based on this reduced set of features. Materials and methods Acquisition and processing of DNA methylation microarray data To construct a dataset for feature selection, IDAT files from the CNS tumor reference cohort established by Capper et al . (GSE90496) were downloaded from the Gene Expression Omnibus (GEO). For feature selection using the multi-class consistency approach, the files were processed together with a set of 36 blood and 30 cfDNA samples (including samples from GSE122126, GSE109430, GSE77056, and GSE128654) to allow the expansion of cfDNA analysis in follow-up projects. The files were read into R using minfi, 14 filtered, batch-effect corrected, and normalized as described by Capper et al . 10 Detection p-values were also calculated. The Capper method included the following probe filters: 1) removal of probes targeting sex chromosomes; 2) removal of probes containing a single-nucleotide polymorphism (dbSNP132 Common) within five base pairs of and including the targeted CpG site; 3) probes not uniquely mapped to the human reference genome (hg19), allowing for one mismatch; and 4) probes not included in the Illumina EPIC array. No samples were dropped based on p-values of <0.05. Probe filtering was not performed using the detection p-value. The pre-processed dataset contained 2,801 samples and 428,230 probes, referred to as the reference cohort. Validation dataset of Capper et al. (GSE109379) was similarly downloaded and pre-processed from GEO and resulted in an additional 1104 samples and 428,799 probes. Another validation set was constructed from six cohorts (GSE147548, GSE125450, GSE198855, GSE156090, GSE193196, and GSE104210), resulting in 899 samples and 428,230 probes. We determined methylation-based classes for samples without prior classification results available using the next-generation molecular neuropathology (MNP) platform ( https://www.molecularneuropathology.org/mnp/ ). Hybrid feature selection strategy We refined the original feature set with a hybrid feature selection strategy combining filtering approaches and Elastic Net Logistic Regression (ENLR) ( Figure 1A and Supplementary Figure S1A). First, the 32,000 most variable probes were selected from the Capper et al. 10 reference set in combination with blood and cfDNA sample sets. Utilization of blood and cfDNA sets was limited to this step. Then, a union of the outputs of intra-class and multi-class consistency filtering approaches (4,109 and 1,826 respectively) was taken to gain 4,976 features. This final set, produced by the filtering-based methods, was then used for the next feature selection step with ENLR. Download figure Open in new tab Fig. 1 Characterization of the select 1003 features associate them with gene regulatory regions, nervous system, and development. (A) Schematic representation of the feature selection strategies. First, a variance filtering was applied followed by two parallel filtering-based strategies. Then, the results were pooled together. The resulting set of 4976 features was subjected to supervised feature selection by elastic net logistic regression (ENLR). Finally, classification was done using a support vector machine (SVM). (B) Schematic representation of the supervised ENLR-based feature selection. The reference cohort for training consisted of 4976 features measured from 2801 samples belonging to 91 tumor or control classes in total. A 10-fold cross-validation (CV) was used for feature selection by ENLR, and the process was repeated for 10 iterations, resulting in 1003 features. (C) Variable methylation patterns of the 1003 selected probes throughout DNA methylation classes and similarities among related tumor types. Distribution of features across methylation consistency categories (as defined in the multiclass consistency approach) for each tumor and normal methylation class is shown. Average tumor grade for each methylation class is shown on the right side of the bar plots. The tumor DNA methylation classes are ordered alphabetically. NA, no formal grading system. (D) The 1003 features were located mostly in introns, CpG islands, and interCGI regions. The percentage of features falling into different CpG island, gene, and enhancer locations for the 1003 feature set are shown. Absolute feature numbers for each genomic location are marked above the corresponding bar. (E) Features located in enhancers were over-represented in the selected 1003 feature set. The percentage of enhancer-linked features among the 1003 features as compared to the percentage in the Illumina Human Methylation 450k array after preprocessing is shown. Statistical significance was tested with Fisher’s exact test. (F) Development, morphogenesis and nervous system were the most common themes associated with genes located near to the CpGs contained in the 1003 features. Genes with CpGs in their promoter (proximal and distal) were more prone to be involved in development and morphogenesis than those with CpGs in other locations. The results from gene set enrichment analysis were filtered so that only Gene Ontology (GO) terms containing more than ten genes, with an enrichment p-value below 0.001 and with a fold enrichment above two were considered and grouped under Biological process themes. ENLR-based feature selection ENLR 15 was employed for the union 4,976 set. The ENLR combines LASSO and ridge penalties, making it particularly suitable for the applications involving highly correlated features. 16 We implemented ENLR using the glmnet package (version 4.1-7) in R (version 4.1.1). We conducted 10 runs of 10-fold cross-validation (CV) to gain 100 models, extracted the absolute coefficient values (ACV) derived from these models and ranked the features based on these models ( Figure 1B ). We established two criteria to select the best feature set. First, features with average ACV>0.001 across the 100 models were selected. Secondly, we ranked the features for each class separately and selected the top ten most important features (key features) per class. Finally, we combined these two feature sets to form the final selected feature set. Consequently, the number of features was reduced from 4,976 to 1,003. Stricter cut-offs for average coefficients and key features reduced feature sets to 533, 298 and 163 when using cutt-offs 0.002 and four, 0.01 and four, 0.01 and two respectively. Coordinates and consistency categories of the probes belonging to each reduced set along with the averaged ENLR coefficients are provided in (Supplementary data 1). Feature characterization The two sets of features, the 1003-set and the 4976-set, were annotated to the nearest gene according to the location with respect to the gene, using the pre-built hg19 annotations of the annotatr Bioconductor package 17 with default settings. Gene set enrichment analysis (GSEA) was performed separately for genes having probe(s) at the promoter, other genomic region(s) or either, as in. 18 Redundancy was reduced from the term lists with the help of Revigo. 19 The biological process Gene Ontology (GO) terms were manually categorized into themes. Classifier After selecting the relevant features, we apply a pattern recognition method called support vector machine (SVM) 23 – 25 with a linear kernel to obtain the final predicted values. We used the multiclass classification supporting e1071 R package with the one-vs-one approach. The decision values were used to assign class labels. To assign confidence scores to our predicted classes, we utilized the predicted probability scores obtained from the SVM. The predicted classes were categorized into two groups to assess their confidence levels by defining a threshold for the relative difference between the probabilities of the best and second-best matches. We denote the predicted probability of the best choice as Prob 1 and the difference between the first- and second-best choices as Diff 12 and categorize the samples into higher confidence group 1 (Diff 12 /Prob 1 >0.5), and to lower confidence group 2 (Diff 12 /Prob 1 <0.5), and finally evaluate the confidence levels associated with each predicted class. In-house targeted DNA methylation sequencing cohort Fresh-frozen (FF) and formalin-fixed paraffin-embedded (FFPE) tissue samples were collected from 17 patients who underwent surgery between 2007 and 2018 at Tampere University Hospital. Additionally, two patients underwent surgery at Oulu University Hospital, and only FFPE tissues were available for DNA isolation. Genomic DNA was extracted from FF and FFPE tissue samples using a QIAamp DNA Mini kit (Qiagen) and GeneRead DNA FFPE kit (Qiagen), respectively, and manufacturer’s instructions. Preparation of sequencing libraries The EM-seq libraries were prepared with EM-seq workflow for standard insert libraries, 20 which were target enriched with the IDT xGen Hybridization Capture of DNA Libraries Protocol using a IDT custom xGen Custom Hyb Panel-Accel covering 1003 CpGs. The SureSelect libraries were prepared using the SureSelectXT Methyl reagent kit (Agilent Technologies) according to the manufacturer’s protocol for 1 µg DNA samples (SureSelectXT Methyl-Seq Target Enrichment System for Illumina Multiplexed Sequencing). A custom designed capture library containing 46,084 probes (2.923 Mbp) was used for target enrichment, with few exceptions. The cfRRBS libraries were prepared using the original protocol 21 , 22 with 10 ng of sample DNA input appropriately mixed with unmethylated lambda DNA controls, but deviating from the original protocol by using Diagenode’s Premium Bisulfite Conversion Kit (Diagenode, Liège, Belgium) for the bisulfite conversion step. For each method, library quantity was monitored with Qubit dsDNA Broad Range assay or High Sensitivity assay using Qubit™ 4.0 instrument (Invitrogen). Library quality was monitored with TapeStation instrument and D1000 or High-Sensitivity D1000 Screen tape (Agilent Technologies) or alternatively with Fragment Analyzer 5200 and HS NGS Kit (Agilent Technologies). The resulting libraries were sequenced on an Illumina NovaSeq sequencer (Illumina) using a PE150 option. In-house sequencing data preprocessing and validation dataset creation for classification EM-seq and SureSelect files were processed similarly. After the sequencing data QC step with FastQC and adapter trimming with Trim Galore, bisulfite mapping against hg19 (UCSC) and mapping quality control with duplicate removal were performed using Bismark v0.22.3 ( https://www.bioinformati cs.babraham.ac.uk ). 26 MethylSnake ( https://github.com/ftabaro/MethylSnake ), an in-house Snake-make pipeline, was used for the cfRRBS data processing. We processed the preprocessed sequencing data further in the R 4.0.4 environment (R Core Team 2021) using MethylKit 1.16.1 27 for coverage filtering out CpGs with read coverage <10 and used destranding for batch-uniting samples. To represent the methylation level for each probe, we used the probe coordinates from Illumina HumanMethylation 450 k array (2.0.6) to select the nearest measured CpGs within 200 bp. We expanded the coordinates by 50 bp in both directions, then calculated the mean methylation level of CpGs within this 100bp region and finally used these as input to the classifier as a matrix of samples x features (Supplementary data 6). Visualizations The EM-seq samples were plotted together with the reference cohort samples for tSNE visualization. 28 The matrix with 1003 features was filtered by the distance of the feature (in practice, nearest CpG in EM-seq data) from the nearest probe, allowing it to be at most 200 bp. The EM-seq and capper samples were combined into a single matrix using these 986 features. Next, the distance matrix was calculated with the scaled Pearson correlation (1 − cor (“ pearson ”)) /2) and given to Rtsne, using 50 initial dimensions, perplexity of 30, and 1000 iterations. The results were visualized using ggplot2, ComplexHeatmap and karyoploteR packages. 29 Results Feature selection resulted in 1003 informative genomic regions We used reference cohort 10 with 82 CNS tumor methylation classes and nine control classes for feature selection. Our hybrid feature selection strategy combines multiple filtering approaches with elastic net logistic regression (ENLR) ( Figure 1A ). The term feature refers to a designated genomic region whose CpG DNA methylation state is measured with the respective microarray probe or, alternatively, with targeted bisulfite sequencing (BS-seq) or targeted enzymatic methyl-seq (EM-seq) methods. Variance-based filtering of the 32,000 most variable features with parallel intra-class and multi-class consistency approaches resulted in 4976 features as a union of the outputs (Methods and Supplementary Figure S1A). Subjecting this feature set to ENLR resulted in a set of 1003 features ( Figure 1B , Supplementary data 1). We assessed the variability in consistent and inconsistent features between tumor classes by plotting the feature counts by methylation category for each class ( Figure 1C ), which shows related tumor classes to generally have similar consistent feature counts. Unmethylated features dominate in parts of pituitary adenomas and plasmacytomas. In contrast, methylated probes overshine in isocitrate dehydrogenase (IDH)-mutant diffuse gliomas and atypical teratoid/rhabdoid tumors (ATRTs). The vastly heterogeneous higher grade tumor classes show a higher number of inconsistent probes opposing the lower-grade malignancies ( Figure 1C ). Most of the 1003 selected features were intronic and located in CpG islands (CGI) and interCGI regions ( Figure 1D ). Features showed a 2.86-fold enrichment towards enhancers (p<0.0001, two-sided Fisher’s exact test) ( Figure 1E ). The 4976 feature set mirrored those characteristics (Supplementary Figure S1B). Uniform distribution throughout the genome was retained through ENLR (Supplementary Figure S1C) within the limitations of the DNA methylation array measurements (Supplementary Figure S1D). The selected features were linked to genes associated with development, morphogenesis, and nervous system-related biological processes ( Figure 1F , Supplementary data 2), suggesting a functional relevance. The selected 1003 features were distributed into two main clusters based on their methylation profiles throughout the tumor classes, with each cluster representing either predominantly unmethylated (Cluster 1) or methylated (Cluster 2) probes ( Figure 2 ). The number of selected features with a non-zero ENLR coefficient was rather evenly distributed across tumor methylation classes; the highest contributing feature counts were detected in some of the glioblastomas (GBMs) ( Figure 2 ). A pairwise comparison of the methylation classes identified clusters of tumor classes with highly similar, consistent methylation profiles, which corresponded to methylation groups of the same or related tumor types (Supplementary Figure S2). A rather unique DNA methylation profile was observed for cerebellar liponeurocytoma and retinoblastoma, while GBMs presented few differences in consistent features, not only among them, but also in general when compared with many other tumor classes. Download figure Open in new tab Fig. 2 DNA methylation patterns of the select 1003 features across tumor methylation classes show a high degree of consistency. Inconsistent features are more predominant especially in diffuse gliomas and most embryonal tumors. Hierarchical clustering of the 1003 features according to their methylation categories identifies two distinct probe clusters, namely predominantly unmethylated probes (cluster 1, light blue) and predominantly methylated probes (cluster 2, light red). On the right side of the heatmap, the proportions of feature categories in the two identified feature clusters are visualized as stacked bar plots. Tumor classes with higher proportions of inconsistent features tend to include higher numbers of non-zero features in ENLR (gray bar plots on the right). High performance of the Support Vector Machine (SVM) classifier Performance evaluation in the training phase used a 10-fold CV approach in the reference cohort ( Figure 3A ). The evaluation was performed at the methylation class level, and at diagnostic class level after grouping samples based on the 5th edition of the WHO’s CNS tumor classification (Supplementary data 3). Using MNP DNA methylation class annotations as ground truth, overall and class-specific performance metrics were favorable in the reference cohort, with misclassifications mainly within diagnostic classes (Supplementary Figure S3B, and Supplementary data 3). At the diagnostic level, there were 17 misclassifications (0.61 %) compared to 64 (2.3 %) at the methylation class level. (Supplementary Figure S3A-D). Download figure Open in new tab Fig. 3 The performance of the SVM-based classifier is high in the DNA methylation array validation cohort (A) Schematic representation of the training phase of the SVM-based classifier. A 10 fold cross-validation (CV) was used for performance evaluation, and the whole process was repeated for 10 iterations. (B) Schematic representation of the evaluation phase of the SVM-based classifier. After selecting the optimal feature set, SVM was used for designing the final classifier. The final classifier was evaluated with an external validation data set. (C) Box plot showing the overall performance metrics (balanced accuracy, F1 score, precision, and recall) of SVM classification across tumor methylation classes based on MNP class labels in the validation data. Results are shown for the whole dataset (All), and also after separating samples into three sample groups based on their calibrated MNP score. Group 1 includes samples with MNP score ≥0.84, Group 3 samples with score <0.5, and Group 2 rest of the samples. (D) DNA methylation classes with poorer performance include higher counts of misclassified low-confidence samples (samples in Group 2-3) or very few samples in general. Heatmap representation of the classification performance metrics across DNA methylation classes in the validation data when using MNP classification as the ground truth. Total number of samples in the class, the percentage of misclassified samples, and their distribution to MNP confidence groups (Groups 1-3) are shown on the right. (E) Distribution of samples in the validation cohort based on the SVM confidence score. Classification accuracy in each sample was evaluated against MNP classification. SVM confidence score of 0.5 was selected as a cutoff for high-confidence classification. (F) Distribution of samples in the validation cohort based on the SVM confidence score. Classification accuracy in each sample was evaluated so that irrelevant differences were ignored, and diagnosis-assisted assessment was used. (G) Nearly all of the samples (1658 of 1668, 99.4%) in SVM high confidence group (Group 1) are correctly classified. In the figure, the validation cohort samples are distributed into MNP (Groups 1-3, high confidence group: Group 1) and SVM (Groups 1-2) confidence groups (in rows), and the difference in MNP vs. SVM classification is shown in columns. For the majority of the samples, the DNA methylation class was the same (n=1768) or the difference between the classifications was not relevant for diagnostic purposes (n=102). Part of the samples were classified more accurately with MNP (n=48) or SVM (n=36). Samples with a different classification with MNP and SVM, and both classifications also deviating from the reported diagnosis (n=39), were predominantly detected in low-confidence classification groups. (H) Diagnostic tumor type distribution of samples which received high confidence classification (Group 1) only with MNP (on the left) or SVM (on the right) classification. The SVM classifier’s performance was assessed using 1003 features with an external validation set of 1993 samples representing 72 tumors and four control methylation classes ( Figure 3B and Supplementary data 4). The samples lacking previous classification were analyzed using the MNP methylation classifier v11b4 (version 3.3) (MolecularNeuropathology.org ) hosted by the German Cancer Research Center (DKFZ). The validation dataset was divided into three groups based on the calibrated MNP Classifier scores: Group 1 (score ≥0.84, n= 1658), Group 2 (0.5 ≤ score < 0.84, n= 241), and Group 3 (score <0.5, n= 104) (Supplementary data 4). A calibrated score cutoff of 0.84 for group 1 provides the best balance between sensitivity and specificity, and is also used in clinical settings. 12 , 30 We first assessed performance metrics (balanced accuracy, F1 score, precision, recall) by comparing SVM results to MNP classifications in the validation set (Supplementary data 4). As expected, all performance metrics were highest in group 1 ( Figure 3C , Supplementary Figure S3F-H). Performance declined and variation increased with lower calibrated scores ( Figure 3C , Supplementary Figure S3B-E). DNA methylation class-specific performance metrics varied significantly between tumor types. GBMs, infantile hemispheric gliomas, IDH-mutant astrocytomas, pediatric plexus tumors (PLEX PED), and anaplastic pilocytic astrocytomas yielded the highest number of samples with discrepant classifications ( Figure 3D ). Again, most discrepancies occurred within the same diagnostic class (Supplementary Figure S3F-G). The concordance of SVM results with those of MNP classification was found to be very high (accuracy: 96.42%) for samples with high MNP classification confidence (samples in group 1). However, a gradual decrease with decreasing MNP confidence scores was also observed ( Figure 3C , Supplementary Figure S3E-H). This raises the question of whether the MNP classification represents a ground truth for cases of low-confidence MNP classification. Therefore, we decided to analyze the samples for which discrepant classification was obtained using the SVM and MNP approaches by comparing their final integrated diagnosis to the classification results to incorporate tumor diagnosis into the ground truth evaluation (Supplementary data 5). This also allowed us to ignore irrelevant differences, such as a change from one DNA methylation class to another within the same tumor type, and to label those classifications as correct, in which only the SVM classification represented the tumor diagnosis. The obtained correct/incorrect classifications were referred to as diagnosis-assisted assessments. In a total of 102 (3.6% of the validation cohort) samples, the difference was not relevant, and both SVM and MNP represented tumor diagnosis (Supplementary data 5). Furthermore, 48 (1.7%) tumors obtained better classification with MNP than SVM, and both classifications differed from the diagnosis in 39 (1.4%) cases. Interestingly, 36 (1.3%) of the samples obtained more accurate classification with SVM than MNP. When samples with irrelevant classification differences and samples with an accurate classification only in SVM were labeled as correctly classified, the overall accuracy of the SVM classification in the validation cohort was found to be 0.957. This diagnosis-assisted “correctly classified” label was used in the following analyses. The metric based on SVM probability and the difference from the second-best hit (hereafter referred to as confidence score) ( Figure 3E-F ) was determined as the most informative criterion to evaluate the confidence of SVM classification. SVM classifications were then divided into a high-confidence group 1 (confidence score ≥0.5) and a low-confidence group 2 (confidence score <0.5). In the SVM group 1, 10 of the 1668 samples were misclassified (accuracy 0.996), three of which were correctly classified by MNP ( Figure 3G , Supplementary data 5). In the SVM low-confidence group 2, 77 of the 325 samples were misclassified (accuracy 0.763) (Supplementary data 5). These included 45 samples that were correctly classified only with MNP (20 of which were in MNP group 1). Recurrent SVM misclassifications in SVM group 2 included GBM MES tumors classified as CONTROL INFLAM or CONTROL REACT (n=11) and IDH-wild-type GBMs (n=16) classified as LGGs characterized by BRAF and other MAPK alterations (PXA and LGG GG, PA/GG ST, and DIG/DIA). Furthermore, one HMB, one DMG K27, and five PLEX PED A/B samples were classified as CONTR ADENOPIT in SVM Group 2. SVM classification was found to be more accurate than that of MNP for 36 tumors ( Figure 3G , Supplementary data 5). All samples classified as IDH-mutant astrocytomas by MNP and as oligodendroglioma by SVM (n=6, two of which were in SVM group 1) were classified as IDH-mutant oligodendrogliomas upon applying the final integrated diagnosis (Supplementary data 5). Furthermore, the SVM classified correctly 10 GBMs and 11 LGG DIG/DIA samples misclassified by MNP. Analysis of samples with the same classification or irrelevant classification differences revealed that 154 samples (7.7% of the validation cohort) received high-confidence classification (were inside Group 1) only with SVM and 121 samples (6.1% of the validation cohort) only with MNP ( Figure 3H , Supplementary data 5). Most of the samples classified with high-confidence only with SVM were IDH-wild-type GBMs (42%) or IDH-mutant astrocytomas (15%) ( Figure 3H ). A set of 10 samples from Capper’s validation cohort yielded a misleading profile according to the original publication 10 , and were thus not included in the validation dataset presented above. This indicates that these samples showed a high MNP classifier confidence score and a conflicting methylation profile compared to the original tumor diagnosis, yet tumor classification was not altered after pathological re-evaluation. SVM did not classify these samples any better than the MNP classifier. Classification is feasible based on targeted DNA methylation sequencing of informative features To demonstrate the utility of our selected set of features, we prepared EM-Seq libraries from a cohort of 17 tumor samples ( Table 1 ), and enriched them with a hybridization capture panel targeting 1003 informative features. This yielded a mean on-target rate (i.e. the proportion of reads aligned to the targeted CpG) of 72.1% (range 67.8-76.2%). We used data from 986 out of the 1003 informative features that were covered by a minimum sequencing depth of 10 reads within 200 bp of the respective array index CpGs (Supplementary Figure S4A-C). We visualized our samples together with the reference cohort from 10 using the t-distributed stochastic neighbor embedding (t-SNE) method ( Figure 4A ). With this unsupervised clustering approach, all but one sample (number 13, diagnosed as DNT) were located overlapping or in close proximity to the DNA methylation cluster, corresponding to its tumor diagnosis ( Figure 4A ). Download figure Open in new tab Fig. 4 Correct SVM classification was obtained for all but one sample analyzed with targeted DNA methylation sequencing. (A) t-SNE dimensionality reduction of the reference cohort from Capper et al . (n=2,801), measured by the methylation array, and our in-house cohort (n=17), analyzed with targeted sequencing (EM-seq), using the 986 features covered by both methods. With this approach, samples were positioned into DNA methylation clusters similarly as in Capper et al . Individual array samples are color-coded by the methylation class to which they belong. Sequenced samples (number 1-17) and their clinical diagnosis are shown in black. All but sample 13 (LGG DNT) are located within or closest to the DNA methylation class representing their diagnosis. (B) All but sample number 13 were classified correctly with EM-seq targeted sequencing. Histopathological diagnosis, results of the SVM classifier based on EM-seq data, SVM confidence scores, and closest cluster in the tSNE are shown. (C) Samples with accurate classification show clearly higher probabilities for the best hit than the others, whereas nearly even probabilities are detected for sample 13 with inaccurate classification. Probability plot showing the three most probable methylation classes for each sample in the in-house EM-seq cohort, as obtained by the SVM classifier based on 986 measured features. View this table: View inline View popup Download powerpoint Table 1. DNA methylation sequencing cohort. The table presents for each sample the pathological diagnosis, tumor grade, IDH mutation status, age group at surgery, sex, disease stage, sample material type, experienced pathologist’s estimate of tumor purity (%) based on histological analysis, and whether the sample was analyzed with EM-seq, SureSelect or cfRRBS sequencing. The SVM classifier was retrained based on the aforementioned 986 features, and run for EM-Seq data, resulting in confirmation of the histological diagnosis in all but one sample, namely sample 13, which showed unexpected localization in our t-SNE results ( Figure 4A-B ). This sample was diagnosed as DNT. However, based on SVM classification, it falls into “low grade glioma, subclass midline pilocytic astrocytoma” (LGG, PA MID) class. The SVM confidence scores ( Figure 4B , Supplementary data 6) and probability distributions ( Figure 4C ) showed that the confidence of the SVM classification was poor for sample 13. The correct tumor class (LGG, DNT) was the fourth-best hit for this sample, with a probability score (0.09) which was only slightly lower than that for the first three classes. To better understand the reasons behind misclassification, we also analyzed the beta values of Sample 13 with respect to the consistency class in LGG DNT and other tumor classes with similar SVM probabilities (Supplementary data 1 and 6). There were 17 features that were not measured by sequencing, and for 22 features, the sequencing data covered only nearby CpGs and not the microarray index CpG. All but two of these features were either inconsistent in DNT, or DNA methylation values represented the consistency class in DNT or the same consistency class across these tumor classes, so they were unlikely to influence the classification results. For features with sequencing data covering the index CpG, 834 features were consistent in LGG DNT, but 102 (12.2%) of them did not represent the consistency class in EM-seq data, suggesting that the tumor has some atypical characteristics that complicate accurate SVM classification. Sample 13 was pathologically re-evaluated by an experienced neuropathologist to confirm the original tumor diagnosis, which led to DNT diagnosis being maintained. The tumor showed typical DNT characteristics, including a multinodular growth pattern with oligodendroglial-like features in the cell nodules, a very low proliferation rate, and columnar architecture. However, the tumor was not a typical DNT, as it was void of neuronal dysplasia, floating neurons, and the mucoid matrix. Increased lymphocyte counts were also detected in some tumor regions. These are atypical for DNT and potentially influence DNA methylation values. To summarize the EM-seq sequencing results, an accuracy of 0.941, with all but one case being correctly classified, was obtained with the EM-seq-based classification. The incorrect classification of sample 13 was unlikely to be caused by technical issues, and this sample showed atypical characteristics both based on DNA methylation patterns and histological evaluation. Accurate CNS tumor classification from a reduced numbers of features To better characterize methylation sequencing-based classification, we subjected an in-house CNS tumor cohort ( Table 1 ) to two methylation sequencing procedures that targeted a greater part of the methylome than our EM-seq panel. Customized targeting of 12798 variable features, which were found to be the most interesting based on the initial filtering steps, was achieved using hybrid capture-based SureSelect methylation sequencing. Moreover, we determined how well the recently developed cell-free RRBS (cfRRBS) method 21 covered our informative features. These methods covered 615 (61%) and 325 (32%) of the selected 1003 features, respectively, when the 200bp distance cutoff to the index CpG was used (Supplementary Figure S5 and Supplementary data 6). We retrained and applied an SVM classifier to each dataset to assess performance in our setup. Again, all samples were correctly classified, except Sample 13, which had also been misclassified using EM-seq in the cfRRBS data ( Figure 5A ). Interestingly, all samples were correctly classified with SureSelect data, although Sample 13 still had a low confidence score (i.e., 0.202) (Supplementary data 6). Thus, accurate classification was achieved with DNA methylation sequencing using fewer than the original 1,003 features (986 for EM-seq, 615 for SureSelect, and 325 for cfRRBS) ( Figure 5A-B ). Interestingly, performance dropped only marginally when we reduced features via ENLR-based selection to 533, 298, or 163 in the DNA microarray data ( Figure 5C-D , Supplementary data 7). Cutting down features altered only minimally the distribution of SVM confidence scores ( Figure 5E ). Classification accuracies upon a diagnosis-assisted assessment (as for the 1003 set above) remained high: 0.957, 0.955, and 0.947 for 533, 298, and 163 features, respectively (Supplementary data 7), and they did not show any drastic drops in separate DNA methylation classes ( Figure 5F ). Download figure Open in new tab Fig. 5 Tumor classification is feasible with even more reduced numbers of features. (A) Summary of SVM classifier labeling when applied to three different DNA methylation sequencing data that cover various amounts (986, 615, 325) of selected informative features. Similar high-precision classification is obtained even with just 325 features. All tumors were classified correctly with SureSelect data, although the SVM confidence score of sample 13 was poor with all the approaches. *: SVM confidence score <0.5, na: not analyzed (B) Only a moderate number of features are measured with all three targeted sequencing approaches. Venn diagram showing the overlap between the set of selected 1003 informative features (990 of which are covered by at least one method) and different reduced sets measured with DNA methylation sequencing approaches. Regions in which the maximum distance to the closest measured CpG was 200 bp from the probe coordinate were included in the analysis. (C) Reduction of feature numbers by adjusting the cutoffs for ENLR-based feature selection has a moderate impact on the performance parameters of the SVM classifier in the training phase. Data represents performance in the microarray reference cohort. (D) Performance metrics of the SVM classifier in the microarray validation cohort are only slightly decreased when the number of features is reduced by adjusting the ENLR parameters. (E) Decreased numbers of features have only a minor effect on the distribution of SVM confidence scores or misclassified samples. (F) Classification performance remains similar across DNA methylation classes when the number of features was reduced from 1003 to 163. Heatmap representing the classification recall rate for each class when irrelevant differences were ignored and diagnosis-assisted assessment was used. Samples were distributed to the classes based on MNP class labels. Discussion Here, we identified genomic regions (i.e. features) informative for CNS tumor classification, and established a basis for the flexible and versatile use of targeted DNA methylation data for this purpose. We developed a strategy to reduce the number of informative features using a machine-learning-based classification approach with a performance similar to that of other classifiers 10 , 31 , 32 with only 163-1003 features. The selected features are linked to genes associated with the nervous system and development, and can be studied further from this perspective. Recent findings suggest that the elastic-net method and ridge-penalized multinomial logistic regression-calibrated SVM method with linear kernels achieved the highest area under the curve (AUC) and minimized misclassification errors. 33 We found that the SVM-based classifiers provided accurate results in this study. Notably, classification was nearly as precise when using as few as 163 informative features, which is considerably smaller than that required for accurate classification with random forest (1000 features). 34 In future, our approach could be further developed by training a new classifier based on the updated MNP classification with an extended set of tumor classes. A similar model is also likely to work well for targeted analyses such as meningioma grading. 9 , 35 It is relevant to point out that although we used label information in both intra-class and multi-class consistency filter-based feature selection approaches across the entire reference cohort before starting the CV loop, our feature selection focused on stability within groups rather than minimizing classification error. High performance in a large external validation cohort supports the reliability and robustness of our methodology. The high potential of tumor detection and diagnostics based on DNA methylation sequencing has been previously demonstrated in several studies across different malignancies. Nanopore sequencing enables intraoperative and surgical strategies to modify the classification of brain tumors using both methylation and DNA copy number signatures derived from ultra-low coverage or shallow whole-genome sequencing, with or without the adaptive sampling feature of nanopore flow cells. 36 , 37 The recently commercialized single-primer enrichment method QIAseq Targeted Methyl Sequencing could be a valid option for at least smaller sets of features, as well. The utilized cfRRBS method is based on well established RRBS workflow 38 , and allows cost-effective sequencing of the CpG-rich fraction of the genome from low sample inputs, thereby enabling its clinical utilization in e.g. pediatrics. 21 , 22 Efforts to optimize other protocols to accommodate lower DNA input requirements and the physical constraints of cfDNA 39 – 41 have enabled sensitive and specific cancer detection and localization in a pan–cancer context. 42 – 44 These protocols have also been applied to classify CNS tumors using plasma cfDNA as the starting material. 45 , 46 These and our results underline the potential and feasibility of DNA methylation based CNS tumor classification. We mainly evaluated the performance after marking irrelevant differences as correctly classified. Differences were considered irrelevant if, for example, DNA methylation classes fell within the same tumor type and did not affect patient treatment prediction or prognosis. Different GBM subtypes are especially difficult to distinguish from one another. As several different GB M subtypes are also typically present within one tumor bulk, 48 their distinction is clinically less relevant. Furthermore, changing the tumor class to another age- or location-related tumor class within the same tumor type was also considered irrelevant, as patient age and tumor location are already known during diagnostic sampling. We designed an SVM confidence score based on the relative difference in the SVM probabilities between the best and second-best hits, clearly separating correct classifications from incorrect ones. A cutoff value of 0.5 was set based on MNP-based accuracy calling. A further decrease of the cutoff without any major drop in performance is possible. With diagnosis-assisted assessment, the SVM accuracy remained 0.990 or 0.979 in the high-confidence and 0.748 or 0.761 in the low-confidence groups, with a cutoff of 0.4 or 0.3, respectively. An explainable analysis method helps to identify correct diagnosis, especially in the case of low-confidence methylation profiling result or inconsistency with classification and histopathological diagnosis. Our approach, involving only hundreds of features, makes it relatively easy to explore methylation profiles, as demonstrated with one DNT sample (sample 13). Identifying exceptions in methylation profile that may be linked to other factors in the tumor, such as genetic alterations, location, patient age, degree of immune infiltration, or necrosis, will likely become more efficient as the number of available targeted methylation sequencing samples increases. Furthermore, the characterization of informative features provides grounds for the development of assays (e.g. droplet digital PCR) to distinguish tumor entities of interest from each other. A very high performance was obtained from both MNP and SVM approaches, which is promising when considering the reduction in the number of features used in SVM classification. Strikingly, the performance of SVM was maintained when the number of features was reduced to 533, 298, or even 163, indicating the robustness of the approach. This is highly significant, as feature counts do not differ remarkably from the number of classes (82 for tumors and 9 for normal tissues in the reference cohort). However, classification is driven by feature combinations spanning all classes, rather than by individual class-specific features, which is likely to increase its robustness. Furthermore, our results underline the full compatibility of this approach with sequencing-based data acquisition, which increases its usability, especially in healthcare units that favor cost-effective CNS tumor classification in clinical practice. A recent study by Yuan et al. (2025) introduced a neural network-based classifier for CNS tumors using binarized beta values and a random masking strategy to simulate sparse methylation profiles, achieving high classification accuracy. While our results are comparable, our approach differs in several important aspects. Rather than relying on random feature dropout, we apply a biologically informed hybrid feature selection strategy that integrates consistency filtering with ENLR. This enables the identification of a compact, biologically meaningful set of CpG regions directly linked to CNS tumor types. Our classifier maintains high performance even when the feature set is systematically reduced to as few as 163 CpGs, in both microarray and sequencing data. Furthermore, our method retains continuous beta values, preserving the full resolution of methylation signals. This is particularly relevant in low-purity tumor samples, where fixed-threshold binarization may obscure subtle but diagnostically informative methylation patterns. Additionally, our classification framework employs a Support Vector Machine (SVM), which outputs not only the predicted tumor class but also a confidence score and probability estimates for the top three ranked tumor types. This layered output allows clinicians to assess how decisively the top prediction outperforms alternatives, with large probability gaps indicating strong classification certainty and smaller gaps suggesting potential diagnostic ambiguity, such as mixed or fused tumor types. Taken together, our study demonstrates that carefully designed traditional machine learning approaches remain a powerful and interpretable option for DNA methylation–based tumor classification. By integrating hybrid feature selection with SVM classification, we identified a compact and biologically meaningful feature set that delivers high diagnostic accuracy while reducing complexity and cost. The framework is robust across microarray and sequencing platforms, remains reliable even when many features are unavailable, and incorporates strategies to handle incomplete CpG coverage. Importantly, the model provides probability scores, confidence values, and top-3 ranked predictions, enhancing diagnostic transparency and supporting clinical decision-making. In sum, this work establishes a scalable, flexible, and clinically adaptable solution for precision neuro-oncology, with clear advantages in interpretability, robustness, and practical clinical utility. Data Availability The data supporting the findings of this study are available in the Gene Expression Omnibus (GEO) repository under accession number GSE262533. The code used for analysis is available on GitHub at https://github.com/CRI-group/DNA_meth_brain_tumors . Further inquiries regarding the data and code can be directed to the corresponding author. Required Statements Ethics The study was performed in accordance with the principles of the Declaration of Helsinki and the guidelines of the Finnish National Board on Research Integrity. This study was approved by the Ethical Committee of Tampere University Hospital (TAYS) (decision R07042 for the retrospective study, dates 9.2017 and 12.2024, and decision R14024 for the prospective study) and the National Supervisory Authority for Welfare and Health (Valvira, decision V/78697/2017). For cases that were part of the retrospective study, the requirement for informed consent was waived by The Regional Ethics Committee of Tampere University Hospital due to the retrospective nature of the study. Funding The study was financially supported by the Academy of Finland (#312043 to M.N., #310829 to M.N. Cancer Foundation Finland (M.N., K.J.R.); Sigrid Jusélius Foundation (M.N., K.J.R.); Emil Aaltonen Foundation (K.J.R., E.M.); Finnish Cancer Institute (M.N.); Väre Research foundation (K.J.R.); Aamu Pediatric Cancer Foundation (K.J.R.); and Competitive State Research Financing of the Expert Responsibility area of Tampere University Hospital (H.H., M.N., K.J.R.). Conflict of interests Authors J. Vuorinen and M. Nykter are shareholders in Fluivia Oy. Authors A. De Koker and N. Callewaert are inventors on patent regarding cf-RRBS (PCT/EP2017/056850). No disclosures were reported by the other authors. Data and code availability The data supporting the findings of this study are available in the Gene Expression Omnibus (GEO) repository under accession number GSE262533. The code used for analysis is available on GitHub at https://github.com/CRI-group/DNA_meth_brain_tumors . Further inquiries regarding the data and code can be directed to the corresponding author. Author contributions Authors listed in an alphabetical order after the task. Characterization of informative features A.R.M., M.P., M.V. Clinical Expertise H.H., J.H., K.N., O.K. Design of the study K.J.R. ENLR-based feature selection E.M. Filtering-based feature selection, M.P., M.V., S.L. Misclassification analysis K.J.R. MethylSnake pipeline development for cfRRBS data analysis. F.T. Pathology expertise H.H. Performance evaluation E.M. Planning of targeted sequencing panels A.R.M., J.V., K.J.R. Preparation of the figures A.R.M., E.M., J.V., M.P. Preparation of the result tables A.R.M., E.M. Project coordination K.J.R. Result interpretation A.R.M., K.J.R. Sample and clinical data collection H-R.T., H.H., J.H., K.N., M.K., O.K. Sample handling A.R.M., J.V. Sequencing data processing and analysis A.H., F.T., M.P., V.F. Sequencing library preparation A.R.M., J.V. Supervision A.R.M., A.D.K., B.D.W., J.K., K.J.R., M.N., M.P., N.C., R.V.P. Supplementary material construction A.R.M., E.M., J.V., K.J.R. SVM classification E.M. The cfRRBS method development and supervision of implementation A.D.K., B.D.W., N.C., R.V.P. The tSNE analysis and visualization M.P. Writing the manuscript A.R.M., E.M., J.K., J.V., K.J.R., M.P. All the authors revised and approved the final manuscript. Online Resource Captions Supplementary Figure S1. (A) Schematic representation of the filter-based strategies used. A tumor class-specific methylation category of 1 (unmethylated), 2 (intermediately methylated) or 3 (methylated) was given to a feature if at least a determined percentage of samples (90% or 80%) in that specific tumor class represent the same methylation level (beta ≤0.3, 0.3-0.7 or ≥0.7, respectively). The category 0 (Inconsistent) was given when any of the above conditions was not met. Features were then filtered based on criteria of a minimum of two different categories present and a maximum number of inconsistent tumor classes (50 and 27 classes in intra- and multi-class consistency approaches, respectively). (B) Bar plot showing the proportions of features (y-axis) falling into different types of genomic locations (x-axis) for the 4976 feature set. Absolute feature counts for each genomic location are shown above the corresponding bar. (C) Karyoplot representing the distribution of 1003 (red bars) and 4976 (blue bars) features throughout the genome. Visualized 4976 features are those selected with filter-based feature selection before elastic net logistic regression-based feature selection. (D) Karyoplot representing the features measured with the Illumina 450k methylation array. Supplementary data 1. DNA methylation classes and ENLR coefficient values across tumor methylation classes. FeatureSetsCoordsPlusMetVal-sheet contains the consistency class of each feature included in the 4976 feature set or its subsets across DNA methylation classes, where: 1 = consistently methylated, 2 = consistently intermediately methylated, 3 = consistently unmethylated, 0 = inconsistent. It also provides the genomic coordinates of each feature and indicates the reduced probe set(s) to which each feature belongs. Coefs1003Feats91Classes-sheet contains ENLR coefficient values for the select 1003 features across tumor methylation classes. Supplementary data 2. Gene set enrichment analysis of genes to which the selected set of features (1003) were annotated. The results for analyses done considering either all the genomic locations with respect to the gene, all the locations except for the promoter or only the promoter are in separate sheets. Supplementary Figure S2. Most of the DNA methylation classes showed a clearly distinct methylation profile when compared to other classes. Very few or none differences in consistent features were detected between DNA methylation classes inside certain tumor types (ATRT, GBM, non-WNT/non-SHH MB) and within IDH-mutant diffuse gliomas. Heatmap showing a pairwise comparison of the number of features that belong to different consistent methylation categories (1-3) for the 1003 features. Differences between category 0 (inconsistent) and consistent categories are not considered in the analysis. Clustering was done using euclidean distance and complete linkage. Supplementary data 3. The full and abbreviated class terms for DNA methylation classes and diagnostic classes. Supplementary Figure S3. (A) Heatmap representation of the classification accuracy based on the reference cohort shows good performance across tumor and normal DNA methylation classes. Somewhat poorer performance was detected in part of the glioblastoma multiforme (GBM) classes, and in DLGNT and PIN T PB A. Only two tumor groups, GBM MES and DLGNT, showed an average balanced accuracy below 0.92, yet remained above 0.86. Total number of samples in the class and the percentage of misclassified samples are shown on the right. (B) Confusion matrix showing the misclassified samples at the DNA methylation class level for reference cohort. Discrepancy is shown both at the level of DNA methylation classes and after grouping the methylation classes into one normal and 59 tumor sample groups based on the 5th edition of the WHO classification of CNS tumors, hereafter known as diagnostic classes The majority of the misclassifications fall within the same diagnostic class, such as GBMs or MB SHH, or between similar tumor types, such as IDH-mutant gliomas. Six control samples were misclassified as low-grade gliomas (LGGs) and two DLGNTs as pilocytic astrocytomas. (C) Performance of the classifier during the training phase. Box plot showing the values of overall performance metrics: balanced accuracy, F1 score, precision, and recall. The center line shows the median, the first and third quartiles are represented by box limits and whiskers depict 1.5× interquartile range. The overall performance values were all above 0.97. (D) A confusion matrix illustrating misclassifications at the diagnostic tumor class level in the reference cohort, using MNP classification as the ground truth. The number of misclassified samples decreased from 64 to 17 when evaluated at the diagnostic class level. (E) Accuracy of the SVM classification in the validation cohort when using MNP classification as the ground truth. In addition to the overall performance in the whole validation cohort, accuracy was also calculated separately for samples in each MNP confidence group which was determined based on the calibrated score of the MNP Classifier. Group 1 includes samples with a score equal or larger than 0.84 (n=1658) (high-confidence group), group 2 samples with a score 0.5-0.84 (n=241), and group 3 samples with a score smaller than 0.5 (n=104). Samples with a misleading profile (n=10) (as reported in the original publication by Capper et al .) were analyzed separately. (F) Performance metrics in the validation cohort for each DNA methylation class for samples belonging to MNP group 1. (G) Performance metrics in the validation cohort for each DNA methylation class for samples belonging to MNP group 2. (H) Performance metrics in the validation cohort for each DNA methylation class for samples belonging to MNP group 3. (I) Confusion matrix showing the misclassified samples at DNA methylation class level for the validation data when MNP classification was used as the ground truth. The majority of the misclassified samples are glioblastoma multiformes (GBMs) and fall into the other GBM, CONTROL INFLAM, or CONTROL REACT classes. GBM MES tumors were also misclassified as low-grade gliomas, and 10 different tumor samples as control tissue from pituitary gland anterior lobe (CONTR ADENOPIT). The majority of the misclassified samples belong to low-confidence MNP Groups 2-3. (J) Confusion matrix showing the misclassified samples at diagnostic tumor class level for the validation data when MNP classification was used as the ground truth. Supplementary data 4. Validation cohort of public microarray datasets (total of 1993 samples, representing 72 distinct tumors and four control methylation classes). Results of the SVM and MNP classification with 1003 features for Capper (1094 samples) and GEO (899 samples) validation cohorts separately. Presented are the SVM predicted labels in comparison to MNP labels. For comparison, both MNP and SVM confidence scores and groups for each sample are presented. Samples with a misleading profile (n=10) (as reported in the original publication by Capper et al .) were left out from this analysis. Supplementary Figure S4. (A) Distance from the 1003 features (probe index CpGs) to the nearest CpG measured by EM-seq. (B) Features with 0 distance to nearest CpG. (C) Read depth (coverage) in EM-seq data for the 1003 select features. Supplementary data 5. Diagnosis-assisted evaluation of tumor classification correctness when also irrelevant differences are ignored. Results are shown separately for 1094 samples in the validation cohort in 10 and for 899 samples that were separately collected for validation purposes from GEO. Supplementary Figure S5. (A) Distance from the 1003 select features (probe index CpGs) to the nearest CpG measured by Sureselect. (B) Distance from the 1003 select features (probe index CpGs) to the nearest CpG measured by cfRRBS. Supplementary data 6. Beta values of select 1003 features for each sample in each sequencing dataset (EM-Seq, SureSelect, and cfRRBS). For each probe, the number of CpGs that falls within the range from which the beta value is obtained is shown. For each sequenced sample, the SVM confidence scores are presented in separate sheets. Supplementary data 7. The SVM classification results when 533, 298, or 163 features were used for the classification. Both the predicted class labels and SVM confidence scores are shown. Acknowledgements We would like to acknowledge Mrs. Paula Kosonen, Mrs. Päivi Martikainen, Mrs. Marja Pirinen, Mrs. Sari Toivola, and Mrs. Hanna Selin for sample handling, logistics and skillful technical assistance. Anne Simi and other personnel at Tampere University Hospital and Fimlab laboratories Ltd. are acknowledged for their contribution to sample collection. We acknowledge Mrs. Katleen De Preter for her input related to cfRRBS-based analysis. We are grateful to them and the patients for permitting the analysis of precious patient material. We acknowledge the CSC - IT Centre for Science, Finland, for providing computational resources as well as the Biocenter Finland (BF) and Tampere Genomics Facility for the service. References 1. ↵ Louis DN , Perry A , Wesseling P , et al. The 2021 WHO Classification of Tumors of the Central Nervous System: a summary . Neuro Oncol . 2021 ; 23 ( 8 ): 1231 – 1251 . OpenUrl CrossRef PubMed 2. ↵ Louis DN , Ohgaki H , Wiestler OD , et al. The 2007 WHO classification of tumours of the central nervous system . Acta Neuropathol . 2007 ; 114 ( 2 ): 97 – 109 . OpenUrl CrossRef PubMed Web of Science 3. ↵ van den Bent MJ . Interobserver variation of the histopathological diagnosis in clinical trials on glioma: a clinician’s perspective . Acta Neuropathol . 2010 ; 120 ( 3 ): 297 – 304 . OpenUrl CrossRef PubMed Web of Science 4. ↵ Leo P , Lee G , Shih NNC , Elliott R , Feldman MD , Madabhushi A . Evaluating stability of histomorphometric features across scanner and staining variations: prostate cancer diagnosis from whole slide images . J Med Imaging (Bellingham ) . 2016 ; 3 ( 4 ): 047502 . OpenUrl PubMed 5. ↵ Louis DN , Perry A , Reifenberger G , et al. The 2016 World Health Organization Classification of Tumors of the Central Nervous System: a summary . Acta Neuropathol . 2016 ; 131 ( 6 ): 803 – 820 . OpenUrl CrossRef PubMed 6. Park SH , Won J , Kim SI , et al. Molecular Testing of Brain Tumor . J Pathol Transl Med . 2017 ; 51 ( 3 ): 205 – 223 . OpenUrl CrossRef PubMed 7. ↵ Pajtler KW , Witt H , Sill M , et al. Molecular Classification of Ependymal Tumors across All CNS Compartments, Histopathological Grades, and Age Groups . Cancer Cell . 2015 ; 27 ( 5 ): 728 – 743 . OpenUrl CrossRef PubMed 8. Hovestadt V , Remke M , Kool M , et al. Robust molecular subgrouping and copy-number profiling of medulloblastoma from small amounts of archival tumour material using high-density DNA methylation arrays . Acta Neuropathol . 2013 ; 125 ( 6 ): 913 – 916 . OpenUrl CrossRef PubMed Web of Science 9. ↵ Sahm F , Schrimpf D , Stichel D , et al. DNA methylation-based classification and grading system for meningioma: a multicentre, retrospective analysis . Lancet Oncol . 2017 ; 18 ( 5 ): 682 – 694 . OpenUrl CrossRef PubMed 10. ↵ Capper D , Jones DTW , Sill M , et al. DNA methylation-based classification of central nervous system tumours . Nature . 2018 ; 555 (7697): 469 – 474 . OpenUrl CrossRef PubMed 11. ↵ Jaunmuktane Z , Capper D , Jones DTW , et al. Methylation array profiling of adult brain tumours: diagnostic outcomes in a large, single centre . Acta Neuropathol Commun . 2019 ; 7 ( 1 ): 24 . OpenUrl PubMed 12. ↵ Priesterbach-Ackley LP , Boldt HB , Petersen JK , et al. Brain tumour diagnostics using a DNA methylation-based classifier as a diagnostic support tool . Neuropathol Appl Neurobiol . 2020 ; 46 ( 5 ): 478 – 492 . OpenUrl CrossRef PubMed 13. ↵ WHO Classification of Tumours Editorial Board . Central Nervous System Tumours . (WHO classification of tumours series, 5th ed.; vol.6). 2021 . Accessed Jun 16 2023. https://tumorclassification.iarc.who.int/chapters/45 14. ↵ Aryee MJ , Jaffe AE , Corrada-Bravo H , et al. Minfi: a flexible and comprehensive Bioconductor package for the analysis of Infinium DNA methylation microarrays . Bioinformatics . 2014 ; 30 ( 10 ): 1363 – 1369 . OpenUrl CrossRef PubMed Web of Science 15. ↵ Friedman J , Hastie T , Tibshirani R . Regularization Paths for Generalized Linear Models via Coordinate Descent . J Stat Softw . 2010 ; 33 ( 1 ): 1 – 22 . OpenUrl CrossRef PubMed Web of Science 16. ↵ Zou H , Hastie T . Regularization and variable selection via the elastic net . J R Stat Soc Series B Stat Methodol . 2005 ; 67 ( 2 ): 301 – 320 . OpenUrl CrossRef 17. ↵ Cavalcante RG , Sartor MA. annotatr: genomic regions in context . Bioinformatics . 2017 ; 33 ( 15 ): 2381 – 2383 . OpenUrl CrossRef PubMed 18. ↵ Eden E , Navon R , Steinfeld I , Lipson D , Yakhini Z . GOrilla: a tool for discovery and visualization of enriched GO terms in ranked gene lists . BMC Bioinformatics . 2009 ; 10 : 48 . OpenUrl CrossRef PubMed 19. ↵ Supek F , Bošnjak M , Škunca N , Šmuc T . REVIGO summarizes and visualizes long lists of gene ontology terms . PLoS One . 2011 ; 6 ( 7 ): e21800 . OpenUrl CrossRef PubMed 20. ↵ Vaisvila R , Ponnaluri VKC , Sun Z , et al. Enzymatic methyl sequencing detects DNA methylation at single-base resolution from picograms of DNA . Genome Res . 2021 ; 31 ( 7 ): 1280 – 1289 . OpenUrl Abstract / FREE Full Text 21. ↵ Koker AD , Van Paemel R , Wilde BD , Preter KD , Callewaert N . A versatile method for circulating cell-free DNA methylome profiling by reduced representation bisulfite sequencing . bioRxiv. Published online June 11 , 2019 : 663195 . OpenUrl 22. ↵ Van Paemel R , De Koker A , Vandeputte C , et al. Minimally invasive classification of paediatric solid tumours using reduced representation bisulphite sequencing of cell-free DNA: a proof-of-principle study . Epigenetics . 2021 ; 16 ( 2 ): 196 – 208 . OpenUrl CrossRef PubMed 23. ↵ Cortes C , Vapnik V . Support-vector networks . Mach Learn . 1995 ; 20 ( 3 ): 273 – 297 . OpenUrl CrossRef Web of Science 24. Boser BE , Guyon IM , Vapnik VN. A training algorithm for optimal margin classifiers . In: Proceedings of the Fifth Annual Workshop on Computational Learning Theory . COLT ‘92. Association for Computing Machinery; 1992 : 144 – 152 . 25. ↵ Vapnik VN . Statistical Learning Theory .; 1998 . 26. ↵ Krueger F , Andrews SR . Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications . Bioinformatics . 2011 ; 27 ( 11 ): 1571 – 1572 . OpenUrl CrossRef PubMed Web of Science 27. ↵ Akalin A , Kormaksson M , Li S , et al. methylKit: a comprehensive R package for the analysis of genome-wide DNA methylation profiles . Genome Biol . 2012 ; 13 ( 10 ): R87 . OpenUrl CrossRef PubMed 28. ↵ Krijthe JH. Rtsne: T-distributed stochastic neighbor embedding using Barnes-Hut implementation . version 013, URL https://github.com/jkrijthe/Rtsne . 29. ↵ Wickham H . ggplot2: Elegant Graphics for Data Analysis . Springer International Publishing ; 2016 . 30. ↵ Capper D , Stichel D , Sahm F , et al. Practical implementation of DNA methylation and copy-number-based CNS tumor diagnostics: the Heidelberg experience . Acta Neuropathol . 2018 ; 136 ( 2 ): 181 – 210 . OpenUrl CrossRef PubMed 31. ↵ Chen Y , Yan Y , Xu M , et al. Development of a Machine Learning Classifier for Brain Tumors Diagnosis Based on DNA Methylation Profile . Front Bioinform . 2021 ; 1 : 744345 . OpenUrl PubMed 32. ↵ Santana-Santos L , Kam KL , Dittmann D , et al. Validation of Whole Genome Methylation Profiling Classifier for Central Nervous System Tumors . J Mol Diagn . 2022 ; 24 ( 8 ): 924 – 934 . OpenUrl PubMed 33. ↵ Maros ME , Capper D , Jones DTW , et al. Machine learning workflows to estimate class probabilities for precision cancer diagnostics on DNA methylation microarray data . Nat Protoc . 2020 ; 15 ( 2 ): 479 – 512 . OpenUrl CrossRef PubMed 34. ↵ Kuschel LP , Hench J , Frank S , et al. Robust methylation-based classification of brain tumours using nanopore sequencing . Neuropathol Appl Neurobiol . 2023 ; 49 ( 1 ): e12856 . OpenUrl CrossRef PubMed 35. ↵ Choudhury A , Magill ST , Eaton CD , et al. Meningioma DNA methylation groups identify biological drivers and therapeutic vulnerabilities . Nat Genet . 2022 ; 54 ( 5 ): 649 – 659 . OpenUrl CrossRef PubMed 36. ↵ Djirackor L , Halldorsson S , Niehusmann P , et al. Intraoperative DNA methylation classification of brain tumors impacts neurosurgical strategy . Neurooncol Adv . 2021 ; 3 ( 1 ): vdab149 . OpenUrl 37. ↵ Vermeulen C , Pagès-Gallego M , Kester L , et al. Ultra-fast deep-learned CNS tumour classification during surgery . Nature. Published online October 11, 2023 . doi: 10.1038/s41586-023-06615-2 OpenUrl CrossRef PubMed 38. ↵ Meissner A , Gnirke A , Bell GW , Ramsahoye B , Lander ES , Jaenisch R . Reduced representation bisulfite sequencing for comparative high-resolution DNA methylation analysis . Nucleic Acids Res . 2005 ; 33 ( 18 ): 5868 – 5877 . OpenUrl CrossRef PubMed Web of Science 39. ↵ Shen SY , Burgener JM , Bratman SV , De Carvalho DD . Preparation of cfMeDIP-seq libraries for methylome profiling of plasma cell-free DNA . Nat Protoc . 2019 ; 14 ( 10 ): 2749 – 2780 . OpenUrl CrossRef PubMed 40. Taiwo O , Wilson GA , Morris T , et al. Methylome analysis using MeDIP-seq with low DNA concentrations . Nat Protoc . 2012 ; 7 ( 4 ): 617 – 636 . OpenUrl CrossRef PubMed 41. ↵ Stackpole ML , Zeng W , Li S , et al. Cost-effective methylome sequencing of cell-free DNA for accurately detecting and locating cancer . Nature Communications 2022 13:1. 2022 ; 13 ( 1 ): 1 – 12 . OpenUrl CrossRef PubMed 42. ↵ Liu MC , Oxnard GR , Klein EA , et al. Sensitive and specific multi-cancer detection and localization using methylation signatures in cell-free DNA . Ann Oncol . 2020 ; 31 ( 6 ): 745 . OpenUrl CrossRef PubMed 43. Jamshidi A , Liu MC , Klein EA , et al. Evaluation of cell-free DNA approaches for multi-cancer early detection . Cancer Cell . 2022 ; 40 ( 12 ): 1537 – 1549 .e12. OpenUrl CrossRef PubMed 44. ↵ Kang S , Li Q , Chen Q , et al. CancerLocator: non-invasive cancer diagnosis and tissue-of-origin prediction using methylation profiles of cell-free DNA . Genome Biol . 2017 ; 18 ( 1 ): 53 . OpenUrl CrossRef PubMed 45. ↵ Lak NSM , van Zogchel LMJ , Zappeij-Kannegieter L , et al. Cell-Free DNA as a Diagnostic and Prognostic Biomarker in Pediatric Rhabdomyosarcoma . JCO Precis Oncol . 2023 ; 7 : e2200113 . OpenUrl 46. ↵ Nassiri F , Chakravarthy A , Feng S , et al. Detection and discrimination of intracranial tumors using plasma cell-free DNA methylomes . Nat Med . 2020 ; 26 ( 7 ): 1044 – 1047 . OpenUrl CrossRef PubMed 47. Lin WC , Tsai CF . Missing value imputation: a review and analysis of the literature (2006–2017) . Artificial Intelligence Review . 2020 ; 53 ( 2 ): 1487 – 1509 . OpenUrl 48. ↵ Neftel C , Laffy J , Filbin MG , et al. An Integrative Model of Cellular States, Plasticity, and Genetics for Glioblastoma . Cell . 2019 ; 178 ( 4 ): 835 – 849 .e21. OpenUrl CrossRef PubMed 49. Wang Z , Xue X. Multi-Class Support Vector Machine . In: Ma Y , Guo G , eds. Support Vector Machines Applications . Springer International Publishing ; 2014 : 23 – 48 . View the discussion thread. Back to top Previous Next Posted October 13, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Accurate classification of CNS tumors through DNA methylation data analysis of select genomic regions Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Accurate classification of CNS tumors through DNA methylation data analysis of select genomic regions Elaheh Moradi , Juuso Vuorinen , Alejandra Rodriguez-Martinez , Meeri Pekkarinen , Miina Vulli , Suvi Lehtipuro , Vidal Fey , Francesco Tabaro , Anja Hartewig , Sinikka Ampuja , Andries De Koker , Ruben Van Paemel , Bram De Wilde , Nico Callewaert , Milla E.L. Kuusisto , Hanna-Riikka Teppo , Outi Kuittinen , Kristiina Nordfors , Hannu Haapasalo , Joonas Haapasalo , Matti Nykter , Juha Kesseli , Kirsi J. Rautajoki medRxiv 2025.10.07.25337348; doi: https://doi.org/10.1101/2025.10.07.25337348 Share This Article: Copy Citation Tools Accurate classification of CNS tumors through DNA methylation data analysis of select genomic regions Elaheh Moradi , Juuso Vuorinen , Alejandra Rodriguez-Martinez , Meeri Pekkarinen , Miina Vulli , Suvi Lehtipuro , Vidal Fey , Francesco Tabaro , Anja Hartewig , Sinikka Ampuja , Andries De Koker , Ruben Van Paemel , Bram De Wilde , Nico Callewaert , Milla E.L. Kuusisto , Hanna-Riikka Teppo , Outi Kuittinen , Kristiina Nordfors , Hannu Haapasalo , Joonas Haapasalo , Matti Nykter , Juha Kesseli , Kirsi J. Rautajoki medRxiv 2025.10.07.25337348; doi: https://doi.org/10.1101/2025.10.07.25337348 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Oncology Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (297) Cardiovascular Medicine (4420) Dentistry and Oral Medicine (443) Dermatology (381) Emergency Medicine (606) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15212) Forensic Medicine (30) Gastroenterology (1120) Genetic and Genomic Medicine (6581) Geriatric Medicine (667) Health Economics (996) Health Informatics (4519) Health Policy (1366) Health Systems and Quality Improvement (1611) Hematology (538) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15906) Intensive Care and Critical Care Medicine (1103) Medical Education (620) Medical Ethics (144) Nephrology (667) Neurology (6580) Nursing (345) Nutrition (998) Obstetrics and Gynecology (1141) Occupational and Environmental Health (956) Oncology (3323) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1689) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5429) Public and Global Health (9212) Radiology and Imaging (2192) Rehabilitation Medicine and Physical Therapy (1368) Respiratory Medicine (1194) Rheumatology (593) Sexual and Reproductive Health (709) Sports Medicine (529) Surgery (709) Toxicology (99) Transplantation (288) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9fefd1a94b6741e2',t:'MTc3OTMyNzA1OQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00