Type-2 diabetes biomarker discovery and risk assessment through saliva DNA methylome

doi:10.1101/2024.12.20.24319403

Type-2 diabetes biomarker discovery and risk assessment through saliva DNA methylome

2024 · doi:10.1101/2024.12.20.24319403

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

⚙ AI-generated deep summary by claude@2026-06, 2026-06-24 · read from full text ⓘ

The study investigated whether type 2 diabetes (T2D)-associated DNA methylation signals can be detected in saliva and used for biomarker discovery and risk assessment. Using pooled whole-genome bisulfite sequencing and a cost-efficient two-step pipeline with custom targeted bisulfite sequencing, the authors found T2D-specific saliva methylation signatures enriched in immune and metabolic regulation pathways, and they identified CpG sites such as those in the ABCG1 region that showed strong potential for predicting T2D status. Targeted sequencing also enabled cell-type deconvolution, indicating minimal differences in cellular composition between diabetic and non-diabetic samples, supporting an interpretation of intrinsic methylation changes. As a preprint, a key caveat is that it has not undergone peer review, and the provided excerpt does not specify additional limitations such as sample size, replication, or external validation. The paper does not explicitly discuss endometriosis or adenomyosis; it was included in the corpus via a keyword match in the upstream search index.

Read from the paper's body, not the abstract. Not a substitute for reading the paper. No clinical advice. How this works

Full text 74,106 characters · extracted from preprint-html · click to expand

Type-2 diabetes biomarker discovery and risk assessment through saliva DNA methylome | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Type-2 diabetes biomarker discovery and risk assessment through saliva DNA methylome Wenbin Guo , Marco Morselli , Kimberly C. 
Paul , Michael Thompson , Beate Ritz , Matteo Pellegrini doi: https://doi.org/10.1101/2024.12.20.24319403 Wenbin Guo 1 Bioinformatics Interdepartmental Program, University of California Los Angeles , Los Angeles, CA, 90095, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Marco Morselli 2 Department of Molecular, Cell, and Developmental Biology, University of California Los Angeles , Los Angeles, CA, 90095, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kimberly C. Paul 4 Department of Epidemiology, Fielding School of Public Health, University of California Los Angeles , Los Angeles, CA, 90095, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Michael Thompson 2 Department of Molecular, Cell, and Developmental Biology, University of California Los Angeles , Los Angeles, CA, 90095, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Beate Ritz 4 Department of Epidemiology, Fielding School of Public Health, University of California Los Angeles , Los Angeles, CA, 90095, USA 5 Department of Neurology, David Geffen School of Medicine, University of California Los Angeles , Los Angeles, CA, 90095, USA 6 Department of Environmental Health, Fielding School of Public Health, University of California Los Angeles , Los Angeles, CA 90095, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Matteo Pellegrini 1 Bioinformatics Interdepartmental Program, University of California Los Angeles , Los Angeles, CA, 90095, USA 2 Department of Molecular, Cell, and Developmental Biology, University of California Los Angeles , Los Angeles, CA, 90095, USA 3 UCLA-DOE Institute for Genomics and Proteomics, University of California Los Angeles , Los Angeles, CA, 90095, USA Find this author on Google Scholar Find this author on PubMed 
Search for this author on this site For correspondence: matteop{at}ucla.edu Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract The rising prevalence of type 2 diabetes (T2D) motivates innovative strategies to deepen disease understanding and enhance diagnostic capabilities. This study measures diabetes-specific epigenetic signals in saliva, establishing saliva DNA methylome as a promising medium for T2D screening and study. By integrating comprehensive whole-genome bisulfite sequencing (WGBS) and high-depth targeted bisulfite sequencing (TBS), we developed a cost-efficient two-step approach to profiling DNA methylation at regions of interest. WGBS analysis confirmed T2D-specific methylation signatures in saliva, revealing their enrichment in immune and metabolic regulation pathways. TBS enabled accurate cell type deconvolution, revealing minimal differences in cellular composition between diabetic and non-diabetic samples, suggesting intrinsic molecular changes drive the observed methylation changes. Epigenome-wide association studies further identified significant CpG sites, notably in the ABCG1 region, with strong potential for T2D status prediction. These findings validate the saliva DNA methylome as a scalable, non-invasive resource for T2D biomarker discovery, advancing opportunities in T2D screening, risk assessment, and personalized medicine. Introduction Diabetes mellitus, a multifaceted metabolic disorder characterized by hyperglycemia, continues to pose a considerable and escalating global health challenge. According to the World Health Organization and the Centers for Disease Control and Prevention, the prevalence of diabetes has surged more than fourfold since 1980 [ 1 ], affecting approximately 529 million individuals worldwide [ 2 ] and 38.4 million in the United States in 2021 [ 3 , 4 ]. Notably, over 90% of these cases are type 2 diabetes (T2D) [ 5 ]. 
This alarming rise ( Figure S1 ) underscores the urgent need for deeper disease mechanism understanding, innovative diagnostic tools, as well as effective management strategies. Timely detection and intervention are crucial for managing diabetes, preventing associated complications, and reducing the economic burden on patients and healthcare systems. Recent years have witnessed burgeoning interest in the role of epigenetics underlying diabetes [ 6 , 7 , 8 ], focusing on how environmental factors and lifestyle choices can induce gene expression changes without altering the DNA sequence. Among various epigenetic modifications, DNA methylation has garnered substantial attention for its robust and dynamic nature, playing important roles in gene regulation, cell differentiation, development and maintenance of homeostasis [ 9 , 10 ]. Alterations in DNA methylation can contribute to disease and are often reflective of disease states, making them informative for disease mechanism research and diagnostic purposes [ 6 , 11 ]. In the context of T2D, DNA methylation has been implicated in its onset [ 12 , 13 ], progression [ 14 ], and complications [ 15 , 16 , 17 ], with emerging evidence highlighting its utility for diabetes risk prediction [ 18 , 19 ]. Aberrant methylation patterns are also found in key genes associated with glucose metabolism [ 20 ], insulin secretion [ 21 ], insulin resistance [ 22 ], and inflammatory responses [ 23 , 24 ]. These findings establish DNA methylation changes as valuable biomarkers for T2D, emphasizing their potential in elucidating disease mechanisms and developing novel diagnostic and treatment strategies. Despite advancements in understanding DNA methylation changes in diabetes, most studies have focused on tissues such as blood, skeletal muscle, adipose tissue, and pancreas [ 6 , 7 , 8 , 25 , 26 , 27 , 28 , 29 ], while the potential of saliva DNA methylation as a non-invasive biomarker remains underexplored. 
Saliva offers a particularly appealing option due to its ease of collection and high patient compliance, making it ideal for disease screening and routine monitoring. Recent studies have demonstrated a high similarity in methylation profiles between blood and saliva [ 30 , 31 ], suggesting that disease-associated epigenetic signals identified in blood may also be detectable in saliva. This evidence forms the basis of our hypothesis that the saliva methylome can serve as a valuable medium for identifying T2D biomarkers. If validated, the saliva methylome profiles could facilitate T2D screening and monitoring, paving the road for future applications in T2D diagnostics and management. A major challenge in current methylation profiling is the substantial resource demand, particularly with whole-genome bisulfite sequencing (WGBS), which remains prohibitively expensive for large-scale studies and clinical applications. While methylation microarrays offer a more affordable alternative and are widely used in DNA methylation research [ 32 , 33 ], they capture only a limited, predetermined subset of CpG sites, potentially overlooking critical regions relevant to the disease of interest. Recognizing that many CpG sites exhibit minimal variation across cell types [ 34 , 35 ] and non-cancer diseases [ 36 ], we identified an opportunity to reduce costs by selectively measuring the informative regions. In this study, we devised and implemented a cost-effective two-step strategy for T2D biomarker research ( Figure 1 ). First, pooled WGBS of saliva DNA was conducted to identify T2D-associated signals, revealing 1,358 differentially methylated regions (DMRs) between diabetic and non-diabetic groups. Building on these findings, we designed custom probes to enrich these DMRs and other informative regions for targeted bisulfite sequencing (TBS). 
This integrated approach synergizes the broad genomic coverage of WGBS with the high-depth profiling of TBS, enabling precise DNA methylation measurements in genomic regions of interest. By focusing sequencing efforts on relevant targets, this approach achieves cost-efficiency and makes large-scale study and routine screening more economically feasible. Download figure Open in new tab Figure 1: Study design for saliva DNA methylome analysis in Type 2 diabetes. (A) Experimental procedure. Participants’ saliva samples were collected, followed by DNA extraction and fragmentation. Pooled samples from non-diabetic and diabetic cohorts were then subjected to whole-genome bisulfite sequencing (WGBS) to identify differentially methylated regions (DMRs) associated with T2D. Probes targeting these DMRs were synthesized and used for targeted region enrichment, followed by bisulfite conversion and sequencing in high-efficiency Targeted Bisulfite Sequencing (TBS). (B) Computational Analysis. Sequencing reads underwent preprocessing and alignment, with methylation levels quantified as the ratio of methylated cytosine (C) counts to the total counts at each CpG site. The methylation data were used for downstream analysis, including cell type deconvolution, an epigenome-wide association study, and diabetes status prediction. Our study validated the presence of T2D-associated signals in the saliva methylome for the first time and provided key biological insights into the molecular basis of T2D. WGBS analysis revealed that the identified DMRs were significantly enriched in immune and metabolic pathways, consistent with the established pathophysiology of T2D [ 37 ]. TBS provided a high-depth profiling of the targeted regions and allowed for accurate cell-type deconvolution. 
This analysis revealed no major differences in cell type composition between diabetic and non-diabetic samples, suggesting the observed methylation changes are likely driven by intrinsic molecular alterations rather than shifts in cellular proportions. To further investigate the molecular changes underlying T2D, an epigenome-wide association study (EWAS) was conducted on TBS data and identified 12 significant CpG sites with the top hit in the ABCG1 region, replicating and reinforcing findings from previous blood-based studies [ 32 , 38 ]. Collectively, these findings establish saliva as a robust and practical medium for T2D research, enabling the precise identification of T2D-associated biomarkers. By integrating WGBS and TBS, this approach provides a cost-efficient and scalable framework for large-scale screening and monitoring. This study underscores the transformative potential of saliva-based epigenetic approaches in advancing T2D research and diagnostic applications. Results WGBS identifies DMRs associated with diabetes in saliva To investigate DNA methylation changes associated with T2D while optimizing sequencing efficiency, we implemented a carefully designed sample pooling strategy followed by Whole Genome Bisulfite Sequencing (WGBS). In this study, we pooled 96 saliva samples with matched demographical attributes into four groups: Diabetic Male, Diabetic Female, Non-diabetic Male, and Non-diabetic Female, with a balanced sample size per group. This pooling approach ensured adequate representation of each group and enabled robust comparisons across groups at a reduced cost. The WGBS data were then processed and aligned to the human reference genome (hg38) with CpG methylation levels quantified. Downstream differential methylation region (DMR) analysis between diabetic and non-diabetic groups revealed 1358 potential DMRs out of 162833 total regions (0.8%), visualized using a volcano plot ( Figure 2A ) and a heatmap ( Figure 2B ). 
These findings highlight significant epigenetic variations (both hypo- and hyper-methylation) between diabetic and non-diabetic individuals. Download figure Open in new tab Figure 2: Differential methylation region and genomic region enrichment analysis for saliva WGBS data. (A) Volcano plot showing differential methylation region (DMR) analysis results, comparing diabetic group to non-diabetic controls. The x-axis represents the difference in methylation levels (Δmethylation), while the y-axis displays the −log10 p-values. Regions where both Δmethylation and the p-value exceed their respective thresholds are highlighted in red, representing hypo-methylation (left) and hyper-methylation (right). Regions where only the Δmethylation or p-value passes its corresponding threshold are shown in green and blue, respectively. Non-significant regions are depicted in gray. (B) Hierarchical clustering heatmap of DMRs’ methylation levels across diabetic and non-diabetic groups. The color scale represents z-scores, with hypo-methylated regions indicated in blue and hyper-methylated regions in red, highlighting differential methylation between the two groups. (C) Bar plot showing the genomic region enrichment analysis results of DMRs. The x-axis represents the −log10 adjusted p-value of enrichment, and the y-axis lists the enriched Gene Ontology (GO) terms of biological processes. Metabolic-related processes are highlighted in green, immune-related processes in orange, and others in gray, with notable enrichment in pathways related to cellular metabolic and immune responses. (D) Table summarizing the significantly enriched transcription factor binding sites. Each motif was ranked by significance, and the percentage of target versus background regions, p-value, adjusted p-value, and associated phenotype were provided. Genomic region enrichment analysis was conducted to elucidate the biological relevance of the identified DMRs. 
The results exhibited substantial enrichment in genomic regions associated with metabolic regulation and immune response pathways ( Figure 2C , Figure S2 ), underscoring their potential relevance in diabetes pathogenesis. Notably, several key pathways, such as leukocyte-mediated immunity and neutrophil activation, were significantly enriched, aligning with the current understanding of diabetes as a multifactorial disease involving intricate interactions between metabolic dysfunction and immune responses [ 37 ]. Based on the identified DMRs, we designed a set of probes (n=937) for targeted bisulfite sequencing. Motif analysis of these probe-enriched regions revealed seven significant transcription factor binding sites (p-value < 0.01, adjusted p-value < 0.1, Figure 2D ), which are associated with T2D [ 39 ] and related traits, such as glycolysis [ 40 ] and immune response [ 41 ]. This association underscores the functional relevance of the identified DMRs and enriched regions with diabetes pathophysiology. Taken together, the WGBS analysis confirmed the presence of diabetes-specific methylation signals in saliva and facilitated the screening of genomic regions enriching these signals, paving the way for efficient profiling through Targeted Bisulfite Sequencing. TBS enriches target regions with high sequencing depth To enhance efficiency in large-scale epigenetic profiling, we implemented targeted bisulfite sequencing (TBS) using a curated set of probes. This set includes probes designed to enrich the identified DMRs from WGBS analysis, as well as additional probes targeting regions associated with phenotypes such as aging, cell type, BMI and metabolic disorders [ 29 , 42 , 43 ]. A total of 8154 probes were used throughout the targeted bisulfite sequencing study, capturing ∼1M bases of the genome. 
Genomic coordinate overlap analysis with the existing EWAS database [ 44 , 45 ] revealed that more than 40% of the probes overlap with known EWAS sites associated with diabetes and related traits, including BMI, obesity, fasting glucose levels, insulin levels and resistance ( Figure 3A ), ensuring the capture of the diabetes-informative methylation regions. Download figure Open in new tab Figure 3: TBS captures desired region with high depth with reduced cost. (A) Pie chart illustrating the composition of the probe set (n=8154), highlighting its overlap with the differentially methylated regions (DMRs) identified in WGBS data and the public EWAS database. The probes are categorized as overlapping with DMR & EWAS (blue), DMR only (red), EWAS only (yellow), and other regions (gray). (B) Coverage plot showcasing an example of read coverage across a targeted genomic region (chr21:42,235,500-42,236,800) in a sample’s TBS data. The x-axis represents the genomic coordinates, and the y-axis shows the depth at each locus. Both Watson and Crick strands are displayed, with the targeted probe region highlighted in blue. The plus signs indicate probes designed on the Watson strand to capture the Crick strand. (C) Density plots showing the depth distribution of probes targeting diabetes DMR regions (n=937) and the total probe set (n=8154) across two batch samples. The red dashed line indicates the average depth of the enriched regions, with grey dashed lines indicating the non-enriched background regions. (D) Box plots displaying the percentage of CpG sites within the probe regions that achieve a sequencing depth greater than 10x. The plots demonstrate the efficiency of TBS in achieving high sequencing depth for the targeted regions across probe sets and batches. With the curated probe set, we conducted targeted bisulfite sequencing on two cohorts (Supplementary Data 1), aiming for 10 million reads per sample. 
Our results confirmed that TBS can effectively capture the targeted genomic regions with high depth ( Figure 3B ). Of note, the enriched regions exhibited an average of 1300-fold higher depth than non-enriched background regions ( Figure 3C , Figure S3 ), and over 80% of the CpG sites within the targeted regions had depth greater than 10 counts ( Figure 3D ). These findings demonstrate the remarkable efficiency of TBS in profiling targeted genomic regions with high depth while achieving cost efficiency. The successful capture of informative regions establishes TBS as a scalable solution for high-throughput epigenetic studies. Its high-depth coverage of CpG sites within targeted regions enables accurate and reliable DNA methylation quantification, ensuring robust statistical power for detecting differential signals in downstream analyses. Cell type deconvolution reveals minimal T2D-related compositional changes in saliva Both the WGBS and TBS technologies are applied to bulk saliva samples, which obscures the specific cell type abundance associated with T2D in saliva. To address this, we first assessed whether the TBS sites contain cell type information. We downloaded a WGBS dataset containing a comprehensive methylation atlas of normal human cell types [ 46 ] and identified cell type-specific regions. By overlapping with the TBS sites, we found a significant proportion of the TBS sites fell within these cell type-specific regions, sufficiently distinguishing the different cell types in saliva tissue ( Figure S4 ). To further validate the utility of these sites for cell type deconvolution, we generated in-silico mixtures of DNA methylation profiles with known cell type proportions. Using these simulated datasets, we performed cell type deconvolution analysis using the Houseman method [ 47 ] ( Figure S5A ), achieving a root mean square error (RMSE) of less than 0.01 and an R-squared value approaching 1 ( Figure S5B ). 
Repeated experiments consistently showed high accuracy ( Figure S5C ), confirming that the TBS sites support accurate cell-type deconvolution. Following this validation, we applied the deconvolution method to bulk saliva TBS data to investigate cell type composition in our samples. The analysis revealed that monocytes, granulocytes, and epithelial cells were the most abundant cell types in saliva, consistent with previous literature and our reanalysis of recent single-cell RNA-seq data of human sputum tissue [ 48 ] ( Figure S6 ). Comparing cell type proportions between diabetic and non-diabetic samples, we observed no significant changes in major cell types ( Figure 4 ), except for a marginally significant difference in naïve T cells. However, this association was not significant after p-value adjustments. Our analysis also revealed that cell type proportions are highly correlated with the top Principal Components (PCs) of the DNA methylation matrix ( Figure S7 ), emphasizing the dominant role of cell proportions in the epigenetic variability [ 33 ] and echoing the importance of including these variabilities in EWAS analysis to account for cell type heterogeneity [ 49 ]. Download figure Open in new tab Figure 4: Differential cell type proportions between diabetic and non-diabetic samples. Violin plots show the difference in cell type proportions between diabetic and non-diabetic samples for each cell type after adjusting other covariates (age, sex, ethnicity, and batch). Wilcoxon p-values are annotated in each subplot, revealing no significant difference in cell proportions between the two groups, except for a marginally significant difference for naïve T cells (p=0.022). In conclusion, our analysis demonstrated that TBS sites capture cell-type information and enable accurate cell-type deconvolution. 
Notably, the similar cell type proportions observed between diabetic and non-diabetic groups suggest that diabetes-related epigenetic changes in saliva are driven by intrinsic molecular alterations rather than shifts in cell composition. EWAS reveals differential DNA methylation associated with T2D status Another distinct advantage of TBS is its ability to elucidate the epigenetic mechanisms underlying diabetes at the molecular level, providing valuable insights into disease pathways and potential therapeutic targets. To demonstrate this potential, we conducted an epigenome-wide association study (EWAS) on the TBS data. In this analysis, we accounted for key covariates such as age, sex, ethnicity, study batches, and cell-type proportions, to mitigate the influence of confounding factors and identified CpG sites associated with diabetic states. The EWAS results, visualized with a Manhattan plot ( Figure 5A ) and a QQ plot ( Figure S8 ), revealed 12 CpG sites significantly associated with T2D, with 7 of these sites near genes previously implicated in diabetes pathogenesis, such as ABCG1 [ 32 ], LDLRAD4 [ 50 ], and TYK2 [ 51 ]. Figure 5B shows methylation level differences at the top CpG sites between diabetic and non-diabetic groups after adjusting for covariates. Download figure Open in new tab Figure 5: EWAS analysis identifies methylation sites associated with diabetes status. (A) Manhattan plot depicting the epigenome-wide association between DNA methylation levels and T2D status. Each dot represents a CpG site, with the −log10(p-value) plotted against its chromosomal position. The horizontal dashed line indicates the suggestive significance threshold ($10^{-4}$). Genes located within a 2kb window of the top CpG sites are annotated, with established diabetes-related genes highlighted in red, such as ABCG1 , LDLRAD4 , TYK2 , etc. (B) Boxplots illustrating the methylation levels (adjusted for covariates) at selected top CpG sites. 
Each plot panel compares the methylation levels between diabetic (yellow) and non-diabetic (blue) samples at the specific CpG site, highlighting their potential role in diabetes pathogenesis. Notably, the strongest signal was observed in the ABCG1 gene region, corroborating a recent meta-analysis of blood-based EWAS [ 32 ] that identified ABCG1 as a top hit across five cohorts with over 3,000 samples. ABCG1 plays a crucial role in regulating lipid metabolism and cholesterol efflux, which are essential for maintaining cellular lipid homeostasis [ 52 ]. The dysfunction of ABCG1 is particularly detrimental in the context of diabetes, where impaired cholesterol efflux can exacerbate insulin resistance and promote atherosclerosis [ 53 ], a common complication of the disease. Furthermore, the accumulation of lipids can result in cellular stress and apoptosis [ 54 ], which in turn triggers an immune response and leads to chronic inflammation, further accelerating diabetes progression and increasing cardiovascular disease risk. The role of ABCG1 in lipid regulation and its broader impact on inflammation and cell viability highlight its potential as a therapeutic target in diabetes management. Our EWAS findings, particularly the significant signal at the ABCG1 region, highlight the gene’s critical role in diabetes. These results validate the utility of saliva-based DNA methylation analysis in diabetes research and emphasize the potential of these epigenetic markers as biomarkers for diagnosing diabetes, predicting risk, and informing the development of targeted therapeutic strategies. Predictive performance of individual methylation sites for T2D status To evaluate the potential of DNA methylation as a biomarker for diabetes diagnosis, we analyzed the predictive performance of individual methylation sites using ROC analysis. 
Figure 6 illustrates the ROC curves of all tested sites, with chr19:10380958 ( TYK2 ) and chr21:42236481 ( ABCG1 ) achieving AUC values of 0.683 and 0.681, respectively, indicating moderate predictive ability. The shaded region, representing the 95% quantile range of ROC curves across all sites, highlights the variability in predictive performance. These results demonstrate that while some individual sites show moderate performance, most exhibit weak signals, underscoring the importance of refining site selection. This validates the need for a targeted sequencing strategy, as it can effectively enrich informative loci, improving the signal-to-noise ratio and enabling precise and efficient methylation profiling. Download figure Open in new tab Figure 6: ROC curve for diabetes status classification using individual methylation sites. This ROC curve highlights the classification performance of two key methylation sites, chr19:10380958 and chr21:42236481, in predicting T2D status, with respective AUC values of 0.683 and 0.681. The shaded region denotes the 95% range of predictive performance across all other analyzed methylation sites, providing context for the highlighted sites’ relative performance, with the dashed diagonal line representing AUC 0.5 as a reference. Additionally, although the predictive power of individual loci is limited, combining methylation profiles within multivariate or ensemble frameworks offers a promising path forward. Future model development should focus on integrating information across multiple loci to enhance predictive accuracy and robustness. These strategies have the potential to yield reliable, clinically actionable tools for diabetes diagnosis and risk stratification, underscoring the transformative potential of the saliva DNA methylome as a scalable, non-invasive approach for advancing T2D biomarker discovery and improving disease management. 
Discussion The rising prevalence of type 2 diabetes (T2D) underscores the need for innovative approaches that extend beyond traditional diagnostics to explore the molecular mechanisms underpinning the disease. Identifying reliable biomarkers and investigating epigenetic modifications, such as DNA methylation, can deepen our understanding of T2D pathogenesis, enable early detection, and inform the development of targeted therapeutic strategies. To address the need for accessible and noninvasive approaches, this study evaluated the potential of saliva DNA methylome for T2D biomarker discovery and diagnostic applications, offering insights into the molecular and cellular dynamics underlying the disease. One key challenge in methylation profiling is the high cost of obtaining informative and accurate measurements. Whole-genome bisulfite sequencing (WGBS) provides comprehensive coverage but requires high sequencing depth, making it prohibitively expensive for large-scale studies. Methylation arrays, while more affordable, capture only a fixed, small subset of CpG sites, potentially overlooking critical variations relevant to disease. To overcome these limitations, we developed a cost-efficient two-step strategy, combining WGBS to identify key regions with targeted bisulfite sequencing (TBS) for high-depth profiling. This approach significantly reduces costs while maintaining precision, making it suitable for broader and cohort-level applications. Using this combined strategy, we obtained compelling evidence supporting the use of saliva DNA methylation for T2D biomarker discovery and risk assessment. Through WGBS, we identified differentially methylated regions (DMRs) associated with T2D, particularly enriched in pathways related to immune response and metabolic regulation. These results align with existing blood-based studies [ 37 ], confirming that saliva, like blood, harbors diabetes-specific epigenetic signatures. 
The subsequent application of targeted bisulfite sequencing (TBS) enabled precise quantification of DNA methylation in these key regions at the cohort scale. Importantly, cell type deconvolution of the TBS data revealed no significant differences in cell proportions between diabetic and non-diabetic groups, suggesting that the observed methylation changes are primarily intrinsic rather than driven by shifts in cell composition. Further supporting these findings, an epigenome-wide association study (EWAS) conducted on the TBS data identified significant CpG sites, with the top hit in the ABCG1 gene region, consistent with prior blood-based findings [ 32 ]. Collectively, our findings provide the first validation of T2D-specific methylation signals in saliva, establishing a novel paradigm for non-invasive diabetes screening and offering valuable insights into the epigenetic basis of this prevalent disease. Despite these promising findings, our study has limitations that warrant further investigation. The relatively small sample size may have reduced the statistical power of our findings, potentially leading to missing important epigenetic signals. Expanding the sample size and including a more diverse population would enhance the robustness and generalizability of the results. Additionally, many diabetic participants were under good glycemic control, which may have attenuated the strength of detectable epigenetic changes. Future studies should include individuals at various stages of disease progression to capture a broader range of epigenetic variations. While our probe panel targeted diabetes-related sites, it could be further optimized by integrating prior knowledge to capture a wider range of diabetes-associated signals, particularly regions near genes involved in insulin signaling, glucose metabolism, and related pathways. 
Additionally, advanced machine learning approaches, such as ensemble and contrastive learning [ 55 , 56 ], hold promise for enhancing diagnostic model performance by effectively integrating subtle signals linked to different disease states. Addressing these limitations through larger cohorts, refined probe designs, and advanced modeling techniques will be crucial for maximizing the potential of saliva DNA methylation in diabetes research and diagnostics. Looking ahead, further research could greatly enhance the utility and impact of our approach. Advanced barcoding and multiplexing techniques, such as Time-Seq [ 57 ], could further reduce costs, making this method even more accessible for large-scale studies and routine clinical applications. The non-invasive nature of saliva collection, combined with cost-effective methylation profiling, offers a practical and scalable solution for diabetes screening and longitudinal monitoring. Conducting longitudinal studies will be critical to establish causal relationships between DNA methylation changes and T2D progression, providing deeper insights into disease mechanisms and enabling timely interventions. Ultimately, integrating saliva DNA methylation profiling into clinical practice has the potential to revolutionize diabetes diagnostics and monitoring, facilitating earlier detection, personalized treatment, and more effective disease management. In conclusion, this proof-of-concept study validates diabetes-specific epigenetic signals in saliva, establishing saliva DNA methylation as a promising biomarker source for non-invasive T2D research and screening. By employing an innovative sequencing strategy that enhances precision while reducing costs, we have made epigenetic profiling feasible for large-scale studies and clinical applications. 
While further research with larger, more diverse cohorts is needed, this approach lays the groundwork for transforming diabetes diagnostics and monitoring, paving the way for more personalized and accessible care. Methods Sample collection and preparation This study involved saliva samples collected as part of the Parkinson’s Environment and Genes (PEG) study [ 58 , 59 , 60 ]. While PEG is a case-control study focused on Parkinson’s disease (PD), the saliva samples utilized in this study were primarily unrelated to PD. Participants were recruited from various sources across three counties in the Central Valley of California (Kern, Fresno, and Tulare) during two study waves (2000-2007 and 2009-2015). Population controls were enrolled from the same regions using Medicare lists and residential tax assessor records. Demographic data, medical history, medication use, and lifestyle information were collected through standardized interviews. Saliva collection tubes were mailed to participants, who then returned them via shipping or during in-person examinations. For this study, samples from participants with and without type 2 diabetes were randomly selected from those available in the PEG study, ensuring that the diabetic and non-diabetic groups were matched for age, sex, and ethnicity (Supplementary Data 1). Two batches of 96-well plates were prepared: the first in 2020 (Diabetes n=48, Non-diabetic n=48) for Whole Genome Bisulfite Sequencing (WGBS), probe design, and a pilot Targeted Bisulfite Sequencing (TBS) study, and the second in 2022 (Diabetes n=42, Non-diabetic n=54) for an expanded TBS study. The batch was included as a covariate in the downstream analyses. Each individual’s saliva samples were sent to the UCLA Neuroscience Genomics Core (UNGC) for DNA extraction. Typically, 2.5 mL to 4 mL of saliva samples were collected using the Oragene saliva collection kit, followed by the standard manufacturer protocol of the Qiagen Puregene DNA extraction kit. 
After purification and extraction, the DNA concentration was measured using a NanoDrop 8000 spectrophotometer, and the extracted DNA samples were stored at −20°C before library preparation. Whole genome bisulfite sequencing (WGBS) To optimize cost efficiency, the extracted saliva DNA samples from the first batch were aggregated into four groups, matched by age and sex, as detailed in Supplementary Data 1. Each grouped DNA was pooled and subjected to whole genome bisulfite sequencing (WGBS) following established protocols [ 61 ]. Specifically, one microgram of purified DNA was sonicated using the Bioruptor Pico (Diagenode) for 15 cycles of 30 seconds ON and 90 seconds OFF, targeting a fragment size of 200-300 bp. The NEB Next Ultra II DNA kit (New England Biolabs) was used for subsequent end-repair, A-tailing, and ligation of pre-methylated unique-dual indexed adapters (Integrated DNA Technologies, custom synthesis). Bisulfite conversion was performed with the EZ DNA Methylation-Gold kit (Zymo Research). Final library amplification (12 PCR cycles) was conducted using KAPA HiFi U+ polymerase (Roche Sequencing) and IDT xGen Primers. Library quality was assessed using the D1000 Assay on a 4200 Agilent TapeStation, and concentrations were quantified with the Qubit dsDNA BR Assay (Life Technologies). Sequencing was conducted on a NovaSeq 6000 platform (S4 lane), generating paired end reads of 150 base pairs. WGBS data processing and DMR analysis The raw sequencing reads underwent quality control using FastQC [ 62 ], followed by adapter and low-quality base trimming with fastp [ 63 ]. The trimmed reads were aligned to the reference genome (hg38) using BSBolt [ 64 ], with PCR duplicates marked with samtools [ 65 ]. Methylation levels of CpG sites were quantified for each sample, then aggregated into a methylation matrix. For downstream analysis, only sites with at least five counts in all four pooled samples were retained. 
The methylation matrix is available in Supplementary Data 2. Differentially methylated region (DMR) analysis of the WGBS data was conducted using metilene [ 66 ] (version 0.2-8), with each candidate region required to contain a minimum of five CpG sites. In total, 162,833 genomic regions were analyzed. The statistical significance of methylation differences between diabetic and non-diabetic groups was evaluated using the Mann-Whitney U-test and the 2D Kolmogorov-Smirnov test for each region. Regions were considered significantly differentially methylated if they exhibited p-values below 0.01 for both tests and an absolute methylation difference exceeding 0.2 between the two groups. Supplementary Data 3 provides a comprehensive list of all candidate regions and identified DMRs, including their genomic coordinates, absolute methylation differences, and statistical significance levels. Genomic region enrichment analysis and probe design Following the identification of differentially methylated regions (DMRs) between diabetes and non-diabetes WGBS data, we conducted a genomic region enrichment analysis using the R package rGREAT [ 67 ] (version 2.4.0) in online mode. This analysis compared the DMRs against the total examined genomic regions as background, revealing significant enrichment patterns and biological relevance of the observed methylation changes. The identified DMRs were later submitted to Integrated DNA Technologies (IDT, https://www.idtdna.com/ ) for probe design, resulting in 937 custom probes targeting these regions. To further understand the regulatory context, we performed motif enrichment analysis using HOMER [ 68 ] (version 4.11), identifying enriched transcription factor binding sites (TFBS) for the probe-enriched regions. In addition to the newly designed probes, we also incorporated previously designed probes targeting regions of interest from earlier studies [ 42 , 43 ]. 
These probes were selected based on loci identified in public epigenome-wide association studies (EWAS) related to aging, cell types, and metabolic disorders. Due to an update in the probe set, there are slight differences between the probes used in batch 1 and batch 2, each containing a small set of extended probes labeled as ‘batch1 extended’ and ‘batch2 extended.’ The probes consistently used throughout the TBS study are collectively labeled as the ‘Total Panel’ and referred to as ‘total probes’ throughout the manuscript. The complete panel of probes, including both the Total Panel and extended probes, is detailed in Supplemental Data 4, with their sequences and target regions provided. Targeted bisulfite sequencing (TBS) For targeted bisulfite sequencing (TBS), 250 to 500 ng of purified gDNA from each sample was fragmented, and libraries were constructed following the same procedure as described in the WGBS protocol. Groups of 16 libraries, each with a unique dual index adapter, were pooled together, concentrated via SpeedVac, and subjected to targeted enrichment using custom 5’-biotinylated probes (IDT, xGen Custom Hybridization probe panel) (Supplemental Data 4). Enrichments were performed with the xGen Hybridization Capture kit (IDT), following the manufacturer’s instructions, including overnight hybridization at 65°C. Bisulfite conversion of captured DNA was conducted using the EZ Methylation Gold kit (Zymo Research). Final PCR amplification employed KAPA HiFi Uracil+ (Roche) with the following conditions: initial denaturation at 98°C for 2 minutes, followed by 16 cycles of 98°C for 20 seconds, 60°C for 30 seconds, and 72°C for 30 seconds, with a final extension at 72°C for 5 minutes. PCR products were purified using SPRI beads, and library quality control was conducted with the High-Sensitivity D1000 Assay on the 4200 Agilent TapeStation. Pools of 96 libraries were sequenced on a NovaSeq 6000 with paired-end 150-base reads. 
TBS data processing and quality control The raw sequencing reads of TBS data underwent a standardized preprocessing pipeline, including quality control, trimming, alignment, PCR duplicate marking, and methylation calling, as outlined in the WGBS data processing protocol. The methylation level of each CpG site is computed as follows: $$\text{methylation level} = \frac{C_{\text{methylated}}}{C_{\text{methylated}} + C_{\text{unmethylated}}},$$ where $C_{\text{methylated}}$ and $C_{\text{unmethylated}}$ are the numbers of reads supporting the methylated and unmethylated states at that site. To ensure data quality, samples with fewer than 2.5 million unique reads post-PCR deduplication or identified as outliers through PCA on the methylation level matrix were excluded from further analysis. After the data quality control, a total of 182 samples (Diabetic n=87, Non-diabetic n=95) were retained for further investigation. We also performed quality control on the features. For epigenome-wide association studies, we focused on count data, retaining only those sites with read counts exceeding 10 in at least 80% of the samples. For analyses focused on methylation levels, such as cell deconvolution or machine learning model development, we retained only those sites that had at least 20 counts in at least 80% of the samples to ensure a reliable methylation level estimate. Missing values in the methylation level matrix were imputed using the KNN algorithm (k=5) implemented in the R package impute [ 69 ] (version 1.70.0). Cell type deconvolution To ensure deconvolution accuracy, we first analyzed the cell composition in saliva using a single-cell RNA-seq dataset (GSE158055) [ 48 ] from the CELLxGENE Discover Data Portal ( https://cellxgene.cziscience.com ), which confirmed the predominance of epithelial and immune cells ( Figure S6A-B ). Building on this confirmation, we then compiled a comprehensive cell type methylation reference for deconvolution by integrating Whole Genome Bisulfite Sequencing (WGBS) profiles from the DNA methylation atlas (GSE186458) [ 46 ]. 
This reference encompasses epithelial and key immune cell types—granulocytes, monocytes, NK cells, B cells, and T cells (CD4, CD8, and naïve)—with detailed accession IDs and labels provided in Supplementary Data 5. Cell type-specific differentially methylated regions (DMRs) were identified by comparing each cell type against all others using metilene [ 66 ] (version 0.2-8), with parameters aligned to prior DMR analyses. Regions with a methylation difference exceeding 0.3 and an adjusted p-value below 0.05 (Benjamini-Hochberg correction) were extracted, and CpG sites in these regions were used to create the cell type methylation signature matrix. The resulting matrix, which serves as a robust reference for deconvolution, is available in Supplementary Data 6. Given the limited number of CpG sites captured by TBS data, we further validated the deconvolution accuracy on TBS data using synthetic DNA methylation profiles. We generated 100 in-silico samples by mixing DNA methylation profiles of cell types with known proportions, with random Gaussian noise (mean = 0, standard deviation = 0.05) added to mimic the variability in methylation levels due to sequencing noise. These synthetic datasets were then filtered to include only the CpG sites present in the TBS methylation level matrix. Deconvolution was performed using the Houseman method [ 47 ], a well-established NNLS (non-negative least squares) approach for estimating cell-type compositions from bulk methylation data. The deconvolution accuracy was assessed by comparing the estimated and true cell proportions using key metrics, including R-squared and root mean square error (RMSE), confirming the precision and reliability of the method for TBS sites. This validated framework was then applied to deconvolve cell-type compositions in bulk saliva samples. Detailed cell proportions for each sample are provided in Supplementary Data 7. 
Epigenome-wide association study To prioritize the risk CpG sites associated with T2D, we conducted an epigenome-wide association study (EWAS) on the methylation read count data using the R package DSS [ 70 ]. Specifically, the DSS package utilized a beta-binomial modeling strategy to model the methylated counts $Y_i$ based on the total counts $N_i$ for site $i$ by $$Y_i \sim \text{Beta-Binomial}(N_i, \pi_i, \phi_i),$$ where $\pi_i$ and $\phi_i$ are the mean and dispersion parameters for site $i$. The mean parameter was modeled as $$g(\pi_i) = \sum_{j=1}^{J} \beta_j X_j,$$ where $g(\cdot)$ is the link function and $X_j$ for $j = 1, \ldots, J$ are the covariates (including the variable of interest and other covariates). By testing the coefficient $\beta_j = 0$ using the F-test, the significance level of association between diabetes status and methylation of site $i$ can be assessed. For our analysis, we used the methylation count matrix for the EWAS analysis, including age, sex, ethnicity, batch, and cell proportions as covariates, and tested whether a site is associated with diabetes. A Manhattan plot is used to visualize the testing results. Genes within a 2 kb window of the most prominent sites that passed the suggestive p-value threshold ($10^{-4}$) are annotated on the plot. The detailed methylation count matrix and test results are available in Supplementary Data 2. View this table: View inline View popup Download powerpoint Table 1: Characteristics of the study population. Data availability The raw data will be released upon manuscript publication. Author contributions W.G., M.M., and M.P. conceived the research idea. W.G. performed downstream data analysis and wrote the manuscript with the help of M.M. and K.C.P. M.M. conducted the WGBS experiment, initiated the WGBS analysis and probe design, and edited the manuscript. M.T. helped with the data preprocessing and manuscript editing. K.C.P. and B.R. contributed to sample preparation, data collection, and manuscript editing. M.P. supervised the entire research. Competing interests M.P. founded ProsperK9. 
Supplementary Figures Download figure Open in new tab Figure S1: The increasing prevalence of diabetes across U.S. counties from 2004 to 2020. Choropleth map displaying the escalating diabetes prevalence in U.S. counties from 2004 (A), through 2012 (B), to 2020 (C), which underscores the growing public health challenge and the need for targeted interventions. The color gradient indicates the percentage of the population with diabetes, with darker colors representing increasing prevalence, as shown in the accompanying legend (4% to 20%). County-level diabetes prevalence data was obtained from the United States Diabetes Surveillance System ( https://gis.cdc.gov/grasp/diabetes/DiabetesAtlas.html ). Download figure Open in new tab Figure S2: GO pathway enrichment for DMR regions in WGBS analysis. Genomic region enrichment analysis for differentially methylated regions (DMRs) identified in the Whole Genome Bisulfite Sequencing (WGBS) data. The bar plot presents enriched GO terms categorized by Biological Process (BP), Cellular Component (CC), and Molecular Function (MF) ontologies, with the x-axis showing the −log10 of the adjusted p-values. Only GO terms with an adjusted p-value below 0.05 are displayed. The analysis highlights significant associations of DMRs with various biological processes, particularly those related to metabolic functions and immune responses. Download figure Open in new tab Figure S3: Depth distribution of targeted regions across probe sets in TBS. Density plots illustrate the depth distributions of different probe sets across two sample batches (upper panel: batch1, lower panel, batch2). Each subplot corresponds to a specific probe group—Diabetes, EPIC, EWAS, Opool, and SNPs—with the number of probes indicated in parentheses. Red dashed lines indicate the average depth for each probe set, with grey lines showing non-enriched background regions, underscoring the high efficiency of target enrichment achieved by targeted bisulfite sequencing (TBS). 
Download figure Open in new tab Figure S4: Cell-type specific methylation signatures at TBS sites. Heatmap showing the methylation profiles of various cell types, restricted to CpG sites that overlap with targeted bisulfite sequencing (TBS) data. Each column represents a sample from a specific cell type, with cell types indicated by the color bar at the top: B cells, epithelial cells, granulocytes, monocytes, NK cells, cd4+, cd8+, and naive T cells. The z-scores reflect relative methylation levels, with red indicating hypermethylation and blue indicating hypomethylation. The distinct clustering patterns in the heatmap confirm that TBS sites retain sufficient cell type identity information, allowing for a clear distinction between cell types. This demonstrates the efficacy of TBS in capturing cell type-specific epigenetic signatures, reinforcing its utility for studying cellular heterogeneity. Download figure Open in new tab Figure S5: Simulated validation of TBS sites for accurate cell deconvolution. Simulation of cell deconvolution using targeted bisulfite sequencing (TBS) sites to confirm their support for accurate cell type estimation. (A) Schematic of the simulation workflow for cell deconvolution. The process begins with a true cell type proportion matrix, C, and a reference cell type methylation matrix, R, which are combined with random errors, E, to generate a simulated methylation matrix, M. This matrix is then refined to include only the sites overlapping with TBS data, creating a reduced methylation matrix, M’. A deconvolution algorithm is subsequently applied to estimate cell type proportions, C’, from the reduced methylation matrix. The accuracy of the deconvolution is then evaluated by comparing these estimated proportions with the true proportions. (B) Scatter plots showing the deconvolution accuracy across different cell types in a single simulated experiment, demonstrating high deconvolution accuracy. 
(C) Results from repeating the simulation 100 times, consistently showing high $R^2$ values and low RMSE across all cell types, confirming that TBS sites robustly support accurate cell type deconvolution. ( $R^2$ : coefficient of determination; RMSE: Root Mean Squared Error) Download figure Open in new tab Figure S6: Cell type compositions in saliva: reanalysis of existing scRNA-seq dataset and TBS deconvolution results. (A-B) Cell type composition in saliva was revealed by the reanalysis of a previous single-cell RNA sequencing dataset from human sputum [ 48 ]. (A) UMAP plot displaying distinct clusters of cells, each colored according to its identified cell type. (B) Pie chart showing the abundance of cell type proportions, with Monocytes, Epithelial cells, and Neutrophils being the most abundant, followed by smaller populations of other immune cells. The reanalysis results validate the major cell types, confirming that immune cells and Epithelial cells are predominant in saliva samples. (C) Boxplots illustrating the cell type proportions in two batches (batch 1 and batch 2) derived from deconvolution analysis of bulk TBS data, highlighting the reproducibility across different batches. The deconvolution results show a similar pattern to the scRNA-seq findings, with Granulocytes, Monocytes, and Epithelial cells constituting most of the cell population. In contrast, other immune cells are present in lower proportions. The alignment between the scRNA-seq reanalysis and TBS data deconvolution results supports the reliability of deconvolution analysis. The differences in quantitative proportions may be attributed to inherent sample variation and technology biases, such as the scRNA-seq conducted on sputum from COVID-19 patients, which could have altered cell proportions and capture preferences. Download figure Open in new tab Figure S7: Correlation heatmap between methylation principal components (mPCs) and demographical and cellular variables. 
Heatmap illustrating the correlations between the top 10 methylation principal components (mPC1 to mPC10) and various demographical and cellular proportions. The color intensity and size of the squares represent the strength of the correlation, with blue indicating positive correlations and red indicating negative correlations, as shown by the color scale on the right. These correlations suggest that sex, age, ethnicity, and cell proportions are dominant factors of DNA methylation variations in the TBS data. Download figure Open in new tab Figure S8: Quantile-Quantile (Q-Q) plot for EWAS analysis. The Q-Q plot compares observed −log10(p-values) from the EWAS with expected values under the null hypothesis. Points along the diagonal indicate concordance between observed and expected p-values, while deviations from the diagonal, particularly at the upper tail, suggest the presence of CpG sites with significant associations that exceed what would be expected by chance. The plot shows a slight deviation from the diagonal in the higher −log10(p-value) range, indicating the presence of true associations in the dataset. Acknowledgements We would like to thank all the participants who donated samples and made this study possible. We appreciate Dr. David Wong from the School of Dentistry at UCLA for his valuable insights and discussion. We also thank the UCLA Neuroscience Genomics Core (UNGC) and Broad Stem Cell Sequencing Core (BRSRC) for their help in DNA extraction, library preparation, WGBS and TBS data generation. Figure 1 was created with BioRender.com , thus acknowledging it by courtesy. References [1]. ↵ B Zhou et al. “Worldwide Trends in Diabetes since 1980: A Pooled Analysis of 751 Population-Based Studies with 4·4 Million Participants” . In: The Lancet 387.10027 (Apr. 2016 ), pp. 1513 – 1530 . OpenUrl [2]. ↵ Kanyin Liane Ong et al. 
“Global, Regional, and National Burden of Diabetes from 1990 to 2021, with Projections of Prevalence to 2050: A Systematic Analysis for the Global Burden of Disease Study 2021” . In: The Lancet 402.10397 (July 2023 ), pp. 203 – 234 . OpenUrl [3]. ↵ Centers for Disease Control and Prevention . National Diabetes Statistics Report . (accessed on August, 2024 ). URL: https://www.cdc.gov/diabetes/php/data-research/index.html . [4]. ↵ Diabetes . ( accessed on August, 2024 ). URL: https://www.niddk.nih.gov/health-information/diabetes . [5]. ↵ Ehtasham Ahmad et al. “Type 2 Diabetes” . In: The Lancet 400.10365 (Nov. 2022 ), pp. 1803 – 1820 . OpenUrl [6]. ↵ Yan-Lin Wu et al. “Epigenetic Regulation in Metabolic Diseases: Mechanisms and Advances in Clinical Study” . In: Signal Transduction and Targeted Therapy 8.1 (Mar. 2023 ), pp. 1 – 27 . OpenUrl [7]. ↵ Charlotte Ling , Karl Bacos , and Tina Rönn . “Epigenetics of Type 2 Diabetes Mellitus and Weight Change — a Tool for Precision Medicine?” In: Nature Reviews Endocrinology 18.7 (July 2022 ), pp. 433 – 448 . OpenUrl [8]. ↵ Charlotte Ling and Tina Rönn . “Epigenetics in Human Obesity and Type 2 Diabetes” . In: Cell Metabolism 29.5 (May 2019 ), pp. 1028 – 1044 . OpenUrl [9]. ↵ Maxim V. C. Greenberg and Deborah Bourc’his . “The Diverse Roles of DNA Methylation in Mammalian Development and Disease” . In: Nature Reviews Molecular Cell Biology 20.10 (Oct. 2019 ), pp. 590 – 607 . OpenUrl [10]. ↵ Zachary D. Smith and Alexander Meissner . “DNA Methylation: Roles in Mammalian Development” . In: Nature Reviews Genetics 14.3 (Mar. 2013 ), pp. 204 – 220 . OpenUrl [11]. ↵ Mary L. Stackpole et al. “Cost-Effective Methylome Sequencing of Cell-Free DNA for Accurately Detecting and Locating Cancer” . In: Nature Communications 13.1 (Sept. 2022 ), p. 5566 . OpenUrl [12]. ↵ John C Chambers et al. 
“Epigenome-Wide Association of DNA Methylation Markers in Peripheral Blood from Indian Asians and Europeans with Incident Type 2 Diabetes: A Nested Case-Control Study” . In: The Lancet Diabetes & Endocrinology. Onset 3.7 (July 2015 ), pp. 526 – 534 . OpenUrl [13]. ↵ Gidon Toperoff et al. “Genome-Wide Survey Reveals Predisposing Diabetes Type 2-Related DNA Methylation Variations in Human Peripheral Blood” . In: Human Molecular Genetics. Onset 21.2 (Jan. 2012 ), pp. 371 – 383 . OpenUrl [14]. ↵ Linn Gillberg and Charlotte Ling . “The Potential Use of DNA Methylation Biomarkers to Identify Risk and Progression of Type 2 Diabetes” . In: Frontiers in Endocrinology 6 (Mar. 2015 ), p. 43 . OpenUrl PubMed [15]. ↵ Kelly Yichen Li et al. “DNA Methylation Markers for Kidney Function and Progression of Diabetic Kidney Disease” . In: Nature Communications 14.1 (May 2023 ), p. 2543 . OpenUrl [16]. ↵ Zhuo Chen et al. “DNA Methylation Mediates Development of HbA1c-associated Complications in Type 1 Diabetes” . In: Nature Metabolism 2.8 (Aug. 2020 ), pp. 744 – 762 . OpenUrl [17]. ↵ Colette Christiansen et al. “Enhanced Resolution Profiling in Twins Reveals Differential Methylation Signatures of Type 2 Diabetes with Links to Its Complications” . In: eBioMedicine 103 (May 2024 ). [18]. ↵ Sangeeta Dhawan and Rama Natarajan . “Epigenetics and Type 2 Diabetes Risk” . In: Current Diabetes Reports 19.8 (Aug. 2019 ), p. 47 . OpenUrl [19]. ↵ Yipeng Cheng et al. “Development and Validation of DNA Methylation Scores in Two European Cohorts Augment 10-Year Risk Prediction of Type 2 Diabetes” . In: Nature Aging 3.4 (Apr. 2023 ), pp. 450 – 458 . OpenUrl [20]. ↵ Paulina Baca et al. “DNA Methylation and Gene Expression Analysis in Adipose Tissue to Identify New Loci Associated with T2D Development in Obesity” . In: Nutrition & Diabetes. Glucose Metabolism 12.1 (Dec. 2022 ), pp. 1 – 7 . OpenUrl [21]. ↵ Karl Bacos et al. 
“Blood-Based Biomarkers of Age-Associated Epigenetic Changes in Human Islets Associate with Insulin Secretion and Diabetes” . In: Nature Communications 7.1 (Mar. 2016 ), p. 11089 . OpenUrl [22]. ↵ Hannah Maude , Claudia Sanchez-Cabanillas , and Inês Cebola . “Epigenetics of Hepatic Insulin Resistance” . In: Frontiers in Endocrinology. Insulin Resistance 12 (May 2021 ). [23]. ↵ Qiyou Ding et al. “Inflammation-Related Epigenetic Modification: The Bridge Between Immune and Metabolism in Type 2 Diabetes” . In: Frontiers in Immunology. Inflammation 13 (May 2022 ). [24]. ↵ Bao-Yi Shao et al. “Epigenetics and Inflammation in Diabetic Nephropathy” . In: Frontiers in Physiology. Inflammation 12 (May 2021 ). [25]. ↵ Amita Bansal and Sara E Pinney . “DNA Methylation and Its Role in the Pathogenesis of Diabetes” . In: Pediatric Diabetes 18.3 (May 2017 ), pp. 167 – 177 . OpenUrl [26]. ↵ Cajsa Davegårdh et al. “DNA Methylation in the Pathogenesis of Type 2 Diabetes in Humans” . In: Molecular Metabolism 14 (Feb. 2018 ), pp. 12 – 25 . OpenUrl PubMed [27]. ↵ Sanabil Ali Hassan Ahmed et al. “The Role of DNA Methylation in the Pathogenesis of Type 2 Diabetes Mellitus” . In: Clinical Epigenetics 12.1 (Dec. 2020 ), p. 104 . OpenUrl [28]. ↵ Nikhil Nadiger et al. “DNA Methylation and Type 2 Diabetes: A Systematic Review” . In: Clinical Epigenetics 16.1 (May 2024 ), p. 67 . OpenUrl [29]. ↵ Luz D Orozco et al. “Epigenome-Wide Association in Adipose Tissue from the METSIM Cohort” . In: Human Molecular Genetics 27.10 (May 2018 ), pp. 1830 – 1846 . OpenUrl [30]. ↵ Patricia R. Braun et al. “Genome-Wide DNA Methylation Comparison between Live Human Brain and Peripheral Tissues within Individuals” . In: Translational Psychiatry. Similarity 9.1 (Dec. 2019 ), p. 47 . OpenUrl [31]. ↵ Shota Nishitani et al. “Cross-Tissue Correlations of Genome-Wide DNA Methylation in Japanese Live Human Brain and Blood, Saliva, and Buccal Epithelial Tissues” . In: Translational Psychiatry. Similarity 13.1 (Feb. 
2023 ), p. 72 . OpenUrl [32]. ↵ Eliza Fraszczyk et al. “Epigenome-Wide Association Study of Incident Type 2 Diabetes: A Meta-Analysis of Five Prospective European Cohorts” . In: Diabetologia 65.5 (May 2022 ), pp. 763 – 776 . OpenUrl [33]. ↵ Mary E Sehl , et al. “Systematic dissection of epigenetic age acceleration in normal breast tissue reveals its link to estrogen signaling and cancer risk” . In: bioRxiv ( 2024 ), pp. 2024 – 10 . [34]. ↵ Michael J. Ziller et al. “Charting a Dynamic DNA Methylation Landscape of the Human Genome” . In: Nature 500.7463 (Aug. 2013 ), pp. 477 – 481 . OpenUrl [35]. ↵ Joshua Moss et al. “Comprehensive Human Cell-Type Methylation Atlas Reveals Origins of Circulating Cell-Free DNA in Health and Disease” . In: Nature Communications 9.1 (Dec. 2018 ), p. 5068 . OpenUrl [36]. ↵ Tasnim Dayeh et al. “Genome-Wide DNA Methylation Analysis of Human Pancreatic Islets from Type 2 Diabetic and Non-Diabetic Donors Identifies Candidate Genes That Influence Insulin Secretion” . In: PLOS Genetics 10.3 (Mar. 2014 ), e1004160 . OpenUrl [37]. ↵ Ralph A. DeFronzo et al. “Type 2 Diabetes Mellitus” . In: Nature Reviews Disease Primers 1.1 (July 2015 ), pp. 1 – 22 . OpenUrl [38]. ↵ Tasnim Dayeh et al. “DNA Methylation of Loci within ABCG1 and PHOSPHO1 in Blood DNA Is Associated with Future Type 2 Diabetes Risk” . In: Epigenetics 11.7 (July 2016 ), pp. 482 – 488 . OpenUrl [39]. ↵ Paul V. Sabatini et al. “Neuronal PAS Domain Protein 4 Suppression of Oxygen Sensing Optimizes Metabolism during Excitation of Neuroendocrine Cells” . In: Cell Reports 22.1 (Jan. 2018 ), pp. 163 – 174 . OpenUrl [40]. ↵ Xiangying Luo et al. “LHX9, a P53-Binding Protein, Inhibits the Progression of Glioma by Suppressing Glycolysis” . In: Aging (Albany NY) 13.18 (Sept. 2021 ), pp. 22109 – 22119 . OpenUrl [41]. ↵ Florian Renoux et al. “The AP1 Transcription Factor Fosl2 Promotes Systemic Autoimmunity and Inflammation by Repressing Treg Development” . In: Cell Reports 31.13 (June 2020 ), p. 
107826 . OpenUrl [42]. ↵ Marco Morselli et al. “DNA Methylation Profiles in Pneumonia Patients Reflect Changes in Cell Types and Pneumonia Severity” . In: Epigenetics 17.12 (Dec. 2022 ), pp. 1646 – 1660 . OpenUrl [43]. ↵ Giulia Protti et al. “The Methylome of Buccal Epithelial Cells Is Influenced by Age, Sex, and Physiological Properties” . In: Physiological Genomics 55.12 (Dec. 2023 ), pp. 618 – 633 . OpenUrl [44]. ↵ Thomas Battram et al. “The EWAS Catalog: A Database of Epigenome-Wide Association Studies” . In: Wellcome Open Research 7 (May 2022 ), p. 41 . OpenUrl PubMed [45]. ↵ Mengwei Li et al. “EWAS Atlas: A Curated Knowledgebase of Epigenome-Wide Association Studies” . In: Nucleic Acids Research 47.D1 (Jan. 2019 ), pp. D983 – D988 . OpenUrl [46]. ↵ Netanel Loyfer et al. “A DNA Methylation Atlas of Normal Human Cell Types” . In: Nature. Wgbs 613.7943 (Jan. 2023 ), pp. 355 – 364 . OpenUrl [47]. ↵ Eugene Andres Houseman et al. “DNA Methylation Arrays as Surrogate Measures of Cell Mixture Distribution” . In: BMC Bioinformatics 13.1 (Dec. 2012 ), p. 86 . OpenUrl [48]. ↵ Xianwen Ren et al. “COVID-19 Immune Features Revealed by a Large-Scale Single-Cell Transcriptome Atlas” . In: Cell 184.7 (Apr. 2021 ), 1895 – 1913.e19 . OpenUrl [49]. ↵ Andrew E Jaffe and Rafael A Irizarry . “Accounting for Cellular Heterogeneity Is Critical in Epigenome-Wide Association Studies” . In: Genome Biology 15.2 ( 2014 ), R31 . OpenUrl [50]. ↵ Marijana Vujkovic et al. “Discovery of 318 New Risk Loci for Type 2 Diabetes and Related Vascular Outcomes among 1.4 Million Participants in a Multi-Ancestry Meta-Analysis” . In: Nature Genetics 52.7 (July 2020 ), pp. 680 – 691 . OpenUrl [51]. ↵ Vikash Chandra et al. “The Type 1 Diabetes Gene TYK2 Regulates β -Cell Development and Its Responses to Interferon- α ” . In: Nature Communications 13.1 (Oct. 2022 ), p. 6363 . OpenUrl [52]. ↵ Matthew A. Kennedy et al. 
“ABCG1 Has a Critical Role in Mediating Cholesterol Efflux to HDL and Preventing Cellular Lipid Accumulation” . In: Cell Metabolism 1.2 (Feb. 2005 ), pp. 121 – 131 . OpenUrl [53]. ↵ Karin E. Bornfeldt and Ira Tabas . “Insulin Resistance, Hyperglycemia, and Atherosclerosis” . In: Cell metabolism 14.5 (Nov. 2011 ), pp. 575 – 585 . OpenUrl [54]. ↵ Alicia J. Jenkins , Maria B. Grant , and Julia V. Busik . “Lipids, Hyperreflective Crystalline Deposits and Diabetic Retinopathy: Potential Systemic and Retinal-Specific Effect of Lipid-Lowering Therapies” . In: Diabetologia 65.4 (Apr. 2022 ), pp. 587 – 603 . OpenUrl [55]. ↵ Xinming Tu , et al. “A Supervised Contrastive Framework for Learning Disentangled Representations of Cell Perturbation Data” . In: bioRxiv ( 2024 ), pp. 2024 – 01 . [56]. ↵ Abubakar Abid and James Zou . “Contrastive variational autoencoder enhances salient features” . In: arXiv preprint arXiv:1902.04601 ( 2019 ). [57]. ↵ Patrick T. Griffin et al. “TIME-seq Reduces Time and Cost of DNA Methylation Measurement for Epigenetic Clock Construction” . In: Nature Aging. TIME-seq 4.2 (Feb. 2024 ), pp. 261 – 274 . OpenUrl [58]. ↵ Yu-Hsuan Chuang et al. “Parkinson’s Disease Is Associated with DNA Methylation Levels in Human Blood and Saliva” . In: Genome Medicine 9.1 (Aug. 2017 ), p. 76 . OpenUrl [59]. ↵ Aline Duarte Folle et al. “Clinical Progression in Parkinson’s Disease with Features of REM Sleep Behavior Disorder: A Population-Based Longitudinal Study” . In: Parkinsonism & Related Disorders 62 (May 2019 ), pp. 105 – 111 . OpenUrl PubMed [60]. ↵ Kimberly C. Paul et al. “A Pesticide and iPSC Dopaminergic Neuron Screen Identifies and Classifies Parkinson-relevant Pesticides” . In: Nature Communications 14.1 (May 2023 ), p. 2803 . OpenUrl [61]. ↵ Marco Morselli et al. “In Vivo Targeting of de Novo DNA Methylation by Histone Modifications in Yeast and Mouse” . In: eLife 4 (Apr. 2015 ). Ed. by Bing Ren , e06205 . OpenUrl CrossRef PubMed [62]. 
↵ FastQC: A Quality Control Tool for High Throughput Sequence Data . URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ . [63]. ↵ Shifu Chen et al. “Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor” . In: Bioinformatics 34.17 (Sept. 2018 ), pp. i884 – i890 . OpenUrl [64]. ↵ Colin Farrell , et al. “BiSulfite Bolt: A bisulfite sequencing analysis platform” . In: GigaScience 10.5 ( 2021 ), giab033 . OpenUrl [65]. ↵ Heng Li et al. “The Sequence Alignment/Map Format and SAMtools” . In: Bioinformatics . Samtools 25.16 (Aug. 2009 ), pp. 2078 – 2079 . OpenUrl [66]. ↵ Frank Jühling et al. “Metilene: Fast and Sensitive Calling of Differentially Methylated Regions from Bisulfite Sequencing Data” . In: Genome Research 26.2 (Feb. 2016 ), pp. 256 – 262 . OpenUrl [67]. ↵ Zuguang Gu and Daniel Hübschmann . “rGREAT: An R/Bioconductor Package for Functional Enrichment on Genomic Regions” . In: Bioinformatics 39.1 (Jan. 2023 ), btac745 . OpenUrl [68]. ↵ Sven Heinz et al. “Simple Combinations of Lineage-Determining Transcription Factors Prime Cis -Regulatory Elements Required for Macrophage and B Cell Identities” . In: Molecular Cell 38.4 (May 2010 ), pp. 576 – 589 . OpenUrl [69]. ↵ Impute . URL: http://bioconductor.org/packages/impute/ . [70]. ↵ Yongseok Park and Hao Wu . “Differential Methylation Analysis for BS-seq Data under General Experimental Design” . In: Bioinformatics 32.10 (May 2016 ), pp. 1446 – 1453 . OpenUrl View the discussion thread. Back to top Previous Next Posted December 21, 2024. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following Type-2 diabetes biomarker discovery and risk assessment through saliva DNA methylome Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Type-2 diabetes biomarker discovery and risk assessment through saliva DNA methylome Wenbin Guo , Marco Morselli , Kimberly C. Paul , Michael Thompson , Beate Ritz , Matteo Pellegrini medRxiv 2024.12.20.24319403; doi: https://doi.org/10.1101/2024.12.20.24319403 Share This Article: Copy Citation Tools Type-2 diabetes biomarker discovery and risk assessment through saliva DNA methylome Wenbin Guo , Marco Morselli , Kimberly C. Paul , Michael Thompson , Beate Ritz , Matteo Pellegrini medRxiv 2024.12.20.24319403; doi: https://doi.org/10.1101/2024.12.20.24319403 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (574) Allergy and Immunology (865) Anesthesia (304) Cardiovascular Medicine (4462) Dentistry and Oral Medicine (445) Dermatology (383) Emergency Medicine (611) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1517) Epidemiology (15251) Forensic Medicine (31) Gastroenterology (1132) Genetic and Genomic Medicine (6621) Geriatric Medicine (669) Health Economics (1002) Health Informatics (4564) Health Policy (1372) Health Systems and Quality Improvement (1617) Hematology (544) HIV/AIDS (1272) Infectious Diseases (except HIV/AIDS) (15938) Intensive Care and Critical Care Medicine (1107) Medical Education (624) Medical Ethics (147) Nephrology (670) Neurology (6643) Nursing (346) Nutrition (1001) Obstetrics and 
Gynecology (1149) Occupational and Environmental Health (957) Oncology (3350) Ophthalmology (981) Orthopedics (369) Otolaryngology (421) Pain Medicine (436) Palliative Medicine (130) Pathology (665) Pediatrics (1698) Pharmacology and Therapeutics (694) Primary Care Research (714) Psychiatry and Clinical Psychology (5465) Public and Global Health (9259) Radiology and Imaging (2212) Rehabilitation Medicine and Physical Therapy (1372) Respiratory Medicine (1199) Rheumatology (598) Sexual and Reproductive Health (716) Sports Medicine (533) Surgery (715) Toxicology (100) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a03e828b082458f4',t:'MTc4MDE1MjE5Mw=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2024) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00