Mutational landscape of pure ductal carcinoma in situ and associations with disease prognosis and response to radiotherapy

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 94,843 characters · extracted from preprint-html · click to expand
Mutational landscape of pure ductal carcinoma in situ and associations with disease prognosis and response to radiotherapy | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Mutational landscape of pure ductal carcinoma in situ and associations with disease prognosis and response to radiotherapy Noor Rizvi , Eliseos J. Mucaki , Emily L. Salmini , Monica Zhang , Sabina Trebinjac , Ezra Hahn , Lawrence Paszat , Sharon Nofech-Mozes , Michael T. 
Hallett , Eileen Rakovitch , View ORCID Profile Vanessa Dumeaux doi: https://doi.org/10.1101/2025.03.01.25323122 Noor Rizvi 1 Department of Biochemistry, Western University , London, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Eliseos J. Mucaki 1 Department of Biochemistry, Western University , London, Canada 2 Department of Anatomy and Cell Biology, Western University , London, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Emily L. Salmini 1 Department of Biochemistry, Western University , London, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Monica Zhang 2 Department of Anatomy and Cell Biology, Western University , London, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sabina Trebinjac 3 Department of Radiation Oncology, Sunnybrook Health Sciences Centre, University of Toronto , Toronto, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ezra Hahn 3 Department of Radiation Oncology, Sunnybrook Health Sciences Centre, University of Toronto , Toronto, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Lawrence Paszat 3 Department of Radiation Oncology, Sunnybrook Health Sciences Centre, University of Toronto , Toronto, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sharon Nofech-Mozes 4 Department of Pathology, Sunnybrook Health Sciences Centre, University of Toronto , Toronto, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Michael T. 
Hallett 1 Department of Biochemistry, Western University , London, Canada 5 Department of Oncology, Lawson Research Institute, London Health Sciences Centre , London, Canada 6 Centre for Translational Cancer Research, Western University , London, Canada . Find this author on Google Scholar Find this author on PubMed Search for this author on this site Eileen Rakovitch 3 Department of Radiation Oncology, Sunnybrook Health Sciences Centre, University of Toronto , Toronto, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Vanessa Dumeaux 1 Department of Biochemistry, Western University , London, Canada 2 Department of Anatomy and Cell Biology, Western University , London, Canada 5 Department of Oncology, Lawson Research Institute, London Health Sciences Centre , London, Canada 6 Centre for Translational Cancer Research, Western University , London, Canada . Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Vanessa Dumeaux For correspondence: vdumeaux{at}uwo.ca Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Background Managing Ductal Carcinoma in Situ (DCIS) remains challenging due to the lack of reliable biomarkers to predict radiotherapy (RT) response, leading to both overtreatment of indolent disease and undertreatment of aggressive cases. Results Through whole-exome sequencing of 147 DCIS cases, we characterized the genomic landscape of pure DCIS and identified genetic alterations associated with the risk of recurrence, either in-situ or invasive. DCIS lesions harbored frequent mutations in established cancer drivers ( PIK3CA , TP53 ) and genes regulating tissue architecture, which likely enhanced pre-invasive cell fitness but lacked prognostic value. 
A subset of younger patients exhibited distinct mutational processes characterized by high mutational burden, though these were not linked to recurrence risk. Across the cohort, five mostly mutually exclusive genes ( SH2B2 , PDZD8 , MYO7A , MUCL3 , DNASE2B ), involved in cell adhesion, membrane organization, and DNA degradation, were significantly associated with 10-year risk of local recurrence. In RT-treated patients, we identified 27 additional mutated genes uniquely associated with recurrence, along with SH2B2 and MUCL3 . Most of these genes were involved in cytoskeletal regulation, cell adhesion, and cell-environment interactions. Mutations in metabolic regulators ( MGAM2 and AADACL3 ) and REV1 , which mediates DNA damage tolerance, may impair cellular responses to RT-induced stress. Notably, we identified distinct genes prognostic for in-situ versus invasive recurrence: nineteen genes predominantly involved in tissue structural maintenance in in-situ relapse, and thirteen genes primarily affecting cell-cycle and genome-stability pathways in invasive progression. Copy number analyses revealed that pure DCIS exhibits molecular subtype-specific patterns characteristic of invasive disease, with novel alterations associated with recurrence, including three non-adjacent gains and five losses in regions harboring oncogenes, tumor suppressor genes, and genes regulating structural integrity, cell-cell adhesion and interactions. Conclusions While TP53 , PIK3CA , and recurrent copy number alterations represent early events in tumorigenesis, they lack prognostic value in pure DCIS, underscoring the need for alternative biomarkers. Our findings identify key genetic alterations associated with local recurrence and RT resistance. We further uncovered distinct molecular programs underlying in-situ versus invasive recurrence, with mutations affecting tissue structural maintenance in in-situ relapse and cell-cycle/genome-stability pathways in invasive progression. 
Introduction Ductal Carcinoma in Situ (DCIS) is a non-invasive, non-obligate precursor of invasive breast cancer characterized by clonal proliferation of neoplastic cells confined within the breast ducts [ 1 ]. DCIS is primarily detected through screening and diagnostic mammograms due to its characteristic presentation with microcalcifications. The widespread implementation of mammographic screening programs has led to a substantial increase in DCIS detection, with incidence rates rising from four to eleven cases per 100,000 women between 1993 and 2007 [ 2 , 3 ]. This trend is expected to continue as screening programs expand to include younger women [ 4 ]. There are currently no definitive markers to predict which cases will progress to invasive, life-threatening lesions [ 5 , 6 ]. Consequently, treatment, usually involving breast-conserving surgery (BCS) followed by breast radiation therapy (RT), is recommended for all women diagnosed with DCIS [ 5 ]. However, many DCIS would not transit to invasive life-threatening disease even if left untreated [ 7 ]. The absence of known markers predictive of a patient benefit to RT results in both over-treatment for indolent lesions and under-treatment for some aggressive DCIS likely to evolve to an invasive state. Markers that identify such cases could enable tailored treatments, such as full mastectomy or adjuvant systemic therapies for patients at risk of in situ or invasive local recurrence (LR) despite receiving RT [ 8 ], or more frequent watch-and-wait strategies for those with indolent lesions. Cancer emerges from the accumulation of genetic aberrations in neoplastic cells and genomic instability [ 9 ]. Mutations can disrupt normal cellular processes, leading to uncontrolled cell growth, impaired DNA repair, and other aberrant cellular behaviours that may contribute to cancer development [ 9 ]. 
Previous studies have revealed important insights into DCIS progression by highlighting specific mutations and chromosomal alterations that may influence the progression towards invasive disease [ 10 – 17 ]. Many of these studies, however, focus on synchronous DCIS – concurrent presentation of DCIS and invasive ductal carcinoma (IDC). These studies explore the overlap between DCIS and IDC, and recognize the potential differences between them. However, these findings might be reflective of a timepoint beyond the evolutionary bottleneck, and the full repertoire of mediators of the transition from DCIS to IDC or associated with a future risk of LR cannot be established from these studies [ 10 ]. TP53 and PIK3CA [ 11 – 17 ] are among the most frequently observed mutations in these studies [ 11 – 17 ] and are often characterized as key drivers of tumorigenesis, promoting growth and spread of cancer cells. However, this may not necessarily be the case, as mutant field clonalization could equip pre-malignant cells with additional (epi-) genetic mutations that confer fitness advantages, allowing them to expand into ductal regions without directly driving invasive behavior [ 10 ]. This suggests that driver mutations, alongside frequently mutated genes, may serve as these early fitness enhancers rather than drivers of progression. Several copy number aberrations (CNAs) have also been identified by these studies [ 10 – 15 , 17 – 20 ] including frequent gains at 1q, 8q, 11q, and 17q, and losses at 16q, however the specific associations of these CNAs to DCIS prognosis remain unclear. To address these critical gaps, we conducted comprehensive exome sequencing analysis of 147 pure DCIS cases, including patients treated with and without RT, to investigate markers of local recurrence within 10 years of diagnosis. Our study characterizes the mutational landscape in pure DCIS and identifies novel genomic alterations associated with tumor grade, molecular subtypes, and patient age. 
Most importantly, we discovered specific variants and CNAs predictive of local recurrence risk, including genetic markers associated with RT response. These findings provide insights into the genomic determinants of DCIS prognosis and treatment response, establishing a foundation for improved risk stratification and personalized treatment strategies for DCIS patients. Results A unique cohort of pure DCIS patients We assembled a cohort of 147 pure DCIS patients treated with BCS, with or without subsequent RT, incorporating comprehensive genomic profiling through whole-exome DNA analysis of primary DCIS tumors and matched normal tissues ( Table 1 ). The study design was balanced to include at least a third of patients who experienced an ipsilateral invasive or in-situ local recurrence within a 10-year follow-up period and about half received radiotherapy as part of their standard-of-care ( Table 1 ). In clinical settings, RT is omitted in some patients with low-risk features of DCIS or due to patient preference. The median time to recurrence was 4.2 years for invasive disease and 2.1 years for in-situ disease. Most tumors were of intermediate to high grade spanning all five molecular subtypes, with normal-like and luminal A subtypes more frequently observed in patients without local recurrence within 10 years ( Table 1 ). A minority of tumors exhibited multifocality (24.5%) and positive margins (6.8%). Clinical characteristics were comparable between RT-treated and untreated patients across all variables except age, with women aged 60 years or older less likely to receive RT ( Supplementary Table 1 ). View this table: View inline View popup Table 1. Patient and tumor clinical attributes Mutational landscape of DCIS reveals distinct processes driving high mutational burden in early-onset cases The mutational load of pure DCIS lesions varied considerably, ranging from
3 to 3,482 non-synonymous variants per sample (median 75). The majority (74.2%) were missense mutations, constituting approximately 52K unique variants identified in ∼14K genes. Most of these genes were not frequently mutated, with 2,030 genes (14.5%) harboring non-synonymous aberrations in at least 5 patients. As expected, most mutations are of C>T type (barplot in Fig. 1 ), a common mutational pattern attributed to the spontaneous deamination of 5-methylcytosine, a process frequently observed in many cancer genomes [ 21 , 22 ]. Spontaneous deamination can be exacerbated in FFPE samples due to DNA damage [ 23 ]. FFPE-related mutational artefacts are known to resemble certain COSMIC signatures [ 24 ], such as SBS30 and SBS1 [ 25 ]. During library preparation, formalin-induced DNA lesions are chemically repaired, with unrepaired profiles resembling SBS30 and repaired profiles resembling SBS1 [ 25 ]. While SBS30 was not identified in our dataset, SBS1 was detected in a large number of samples ( Fig. 1 ). SBS1, commonly found in tumor genomes, is difficult to distinguish from repaired FFPE-related artifacts due to their high similarity [ 22 , 25 ]. However, previous research has demonstrated that mutational profiles from repaired FFPE samples closely match true tumor mutational profiles [ 25 ], and we expect that the careful processing in our mutation-calling pipeline minimizes the impact of these artefacts [ 23 ]. Download figure Open in new tab Fig. 1: Mutational Signatures in pure DCIS samples. The top panel shows tumor mutational burden (TMB) with distribution of point mutation types. Clinical annotations display 10-year local recurrence outcome, tumor grade, molecular subtype, and patient age. The bottom heatmap depicts the row-scaled relative contribution of single base substitution (SBS) signatures detected in at least 10 samples (z-score).
Samples (columns) and signatures (rows) are ordered based on a rank-sum statistic that maximizes the coherence of contribution patterns. The method first splits signatures into two groups using medoid clustering. For each signature, relative contributions are ranked within each group, and the final sample ordering is determined by the average rank-sum across all signatures. Interestingly, a subset of DCIS samples exhibit a significantly higher tumor mutational burden (TMB), characterized by an increase in frequency of C>G, T>C, and C>A mutations, and enrichment of specific SBS signatures including SBS26, SBS7b, and SBS5 ( Fig. 1 , 12 samples on the right). The causes underlying these mutational signatures remain incompletely understood. However, SBS26 has been linked to impaired mismatch repair and microsatellite instability. These cases were not more likely to have a recurrence, to be of a higher grade, or to be of a specific molecular subtype, but were predominantly found in younger patients (8 out of 12 under 50 years; Chi-square test p-value < 0.05). These findings highlight distinct mutational processes in some early-onset DCIS cases, though these molecular features were not associated with prognosis. Pure DCIS is associated with high frequency of mutations in genes involved in cell adhesion, polarity, tissue structure and function Analysis of pure DCIS revealed distinct patterns of recurrent mutations across multiple genes ( Fig. 2 ). PIK3CA was the most frequently mutated gene (15% of cases), followed by FSIP2 and KIR3DL3 (14%). We also identified functional gene groups among the most frequently mutated genes, including several motor genes converting chemical energy to mechanical force ( DNAH12, DNHD1 , and MYOB15; 12% each), collagen genes ( COL18A1 and COL4A3 ; 12% and 10%, respectively), and mucin genes ( MUC3A , MUC4 , MUC22 , and MUC5AC ; 10-12% of cases each).
While mucin genes are typically large and can accumulate mutations by chance, these specific mucin genes were not identified as FLAGS (FrequentLy mutAted GeneS) and therefore not excluded in our analysis (See Methods). Given our stringent rules for mutation calling, these findings suggest that mutations in mucin and other genes controlling epithelial-components including cellular morphology, epithelial function and adhesion are central to the physiopathology of pure DCIS. Download figure Open in new tab Fig. 2: The most frequently mutated genes and association with clinico-pathological variables in pure DCIS patients. a The top 50 most frequent non-synonymous small variants identified in pure DCIS lesions. Samples are in columns and variants are color-coded based on their classification. The TMB for each lesion is displayed at the top of the heatmap. Samples are organized by age at diagnosis, with additional clinico-pathological features—grade and subtype—depicted at the bottom. b Mutated genes significantly associated with specific clinico-pathological variables (False discovery rate [FDR] < 0.001). The barplots show the proportion of mutated samples within specific categories: age group (left), high grade (middle), and Her2-subtype (right). We also identified a few mutated genes enriched in distinct clinico-pathological groups including patients with early-onset DCIS ( FILP1L , CFAP61 , FREM1 ) or later-onset ( ERBB4 ), high-grade lesions ( TP53 ) and Her2-enriched subtype ( ASH1L , NAGPA , DMD ) ( Figure 2B ). Collectively, these findings highlight that pure DCIS harbors frequent mutations in genes governing tissue architecture and cell-cell interactions, suggesting these alterations may be fundamental to DCIS development. Proportions for each category are compared to the proportions of mutated samples in the other respective groups (shown in grey). 
Established cancer driver genes are present in pure DCIS but lack prognostic capacity To identify potential cancer driver genes, we analyzed the ratio of non-synonymous to synonymous mutations (dN/dS) across all genes, which can indicate positive selection of mutations that provide growth advantages to cancer cells [ 26 ]. This analysis identified two significantly mutated driver genes in pure DCIS : PIK3CA and TP53 (FDR < 0.05). Both genes were also among the most frequently mutated genes ( Fig. 2a , red-labelled genes). While PIK3CA mutations were not enriched in specific molecular subtypes, TP53 mutations were significantly more frequent in basal-like and HER2-enriched subtypes compared to other subtypes (19% in basal-like [4/21], 33% in HER2-enriched [6/18] vs 5% in other subtypes [4/78], Chi-square test p < 0.005). Out of 34 samples carrying a mutation in at least one of these driver genes, 15 samples harbored at least one potentially actionable alteration as indicated by OncoKB ( Supplementary Fig. 1 ). The mutation spectrum in PIK3CA was dominated by the activating H1047R hotspot mutation (∼50%) in the kinase domain of exon 21, known to enhance PI3K-mediated pathway signaling ( Supplementary Fig. 1a ) [ 27 , 28 ]. Similarly, TP53 mutations clustered in known hotspots within the DNA-binding domain, which may result in the loss of tumor suppression by affecting its ability to bind to DNA ( Supplementary Fig. 1b ). Despite their established roles in cancer progression, none of these driver mutations was associated with 10-year local recurrence risk (Firth’s penalized likelihood Cox regression p > 0.5), suggesting that additional factors influence DCIS prognosis.
Identification of mutations associated with increased local recurrence risk independent of treatment To identify potential prognostic markers, we analyzed mutated genes associated with the risk of invasive or in-situ local recurrence in the ipsilateral breast occurring between 6 months and 10 years after diagnosis. Our survival analysis identified five biomarkers each significantly associated with increased 10-year local recurrence risk (Firth’s penalized likelihood Cox regression p-value < 0.01 & permuted p-value < 0.05; Fig. 3a-b ). These mutations, occurring in 4-7% of cases, were largely mutually exclusive and occurred across molecular subtypes and grades. Download figure Open in new tab Fig. 3: Gene variants associated with an increased 10-year local recurrence risk in DCIS. a Samples are in columns and variants are color-coded based on their classification. Local recurrence status and clinico-pathological characteristics of each lesion are depicted at the bottom. Local recurrence is defined as any recurrence, in situ or invasive, in the ipsilateral breast occurring between 6 months and 10 years after diagnosis. Right-hand side reports the proportion of each variant classification type. b Hazard ratio and confidence intervals for each significant mutated gene associated with increased 10-year local recurrence risk (Firth’s penalized likelihood Cox regression). c Kaplan-Meier analysis of local recurrence-free survival (LRFS) comparing patients with mutations in at least one of the 5 genes (red) versus those without mutations (blue). d Cellular component Gene Ontology (GO) terms annotations for each significant gene. The presence of mutations in at least one of these genes was significantly associated with increased recurrence risk ( Fig. 3c , log-rank p-value p4.4, Fig. 3b ). 
Gene Ontology analysis revealed that most genes are involved in multiple interrelated cellular processes including cell adhesion ( MYO7A , SH2B2 ), organization and function of the cellular membrane ( PDZD8 , MUCL3 , MYO7A ), and cellular organization and cytoskeleton ( MYO7A , PDZD8 ) ( Fig. 3d ). Additionally, DNASE2B, a member of the DNase II family of endonucleases, was identified among the significant genes. These findings underscore how cytoskeletal reorganization, changes in cell structure, and compromised cell adhesion might contribute to increased risk of recurrence within 10 years after a DCIS diagnosis. Mutations in genes governing cytoskeletal organization and membrane dynamics associated with radiotherapy resistance To identify predictive biomarkers for RT response, we analyzed mutations associated with 10-year local recurrence in a cohort restricted to patients who received RT. This analysis revealed 29 genes significantly associated with an increased risk of recurrence (Firth’s penalized likelihood Cox regression p-value < 0.05 & permuted p-value < 0.05; Fig. 4a ). Notably, these mutations often co-occurred, with at least two mutated genes present in approximately 27% of lesions (20/73 patients treated with RT). Download figure Open in new tab Fig. 4: Mutations associated with radiotherapy resistance in DCIS. a An oncoplot showing the distribution of mutations in 29 genes significantly associated with local recurrence in RT-treated patients. Color-coding indicates mutation types; clinical annotations show age, grade, molecular subtype, and 10-year recurrence status. b Kaplan-Meier analysis of LRFS comparing patients with mutations in at least two of the 29 genes (red) versus those without mutations (blue). c Distribution of TMB (shown as number of non-synonymous variants, log2 scale) for lesions with at least one mutation in two genes associated with LRFS colored by recurrence status at 10 years.
d GO cellular component enrichment network analysis of recurrence-associated genes in RT-treated DCIS. Network visualization shows enriched GO terms (FDR 2), and edges indicate significant semantic similarity between terms. Node color intensity corresponds to enrichment significance. In RT-treated patients, lesions harboring mutations in at least two of these genes exhibited a markedly increased risk of local recurrence (Firth’s penalized likelihood Cox regression HR = 4.8, 95% CI: 2.1–11.2, p = 0.0002), with most mutation-positive recurrences occurring within 5 years post-RT ( Fig. 4b ). Notably, while mutations in SH2B2 and MUCL3 were significantly associated with prognosis in both the overall cohort and the RT-treated subgroup, an RT-stratified analysis revealed that lesions harboring two mutations in the remaining 26 genes (which were altered in at least five lesions in the no-RT group) were predictive of prognosis only in RT-treated patients (p 0.5) ( Supplementary Fig. 2 ). Given the frequent co-occurrence of these mutations, we assessed whether overall TMB might explain the increased recurrence risk. Although several high-TMB lesions were observed among patients with these mutations ( Fig. 4a,c ), TMB itself was not significantly associated with local recurrence risk (Firth’s penalized likelihood Cox regression p = 0.8). Indeed, many lesions with high TMB remained recurrence-free over 10 years, suggesting that specific mutations—not overall mutation load—drive LR after RT. We further examined potential confounding by clinico-pathological variables (i.e. age, grade, tumor size, multifocality and subtype) by adjusting our survival model for each factor. The predictive value of these mutations remained significant (Firth’s penalized likelihood Cox regression p-values < 0.005), supporting their role as an independent risk factor for recurrence following RT. 
Functional analysis revealed a network of interrelated cellular functions involved in actin cytoskeleton regulation, cellular polarity, and membrane dynamics—processes essential for invasive behavior ( Fig. 4d ). Alterations in SH2B2 and SORBS1 , which modulate stress fiber formation and actin remodeling, and in PTPN13 and WWC1 , which affect actomyosin tension at the cell leading edge, suggest disruptions in cytoskeletal dynamics. Additional genes associated with prognosis in RT-treated patients include KRT1 , MUCL3 , TMEM132C , TTLL10 , EML5 , CEP128 , CFAP47 , and KIF24 , all of which contribute to maintaining cellular architecture, polarity, and adhesion ( Fig. 4a ). In addition, alterations in metabolic regulators—such as MGAM2 and AADACL3 —could influence the energetic and biosynthetic demands required in response to RT-induced stress, while REV1 is implicated in DNA damage tolerance, a critical safeguard following genotoxic stress. Collectively, these findings highlight the pivotal roles of cytoskeletal reorganization, membrane remodeling, metabolic regulation, and DNA repair pathways in the response to RT. The prognostic significance of these mutations in RT-treated patients suggests that these gene alterations may mediate resistance mechanisms triggered by RT-induced stress. Differential mutational associations with in-situ versus invasive local recurrence after radiotherapy To investigate whether individual mutations are preferentially linked to either in-situ or invasive LR, we refitted the gene-based survival models using each LR type as the sole endpoint (the alternative event was censored) and stratified the analyses by RT. For the full cohort we had retained genes with p < 0.01 and permutation p < 0.05; for the smaller RT-treated group we used p < 0.05 and permutation p < 0.05. As in the main analysis, many significant associations were found in RT-treated patients, with some overlap with significant genes found in the whole cohort ( Fig. 5a ).
Download figure Open in new tab Fig. 5: Differential mutational associations with in-situ versus invasive local recurrence after radiotherapy a An UpSet plot showing the number of genes significantly associated with risk of invasive, in-situ or any local recurrence (LR) in the whole cohort (all) and in RT-treated (RT) or untreated patients (noRT), with intersections between gene lists indicated by connected dots below and corresponding size of the intersection depicted in the barplot on the right. b Oncoplot showing mutations in genes uniquely associated with in-situ LR (n=19) in RT-treated patients. Color-coding indicates mutation types; clinical annotations show age, grade, molecular subtype, and recurrence status. c Similar to b but showing mutations in genes uniquely associated with invasive LR (n=13) in RT-treated patients. d Kaplan-Meier analysis of in-situ (left) or invasive (right) LR-free survival comparing patients with mutations in at least two of the 19 genes uniquely associated with in-situ LR (red) versus those without mutations (blue). e Similar to d but comparing in-situ or invasive LR-free survival between patients with mutations in at least two of the 13 genes uniquely associated with invasive LR versus those without mutations. Three genes— MUCL3 , SH2B2 and C6orf118 —were associated with outcome in at least three out of six analyses ( Fig. 5a ). In RT-treated tumours, fifteen genes were associated with in-situ LR or with “any LR”, and two genes ( CFAP47 , KRT1 ) were associated with invasive or with “any LR” ( Fig. 5a ). Nineteen additional genes were uniquely associated with in-situ LR ( Fig. 5b ), whereas thirteen were unique to invasive LR ( Fig. 5c ) within the RT subgroup. The Kaplan-Meier analyses demonstrate that patients whose lesions harbor mutations in at least two genes from either the in-situ-specific or invasive-specific gene sets show significantly worse LR-free survival only for the corresponding recurrence type ( Fig. 5d,e ).
Functional annotation indicates that mutations uniquely linked to in-situ LR occur mainly in genes that preserve epithelial architecture and mechanosensing: anchoring-junction components that couple cells to neighbouring cells or to the extracellular matrix ( DMD , FOCAD ), the stretch-activated channel PIEZO1, and scaffolds/adaptors localised to actin-rich membrane projections such as invadopodia ( SHANK3 , PRAG1 ). The set also includes WDFY4, a WD-repeat/FYVE-domain autophagy adaptor implicated in MHC-II antigen presentation. Conversely, mutations associated with invasive LR after RT mapped to genes involved in late cytokinetic abscission ( PDCD6IP ), centriole-to-centrosome maturation ( CEP295 ), DNA-damage sensing and repair ( SETX , TNKS1BP1 ), calcium-regulated motility ( CACNA1I ) and cytokine-dependent inflammatory signalling ( TYK2 ). Because stratifying simultaneously by treatment and LR type reduces sample size and event numbers, we cannot exclude the possibility that some endpoint-specific associations—or the absence of others—reflect differences in statistical power. Nevertheless, these findings suggest that distinct biological programs—structural maintenance versus cell-cycle, genome-stability and inflammatory pathways—underlie in-situ and invasive patterns of recurrence after RT. Copy number alterations display molecular subtype-specific patterns with select genomic regions linked to local recurrence Analysis of copy number alterations (CNAs) revealed recurrent chromosomal changes similar to those reported in invasive breast cancer ( Fig. 6a ). Significant gains were identified on chromosomal arms 1q, 8q, 16p, 17q, 20p, and 20q, while losses predominantly occurred on 8p, 9p, 11q, 13q, 14q, 16q, and 17p (binomial test, FDR < 0.05). Download figure Open in new tab Fig. 
6: Frequent copy number alterations in pure DCIS and associations with 10-year LR risk a Genome-wide frequency of absolute copy number gains and losses across chromosomes 1-22 (1Mb window). The estimated ploidy for each sample is subtracted from the copy number values of each segment which means that a copy number of 0 is no copy number change. The y-axis shows the percentage of samples with each alteration type. Asterisks indicate statistical significance for chromosomal arm alterations b Heatmap showing copy number profiles per cytoband across samples (rows), ordered by global CNA score (GCS). Sample annotations include age, grade, molecular subtype and local recurrence status at 10 years. c Heatmap of twenty genomic regions significantly associated with 10-year risk of local recurrence (Firth-corrected cox model p-value < 0.05 adjusted for grade), showing copy number status. CNAs present in at least 5 patients are shown, with copy number gains in red and losses in blue. Global CNA burden varied considerably across samples, with approximately half showing minimal alterations (global CNA score < −0.58; Fig. 6b ). Basal-like tumors exhibited greater CNA burden, while normal-like tumors showed fewer alterations ( Fig. 6b , Supplementary Fig. 2a ). Similarly, high-grade lesions showed a greater CNA burden compared to low-grade lesions ( Supplementary Fig. 3b ). We identified distinct CNA patterns across molecular subtypes: basal-like tumors showed enrichment for gains on 8q, 13q, and 19q; LumA tumors frequently exhibited 16q loss; and Her2-enriched tumors showed characteristic 17q12 gains corresponding to the ERBB2 locus (Fisher’s exact test, p < 0.005; Supplementary Fig. 3c ). Samples and regions were clustered using Ward’s hierarchical clustering with Minkowski distance metric. 
Fourteen cytoband regions were significantly associated with increased 10-year local recurrence risk after grade adjustment (Firth’s penalized likelihood Cox regression p-value & permuted p-values < 0.05; Fig. 6c ). These included copy number losses in five non-adjacent cytobands and gains in three non-adjacent cytobands. When analyzing invasive local recurrence specifically, four regions were shared (losses in 5q32-33, 9p21, 18p11), with two additional losses identified in 9p13 and 8q11 and one gain in 5p15 (Firth’s penalized likelihood Cox regression p-value < 0.01 & permuted p-value < 0.05). Copy number losses could alter critical tumor suppressor functions, as evidenced by the presence of several well-known tumor suppressor genes within these regions. Notable examples include CDKN2A at 9p21 (a key regulator of cell cycle progression), PRDM4 at 12q23-24 (involved in cell differentiation and tumorigenesis) [ 29 ], SPARC at 5q33 (important for extracellular matrix synthesis and modulation of cell shape) [ 30 ] and MITOSTATIN at 12q24 (a mitochondrial protein with tumor suppressor activity) [ 31 ]. Additional loss highlights disruptions in cell adhesion and intercellular contacts as loss of 18p11 may affect DAL-1, a known regulator of cell adhesion and link between cell membrane and cytoskeleton [ 32 , 33 ]. Gains were identified in regions harboring oncogenes or potential cancer-related genes. For example, amplification in the 11q region—including 11q13, which contains CCND1 and CTTN —has previously been associated with breast cancer aggressiveness [ 34 , 35 ]. Additionally, a gain in 5p14 includes four cadherin genes ( CDH10 , CDH9 , CDH12 , CDH18 ) and PRDM9 , whose aberrant expression has been associated with genomic instability [ 36 ]. 
Collectively, these findings demonstrate that the CNAs associated with 10 year local recurrence not only affect regions harboring established tumor suppressors and oncogenes but also target regions involved in cell structural integrity and cell-cell interactions, potentially contributing to an aggressive and treatment-resistant phenotype. Contextualizing our genomic findings with prior profiling studies: consistent early mutational drivers, novel subtype-specific CNA patterns, and predictors of RT response Most prior DCIS genomic studies analyzed small cohorts (<100 cases) and primarily focused on DCIS cases with concurrent invasive disease (synchronous DCIS) rather than pure DCIS ( Supplementary Table 2 ). While recent larger studies, such as those by Strand et al. (2022) [ 20 ] and Kader et al. (2024) [ 37 ], examined hundreds of pure DCIS cases, their reliance on low-pass sequencing or lack of matched normal tissues limits the sensitivity for detecting genomic changes, particularly when working with FFPE samples, which are often the only available option for DCIS studies. Across studies, PIK3CA and TP53 consistently emerged as the most frequently mutated genes in DCIS (and invasive disease), with mutation rates ranging from 21–55% and 17–52%, respectively ( Supplementary Tables 2-3 ). In our cohort, PIK3CA and TP53 were also among the most frequently mutated genes and identified as tumorigenesis drivers in pure DCIS. Mutations in GATA3 and ERBB2, which were frequently reported in several studies, were only detected in a small number of lesions in our cohort (n = 3 and 7, respectively, Supplementary Table 3 ). Overall, we observe very little overlap between genes reported as frequently mutated across studies ( Supplementary Table 3 ). Differences across studies likely reflect variations in methodology, including variant-calling pipelines, sample types (e.g., synchronous vs. 
pure DCIS), cohort composition (e.g., histological grade and ER/HER2 status), and our study’s specific focus on LR and RT response, complicating direct comparisons. Nevertheless, the consistent identification of PIK3CA and TP53 as the most frequently mutated genes across studies and in our cohort reinforces their pivotal roles in the early stages of breast tumorigenesis. Recurrent CNAs in regions such as 1q, 8q, and 17q gains, as well as 8p, 11q, and 16q losses, were observed in our study, consistent with prior DCIS findings, including those from Strand et al. (2022) [ 20 ] and Abba et al. (2015) [ 15 ]. For example, Strand et al. (2022) identified 29 recurrent CNAs in DCIS but found no single CNA predictive of recurrence [ 20 ]. In contrast, we identified twenty genomic regions significantly associated with 10-year local recurrence risk, including regions containing known tumor suppressors and oncogenes. These differences may reflect the higher resolution of our sequencing approach (100x whole-exome sequencing) compared to the low-pass sequencing used in prior studies. Our study also provides valuable new insights into molecular subtype-specific CNA patterns in DCIS. In our cohort, basal-like tumors exhibited gains on 8q, 13q, and 19q; Luminal A tumors displayed 16q loss; and Her2-enriched tumors showed 17q12 gains encompassing ERBB2 . Aside from amplifications in HER2-positive subtypes, subtype-specific CNA analyses remain poorly represented in the DCIS literature but are well established in invasive breast cancer ( see Supplementary Text ). These findings suggest that genetic and molecular aberrations defining subtypes likely arise early and are at least partially established at the DCIS stage. Finally, while several studies have explored prognostic markers of recurrence, few account for treatment variation, and none specifically examined markers of RT response ( Supplementary Table 2 ). 
Our study uniquely identified genetic alterations within a gene network that integrates cytoskeletal integrity, cell-cell interactions, cell adhesion, and metabolism which are associated with an increased risk of local recurrence within five years following RT. These findings suggest that these alterations may play a critical role in resistance mechanisms activated by RT-induced stress. Discussion Our study provides a comprehensive analysis of pure DCIS, focusing on DNA profiles associated with disease prognosis and response to RT. While previous research has primarily examined genomic changes in synchronous DCIS compared to IDC or pure DCIS, our work offers a detailed view of the mutational landscape specific to pure DCIS and its association with LR. Importantly, we identified genomic alterations associated with molecular subtypes and, most critically, LR—both independent of treatment and specifically in cases following RT— highlighting potential molecular drivers of treatment outcomes. We also observed distinct mutational processes in early-onset DCIS with high tumor mutational burden potentially driven by impaired mismatch repair, but these were not associated with prognosis. While driver genes like TP53 and PIK3CA are critical in the early stages of tumorigenesis, our study did not find any significant associations between their mutations and disease progression or response to treatment. These findings suggest that TP53 and PIK3CA mutations are essential for overcoming initial biological constraints during early tumor development but may become less relevant to progression once these barriers are surpassed. This aligns with prior research showing that these mutations are frequently retained in invasive carcinoma but occur at higher prevalence in hyperplastic and DCIS lesions [ 38 ]. Furthermore, their prognostic relevance appears to depend on tumor clinicopathological characteristics. For example, Lin et al. 
[ 11 ] observed an inverse association between PIK3CA kinase domain mutations in high-grade DCIS tumors and progression, while Silwal-Pandit et al. [ 39 ] reported that the prognostic impact of TP53 mutations varied across molecular subtypes of invasive breast cancer. These findings underscore the complexity of breast cancer progression, where early driver mutations interact with additional genetic and microenvironmental factors to shape disease outcomes. We identified recurrent CNAs frequently reported in the literature, including gains on 8q [ 10 – 12 , 14 , 15 , 37 , 40 ], 17q [ 10 , 12 , 15 , 18 – 20 , 37 , 40 ], 20q [ 37 , 40 ], and losses on 11q [ 10 – 12 , 18 , 19 , 37 ] and 16q [ 10 , 13 , 14 , 20 , 40 ]. These CNAs mirror those observed in invasive ductal carcinoma, suggesting that pure DCIS already harbors genomic features characteristic of invasive cancer—including alterations that define molecular subtypes (e.g., basal, luminal-A, or Her2-enriched tumors; see Supplementary Text ). Moreover, this observation is consistent with earlier transcriptional and epigenetic studies that show subtype-specific alterations in gene expression and DNA methylation are already evident in DCIS [ 41 , 42 ]. These findings suggest that genetic and molecular aberrations defining subtypes likely emerge early - being at least partially established at the DCIS stage - and support a model of subtype-specific progression from DCIS to invasive breast cancer [ 41 ]. Importantly, our study also revealed novel associations between mutations in genes regulating cytoskeletal integrity, cell membrane organization and function, cell-cell interactions, and cell adhesion, and the risk of LR in pure DCIS. Across the cohort, five genes were significantly associated with LR, underscoring their potential roles in tumor progression. 
These mutations were not frequently co-occurring in the same patient, and involved genes maintaining cell adhesion ( MYO7A , SH2B2 ), organizing and supporting cellular organization, cytoskeleton and membrane function ( PDZD8 , MUCL3 , MYO7A ). When the cohort was stratified by RT administration, 27 unique genes - excluding SH2B2 and MUCL3 - were significantly associated with increased LR risk in RT-treated patients. Many of these mutations were identified in genes critical for maintaining cellular architecture, polarity, and adhesion, such as KRT1 , MUCL3 , TMEM132C , TTLL10 , EML5 , CEP128 , CFAP47 , and KIF24 , highlighting how disruptions in structural integrity and epithelial cell organization may undermine the effectiveness of RT and contribute to LR. In addition to structural changes, metabolic regulators like MGAM2 and AADACL3 may influence the energetic and biosynthetic demands required for cell survival and adaptation under genotoxic conditions. Alterations in REV1 , a key player in DNA damage tolerance, underscore the importance of effective DNA repair mechanisms following RT. These findings expand upon prior studies linking disruptions in tissue structure and cell adhesion to DCIS progression [ 43 – 46 ], while providing novel insights into their specific roles in recurrence following RT in pure DCIS. Notably, our analysis also revealed distinct biological programs underlying in-situ versus invasive recurrence following RT. Specifically, while some mutations in genes involved in structural maintenance and mechanosensory pathways were uniquely associated with in-situ relapse, invasive progression was characterized by alterations in cell-cycle regulation, genome stability, and inflammatory signaling. Because stratifying by both treatment and LR type reduced our statistical power, these molecular distinctions will require confirmation in larger cohorts. 
Finally, we uniquely identified losses in cytobands harboring established tumor suppressor genes—such as CDKN2A in 9p21 and PRDM4 in 12q23–24—as well as regions containing genes crucial for cell adhesion and migration, including a cluster of keratin genes in 12q13–14 and DAL-1 in 18p11. In parallel, gains in regions like 11q, which includes oncogenic drivers such as CCND1 and CTTN , suggest that oncogene amplification may further contribute to tumor aggressiveness and ultimately patient prognosis. Gains at 5p14, containing cadherin genes ( CDH10 , CDH9 , CDH12 , CDH18 ), again emphasize the importance of maintaining epithelial integrity to prevent LR in pure DCIS patients. Future studies are warranted to investigate how these mutations interact with other molecular pathways and microenvironmental factors to elucidate their contribution to DCIS prognosis and the adverse effects of RT. Such research could help identify strategies to mitigate recurrence risk following RT and improve treatment outcomes [ 47 – 49 ]. Conclusions Our findings uncover the genomic landscape of pure DCIS and highlight key factors that contribute to LR and mediate the adverse effects of RT. While TP53 and PIK3CA mutations play important roles in early tumorigenesis, they do not predict recurrence, emphasizing the need for alternative biomarkers. We identify distinct mutational processes and genetic alterations that disrupt cytoskeletal integrity, cell-cell interactions and cell adhesion, potentially destabilizing the epithelial tissue environment and contributing to recurrence, particularly following RT-induced stress. Importantly, our data also suggest that in-situ and invasive recurrences after RT may follow divergent molecular trajectories—mutations affecting structural maintenance and mechanosensory pathways were specifically linked to in-situ relapse, whereas alterations in cell-cycle regulation, genome stability, and inflammatory signaling characterized invasive progression. 
These insights provide a foundation for understanding the genetic basis of DCIS progression and identifying potential molecular drivers of treatment resistance. Future research will be essential to translate these insights into clinical practice, guiding the development of more targeted therapeutic approaches to improve outcomes for patients with DCIS. Methods The Ontario DCIS cohort The Ontario DCIS Cohort was established at the Sunnybrook Health Sciences Center (Toronto, Canada) as a population-based sample of women diagnosed with pure DCIS defined as in situ cancer without any invasive component between 1994 and 2003 [ 50 – 52 ]. All patients underwent BCS, with a subset receiving subsequent RT as part of their standard-of-care. Adjuvant endocrine therapy was administered to less than 15% of individuals, while none received systemic chemotherapy or neoadjuvant endocrine therapy. The cohort features comprehensive clinical annotation and expert pathology review. Previous studies of this cohort have characterized outcomes based on clinical factors including age at diagnosis, pathological features (tumor size and nuclear grade), and treatment modalities [ 53 – 56 ]. Sample selection was prioritized to achieve balanced representation between RT-treated and untreated patients, with equivalent distributions of individuals who did or did not develop an invasive or in-situ ipsilateral recurrence within ten years. Tissue cores were obtained from FFPE blocks, sampling DCIS tumors without microinvasion alongside adjacent normal and stromal tissues. DNA and RNA were extracted using the Qiagen AllPrep FFPE DNA/RNA kit (Qiagen). Samples yielding sufficient DNA quantities underwent library construction using the Agilent SureSelect Human Exome library preparation kit and were sequenced on the NovaSeq6000 platform (100bp paired-end, 100M reads/sample) at the Genome Quebec Innovation Centre (Montreal, Canada). 
While high-quality sequencing data was obtained for 300 tumor tissues, downstream analyses focused on 147 samples with matched normal profiles (144 normal tissue and 3 stroma non-epithelial samples). Molecular subtypes were determined using RNA profiles available for a subset of patients (n = 122). Sequencing libraries were prepared using RNA Flex kit (Illumina). Raw reads were processed using TrimGalore to remove adapters and low-quality reads [ 57 ], followed by alignment to the human genome (Ensembl release v104) using STAR [ 58 ]. Read counts were mapped to genomic features using Rsubread R package [ 59 ] and gene counts were normalized using the variance stabilizing transformation implemented in the DESeq2 R package [ 60 ]. PAM50 subtype classification was performed using the genefu R package [ 61 ] with established centroids [ 62 ]. Specifically, normalized expression data of the 50 PAM50 genes were obtained using variance stabilizing transformation implemented in the DESeq2 R package [ 60 ] and compared to subtype-specific centroids using Pearson correlations. Each sample was assigned to the molecular subtype with which it showed the highest correlation coefficient. Whole-exome sequencing data preprocessing Raw reads were processed using Trimmomatic [ 63 ] (version 0.39) to remove adaptor sequences and low-quality bases. Reads were trimmed to retain high-quality sequences, applying quality thresholds of 10 at read ends and 20 within a 4-base sliding window. The remaining paired reads were processed following the GATK4 best practices. Briefly, reads were aligned to the human reference genome (GRCh38, GATK resource bundle) using the Burrows-Wheeler Aligner (BWA) [ 64 ]. Post-alignment procedures included sorting, annotating reads with read groups, and marking duplicate reads with Picard. Base quality score recalibration was conducted using GATK4 tools. 
A recalibration table was generated with the BaseRecalibrator function using known variant sites (dbSNP138 for SNPs and Mills and 1000 Genomes for indels), and recalibration was applied with ApplyBQSR to adjust base quality scores and correct for systematic technical errors. The process focused on SureSelect Human Exons v7 regions with a 100 bp padding. Finally, properly paired reads were extracted, excluding secondary alignments and low-quality reads, with the resulting files indexed using Samtools [ 65 ]. After filtration, samples had a mean coverage depth of 152X (range: 52-308X, standard deviation: 42X). Single nucleotide variant & indel calling For variant calling, we used NeuSomatic [ 66 ], a deep learning approach that leverages both tumor and matched normal sequence alignment information, alongside somatic mutation calls from six different approaches: MuTect2, MuSE, VarDict, VarScan2, Strelka2, and SomaticSniper [ 67 – 72 ]. This method was selected because of the low level of agreement between callers in our data - the majority of mutations (88.7%) were identified by only one caller ( Supplementary Fig. 4a ) - an observation consistent with previous studies [ 73 – 75 ]. The number of mutations detected varied significantly across samples, with the minimum identified in any single sample being 1,563 mutations, and the maximum reaching 283,247 mutations ( Supplementary Fig. 4a ). Small-variant calling has become increasingly amenable to deep learning approaches, thanks to the availability of extensive sequencing data and robust benchmarking datasets that cover millions of variants across diverse genomic contexts [ 76 ]. A convolutional neural network (CNN) architecture leverages information from sequencing reads and the reference genome in the vicinity of each candidate variant to approximate complex, nonlinear functions, accurately classifying loci as homozygous variant, heterozygous variant or homozygous reference (non-variant) [ 66 ]. 
Best practices established using well-characterized somatic reference datasets from the SEQC-II consortium demonstrated that NeuSomatic models achieve robust performance across various sequencing technologies - including both fresh and FFPE DNA inputs - across a range of tumor/normal purities and sequencing coverages, significantly outperforming conventional approaches [ 73 ]. Accordingly, we used the ensemble extension of NeuSomatic. This extension combines outputs from six individual variant callers by integrating features from 93 channels and incorporates an additional 26 channels to capture alignment information in a window of seven bases around the candidate mutation, resulting in a total of 119 input channels per candidate matrix. We used the recommended pre-trained model SEQC-II (SEQC-WGS-Spike model), which was trained on 20 whole-genome sequencing replicate pairs containing in silico somatic mutations with allele frequencies ranging from 1% to 100% (with matched normals at both 95% and 100% purity, and sequencing coverage ranging from ∼40x to 220x) using five variant callers: MuTect2, Strelka2, MuSE, SomaticSniper, VarDict. After mutation calling, the recommended post-processing was applied to resolve long INDEL sequences and the final NeuSomatic predictions were used for all downstream analyses. We obtained comprehensive genomic information for each variant using the Ensembl Variant Effect Predictor (VEP) [ 77 ]. This included the effects on gene and protein function, such as consequence types and amino acid changes, variant frequencies in different populations, impact on regulatory regions, and potential associations with diseases and phenotypes. Following annotation with the Ensembl VEP, we used the vcf2maf tool to transform VEP-annotated VCF files into the Mutation Annotation Format (MAF). 
This conversion ensures each variant is uniquely associated with a single gene transcript or isoform, despite the potential for a variant to impact multiple isoforms. Particularly in cases where variants could be classified under different effects — such as a Missense_Mutation near a Splice_Site — the MAF format forces a singular designation for each variant by leveraging VEP’s determinations for canonical isoforms. We excluded 100 genes commonly mutated in public exome datasets (FLAGS) due to their lower likelihood of disease association [ 78 ]. This decision stems from their longer coding regions, which inherently increase mutation probability, and the presence of paralogs that might offset functional loss these mutations could cause [ 78 , 79 ]. High-confidence variants (identified with a probability score of 0.7 or higher) consistently showed higher allele frequencies compared to those categorized as low-quality (with scores between 0.4 and 0.7) and rejected variants (with scores below 0.4) ( Supplementary Fig. 4B ). To reduce potential false positives, we selected high-confidence variants with allele frequency above 0.1. To further confirm that variants detected were not technical artifacts, we assessed the relationship between tumor mutational burden and sequencing depth ( Supplementary Fig. 5 ). The lack of correlation supports robust variant detection independent of coverage. Mutation patterns and frequencies were visualized using the oncoplot function from the maftools R package [ 80 ], which displays mutation types and frequencies across samples. Mutational signatures We performed mutational signature analysis using the COSMIC database of single-base substitution (SBS) signatures [ 24 ]. First, the trinucleotide context of each single nucleotide variant was characterized. 
We then used the fit_to_signatures function in the MutationalPatterns R package (version 3.19) [ 81 ] to find the linear combination of COSMIC mutation signatures that most closely reconstructs the mutation spectra for each sample by solving the nonnegative least-squares constraints problem. We used strict refit, in which the signature with the lowest contribution is removed and refitting is repeated until the drop in cosine similarity between the original and reconstructed profiles from one iteration to the next exceeds max_delta (0.004). We selected signatures that contributed to the mutation spectrum of at least 10 samples and plotted their relative contributions using the pheatmap R package [ 82 ]. Driver genes To identify driver genes (i.e., genes under positive selection in cancer), we used the dNdScv analysis method [ 26 ]. This approach is based on the evaluation of the ratio between synonymous (silent) mutations and non-synonymous (missense) mutations in genes. A higher ratio of non-synonymous to synonymous mutations in a gene indicates positive selection for mutations that may confer a growth advantage to cancer cells, suggesting the potential role of the gene as a driver in tumorigenesis. The dNdScv method estimates the background mutation rate of each gene by combining local information (synonymous mutations within the gene) with global information (variation of mutation rates across genes). This approach controls for the sequence composition of genes and accounts for mutational signatures, providing a more accurate estimation of the expected neutral mutation rate. In particular, the dNdScv R package [ 83 ] implementation uses trinucleotide context-dependent substitution matrices to mitigate common mutation biases that can affect dN/dS calculations [ 26 ]. To visualize and analyze the distribution and nature of mutations in driver genes, we used lollipop plots generated from the maftools R package [ 80 ] and annotation tracks from cBioPortal [ 84 ]. 
These plots provide a representation of mutation types and their locations along the protein sequence as well as annotations including likely mutation hotspots as identified by Memorial Sloan Kettering Cancer Hotspots and 3D Hotspots databases [ 85 ], and annotation records of therapeutic indication from OncoKB [ 86 ]. Copy Number Alterations (CNAs) To investigate copy number alterations, we applied the Allele-Specific Copy Number Analysis of Tumours (ASCAT v3) on our tumor-normal pairs, estimating tumor purity, ploidy, and allele-specific copy number [ 87 ]. The runAscat function was executed with default settings optimized for high-throughput exome sequencing data, with the gamma parameter set to 1. After examination of ASCAT sunrise plots, we identified a subset of samples (n=35 samples), for which the initial estimates of tumor purity and ploidy did not align with the regions of highest confidence on the sunrise plots. The runAscat was re-run for these samples by manually assigning the aberrant cell fraction (tumor purity) and tumor ploidy parameters corresponding to the regions of highest probability as depicted on the sunrise plots. Overall, eight samples were excluded from further analysis due to poor goodness of fit leaving 139 samples with CNA profiles for downstream analyses. Absolute number gains and losses shared across samples were visualized across whole chromosomal regions using aCNViewer [ 88 ] (window size of 1 Mbp). The estimated ploidy for each sample is subtracted from the copy number values of each segment which means that a copy number of 0 is no copy number change. These adjusted windows at base resolution are then plotted into a stacked histogram representing genome-wide absolute copy number and copy neutral variations over all samples in a group. 
We applied a re-segmentation approach to adjust for amplitude divergence due to technical variability implemented in CNApp [ 89 ] using the default settings (minimum segment length = 100 Kbp, minimum amplitude deviation from segment to zero = 0.16, maximum distance between segments=1 Mb, maximum amplitude deviation between segments = 0.16, and maximum BAF deviation between segments = 0.1). Re-segmented data were then used to calculate the broad, focal and global CNA scores. We then transformed re-segmented data into genomic regions profiles (chromosome arms, cytobands and sub-cytobands) using both focal and broad segments. Length-relative means are computed for each window by considering amplitude values from those segments included in each specific window. Default cutoffs for low-level copy number gains and losses (i.e., |0.2|) were used to infer CNA frequencies. Survival analyses We evaluated the association between gene mutations or copy number aberrations in cytobands and 10-year local recurrence-free survival using Firth’s penalized likelihood Cox regression which accounts for small sample sizes and rare events. This analysis was conducted using the coxphf R package [ 90 ]. Aberrations were included in the analysis only if detected in at least five lesions (2,030 genes; 303 cytobands). This analysis was performed across the entire patient cohort. To further investigate mutated genes associated with response to RT, a stratified analysis was conducted based on treatment groups. To control for multiple testing in the identification of mutated genes associated with LR, we employed permutation-based testing (1,000 permutations) to establish empirical significance thresholds. We required both a traditional p-value threshold (p < 0.01 for full-cohort analyses; p < 0.05 for RT-stratified analyses) and a permuted p-value < 0.05. 
Kaplan-Meier survival curves were used to visualize the results, illustrating event-free survival probabilities over time for patients stratified by mutational status in the specified genes or gene sets. GO enrichment analyses were performed using the clusterProfiler R package [ 91 ]. Entrez gene identifiers were mapped to GO terms using the org.Hs.eg.db annotation database [ 92 ]. All GO terms with FDR < 0.2 were considered. Semantic similarity between GO terms was calculated using the Wang method implemented in the pairwise_termsim function [ 93 ]. The enrichment map was visualized using the emapplot function [ 94 ] which displays the significantly enriched terms with at least 2 genes. Third-party studies A systematic literature search was performed using PubMed to identify previous studies that conducted DNA profiling on pure DCIS or DCIS mixed with invasive lesions. The search strategy included terms related to “ductal carcinoma in situ”, “genetic markers”, “DCIS prognosis”, “DCIS progression”, “DCIS to IDC”, “dcis dna”, “dcis prognosis dna markers”, “copy number alterations”, and “somatic mutations”. Studies were included if they reported genomic analyses of DCIS samples using sequencing or copy number profiling techniques and were published within the last 10 years. Twelve studies met the inclusion criteria, and their key findings were summarized in Supplementary Table 1 . The review emphasized genetic alterations and pathway dysregulation that may drive DCIS initiation and progression to invasive disease. Declarations Ethics approval and consent to participate Research ethics board approval was obtained from Sunnybrook Health Sciences Centre, Toronto, Ontario (#2738). This study was facilitated through ICES which is named as a prescribed entity in section 45 of PHIPA (Regulation 329/04, section 18) which allows access and utilization of administrative data for research purposes with a waived requirement for consent. 
Consent for publication Not Applicable Availability of data and materials The dataset analyzed in this study is not publicly available due to the personal, sensitive, and inherently identifying nature of raw genomic data. Access to raw data and patient metadata may be provided but is controlled and requires institutional material data transfer agreements (contact person: eileen.rakovitch{at}sunnybrook.ca ). The scripts used to reproduce the analyses performed in this study are available in the Dumeaux Lab GitHub Repository https://github.com/dumeaux-lab/dcis-dna_paper . Competing Interests The authors declare no competing interests. Funding This work was supported by the Canadian Institute for Health Research (CIHR) project grant #391682. We thank the Canadian Foundation for Innovation J. Evans Leaders Fund grant #43481 for the support of the computing infrastructure used throughout our analyses. ER holds the LC Campbell Chair for breast cancer research. NR received Breast Cancer Canada and Ontario Graduate Scholarships. We thank Genome Quebec Innovation Center for RNA processing and sequencing. Data Availability Owing to the personal, sensitive and inherently identifying nature of raw genomic data, access to raw data and patient metadata is controlled and requires institutional material data transfer agreements (contact person: eileen.rakovitch{at}sunnybrook.ca ). https://github.com/dumeaux-lab/dcis-dna_paper Authors’ Contributions Conceptualization, S.N.M., L.P., E.R., M.T.H. and V.D.; Methodology, N.R., E.S., V.D.; Investigation, N.R., E.S., M.Z., V.D.; Software, N.R., E.J.M., V.D.; Writing – Original Draft, N.R. and V.D.; Writing – Review & Editing, E.S., M.Z., E.J.M., E.H., S.N.M., S.T., L.P., M.T.H. and E.R.; Funding Acquisition, E.R., M.T.H., E.H., S.N.M., S.T., L.P. and V.D.; Resources, E.R., S.N.M., S.T., L.P.; Supervision, V.D. 
Data Acknowledgement This study was supported by ICES, which is funded by an annual grant from the Ontario Ministry of Health (MOH) and the Ministry of Long-Term Care (MLTC). The opinions, results and conclusions reported in this paper are those of the authors and are independent from the funding sources. No endorsement by ICES, the MOH or MLTC is intended or should be inferred. As a prescribed entity under Ontario’s privacy legislation, ICES is authorized to collect and use health care data for the purposes of health system analysis, evaluation and decision support. Secure access to these data is governed by policies and procedures that are approved by the Information and Privacy Commissioner of Ontario. Footnotes Added separate analyses to distinguish recurrence as DCIS from progression to invasive cancer; other small edits to improve clarity and precision. References 1. ↵ Harbeck N , Penault-Llorca F , Cortes J , Gnant M , Houssami N , Poortmans P , et al. Breast cancer . Nat Rev Dis Primer . 2019 ; 5 : 1 – 31 . OpenUrl CrossRef 2. ↵ Sørum R , Hofvind S , Skaane P , Haldorsen T . Trends in incidence of ductal carcinoma in situ: The effect of a population-based screening programme . The Breast . 2010 ; 19 : 499 – 505 . OpenUrl PubMed 3. ↵ Gangnon RE , Sprague BL , Stout NK , Alagoz O , Weedon-Fekjær H , Holford TR , et al. The contribution of mammography screening to breast cancer incidence trends in the United States: an updated age-period-cohort model . Cancer Epidemiol Biomark Prev Publ Am Assoc Cancer Res Cosponsored Am Soc Prev Oncol . 2015 ; 24 : 905 – 12 . OpenUrl 4. ↵ Force UPST , Nicholson WK , Silverstein M , Wong JB , Barry MJ , Chelmow D , et al. Screening for Breast Cancer: US Preventive Services Task Force Recommendation Statement . JAMA . 2024 ; 331 : 1918 – 30 . OpenUrl CrossRef PubMed 5. ↵ Tomlinson-Hansen S , Khan M , Cassaro S. Breast Ductal Carcinoma in Situ. StatPearls [Internet] . 
Treasure Island (FL) : StatPearls Publishing ; 2023 [cited 2023 Nov 16]. Available from: http://www.ncbi.nlm.nih.gov/books/NBK567766/ 6. ↵ Zhang Y , Weinberg RA . Epithelial-to-mesenchymal transition in cancer: complexity and opportunities . Front Med . 2018 ; 12 : 361 – 73 . OpenUrl CrossRef PubMed 7. ↵ Maxwell AJ , Hilton B , Clements K , Dodwell D , Dulson-Cox J , Kearins O , et al. Unresected screen-detected ductal carcinoma in situ: Outcomes of 311 women in the Forget-Me-Not 2 study . Breast Edinb Scotl . 2022 ; 61 : 145 – 55 . OpenUrl 8. ↵ Rakovitch E , Gray R , Baehner FL , Sutradhar R , Crager M , Gu S , et al. Refined estimates of local recurrence risks by DCIS score adjusting for clinicopathological features: a combined analysis of ECOG-ACRIN E5194 and Ontario DCIS cohort studies . Breast Cancer Res Treat . 2018 ; 169 : 359 – 69 . OpenUrl CrossRef PubMed 9. ↵ Andor N , Maley CC , Ji HP . Genomic instability in cancer: Teetering on the limit of tolerance . Cancer Res . 2017 ; 77 : 2179 – 85 . OpenUrl Abstract / FREE Full Text 10. ↵ Trinh A , Gil Del Alcazar CR, Shukla SA, Chin K, Chang YH, Thibault G , et al. Genomic Alterations during the In Situ to Invasive Ductal Breast Carcinoma Transition Shaped by the Immune System. Mol Cancer Res . 2021 ; 19 : 623 – 35 . OpenUrl PubMed 11. ↵ Lin C-Y , Vennam S , Purington N , Lin E , Varma S , Han S , et al. Genomic landscape of ductal carcinoma in situ and association with progression . Breast Cancer Res Treat . 2019 ; 178 : 307 – 16 . OpenUrl PubMed 12. ↵ Hernandez L , Wilkerson PM , Lambros MB , Campion-Flora A , Rodrigues DN , Gauthier A , et al. Genomic and mutational profiling of ductal carcinomas in situ and matched adjacent invasive breast cancers reveals intra-tumour genetic heterogeneity and clonal selection . J Pathol . 2012 ; 227 : 42 – 52 . OpenUrl CrossRef PubMed Web of Science 13. ↵ Nagasawa S , Kuze Y , Maeda I , Kojima Y , Motoyoshi A , Onishi T , et al. 
Genomic profiling reveals heterogeneous populations of ductal carcinoma in situ of the breast . Commun Biol . 2021 ; 4 : 1 – 13 . OpenUrl PubMed 14. ↵ Nachmanson D , Officer A , Mori H , Gordon J , Evans MF , Steward J , et al. The breast pre-cancer atlas illustrates the molecular and micro-environmental diversity of ductal carcinoma in situ . Npj Breast Cancer . 2022 ; 8 : 1 – 13 . OpenUrl PubMed 15. ↵ Abba MC , Gong T , Lu Y , Lee J , Zhong Y , Lacunza E , et al. A Molecular Portrait of High-Grade Ductal Carcinoma In Situ . Cancer Res . 2015 ; 75 : 3980 – 90 . OpenUrl Abstract / FREE Full Text 16. Kaplan HG , Dowdell AK , Berry AB , Shimol RB , Robinson FL , Carney CA , et al. Multi-omic profiling of simultaneous ductal carcinoma in situ and invasive breast cancer . Breast Cancer Res Treat . 2024 ; 205 : 451 – 64 . OpenUrl CrossRef PubMed 17. ↵ Kader T , Hill P , Zethoven M , Goode DL , Elder K , Thio N , et al. Atypical ductal hyperplasia is a multipotent precursor of breast carcinoma . J Pathol . 2019 ; path.5262 . 18. ↵ Kim SY , Jung S-H , Kim MS , Baek I-P , Lee SH , Kim T-M , et al. Genomic differences between pure ductal carcinoma in situ and synchronous ductal carcinoma in situ with invasive breast cancer . Oncotarget . 2015 ; 6 : 7597 – 607 . OpenUrl CrossRef PubMed 19. ↵ Pareja F , Brown DN , Lee JY , Da Cruz Paula A , Selenica P , Bi R , et al. Whole-Exome Sequencing Analysis of the Progression from Non–Low-Grade Ductal Carcinoma In Situ to Invasive Ductal Carcinoma . Clin Cancer Res . 2020 ; 1078 – 0432 .CCR-19–2563. 20. ↵ Strand SH , Rivero-Gutiérrez B , Houlahan KE , Seoane JA , King LM , Risom T , et al. Molecular classification and biomarkers of clinical outcome in breast ductal carcinoma in situ: Analysis of TBCRC 038 and RAHBT cohorts. Cancer Cell [Internet] . 2022 [cited 2022 Dec 7]; Available from: https://www.sciencedirect.com/science/article/pii/S1535610822005128 21. ↵ Cooper DN , Mort M , Stenson PD , Ball EV , Chuzhanova NA . 
Methylation-mediated deamination of 5-methylcytosine appears to give rise to mutations causing human inherited disease in CpNpG trinucleotides, as well as in CpG dinucleotides . Hum Genomics . 2010 ; 4 : 406 – 10 . OpenUrl CrossRef PubMed 22. ↵ Alexandrov LB , Jones PH , Wedge DC , Sale JE , Campbell PJ , Nik-Zainal S , et al. Clock-like mutational processes in human somatic cells . Nat Genet . 2015 ; 47 : 1402 – 7 . OpenUrl CrossRef PubMed 23. ↵ Steiert TA , Parra G , Gut M , Arnold N , Trotta J-R , Tonda R , et al. A critical spotlight on the paradigms of FFPE-DNA sequencing . Nucleic Acids Res . 2023 ; 51 : 7143 – 62 . OpenUrl CrossRef PubMed 24. ↵ Alexandrov LB , Nik-Zainal S , Wedge DC , Aparicio SAJR , Behjati S , Biankin AV , et al. Signatures of mutational processes in human cancer . Nature . 2013 ; 500 : 415 – 21 . OpenUrl CrossRef PubMed Web of Science 25. ↵ Guo Q , Lakatos E , Bakir IA , Curtius K , Graham TA , Mustonen V. The mutational signatures of formalin fixation on the human genome [Internet] . bioRxiv; 2021 [cited 2024 Dec 6]. p. 2021.03.11.434918. Available from: https://www.biorxiv.org/content/10.1101/2021.03.11.434918v1 26. ↵ Martincorena I , Raine KM , Gerstung M , Dawson KJ , Haase K , Loo PV , et al. Universal Patterns of Selection in Cancer and Somatic Tissues . Cell . 2017 ; 171 : 1029 – 1041 .e21. OpenUrl CrossRef PubMed 27. ↵ Yuan T , Cantley L . PI3K pathway alterations in cancer: variations on a theme . Oncogene . 2008 ; 27 : 5497 – 510 . OpenUrl CrossRef PubMed Web of Science 28. ↵ Bader AG , Kang S , Zhao L , Vogt PK . Oncogenic PI3K deregulates transcription and translation . Nat Rev Cancer . 2005 ; 5 : 921 – 9 . OpenUrl CrossRef PubMed Web of Science 29. ↵ Yang XH , Huang S . PFM1 (PRDM4), a new member of the PR-domain family, maps to a tumor suppressor locus on human chromosome 12q23-q24.1 . Genomics . 1999 ; 61 : 319 – 25 . OpenUrl CrossRef PubMed 30. ↵ Arnold SA , Brekken RA . SPARC: a matricellular regulator of tumorigenesis . 
J Cell Commun Signal . 2009 ; 3 : 255 – 73 . OpenUrl CrossRef PubMed 31. ↵ Vecchione A , Fassan M , Anesti V , Morrione A , Goldoni S , Baldassarre G , et al. MITOSTATIN, a putative tumor suppressor on chromosome 12q24.1, is downregulated in human bladder and breast cancer . Oncogene . 2009 ; 28 : 257 – 69 . OpenUrl CrossRef PubMed 32. ↵ Heller G , Geradts J , Ziegler B , Newsham I , Filipits M , Markis-Ritzinger E-M , et al. Downregulation of TSLC1 and DAL-1 expression occurs frequently in breast cancer . Breast Cancer Res Treat . 2007 ; 103 : 283 – 91 . OpenUrl CrossRef PubMed Web of Science 33. ↵ Kittiniyom K , Gorse KM , Dalbegue F , Lichy JH , Taubenberger JK , Newsham IF . Allelic loss on chromosome band 18p11.3 occurs early and reveals heterogeneity in breast cancer progression . Breast Cancer Res . 2001 ; 3 : 192 – 8 . OpenUrl CrossRef PubMed Web of Science 34. ↵ Schuuring E , Verhoeven E , Mooi WJ , Michalides RJ . Identification and cloning of two overexpressed genes, U21B31/PRAD1 and EMS1, within the amplified chromosome 11q13 region in human carcinomas . Oncogene . 1992 ; 7 : 355 – 61 . OpenUrl PubMed Web of Science 35. ↵ Ormandy CJ , Musgrove EA , Hui R , Daly RJ , Sutherland RL . Cyclin D1, EMS1 and 11q13 Amplification in Breast Cancer . Breast Cancer Res Treat. 2003 ; 78 : 323 – 35 . OpenUrl CrossRef PubMed Web of Science 36. ↵ Houle AA , Gibling H , Lamaze FC , Edgington HA , Soave D , Fave M-J , et al. Aberrant PRDM9 expression impacts the pan-cancer genomic landscape . Genome Res . 2018 ; 28 : 1611 – 20 . OpenUrl Abstract / FREE Full Text 37. ↵ Kader T , Zethoven M , Mahale S , Saunders H , Tjoeka L , Lehmann R , et al. Predictive biomarkers of breast ductal carcinoma in situ may underestimate the risk of recurrence due to de novo ipsilateral breast carcinoma development [Internet] . bioRxiv ; 2024 [cited 2025 Jan 3]. p. 2024.05.19.594731. Available from: https://www.biorxiv.org/content/10.1101/2024.05.19.594731v1 38. 
↵ Ang DC , Warrick AL , Shilling A , Beadling C , Corless CL , Troxell ML . Frequent phosphatidylinositol-3-kinase mutations in proliferative breast lesions . Mod Pathol . 2014 ; 27 : 740 – 50 . OpenUrl CrossRef PubMed 39. ↵ Silwal-Pandit L , Langerød A , Børresen-Dale A-L . TP53 Mutations in Breast and Ovarian Cancer . Cold Spring Harb Perspect Med . 2017 ; 7 : a026252 . OpenUrl Abstract / FREE Full Text 40. ↵ Pang J-MB , Savas P , Fellowes AP , Mir Arnau G , Kader T , Vedururu R , et al. Breast ductal carcinoma in situ carry mutational driver events representative of invasive breast cancer . Mod Pathol . 2017 ; 30 : 952 – 63 . OpenUrl CrossRef PubMed 41. ↵ Lesurf R , Aure MR , Mørk HH , Vitelli V , Lundgren S , Børresen-Dale A-L , et al. Molecular Features of Subtype-Specific Progression from Ductal Carcinoma In Situ to Invasive Breast Cancer . Cell Rep . 2016 ; 16 : 1166 – 79 . OpenUrl CrossRef PubMed 42. ↵ Bergholtz H , Lien TG , Swanson DM , Frigessi A , Daidone MG , Tost J , et al. Contrasting DCIS and invasive breast cancer by subtype suggests basal-like DCIS as distinct lesions . NPJ Breast Cancer . 2020 ; 6 : 26 . 43. ↵ Emery LA , Tripathi A , King C , Kavanah M , Mendez J , Stone MD , et al. Early Dysregulation of Cell Adhesion and Extracellular Matrix Pathways in Breast Cancer Progression . Am J Pathol . 2009 ; 175 : 1292 – 302 . OpenUrl CrossRef PubMed Web of Science 44. Glukhova MA , Streuli CH . How integrins control breast biology . Curr Opin Cell Biol . 2013 ; 25 : 633 – 41 . OpenUrl CrossRef PubMed 45. Maxwell CA , Benítez J , Gómez-Baldó L , Osorio A , Bonifaci N , Fernández-Ramires R , et al. Interplay between BRCA1 and RHAMM regulates epithelial apicobasal polarization and may influence risk of breast cancer . PLoS Biol . 2011 ; 9 : e1001199 . OpenUrl CrossRef PubMed 46. ↵ Qin X , Strand SH , Lee MR , Saraswathibhatla A , van IJzendoorn DGP, Zhu C, et al. 
Single Cell Expression Analysis of Ductal Carcinoma in Situ Identifies Complex Genotypic-Phenotypic Relationships Altering Epithelial Composition . Cancer Res [Internet] . 2025 [cited 2025 Apr 21]; Available from: doi: 10.1158/0008-5472.CAN-24-3023 OpenUrl CrossRef 47. ↵ Deng Y , Chen G , Xiao J , Deng H . Role and potential therapeutic strategies of matrix mechanics for optimizing tumor radiotherapy . Mechanobiol Med . 2024 ; 2 : 100037 . OpenUrl PubMed 48. Prakash J , Shaked Y . The Interplay between Extracellular Matrix Remodeling and Cancer Therapeutics . Cancer Discov . 2024 ; 14 : 1375 – 88 . OpenUrl CrossRef PubMed 49. ↵ Frascogna C , Mottareale R , La Verde G , Arrichiello C , Muto P , Netti PA , et al. Role of the mechanical microenvironment on CD-44 expression of breast adenocarcinoma in response to radiotherapy . Sci Rep . 2024 ; 14 : 391 . OpenUrl PubMed 50. ↵ Rakovitch E , Nofech-Mozes S , Narod SA , Hanna W , Thiruchelvam D , Saskin R , et al. Can we select individuals with low risk ductal carcinoma in situ (DCIS)? A population-based outcomes analysis . Breast Cancer Res Treat . 2013 ; 138 : 581 – 90 . OpenUrl CrossRef PubMed 51. Rakovitch E , Nofech-Mozes S , Hanna W , Baehner FL , Saskin R , Butler SM , et al. A population-based validation study of the DCIS Score predicting recurrence risk in individuals treated by breast-conserving surgery alone . Breast Cancer Res Treat . 2015 ; 152 : 389 – 98 . OpenUrl CrossRef PubMed 52. ↵ Rakovitch E , Nofech-Mozes S , Hanna W , Sutradhar R , Baehner FL , Miller DP , et al. Multigene Expression Assay and Benefit of Radiotherapy After Breast Conservation in Ductal Carcinoma in Situ . J Natl Cancer Inst . 2017 ; 109 : djw256 . OpenUrl CrossRef PubMed 53. ↵ Paszat L , Sutradhar R , Zhou L , Nofech-Mozes S , Rakovitch E . Including the Ductal Carcinoma-In-Situ (DCIS) Score in the Development of a Multivariable Prediction Model for Recurrence After Excision of DCIS . Clin Breast Cancer . 2019 ; 19 : 35 – 46 . 
OpenUrl PubMed 54. Klein J , Kong I , Paszat L , Nofech-Mozes S , Hanna W , Thiruchelvam D , et al. Close or positive resection margins are not associated with an increased risk of chest wall recurrence in women with DCIS treated by mastectomy: a population-based analysis . SpringerPlus . 2015 ; 4 : 335 . 55. Lalani N , Paszat L , Sutradhar R , Thiruchelvam D , Nofech-Mozes S , Hanna W , et al. Long-term outcomes of hypofractionation versus conventional radiation therapy after breast-conserving surgery for ductal carcinoma in situ of the breast . Int J Radiat Oncol Biol Phys . 2014 ; 90 : 1017 – 24 . OpenUrl PubMed 56. ↵ Rakovitch E , Parpia S , Koch A , Grimard L , Soliman H , Stevens C , et al. DUCHESS: an evaluation of the ductal carcinoma in situ score for decisions on radiotherapy in patients with low/intermediate-risk DCIS . Breast Cancer Res Treat . 2021 ; 188 : 133 – 9 . OpenUrl PubMed 57. ↵ Krueger F. FelixKrueger/TrimGalore [Internet] . 2025 [cited 2025 Jun 2]. Available from: https://github.com/FelixKrueger/TrimGalore 58. ↵ Dobin A , Davis CA , Schlesinger F , Drenkow J , Zaleski C , Jha S , et al. STAR: ultrafast universal RNA-seq aligner . Bioinforma Oxf Engl . 2013 ; 29 : 15 – 21 . OpenUrl 59. ↵ Liao Y , Smyth GK , Shi W . The R package Rsubread is easier, faster, cheaper and better for alignment and quantification of RNA sequencing reads . Nucleic Acids Res . 2019 ; 47 : e47 . OpenUrl CrossRef PubMed 60. ↵ Anders S , Huber W . Differential expression analysis for sequence count data . Genome Biol . 2010 ; 11 : R106 . 61. ↵ Gendoo DMA , Ratanasirigulchai N , Schröder MS , Paré L , Parker JS , Prat A , et al. Genefu: an R/Bioconductor package for computation of gene expression-based signatures in breast cancer . Bioinforma Oxf Engl . 2016 ; 32 : 1097 – 9 . OpenUrl 62. ↵ Parker JS , Mullins M , Cheang MCU , Leung S , Voduc D , Vickery T , et al. Supervised Risk Predictor of Breast Cancer Based on Intrinsic Subtypes . J Clin Oncol . 
2009 ; 27 : 1160 – 7 . OpenUrl Abstract / FREE Full Text 63. ↵ Bolger AM , Lohse M , Usadel B . Trimmomatic: a flexible trimmer for Illumina sequence data . Bioinforma Oxf Engl . 2014 ; 30 : 2114 – 20 . OpenUrl 64. ↵ Li H . Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM [Internet] . arXiv.org. 2013 [cited 2024 Apr 8]. Available from: https://arxiv.org/abs/1303.3997v2 65. ↵ Danecek P , Bonfield JK , Liddle J , Marshall J , Ohan V , Pollard MO , et al. Twelve years of SAMtools and BCFtools . GigaScience . 2021 ; 10 : giab008 . OpenUrl CrossRef PubMed 66. ↵ Sahraeian SME , Liu R , Lau B , Podesta K , Mohiyuddin M , Lam HYK . Deep convolutional neural networks for accurate somatic mutation detection . Nat Commun . 2019 ; 10 : 1041 . OpenUrl PubMed 67. ↵ Benjamin D , Sato T , Cibulskis K , Getz G , Stewart C , Lichtenstein L. Calling Somatic SNVs and Indels with Mutect2 [Internet] . 2019 [cited 2024 Apr 8]. Available from: http://biorxiv.org/lookup/doi/10.1101/861054 68. Fan Y , Xi L , Hughes DST , Zhang J , Zhang J , Futreal PA , et al. MuSE: accounting for tumor heterogeneity using a sample-specific error model improves sensitivity and specificity in mutation calling from sequencing data . Genome Biol . 2016 ; 17 : 178 . OpenUrl CrossRef PubMed 69. Lai Z , Markovets A , Ahdesmaki M , Chapman B , Hofmann O , McEwen R , et al. VarDict: a novel and versatile variant caller for next-generation sequencing in cancer research . Nucleic Acids Res . 2016 ; 44 : e108 . OpenUrl CrossRef PubMed 70. Koboldt DC , Zhang Q , Larson DE , Shen D , McLellan MD , Lin L , et al. VarScan 2: somatic mutation and copy number alteration discovery in cancer by exome sequencing . Genome Res . 2012 ; 22 : 568 – 76 . OpenUrl Abstract / FREE Full Text 71. Kim S , Scheffler K , Halpern AL , Bekritsky MA , Noh E , Källberg M , et al. Strelka2: fast and accurate calling of germline and somatic variants . Nat Methods . 2018 ; 15 : 591 – 4 . OpenUrl CrossRef PubMed 72. 
↵ Larson DE , Harris CC , Chen K , Koboldt DC , Abbott TE , Dooling DJ , et al. SomaticSniper: identification of somatic point mutations in whole genome sequencing data . Bioinformatics . 2012 ; 28 : 311 . OpenUrl CrossRef PubMed Web of Science 73. ↵ Sahraeian SME , Fang LT , Karagiannis K , Moos M , Smith S , Santana-Quintero L , et al. Achieving robust somatic mutation detection with deep learning models derived from reference data sets of a cancer sample . Genome Biol . 2022 ; 23 : 12 . 74. Barbitoff YA , Abasov R , Tvorogova VE , Glotov AS , Predeus AV . Systematic benchmark of state-of-the-art variant calling pipelines identifies major factors affecting accuracy of coding sequence variant discovery . BMC Genomics . 2022 ; 23 : 155 . 75. ↵ de Schaetzen van Brienen L , Larmuseau M , Van der Eecken K , De Ryck F , Robbe P , Schuh A , et al. Comparative analysis of somatic variant calling on matched FF and FFPE WGS samples . BMC Med Genomics . 2020 ; 13 : 94 . 76. ↵ Olson ND , Wagner J , Dwarshuis N , Miga KH , Sedlazeck FJ , Salit M , et al. Variant calling and benchmarking in an era of complete human genome sequences . Nat Rev Genet . 2023 ; 24 : 464 – 83 . OpenUrl CrossRef PubMed 77. ↵ McLaren W , Gil L , Hunt SE , Riat HS , Ritchie GRS , Thormann A , et al. The Ensembl Variant Effect Predictor . Genome Biol . 2016 ; 17 : 122 . OpenUrl CrossRef PubMed 78. ↵ Shyr C , Tarailo-Graovac M , Gottlieb M , Lee JJ , van Karnebeek C , Wasserman WW . FLAGS, frequently mutated genes in public exomes . BMC Med Genomics . 2014 ; 7 : 64 . OpenUrl CrossRef PubMed 79. ↵ Alfieri F , Caravagna G , Schaefer MH . Cancer genomes tolerate deleterious coding mutations through somatic copy number amplifications of wild-type regions . Nat Commun . 2023 ; 14 : 3594 . OpenUrl PubMed 80. ↵ Mayakonda A , Lin D-C , Assenov Y , Plass C , Koeffler HP . Maftools: efficient and comprehensive analysis of somatic variants in cancer . Genome Res . 2018 ; 28 : 1747 – 56 . 
OpenUrl Abstract / FREE Full Text 81. ↵ Blokzijl F , Janssen R , van Boxtel R , Cuppen E . MutationalPatterns: comprehensive genome-wide analysis of mutational processes . Genome Med . 2018 ; 10 : 33 . OpenUrl CrossRef PubMed 82. ↵ Raivo Kolde . pheatmap: Pretty Heatmaps [Internet] . 2010 [cited 2025 May 20]. p. 1.0.12. Available from: https://CRAN.R-project.org/package=pheatmap 83. ↵ Martincorena I. im3sanger/dndscv [Internet]. 2025 [cited 2025 May 20]. Available from: https://github.com/im3sanger/dndscv 84. ↵ Mutation data transcript annotation [Internet] . [cited 2025 May 20]. Available from: https://docs.cbioportal.org/mutation-data-transcript-annotation/ 85. ↵ Chang MT , Bhattarai TS , Schram AM , Bielski CM , Donoghue MTA , Jonsson P , et al. Accelerating Discovery of Functional Mutant Alleles in Cancer . Cancer Discov . 2018 ; 8 : 174 – 83 . OpenUrl Abstract / FREE Full Text 86. ↵ Suehnholz SP , Nissan MH , Zhang H , Kundra R , Nandakumar S , Lu C , et al. Quantifying the Expanding Landscape of Clinical Actionability for Patients with Cancer . Cancer Discov . 2024 ; 14 : 49 – 65 . OpenUrl CrossRef PubMed 87. ↵ Lesluyes T , Tarabichi M , Haase K , Demeulemeester J , Loo P. Robust and platform-independent CNA calling with ASCAT v3 . 2023 . 88. ↵ Renault V , Tost J , Pichon F , Wang-Renault S-F , Letouzé E , Imbeaud S , et al. aCNViewer: Comprehensive genome-wide visualization of absolute copy number and copy neutral variations . PloS One . 2017 ; 12 : e0189334 . OpenUrl PubMed 89. ↵ Franch-Expósito S , Bassaganyas L , Vila-Casadesús M , Hernández-Illán E , Esteban-Fabró R , Díaz-Gay M , et al. CNApp, a tool for the quantification of copy number alterations and integrative analysis revealing clinical implications. Robles-Espinoza CD, Cheah KSE, editors . eLife . 2020 ; 9 : e50267 . OpenUrl PubMed 90. ↵ Heinze G , Ploner M , Jiricka L , Steiner G. coxphf: Cox Regression with Firth’s Penalized Likelihood [Internet] . 2023 [cited 2025 May 20]. 
Available from: https://cran.r-project.org/web/packages/coxphf/index.html 91. ↵ Yu G , Wang L-G , Han Y , He Q-Y . clusterProfiler: an R Package for Comparing Biological Themes Among Gene Clusters . OMICS J Integr Biol . 2012 ; 16 : 284 – 7 . OpenUrl CrossRef PubMed 92. ↵ org.Hs.eg.db [Internet]. Bioconductor. [cited 2025 May 20]. Available from: http://bioconductor.org/packages/org.Hs.eg.db/ 93. ↵ pairwise_termsim: pairwise_termsim in enrichplot: Visualization of Functional Enrichment Result [Internet] . [cited 2025 May 20]. Available from: https://rdrr.io/bioc/enrichplot/man/pairwise_termsim.html 94. ↵ emapplot function -RDocumentation [Internet] . [cited 2025 May 20]. Available from: https://www.rdocumentation.org/packages/enrichplot/versions/1.13.1.994/topics/emapplot View the discussion thread. Back to top Previous Next Posted June 24, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Mutational landscape of pure ductal carcinoma in situ and associations with disease prognosis and response to radiotherapy Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Mutational landscape of pure ductal carcinoma in situ and associations with disease prognosis and response to radiotherapy Noor Rizvi , Eliseos J. Mucaki , Emily L. Salmini , Monica Zhang , Sabina Trebinjac , Ezra Hahn , Lawrence Paszat , Sharon Nofech-Mozes , Michael T. 
Hallett , Eileen Rakovitch , Vanessa Dumeaux medRxiv 2025.03.01.25323122; doi: https://doi.org/10.1101/2025.03.01.25323122 Share This Article: Copy Citation Tools Mutational landscape of pure ductal carcinoma in situ and associations with disease prognosis and response to radiotherapy Noor Rizvi , Eliseos J. Mucaki , Emily L. Salmini , Monica Zhang , Sabina Trebinjac , Ezra Hahn , Lawrence Paszat , Sharon Nofech-Mozes , Michael T. Hallett , Eileen Rakovitch , Vanessa Dumeaux medRxiv 2025.03.01.25323122; doi: https://doi.org/10.1101/2025.03.01.25323122 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4436) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4538) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (542) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3333) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9232) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) 
Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00f78f1aab30d06',t:'MTc3OTY1ODk2OQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00