Full text
77,214 characters
· extracted from
preprint-html
· click to expand
ARID1A expression and genomic alterations as predictive biomarkers for metachronous colorectal polyp risk stratification | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search ARID1A expression and genomic alterations as predictive biomarkers for metachronous colorectal polyp risk stratification View ORCID Profile Aula Ammar , Amna Matly , Kimiya Kabiri Arani , Scott Murray , Mayuri Hendricks , Alexander Winton , Natalie Fisher , Noori Maka , Gerard Lynch , Jennifer Hay , Mark Johnstone , View ORCID Profile Philip Dunne , View ORCID Profile Stephen McSorley , View ORCID Profile Joanne Edwards doi: https://doi.org/10.1101/2025.05.03.25326857 Aula Ammar 1 Translational Cancer Pathology Group , Experimental Therapeutics, School of Cancer Sciences, University of Glasgow , UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Aula Ammar For correspondence: aula.ammar{at}glasgow.ac.uk Amna Matly 1 Translational Cancer Pathology Group , Experimental Therapeutics, School of Cancer Sciences, University of Glasgow , UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kimiya Kabiri Arani 1 Translational Cancer Pathology Group , Experimental Therapeutics, School of Cancer Sciences, University of Glasgow , UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Scott Murray 1 Translational Cancer Pathology Group , Experimental Therapeutics, School of Cancer Sciences, University of Glasgow , UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mayuri Hendricks 1 Translational Cancer Pathology Group , Experimental Therapeutics, School of Cancer Sciences, University of Glasgow , UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alexander Winton 1 Translational Cancer Pathology Group , Experimental Therapeutics, School of Cancer Sciences, University of Glasgow , UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Natalie Fisher 3 Patrick G Johnston Centre for Cancer Research, School of Medicine, Dentistry and Biomedical Sciences , Belfast, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Noori Maka 2 National Health Service Greater Glasgow and Clyde (NHSGGC) , Glasgow, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Gerard Lynch 1 Translational Cancer Pathology Group , Experimental Therapeutics, School of Cancer Sciences, University of Glasgow , UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jennifer Hay 4 Human Biology Facility, The Francis Crick Institute , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mark Johnstone 1 Translational Cancer Pathology Group , Experimental Therapeutics, School of Cancer Sciences, University of Glasgow , UK 2 National Health Service Greater Glasgow and Clyde (NHSGGC) , Glasgow, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Philip Dunne 3 Patrick G Johnston Centre for Cancer Research, School of Medicine, Dentistry and Biomedical Sciences , Belfast, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Philip Dunne Stephen McSorley 1 Translational Cancer Pathology Group , Experimental Therapeutics, School of Cancer Sciences, University of Glasgow , UK 2 National Health Service Greater Glasgow and Clyde (NHSGGC) , Glasgow, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Stephen McSorley Joanne Edwards 1 Translational Cancer Pathology Group , Experimental Therapeutics, School of Cancer Sciences, University of Glasgow , UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Joanne Edwards Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Background Current British bowel cancer surveillance guidelines (BSG2020) rely on polyp size, number, histology and dysplasia grade to predict metachronous polyp risk. However, these criteria fail to accurately distinguish true high-and low-risk patients, resulting in either unnecessary surveillance or misclassification of patients at true risk of metachronous lesions. No molecular biomarkers have yet been integrated into risk stratification, limiting the potential for personalised surveillance strategies. We investigated ARID1A expression and copy number alterations (CNAs) in KMT2C, SMG1 and TRAF7 as predictive markers for colorectal metachronous lesions. Methods A total of n=1184 archival colorectal polyp tissue microarrays (TMAs) were analysed for ARID1A expression using immunohistochemistry (IHC). Genomic and transcriptomic characterisation of a subset of patients (n=623) was performed using somatic mutation profiling, CNAs and bulk RNA sequencing. Results ARID1A was identified as an independent predictor of metachronous disease. ARID1A protein expression correlated with ARID1A and KMT2D mutations. Patients with ARID1A mutations exhibited fewer CNAs than non-mutants. Among CNAs, KMT2C, SMG1 and TRAF7 were the most frequently altered genes and were predictive of metachronous polyp formation. Combining ARID1A expression with KMT2C, SMG1 and TRAF7 CNAs refined risk stratification within BSG2020-defined risk groups. Several pathways, including cell cycle regulation and DNA repair, differed between ARID1A-CNA risk groups. Conclusion: ARID1A expression, in combination with KMT2C, SMG1 and TRAF7 CNAs, presents a novel biomarker tool to enhance metachronous polyp risk stratification alongside the BSG2020 criteria, potentially improving personalisation of surveillance. Background Bowel cancer surveillance guidelines following polypectomy, established as part of the national bowel screening program, rely on basic pathological features to assess metachronous polyp or colorectal cancer (CRC) risk. These include polyp count, histological subtypes and dysplasia grade, as outlined by the British Society of Gastroenterology 2020 (BSG2020) guidelines ( 1 ). However, an audit we recently published revealed that 48% of patients classified as low risk later developed metachronous lesions, while 39% of high-risk patients did not ( 2 ). This suggests that incorporating molecular factors could improve risk stratification. Genetic studies have linked multiple mutations to metachronous or advanced adenoma risk (reviewed in ( 3 )), yet little is known about the mutational landscape of pre-malignant colonic polyps. A single cell-based study of pre-cancerous colonic polyps profiled mutations of 24 pre-cancerous lesions with adenomatous and serrated histology ( 4 ). A selected list of CRC driver mutations including APC , KRAS , TP53 , BRAF , PIK3CA , MSH3 , methylation genes ( KMT2C , KMT2D ) and chromatin remodelling genes ( ARID1A and ARID1B ) were retrieved from cBioPortal ( 5 ). Data suggested that methylation and chromatin remodelling pathways are altered in pre-malignant polyps. KMT2C and KMT2D mutations were present in 43% and 25% of the cohort. ARID1A and ARID1B were mutated in 20% of the patients. ARID1A and KMT2D completely co-existed whereas ARID1A and B did not ( Supplementary Figure 1a-b ), yet the impact of these mutational changes on future risk of colonic polyps or cancer remains unclear. Download figure Open in new tab Supplementary Figure 1. Data supporting the selection of ARID1A as a target in premalignant polyps and an overview of study datasets, copy number alteration analysis, ARID1A antibody validation and survival outcomes. (a-b) Oncoplots of CRC-related mutations in precancerous polyps from publicly available data and a bar-dumbbell plot of overlapping mutations [4]. (c) CONSORT diagram of training and test datasets, with bar graphs showing comparable percentages of clinicopathological criteria between the training and test datasets. (d) Diagram illustrating the interpretation of log2 values for copy number alterations of Cancer Plus Panel. (e) Validation of ARID1A antibody specificity using Western blotting (left-20 µg protein loaded per well; expected molecular weight ∼200kDa and 65 kDa for a truncated ARID1A form) and IHC staining in cell pellets of SW620, SW48 and SW837 cells (right). A negative control is also shown, omitting the primary antibody step. SW48 cells harbours an ARID1A deletion mutation, showed low ARID1A levels compared to wild-type SW620 and SW437 (f-g) Histograms of ARID1A epithelial percentages in the training and test datasets, respectively. (h-k) One-minus Kaplan-Meier survival curves for ARID1A high/low percentage groups based on the upper 75th percentile cutpoint and metachronous lesions ( h-i ) and advanced metachronous polyps ( j-k ) over 72 months of follow-up in the training and test datasets. (l-s) One-minus Kaplan-Meier survival curves for polyp location ( l-m ), sex ( n-o ), number of polyps ( p-q ) and dysplasia grade ( r-s ) in the training and test datasets, respectively. ARID1A is key for embryonic development as loss of ARID1A lead to early embryonic lethality ( 6 ). ARID1A plays a significant role in regulating gene expression by modulating chromatin structure. It is widely recognised as a tumour suppressor gene whose loss or mutation has been implicated in the development and progression of various cancers such as ovarian clear cell carcinoma ( 7 ). Despite the mutation rates of ARID1A across different cancer types ( 8 ) the prognostic significance of ARID1A protein is inconclusive. Many studies showed that loss of ARID1A expression is associated with poor survival ( 9 – 11 ). Moreover, it has been suggested that ARID1A co-mutations with other tumour suppressor gene such as TP53 or PTEN is required to trigger tumour progression ( 12 , 13 ). In contrast, loss of ARID1A protein expression is associated with favourable prognosis in cancers such as early-stage grade III endometrial cancer ( 14 ) lung cancer ( 15 ) and early or stage IV CRC ( 16 , 17 ). We hypothesise that ARID1A plays a critical role in polyp development and may serve as a predictive biomarker for metachronous disease. In this study, we investigated ARID1A expression in index polyps and evaluated its potential to improve CRC surveillance and guide post-polypectomy risk stratification. METHODS Patient cohort and TMA construction TMAs from retrospective formalin-fixed paraffin-embedded (FFPE) index colonic polyps were tested for ARID1A expression using the INCISE cohort ( 2 ), which includes the largest and/or advanced polyps (≥1cm) from patients aged 50–74 (median=63 years) ( Supplementary Figure 1c ). TMA samples were selected based on sufficient tissue availability, prioritising large polyps considered most clinically significant per BSG2020 guidelines. Four 0.6 mm cores per patient capturing tissue heterogeneity were included in TMAs. Data (n=1184) were split into training (n=819) and test (n=365) sets. Table 1 summarises the clinicopathological criteria. The median follow-up was 36 months (range: 6–72). Patients were censored at 6 years. Ethical approval was granted through the INCISE project (GSH/20/CO/002) ( 2 ). The study follows REMARK guidelines ( 18 ). View this table: View inline View popup Download powerpoint Table 1. clinical and pathological criteria of the training and test datasets Western blotting (WB) Primary antibody specificity was confirmed prior to immunohistochemistry using WB (supplementary methods; Supplementary Figure 1d ). Based on information in DepMap portal ( https://depmap.org/portal ), SW48 cell line harbours a deletion mutation in ARID1A whereas SW620 and SW837 are wildtype. Cell lysates were prepared from SW48, SW620 and SW837 using RIPA buffer supplemented with protease and phosphatase inhibitors. Pellets (∼1.5 × 10D cells) were lysed in 150 μL buffer via repeated syringe shearing, incubated on ice, and centrifuged at 14000 g for 5 min to collect supernatants. Protein concentrations were quantified using the Bradford assay with bovine serum albumin (BSA) standards. Lysate concentrations were normalised to 1 mg/mL, mixed with 4X loading dye (Nu-Page, Invitrogen) supplemented with beta-mercaptoethanol, boiled for 5 min and stored at -20°C until use. 20 ug of individual protein lysates were separated via Bio-Rad Mini-Protean TGX 4-20% pre-cast gels (#4561093, BioRad), transferred to nitrocellulose membranes, and verified with Ponceau S staining to ensure successful transfer. Membranes were blocked with 5% BSA for 1h at room temperature (RT), then incubated with primary antibodies for ARID1A and beta-actin (loading control, 1:5000, Cell Signalling) at 4◦C overnight followed by appropriate HRP-conjugated secondary antibodies for 1h at RT. Detection was carried out using ECL substrate (#2106, Thermofisher) and visualised using a BioRad Chemi-Doc system. Immunohistochemistry ARID1A protein expression was evaluated by immunohistochemistry using 2.5μm sections of cell pellets, TMAs and selected 4μm whole tissue sections. ARID1A staining was performed with normal colon and CRC tissues serving as positive controls. Cell pellets, TMA and full section slides were de-waxed and antigen retrieved using a PT-module (Agilent) and dewax-antigen retrieval-H (Epredia). Ultravision Quanto detection system (Epredia) was used following the manufacturer’s instruction. In brief, endogenous peroxidase was blocked with 3% H2O2 for 10 min followed by a protein blocking step (8 minutes). ARID1A (polyclonal rabbit antibody; cat#12354; 1:1500, Cell Signalling Technology) was applied to the tissue for overnight at 4D°C. Tissues were treated with an amplifier for 10 min followed by an HRP polymer step (10 min). Immunohistochemical reactions were visualised with 3,3′-diaminobenzidine and counterstained with haematoxylin Gill III formula (Leica Biosystems), dehydrated in ethanol and xylene followed by mounting with ClearVue (Epredia). Slides were scanned at 20x magnification using a Hamamatsu S60 scanner. Digital pathology analysis for ARID1A All TMA images were scored using QuPath (v0.5.0) ( 19 ). First, the TMA dearray function was used to allow mapping cores to patient IDs. Staining was manually checked to identify high, moderate and low expression of ARID1A, and representative examples were used to train QuPath to detect different staining intensities and set thresholds as part of the positive cell detection function. Histoscores (H-scores; 0-300) were calculated by QuPath. An object classifier segmented epithelial and lamina propria cells and ARID1A histoscores and percentages were exported. Cores with >100 adenoma cells were considered assessable. 10% of scores manually validated. DNA extraction, sequencing and somatic mutational analysis Genomic DNA was extracted from 10µm tissue sections (n=623), using the Maxwell RSC instrument (Promega) and samples were assessed for quality using Qubit and TapeStation. DNA Sequencing and variant calling was performed by the Genomic Innovation Alliance (Glasgow, UK). Samples with low DNA concentration (<10Dng/µl) or poor integrity (DIN <3.0) were excluded. Libraries were prepared using the Agilent SureSelect XT2 HS2 protocol with Cancer Plus panel enrichment and sequenced on the Illumina NovaSeq 6000, aiming for 500× mean coverage. Next-generation sequencing (NGS) data processing was performed using the HOLMES pipeline v1.3.1. FASTQ files were aligned to hg38 with BWA v0.7.15, SNVs and indels were called using deepSNV/Shearwater and Pindel, structural variants with Brass, and copy number alterations with geneCN. Variants were annotated with CAVA. Samples were analysed in tumour-only mode using a panel of normals from 21 healthy tonsil samples (unmatched normal) to filter artifacts and germline variants. Variants underwent quality filtering based on allele frequency, read depth, mapping quality, and presence in public databases. Sequencing QC thresholds included: ( 1 ) mean coding depth >50×, ( 2 ) insert size >120 bp, ( 3 ) non-reference read rate <3.5%, ( 4 ) discordant pairs 20%. Two VCF files per sample were generated: one containing all somatic variants (for TMB estimation) and one filtered for coding variants (for reporting). Sex chromosomes were excluded from the analysis. Somatic mutation analysis was performed using Maftools (v2.18.0) in R. ARID1A mutations were visualised with Mutation Mapper on cBioPortal and compared to OncoKB list ( 20 ) to assess pathogenic/oncogenic mutations. Log2 CNAs for somatic mutations were used ( Supplementary Figure 1e ). Statistical significance for chi-squared tests was set at padj<0.05. Transcriptomics analysis Templated Oligo assay with Sequencing (TempO-Seq) readout (Biospyder Technologies, USA) whole transcriptome v2.0 panel profiling was performed on 4µm full sections ( 21 ). Quality control and preprocessing included the following steps. Probe variance across all samples was calculated using the sapply function. Probes with a variance less than the 25th percentile were removed (n = 5640 probes). Samples with a low read count (< 95% confidence interval [<2.28×106 counts]) were identified (stats v4.2.3) and removed. Samples with incomplete batch information were removed (n =5 samples). The counts were batch corrected using ComBat_seq (sva v3.46.0). Probe IDs were mapped to gene symbols with duplicated genes collapsed using MaxMeans (WGCNA v1.72-1), resulting in 14,993 genes. Following outlier review n=20 samples were removed. Quality control and preprocessing was carried out in RStudio (R4.2.3). Counts per million normalisation and log2+1 transformed values were generated. Single sample Gene Set Variation Analysis (ssGSVA) method ( 22 ) was implemented in R using MsigDB c5 Gene Ontology (GO) biological process (BP), molecular functions (MF) and cellular components (CC) pathways as a reference with a range of 10-500 genes/pathway. Statistical analysis ARID1A histoscores were dichotomised into low/high groups using a cutpoint generated by the R-Survminer package. Quantile regression estimated ARID1A percentage cutpoints (25 th , 50 th , 75 th percentiles) against metachronous polyp outcomes. IHC results were analysed by correlating ARID1A expression (low/high) with clinicopathological and mutational data using Chi-squared tests. Kaplan-Meier survival curves were plotted and group differences were assessed by log-rank tests. Cox-regression multivariate analysis, adjusted for confounding clinical/pathological factors, was performed. Statistical significance was set at p<0.05, using IBM SPSS Statistics (ver28). RESULTS ARID1A expression in colonic polyp tissue and its association with clinicopathological criteria WB and ARID1A staining in ARID1A-mutant SW48 vs wildtype SW620 and SW837confirmed the specificity of the ARID1A antibody ( Supplementary Figure 1e ). In tissues, ARID1A exhibited nuclear staining in epithelial and lamina propria cells, with heterogeneous expression in epithelial cells ranging from weak to intense ( Figure 1a-b ). A mosaic pattern and partial or complete crypt loss were observed ( Figure 1c-d ). Figures 1e-h show representative controls and Figures 1i-j demonstrate ARID1A detection using QuPath. Download figure Open in new tab Figure 1. IHC of ARID1A in colonic premalignant polyps. (a-d) Weak, strong, mosaic and crypt-loss expression in polyps, scale bars: 100 um (e-f) Negative and positive control staining, scale bars: 100 um (g-h) ARID1A-negative (-ve) and ARID1A-positive (+ve) staining in CRC controls. (i-j) QuPath positive cell detection intensities and adenoma/lamina propria classifiers. (k-l) Histograms of ARID1A histoscores in the training and test datasets. (m) Generation of the ARID1A cutpoint using the survminer package in R in the training set. (n-q) One-minus Kaplan-Meier survival curves for ARID1A high/low groups with regards to metachronous lesions and advanced metachronous polyps over 72 months of follow-up in the training and test datasets. (p-q) ARID1A status is significantly associated with future polyp outcomes in the training and test datasets (p0.05). (r-s) Sensitivity and specificity of the BSG2020 risk score, ARID1A histoscores and ARID1A percentages in the training and test datasets. Bars represent confidence intervals for sensitivity and specificity. Given the heterogeneity of expression, histoscores were initially employed. Median H-scores were 182.1 (range 22.2-264.9) in the training dataset and 185.9 (16-263.5) in the test dataset ( Figure 1k-l ). A strong correlation was observed between ARID1A expression in epithelial and lamina propria cells (training dataset: Pearson r=0.914, p<0.0001, n=819; test dataset: r=0.912, p<0.001, n= 365), indicating broad expression across polyp tissue compartments. An ARID1A histoscore threshold of 147.49 ( Figure 1m ) dichotomised patients into low and high ARID1A expression groups. Low ARID1A was observed in 19.4% (n=159) and 17.5% (n=64) of cases in the training and test datasets, respectively. Then to assess ARID1A loss, the percentage of positive epithelial cells was also evaluated ( Supplementary Figure 1f-g ). Distribution of ARID1A percentages (ARID1A_Per) was skewed towards high expression. Quartile values were similar in both datasets: 25 th , 50 th and 75 th percentiles were 84.7%, 92.7% and 96.7% in the training and 85.5%, 93.2% and 96.7% in the test datasets, respectively. Quantile regression revealed significant differences in the 75th percentile between the “no future polyp” and “future polyp” groups (Supplementary Table1). Patients were dichotomised into high (≥75 th percentile of ARID1A_Per) and low (< 75 th percentile of ARID1A_Per) for further analyses. Chi-squared tests revealed significant associations between ARID1A histoscore groups and polyp location (p=0.002), future polyp status (p=0.008) and advanced future polyps (p=0.019) in the training dataset. In the test dataset, associations were found with age (p=0.021), polyp location (p=0.031), future polyp status (p=0.004) and advanced future polyps (p=0.044) ( Table 2 ). View this table: View inline View popup Download powerpoint Table 2. Chi-squared test results of epithelial ARID1A histoscores with clinicopathological criteria No significant associations were found with polyp histology, number, or dysplasia grade for ARID1A histoscores (all p>0.05; Table 2 ) and ARID1A_Per (Supplementary Table2). ARID1A is predictive of metachronous polyp outcome in univariate and multivariate analysis Survival analysis showed that high ARID1A histoscores were significantly associated with future polyp development in both the training and test datasets (p=0.008 and p=0.003, respectively; Figure 1n-o ). Elevated ARID1A_Per were linked to metachronous polyps in both datasets (p=0.027 and p<0.001, respectively; Supplementary Figure 1h-i ). The predictive value of ARID1A histoscores for advanced future polyp status was assessed ( Figure 1p-q ). Although a trend of separation between low and high ARID1A histoscore groups was observed, statistical significance was not reached in either dataset. High ARID1A_Per in predicted advanced future status in the training dataset (p=0.023) but not in the test dataset (p=0.090; Supplementary Figure 1j-k ). The specificity and sensitivity of ARID1A as a biomarker was also investigated. In both datasets, BSG2020 score sensitivity was 73.3% (training) and 76.9% (test) ( Figure 1r–s ). ARID1A histoscores showed higher sensitivity (83.8% and 87.5%), while ARID1A_Per had lower sensitivity (56.6% and 68.3%). Specificity for BSG2020 was 42.6% (training) and 38.9% (test), while ARID1A histoscores had lower specificity (23.5% and 42.2%) and ARID1A_Per had higher specificity (49.9% and 55.4%) than BSG2020-score. ARID1A histoscores identified positive outcomes, while ARID1A_Per accurately identified negative outcomes, suggesting that both methods are complementary. It is therefore of interest to consider both scoring methods. Cox regression multivariate analysis was conducted using two models. ARID1A, sex and polyp location were included either with the BSG2020 risk score (Model 1) or with components of the BSG2020 score (i.e., number of polyps and dysplasia grade; size was excluded because all polyps were ≥1 cm). Kaplan-Meier curves and log-rank tests identified significant predictive factors (p < 0.05) in the training dataset, except for dysplasia grade (p = 0.499; Supplementary Figure 1l–s ). In the test dataset, polyp location and number of polyps remained significant predictors. In Model 1, ARID1A histoscore was an independent predictor of metachronous disease in both datasets (training HR=1.362, p=0.016; test HR=1.924, p=0.002; Table 3 ). In Model 2, ARID1A histoscore remained an independent predictor in both datasets (training HR=1.372, p=0.014; test HR=1.893, p=0.003; Table 4 ). High ARID1A_Per also predicted metachronous disease in both models (Supplementary Table 3 & 4). View this table: View inline View popup Download powerpoint Table 3. Multivariate Cox-regression analysis of sex, location, BSG2020 score and ARID1A histoscore groups in the training and test datasets (model 1) View this table: View inline View popup Download powerpoint Table 4. Multivariate Cox-regression analysis of sex, location, number of polyps, dysplasia grade and ARID1A histoscore groups in the training and test datasets (model 2) ARID1A mutations in pre-cancerous polyps is associated with low ARID1A protein expression but does not predict metachronous lesion ARID1A was among the 11 th most frequently mutated genes in pre-cancerous colonic polyps ( Figure 2a ). Mutational frequencies of genes such as APC , KRAS , MSH3 and TP53 were similar across polyps with high and low ARID1A histoscores ( Figure 2a ). However, mutations in KMT2D and ARID1A were significantly more frequent in the ARID1A-low group (12% vs. 24% and 11% vs. 20%, respectively). Download figure Open in new tab Figure 2. mutational profiling of premalignant colonic polyps and a description of ARID1A mutation in polyps with comparison of ARID1A protein expression in relation to mutation status. (a) Oncoplot comparing top mutations in colonic polyps between ARID1A high-and low-expression groups. The most frequently mutated genes preceding ARID1A were APC, KRAS, MSH3, SOX9, KMT2C, ZFHX3, TP53, MGA, TCF7L2, FAT1 and KMT2D. MT2D and ARID1A mutation rates were doubled in the ARID1A low-expression group. (b-c) Box plots showing a significant decrease in ARID1A protein expression in the presence of KMT2D (p = 0.021) or ARID1A mutations (p < 0.001). (d) Lollipop plots of ARID1A mutations in polyps, colon adenocarcinoma and rectal carcinoma across 20 exons. Arrows indicate pathogenic/oncogenic mutations based on the OncoKB™ database. (e) Percentage of the most common substitution mutations in polyp tissues, with C>T transitions being the most frequent (47.95%). (f) Types of ARID1A mutations in polyps, showing truncating mutations as the most frequent (48%). (g) Bar graphs of mutation types across ARID1A exons, highlighting exon 1, 18 and 20 as the most frequently mutated. (h-j) Representative images of ARID1A expression in full tissue sections, showing high (h), mixed (i) and loss (j) expression patterns. (k) Waffle plots showing the percentage of ARID1A loss patterns in relation to future polyp status (no vs. yes) and ARID1A mutation status (non-mutant vs. mutant). ARID1A loss patterns were more frequent in patients without future polyps and in those with ARID1A mutations. (l) Distribution of full-section histoscores based on advanced future polyp status (p>0.05). (m) Visualisation of ARID1A histoscores in full sections using a clustered box plots analysed by expression pattern and future polyp outcome. Negative associations were found between ARID1A-histoscores groups and KMT2D (p=0.002, OR=0.543) and ARID1A mutations (p=0.005, OR=0.506) ( Table 5 ). Raw ARID1A histoscores were lower in KMT2D - and ARID1A -mutant cases (p=0.021 and p<0.001, respectively; Figure 2b-c ). It has been reported that Methylation-related alterations and ARID1A mutations may drive ARID1A protein loss in polyps ( 23 ). No other mutations significantly associated with ARID1A protein levels (data not shown). View this table: View inline View popup Download powerpoint Table 5. Chi-squared associations between ARID1A histoscore and ARID1A/KMT2D mutation status Before multiple testing correction, ARID1A mutations were linked to KRAS and TP53 (p=0.010, p=0.013; Supplementary Table 5), while other mutations showed potential co-occurrence ( APC-SOX9 , KMT2C-KMT2D , KMT2C-MGA , KRAS-TCF7L2 ; Supplementary Table 5). However, it is unclear if these mutations are polyp-restricted or widespread. ARID1A and KMT2D mutations did not predict metachronous polyp/CRC development ( Supplementary Figure 2a-b ), nor were individual or paired mutations associated with metachronous polyps (all p>0.05; data not shown). Download figure Open in new tab Supplementary Figure 2. Supplementary data for mutational analysis, full section data and copy number alterations. (a–b) Kaplan-Meier survival curves for ARID1A and KMT2D mutations in polyps, showing no significant predictive value (p= 0.721 and p= 0.530, respectively). (c-d) Distribution of ARID1A histoscore and ARID1A_Per across different exons, showing no significant difference in total ARID1A expression (p= 0.089 and p= 0.114, respectively). (e) Box plots showing the distribution of epithelial ARID1A_Per in full sections based on advanced future polyp outcome. (f) Visualisation of ARID1A percentages in full sections using clustered box plots, analysed by expression pattern and future polyp outcome. (g) Total CNAs of Cancer Plus Panel genes across four groups (ARID1A low/no future polyp, ARID1A low/future polyp, ARID1A high/no future polyp and ARID1A high/future polyp), showing a trend toward lower CNAs in patients with low ARID1A (ANOVA, p = 0.07). (h) Total CNAs of Cancer Plus Panel genes across four ARID1A_Per /future polyp groups (ARID1A_Per low/no future polyp, low/future polyp, high/no future polyp and high/future polyp) showing lower CNAs in patients with low percentage of ARID1A (ANOVA, p = 0.05). (i) Log2 values of CNAs for ARID1A in all patients, with no loss or deletion observed in the cohort . (j–m) Distribution of ARID1A histoscores in relation to CNAs in CDKN2C, KMT2C, MET and SMG1, with no significant differences observed (all ANOVA p> 0.05). Previous studies have shown complex role of ARID1A in tumorigenesis, particularly in the context of APC mutations where APC germline mutations mice with ARID1A mutations formed less intestinal tumours compared to APC mutations alone ( 24 ) and in ovarian cancer where ARID1A co-mutations with APC/PTEN prolonged survival ( 25 ). Further studies are needed to assess genetic, epigenetic and microenvironmental factors in disease progression. ARID1A mutations were distributed across all 20 exons, with only six identified as pathogenic ( Figure 2d ). Analysis of ARID1A mutations in colon and rectal adenocarcinoma revealed no frequently pathogenic mutations. However, the most affected exons (20 and 15) were similar to those harbouring pathogenic ARID1A mutations in polyps. The most common mutations were C>T (47.96%) and G>A (16.33%) transitions ( Figure 2e ). Truncation mutations (48%) were most frequent, followed by missense (23%) and in-frame shift mutations (12%) ( Figure 2f ). No significant difference in ARID1A protein expression was observed based on exon location (ANOVA, p>0.05; Supplementary Figure 2c-d ). Full section ARID1A staining reveals a loss pattern that correlates with mutations and reduced future polyp risk ARID1A staining was performed on 94 tissue sections (45.7% future polyp/CRC, 24.5% mutations) which represented a subset of the TMA-cohort. Variation of ARID1A staining was present across sections with high intensity, mosaic patterns, or focal clonal loss in individual crypts or large polyp areas ( Figure 2h-j ). Patients were classified by ARID1A loss patterns (no loss vs. loss pattern). The loss pattern was more frequent in ARID1A -mutant polyps and patients with no future polyps ( Figure 2k ). A trend of higher ARID1A epithelial histoscores and ARID1A_Per was observed in patients who developed advanced future polyps ( Figure 2l , Supplementary Figure 2e ), but this was not statistically significant, likely due to the sample size. Interestingly, patients with ARID1A loss patterns who developed metachronous lesions had higher histoscores than those without future polyps ( Figure 2m , Supplementary Figure 2f ). These findings highlight the complexity of ARID1A expression analysis and suggest the need for further investigation. Due to sample size, no survival analysis was performed. Copy number alterations signify metachronous polyp status and ARID1A mutations may indicate lower total CNA in pre-malignant colorectal polyps Total CNAs significantly differed between patients with/without future polyps (p=0.0028; Figure 3a ). Regression analysis showed that higher CNAs increased the likelihood of developing future polyps (OR=1.024, 95%CI=1.008-1.041), indicating a 2.4% increase in the chance of future polyp development for every unit increase in CNAs. Download figure Open in new tab Figure 3. Copy number alterations analysis in pre-malignant polyps. (a) Line graphs showing differences in total CNAs of Cancer Plus Panel genes per sample based on metachronous polyp outcome, with higher total CNAs in patients who develop future polyps (unpaired t-test, p = 0.0028). (b) Total CNAs of Cancer Plus Panel genes across four groups: mutant ARID1A/no future polyp, mutant ARID1A/future polyp, non-mutant ARID1A/no future polyp and non-mutant ARID1A/future polyp, showing lower total CNAs in patients with ARID1A mutations (ANOVA, p = 0.045). (c) Bar graphs showing the percentage of genes with CNAs across chromosomes 1–22, with chromosomes 1, 7 and 16 having the highest percentage of alterations. (d) Most altered genes in chromosomes 1, 7 and 16. CDKN2C predominantly exhibited loss/deletions, whereas ELF3, MET, KMT2C, SMG1 and TRAF7 showed gains/amplifications. (e) The percentage of patients with CNAs in CDKN2C, ELF3, MET, KMT2C, SMG1 and TRAF7. (f-g) Box plots showing significant differences in ARID1A histoscores between ELF3 and TRAF7 alterations (ANOVA, p < 0.001 for both), with significant pairwise comparisons (p < 0.05 indicated by *). (h ) Sensitivity and specificity analysis of KMT2C, SMG1 and TRAF7, which are associated with metachronous polyp outcomes. CNAs were compared across four groups: non-mutant ARID1A /future polyp (no), non-mutant ARID1A /future polyp (yes), mutant ARID1A /future polyp (no) and mutant ARID1A /future polyp (yes) and significant differences were found ( Figure 3b , p=0.045). Additionally, low ARID1A protein expression correlated with lower CNAs, though this was not statistically significant for histoscore (p=0.07) but significant for percentage comparisons (p=0.049), suggesting that ARID1A mutations may indicate CNAs levels in pre-malignant polyps. Top CNA genes KMT2C , SMG1 and TRAF7 predict metachronous disease status To explore the potential impact of CNAs on ARID1A expression, we analysed gene CNAs within the patient cohort. The most common CNAs were observed on chromosomes 1, 7 and 16, with the highest frequency on chromosome 1 (10.8%, Figure 3c ). On chromosome 1, CDKN2C and ELF3 were the most altered genes, whereas MET and KMT2C were frequently mutated on chromosome 7 and SMG1 and TRAF7 on chromosome 16 ( Figure 3d ). ARID1A alterations were rare, with only two patients showing gain/amplification ( Supplementary Figure 2i ) showing no role of ARID1A-CAN in its expression in polyp tissues. The proportion of patients with CNAs in CDKN2C , ELF3 , MET , KMT2C , SMG1 and TRAF7 was 38.6%, 27%, 28.8%, 21.5%, 66.3% and 13.4%, respectively ( Figure 3e ). CDKN2C alterations primarily consisted of loss/deletion whereas ELF3, MET, KMT2C, SMG1 and TRAF7 were gained/amplified. To test if ARID1A expression was driven by CNAs changes we assessed ARID1A protein expression across CDKN2C, ELF3, MET, KMT2C, SMG1 and TRAF7 mutations. Significant associations were found between ARID1A raw histoscores and ELF3 or TRAF7 ( Figure 3f-g , ANOVA p<0.001) or between ARID1A protein groups and ELF3 (p<0.001, OR= 0.436) and TRAF7 (p<0.001, OR=4.531; Table 6 ). View this table: View inline View popup Download powerpoint Table 6. Chi-squared test results of epithelial ARID1A histoscores with top mutated genes according to copy number alterations. However, no significant differences were seen in ARID1A expression and CNAs in CDKN2C, MET, KMT2C, or SMG1 ( Supplementary Figure 2j-m , all ANOVA p>0.05). Similar results were observed with ARID1A_Per (Supplementary Table 6). A previous study showed that CNAs in CRC cell lines do not drive changes in ARID1A protein expression whereas hypermethylation in ARID1A promoter does ( 23 ). KMT2C, SMG1 and TRAF7 alterations were significantly associated with future polyp development (all p<0.001, Table 6 ), with ORs of 2.022, 2.343 and 2.57, respectively, indicating a higher risk of future polyps. Sensitivity and specificity evaluations showed SMG1 had the highest sensitivity (70%), followed by KMT2C (26%) and TRAF7 (17.5%). TRAF7 exhibited the highest specificity (92.4%), followed by KMT2C (85%) and SMG1 (45%) ( Figure 3h-i ). A combined analysis of these mutations, along with ARID1A status, was performed to assess their collective predictive value for metachronous disease. Combining ARID1A status with KMT2C, SMG1 and TRAF7 CNA improves patient stratification Combining ARID1A status with KMT2C, SMG1, or TRAF7 alterations improved patient stratification for metachronous polyp risk. Binary combinations of ARID1A status and individual gene alterations resulted in four groups: ( 1 ) ARID1A low/no alteration, ( 2 ) ARID1A low/gain or amplification, ( 3 ) ARID1A high/no alteration and ( 4 ) ARID1A high/gain or amplification. Patient stratification was significantly enhanced ( Figure 4a-c , all p<0.001). This was evident in the cumulative percentage of patients developing metachronous disease, with lower percentages of patients in the low ARID1A/no alteration group compared to the rest. For example, 21% of patients with low ARID1A and no KMT2C alterations developed future disease within 36 months, compared to 40% in the other groups (life tables for all groups are shown in Supplementary Figure 3a-c for KMT2C, SMG1 and TRAF7). These results suggest that combining ARID1A status with KMT2C, SMG1 and TRAF7 mutations enhances risk stratification, especially for low BSG2020 risk patients ( Figure 4d-f ) and high-risk patients ( Figure 4g-i ). Download figure Open in new tab Supplementary Figure 3. Survival KM curves for ARID1A_Per/CNAs groups in terms of metachronous lesion development in the total cohort and BSG2020 low/high-risk subgroups. (a–c) Detailed life tables for ARID1A/KMT2C, ARID1A/SMG1 and ARID1A/TRAF7 combinations across different groups at 12–72 months of follow-up based on ARID1A histoscores. (d–f) One-minus KM survival curves for ARID1A_Per/KMT2C/SMG1/TRAF7 (groups 1–5) and future polyp outcomes in the total cohort, low BSG2020 and high BSG2020 risk groups (all p < 0.001). Group 1: ARID1A_Per low/no alteration; Group 2: ARID1A_Per low with one CNA alteration or ARID1A_Per high with no alteration; Group 3: ARID1A_Per high with one CNA alteration; Group 4: ARID1A_Per high with two CNA alterations and Group 5: ARID1A_Per high with three CNA alterations. (g–i) One-minus KM survival curves for ARID1A_Per/KMT2C/SMG1/TRAF7 (groups 1–5) and advanced future polyp outcomes in the total cohort, low BSG2020 and high BSG2020 risk groups (all p < 0.001). Life tables show the percentage of patients developing a future polyp at 1–6 years of follow-up within groups 1–5. Download figure Open in new tab Figure 4. Survival KM curves for ARID1A/CNAs groups in terms of metachronous lesion development in the total cohort and BSG202 low/high risk subgroups. (a–c) One-minus KM survival curves for ARID1A/KMT2C, ARID1A/SMG1 and ARID1A/TRAF7 combinations in the total cohort (all p= 0.001). Life tables show the percentage of patients with future polyp development at 1–6 years of follow-up. (d–f) One-minus KM survival curves for ARID1A/KMT2C, ARID1A/SMG1 and ARID1A/TRAF7 combinations in the low BSG2020 risk group (all p= 0.001). Life tables show the percentage of patients with future polyp development in low ARID1A/no KMT2C alteration group vs all other groups at 1–6 years of follow-up in the low BSG2020 risk patients. (g–i) One-minus KM survival curves for ARID1A/KMT2C, ARID1A/SMG1 and ARID1A/TRAF7 combinations in the high BSG2020 risk group (p< 0.001, p= 0.001 and p= 0.001, respectively). Life tables show the percentage of patients with future polyp development in low ARID1A/no SMG1 alteration group vs all other groups at 1–6 years of follow-up in patients with high BSG2020 risk score. (j–l) One-minus KM survival curves for ARID1A/KMT2C/SMG1/TRAF7 (groups 1–5) and future polyp outcome in the total cohort, low BSG2020 and high BSG2020 risk groups (all p< 0.001). Life tables show the percentage of patients developing a future polyp in low ARID1A/no TRAF7 alteration group vs all other groups at 1–6 years of follow-up in group 1 vs all other groups. (m–o) One-minus KM survival curves for ARID1A/KMT2C/SMG1/TRAF7 (groups 1–5) and advanced future polyp outcome in the total cohort, low BSG2020 and high BSG2020 risk groups (p < 0.001, p = 0.029 and p < 0.001, respectively). Group 1: ARID1A low/no alteration; Group 2: ARID1A low with one CNA alteration or ARID1A high with no alteration; Group 3: ARID1A high with one CNA alteration; Group 4: ARID1A high with two CNA alterations and Group 5: ARID1A high with three CNA alterations. Life tables show the percentage of patients developing a future polyp at 1–6 years of follow-up. We then examined the collective impact of CNA changes in KMT2C, SMG1 and TRAF7 with ARID1A status on predicting metachronous polyp risk. Patients were categorised into five groups: Group 1: ARID1A low/no alteration; Group 2: ARID1A low with one CNA alteration or ARID1A high with no alteration; Group 3: ARID1A high with one CNA alteration; Group 4: ARID1A high with two CNA alterations and Group 5: ARID1A high with three CNA alterations. Results showed improved patient stratification based on these combinations ( Figure 4j-l , p<0.001), independent of the BSG2020 risk score ( Table 7 ). View this table: View inline View popup Download powerpoint Table 7. Multivariate Cox-regression analysis of ARID1A, KMT2C, SMG1 and TRAF7 combined groups using two cox regression models. Pairwise comparisons ( Table 8 ) reinforce the value of combining CNAs with ARID1A status in risk prediction across the entire cohort and BSG2020 subgroups. All higher alteration groups (especially those with ≥2 CNAs) had significantly higher polyp risk compared to the Group1. Similar results were observed for ARID1A_Per/CNA combined groups ( Supplementary Figure 3d-f ; Supplementary Table7). View this table: View inline View popup Download powerpoint Table 8. Log-rank P values of pairwise comparisons of metachronous polyp outcome among ARID1A histoscore/CNA alterations study groups in the total cohort and low and high BSG2020 risk subgroups. Integration of ARID1A and CNAs for predicting advanced polyp risk Patient stratification for advanced metachronous polyps differed significantly across ARID1A and CNAs combination groups ( Figure 4m ). Within 36 months, 22% of patients with ARID1A high and KMT2C, SMG1 and TRAF7 alterations developed advanced polyps, compared to 8% in the low ARID1A/no alteration group. Similar trends were observed in low and high BSG2020 risk groups ( Figure 4n-o ). Similar results were observed for ARID1A_Per combined with CNAs ( Supplementary Figure 3g-I , all p=0.001). HR of developing a future polyp or an advanced future polyp at 36 and 60 months are detailed in Tables 9 and 10 , respectively. The hazard of developing a future polyp increases progressively from Group 1 to Group 5 at both 36 and 60 months of follow-up. Compared to Group 1 (reference group), higher-risk groups (i.e. Groups 3, 4 and 5) show significantly elevated HRs with Group 5 having the highest risk (HR=3.97 at 36 months and HR=5.36 at 60 months, P ≤0.001). This trend suggests a cumulative effect of ARID1A expression and CNA alterations on polyp risk over time. View this table: View inline View popup Download powerpoint Table 9. Hazard ratio of developing a future polyp at 36 and 60 months using ARID1A and CNA combined groups in univariate analysis. View this table: View inline View popup Download powerpoint Table 10. Hazard ratio of developing an “advanced” future polyp at 36 and 60 months using ARID1A and CNA combined groups in univariate analysis The risk for developing advanced polyps also increases across the groups, though with more variability and wider confidence intervals. While Group 2 shows no significant association, Groups 3, 4 and especially Group 5 exhibit elevated HRs at 60 months, with Group 5 reaching statistical significance (HR=4.767, P =0.002). At 36 months, only Group 5 shows a statistically significant risk increase ( P =0.026), indicating that the combined alterations may have a stronger predictive value for advanced lesions over longer follow-up periods. Top activated and repressed GO pathways in pre-cancerous polyp tissue Cohort To explore the underlying biology among the ARID1A/CNAs groups, we performed pathway enrichment analysis using ssGSVA for GO pathways. Signature scores for a given gene set indicate how its expression differs from overall gene expression, calculated using the GSVA ranking method ( 22 ). The distribution of ssGSVA scores for GO-BP, GO-CC and GO-MF categories showed that some pathways have signature scores highly different from the overall gene expression ( supplementary Figure 4a ). We then explored the top activated and most repressed pathways in the patient cohort, across ARID1A/CNA groups, using the 10% and 90% percentile thresholds of median values across all patients (Supplementary file 1, Supplementary Figure 4b-d ). The thresholds were selected based on the assumption that enrichment scores of gene sets are ranked higher or lower than the rest of genes in the cohort making them interesting for further analysis. Download figure Open in new tab Supplementary Figure 4. Heatmaps of most activated and inhibited pathways of GO-BP, GO-MF and GO-CC in the cohort and multiple comparisons of pathways in terms of future polyp development. (a) Violin plots of GO-BP, GO-MF and GO-CC pathway enrichment scores across all patients. The median signature score of each pathway across all samples was calculated and the median values were used to generate the violin plots (b-d) Heatmaps of the top activated and repressed pathways in the total cohort based on the 10th and 90th percentile cutoffs in GO-BP (a) , GO-MF (b) and GO-CC (c) , showing ssGSVA across all patient samples. (panel e) Violin plots of selected GO-BP pathways among groups 1–5 in relation to metachronous polyp outcome. Comparisons included group 1 with/without a metachronous polyp and groups 2–5 collectively with/without a future polyp. No statistically significant associations were observed between pathway enrichment scores and group comparisons in terms of future polyp development. (panel f) Violin plots of selected GO-MF pathways among groups 1–5 in relation to metachronous polyp outcome. Comparisons included group 1 with/without a metachronous polyp and groups 2–5 collectively with/without a future polyp. No statistically significant associations were observed between pathway enrichment scores and group comparisons in terms of future polyp development. In the top activated and inhibited GO-BP pathways ( Figure 5a ), significant differences were observed between groups 2-5 vs group 1, as assessed by ANOVA multiple comparisons ( Figure 5 -panel b). GO-MF pathways ( Figure 5c and panel d) also showed significant differences between groups 2-5 vs group 1. However, differences in GO-CC pathways expression did not reach statistical significance ( Figure 5e-f ). Significant pathways included DNA damage repair, histone H3_K4 methylation, metabolic and signalling pathways and immunological pathways. Download figure Open in new tab Figure 5. Transcriptomic analysis of polyps for ARID1A/KMT2C/SMG1/TRAF7 combination groups (groups 1-5). (a) Heatmap showing expression of the most activated and repressed GO-BP pathways in groups 1-5, displayed as normalised values calculated as the ratio of pathway expression out of total expression. (panel b) Violin plots showing enrichment scores of significantly different GO-BP pathways among groups 1-5, with multiple comparisons of groups 2-5 vs group 1. (c) Heatmap showing expression of the most activated and repressed GO-MF pathways in groups 1-5, displayed as normalised values calculated as the ratio of pathway expression out of total expression. (panel d) Violin plots showing enrichment scores of significantly different GO-MF pathways among groups 1-5, with multiple comparisons of groups 2–5 vs group 1. (e) Heatmap showing expression of the most activated and repressed GO-CC pathways in groups 1–5, displayed as normalised values calculated as the ratio of pathway expression out of total expression. (panel f) Violin plots showing enrichment scores of significantly different GO-CC pathways among groups 1–5, with multiple comparisons of groups 2–5 vs group 1. Distribution of enrichment scores per pathway for all patients is also plotted (grey) to visualise distribution in all patients (panels b, d and f). Despite the involvement of distinct molecular pathways, their expression did not appear to directly associate with metachronous polyp outcome ( Supplementary Figure 4 , panels d-e). Discussion In this study, we demonstrated that ARID1A protein expression and CNAs in KMT2C, SMG1, and TRAF7 can be employed as potential biomarkers for predicting metachronous polyp risk in large colonic polyps excised through the Scottish bowel cancer screening programme ( 2 , 26 ). The current BSG2020 stratification tool often misclassifies risk, particularly for advanced polyps, as it only incorporates polyp number, size, histology and dysplasia without molecular classifiers( 1 ). Although ARID1A mutations occur in approximately 20% of pre-malignant colorectal polyps( 4 ), their predictive significance remains unclear. This study is the first to report that low ARID1A expression is associated with favourable outcomes for metachronous lesions, independent of the BSG2020 risk score. Similar findings have been observed in CRC and endometrial cancer ( 17 ), though some studies link ARID1A loss to adverse outcomes ( 27 ). The mosaic ARID1A expression pattern has been described previously in cancer tissues such as uterine endometroid carcinoma and was more frequent in low grade tumours ( 28 ). We propose that high ARID1A protein in the index polyps may serve as a highly sensitive biomarker for identifying patients at higher risk of developing metachronous lesions. Mutational profiling revealed frequent mutations in APC, Wnt-related genes, KRAS, MSH3 and transcription factors such as ZFHX3 and MGA. However, unlike CRC, these mutations were not predictive of outcome ( 29 ). This may be due to compensation protective mechanisms where hierarchical differentiation in self-renewing tissues suppresses somatic evolution by limiting mutational burden through reduced cell divisions and restricting somatic selection via differentiation-driven loss of proliferative ability ( 30 ). Both ARID1A and KMT2D mutations were associated with reduced ARID1A protein levels. Temporal ARID1A loss was linked to chromatin accessibility changes and induction of pluripotency growth factors ( 31 ). Other studies suggested that ARID1A mutations activate mismatch repair mechanisms via MSH2 and immune responses, particularly in CMS1-class CRC ( 32 ). As KMT2D is a histone methyltransferase, its mutations may reduce H3K4 methylation, altering chromatin accessibility and gene transcription including ARID1A ( 33 ) or reduce recruitment of TET2, a key regulator of ARID1A expression ( 34 ). Frequent C>T substitution mutations suggest an active methylation process affecting ARID1A ( 23 ) and implicating an epigenetic regulation process in polyp development. While the prognostic role of CNAs in CRC is well established (reviewed in ( 35 )), little is known about CNAs heterogeneity in premalignant polyps. A previous study on a cohort of HGD adenomas revealed focal CNAs (≤10 Mb) on chromosomes 6, 7 and 16 ( 36 ) however CNAs could not discriminate between HGD-polyps with/out a metachronous polyp. Despite the limited mutational panel used in this study, we demonstrate that CNAs can predict metachronous lesion outcome. The discrepancy may be due to the use of NGS-based approach in our study which offers higher resolution than the array-comparative genomic hybridisation methodology ( 36 ) which relies on differential hybridisation intensities and larger-scale segmentation.. This is the first study to report that CNAs in CDKN2C, ELF3, MET, KMT2C, SMG1 and TRAF7 are characteristic of colonic polyps, with KMT2C, SMG1 and TRAF7 being associated with metachronous lesion outcomes. These findings suggest that CNAs may represent early events in polyp development and implicate the involvement of pathways regulating cell cycle ( 37 ), TGF-β, EGFR/mTORC ( 38 ) and NF-κB signalling ( 39 ), MET/EFGR ( 40 ), KMT2C/methylations ( 41 ), regulation of DNA-damage responses ( 42 ) and TNF-a/Jun pathway ( 43 ) in polyp biology. The integration of ARID1A status with CNAs in genes such as KMT2C, SMG1, or TRAF7 enhances risk stratification within the BSG2020 framework. This combination reflects the interplay of chromatin remodelling, DNA damage response, and immune-related pathways, suggesting a convergence of mechanisms driving polyp progression. Additionally, the combination may be particularly helpful when assessing advanced future polyp risk within 3 years of polypectomy. Multimodal biomarker strategies including proteomics ( 44 ), genome-wide CNA analysis, and targeted FISH panels including CDX2 ( 36 ) further support the potential of integrated molecular profiling to inform biologically driven surveillance approaches. Transcriptomic analysis revealed that higher-risk patients exhibit upregulation of pathways related to DNA repair, oxidative stress response, epigenetic modification, and mitochondrial RNA processing. These signatures suggest a cellular environment marked by stress and genomic instability, aligning with the observed CNAs and reinforcing the link between transcriptional activity and structural genomic alterations. In contrast, enrichment of PI3K signalling, IL-1α production, and regulation of translation during ER stress in the lower-risk group suggests a more regulated inflammatory and translational stress response. Together, these findings underscore the importance of integrated multi-omic analyses in elucidating the underlying biology of polyp progression, ultimately supporting more refined risk assessment and personalised surveillance strategies. In conclusion, our study provides direct evidence from patient data that incorporating molecular profiles of ARID1A and a panel of CNAs into the BSG2020 risk score can improve risk stratification. However, the sequence of early polyp events leading to progression remains unclear, highlighting the need for further investigation. Strengths and limitations of the study Polyp TMAs primarily served as a discovery tool however the reproducibility of our findings in the training and test datasets, along with expression patterns observed in full sections, suggests that a large-scale study on full tissue sections is warranted. Investigating the expression of ARID1A in synchronous polyps is needed to confirm whether index polyp data is sufficient or applies only to large polyps. Investigating ARID1A in paired normal colon tissues could clarify whether its alterations are localised or regional. Given the limited mutational panel used, broader mutational and methylation profiling may reveal additional biomarkers. Validation of CNAs using methods like FISH is needed for clinical translation. Declatations Ethical approval Ethical approval was granted through the West of Scotland Research Ethics Committee (GSH/20/CO/002). Data availability statement The clinical database is accessible through NHS SafeHaven Trusted Research Environment (TRE) subject to appropriate ethical approval and completion of mandatory information governance training modules. Alternatively, access can be arranged by contacting Professor Joanne Edwards. Genomics data can be accessed through Datacite DOIs: 10.5525/gla.researchdata.1223 , 10.5525/gla.researchdata.1498 and 10.5525/gla.researchdata.1544 . Transcriptomics datasets generated and/or analysed during the current study are available upon reasonable request by contacting the INCISE project director, Professor Joanne Edwards. Competing Interests The authors declare no conflict of interests. Funding This project was funded by Innovate UK (42497 and 10054829) JE, Beatson Cancer Charity (22-23-054) AA, CRUK Scotland Centre (CTRQQR-2021\100006) JE and PD. Patient and public involvement (PPI) Patient involvement in this study was facilitated through a dedicated meeting with the INCISE patient steering group https://www.gla.ac.uk/research/az/translationalcancerpathology/forpatientsandthepub lic/ . During this session, study data were presented and discussed. The group’s feedback helped inform the research questions explored in this paper and contributed to shaping ideas for future research. Acknowledgment We would like to acknowledge Glasow Tissue Research Facility (GTRF), Glasgow University, for TMA construction, sectioning and slide scanning. We would also like to thank the Greater Glasgow and Clyde Biorepository for providing polyp tissue samples essential for this study. Finally, we would like to show gratitude to INCISE patient steering group for supporting this research. Footnotes ↵ * On behalf of the INCISE collaborative References 1. ↵ Rutter MD , East J , Rees CJ , Cripps N , Docherty J , Dolwani S , et al. British Society of Gastroenterology/Association of Coloproctology of Great Britain and Ireland/Public Health England post-polypectomy and post-colorectal cancer resection surveillance guidelines . Gut . 2020 ; 69 ( 2 ): 201 – 23 . OpenUrl Abstract / FREE Full Text 2. ↵ Johnstone MS , Stoops R , Lynch G , Hay J , Jawny J , Sloan W , et al. Risk stratification for the detection of metachronous polyps after bowel screening polypectomy: clinical outcomes from the Integrated Technologies for Improved Polyp Surveillance (INCISE) study cohort . BJS Open . 2023 ; 7 ( 3 ). 3. ↵ Johnstone MS , Lynch G , Park J , McSorley S , Edwards J . Novel Methods of Risk Stratifying Patients for Metachronous, Pre-Malignant Colorectal Polyps: A Systematic Review . Crit Rev Oncol Hematol . 2021 ; 164 : 103421 . 4. ↵ Chen B , Scurrah CR , McKinley ET , Simmons AJ , Ramirez-Solano MA , Zhu X , et al. Differential pre-malignant programs and microenvironment chart distinct paths to malignancy in human colorectal polyps . Cell . 2021 ; 184 ( 26 ): 6262 – 80 e26. OpenUrl CrossRef PubMed 5. ↵ Cerami E , Gao J , Dogrusoz U , Gross BE , Sumer SO , Aksoy BA , et al. The cBio cancer genomics portal: an open platform for exploring multidimensional cancer genomics data . Cancer Discov . 2012 ; 2 ( 5 ): 401 – 4 . OpenUrl Abstract / FREE Full Text 6. ↵ Lei I , Gao X , Sham MH , Wang Z . SWI/SNF protein component BAF250a regulates cardiac progenitor cell differentiation by modulating chromatin accessibility during second heart field development . J Biol Chem . 2012 ; 287 ( 29 ): 24255 – 62 . OpenUrl Abstract / FREE Full Text 7. ↵ Katagiri A , Nakayama K , Rahman MT , Rahman M , Katagiri H , Nakayama N , et al. Loss of ARID1A expression is related to shorter progression-free survival and chemoresistance in ovarian clear cell carcinoma . Mod Pathol . 2012 ; 25 ( 2 ): 282 – 8 . OpenUrl PubMed 8. ↵ Jiang T , Chen X , Su C , Ren S , Zhou C . Pan-cancer analysis of ARID1A Alterations as Biomarkers for Immunotherapy Outcomes . J Cancer . 2020 ; 11 ( 4 ): 776 – 80 . OpenUrl CrossRef PubMed 9. ↵ Cho H , Kim JS , Chung H , Perry C , Lee H , Kim JH . Loss of ARID1A/BAF250a expression is linked to tumor progression and adverse prognosis in cervical cancer . Hum Pathol . 2013 ; 44 ( 7 ): 1365 – 74 . OpenUrl CrossRef PubMed 10. Cho HD , Lee JE , Jung HY , Oh MH , Lee JH , Jang SH , et al. Loss of Tumor Suppressor ARID1A Protein Expression Correlates with Poor Prognosis in Patients with Primary Breast Cancer . J Breast Cancer . 2015 ; 18 ( 4 ): 339 – 46 . OpenUrl CrossRef PubMed 11. ↵ Jang SH , Lee JH , Lee HJ , Cho H , Ahn H , Song IH , et al. Loss of ARID1A expression is associated with poor prognosis in non-small cell lung cancer . Pathol Res Pract . 2020 ; 216 ( 11 ): 153156 . OpenUrl PubMed 12. ↵ Reske JJ , Wilson MR , Holladay J , Siwicki RA , Skalski H , Harkins S , et al. Co-existing TP53 and ARID1A mutations promote aggressive endometrial tumorigenesis . PLoS Genet . 2021 ; 17 ( 12 ): e1009986 . OpenUrl PubMed 13. ↵ Hartley A , Galbraith LCA , Shaw R , Tibbo A , Veeratterapillay R , Wilson L , et al. Loss of ARID1A accelerates prostate tumourigenesis with a proliferative collagen-poor phenotype through co-operation with AP1 subunit cFos . Br J Cancer . 2025 . 14. ↵ Ahlquist DA . Multi-target stool DNA test: a new high bar for noninvasive screening . Dig Dis Sci . 2015 ; 60 ( 3 ): 623 – 33 . OpenUrl CrossRef PubMed 15. ↵ Zhu G , Shi R , Li Y , Zhang Z , Xu S , Chen C , et al. ARID1A, ARID1B, and ARID2 Mutations Serve as Potential Biomarkers for Immune Checkpoint Blockade in Patients With Non-Small Cell Lung Cancer . Front Immunol. 2021 ; 12 : 670040 . OpenUrl PubMed 16. ↵ Lee LH , Sadot E , Ivelja S , Vakiani E , Hechtman JF , Sevinsky CJ , et al. ARID1A expression in early stage colorectal adenocarcinoma: an exploration of its prognostic significance . Hum Pathol . 2016 ; 53 : 97 – 104 . OpenUrl PubMed 17. ↵ Wei XL , Wang DS , Xi SY , Wu WJ , Chen DL , Zeng ZL , et al. Clinicopathologic and prognostic relevance of ARID1A protein loss in colorectal cancer . World J Gastroenterol . 2014 ; 20 ( 48 ): 18404 – 12 . OpenUrl PubMed 18. ↵ McShane LM , Altman DG , Sauerbrei W , Taube SE , Gion M , Clark GM , et al. REporting recommendations for tumour MARKer prognostic studies (REMARK) . Br J Cancer . 2005 ; 93 ( 4 ): 387 – 91 . OpenUrl CrossRef PubMed Web of Science 19. ↵ Bankhead P , Loughrey MB , Fernandez JA , Dombrowski Y , McArt DG , Dunne PD , et al. QuPath: Open source software for digital pathology image analysis . Sci Rep . 2017 ; 7 ( 1 ): 16878 . OpenUrl CrossRef PubMed 20. ↵ Suehnholz SP , Nissan MH , Zhang H , Kundra R , Nandakumar S , Lu C , et al. Quantifying the Expanding Landscape of Clinical Actionability for Patients with Cancer . Cancer Discov . 2024 ; 14 ( 1 ): 49 – 65 . OpenUrl CrossRef PubMed 21. ↵ Yeakley JM , Shepard PJ , Goyena DE , VanSteenhouse HC , McComb JD , Seligmann BE . A trichostatin A expression signature identified by TempO-Seq targeted whole transcriptome profiling . PLoS One . 2017 ; 12 ( 5 ): e0178302 . OpenUrl CrossRef PubMed 22. ↵ Hanzelmann S , Castelo R , Guinney J . GSVA: gene set variation analysis for microarray and RNA-seq data . BMC Bioinformatics . 2013 ; 14 : 7 . 23. ↵ Erfani M , Hosseini SV , Mokhtari M , Zamani M , Tahmasebi K , Alizadeh Naini M , et al. Altered ARID1A expression in colorectal cancer . BMC Cancer . 2020 ; 20 ( 1 ): 350 . OpenUrl PubMed 24. ↵ Mathur R , Alver BH , San Roman AK , Wilson BG , Wang X , Agoston AT , et al. ARID1A loss impairs enhancer-mediated gene regulation and drives colon cancer in mice . Nat Genet . 2017 ; 49 ( 2 ): 296 – 302 . OpenUrl CrossRef PubMed 25. ↵ Zhai Y , Kuick R , Tipton C , Wu R , Sessine M , Wang Z , et al. Arid1a inactivation in an Apc- and Pten-defective mouse ovarian cancer model enhances epithelial differentiation and prolongs survival . J Pathol . 2016 ; 238 ( 1 ): 21 – 30 . OpenUrl CrossRef PubMed 26. ↵ Mansouri D , McMillan DC , Grant Y , Crighton EM , Horgan PG . The Impact of Age, Sex and Socioeconomic Deprivation on Outcomes in a Colorectal Cancer Screening Programme . Plos One . 2013 ; 8 ( 6 ). 27. ↵ Xiao Y , Liu G , Ouyang X , Zai D , Zhou J , Li X , et al. Loss of ARID1A Promotes Hepatocellular Carcinoma Progression via Up-regulation of MYC Transcription . J Clin Transl Hepatol . 2021 ; 9 ( 4 ): 528 – 36 . OpenUrl PubMed 28. ↵ Mao TL , Ardighieri L , Ayhan A , Kuo KT , Wu CH , Wang TL , et al. Loss of ARID1A expression correlates with stages of tumor progression in uterine endometrioid carcinoma . Am J Surg Pathol . 2013 ; 37 ( 9 ): 1342 – 8 . OpenUrl CrossRef PubMed 29. ↵ Conlin A , Smith G , Carey FA , Wolf CR , Steele RJ . The prognostic significance of K-ras, p53, and APC mutations in colorectal carcinoma . Gut . 2005 ; 54 ( 9 ): 1283 – 6 . OpenUrl Abstract / FREE Full Text 30. ↵ Demeter M , Derenyi I , Szollosi GJ . Trade-off between reducing mutational accumulation and increasing commitment to differentiation determines tissue organization . Nat Commun . 2022 ; 13 ( 1 ): 1666 . OpenUrl CrossRef PubMed 31. ↵ Blumli S , Wiechens N , Wu MY , Singh V , Gierlinski M , Schweikert G , et al. Acute depletion of the ARID1A subunit of SWI/SNF complexes reveals distinct pathways for activation and repression of transcription . Cell Rep . 2021 ; 37 ( 5 ): 109943 . OpenUrl CrossRef PubMed 32. ↵ Mehrvarz Sarshekeh A , Alshenaifi J , Roszik J , Manyam GC , Advani SM , Katkhuda R , et al. ARID1A Mutation May Define an Immunologically Active Subgroup in Patients with Microsatellite Stable Colorectal Cancer . Clin Cancer Res . 2021 ; 27 ( 6 ): 1663 – 70 . OpenUrl Abstract / FREE Full Text 33. ↵ Ortega-Molina A , Boss IW , Canela A , Pan H , Jiang Y , Zhao C , et al. The histone lysine methyltransferase KMT2D sustains a gene expression program that represses B cell lymphoma development . Nat Med . 2015 ; 21 ( 10 ): 1199 – 208 . OpenUrl CrossRef PubMed 34. ↵ Jin Y , Rao K , Zheng J , Zhang X , Luo Y , He J . Deficiency of TET2-mediated KMT2D self-transcription confers a targetable vulnerability in hepatocellular carcinoma . PNAS Nexus . 2024 ; 3 ( 11 ): pgae504 . OpenUrl 35. ↵ Ried T , Meijer GA , Harrison DJ , Grech G , Franch-Expósito S , Briffa R , et al. The landscape of genomic copy number alterations in colorectal cancer and their consequences on gene expression levels and disease outcome . Mol Aspects Med . 2019 ; 69 : 48 – 61 . OpenUrl CrossRef PubMed 36. ↵ Fiedler D , Heselmeyer-Haddad K , Hirsch D , Hernandez LS , Torres I , Wangsa D , et al. Single-cell genetic analysis of clonal dynamics in colorectal adenomas indicates gain as a predictor of recurrence . Int J Cancer . 2019 ; 144 ( 7 ): 1561 – 73 . OpenUrl CrossRef PubMed 37. ↵ Li GS , Chen G , Liu J , Tang D , Zheng JH , Luo J , et al. Clinical significance of cyclin-dependent kinase inhibitor 2C expression in cancers: from small cell lung carcinoma to pan-cancers . BMC Pulm Med . 2022 ; 22 ( 1 ): 246 . OpenUrl PubMed 38. ↵ Wang JL , Chen ZF , Chen HM , Wang MY , Kong X , Wang YC , et al. Elf3 drives beta-catenin transactivation and associates with poor prognosis in colorectal cancer . Cell Death Dis . 2014 ; 5 ( 5 ): e1263 . OpenUrl CrossRef PubMed 39. ↵ Zotti T , Scudiero I , Vito P , Stilo R . The Emerging Role of TRAF7 in Tumor Development . J Cell Physiol . 2017 ; 232 ( 6 ): 1233 – 8 . OpenUrl PubMed 40. ↵ Raghav K , Morris V , Tang C , Morelli P , Amin HM , Chen K , et al. MET amplification in metastatic colorectal cancer: an acquired response to EGFR inhibition, not a de novo phenomenon . Oncotarget . 2016 ; 7 ( 34 ): 54627 – 31 . OpenUrl PubMed 41. ↵ Larsson C , Cordeddu L , Siggens L , Pandzic T , Kundu S , He L , et al. Restoration of KMT2C/MLL3 in human colorectal cancer cells reinforces genome-wide H3K4me1 profiles and influences cell growth and gene expression . Clin Epigenetics . 2020 ; 12 ( 1 ): 74 . OpenUrl PubMed 42. ↵ Bamberger C , Pankow S , Yates JR , 3rd . . SMG1 and CDK12 Link DeltaNp63alpha Phosphorylation to RNA Surveillance in Keratinocytes . J Proteome Res. 2021 ; 20 ( 12 ): 5347 – 58 . OpenUrl PubMed 43. ↵ Scudiero I , Zotti T , Ferravante A , Vessichelli M , Reale C , Masone MC , et al. Tumor necrosis factor (TNF) receptor-associated factor 7 is required for TNFalpha-induced Jun NH2-terminal kinase activation and promotes cell death by regulating polyubiquitination and lysosomal degradation of c-FLIP protein . J Biol Chem . 2012 ; 287 ( 8 ): 6053 – 61 . OpenUrl Abstract / FREE Full Text 44. ↵ Bech JM , Terkelsen T , Bartels AS , Coscia F , Doll S , Zhao S , et al. Proteomic Profiling of Colorectal Adenomas Identifies a Predictive Risk Signature for Development of Metachronous Advanced Colorectal Neoplasia . Gastroenterology . 2023 ; 165 ( 1 ): 121 – 32 e5. OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted May 05, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following ARID1A expression and genomic alterations as predictive biomarkers for metachronous colorectal polyp risk stratification Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share ARID1A expression and genomic alterations as predictive biomarkers for metachronous colorectal polyp risk stratification Aula Ammar , Amna Matly , Kimiya Kabiri Arani , Scott Murray , Mayuri Hendricks , Alexander Winton , Natalie Fisher , Noori Maka , Gerard Lynch , Jennifer Hay , Mark Johnstone , Philip Dunne , Stephen McSorley , Joanne Edwards medRxiv 2025.05.03.25326857; doi: https://doi.org/10.1101/2025.05.03.25326857 Share This Article: Copy Citation Tools ARID1A expression and genomic alterations as predictive biomarkers for metachronous colorectal polyp risk stratification Aula Ammar , Amna Matly , Kimiya Kabiri Arani , Scott Murray , Mayuri Hendricks , Alexander Winton , Natalie Fisher , Noori Maka , Gerard Lynch , Jennifer Hay , Mark Johnstone , Philip Dunne , Stephen McSorley , Joanne Edwards medRxiv 2025.05.03.25326857; doi: https://doi.org/10.1101/2025.05.03.25326857 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Pathology Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffc171c8da3ad07',t:'MTc3OTQ1NTczMw=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.