Artificial Intelligence-Enabled Precision Medicine Reveals Prognostic Impact of TGF-Beta Pathway Alterations in FOLFOX-Treated Early-Onset Colorectal Cancer Among Disproportionately Affected Populations

preprint OA: closed CC-BY-NC-ND-4.0
📄 Open PDF Full text JSON View at publisher
Full text 72,305 characters · extracted from preprint-html · click to expand
Artificial Intelligence-Enabled Precision Medicine Reveals Prognostic Impact of TGF-Beta Pathway Alterations in FOLFOX-Treated Early-Onset Colorectal Cancer Among Disproportionately Affected Populations | medRxiv
Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Artificial Intelligence-Enabled Precision Medicine Reveals Prognostic Impact of TGF-Beta Pathway Alterations in FOLFOX-Treated Early-Onset Colorectal Cancer Among Disproportionately Affected Populations View ORCID Profile Fernando C. Diaz , View ORCID Profile Brigette Waldrup , View ORCID Profile Francisco G. Carranza , View ORCID Profile Sophia Manjarrez , View ORCID Profile Enrique Velazquez-Villarreal doi: https://doi.org/10.1101/2025.08.11.25333363 Fernando C. 
Diaz 1 Lineberger Comprehensive Cancer Center, University of North Carolina , Chapel Hill, NC, United States M.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Fernando C. Diaz Brigette Waldrup 2 City of Hope, Beckman Research Institute, Department of Integrative Translational Sciences , Duarte, CA B.S. Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Brigette Waldrup Francisco G. Carranza 2 City of Hope, Beckman Research Institute, Department of Integrative Translational Sciences , Duarte, CA Ph.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Francisco G. Carranza Sophia Manjarrez 2 City of Hope, Beckman Research Institute, Department of Integrative Translational Sciences , Duarte, CA B.S. Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sophia Manjarrez Enrique Velazquez-Villarreal 2 City of Hope, Beckman Research Institute, Department of Integrative Translational Sciences , Duarte, CA 3 City of Hope Comprehensive Cancer Center , Duarte, CA M.D., Ph.D., M.P.H., M.S. Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Enrique Velazquez-Villarreal For correspondence: evelazquezvilla{at}coh.org Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Early-onset colorectal cancer (EOCRC; <50 years) incidence is increasing most rapidly among Hispanic/Latino (H/L) populations. While the transforming growth factor–beta (TGF-β) pathway influences colorectal cancer (CRC) progression, its prognostic role in FOLFOX-treated EOCRC, particularly in H/L patients, is unclear. 
We analyzed 2,515 CRC cases (H/L = 266; NHW = 2,249) stratified by ancestry, age at onset, and FOLFOX treatment using Fisher’s exact, chi-square, and Kaplan–Meier analyses. We then applied AI-HOPE and AI-HOPE-TGFβ, conversational artificial intelligence (AI) platforms that integrate clinical, genomic, and treatment data, to perform complex, natural language–driven queries requiring multi-parameter integration. TGF-β pathway alterations occurred in 28–39% of H/L and 23–31% of NHW patients, with SMAD4 as the predominant driver. BMPR1A mutations were enriched in FOLFOX-treated EO H/L patients (5.5% vs. 1.1% EO NHW; p = 0.0272), while late-onset NHW non-FOLFOX cases had higher SMAD2/TGFBR2 mutation rates. In FOLFOX-treated EO H/L patients, TGF-β pathway alterations predicted poorer survival (p = 0.029); no survival impact was seen in other groups. SMAD4 mutations were less frequent in EO H/L than EO NHW receiving FOLFOX (2.74% vs. 13.87%; p = 0.013). TGF-β pathway alterations may serve as an ancestry- and treatment-specific biomarker of poor prognosis in FOLFOX-treated EO H/L CRC. AI-enabled integration accelerated biomarker discovery, supporting precision medicine. 1. Introduction Colorectal cancer (CRC) has traditionally been considered a disease of older adults, with incidence rates historically declining or stabilizing in high-income countries due to advances in prevention and widespread access to early screening programs ( 1 ). Despite this progress, CRC remains the third most commonly diagnosed cancer and ranks as the third leading cause of cancer-related deaths in men and the fourth in women ( 2 ). In contrast to the overall decline, the incidence of early-onset colorectal cancer (EOCRC)— defined as diagnosis before age 50—has risen sharply over the past two decades and is projected to become the leading cause of cancer-related deaths among individuals aged 20 to 49 in the United States by 2030 ( 3 – 9 ). 
This increase is particularly pronounced among Hispanic/Latino (H/L) populations, which have experienced some of the steepest rises in EOCRC incidence and mortality in recent years ( 2 , 10 – 12 ). Representing 14% of the U.S. GDP (∼$3.2 trillion) and 18% of the national workforce ( 13 , 14 ), the H/L population’s growing cancer burden underscores a pressing public health and economic challenge. Understanding the genomic and molecular drivers of EOCRC in these disproportionately affected groups is essential for guiding prevention, drug development, and personalized treatment strategies. Although emerging research has identified important molecular differences between EOCRC and late-onset colorectal cancer (LOCRC), findings remain inconsistent. Discrepancies in reported tumor mutation burden, microsatellite instability (MSI) status, and PD-L1 expression have been documented ( 15 – 17 ). Nonetheless, several studies have highlighted unique biomarkers in EOCRC—including LINE-1 hypomethylation and distinct mutational profiles involving genes such as SMAD4, TP53, APC, and KRAS— suggesting a potentially different biological trajectory compared to LOCRC ( 15 – 19 ). Most prior genomic studies, however, have been conducted in predominantly non-Hispanic White (NHW) populations, leaving critical gaps in our understanding of EOCRC molecular features within H/L and other underserved groups ( 20 ). Our recent work has begun addressing this gap by characterizing the mutational landscape of key oncogenic pathways, including WNT, MAPK, JAK/STAT, PI3K, and TGF-beta, in H/L EOCRC patients ( 21 – 23 ). The TGF-beta pathway plays a pivotal role in regulating cell differentiation, growth, apoptosis, and adhesion ( 24 , 25 ). 
In cancer, TGF-beta signaling exhibits a context-dependent dual role: functioning as a tumor suppressor in early stages while promoting epithelial-to-mesenchymal transition (EMT) and contributing to an immunosuppressive tumor microenvironment in advanced disease ( 26 , 27 ). SMAD4 alterations have been implicated in EMT progression and poor outcomes in metastatic CRC ( 27 ), while other TGF-beta pathway genes—including BMP7, TGFBR2, and ACVR1B—have been linked to CRC pathogenesis and prognosis ( 21 , 28 , 29 ). Importantly, our previous findings revealed a higher prevalence of BMP7 alterations in H/L EOCRC patients, with improved outcomes observed in those lacking such alterations ( 21 ). For metastatic microsatellite stable (MSS) CRC without actionable mutations and proficient mismatch repair (pMMR), the American Society of Clinical Oncology (ASCO) recommends the FOLFOX regimen—comprising folinic acid, fluorouracil (5-FU), and oxaliplatin—as the standard first-line treatment ( 30 , 31 ). However, EOCRC patients treated with FOLFOX appear to experience poorer overall survival and higher treatment-related toxicity compared to LOCRC patients ( 32 ). The extent to which alterations in the TGF-beta pathway influence FOLFOX responsiveness in EOCRC, particularly in disproportionately affected populations, remains largely unexplored. Recent advances in artificial intelligence (AI) have opened new avenues for addressing these knowledge gaps. Conversational AI agents, such as our AI-HOPE ( 33 ) platform, enable dynamic integration of clinical, genomic, and molecular data to conduct complex precision medicine analyses. The specialized AI-HOPE-TGFbeta ( 34 ) module is designed to interrogate TGF-beta pathway alterations in CRC, rapidly generating pathway-specific queries, identifying clinically relevant mutation patterns, and correlating these with treatment outcomes. 
By enabling iterative, natural-language–driven exploration of large-scale datasets, AI agents overcome traditional bioinformatics bottlenecks and facilitate hypothesis generation that is both comprehensive and adaptable. Incorporating AI-HOPE-TGFbeta into this study allows us to systematically evaluate the relationship between TGF-beta pathway alterations and FOLFOX treatment outcomes in EOCRC, with a focus on disproportionately affected populations. This approach not only advances mechanistic understanding but also exemplifies how AI-driven tools can accelerate the translation of genomic insights into actionable clinical strategies. 2. Materials and Methods 2.1 Data sources and cohort assembly We conducted a retrospective analysis using de-identified clinical and genomic data from three publicly accessible CRC datasets available through the cBioPortal for Cancer Genomics platform: Colorectal Adenocarcinoma (TCGA, PanCancer Atlas), MSK-CHORD (MSK, Nature 2024), and GENIE BPC CRC. Datasets were selected for their inclusion of both somatic variant profiles and detailed therapeutic annotations, enabling accurate identification of chemotherapy regimens. Eligible cases included histologically confirmed colorectal, colon, or rectal adenocarcinoma with available primary tumor sequencing data. When multiple tumor samples were present for a single individual, only one sample was retained to avoid duplication bias. 2.2 Identification of disproportionately affected populations Ethnicity classification prioritized self-reported annotations within the original datasets. Individuals were assigned to the H/L group if labeled as “Hispanic or Latino,” “Spanish, NOS,” “Hispanic, NOS,” or “Latino, NOS.” When ethnicity was not explicitly stated, surname-based classification was applied using validated algorithms for identifying Hispanic origin. The comparator group consisted of Non-Hispanic White (NHW) patients meeting the same inclusion criteria. 
Age at diagnosis was extracted from clinical metadata, with EOCRC defined as diagnosis before age 50 and LOCRC as diagnosis at age 50 or older. 2.3 Treatment classification Patients were categorized as “FOLFOX-treated” if their treatment records documented concurrent or sequential administration of leucovorin, fluorouracil (5-FU), and oxaliplatin, consistent with standard first-line protocols for metastatic microsatellite stable CRC. Treatment start and stop dates were cross-referenced to confirm overlapping timelines for the three drugs. Individuals without recorded use of all three components were assigned to the non-FOLFOX group. 2.4 Definition of TGF-beta pathway alterations A curated list of TGF-beta signaling genes was compiled from peer-reviewed literature and pathway databases ( 24 – 29 ). Genes included SMAD family members, TGFBR1/2, BMP ligands, and other canonical signaling components implicated in CRC development and progression. Somatic alterations were extracted from cBioPortal mutation data and filtered to retain only non-synonymous variants (missense, nonsense, frameshift insertions/deletions, splice site, and start codon mutations). Pathway alteration status was defined as the presence of at least one qualifying mutation in any of the pathway genes. 2.5 Statistical analysis Comparisons of mutation frequencies between groups were performed using Fisher’s exact test or chi-square test, as appropriate. Continuous variables were evaluated with the Mann–Whitney U test. Overall survival (OS) was assessed using the Kaplan–Meier method, and differences between survival curves were evaluated using the log-rank test. Hazard ratios (HRs) and 95% confidence intervals (CIs) were estimated via univariate and multivariate Cox proportional hazards regression models. All statistical analyses were conducted in R (v4.3.2), with p-values < 0.05 considered statistically significant. 
2.6 Artificial Intelligence-enabled data integration, post-analysis scanning, and query execution To enhance analytical efficiency, ensure reproducibility, and strategically guide downstream statistical testing, we first deployed the AI-HOPE-TGF-Beta conversational AI agent ( 34 )—a specialized module within the AI-HOPE precision medicine framework ( 33 )—to conduct a comprehensive post-analysis scan of the selected CRC datasets. This process leveraged targeted, natural language–driven queries to rapidly interrogate the data and identify clinically relevant patterns for formal evaluation. Representative queries included: Among H/L CRC patients treated with FOLFOX, does TGF-β pathway alteration status associate with overall survival (OS)? Is BMPR1A mutation frequency different among early-onset H/L patients treated with FOLFOX? Identify all clinical features associated with NHW patients with TGF-β pathway alterations versus those without. Is there a difference in SMAD4 mutation prevalence between EOCRC H/L patients treated with FOLFOX and EOCRC NHW CRC patients treated with FOLFOX? The AI-HOPE ( 33 ) and AI-HOPE-TGF-Beta ( 34 ) platforms integrate structured clinical, genomic, and treatment data, enabling automated filtering, subgroup stratification, and statistical comparison based on these high-priority questions. Following this preliminary scan, the AI systems were used to: (1) formally identify patients meeting combined clinical (EOCRC, treatment group, ancestry) and molecular (TGF-beta pathway status) criteria; (2) generate subgroup-specific mutation frequency tables; and (3) perform outcome-based stratifications for survival analyses. The conversational interface supported iterative refinement of query parameters, ensuring alignment with study objectives, reducing manual data handling errors, and accelerating the transition from exploratory interrogation to validated statistical testing. 3. 
Results 3.1 Baseline Clinical and Demographic Profiles of H/L and NHW CRC Cohorts The clinical and demographic characteristics of the Hispanic/Latino (H/L) and Non-Hispanic White (NHW) colorectal cancer (CRC) cohorts—stratified by age at onset, FOLFOX treatment status, tumor type, sex, disease stage, microsatellite instability (MSI) status, and detailed ethnicity annotations—are presented in Table 1 . View this table: View inline View popup Table 1. Clinical and demographic characteristics of Hispanic/Latino (H/L) and Non-Hispanic White (NHW) colorectal cancer (CRC) patients, stratified by age at diagnosis, FOLFOX treatment status, tumor features, and ethnicity. Within the H/L cohort ( n = 266), EOCRC patients treated with FOLFOX accounted for 27.4% of cases, compared to 16.7% in the NHW cohort ( n = 2,249). In contrast, LOCRC patients treated with FOLFOX were more common among NHW patients (40.9%) than H/L patients (34.2%). A higher proportion of EOCRC cases without FOLFOX treatment was observed in the H/L group (19.5%) compared to NHW patients (13.4%). Regarding tumor type, colon adenocarcinoma was the most frequent diagnosis in both groups (61.7% H/L; 59.0% NHW), followed by rectal adenocarcinoma (24.1% H/L; 28.7% NHW) and combined colorectal adenocarcinoma (14.3% H/L; 12.2% NHW). Sex distribution was comparable between cohorts, with a slight predominance of males in both groups. All samples represented primary tumors. Stage at diagnosis showed similar patterns across groups, with most patients diagnosed at stages 1–3 (58.6% H/L; 55.0% NHW), while stage 4 disease was slightly less frequent in H/L patients (40.6%) than in NHW patients (44.7%). Microsatellite-stable (MSS) tumors predominated in both groups, though they were less frequent in H/L patients (75.2%) than in NHW patients (86.3%), and MSI status was missing for a higher proportion of the H/L cohort (13.2% vs. 1.9%). 
As expected, ethnicity sub-classifications confirmed complete separation between the groups: all NHW patients were recorded as non-Spanish/non-Hispanic, while H/L patients were most frequently annotated as “Spanish NOS; Hispanic NOS; Latino NOS” (86.5%), followed by “Mexican (includes Chicano)” (11.3%), with smaller representation from other H/L categories. 3.2 Genomic Comparisons Across Age Groups and Ancestral Backgrounds H/L patients by age and treatment status Clinical and genomic features of Hispanic/Latino (H/L) colorectal cancer patients, stratified by age at diagnosis and FOLFOX treatment status, are summarized in Table 2a . In early-onset cases, FOLFOX-treated patients had a slightly higher median diagnosis age (42 years; IQR 36–47) than those not treated (40 years; IQR 34–43), a difference that approached but did not reach statistical significance ( p = 0.0541). In late-onset patients, median diagnosis age was significantly lower in the FOLFOX-treated group (59 years; IQR 54–66) compared with the non-treated group (62 years; IQR 56–70; p = 0.0487). Median mutation counts were comparable across treatment groups for both age categories. TMB showed a non-significant trend toward higher values in early-onset non-FOLFOX cases, whereas in late-onset patients, TMB was significantly greater in non-FOLFOX cases (6.9; IQR 5.6–9.0) than in FOLFOX-treated cases (6.1; IQR 4.9–7.8; p = 0.0439). Fraction of genome altered (FGA) did not vary significantly between groups, and BMPR1A mutations were rare across all categories without treatment-related differences. View this table: View inline View popup Download powerpoint Table 2. Comparative clinical and genomic profiles of early-onset and late-onset colorectal cancer (CRC) patient cohorts. 
This table outlines clinical and molecular distinctions, including TGF-Beta pathway alterations and mutation burden, across key subgroups: (a) Early-Onset CRC (EOCRC) versus Late-Onset CRC (LOCRC) within Hispanic/Latino (H/L) patients; (b) EOCRC versus LOCRC within Non-Hispanic White (NHW) patients; (c) EOCRC comparisons between H/L and NHW cohorts; and (d) EOCRC versus LOCRC comparisons between H/L treated and not treated with FOLFOX. Comparisons include median age at diagnosis, total mutation counts, and the prevalence of selected TGF-Beta pathway gene alterations, stratified by both ethnicity and age category. NHW patients by age and treatment status Findings for NHW patients are outlined in Table 2b . Among early-onset cases, the median diagnosis age did not differ between FOLFOX-treated (43 years; IQR 37–48) and non-treated patients (44 years; IQR 38–47; p = 0.5646). In contrast, late-onset FOLFOX-treated patients were significantly younger at diagnosis than non-treated patients (63 vs. 66 years; p = 4.15 × 10⁻⁷). Mutation counts were similar between early-onset treatment groups but significantly higher in late-onset non-FOLFOX patients (8; IQR 6–12) compared to treated patients (7; IQR 5–9; p = 1.22 × 10⁻⁵). TMB patterns mirrored these findings, with higher values in late-onset non-FOLFOX patients (6.6 vs. 6.1; p = 0.000285), but no differences in early-onset patients. FGA values did not differ significantly between treatment groups. BMPR1A mutations were rare. SMAD2 mutations were significantly more frequent in late-onset non-FOLFOX patients (6.6%) than in treated patients (3.8%; p = 0.0173). SMAD3 mutations were slightly higher in late-onset non-FOLFOX cases (5.2%) compared to treated cases (3.2%), though not statistically significant ( p = 0.0558). TGFBR2 mutations were significantly more common in late-onset non-FOLFOX patients (7.0%) than in FOLFOX-treated patients (4.1%; p = 0.0158). 
Ethnic comparisons in early-onset disease Table 2c compares early-onset H/L and NHW patients by treatment status. Among FOLFOX-treated early-onset patients, median diagnosis age was slightly lower in H/L patients (42 years; IQR 36–47) than NHW patients (43 years; IQR 37–48), but this was not statistically significant ( p = 0.0847). In the non-FOLFOX group, H/L patients were diagnosed significantly earlier (40 vs. 44 years; p = 0.00060). Mutation counts and TMB were similar between ethnic groups in both treatment categories, although TMB trended higher in FOLFOX-treated H/L patients (6.3 vs. 5.7; p = 0.0581). FGA values were comparable. Notably, BMPR1A mutations were significantly more frequent in FOLFOX-treated H/L patients (5.5%) than in treated NHW patients (1.1%; p = 0.0272), while prevalence in the non-FOLFOX group was similar between ethnicities. H/L patients: early-onset vs. late-onset Early-onset and late-onset H/L patients are compared within treatment categories in Table 2d . As expected, median diagnosis age was markedly lower in early-onset patients for both the FOLFOX-treated (42 vs. 59 years; p = 2.20 × 10⁻¹⁶) and non-FOLFOX groups (40 vs. 62 years; p = 2.20 × 10⁻¹⁶). In the FOLFOX-treated cohort, late-onset patients had slightly higher mutation counts than early-onset patients (8 vs. 7; p = 0.0172), while no significant differences were seen in the non-FOLFOX cohort. TMB was similar between age groups in FOLFOX-treated patients but was significantly higher in late-onset non-FOLFOX patients (6.9 vs. 5.5; p = 0.0328). FGA values were similar across age groups. BMPR1A mutations occurred exclusively in early-onset patients (5.5% in FOLFOX-treated, 3.8% in non-treated), with no cases in late-onset disease. 3.3 TGF-Beta Pathway Alterations by Age, Ancestry, and Treatment Status The prevalence of TGF-beta pathway alterations was examined across H/L and NHW CRC cohorts, stratified by age at diagnosis and FOLFOX treatment status ( Tables 3a – 3d ). 
View this table: View inline View popup Download powerpoint Table 3. Frequency of TGF-beta pathway alterations in colorectal cancer (CRC) patients stratified by age of onset, ancestry, and FOLFOX treatment status. This table summarizes the mutation frequencies of key genes involved in the TGF-beta signaling pathway among CRC patients. Analyses are stratified as follows: (3a) early-onset (EOCRC) vs. late-onset (LOCRC) and FOLFOX treatment status within the Hispanic/Latino (H/L) cohort; (3b) FOLFOX-treated vs. untreated patients within EOCRC and LOCRC subgroups of Non-Hispanic White (NHW) patients; (3c) EOCRC H/L vs. NHW patients by FOLFOX treatment status; and (3d) LOCRC H/L vs. NHW patients by FOLFOX treatment status. Genes analyzed include BMPR1A, SMAD2, SMAD3, SMAD4, TGFBR1, and TGFBR2. Statistically significant differences ( p < 0.05, Chi-square or Fisher’s exact test) are indicated with asterisks. This stratified analysis highlights potential interactions between age, ancestry, chemotherapy exposure, and TGF-beta pathway dysregulation. Within the H/L cohort, alteration frequencies were similar between treatment groups in both early- and late-onset disease. In early-onset H/L patients, TGF-beta alterations were observed in 28.8% of FOLFOX-treated cases and 36.5% of non-treated cases ( p = 0.4693). In late-onset patients, alterations occurred in 33.0% of treated cases and 38.0% of non-treated cases ( p = 0.6777). Within the NHW cohort, early-onset patients showed no significant difference in alteration prevalence between FOLFOX-treated (24.5%) and non-treated (26.2%) groups ( p = 0.6929). In contrast, among late-onset NHW patients, alterations were significantly less common in the FOLFOX-treated group (23.4%) than in the non-FOLFOX group (31.4%; p = 0.00051). Between-ancestry comparisons in early-onset disease showed no statistically significant differences in TGF-beta alteration prevalence. 
Among FOLFOX-treated early-onset patients, alterations were detected in 28.8% of H/L cases and 24.5% of NHW cases ( p = 0.5387). In non-FOLFOX patients, prevalence was 36.5% in H/L and 26.2% in NHW ( p = 0.1684). Between-ancestry comparisons in late-onset disease revealed that, among FOLFOX-treated patients, H/L cases had a higher alteration prevalence (33.0%) than NHW cases (23.4%), a difference that approached statistical significance ( p = 0.0569). In non-FOLFOX late-onset patients, alterations were present in 38.0% of H/L cases and 31.4% of NHW cases ( p = 0.4186). Across all stratifications, the majority of patients in both ethnic groups and age categories did not harbor TGF-beta pathway alterations. However, the higher prevalence observed in late-onset H/L patients receiving FOLFOX, as well as the significantly lower prevalence among late-onset NHW patients treated with FOLFOX, suggests potential ancestry- and treatment-specific differences in TGF-beta pathway involvement that may warrant further investigation. 3.4 Frequencies of Gene Alterations in the TGF-Beta Pathway Across all stratifications by age of onset, ancestry, and FOLFOX treatment status, SMAD4 consistently emerged as the most frequently altered TGF-beta pathway gene in CRC patients. In H/L early-onset patients (Table S1), SMAD4 mutations occurred in 12.3% of FOLFOX-treated and 13.5% of non-FOLFOX cases. Other recurrent alterations included BMPR1A (5.5% treated; 3.8% untreated), SMAD2 (5.5%; 5.8%), SMAD3 (4.1%; 5.8%), TGFBR2 (4.1%; 11.5%), and TGFBR1 (2.7%; 3.8%). No significant differences were observed by treatment status, although TGFBR2 mutations were nearly threefold higher in the untreated group. In H/L late-onset patients (Table S2), SMAD4 remained the most altered gene (17.6% treated; 18.0% untreated), followed by TGFBR2 (5.5%; 6.0%), SMAD3 (4.4%; 8.0%), and SMAD2 (3.3%; 8.0%). BMPR1A mutations were absent in both treatment groups. 
No statistically significant differences were found between treated and untreated cases. In NHW early-onset patients (Table S3), SMAD4 mutations occurred in 13.9% of treated and 12.3% of untreated cases, followed by SMAD2 (5.1%; 5.3%), TGFBR2 (2.9%; 5.3%), BMPR1A (1.1%; 3.3%), SMAD3 (3.7%; 4.0%), and TGFBR1 (2.1%; 3.0%). No significant differences were observed, though BMPR1A mutations were more than twice as common in the untreated group (p = 0.0558). In NHW late-onset patients (Table S4), SMAD4 mutations were detected in 13.6% of treated and 15.9% of untreated cases. SMAD2 (3.8% vs. 6.6%; p = 0.0173) and TGFBR2 (4.1% vs. 7.0%; p = 0.0158) mutations were significantly more frequent in non-FOLFOX cases, with SMAD3 trending higher (3.2% vs. 5.2%; p = 0.0558). When comparing early- vs. late-onset within H/L patients treated with FOLFOX (Table S5), BMPR1A mutations were significantly more common in early-onset cases (5.5% vs. 0.0%; p = 0.0375), while other genes showed no significant differences. In the non-FOLFOX group (Table S6), mutation patterns were similar across age categories, with SMAD4 remaining most frequent (13.5% early-onset; 18.0% late-onset). Among NHW patients, early- vs. late-onset comparisons revealed highly consistent mutation profiles regardless of treatment status. In FOLFOX-treated patients (Table S7), SMAD4 was most frequent (13.9% vs. 13.6%) with no significant differences. When comparing ancestries, in early-onset FOLFOX-treated patients (Table S8), BMPR1A mutations were significantly more common in H/L than NHW (5.5% vs. 1.1%; p = 0.0272), while other genes showed similar frequencies. In the non-FOLFOX group (Table S9), no significant ancestry-related differences were found, though TGFBR2 mutations were more than twice as frequent in H/L (11.5% vs. 5.3%; p = 0.1583). In late-onset FOLFOX-treated patients (Table S10), mutation distributions were similar between H/L and NHW, with SMAD4 most frequent (17.6% vs. 13.6%). 
In the non-FOLFOX group (Table S11), SMAD4 again predominated (18.0% vs. 15.9%), with no significant differences between ancestries. Overall, SMAD4 emerged as the predominant alteration across nearly all subgroups, while statistically significant differences were rare and primarily limited to BMPR1A (age- and ancestry-associated) and higher SMAD2/TGFBR2 mutation rates in late-onset NHW non-FOLFOX patients. 3.5 Mutational Landscape of the TGF-Beta Pathway Early-Onset H/L CRC Figure 1a depicts the TGF-beta pathway mutational profile in H/L patients with EOCRC (n = 113; Table S12), integrating mutation type, tumor mutational burden (TMB), and FOLFOX treatment status. Overall, 38 cases (33.6%) harbored at least one pathway alteration. SMAD4 was the most frequently mutated gene (14%), predominantly featuring missense mutations (green), alongside truncating events such as frame shift deletions (light blue) and nonsense mutations (red). TGFBR2 ranked second (8%), enriched for frame shift deletions and multi-hit events (black), with fewer missense variants. SMAD2 (6%) and SMAD3 (5%) exhibited mixed missense and truncating alterations, while BMPR1A (5%) mutations were largely missense with occasional splice site variants (orange). TGFBR1 (4%) alterations were mainly multi-hit events. TMB distribution was generally low, with a small subset of hypermutated cases lacking clear gene-specific clustering. FOLFOX-treated (blue) and untreated (red) cases occurred across all mutation categories without distinct segregation. Download figure Open in new tab Figure 1. Somatic mutation landscape of TGF-beta pathway genes in colorectal cancer (CRC) stratified by age of onset and ancestry. Oncoplots showing gene-level mutation profiles of key TGF-beta pathway components (SMAD4, TGFBR2, SMAD2, SMAD3, BMPR1A, and TGFBR1) in colorectal cancer, stratified by age of onset (early vs. late) and ancestry (Hispanic/Latino vs. Non-Hispanic White). 
Panels display mutation types, tumor mutational burden (TMB), and FOLFOX treatment status across: (a) 113 early-onset Hispanic/Latino (H/L) patients, (b) 123 late-onset H/L patients, (c) 607 early-onset Non-Hispanic White (NHW) patients, and (d) 1,409 late-onset NHW patients. Across all subgroups, SMAD4 is the most frequently mutated gene, with missense variants predominating, followed by recurrent alterations in TGFBR2, SMAD2, and SMAD3. Lower-frequency events are observed in BMPR1A and TGFBR1. The data highlight the widespread disruption of TGF-beta signaling in CRC and suggest potential age- and ancestry-associated variation in the somatic mutation landscape, as well as representation across both FOLFOX-treated and untreated patients. Late-Onset H/L CRC In LOCRC H/L (n = 123; Figure 1b ), 48 cases (39.0%) carried at least one TGF-beta pathway alteration. SMAD4 was again the most frequently altered (20%), with a predominance of missense variants, along with nonsense mutations, frame shift deletions, and multi-hit events. TGFBR2 and SMAD3 each accounted for 7% of cases, with TGFBR2 enriched for frame shift deletions and missense variants, and SMAD3 showing missense, nonsense, and occasional splice site changes. SMAD2 was mutated in 6% of tumors, with a combination of missense and frame shift variants. TGFBR1 was altered in 3% of cases, exclusively via missense mutations. BMPR1A mutations were absent in this cohort. TMB was generally low, with no clustering of hypermutated cases by specific genes. FOLFOX treatment distribution was balanced across mutation categories. Early-Onset NHW CRC In EOCRC NHW (n = 607; Figure 1c ), 171 cases (28.2%) harbored at least one TGF-beta pathway alteration. SMAD4 was most frequently mutated (15%), with a spectrum of missense, nonsense, frame shift, in-frame deletion, and multi-hit events. 
SMAD2 ranked second (6%), followed by TGFBR2 (4%) and SMAD3 (4%), each showing a combination of missense and truncating variants, with TGFBR2 particularly enriched for frame shift deletions. Lower-frequency alterations were observed in TGFBR1 (3%) and BMPR1A (2%). TMB distribution was predominantly low, with a subset of hypermutated tumors. FOLFOX treatment status showed no distinct clustering of mutation types. Compared to EO H/L CRC, the EO NHW cohort displayed a lower overall prevalence of TGF-beta pathway alterations, suggesting possible ancestry-related differences in pathway dysregulation. Late-Onset NHW CRC In LOCRC NHW (n = 1,409; Figure 1d ), 419 cases (29.7%) carried at least one TGF-beta pathway mutation. SMAD4 remained the most altered gene (16%), followed by TGFBR2 (6%) and SMAD2 (6%), both showing mixed missense and truncating events, and SMAD3 (4%), primarily with missense changes. BMPR1A and TGFBR1 each occurred in 2% of tumors, with BMPR1A displaying a diverse mutation spectrum and TGFBR1 largely limited to missense variants. TMB patterns were similar to other groups, with mostly low values and sporadic hypermutated cases. FOLFOX treatment distribution again showed no clear separation by mutation profile. Overall, across all age and ancestry groups, SMAD4 consistently emerged as the most frequently altered TGF-beta pathway gene, followed by variable contributions from TGFBR2, SMAD2, SMAD3, BMPR1A, and TGFBR1. Mutation spectra were dominated by missense changes, supplemented by truncating and splice site events, reflecting diverse mechanisms of pathway disruption. 3.6 Survival Impact of TGF-Beta Pathway Alterations Across Age, Ancestry, and Treatment Groups We investigated the relationship between TGF-beta pathway alterations and overall survival in colorectal cancer, stratifying patients by age of onset, ancestry, and FOLFOX treatment status using Kaplan–Meier survival analysis. 
In EOCRC H/L patients who received FOLFOX, the presence of TGF-beta pathway alterations was linked to a significant reduction in overall survival (p = 0.029; Figure 2a ). Patients without alterations maintained survival rates near 100% for most of the follow-up, whereas those with alterations showed a steeper decline during the first ∼50 months, followed by a plateau. The broader confidence intervals for the altered group reflect the smaller sample size. These results indicate that TGF-beta pathway mutations may adversely affect survival outcomes in EO H/L patients undergoing FOLFOX therapy. Download figure Open in new tab Download figure Open in new tab Figure 2. Kaplan–Meier survival analysis of TGF-beta pathway alterations across colorectal cancer (CRC) subgroups defined by age, ancestry, and FOLFOX treatment status. Overall survival curves are shown for: (a) Early-Onset Hispanic/Latino (H/L) treated with FOLFOX, (b) Early-Onset H/L not treated with FOLFOX, (c) Late-Onset H/L treated with FOLFOX, (d) Late-Onset H/L not treated with FOLFOX, (e) Early-Onset Non-Hispanic White (NHW) treated with FOLFOX, and (f) Early-Onset NHW not treated with FOLFOX. Each plot compares patients with TGF-beta pathway alterations to those without, illustrating subgroup-specific differences in survival outcomes. Shaded areas represent 95% confidence intervals, and number-at-risk tables indicate patient counts at key follow-up intervals. Among EO H/L patients who did not receive FOLFOX, there was no significant survival difference between altered and non-altered groups (p = 0.55; Figure 2b ). Both groups maintained high survival rates with closely overlapping confidence intervals. A slight divergence appeared at later time points, but this was not statistically meaningful, and interpretation was limited by the small number of altered cases. For late-onset H/L (LO H/L) patients treated with FOLFOX, survival outcomes were similar regardless of alteration status (p = 0.65; Figure 2c ). 
Although curves displayed slight early separation, they converged later, with overlapping confidence intervals and comparable median survival times, suggesting no substantial effect of TGF-beta pathway status. A similar pattern was observed in LO H/L patients not treated with FOLFOX, where survival curves were almost identical between groups (p = 0.93; Figure 2d ), with overlapping confidence intervals and no meaningful divergence throughout follow-up. In EOCRC NHW patients treated with FOLFOX, no statistically significant difference in survival was found (p = 0.099; Figure 2e ). However, a gradual separation of curves suggested a potential trend toward poorer outcomes in the altered group, particularly between 20 and 60 months. Despite this, overlapping confidence intervals indicate the trend may not be statistically reliable. Lastly, for EO NHW patients not treated with FOLFOX, survival was similar between groups (p = 0.37; Figure 2f ). Curves showed minimal early divergence that diminished over time, with broad overlapping confidence intervals underscoring the lack of a measurable effect. Taken together, these analyses demonstrate that TGF-beta pathway alterations were significantly associated with worse survival only in EO H/L patients receiving FOLFOX, while no statistically significant associations were found in any other age–ancestry–treatment subgroup. 3.7 AI-enabled data interrogation and pre-statistical insights The AI-HOPE-TGF-Beta platform was first deployed to conduct a targeted, post-analysis scan of the integrated CRC datasets, rapidly generating exploratory insights that guided subsequent statistical testing. Initial natural language–driven queries revealed several patterns of interest. First, among H/L CRC patients treated with FOLFOX, preliminary AI analysis indicated a potential association between TGF-β pathway alterations and reduced overall survival, which was confirmed by subsequent Kaplan–Meier analysis. 
In this subgroup (n = 21 altered; n = 52 non-altered), patients with TGF-β pathway alterations demonstrated significantly worse survival compared with their non-altered counterparts (log-rank p = 0.029) (Figure S2). This is consistent with the standard overall survival analysis reported in Section 3.6 (Figure 2a). The survival curves showed an early and sustained separation, with the altered group experiencing a steeper decline in survival probability during the first ∼50 months, followed by a plateau, whereas the non-altered group maintained higher survival probabilities throughout the follow-up period. The non-overlapping trend of the survival curves and narrower confidence intervals for the non-altered group suggest that TGF-β pathway alterations may represent a prognostic biomarker of poorer outcome in EO H/L patients receiving FOLFOX. Second, AI-driven subgroup interrogation suggested that BMPR1A mutations were more frequent in early-onset H/L patients receiving FOLFOX compared with other ancestry–treatment groups. Upon formal testing, the case cohort (EOCRC H/L, n = 73) and control cohort (EO NHW, n = 375) were evaluated for BMPR1A mutation enrichment under the constraint of SMAD4 mutation positivity. Fisher’s exact test revealed no statistically significant difference in BMPR1A mutation prevalence between groups (p = 0.836), with an odds ratio of 0.0 (95% CI: 0.033–12.145) (Figure S3). The proportion of in-context BMPR1A mutations was 1.07% in controls versus 0.68% in cases, indicating only a marginal difference in mutation occurrence. These results suggest that while AI-based exploratory scanning flagged BMPR1A as potentially enriched in the EOCRC H/L FOLFOX subgroup, confirmatory statistical testing did not support a significant ancestry-specific association in the context of concurrent SMAD4 alterations. 
Third, the AI scan identified multiple clinical and molecular attributes that significantly differed between early-onset NHW and early-onset H/L patients treated with FOLFOX (p < 0.05). Statistically significant features included demographic variables (ethnicity group, race), clinical characteristics (primary tumor site, stage at diagnosis, sample type), and disease outcomes (overall survival status, event occurrence). In addition, several key driver mutations—including SMAD4, APC, TCF7L2, TP53, PIK3CA, KRAS, and BRAF—showed differential prevalence between cohorts (Figure S4). Notably, tumor location and highest recorded stage emerged as prominent distinguishing features, aligning with prior evidence that anatomical site and disease stage at presentation can vary by ancestry. These findings underscore the ability of AI-enabled interrogation to rapidly surface clinically relevant subgroup distinctions, guiding targeted downstream statistical analyses. Finally, the system flagged a possible disparity in SMAD4 mutation prevalence between early-onset H/L and NHW patients treated with FOLFOX. Analysis revealed that SMAD4 mutations were present in 2.74% of the H/L case cohort (n = 73) compared with 13.87% of the NHW control cohort (n = 375). This difference was statistically significant (Chi-square p = 0.013), with an odds ratio of 0.175 (95% CI: 0.042–0.735), indicating that early-onset H/L patients treated with FOLFOX were markedly less likely to harbor SMAD4 mutations than their NHW counterparts (Figure S5). These findings suggest potential ancestry-related differences in the molecular landscape of FOLFOX-treated early-onset CRC, which may have implications for tumor biology and treatment response. These exploratory outputs informed the selection of subgroup comparisons for formal statistical analyses. 
The AI system then executed automated filtering and cohort construction based on combined clinical, molecular, and treatment parameters, producing validated mutation frequency tables and survival stratifications. This AI-guided workflow reduced manual data handling, ensured reproducibility, and accelerated the transition from hypothesis generation to confirmatory analysis. 4. Discussion This study represents one of the first artificial intelligence agents–enabled precision medicine analyses of TGF-beta pathway alterations in CRC stratified by age at onset, ancestry, and FOLFOX treatment status, with a particular focus on H/L populations disproportionately affected by EOCRC. Leveraging the AI-HOPE-TGF-Beta platform, we integrated multi-dimensional genomic and clinical data to uncover ancestry- and treatment-specific patterns of pathway disruption and survival impact. Our findings reveal a highly specific association: EOCRC H/L patients treated with FOLFOX who harbored TGF-beta pathway alterations experienced significantly worse overall survival compared to their non-altered counterparts. This association was absent in all other subgroups, including EOCRC H/L patients not receiving FOLFOX, late-onset H/L patients, and both EOCRC and LOCRC NHW cohorts, regardless of treatment status. These results suggest a possible gene–treatment–ancestry interaction, in which TGF-beta pathway dysregulation may confer treatment resistance or promote aggressive tumor biology specifically in young H/L patients exposed to oxaliplatin-based chemotherapy. Biological implications of TGF-beta pathway alterations The TGF-beta signaling pathway plays a dual role in CRC—functioning as a tumor suppressor in early stages and as a pro-metastatic, pro-immune evasion driver in later stages. 
Alterations in key pathway members, particularly SMAD4, TGFBR2, SMAD2, and SMAD3, may shift the pathway toward oncogenic activity, promoting epithelial-to-mesenchymal transition (EMT), immune suppression, and metastatic spread. The predominance of SMAD4 alterations across all ancestry and age groups in our dataset aligns with prior evidence linking SMAD4 loss to advanced disease, chemoresistance, and poor prognosis. Notably, our study identifies BMPR1A mutations as disproportionately enriched in EO H/L patients treated with FOLFOX—a finding not previously reported—which may indicate additional ancestry-linked biology within the TGF-beta superfamily. Ancestry-specific genomic patterns and treatment context While overall TGF-beta pathway alteration frequencies were similar between H/L and NHW patients in EOCRC, our analyses revealed notable exceptions. In LOCRC, H/L patients treated with FOLFOX exhibited a higher prevalence of TGF-beta alterations than their NHW counterparts, whereas NHW late-onset patients receiving FOLFOX had a significantly lower prevalence than those not treated. This suggests that both tumor genomic architecture and treatment selection pressures may differ across ancestry groups. These differences could reflect underlying germline variants influencing somatic evolution, environmental exposures, or healthcare access patterns that shape tumor biology and treatment outcomes. Implications for FOLFOX response in EO H/L patients The most clinically actionable result from this study is the survival disadvantage observed in FOLFOX-treated EO H/L patients with TGF-beta pathway alterations. Preclinical evidence suggests that TGF-beta activation may drive chemoresistance through multiple mechanisms, including modulation of DNA damage repair, induction of cancer stem cell phenotypes, and reshaping of the tumor microenvironment to suppress anti-tumor immunity. 
The specificity of the survival effect to EO H/L patients could indicate that host–tumor interactions, potentially shaped by genomic ancestry, play a critical role in modulating chemotherapy benefit. If validated, TGF-beta pathway status could serve as a biomarker to guide therapy selection in this high-risk group, potentially favoring non–oxaliplatin-based regimens or combination strategies incorporating TGF-beta inhibitors currently in development. AI-HOPE-TGF-Beta as an enabling technology The application of AI-HOPE-TGF-Beta was pivotal in uncovering these insights. By automating genomic curation, harmonizing multi-institutional clinical data, and enabling stratified analyses across multiple demographic and treatment dimensions, this platform overcomes a major barrier in precision oncology: the complexity of integrating heterogeneous datasets at scale. Importantly, AI-driven approaches allowed us to rapidly test hypotheses across numerous biologically and clinically relevant strata, a process that would be prohibitively time-consuming with manual workflows. In addition to survival analyses, AI-enabled exploratory interrogation provided several key pre-statistical insights that shaped downstream hypothesis testing. The AI-HOPE-TGF-Beta platform rapidly identified a significant survival disadvantage among EO H/L CRC patients treated with FOLFOX who harbored TGF-β pathway alterations, a finding subsequently validated by Kaplan–Meier analysis ( p = 0.029) and consistent with the survival patterns reported in our primary analyses. While AI-driven scans flagged BMPR1A mutations as potentially enriched in this subgroup, confirmatory testing under the constraint of SMAD4 mutation positivity revealed no statistically significant ancestry-specific association. 
The platform also highlighted multiple demographic, clinical, and molecular features that differed significantly between EO H/L and EO NHW patients receiving FOLFOX, including disparities in primary tumor site, stage at diagnosis, and prevalence of key driver mutations ( SMAD4, APC, TCF7L2, TP53, PIK3CA, KRAS, and BRAF ), underscoring ancestry-associated heterogeneity in disease presentation. Notably, AI-guided comparison revealed that SMAD4 mutations were markedly less frequent in EO H/L patients (2.74%) than in EO NHW patients (13.87%) treated with FOLFOX ( p = 0.013; OR = 0.175, 95% CI: 0.042–0.735), suggesting potential ancestry-related molecular differences with implications for treatment response. These AI-generated insights not only directed the selection of subgroup comparisons for formal statistical testing but also exemplified the platform’s ability to accelerate precision oncology research by automating cohort construction, reducing manual data handling, and ensuring reproducibility. Limitations and future directions Our study has several limitations. First, despite the large overall sample size, some stratified subgroups—particularly EO H/L patients with pathway alterations—had limited case counts, resulting in wider confidence intervals and reduced statistical power. Second, treatment data were limited to FOLFOX status, without granular details on dosing, duration, or use of targeted agents, which could further refine outcome associations. Third, functional validation of specific mutations was beyond the scope of this analysis, leaving the mechanistic basis for the observed survival effect to future laboratory studies. Finally, our results require validation in independent, ancestrally diverse cohorts to confirm generalizability and assess the utility of TGF-beta pathway status as a predictive biomarker. 
Future directions include incorporating state-of-the-art molecular characterization approaches such as spatial biology ( 35 ), and single-cell sequencing ( 36 ) to dissect the tumor microenvironment, resolve cellular heterogeneity, and elucidate spatially organized signaling events that may underlie ancestry- and treatment-specific outcomes. 5. Conclusion This AI-enabled, precision oncology analysis identifies TGF-beta pathway alterations as a potential biomarker of poor survival specifically in EO H/L CRC patients receiving FOLFOX chemotherapy. These findings highlight the importance of integrating ancestry, age, and treatment context into genomic outcome studies and suggest a path toward more personalized treatment strategies for disproportionately affected populations. By revealing ancestry-specific vulnerabilities and treatment interactions, this work underscores the transformative potential of artificial intelligence in advancing health equity in cancer genomics. Data Availability All data used in the present study is publicly available at https://www.cbioportal.org/ and https://genie.cbioportal.org . The datasets used in our study were aggregated/summary data, and no individual-level data were used. Additional data can be provided upon reasonable request to the authors. 
Supplementary Materials View this table: View inline View popup Download powerpoint Table S1 Comparison of Early-Onset Hispanic/Latino (H/L) Patients Treated with FOLFOX versus Not Treated with FOLFOX View this table: View inline View popup Download powerpoint Table S2 Comparison of Late-Onset Hispanic/Latino (H/L) Patients Treated with FOLFOX versus Not Treated with FOLFOX View this table: View inline View popup Download powerpoint Table S3 Comparison of Early-Onset Non-Hispanic White (NHW) Patients Treated with FOLFOX versus Not Treated with FOLFOX View this table: View inline View popup Download powerpoint Table S4 Comparison of Late-Onset Non-Hispanic White (NHW) Patients Treated with FOLFOX versus Not Treated with FOLFOX View this table: View inline View popup Download powerpoint Table S5 Comparison of Early-Onset versus Late-Onset Hispanic/Latino (H/L) Patients Treated with FOLFOX View this table: View inline View popup Download powerpoint Table S6 Comparison of Early-Onset versus Late-Onset Hispanic/Latino (H/L) Patients Not Treated with FOLFOX View this table: View inline View popup Download powerpoint Table S7 Comparison of Early-Onset versus Late-Onset Non-Hispanic White (NHW) Patients Treated with FOLFOX View this table: View inline View popup Download powerpoint Table S8 Comparison of Early-Onset Hispanic/Latino (H/L) versus Early-Onset Non-Hispanic White (NHW) Patients Treated with FOLFOX View this table: View inline View popup Download powerpoint Table S9 Comparison of Early-Onset Hispanic/Latino (H/L) versus Early-Onset Non-Hispanic White (NHW) Patients Not Treated with FOLFOX View this table: View inline View popup Download powerpoint Table S10 Comparison of Late-Onset Hispanic/Latino (H/L) versus Late-Onset Non-Hispanic White (NHW) Patients Treated with FOLFOX View this table: View inline View popup Download powerpoint Table S11 Comparison of Late-Onset Hispanic/Latino (H/L) versus Late-Onset Non-Hispanic White (NHW) Patients Not Treated with 
FOLFOX View this table: View inline View popup Download powerpoint Table S12 Distribution of TGF-β Pathway Gene Mutation Types by Ancestry, Age of Onset, and FOLFOX Treatment Status in Colorectal Cancer. This table presents the percentage distribution of mutation types across six TGF-β pathway genes (BMPR1A, SMAD2, SMAD3, SMAD4, TGFBR1, and TGFBR2) stratified by ancestry [Hispanic/Latino (H/L) vs. Non-Hispanic White (NHW)], age of onset [early-onset (EO) vs. late-onset (LO)], and FOLFOX treatment status (treated vs. not treated). Mutation categories include frame shift deletions and insertions, in-frame deletions and insertions, missense mutations, nonsense mutations, nonstop mutations, splice site and splice region alterations, and translation start site variants. Percentages indicate the proportion of each mutation type among all mutations detected for a given gene within each subgroup. This table highlights differences in the mutation spectrum between clinical and demographic subgroups, enabling comparisons of variant type prevalence across ancestry, age, and treatment groups. Download figure Open in new tab Figure S1. Kaplan–Meier survival analysis of TGF-beta pathway alterations across colorectal cancer (CRC) subgroups defined by age, ancestry, and FOLFOX treatment status. Overall survival curves are shown for: (a) Late-Onset NHW Treated with FOLFOX, and (b) Late-Onset NHW Not Treated with FOLFOX. Download figure Open in new tab Figure S2. AI-guided selection and survival analysis of Hispanic/Latino (H/L) colorectal cancer (CRC) patients treated with FOLFOX, stratified by TGF-β pathway alteration status. The AI-HOPE and AI-HOPE-TGFbeta platforms were used to identify case and control cohorts based on predefined clinical, genomic, and treatment criteria. 
(Left) Distribution of selected versus unselected samples in the case cohort— early-onset (EO) H/L CRC patients treated with FOLFOX and harboring TGF-β pathway alterations (n = 21)—and the control cohort—late-onset (LO) H/L CRC patients treated with FOLFOX without pathway alterations (n = 52). (Right) Kaplan–Meier overall survival (OS) analysis comparing the two cohorts. Patients in the case group exhibited significantly reduced OS compared to controls (log-rank p = 0.029). Shaded areas represent 95% confidence intervals. Survival probabilities declined more steeply in the case cohort within the first 50 months, suggesting a potential negative prognostic impact of TGF-β pathway alterations in EO H/L patients receiving FOLFOX. Download figure Open in new tab Figure S3. Comparison of BMPR1A mutation frequency between early-onset Hispanic/Latino (H/L) and early-onset Non-Hispanic White (NHW) colorectal cancer (CRC) patients treated with FOLFOX in the context of SMAD4 mutation positivity. The AI-HOPE and AI-HOPE-TGFbeta platforms were used to identify and compare BMPR1A mutation prevalence between the case cohort (EO H/L; n = 73) and control cohort (EO NHW; n = 375), restricted to patients harboring SMAD4 mutations. Pie charts show the proportion of selected (in-context) versus unselected (out-of-context) samples within each cohort. The stacked bar plot displays the number of in-context and out-of-context samples across case and control groups, with Fisher’s exact test used to assess statistical significance. BMPR1A mutations were observed in 0.68% of cases and 1.07% of controls (p = 0.836; odds ratio = 0.0; 95% CI: 0.033–12.145), indicating no significant difference in mutation frequency between groups. Download figure Open in new tab Figure S4. AI-driven identification of significant clinical and molecular attributes distinguishing early-onset Hispanic/Latino (H/L) and Non-Hispanic White (NHW) colorectal cancer (CRC) patients treated with FOLFOX. 
The pie charts display the proportion of selected versus unselected samples for the case cohort (early-onset H/L, n = 73) and control cohort (early-onset NHW, n = 375). The table lists all clinical and genomic attributes showing statistically significant differences between groups (p < 0.05), including demographic factors (ethnicity group, race), clinical variables (primary tumor site, stage, sample type), disease outcomes (overall survival status, event), and key oncogenic mutations (e.g., SMAD4 ). These results highlight multiple dimensions of divergence between cohorts, supporting further targeted analyses of ancestry-related differences in CRC biology and outcomes. Download figure Open in new tab Figure S5. Comparison of SMAD4 mutation prevalence between early-onset Hispanic/Latino (H/L) and Non-Hispanic White (NHW) colorectal cancer (CRC) patients treated with FOLFOX. The case cohort (left pie chart) consisted of early-onset H/L patients (n = 73) and the control cohort (middle pie chart) consisted of early-onset NHW patients (n = 375). Blue segments indicate selected samples meeting the inclusion criteria, while orange segments represent all other samples. The stacked bar plot (right) displays the distribution of SMAD4 mutation–positive (“In_Context”) and mutation–negative (“Out_of_Context”) cases in each cohort. SMAD4 mutations were detected in 2.74% of the case group compared with 13.87% of the control group. Fisher’s exact test yielded a statistically significant difference (Chi-square p = 0.013), with an odds ratio of 0.175 (95% CI: 0.042–0.735), indicating a lower prevalence of SMAD4 mutations in early-onset H/L patients treated with FOLFOX relative to NHW counterparts. References 1. ↵ Patel SG , Karlitz JJ , Yen T , Lieu CH , Boland CR . The rising tide of early-onset colorectal cancer: a comprehensive review of epidemiology, clinical features, biology, risk factors, prevention, and early detection . Lancet Gastroenterol Hepatol . 
2022 ; 7 ( 3 ): 262 – 74 . OpenUrl PubMed 2. ↵ Siegel RL , Giaquinto AN , Jemal A. Cancer statistics, 2024 . CA Cancer J Clin . 2024 ; 74 ( 1 ): 12 - 49 . OpenUrl CrossRef PubMed 3. ↵ Bhandari A , Woodhouse M , Gupta S . Colorectal cancer is a leading cause of cancer incidence and mortality among adults younger than 50 years in the USA: a SEER-based analysis with comparison to other young-onset cancers . J Investig Med . 2017 ; 65 ( 2 ): 311 – 5 . OpenUrl Abstract / FREE Full Text 4. Gandini A , Taieb J , Blons H , Netter J , Laurent-Puig P , Gallois C . Early-Onset colorectal Cancer: From the laboratory to the clinic . Cancer Treat Rev . 2024 ; 130 : 102821 . OpenUrl PubMed 5. Mauri G , Sartore-Bianchi A , Russo AG , Marsoni S , Bardelli A , Siena S . Early-onset colorectal cancer in young individuals . Mol Oncol . 2019 ; 13 ( 2 ): 109 – 31 . OpenUrl CrossRef PubMed 6. Muller C , Ihionkhan E , Stoffel EM , Kupfer SS . Disparities in Early-Onset Colorectal Cancer . Cells . 2021 ; 10 ( 5 ). 7. Rahib L , Wehner MR , Matrisian LM , Nead KT . Estimated Projection of US Cancer Incidence and Death to 2040 . JAMA Netw Open . 2021 ; 4 ( 4 ): e214708 . OpenUrl CrossRef 8. Santucci C , Mignozzi S , Alicandro G , Pizzato M , Malvezzi M , Negri E , et al. Trends in cancer mortality under age 50 in 15 upper-middle and high-income countries . J Natl Cancer Inst . 2025 ; 117 ( 4 ): 747 – 60 . OpenUrl PubMed 9. ↵ Sinicrope FA . Increasing Incidence of Early-Onset Colorectal Cancer . N Engl J Med . 2022 ; 386 ( 16 ): 1547 – 58 . OpenUrl CrossRef PubMed 10. ↵ Garcia S , Pruitt SL , Singal AG , Murphy CC . Colorectal cancer incidence among Hispanics and non-Hispanic Whites in the United States . Cancer Causes Control . 2018 ; 29 ( 11 ): 1039 – 46 . OpenUrl CrossRef PubMed 11. Miller KD , Ortiz AP , Pinheiro PS , Bandi P , Minihan A , Fuchs HE , et al. Cancer statistics for the US Hispanic/Latino population, 2021 . CA Cancer J Clin . 2021 ; 71 ( 6 ): 466 - 87 . 
OpenUrl CrossRef PubMed 12. ↵ Pinheiro PS , Callahan KE , Siegel RL , Jin H , Morris CR , Trapido EJ , et al. Cancer Mortality in Hispanic Ethnic Groups . Cancer Epidemiol Biomarkers Prev . 2017 ; 26 ( 3 ): 376 – 82 . OpenUrl Abstract / FREE Full Text 13. ↵ Dubina K. Hispanics in the Labor Force: 5 Facts 2021 [Available from: https://blog.dol.gov/2021/09/15/hispanics-in-the-labor-force-5-facts . 14. ↵ Hoffman Dennis JJ . The 2023 Official LDC U.S LATINO GDP REPORT . Beverly Hills, California : Latino Donor Collaborative ; 2023 . Report No.: 6th edition. 15. ↵ Lieu CH , Golemis EA , Serebriiskii IG , Newberg J , Hemmerich A , Connelly C , et al. Comprehensive Genomic Landscapes in Early and Later Onset Colorectal Cancer . Clin Cancer Res . 2019 ; 25 ( 19 ): 5852 – 8 . OpenUrl Abstract / FREE Full Text 16. Storandt MH , Shi Q , Eng C , Lieu C , George T , Stoppler MC , et al. Genomic Landscapes of Early-Onset Versus Average-Onset Colorectal Cancer Populations . Cancers (Basel) . 2025 ; 17 ( 5 ). 17. ↵ Tang J , Peng W , Tian C , Zhang Y , Ji D , Wang L , et al. Molecular characteristics of early-onset compared with late-onset colorectal cancer: a case controlled study . Int J Surg . 2024 ; 110 ( 8 ): 4559 – 70 . OpenUrl CrossRef PubMed 18. Alshenaifi JY , Vetere G , Maddalena G , Yousef M , White MG , Shen JP , et al. Mutational and co-mutational landscape of early onset colorectal cancer . Biomarkers . 2025 ; 30 ( 1 ): 64 – 76 . OpenUrl CrossRef PubMed 19. ↵ Antelo M , Balaguer F , Shia J , Shen Y , Hur K , Moreira L , et al. A high degree of LINE-1 hypomethylation is a unique feature of early-onset colorectal cancer . PLoS One . 2012 ; 7 ( 9 ): e45357 . OpenUrl CrossRef PubMed 20. ↵ Cheung ATM , Palapattu EL , Pompa IR , Aldrighetti CM , Niemierko A , Willers H , et al. Racial and ethnic disparities in a real-world precision oncology data registry . NPJ Precis Oncol . 2023 ; 7 ( 1 ): 7 . OpenUrl PubMed 21. 
↵ Monge C , Waldrup B , Carranza FG , Velazquez-Villarreal E . WNT and TGF-Beta Pathway Alterations in Early-Onset Colorectal Cancer Among Hispanic/Latino Populations . Cancers (Basel) . 2024 ; 16 ( 23 ). 22. Monge C , Waldrup B , Carranza FG , Velazquez-Villarreal E . Ethnicity-Specific Molecular Alterations in MAPK and JAK/STAT Pathways in Early-Onset Colorectal Cancer . Cancers (Basel) . 2025 ; 17 ( 7 ). 23. ↵ Monge C , Waldrup B , Manjarrez S , Carranza FG , Velazquez-Villarreal E . Detecting PI3K and TP53 Pathway Disruptions in Early-Onset Colorectal Cancer Among Hispanic/Latino Patients . Cancer Med . 2025 ; 14 ( 7 ): e70791 . OpenUrl CrossRef PubMed 24. ↵ Papavassiliou KA , Delle Cave D , Papavassiliou AG . Targeting the TGF-beta Signaling Axis in Metastatic Colorectal Cancer: Where Do We Stand? Int J Mol Sci . 2023 ; 24 ( 23 ). 25. ↵ Koveitypour Z , Panahi F , Vakilian M , Peymani M , Seyed Forootan F , Nasr Esfahani MH , et al. Signaling pathways involved in colorectal cancer progression . Cell Biosci . 2019 ; 9 : 97 . OpenUrl CrossRef PubMed 26. ↵ Waldner MJ , Neurath MF . TGFbeta and the Tumor Microenvironment in Colorectal Cancer . Cells . 2023 ; 12 ( 8 ). 27. ↵ Li X , Wu Y , Tian T . TGF-beta Signaling in Metastatic Colorectal Cancer (mCRC): From Underlying Mechanism to Potential Applications in Clinical Development . Int J Mol Sci . 2022 ; 23 ( 22 ). 28. ↵ Laskar RS , Qu C , Huyghe JR , Harrison T , Hayes RB , Cao Y , et al. Genome-wide association studies and Mendelian randomization analyses provide insights into the causes of early-onset colorectal cancer . Ann Oncol . 2024 ; 35 ( 6 ): 523 – 36 . OpenUrl CrossRef PubMed 29. ↵ Xu Y , Pasche B . TGF-beta signaling alterations and susceptibility to colorectal cancer . Hum Mol Genet . 2007 ; 16 Spec No 1(SPEC): R14 - 20 . OpenUrl CrossRef PubMed Web of Science 30. ↵ Morris VK , Kennedy EB , Baxter NN , Benson AB , 3rd, Cercek A, Cho M , et al. Treatment of Metastatic Colorectal Cancer: ASCO Guideline. 
J Clin Oncol . 2023 ; 41 ( 3 ): 678 – 700 . OpenUrl PubMed 31. ↵ O’Reilly M , Linehan A , Krstic A , Kolch W , Sheahan K , Winter DC , et al. Oncotherapeutic Strategies in Early Onset Colorectal Cancer . Cancers (Basel) . 2023 ; 15 ( 2 ). 32. ↵ Meng L , Thapa R , Delgado MG , Gomez MF , Ji R , Knepper TC , et al. Association of Age With Treatment-Related Adverse Events and Survival in Patients With Metastatic Colorectal Cancer . JAMA Netw Open . 2023 ; 6 ( 6 ): e2320035 . OpenUrl PubMed 33. ↵ Yang EW , Velazquez-Villarreal E . AI-HOPE: an AI-driven conversational agent for enhanced clinical and genomic data integration in precision medicine research . Bioinformatics . 2025 Jul 1 ; 41 ( 7 ): btaf359 . doi: 10.1093/bioinformatics/btaf359 . PMID: 40577785 ; PMCID: PMC12212640 . OpenUrl CrossRef PubMed 34. ↵ Yang EW , Waldrup B , Velazquez-Villarreal E . AI-HOPE-TGFbeta: A Conversational AI Agent for Integrative Clinical and Genomic Analysis of TGF-β Pathway Alterations in Colorectal Cancer to Advance Precision Medicine . AI 2025 , 6 ( 7 ), 137 ; doi: 10.3390/ai6070137 . OpenUrl CrossRef 35. ↵ Carranza FG , Diaz FC , Ninova M , Velazquez-Villarreal E . Current state and future prospects of spatial biology in colorectal cancer . Front Oncol . 2024 Dec 3 ; 14 : 1513821 . doi: 10.3389/fonc.2024.1513821 . PMID: 39711954 ; PMCID: PMC11660798 . OpenUrl CrossRef PubMed 36. ↵ Velazquez-Villarreal EI , Maheshwari S , Sorenson J , Fiddes IT , Kumar V , Yin Y , Webb MG , Catalanotti C , Grigorova M , Edwards PA , Carpten JD , Craig DW . Single-cell sequencing of genomic DNA resolves sub-clonal heterogeneity in a melanoma cell line . Commun Biol . 2020 Jun 25 ; 3 ( 1 ): 318 . doi: 10.1038/s42003-020-1044-8 . PMID: 32587328 ; PMCID: PMC7316972 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted August 16, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. 
NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Artificial Intelligence-Enabled Precision Medicine Reveals Prognostic Impact of TGF-Beta Pathway Alterations in FOLFOX-Treated Early-Onset Colorectal Cancer Among Disproportionately Affected Populations Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Artificial Intelligence-Enabled Precision Medicine Reveals Prognostic Impact of TGF-Beta Pathway Alterations in FOLFOX-Treated Early-Onset Colorectal Cancer Among Disproportionately Affected Populations Fernando C. Diaz , Brigette Waldrup , Francisco G. Carranza , Sophia Manjarrez , Enrique Velazquez-Villarreal medRxiv 2025.08.11.25333363; doi: https://doi.org/10.1101/2025.08.11.25333363 Share This Article: Copy Citation Tools Artificial Intelligence-Enabled Precision Medicine Reveals Prognostic Impact of TGF-Beta Pathway Alterations in FOLFOX-Treated Early-Onset Colorectal Cancer Among Disproportionately Affected Populations Fernando C. Diaz , Brigette Waldrup , Francisco G. 
Carranza , Sophia Manjarrez , Enrique Velazquez-Villarreal medRxiv 2025.08.11.25333363; doi: https://doi.org/10.1101/2025.08.11.25333363 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15222) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6589) Geriatric Medicine (667) Health Economics (997) Health Informatics (4525) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (971) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9221) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (711) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffe812bbf5906eb',t:'MTc3OTQ4MTA0OA=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper — AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-05-23T02:00:01.238055+00:00
License: CC-BY-NC-ND-4.0