Multi-ancestry genome-wide and transcriptome-wide association analyses identified new risk loci and genes for inflammatory bowel disease

Linshuoshuo Lyu; Qing Li; Chao Li; Ghadeer K Dawwas; You Chen; Ran Tao; Qi Liu; Wanqing Wen; Xiao-Ou Shu; Kay Washington; David A Schwartz; Wei Zheng; Zhijun Yin; Ken S Lau; Xingyi Guo

doi:10.1101/2025.09.23.25336483

Multi-ancestry genome-wide and transcriptome-wide association analyses identified new risk loci and genes for inflammatory bowel disease

Linshuoshuo Lyu, Qing Li, Chao Li, Ghadeer K Dawwas, You Chen, Ran Tao, Qi Liu, Wanqing Wen, Xiao-Ou Shu, Kay Washington, David A Schwartz, Wei Zheng, Zhijun Yin, Ken S Lau, Xingyi Guo

2025 · doi:10.1101/2025.09.23.25336483

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

⚙ AI-generated summary by claude@2026-06, 2026-06-10 ⓘ

This study performed multi-ancestry genome-wide and transcriptome-wide association analyses to identify new risk loci and genes associated with inflammatory bowel disease.

One-sentence paraphrase of the abstract; not a substitute for reading it. No clinical advice. How this works

⚙ AI-generated deep summary by claude@2026-06, 2026-06-10 · read from full text ⓘ

This study performed multi-ancestry genome-wide association meta-analyses for inflammatory bowel disease using 63,415 cases from European and East Asian descendants and identified 90 previously unknown risk loci. The authors then integrated multi-ancestry transcriptome-wide association approaches, including cell type–specific TWAS, alternative splicing and polyadenylation analyses based on RNA-seq from normal colon tissue, to fine-map 506 high-confidence IBD risk genes, many not previously reported, and linked them to pathways involving immune regulation and microbial interaction. A key caveat is that the transcriptome analyses rely on reference RNA-seq expression models (and predicted gene regulation) from normal colon tissue rather than direct disease tissue measurements. Relevance to endometriosis: the paper is not about endometriosis or adenomyosis and does not explicitly discuss them; it was included in the corpus via a keyword match in the upstream search index.

Read from the paper's body, not the abstract. Not a substitute for reading the paper. No clinical advice. How this works

Full text 81,404 characters · extracted from preprint-html · click to expand

Multi-ancestry genome-wide and transcriptome-wide association analyses identified new risk loci and genes for inflammatory bowel disease | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Multi-ancestry genome-wide and transcriptome-wide association analyses identified new risk loci and genes for inflammatory bowel disease Linshuoshuo Lyu , Qing Li , Chao Li , Ghadeer K Dawwas , View ORCID Profile You Chen , Ran Tao , Qi Liu , Wanqing Wen , Xiao-Ou Shu , Kay Washington , David A Schwartz , Wei Zheng , Zhijun Yin , View ORCID Profile Ken S Lau , Xingyi Guo doi: https://doi.org/10.1101/2025.09.23.25336483 Linshuoshuo Lyu 1 Division of Epidemiology, Department of Medicine, Vanderbilt Epidemiology Center, and Vanderbilt-Ingram Cancer Center, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Qing Li 1 Division of Epidemiology, Department of Medicine, Vanderbilt Epidemiology Center, and Vanderbilt-Ingram Cancer Center, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Chao Li 1 Division of Epidemiology, Department of Medicine, Vanderbilt Epidemiology Center, and Vanderbilt-Ingram Cancer Center, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ghadeer K Dawwas 1 Division of Epidemiology, Department of Medicine, Vanderbilt Epidemiology Center, and Vanderbilt-Ingram Cancer Center, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site You Chen 2 Department of Biomedical Informatics, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for You Chen Ran Tao 3 Department of Biostatistics, Vanderbilt University Medical Center , Nashville TN 37203, USA 4 Vanderbilt Genetics Institute, Vanderbilt University Medical Center , Nashville, TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Qi Liu 3 Department of Biostatistics, Vanderbilt University Medical Center , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Wanqing Wen 1 Division of Epidemiology, Department of Medicine, Vanderbilt Epidemiology Center, and Vanderbilt-Ingram Cancer Center, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Xiao-Ou Shu 1 Division of Epidemiology, Department of Medicine, Vanderbilt Epidemiology Center, and Vanderbilt-Ingram Cancer Center, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kay Washington 5 Department of Pathology, Vanderbilt University Medical Center, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site David A Schwartz 6 Division of Gastroenterology, Hepatology, and Nutrition, Department of Medicine, Vanderbilt University Medical Center , Nashville, TN, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Wei Zheng 1 Division of Epidemiology, Department of Medicine, Vanderbilt Epidemiology Center, and Vanderbilt-Ingram Cancer Center, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Zhijun Yin 2 Department of Biomedical Informatics, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ken S Lau 7 Epithelial Biology Center and Department of Cell and Developmental Biology, Vanderbilt University School of Medicine , Nashville, TN, USA 8 Center for Computational Systems Biology, Vanderbilt University , Nashville, TN, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ken S Lau Xingyi Guo 1 Division of Epidemiology, Department of Medicine, Vanderbilt Epidemiology Center, and Vanderbilt-Ingram Cancer Center, Vanderbilt University School of Medicine , Nashville TN 37203, USA 2 Department of Biomedical Informatics, Vanderbilt University School of Medicine , Nashville TN 37203, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: xingyi.guo{at}vumc.org Abstract Full Text Info/History Metrics Preview PDF ABSTRACT To advance genetic understanding of inflammatory bowel disease (IBD), we conducted genome-wide association meta-analyses of 63,415 IBD cases of European and East Asian descendants and identified 90 previously unknown risk loci. Integrating multi-ancestry transcriptome-wide association studies (TWAS), cell type–specific TWAS, alternative splicing (AS-WAS), and alternative polyadenylation (APA-WAS) analyses using RNA-seq data from normal colon tissues of 707 European and 364 East Asian individuals, we uncovered 506 high-confidence IBD risk genes, including 384 not previously reported. These genes converge on immune regulation, microbial interaction, and other pathways central to IBD pathogenesis, with over half showing transcriptional dysregulation supported by single-cell and spatial omics analyses. Notably, 46 risk genes are targeted by 225 drugs that have been approved or in Phase II/III trials, including sulfasalazine already used in IBD therapy. Our study findings deepen the understanding of IBD genetics and support the development of precision medicine for its prevention and treatment. INTRODUCTION Inflammatory Bowel Disease (IBD) comprises a group of chronic, relapsing-remitting inflammatory conditions of the gastrointestinal tract, primarily Crohn’s disease (CD) and ulcerative colitis (UC). Since 1900, the incidence of IBD has been rising strikingly in newly industrialized countries 1 .The age- and sex-adjusted incidence of IBD was estimated at 10.9 per 100,000 person-years (95% confidence interval (CI): 10.6–11.2) 2 . Genome-wide association studies (GWAS) have identified more than 300 risk loci associated with IBD, primarily through large-scale studies in individuals of European and East Asian ancestry 3 – 10 . Recent efforts, including the largest East Asian IBD GWAS (14,393 cases and 15,456 controls) 4 and data from the FinnGen consortium (10,960 cases and 489,388 controls) 11 , further expanded the catalog of IBD risk loci. However, the causal genes and biological mechanisms underlying these associations remain largely unresolved. Moreover, previous GWAS were conducted in single ancestry groups, which limits cross-population generalizability and hinders a comprehensive understanding of disease biology. Multi-ancestry GWAS meta-analysis can enhance risk locus discovery and refine association signals by leveraging shared genetic architectures. Transcriptome-wide association studies (TWAS) offer a powerful framework to bridge GWAS signals with gene expression, by using predictive models trained on expression quantitative trait loci (eQTLs) to identify genes whose genetically predicted expression is associated with disease risk 12 , 13 . However, conventional TWAS approaches are often limited by the inclusion of non-functional variants in prediction models, reducing power and increasing false discovery rates 14 – 16 . To overcome these limitations, we developed sTF-TWAS, a framework that prioritizes regulatory variants within transcription factor-occupied cis-regulatory elements (STFCREs), significantly improving the accuracy and sensitivity of TWAS gene discovery 17 , 18 . TWAS has also been extended to investigate genetically regulated post-transcriptional features such as alternative splicing (AS) and alternative polyadenylation (APA), offering additional insights into gene regulation and disease mechanisms 19 . Furthermore, single-cell RNA sequencing (scRNA-seq) enables high-resolution characterization of tissue composition and facilitates the construction of cell type-specific transcriptomic profiles 20 , thereby improving the detection of cell type-specific genetic effects 21 – 26 . Integration of scRNA-seq data with computational deconvolution tools, such as CIBERSORTx 27 , has demonstrated utility in uncovering cell type-specific genetic regulation in several complex diseases 21 – 23 , 28 – 30 . However, such approaches remain underutilized in IBD, and to date, only limited TWAS of IBD using bulk normal colon tissue has been reported 31 , 32 underscoring the need for high-resolution, multi-omics integrative analyses in this field. To address these critical gaps, we conducted a multi-ancestry meta-analysis of GWAS summary statistics from 63,415 IBD cases of European and East Asian ancestry to advance the discovery of novel risk loci. We next integrated the results from the multi-ancestry TWAS in conducting cell type-specific TWAS, alternative splicing-wide association study (AS-WAS), and alternative polyadenylation-wide association study (APA-WAS). The cell type-specific TWAS was performed by leveraging single-cell RNA-seq (scRNA-seq) reference data from Colon Molecular Atlas Project (COLON MAP) 33 , and bulk RNA-seq data from 707 European individuals 34 and 364 East Asian 34 . We aimed to combine genetically predicted gene expression, AS, and APA with fine-mapping and eQTL colocalization to identify high-confidence risk genes, including those targeted by approved drugs or compounds in Phase II/III clinical trials. Our approach allows for the identifications of drugs that are commonly prescribed for conditions that co-exist with IBD, suggesting that these drugs could be evaluated using observational data, eliminating the need for subsequent randomized trials. These findings from our study could facilitate risk loci and genes discovery and drug development for IBD. RESULTS Identification of genetic risk loci and target genes associated with IBD In the meta-analysis of European populations, we included a total of 49,022 cases of IBD and 1,293,489 controls ( Supplementary Table 1 ) from Veterans Affairs (VA) Million Veteran Program (MVP) 35 , the FinnGen study (R12) 11 , Pan-UK Biobank (UKB) 36 and a GWAS meta-analysis of UK IBD Genetics Consortium (UKIBDGC) and International Inflammatory Bowel Disease Genetics Consortium (IIBDGC) 3 (Hereafter referred as IIBDGC population). Using fixed-effect meta-analysis ( Methods ), we identified 19 novel loci associated with IBD at genome-wide significance ( P < 5 × 10 −8 ). Subtype analyses further revealed 15 novel loci associated with CD and 27 associated with UC ( Table 1 , Figure 1 ). View this table: View inline View popup Table 1. Novel genetic risk variants identified in the European GWAS meta-analysis. Download figure Open in new tab Figure 1. Manhattan plot of GWAS meta-analysis results in the European populations (49,022 IBD cases and 1,293,489 controls). Novel loci identified in the meta-analysis are highlighted with SNP rs IDs. Results are shown for IBD, CD, and UC from top to bottom. While the genomic inflation factors (λ) showed mild inflations for summary statistics (λ CD = 1.08, λ UC = 1.10, and λ IBD = 1.12), the linkage disequilibrium score regression (LDSC) intercepts (0.97, 0.99, and 0.99 for CD, UC, and IBD, respectively) indicated such inflation was due to polygenic effects of the variants instead of confounding population stratification. The quantile-quantile (QQ) plots of the full GWAS meta data and GWAS meta data after removing the loci that are located within 300 Kilobyte (Kb) of previously identified subtype specific GWAS significant loci were presented in Supplementary Figure 1 . Combining evidence from LDSC and Q-Q plots showed that population structure and other confounding factors were well-controlled. In the multi-ancestry meta-analysis, we aggregated a total of 63,415 IBD cases and 1,308,945 controls after combining the European data with an additional 14,393 cases of IBD and 15,456 controls from a recent GWAS of East Asian populations 4 . At genome-wide significance level, we identified additional 9 novel loci associated with IBD. Further stratified analysis revealed 11 novel genetic loci associated with CD, and 12 novel loci associated UC ( Table 2 , Figure 2 ). Combined genomic inflation factors (λ = 1.07-1.10), LDSC intercepts (0.98-1.00), and the Q-Q plots ( Supplementary Figure 1 ) suggests that population stratification has been well controlled. The Q-Q plots for individual GWAS summary statistics were presented in Supplementary Figure 2 . View this table: View inline View popup Download powerpoint Table 2. Novel genetic risk variants identified in the European and East Asian GWAS meta-analysis. Download figure Open in new tab Figure 2. Manhattan plot of GWAS meta-analysis results across European and East Asian populations. The meta-analysis includes 49,022 IBD cases and 1,293,489 controls of European ancestry, along with 14,393 IBD cases and 15,456 controls of East Asian ancestry. Novel loci identified in the meta-analysis are highlighted with SNP rs IDs. Results are shown for IBD, CD, and UC from top to bottom. In total, we identified 90 unique novel genetic loci associated with IBD after combining findings from the above analyses ( Supplementary Table 2 ). All variants reached the genome-wide significant threshold ( P < 5×10 - 8 ) were presented in Supplementary Table 3 for European-specific meta-analysis and Supplementary Table 4 for multi-ancestry meta-analysis. To identify putative target genes in the 90 novel and the previously reported 317 GWAS-identified lead variants collected from literature 3 , 4 , 9 , 11 , 35 , 36 ( Supplementary Table 5; Methods ), we performed xQTL mapping, including expression (eQTL), alternative splicing (AS-QTL), and alternative polyadenylation (APA-QTL) analyses, using bulk RNA-seq data from 707 European-ancestry individuals (423 from BarcUVa-Seq project 37 and 284 from GTEx 34 ), and 364 Asian-ancestry individuals from our previous studies 34 ( Methods ). In addition, we also conducted cell type-specific eQTL using cell type-specific gene expression deconvoluted from bulk colon tissue gene expression 38 ( Methods ). At a Bonferroni-corrected significance threshold of P < 0.05, we identified 248 risk genes (corresponding 313 gene-variants pairs), including 236 genes in the European population and additional 8 genes in the East Asian population ( Supplementary Table 6 ). Furthermore, colocalization analysis between GWAS and xQTL signals using coloc 39 identified 53 high-confidence risk genes, supported by strong evidence of shared causal signals (PP.H4 > 0.8; Supplementary Table 7; Methods ). Of them, 13 genes are linked to lead variants from the newly identified GWAS loci. Identification of risk genes through multi-ancestry TWAS framework We next conducted multi-ancestry TWAS, AS-WAS, and APA-WAS through ancestry-specific genetic prediction model building based on the above bulk RNA-seq data. For TWAS in European populations, we identified 318 genes significantly associated with IBD risk at Bonferroni-corrected P < 0.05 ( Supplementary Table 8 ). Meta-analysis incorporating Asian ancestry data revealed an additional 17 significant genes. We also identified additional genes through subtype-specific analysis. For CD, we identified 35 significant genes in the European cohort and 9 additional genes through meta-analysis ( Supplementary Table 8 ). For UC, 39 genes were significant in the European cohort, with 12 additional genes identified in meta-analysis ( Supplementary Table 8 ). In total, TWAS uncovered 430 IBD-associated genes, including 392 from European ancestry and 38 additional genes identified through meta-analysis with Asian ancestry. We further performed fine-mapping using FOCUS 14 to identify high-confidence genes, defined as those with a posterior inclusion probability (PIP) > 0.5 in FOCUS analyses, or, for regions not analyzed by FOCUS, as the gene with the smallest TWAS P value within a ±1 Mb window ( Methods ). This resulting in 207 high-confidence genes, with 146 from FOCUS ( Supplementary Table 9 ) and an additional 61 from regions beyond FOCUS analyses ( Supplementary Table 10 ). For APA-WAS, we identified 86 genes associated with IBD, including 75 in European populations and an additional 11 genes from meta-analysis of European and East Asian populations ( Supplementary Table 11 ). For CD, we identified 13 additional significant genes associated with CD in the European cohort, while no significant genes were detected from the meta-analysis ( Supplementary Table 11 ). For UC, APA-WAS identified 9 additional significant genes in European ancestry and 5 additional genes from meta-analysis ( Supplementary Table 11 ). In total, APA-WAS identified 113 genes, including 97 from European ancestry and 16 additional genes through meta-analysis ( Supplementary Table 11 ). Fine-mapping analysis prioritized 73 high-confidence genes, including 53 with PIP > 0.5 from FOCUS analyses ( Supplementary Table 12 ) and 20 additional genes with the smallest APA-WAS P -values within 1 Mb loci ( Supplementary Table 13 ). The definition of high-confidence genes was presented in Methods . For AS-WAS, we identified 277 genes associated with IBD, including 257 in European populations and additional 20 genes from meta-analysis of European and East Asian populations ( Supplementary Table 14 ). For CD, 27 genes were significant in European ancestry, and 11 more were identified via meta-analysis ( Supplementary Table 14 ). For UC, 22 genes were significant in European ancestry, and an additional 8 genes were discovered through meta-analysis ( Supplementary Table 14 ). Altogether, AS-WAS identified 345 genes, comprising 309 from European ancestry and 36 from meta-analysis with Asian ancestry. Fine-mapping underscored 145 high-confidence genes, including 116 with PIP > 0.5 from FOCUS analyses ( Supplementary Table 15 ) and 29 additional genes with the smallest AS-WAS P -values within 1 Mb loci ( Supplementary Table 16 ). For cell type-specific TWAS, we first estimated the heritability contributed by SNPs for three major cell type, absorptive (ABS) cells, goblet (GOB), and stem cells (STM) using stratified LDSC analysis 40 ( Methods ). The analysis revealed substantial heritability contributions from these cell types ( Supplementary Table 17 ). For CD as an example, among the European ancestry, heritability was contributed to ABS cells with 65.19% (standard error, SE: 0.06), GOB with 49.36% (SE: 0.05), and STM with 17.48% (SE: 0.02). Cell type-specific TWAS based on prediction model building in three major cell types (ABS, GOB, and STM) identified 255 genes significantly associated with IBD in European ( Supplementary Table 18 ), and an additional 17 genes through meta-analysis with Asian ancestry ( Supplementary Table 18 ). For the subtype CD, we identified 38 significant genes in the European cohort ( Supplementary Table 18 ) and 5 additional genes via meta-analysis. For the subtype UC, we observed 34 additional significant genes in the European cohort and 7 more genes through meta-analysis. In total, the cell type-specific TWAS analysis uncovered 356 genes associated with IBD, CD and UC, including 327 from European ancestry and 29 additional genes identified through cross-ancestry meta-analysis ( Supplementary Table 18 ). Fine-mapping underscored 241 high-confidence genes, including 179 with PIP > 0.5 from FOCUS ( Supplementary Table 19 ) analyses and 62 additional genes with the smallest cell type-specific TWAS P -values within 1 Mb loci ( Supplementary Table 20 ). After pooling results across the above analyses, we uncovered 802 genes that are associated with IBD, including 484 high-confidence genes. Of these, 372 genes have not been reported in previous TWAS 31 , 32 , with 287 from TWAS, APA-WAS, and AS-WAS ( Figure 3 ) and 165 from cell type-specific TWAS ( Supplementary Figure 3 ). Among these, we identified 100 genes located in 43 GWAS loci that had not been previously reported ( Supplementary Table 21 ). After combining the results with xQTL mapping in GWAS risk loci, we identified 506 high-confidence risk genes for IBD. Of them, 384 genes have not been reported in previous TWAS 31 , 32 and GWAS 3 , 4 , 9 ( Supplementary Table 22 ). Of the 506 high-confidence genes, we found that analyses from these molecular traits largely provide complementary contributions to the discovery of risk genes ( Supplementary Figure 4 ). Download figure Open in new tab Figure 3. Novel risk genes identified across IBD, CD, and UC from TWAS, APA-WAS, and AS-WAS analyses. A total of 287 novel risk genes were discovered and categorized as followed: “IBD” (unique to IBD), “CD” (unique to Crohn’s disease), “UC” (unique to ulcerative colitis), “CD,IBD” (shared by CD and IBD), “CD,UC” (shared by CD and UC), “IBD,UC” (shared by IBD and UC), and “CD,IBD,UC” (shared by all three). Gene names are truncated with remaining characters replaced by an asterisk (*) due to space constraints. Functional evidence of etiological roles for the newly identified risk genes Functional enrichment analyses on the 506 high-confidence risk genes revealed significant enrichment in the established immune and signaling pathways, such as Cytokine Signaling, Interferon Gamma Signaling and Allograft rejection pathway ( Supplementary Table 23; Methods ). In particular, these genes encompass a broad spectrum of biological processes that align with and extend beyond previously established IBD mechanisms, which included Microbial Sensing & Innate Immunity, Autophagy, IL-23/Th17 Signaling, NF-κB Signaling, Epithelial Barrier, Antigen Presentation ( HLA/ERAP ), T Cell Regulation, and Leukocyte Trafficking (Supplementary Table 24 ). These include key regulators of innate immune sensing and autophagy such as ATG16L1 and LRRK2 ; Th17 / IL-23 signaling genes including IL23R , STAT3 , JAK2 , and TYK2 ; NF-κB signaling components such as NFKB1 and REL ; epithelial barrier and mucosal integrity genes like MUC1 and GNA12 ; and antigen processing and presentation molecules within the HLA region (e.g., HLA-DQA, HLA-DRB, TAP1 , ERAP2 ). These findings not only recapitulate prior GWAS, but also integrate key functional genes implicated in IBD pathogenesis through immune modulation, microbial interaction, and epithelial homeostasis. Risk genes supported by scRNA-seq and spatial transcriptomics data We next analyzed scRNA-seq datasets on normal colon tissues from 18 non-IBD controls and inflamed colon tissues from 65 CD patients 41 ( Methods ) using differential expression analysis on three major cell types (ABS, GOB, and STM). We provided evidence that the identified high-confidence genes ( P = 3.76 × 10 - 5 ), or genes stratified by cell type-specific TWAS ( P = 3.65 × 10 - 6 ), bulk-level TWAS approaches (TWAS, APA-WAS, and AS-WAS ( P = 4.92 × 10 - 17 ), and were significantly enriched among the differentially expressed genes (DEGs) between non-IBD controls and CD patients ( Supplementary Table 25; Methods ). For example, the genes identified through ABS-specific TWAS were enriched among DEGs in ABS cells ( P = 3.28 × 10 - 5 ) ( Supplementary Table 25 ). Additionally, risk genes from bulk-level TWAS approaches were enriched among DEGs in all three cell types: ABS ( P = 3.15 × 10 - 17 ), GOB ( P = 4.07 × 10 - 8 ), and STM ( P = 6.23 × 10 - 17 ) ( Supplementary Table 25 ). Among the 506 high-confidence genes, 268 genes (52.96%) were supported by differential expression analysis (nominal P < 0.05) with consistent association direction relative to TWAS or xQTL results, providing additional evidence of transcriptional dysregulation involved in IBD pathogenesis ( Supplementary Table 26 ). Specifically, 122 genes (22.13%) were identified through single-cell differential expression analysis by comparing the expression between 18 non-IBD controls and 65 CD patients ( Supplementary Table 26, Supplementary Table 27 ), while 220 genes were supported by cell type-specific expression from spatial transcriptomics data, including 146 (28.85%) genes not detected in the single-cell differential expression analysis ( Supplementary Table 28 , Supplementary Table 29 ). Characterizing drug target genes and therapeutic candidates Using data from DrugBank 42 , ChEMBL 43 , and the TTD 44 , we comprehensively annotated our findings as therapeutic candidates targeted by approved or clinical-stage drugs. Among 506 genes, we identified 60 genes encoding druggable proteins, potentially targeted by 796 approved drugs or drugs in clinical trials ( Supplementary Table 30 ). Of them, we characterized 46 genes linked to 225 drugs approved or under clinical trial phase II/III ( Figure 4 ), suggesting their potential as therapeutic options for treating CD, IBD, and UC. Of note, our results found the drug Sulfasalazine already utilized to treat IBD/CD/UC and another drug Tenapanor utilized to treat irritable bowel syndrome (IBS). We highlighted these genes involved in immune-related pathways, such as the IL23-Th17-cytokine axis, as a promising mechanism underlying IBD pathogenesis. This finding aligns with previous studies showing that IL23 supports the expansion and maintenance of Th17 cells, which secrete proinflammatory cytokines including IL17A, IL21, and IL22 that drive mucosal inflammation, neutrophil recruitment, and epithelial injury 45 , 46 . Download figure Open in new tab Figure 4. Circular plot of druggable genes and associated drugs for IBD treatment. The plot displays 95 druggable proteins potentially targeted by 388 approved drugs or compounds currently in clinical trials for IBD. From the inner to outer layers, the plot shows IBD, proteins, and their corresponding drugs. DISCUSSION In this study, we identified 90 previously unreported loci associated with IBD through a meta-analysis of multi-ancestry GWAS data, thereby significantly expanding the known genetic architecture of IBD. To prioritize potential causal genes, we integrated large-scale, multi-ancestry, and multi-omics datasets using an innovative sTF-TWAS approach that incorporated both bulk and single-cell expression data. Fine-mapping and colocalization analyses further supported the identification of 506 high-confidence IBD risk genes, including 384 not previously reported genes. These results significantly increase the number of putative susceptibility genes and deepen our understanding of the cellular mechanisms driving IBD pathogenesis. Beyond genes mapped to canonical immune and epithelial barrier pathways, we identified a substantial number of novel risk genes potentially involved in underexplored disease mechanisms. For example, genes involved in vesicular protein trafficking within the early secretory pathway (e.g., TMED5 ) 47 is important in maintaining Golgi apparatus structure and facilitating the transport of glycosylphosphatidylinositol-anchored proteins. Genes involved in cell adhesion and signal transduction (e.g., ITGB3 ) 48 suggest an important role in reactive oxygen species (ROS)-induced migration and invasion in colorectal cancer. ITGB3 was also found to enrich in intestinal epithelial cells, whose elevation can induce cell senescence signatures in colonic epithelial cells 49 . Additional candidates, such as NUCKS1 , essential in genome stability and metabolic regulation, are involved in DNA damage response and metabolism, inflammatory immune response, and microRNA 50 . The discovery revealed additional key biological processes relevant to IBD, including epithelial–immune crosstalk, stress responses, and metabolic reprogramming, providing new opportunities for functional validation and therapeutic development. To further investigate transcriptional dysregulation in IBD, we found that more than half of the prioritized genes were supported by differential expression analysis using single-cell and spatial transcriptomic datasets from colon and IBD tissues. Several of these genes are also supported by previous experimental studies. For example, GPX4 , a member of the glutathione peroxidase family, can detoxify lipid peroxides and regulate ferroptosis, whose inhibition can trigger ferroptotic cell death 51 , 52 . It can function as antioxidants through the incorporation of selenocysteine at the catalytic site, which facilitate oxidation-reduction reactions 53 . GSDMB was downregulated in early enterocyte cells, its potential involvement in IBD-associated cellular dysfunction 54 . GSDMB expression was found to be upregulated during IBD, particularly within intestinal epithelial cells, where it is linked to gene pathways enriched for proliferation, migration, and adhesion, but not for pyroptosis 55 . PIGR is an epithelial glycoprotein, whose deletion may be associated with alterations in the gut microbiota and increased susceptibility to colitis 56 . PIGR mediates the transcytosis of secretory IgA (SIgA) across epithelial cells into the intestinal lumen, where SIgA preserves spatial segregation between the microbiota and the epithelial surface. Perturbation of the SIgA- PIGR -microbial axis is associated with increased risk of allergic and inflammatory diseases of the intestine 57 . SLC44A4 ’s function involves uptaking the gut microbiota-generated thiamine pyrophosphate (TPP) in the colon, so the deletion of SLC44A4 affects colon physiology and increases the susceptibility to inflammation 58 . Future functional studies are necessary to elucidate the roles of these candidate genes in stem cell differentiation, adhesion and migration, proliferation, and metabolism/signaling, key processes implicated in IBD pathogenesis. By analyzing drug-protein interaction databases, we identified 46 genes linked to 225 drugs that are either approved or in Phase II/III clinical trials relevant to IBD treatment. Several of the identified medications are commonly prescribed to IBD patients with comorbid conditions. For instance, Furosemide, a diuretic, is commonly prescribed for hypertension, a condition that is highly prevalent among IBD patients 59 . Similarly, Metformin is a first-line treatment for type 2 diabetes, which often co-exist with IBD. Observational studies using existing administrative claims data allow for a timelier and efficiently assessment of these drugs than randomized clinical trials. Our results provide the roadmap for future etiologic studies that can confirm or refute the utility of the identified medications. Our study findings support the therapeutic potential of these compounds, highlight opportunities for drug repurposing in IBD prevention and intervention, and contribute to advancing precision medicine-guided drug discovery for IBD. Of note, our meta-analyses were based on summary-level statistics, with population stratification carefully controlled in the original studies, although close relatedness among few samples across cohorts cannot be fully excluded. Mild inflation was observed in the meta-analysis; however, after removing loci within 300 kb of previously reported IBD-associated variants, the Q–Q plot showed substantial improvement, indicating that the observed inflation primarily reflects polygenic effects rather than residual stratification. To bridge genetic discovery and clinical translation, we integrated comprehensive drug–protein interaction databases to prioritize druggable target genes for IBD therapy. The large number of candidate targets highlights opportunities for drug repurposing and development. Several of the approved drugs (e.g. Furosemide, Metformin) are commonly prescribed for conditions that are highly prevalent among individuals with IBD and could be candidates for evaluation. Future studies leveraging real-world data and target trial emulation on these promising drugs, in conjunction with mechanistic experiments and clinical trials, will be critical to validate these candidates and accelerate precision-medicine–based therapies for IBD. METHODS GWAS cohorts in both European and Asian populations In the meta-analysis of European ancestry, we obtained the GWAS summary statistics from three large biobanks: VA MVP 35 , the FinnGen study (R12, November 2024) 11 , Pan-UKB 36 , and a published GWAS meta-analysis of IBD (IIBDGC) 3 . VA MVP is one of the largest US national cohorts containing comprehensive genotype and phenotype data from the VA electronic health record (EHR), with GWAS summary statistics released in July 2024. IBD cases from VA EHR were defined by phecodes curated by the MCP Data Core 9 . For each phecode, participants with ≥2 phecode-mapped ICD-9 or ICD-10 codes were defined as cases. Controls were defined as those with no instance of a phecode-mapped ICD-9 or ICD-10 code. The current meta-analysis included 7,837 individuals with IBD (N control = 438,916), 3,402 individuals with CD (N control = 446,631), and 5,229 individuals with UC (N control = 442,102) from MVP GWAS summary statistics. FinnGen is a large public-private collaboration project within Finland and international partners. It combines and analyzes large-scale genomic and health record data from Finnish biobank and national health registries. We included the GWAS summary statistics from FinnGen (R12) in our analysis, with a total of 10,960 cases of IBD (N control = 489,388), 2,489 cases of CD (N control = 497,622), and 7,220 cases of UC (N control = 492,160). In addition, we obtained the summary statistics of IBD from the Pan-UKB project, with a total of 5,183 cases of IBD, 2,008 cases of CD, 3,670 cases of UC, and 330,270 controls. Finally, we included a summary statistic from a GWAS meta-analysis of UKIBDGC and IIBDGC, which contained a total of 25,042 cases of IBD (N control = 34,915), 12,194 cases of CD (N control = 28,072), and 12,366 cases of UC (N control = 33,609). For convenience, this GWAS meta-analysis was referred to as IIBDGC in the manuscript. The diagnosis of IBD was followed by endoscopic, histopathological, and radiological criteria. In addition to the above European individuals, we included additional East Asian populations, including 14,393 cases of IBD, 7,372 cases of CD, 6,862 cases of UC, and 15,456 controls ( Supplementary Table 1 ) in our multi-ancestry GWAS meta-analysis. IBD cases were diagnosed by either the European Evidence-based Consensus or the ECCO-ESGAR Guideline for Diagnostic Assessment in IBD in the East Asian cohorts. Quality control and association analysis in previous individual GWASs In MVP, the ancestry assessment was followed by genetically inferred ancestry (GIA). Variants with an imputation quality r 2 > 0.3 and a minor allele count (MAC) > 20 within the relevant HARE-defined ancestry group were included in the analysis 35 . Analyses were adjusted for age, sex, and 10 ancestry-specific genetic principal components (PCs). In FinnGen, association tests were performed for variants with a minimum allele count of 5 among each phenotype’s cases and controls, with sex, age, 10 PCs, FinnGen chip version 1 or 2, and legacy genotyping batch included as covariates. Among the UKB, variants with INFO scores over 0.8 and an allele count of ≥ 20 were retained. Sex and age along with the first 10 PCs were included in the GWAS modelling. In the IBD Genetics Consortium GWAS meta, variants with minor allele frequency (MAF) < 0.1% and INFO 0.9) were discarded in the output of the fixed-effects meta-analysis. In addition, genome-wide significant variants for which the meta-analysis P-value was not lower than all the cohort-specific P -values were removed. In the East Asian GWAS, variants with an imputation quality r 2 > 0.6 and MAF > 0.001 were retained in the analysis, adjusting for the first ten PCs. Details about the QC and association analysis process can be found on the original studies in Data Availability . Quality control and meta-analysis of GWAS summary statistics We first used the LiftOver tool 60 on UCSC Genome Browser to convert GWAS summary statistics on GRCH 37 to GRCH 38. SNPs that are not able to be converted successfully were dropped from the analysis. For each GWAS summary statistics, if duplicate SNPs were found, the SNP with the smallest P -value was retained in the meta-analysis. In the UKB cohort, only the alternate allele frequency in cases and controls was provided, so the MAF for them was first calculated, and the MAF for the overall population was approximated by averaging the MAF among cases and controls. If the allele frequency or MAF for all populations, neither case nor controls, was not provided in the individual GWAS, we utilized 1000 Genomes Project (1KGP) on GRCh38 EUR reference panel 61 to map MAF in the datasets. For meta-analysis, we only included variants on autosomes. Genetic variants with MAF > 0.01 remained for downstream analysis. We conducted an inverse variance weighted meta-analysis of existing GWAS summary statistics in European populations, including data from MVP, FinnGen (R12), Pan-UKB, and IBDGC GWAS meta-analysis of IBD using METAL (release 2011-03-25) 62 . Cochran’s Q Statistic and I 2 statistics were also calculated using METAL. In the multi-ancestry meta-analysis, we further combined data from East Asians using the same model. Genomic control was applied to each individual GWAS in METAL and then applied to the GWAS meta-analyzed summary statistics according to the METAL document from Code Availability. Genomic inflation factors and Q-Q plot were utilized to evaluate the genomic inflation of GWAS. If λ close to 1, it suggests that genomic inflations were well-controlled. We next applied LDSC regression 63 by regressing the χ² statistics of each SNP from GWAS summary statistics on its corresponding LD score. The pre-computed LD scores were obtained from the 1KGP reference panel. LDSC intercepts were also considered when evaluating genomic inflation. If the intercepts from LDSC regression show values close to 1, then the deviation of Q-Q plot may be due to polygenic effects instead of population stratification. To evaluate such effects, we further plotted the Q-Q plot by removing the loci that are located within 300 Kb of previously identified GWAS loci. If the Q-Q plot of removing previously identified loci shows minimal deviation, then the confounding of population stratification was well-controlled. Previous GWAS loci collection and novel locus definition The novel loci were defined following the previous GWASs of IBD 3 , 4 . We first created a reference risk SNP list for each trait (CD, UC, or IBD) from previous individual GWASs or IBD consortia that were included in the current meta-analysis 3 , 4 , 11 , 35 , 36 by selecting the SNPs that reached the genome-wide significant threshold ( P < 5×10 - 8 ). If an SNP was reported in multiple GWASs, the one with the lower P -value was retained. For the reference SNP list of IBD, variants previously reported as CD-or UC-associated were also included in the list and considered as IBD-associated, while a known CD-associated SNP was not considered as associated with UC, or vice versa 3 . Next, we selected the genome-wide significant ( P < 5×10 - 8 ) SNPs from our meta-analysis and removed the variants on the reference SNP list. To ensure the putatively identified genome-wide significant SNPs in our meta-analysis were not due to LD with previously reported variants, we further checked whether they are at least 500 Kb away from already known SNPs. If yes, the SNP was not considered novel and was removed. For the remaining genome-wide significant SNPs, we performed a LD clumping with an LD window of 500 Kb and r 2 threshold of 0.6 using the 1KGP EUR reference panel for European-specific meta-analysis to identify the novel genetic loci. If the genetic loci after LD clumping overlapped with other loci within 500 Kb, we then merged the loci and kept the variants with the strongest association (the lowest P ) as the lead variants for the merged genetic loci. Clumping was performed using PLINK 1.9 64 . Finally, the retained SNPs were further cross-checked with GWAS loci identified from the latest GWASs of IBD 4 to confirm that they are novel. For the multi-ancestry meta-analysis, the 1KGP European reference panel and 1KGP East Asian reference panel were used for LD clumping, respectively. LD clumping results from the European and East Asian panel were merged, and the lead variants with the lowest P within 500 Kb were kept. To identify additional novel loci from European-specific meta-analysis, we merged the overlapping loci from European-specific and multi-ancestry meta-analysis within 500 Kb and then compared the P -values for the lead variants in the overlapping loci. Loci where lead variants from multi-ancestry meta-analysis with the lower P -values were considered as the additional novel loci from multi-ancestry meta-analysis. RNA-seq data processing and gene expression, AS and APA quantification We developed ancestry-specific TWAS models using three bulk gene expression datasets: 707 European-ancestry samples, including 423 from the BarcUVa-Seq project 37 and 284 from the GTEx project 37 , 65 , along with 364 Asian-ancestry samples from our previous study 34 . Gene-level expression was quantified using RNA-SeQC, aligning with the annotation used in GTEx as described in our previous study 37 . In general, we applied quantile and inverse normalization to the RNA-seq transcript per million (TPM) data. Next, we adjusted potential confounding factors through a probabilistic estimation of expression residuals (PEER) analysis, following the GTEx recommendations for determining the number of PEER factors. We quantified AS level from RNA-seq data using LeafCutter 66 and RegTools 66 , following the methods descried in our previous study 37 . The level of APA was assessed using the percentage of PDUI, as estimated with DaPars v2.0 67 , in accordance with our prior research 19 . For downstream gene expression, AS and APA prediction model building, we adjusted these measurements for potential confounding factors, including PEER factors (60 for BarcUVa-Seq and Asian-ancestry data, 45 for GTEx), as well as age, sex, and the first five genotype PCs. Cell type-specific gene expression quantification (deconvolution bulk RNA-seq based on the reference single-cell data.) We obtained scRNA-seq data for 33,806 epithelial cells from normal colon tissue using the COLON MAP project 33 . These epithelial cells are clustered into seven clusters: absorptive cells (ABS), crypt top cells (CT), enteroendocrine cells (EE), goblet cells (GOB), stem cells (STM), transit amplifying cells (TAC), and tuft cells (TUF). We applied quality control to select high quality cells: nUMI > 500, nGene > 250, log10GenesPerUMI > 0.8, and mitoRatio < 0.2. Following the CIBERSORTx 27 pipeline, we constructed a signature gene expression matrix for each cell type and estimated each cell type fraction for bulk RNA-seq data from 707 European and 364 East Asian individuals. Among these analyzed cell types, absorptive (ABS) cells exhibited the highest median proportion (49%), followed by stem-like (STM) cells (29%), goblet (GOB) cells (3%), and other cell types (< 1%). We kept the top three most abundant cell types (ABS, STM, and GOB) and performed deconvolution of bulk RNA-seq data using their respective signature matrices to derive cell type-specific gene expression profiles. The downstream processing and normalization of cell type-specific gene expression data were consistent with those used for bulk gene expression prediction. Model building for cell type-specific expression also incorporated the recommended PEER factors, along with age, sex, and the first five genotype PCs. Genotype data processing The process of genotype data from BarcUVa-Seq and GTEx project was described in our previous study 37 . In general, 400,000 SNPs from the BarcUVa-Seq project was imputed using TOPMed (Version 2) with European ancestry as the reference panel. We then excluded variants with an imputation quality below R 2 < 0.3, missingness rate greater than 10%, a MAF less than 0.01 (MAF < 0.01), or significant deviations from Hardy-Weinberg equilibrium ( P < 10 - 6 ). The same process was applied to genotype data from East Asian data, which was imputed using TOPMed with Asian ancestry as reference panel 68 . In addition, GTEx project provides the whole genome sequencing (WGS) data and we downloaded the VCF format from dbGaP (phs000424.v8.p2). A comprehensive outline of WGS variant quality control procedures is available from the GTEx consortium 69 . Identifying putative target genes for GWAS risk loci through xQTL and colocalization analyses To identify putative target genes associated with both novel and previously established GWAS-identified risk variants, we performed xQTL mapping using normalized bulk gene expression, AS, APA, and cell type-specific gene expression data. Linear regression analyses were conducted between each risk variant and molecular traits (eQTL, aQTL, apaQTL, or cell type-specific eQTL) for genes located within ±1 Mb of the variant, adjusting for potential confounders as described in the preceding section. For cell type-specific eQTL analyses, we focused on expression data from ABS, GOB, and STM cells. Since the majority of GWAS risk variants were identified in European populations, the corresponding xQTL analyses were primarily conducted using molecular data from individuals of European ancestry. For additional risk variants identified through meta-analysis of European and East Asian populations, we also performed xQTL analyses using data from East Asian individuals. Statistical significance for xQTLs was determined using a Bonferroni-corrected P < 0.05. For these significant xQTLs, we conducted colocalization analyses to evaluate the likelihood of shared causal variants between each molecular trait and the GWAS signal. Within +/-500 Kb of each GWAS risk variant, we extracted GWAS summary statistics and corresponding xQTL data, analyzing only regions with at least 50 overlapping variants. We employed default priors and utilized the ‘coloc.abf’ function to calculate the posterior probability (PP) of colocalization. Posterior probability for hypothesis 4 (PP.H4), which reflects a shared causal variant between the two traits, was used to assess colocalization. A PP.H4 > 0.8 was considered evidence of colocalization. ABF colocalization was conducted using the package “coloc” in R 39 . TWAS, AS-WAS, APA-WAS and cell type-specific TWAS Genetic prediction models We applied our sTF-TWAS framework 17 , 18 to train prediction models for gene expression, AS, and APA levels. This approach utilizes cis-genetic variants located within a ±1 Megabase (Mb) region and occupied by transcription factors 34 and the elastic-net is used as the predictive model: where 𝐸 g represents the 𝑔-th gene expression (AS, APA or cell type-specific gene expression) level; 𝑋 i denotes the genotype vector of the 𝑖-th variant (𝑋 i =0, 1, or 2) and 𝑝 is the total count of variants within the cis-region. The term 𝑤 i,g represents the effect size of 𝑖 -th variant for 𝑔 -th gene expression, and 𝜀 is the residual with variance 𝜎 2 e . An elastic-net penalty, combining 𝐿 1 and 𝐿 2 regularizers, is applied to optimize (𝑤 w,g ) for genetic variants. To optimize model parameters and evaluate predictive efficacy, we used five-fold cross-validation. Model performance was assessed by calculating the squared correlation (R²) between predicted and measured gene expression (or AS, APA or cell type-specific gene expression). Association analysis Under our sTF-TWAS framework, we applied our ancestry-specific prediction models (gene, expression, AS, APA and cell type-specific gene expression) into GWAS summary statistics from European (49,022 IBD cases and 1,293,489 controls) and East Asian populations (14,393 cases and 15,456 controls), respectively, using the where, the Z g estimates the association between genetic prediction expression (or AS, APA, cell type-specific gene expression) and IBD risk; 𝑤̂ i,g is the weight of i -th genetic variant for predicting 𝑔-th gene expression. 𝛽̂ i and se (𝛽̂ i ) are the GWAS-reported regression coefficients and its SE for i -th variant; and 𝜎̂ i and 𝜎̂ g are the estimated variances of i -th variant and the predicted expression of 𝑔-th gene ( or AS, APA, cell type-specific gene expression). We conducted TWAS, AS-WAS, APA-WAS, and cell type-specific TWAS separately in European and East Asian populations. To integrate gene-level association signals across ancestries and enhance statistical power, we performed meta-analysis using the Aggregated Cauchy Association Test (ACAT). Parallel analyses were conducted for CD and UC, respectively. Statistical significance for TWAS, AS-WAS, APA-WAS, or cell type-specific TWAS associations was determined using a Bonferroni-corrected P-value threshold of < 0.05. Fine-mapping analysis We performed fine-mapping analysis to nominate putative causal genes using FOCUS 14 , for TWAS, AS-WAS, APA-WAS, or cell-type specific TWAS, respectively. The primary association signal for each gene in TWAS (or other molecular trait) was first identified based on ancestry-specific results from either the European or East Asian population. Linkage disequilibrium (LD) reference panels from the corresponding populations in the 1KGP 61 were used. Fine-mapping was then conducted separately for each ancestry-driven gene set using FOCUS-defined genomic regions and ancestry-matched LD panels. Each gene was assigned a posterior inclusion probability (PIP), representing the likelihood of being causal for the observed TWAS signal. Stratified LDSC regression analysis To identify the most disease-relevant cell types in colon tissue for IBD and its subtypes, we performed a stratified LDSC by using the single-cell normalized gene expression matrix and GWAS summary statistics of IBD. The scRNA-seq gene expression data on terminal ileum (TI) and ascending colon (AC) tissues were obtained from 202,359 cells from 170 specimens across non-IBD controls (N = 18) and active/inactive CD patients (N = 65) from the National Center for Biotechnology Information Gene Expression Omnibus (GEO) 41 . The cells were classified into 16 cell types within the epithelial compartment: early enterocytes, intermediate enterocyte, mature enterocyte, early colonocyte, intermediate colonocytes cells, mature colonocyte, early GOB, mature GOB, GOB proliferating cells, cycling TA cells, BEST4/OTOP2 cells, tuft cells, enteroendocrine cells (EEC), Paneth cells, STM cells, and LND cells (annotated by high expression of LCN2 , NOS2 , and DUOX2 ) 41 . Among them, enterocytes and colonocytes were merged as ABS cells. ABS, GOB, and STM were considered as major cell types for IBD and included in the follow-up analysis. Using Seurat v5, we generated a pseudo bulk gene expression matrix for each of the 83 subjects (18 non-IBD controls and 65 CD patients). Pseudo bulk aggregation was used to sum raw counts across all cells of a given type within each subject, reducing the influence of outlier cells and producing more robust, cell type-specific expression profiles 33 . Genes were then ranked by t-statistic within each cell type, and the top 10% were defined as specifically expressed genes 40 . Finally, we applied stratified LDSC on specifically expressed genes to estimate partitioned heritability and identify cell types significantly enriched for IBD risk. Pathway enrichment analysis and function categorization To explore the biological functions and pathways enriched in the gene list, we performed gene set enrichment analysis using the Enrichr web-based tool ( https://maayanlab.cloud/Enrichr/ ). Enrichr integrates multiple curated databases 70 – 72 , enabling comprehensive functional annotation and pathway enrichment analysis. Enrichr uses a combined score metric that incorporates the P -value from the Fisher exact test and the z-score of the deviation from the expected rank to rank enriched terms. Significantly enriched pathways were identified based on FDR-corrected P < 0.05. To contextualize these genes within known biological processes, we mapped the identified genes to established IBD-related pathways. Pathway definitions were obtained from curated databases, including KEGG, Reactome, and the Immunology Database and Analysis Portal (ImmPort). Additionally, we referenced literature-supported functional experiments that validate gene involvement in IBD pathogenesis. We further annotated genes with supporting evidence from functional studies, including expression profiling, knockout or knockdown models, and immunological assays, to highlight genes with demonstrated mechanistic roles in IBD. Differential expression analysis using single-cell RNA-sequencing and spatial transcriptomics Pseudo-bulk aggregation with summing raw counts for each cell type from 18 non-IBD controls and 65 CD patients was used, as described in the preceding section. We performed differential expression analysis for each cell type between non-IBD controls and active/inactive CD subjects using DESeq2 with default parameters. In addition, we compared the gene expression of each individual cell type to the rest of other cells within both non-IBD controls and active/inactive CD patients. Genes with FDR-corrected P < 0.05 were considered as significantly differentially expressed genes. If a significant gene identified by TWAS and xQTL also showed an average log2 fold change in the same direction in the single-cell analysis, and had a nominal P -value < 0.05, it was considered to have a consistent direction of association with the TWAS and xQTL results. Differential expression analysis was also performed in spatial transcriptomic data obtained from CD samples from 10X Genomics. More detailed information for spatial transcriptomic data processing can be found in a previous study 41 . Four CD samples from the above samples were selected because of their relatively high proportions of LND cells 41 . Among them, two samples were with active TI disease (GCA092 and GCA033), while the other two were with active AC disease (GCA089 and GCA099) 41 . Data normalization and scaling were performed in R using SCTransform and ScaleData functions, respectively. To identify spatially variable genes, we used FindVariableFeatures function to select the most informative genes for downstream analysis. Principal component analysis (PCA) was used to reduce dimensionality using RunPCA function, and the top 10 PCs were used for clustering with FindClusters function at a resolution of 0.8. FindNeighbors function was used to compute a nearest-neighbor graph based on PCA. For differential expression analysis, the FindAllMarkers function was used to compare the gene expression profile between one cluster and all others within the same sample, with the minimum percentage expression threshold of 0.25 and log fold-change threshold of 0.25. P -values were adjusted using Bonferroni correction, and corrected P < 0.05 were considered as statistically significant genes. Uniform Manifold Approximation and Projection (UMAP) were performed for visualization. Cell type annotation for each cluster was conducted using GPTCelltype 73 by selecting the top positively expressed feature genes. Clustering visualization was presented in Supplementary Figure 5 . Combing results from four samples, 3,047 unique genes (48.26%) were differentially expressed (Bonferroni-corrected P < 0.05) among the total 7,060 genes. Characterizing potential therapeutic drug candidates To identify potential therapeutic drug candidates, we assembled a comprehensive set of drug-target pairs from four drug databases: DrugBank 42 , ChEMBL 43 , and the TTD 44 . DrugBank is a curated database containing over 500,000 drugs and thousands of bioentities (proteins, genes, and other cellular components). ChEMBL provides detailed information on more than 2 million bioactivity assays for bioactive molecules with drug-like properties, covering over 2 million compounds. TTD offers comprehensive information on thousands of drug targets, including 426 approved, 1,014 in clinical trials, 212 in preclinical or patented stages, and 1,479 reported in the literature. Drugs and their paired genes were comprehensively downloaded from these three databases. We subsequently linked these drugs to our identified disease-susceptibility genes. DATA AVAILABILITY All GWAS summary statistics included in the current study are publicly available. VA MVP summary statistics is available at dbGAP (accession number phs002453). FinnGen summary statistics can be accessed at https://finngen.gitbook.io/documentation/data-download . The Pan-UK Biobank GWAS summary statistics is available at https://pan.ukbb.broadinstitute.org . IBDGC European GWAS meta-analysis can be downloaded through ftp://ftp.sanger.ac.uk/pub/project/humgen/summary_statistics/human/2016-11-07/ . The East Asian GWAS summary statistics is available at IIBDGC website: https://www.ibdgenetics.org . ScRNA-seq datasets are available at the GEO database under accession number GSE266546. Spatial transcriptomic data are available at request to corresponding authors. LDSC Reference data is available at https://alkesgroup.broadinstitute.org/LDSCORE . CODE AVAILABILITY The code used our study is available at GitHub: https://github.com/XingyiGuo/IBD_genetics . PLINK 1.9: Shaun Purcell, Christopher Chang; www.cog-genomics.org/plink/1.9/ METAL: GitHub - statgen/METAL: Meta-analysis of genomewide association scans METAL Documentation: https://genome.sph.umich.edu/wiki/METAL_Documentation LDSC v1.0.1: https://github.com/bulik/ldsc Seurat: https://satijalab.org/seurat/ Enrichr web-based tool: https://maayanlab.cloud/Enrichr/ FUNDING SOURCE This research was supported by US National Institutes of Health (NIH) grant R01CA269589-01A1 to X.G. and R01DK103831 to K.L. Supplementary Figures Supplementary Figure 1. Quantile-quantile (QQ) plots of GWAS meta-analysis results. QQ plots are shown for the full GWAS meta-analysis dataset and for the dataset after excluding loci within 300 kilobases (Kb) of previously reported genome-wide significant loci for each IBD subtype. Supplementary Figure 2. The Q-Q plots of individual GWAS summary statistics included in the meta-analysis. QQ plots are shown for individual GWAS summary statistics included in the meta-analysis for IBD, CD and UC. Supplementary Figure 3. Cell type-specific TWAS identifies 165 novel risk genes across IBD, CD, and UC. A total of 165 novel risk genes were discovered and categorized by disease specificity: “IBD” (unique to IBD), “CD” (unique to Crohn’s disease), “UC” (unique to ulcerative colitis), “CD,IBD” (shared by CD and IBD), “CD,UC” (shared by CD and UC), “IBD,UC” (shared by IBD and UC), and “CD,IBD,UC” (shared by all three). Gene names are truncated with remaining characters replaced by an asterisk (*) due to space constraints. Supplementary Figure 4. Unique and shared high-confidence genes identified from different molecular trait analyses for IBD, CD, and UC. The horizontal bars represent the total number of genes identified by each method. A solid circle denotes genes uniquely identified by that method. Circles connected by a black line indicate genes shared exclusively between those methods but not identified by others. Supplementary Figure 5. Clustering visualization of spatial transcriptomic data. Cell type annotation produced by using GPTCelltype by selecting the top positively expressed feature genes. ACKNOWLEDGEMENTS We acknowledge the significant genetic resource contributions from previous IBD GWASs, VA MVP, FinnGen study, UKB, UKIBDGC and IIBDGC. Footnotes ↵ † Shared co-first authorship We have corrected a typo in an author's name. REFERENCES 1. ↵ Ng , S.C. et al. Worldwide incidence and prevalence of inflammatory bowel disease in the 21st century: a systematic review of population-based studies . Lancet 390 , 2769 – 2778 ( 2018 ). OpenUrl CrossRef PubMed 2. ↵ Lewis , J.D. et al. Incidence, Prevalence, and Racial and Ethnic Distribution of Inflammatory Bowel Disease in the United States . Gastroenterology 165 , 1197 – 1205 e2 ( 2023 ). OpenUrl CrossRef PubMed 3. ↵ de Lange , K.M. et al. Genome-wide association study implicates immune activation of multiple integrin genes in inflammatory bowel disease . Nat Genet 49 , 256 – 261 ( 2017 ). OpenUrl CrossRef PubMed 4. ↵ Liu , Z. et al. Genetic architecture of the inflammatory bowel diseases across East Asian and European ancestries . Nat Genet 55 , 796 – 806 ( 2023 ). OpenUrl CrossRef PubMed 5. ↵ Verma , A. et al. Diversity and scale: Genetic architecture of 2068 traits in the VA Million Veteran Program . Science 385 , eadj1182 ( 2024 ). OpenUrl CrossRef PubMed 6. ↵ Barrett , J.C. et al. Genome-wide association defines more than 30 distinct susceptibility loci for Crohn’s disease . Nat Genet 40 , 955 – 62 ( 2008 ). OpenUrl CrossRef PubMed Web of Science 7. Jostins , L. et al. Host-microbe interactions have shaped the genetic architecture of inflammatory bowel disease . Nature 491 , 119 – 24 ( 2012 ). OpenUrl CrossRef PubMed Web of Science 8. ↵ Levine , A.P. et al. Genetic Complexity of Crohn’s Disease in Two Large Ashkenazi Jewish Families . Gastroenterology 151 , 698 – 709 ( 2016 ). OpenUrl CrossRef PubMed 9. ↵ Liu , J.Z. et al. Association analyses identify 38 susceptibility loci for inflammatory bowel disease and highlight shared genetic risk across populations . Nat Genet 47 , 979 – 986 ( 2015 ). OpenUrl CrossRef PubMed 10. ↵ Liu , T.C. & Stappenbeck , T.S . Genetics and Pathogenesis of Inflammatory Bowel Disease . Annu Rev Pathol 11 , 127 – 48 ( 2016 ). OpenUrl CrossRef PubMed 11. ↵ Kurki , M.I. et al. FinnGen provides genetic insights from a well-phenotyped isolated population . Nature 613 , 508 – 518 ( 2023 ). OpenUrl CrossRef PubMed 12. ↵ Gamazon , E.R. et al. A gene-based association method for mapping traits using reference transcriptome data . Nat Genet 47 , 1091 – 8 ( 2015 ). OpenUrl CrossRef PubMed 13. ↵ Gusev , A. et al. Integrative approaches for large-scale transcriptome-wide association studies . Nat Genet 48 , 245 – 52 ( 2016 ). OpenUrl CrossRef PubMed 14. ↵ Mancuso , N. et al. Probabilistic fine-mapping of transcriptome-wide association studies . Nat Genet 51 , 675 – 682 ( 2019 ). OpenUrl CrossRef PubMed 15. Wainberg , M. et al. Opportunities and challenges for transcriptome-wide association studies . Nat Genet 51 , 592 – 599 ( 2019 ). OpenUrl CrossRef PubMed 16. ↵ Amariuta , T. et al. IMPACT: Genomic Annotation of Cell-State-Specific Regulatory Elements Inferred from the Epigenome of Bound Transcription Factors . Am J Hum Genet 104 , 879 – 895 ( 2019 ). OpenUrl PubMed 17. ↵ Wen , W. et al. Genetic variations of DNA bindings of FOXA1 and co-factors in breast cancer susceptibility . Nat Commun 12 , 5318 ( 2021 ). OpenUrl PubMed 18. ↵ He , J. et al. Integrating transcription factor occupancy with transcriptome-wide association analysis identifies susceptibility genes in human cancers . Nat Commun 13 , 7118 ( 2022 ). OpenUrl PubMed 19. ↵ Guo , X. et al. Large-Scale Alternative Polyadenylation-Wide Association Studies to Identify Putative Cancer Susceptibility Genes . Cancer Res 84 , 2707 – 2719 ( 2024 ). OpenUrl PubMed 20. ↵ Goldman , S.L. et al. The Impact of Heterogeneity on Single-Cell Sequencing . Front Genet 10 , 8 ( 2019 ). 21. ↵ Aygun , N. et al. Brain-trait-associated variants impact cell-type-specific gene regulation during neurogenesis . Am J Hum Genet 108 , 1647 – 1668 ( 2021 ). OpenUrl CrossRef PubMed 22. Song , X. et al. MiXcan: a framework for cell-type-aware transcriptome-wide association studies with an application to breast cancer . Nat Commun 14 , 377 ( 2023 ). 23. ↵ Thompson , M. et al. Multi-context genetic modeling of transcriptional regulation resolves novel disease loci . Nat Commun 13 , 5704 ( 2022 ). OpenUrl CrossRef PubMed 24. Li , Q. et al. Molecular profiling of human non-small cell lung cancer by single-cell RNA-seq . Genome Med 14 , 87 ( 2022 ). 25. Tian , Y. et al. Single-cell transcriptomic profiling reveals the tumor heterogeneity of small-cell lung cancer . Signal Transduct Target Ther 7 , 346 ( 2022 ). 26. ↵ Wu , F. et al. Single-cell profiling of tumor heterogeneity and the microenvironment in advanced non-small cell lung cancer . Nat Commun 12 , 2540 ( 2021 ). OpenUrl CrossRef PubMed 27. ↵ Newman , A.M. et al. Determining cell type abundance and expression from bulk tissues with digital cytometry . Nat Biotechnol 37 , 773 – 782 ( 2019 ). OpenUrl CrossRef PubMed 28. ↵ Bryois , J. et al. Cell-type-specific cis-eQTLs in eight human brain cell types identify novel risk genes for psychiatric and neurological disorders . Nat Neurosci 25 , 1104 – 1112 ( 2022 ). OpenUrl CrossRef PubMed 29. Yazar , S. et al. Single-cell eQTL mapping identifies cell type-specific genetic control of autoimmune disease . Science 376 , eabf3041 ( 2022 ). OpenUrl CrossRef PubMed 30. ↵ Perez , R.K. et al. Single-cell RNA-seq reveals cell type-specific molecular and genetic associations to lupus . Science 376 , eabf1970 ( 2022 ). OpenUrl CrossRef PubMed 31. ↵ Mancuso , N. et al. Integrating Gene Expression with Summary Association Statistics to Identify Genes Associated with 30 Complex Traits . Am J Hum Genet 100 , 473 – 487 ( 2017 ). OpenUrl CrossRef PubMed 32. ↵ Diez-Obrero , V. et al. Transcriptome-Wide Association Study for Inflammatory Bowel Disease Reveals Novel Candidate Susceptibility Genes in Specific Colon Subsites and Tissue Categories . J Crohns Colitis 16 , 275 – 285 ( 2022 ). OpenUrl PubMed 33. ↵ Chen , B. et al. Differential pre-malignant programs and microenvironment chart distinct paths to malignancy in human colorectal polyps . Cell 184 , 6262 – 6280 e26 ( 2021 ). OpenUrl CrossRef PubMed 34. ↵ Zhishan Chen , W.S. , Qing Li , et al. Identifying putative susceptible transcription factors and genes for colorectal cancer: an integrated analysis of large-scale genetic and multi-omics data . ( 2025 ). 35. ↵ Verma , A. et al. Diversity and scale: Genetic architecture of 2068 traits in the VA Million Veteran Program . Science 385 , eadj1182 ( 2024 ). OpenUrl CrossRef PubMed 36. ↵ Karczewski , K.J. et al. Pan-UK Biobank GWAS improves discovery, analysis of genetic architecture, and resolution into ancestry-enriched effects . medRxiv , 2024.03.13.24303864 ( 2024 ). 37. ↵ Chen , Z. et al. Novel insights into genetic susceptibility for colorectal cancer from transcriptome-wide association and functional investigation . J Natl Cancer Inst 116 , 127 – 137 ( 2024 ). OpenUrl PubMed 38. ↵ Qing Li , J.W. , Victor Moreno , Chao Li , Jungyoon Choi , Zhishan Chen , Wanqing Wen , Xiang Shu , Xiao-ou Shu , Jirong Long , Qiuyin Cai , Bhuminder Singh , Ken S Lau , Ulrike Peters , Graham Casey , Wei Zheng , Xingyi Guo . Multi-ancestry transcriptome-wide association analyses on single-cell deconvoluted expression data to identify cell-type specific risk genes for colorectal cancer . in American Society of Human Genetics 2024 ( Denver, Colorado , 2024). 39. ↵ Giambartolomei , C. et al. Bayesian test for colocalisation between pairs of genetic association studies using summary statistics . PLoS Genet 10 , e1004383 ( 2014 ). OpenUrl CrossRef PubMed 40. ↵ Finucane , H.K. et al. Heritability enrichment of specifically expressed genes identifies disease-relevant tissues and cell types . Nat Genet 50 , 621 – 629 ( 2018 ). OpenUrl CrossRef PubMed 41. ↵ Li , J. et al. Identification and multimodal characterization of a specialized epithelial cell type associated with Crohn’s disease . Nat Commun 15 , 7204 ( 2024 ). OpenUrl CrossRef PubMed 42. ↵ Knox , C. et al. DrugBank 6.0: the DrugBank Knowledgebase for 2024 . Nucleic Acids Res 52 , D1265 – D1275 ( 2024 ). OpenUrl CrossRef PubMed 43. ↵ Zdrazil , B. et al. The ChEMBL Database in 2023: a drug discovery platform spanning multiple bioactivity data types and time periods . Nucleic Acids Res 52 , D1180 – D1192 ( 2024 ). OpenUrl CrossRef PubMed 44. ↵ Zhou , Y. et al. TTD: Therapeutic Target Database describing target druggability information . Nucleic Acids Res 52 , D1465 – D1477 ( 2024 ). OpenUrl CrossRef PubMed 45. ↵ Abraham , C. & Cho , J . Interleukin-23/Th17 pathways and inflammatory bowel disease . Inflamm Bowel Dis 15 , 1090 – 100 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 46. ↵ Kobayashi , T. et al. IL23 differentially regulates the Th1/Th17 balance in ulcerative colitis and Crohn’s disease . Gut 57 , 1682 – 9 ( 2008 ). OpenUrl Abstract / FREE Full Text 47. ↵ Zhou , L. et al. TMED family genes and their roles in human diseases . Int J Med Sci 20 , 1732 – 1743 ( 2023 ). OpenUrl CrossRef PubMed 48. ↵ Lei , Y. et al. Proteomics identification of ITGB3 as a key regulator in reactive oxygen species-induced migration and invasion of colorectal cancer cells . Mol Cell Proteomics 10 , M110 005397 ( 2011 ). 49. ↵ Tun , X. et al. Integrin beta3-Mediated Cell Senescence Associates with Gut Inflammation and Intestinal Degeneration in Models of Alzheimer’s Disease . Int J Mol Sci 24 ( 2023 ). 50. ↵ Huang , P. , Cai , Y. , Zhao , B. & Cui , L . Roles of NUCKS1 in Diseases: Susceptibility, Potential Biomarker, and Regulatory Mechanisms . Biomed Res Int 2018 , 7969068 ( 2018 ). OpenUrl PubMed 51. ↵ Xu , S. et al. The emerging role of ferroptosis in intestinal disease . Cell Death Dis 12 , 289 ( 2021 ). 52. ↵ Mayr , L. et al. Dietary lipids fuel GPX4-restricted enteritis resembling Crohn’s disease . Nat Commun 11 , 1775 ( 2020 ). OpenUrl CrossRef PubMed 53. ↵ Short , S.P. et al. Colonic Epithelial-Derived Selenoprotein P Is the Source for Antioxidant-Mediated Protection in Colitis-Associated Cancer . Gastroenterology 160 , 1694 – 1708 e3 ( 2021 ). OpenUrl CrossRef PubMed 54. ↵ Privitera , G. , Rana , N. , Armuzzi , A. & Pizarro , T.T . The gasdermin protein family: emerging roles in gastrointestinal health and disease . Nat Rev Gastroenterol Hepatol 20 , 366 – 387 ( 2023 ). OpenUrl PubMed 55. ↵ Rana , N. et al. GSDMB is increased in IBD and regulates epithelial restitution/repair independent of pyroptosis . Cell 185 , 283 – 298 e17 ( 2022 ). OpenUrl CrossRef PubMed 56. ↵ Bruno , M.E. et al. Correlation of Biomarker Expression in Colonic Mucosa with Disease Phenotype in Crohn’s Disease and Ulcerative Colitis . Dig Dis Sci 60 , 2976 – 84 ( 2015 ). OpenUrl CrossRef PubMed 57. ↵ Kaetzel , C.S . Cooperativity among secretory IgA, the polymeric immunoglobulin receptor, and the gut microbiota promotes host-microbial mutualism . Immunol Lett 162 , 10 – 21 ( 2014 ). OpenUrl CrossRef PubMed 58. ↵ Sabui , S. et al. Effect of knocking out mouse Slc44a4 on colonic uptake of the microbiota-generated thiamine pyrophosphate and colon physiology . Am J Physiol Gastrointest Liver Physiol 327 , G36 – G46 ( 2024 ). OpenUrl PubMed 59. ↵ Seif , F. et al. The role of JAK-STAT signaling pathway and its regulators in the fate of T helper cells . Cell Commun Signal 15 , 23 ( 2017 ). 60. ↵ Hinrichs , A.S. et al. The UCSC Genome Browser Database: update 2006 . Nucleic Acids Res 34 , D590 – 8 ( 2006 ). OpenUrl CrossRef PubMed Web of Science 61. ↵ Genomes Project, C. , et al. A global reference for human genetic variation . Nature 526 , 68 – 74 ( 2015 ). OpenUrl CrossRef PubMed 62. ↵ Willer , C.J. , Li , Y. & Abecasis , G.R . METAL: fast and efficient meta-analysis of genomewide association scans . Bioinformatics 26 , 2190 – 1 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 63. ↵ Bulik-Sullivan , B.K. et al. LD Score regression distinguishes confounding from polygenicity in genome-wide association studies . Nat Genet 47 , 291 – 5 ( 2015 ). OpenUrl CrossRef PubMed 64. ↵ Chang , C.C. et al. Second-generation PLINK: rising to the challenge of larger and richer datasets . Gigascience 4 , 7 ( 2015 ). 65. ↵ The GTEx Consortium atlas of genetic regulatory effects across human tissues . Science 369 , 1318 – 1330 ( 2020 ). OpenUrl Abstract / FREE Full Text 66. ↵ Cotto , K.C. et al. Integrated analysis of genomic and transcriptomic data for the discovery of splice-associated variants in cancer . Nat Commun 14 , 1589 ( 2023 ). OpenUrl CrossRef PubMed 67. ↵ Li , L. et al. An atlas of alternative polyadenylation quantitative trait loci contributing to complex trait and disease heritability . Nat Genet 53 , 994 – 1005 ( 2021 ). OpenUrl CrossRef PubMed 68. ↵ Taliun , D. et al. Sequencing of 53,831 diverse genomes from the NHLBI TOPMed Program . Nature 590 , 290 – 299 ( 2021 ). OpenUrl CrossRef PubMed 69. ↵ Consortium , G.T . The GTEx Consortium atlas of genetic regulatory effects across human tissues . Science 369 , 1318 – 1330 ( 2020 ). OpenUrl Abstract / FREE Full Text 70. ↵ Chen , E.Y. et al. Enrichr: interactive and collaborative HTML5 gene list enrichment analysis tool . BMC Bioinformatics 14 , 128 ( 2013 ). 71. Kuleshov , M.V. et al. Enrichr: a comprehensive gene set enrichment analysis web server 2016 update . Nucleic Acids Res 44 , W90 – 7 ( 2016 ). OpenUrl CrossRef PubMed 72. ↵ Xie , Z. et al. Gene Set Knowledge Discovery with Enrichr . Curr Protoc 1 , e90 ( 2021 ). OpenUrl CrossRef 73. ↵ Hou , W. & Ji , Z . Assessing GPT-4 for cell type annotation in single-cell RNA-seq analysis . Nat Methods 21 , 1462 – 1465 ( 2024 ). OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted September 30, 2025. Download PDF Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Multi-ancestry genome-wide and transcriptome-wide association analyses identified new risk loci and genes for inflammatory bowel disease Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Multi-ancestry genome-wide and transcriptome-wide association analyses identified new risk loci and genes for inflammatory bowel disease Linshuoshuo Lyu , Qing Li , Chao Li , Ghadeer K Dawwas , You Chen , Ran Tao , Qi Liu , Wanqing Wen , Xiao-Ou Shu , Kay Washington , David A Schwartz , Wei Zheng , Zhijun Yin , Ken S Lau , Xingyi Guo medRxiv 2025.09.23.25336483; doi: https://doi.org/10.1101/2025.09.23.25336483 Share This Article: Copy Citation Tools Multi-ancestry genome-wide and transcriptome-wide association analyses identified new risk loci and genes for inflammatory bowel disease Linshuoshuo Lyu , Qing Li , Chao Li , Ghadeer K Dawwas , You Chen , Ran Tao , Qi Liu , Wanqing Wen , Xiao-Ou Shu , Kay Washington , David A Schwartz , Wei Zheng , Zhijun Yin , Ken S Lau , Xingyi Guo medRxiv 2025.09.23.25336483; doi: https://doi.org/10.1101/2025.09.23.25336483 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4426) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15222) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6589) Geriatric Medicine (667) Health Economics (997) Health Informatics (4526) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (971) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1691) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9222) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (711) Sports Medicine (529) Surgery (711) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9fff7f7b1b40df88',t:'MTc3OTQ5MTQ2NQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (sparse)

Too few in-corpus citations on either side for a chart; here are the lists.

Cites (1)

Pan-UK Biobank GWAS improves discovery, analysis of genetic architecture, and resolution into ancestry-enriched effects 2024

References (70)

Pan-UK Biobank GWAS improves discovery, analysis of genetic architecture, and resolution into ancestry-enriched effects via crossref
doi:10.1053/j.gastro.2023.07.003 via crossref
doi:10.1038/ng.3760. via crossref
doi:10.1038/s41588-023-01384-0 via crossref
doi:10.1126/science.adj1182 via crossref
doi:10.1038/ng.175 via crossref
doi:10.1038/nature11582 via crossref
doi:10.1053/j.gastro.2016.06.040 via crossref
doi:10.1038/ng.3359 via crossref
doi:10.1146/annurev-pathol-012615-044152 via crossref
doi:10.1038/s41586-02205473-8 via crossref
doi:10.1038/ng.3367 via crossref
doi:10.1038/ng.3506 via crossref
doi:10.1038/s41588-019-0367-1 via crossref
doi:10.1038/s41588-019-0385-z via crossref
doi:10.1016/j.ajhg.2019.03.012 via crossref
doi:10.1038/s41467-021-25670-9 via crossref
doi:10.1038/s41467-022-34888-0 via crossref
doi:10.1158/0008-5472.can-24-0521 via crossref
doi:10.3389/fgene.2019.00008 via crossref
doi:10.1016/j.ajhg.2021.07.011 via crossref
doi:10.1038/s41467-023-35888-4 via crossref
doi:10.1038/s41467-022-33212-0 via crossref
doi:10.1186/s13073-022-01089-9 via crossref
doi:10.1038/s41392-022-01150-4 via crossref
doi:10.1038/s41467-021-22801-0 via crossref
doi:10.1038/s41587-019-0114-2 via crossref
doi:10.1038/s41593-022-01128-z via crossref
doi:10.1126/science.abf3041 via crossref
doi:10.1126/science.abf1970 via crossref
doi:10.1016/j.ajhg.2017.01.031 via crossref
doi:10.1093/ecco-jcc/jjab131 via crossref
doi:10.1016/j.cell.2021.11.031 via crossref
doi:10.1126/science.adj1182 via crossref
doi:10.1093/jnci/djad178 via crossref
doi:10.1371/journal.pgen.1004383 via crossref
doi:10.1038/s41588-018-0081-4 via crossref
doi:10.1038/s41467-024-51580-7 via crossref
doi:10.1093/nar/gkad976 via crossref
doi:10.1093/nar/gkad1004 via crossref
doi:10.1093/nar/gkad751 via crossref
doi:10.1002/ibd.20894 via crossref
doi:10.1136/gut.2007.135053 via crossref
doi:10.7150/ijms.87272 via crossref
doi:10.1074/mcp.m110.005397 via crossref
doi:10.3390/ijms24065697 via crossref
doi:10.1038/s41419-021-03559-1 via crossref
doi:10.1038/s41467-020-15646-6 via crossref
doi:10.1053/j.gastro.2020.12.059 via crossref
doi:10.1038/s41575-023-00743-w via crossref
doi:10.1016/j.cell.2021.12.024 via crossref
doi:10.1007/s10620-015-3700-2 via crossref
doi:10.1016/j.imlet.2014.05.008 via crossref
doi:10.1152/ajpgi.00065.2024 via crossref
doi:10.1186/s12964-017-0177-y via crossref
doi:10.1093/nar/gkj144 via crossref
doi:10.1038/nature15393 via crossref
doi:10.1093/bioinformatics/btq340 via crossref
doi:10.1038/ng.3211 via crossref
doi:10.1186/s13742-015-0047-8 via crossref
doi:10.1126/science.aaz1776 via crossref
doi:10.1038/s41467-023-37266-6 via crossref
doi:10.1038/s41588-021-00864-5 via crossref
doi:10.1038/s41586-021-03205-y via crossref
doi:10.1126/science.aaz1776 via crossref
doi:10.1186/1471-2105-14-128 via crossref
doi:10.1093/nar/gkw377 via crossref
doi:10.1002/cpz1.90 via crossref
doi:10.1016/s0140-6736(17)32448-0 via crossref
doi:10.1038/s41592-024-02235-4 via crossref

Source provenance

crossref: last seen: 2026-05-19T01:00:24.715877+00:00
europepmc: last seen: 2026-05-20T01:45:00.602351+00:00

[1] Pan-UK Biobank GWAS improves discovery, analysis of genetic architecture, and resolution into ancestry-enriched effects 2024