SMTdb: A comprehensive spatial meta-transcriptome resource in cancer

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 47,532 characters · extracted from preprint-html · click to expand
SMTdb: A comprehensive spatial meta-transcriptome resource in cancer | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results SMTdb: A comprehensive spatial meta-transcriptome resource in cancer Weiwei Zhou , Qingyi Yang , Jiyu Guo , Si Li , Minghai Su , Feng Leng , Tingyu Rong , Jingyi Shi , Yueying Gao , Tiantongfei Jiang , Juan Xu , Yongsheng Li doi: https://doi.org/10.1101/2025.01.22.634407 Weiwei Zhou 1 College of Bioinformatics Science and Technology, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this 
site Qingyi Yang 2 School of Interdisciplinary Medicine and Engineering, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jiyu Guo 2 School of Interdisciplinary Medicine and Engineering, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Si Li 2 School of Interdisciplinary Medicine and Engineering, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Minghai Su 1 College of Bioinformatics Science and Technology, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Feng Leng 1 College of Bioinformatics Science and Technology, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tingyu Rong 2 School of Interdisciplinary Medicine and Engineering, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jingyi Shi 1 College of Bioinformatics Science and Technology, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yueying Gao 2 School of Interdisciplinary Medicine and Engineering, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tiantongfei Jiang 1 College of Bioinformatics Science and Technology, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: 202001107{at}hrbmu.edu.cn xujuanbiocc{at}ems.hrbmu.edu.cn 
liyongsheng{at}ems.hrbmu.edu.cn Juan Xu 1 College of Bioinformatics Science and Technology, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: 202001107{at}hrbmu.edu.cn xujuanbiocc{at}ems.hrbmu.edu.cn liyongsheng{at}ems.hrbmu.edu.cn Yongsheng Li 2 School of Interdisciplinary Medicine and Engineering, Harbin Medical University , Harbin 150081, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: 202001107{at}hrbmu.edu.cn xujuanbiocc{at}ems.hrbmu.edu.cn liyongsheng{at}ems.hrbmu.edu.cn Abstract Full Text Info/History Metrics Preview PDF ABSTRACT Microbes have been found in various tumors, and research on the tumor microbiome has been garnering increased attention. However, it remains a challenge to investigate the microbiome in cancer at spatial resolution. The rapid advent of spatially resolved transcriptomics techniques has made it possible to map transcripts at single-cell resolution in various types of cancer. Here, we constructed a comprehensive spatial meta-transcriptome resource by manually curating 203 fresh frozen (FF) slices from 20 cancers encompassing 334,253 spots and 1,908,646 cells. SMTdb ( http://bio-bigdata.hrbmu.edu.cn/SMTdb/ ) was constructed to provide comprehensive insights into the abundance, distribution and enriched TME regions of 1218 microbiota in spatial tissue slices. SMTdb enables vast interactive data exploration of spatial distribution and expression of microbiota, host gene modules associated with certain microbiota and co-occurrence between microbiota and immune cells within tumor microenvironment. The atlas resource serves as a one-stop and time-effective platform to investigate the interactions among microbial ecosystems and hosts in cancer. INTRODUCTION The tumor microenvironment (TME) is a central region of tumors with complex components. 
Diverse factors secreted by immune and nonimmune cells in the TME drive the inflammatory, immunosuppressive, and proangiogenic tumor internal environments ( Jin and Jin 2020 ). Numerous studies have confirmed that the interactions between tumor cells and immune cells play a critical role in TME ( Mao et al. 2021 ; Ma et al. 2023 ). In addition, emerging evidence has shown that microbes are present in various tumors and can impact cancer progression ( LaCourse et al. 2021 ), metastasis ( Bullman et al. 2017 ; Parhi et al. 2020 ; Fu et al. 2022 ), immune monitoring ( Jin et al. 2019 ; Riquelme et al. 2019 ), and even drug resistance ( Geller et al. 2017 ; Yu et al. 2017 ). Understanding the composition of the microbiota and its communication with cells in TME is vital for elucidating the molecular mechanisms of tumors. The development of single-cell RNA sequencing (scRNA-seq) technology provides the possibility to explore the relationships between the microbiota and cells in TME. Recent study has characterized the features of intra-tumoral microbiota and revealed the most abundant bacterial orders in intrahepatic cholangiocarcinoma ( Chai et al. 2023 ). Moreover, spatial transcriptomics (ST) technology allows exploration of the spatial distribution and co-occurrence of different cell types and microbiota in tissue slices. For example, host‒microbe interactions in oral squamous cell carcinoma and colorectal cancer have been revealed ( Galeano Nino et al. 2022 ). However, attempts are just emerging, and there is no work to explore the spatial distribution pattern of microbiota and their interactions with immune cells in cancer. Here, we construct a comprehensive spatial meta-transcriptome resource, SMTdb, by integrating scRNA-seq and ST data. We manually curated 203 FF slices from 20 cancer types encompassing 334,253 spots. To assess the cell type composition of spots, over 1,900,000 cells from paired or robust scRNA-seq data were collected as reference. 
SMTdb provides comprehensive insights into the abundance, distribution and enriched TME regions of 1218 microbiota in spatial tissue slices. Furthermore, SMTdb offers multiple analytical modules that allow users to interactively investigate the spatial distribution and expression of microbiota, host gene modules associated with certain microbiota and co-occurrence between microbiota and immune cells within TME. As the first spatial microbiota-TME analysis and data resource, SMTdb will promote microbial studies of tumor infection and the host immune response. RESULTS Spatial and single-cell transcriptomes across cancer types To construct a comprehensive spatial meta-transcriptome resource in cancer, we performed a literature search and manually collected FF tissue samples published over the past five years. FF samples are considered the gold standard for spatial genomics research, as they provide higher-quality DNA and RNA than do formalin-fixed and paraffin-embedded (FFPE) samples. After strict filtering and uniform processing, we assembled a dataset comprising 203 tissue slices from 20 cancer types, including 120 tumor slices and 83 normal slices, with a total of 334,253 spots. The tumor slices contained an average of 1,800 spots per sample, whereas the normal tissue slices contained an average of 1,600 spots per sample ( Figure 1 and Supplemental Table S1). Download figure Open in new tab Figure 1: Overview of SMTdb. (A) and (B) Data collection of multi-omics. (C) Lineage map of microbiota in SMTdb. (D) Abundance of microbiota across cancer types. Furthermore, to analyze the composition of the immune microenvironment in spatial tissue sections, we collected paired or robust scRNA-seq data and annotated cell types based on marker genes (Supplemental Table S3). 
After rigorous quality control and filtering, we constructed a pan-cancer single-cell atlas encompassing 1,908,646 cells across 42 cell types, including 187,114 tumor cells, 1,077,641 immune cells and 644,071 stromal cells ( Figure 1 and Supplemental Table S1). Finally, using the corresponding single-cell atlas for each cancer type, we assigned cell types to each spot in tissue slices. Spatial meta-transcriptome in cancer Utilizing the constructed spatial transcriptomic atlas, we applied the analysis pipeline SMT (a method for extracting microbial sequences from ST data and assigning taxonomic labels) to assess microbial abundance within each spot. We found that microbiota within the tissue slices were predominantly bacteria, fungi, and eukaryotes, along with a minor presence of viruses (Supplemental Figure 1A). A total of 26 viral, 309 bacterial, 391 eukaryotic, and 492 fungal species were identified (Supplemental Figure 1B). Notably, the microbiota enriched in each cancer type showed significant differences. Citrobacter was enriched in colorectal cancer (CRC), glioblastoma (GBM), muscle-invasive bladder cancer (MIBC), renal cell carcinoma (RCC), and ovarian carcinoma (OV), while Acidovorax was predominantly enriched in breast cancer (BRCA) and Helicobacter in gastrointestinal stromal tumor (GIST), suggesting the microbial diversity and specificity across different cancer types (Supplemental Figure 1A,D). In addition, tumor tissues exhibited a greater enrichment of microbiota implicated in cancer development and progression ( Jiang et al. 2015 ; Stasiewicz and Karpinski 2022 ; Zong et al. 2023 ), including Candida , Agaricus , and Malassezia , compared to normal tissues (Supplemental Figure 1C). Our analysis constructed a comprehensive atlas of microbial distribution and abundance in spatial contexts, providing a foundation for investigating the interactions between microbiota and TME. 
User interface (UI) overview We present SMTdb, a comprehensive data portal offering the exploration and visualization of microbial spatial distributions and their interactions with TME. To enhance usability, SMTdb provides versatile functional panels, supporting overview and data exploration. The “Browse” page offers a slice list displaying cancer types, the number of microbiota, spots and cell types. Users can filter the table by cancer type, tissue, or microbiota of interest and select a slice to access more information via the “Detail” button ( Figure 2A ). Download figure Open in new tab Figure 2: UI of SMTdb database. (A) The “Browse” page allows users to filter slices of interest using the buttons located on the left sidebar. (B) Example results generated by using “ Citrobacter ” as a key on the “Search” page. (C) On the “Slice Comparison” page, users can analyze the similarities and differences across multiple features between any two slices. (D) On the “Microbiota Distribution” page, users can visualize the spatial distribution of specific microbiota within different neighborhood regions across multiple slices. The “Search” page offers four modes, allowing users to query specific microbiota, cancers, cell types, or genes. SMTdb filters slices to deliver tailored results. As an example, Figure 2B highlights slices containing “ Citrobacter ”. The “Tools” page provides two interactive functions: “Slice comparison” and “Microbiota distribution”. The former aims to provide insight into variations in the distribution of characteristics such as spatial neighborhoods, microbial distribution, cell type composition, and gene expression between two slices. Users can select any two spatial slices to explore the associations and differences between any features of interest within the slices ( Figure 2C ). “Microbiota distribution” is designed for rapid comparison of a selected microbiota across multiple spatial slices. 
Users can view the spatial distribution of the specific microbiota in slices. Moreover, quick links are provided for users to access more details related to the microbiota in the selected slice ( Figure 2D ). Slice annotation SMTdb annotated multi-omics data for each tissue slice. First, SMTdb identified the marker genes of the transcriptome clusters. Users can browse the expression of genes within specific spatial locations. SMTdb also provided spatially variable genes (SVGs) for each slice, facilitating an understanding of the cellular states and functions unique to particular spatial regions ( Figure 3A ). In addition, based on scRNA-seq, SMTdb provides the composition of cell types for each spot, enabling users to select a specific spot or region for the analysis of cell localization in tissue slices ( Figure 3A ). Ultimately, the data related to microbiota within spatial slices were also integrated herein. Users can explore the spatial distribution and abundance of microbiota of interest across individual spots ( Figure 3A ). Download figure Open in new tab Figure 3: Analytic modules in SMTdb. (A) Slice annotation module offers transcriptomic clusters, marker information of clusters, deconvolution of spots and the abundance of microbiota in tissue slices. (B) Spatial neighborhood module provides the distribution of spots in spatial slice, identifies differentially regulated genes and the enriched functions by microbiota in malignant and boundary regions (cancer hallmarks for malignant spots and immune pathways for boundary spots). (C) Co-occurrence module shows immune cell types that co-occur with bacteria in TME across spatial context. (D) Gene modules regulated by bacteria in the host and the biological functions regulated by these modules. (E) K-M plot for CRC patients of TCGA cohort based on the abundance of Citrobacter, showing the classification of patients into high-expression and low-expression groups by the median value for analysis. 
Functions of regions with high bacterial abundance In addition, SMTdb defines malignant and boundary regions for each tissue slice. The malignant regions consisted of cells with the most prominent oncogenic features, whereas the boundary regions encompassed the outermost layer of malignant cells in a solid tumor and spatially close stromal cells, which are frequently infiltrated by immune cells ( Figure 3B ). In the malignant region, the focus was directed towards exploring the cancer hallmarks regulated by microbiota-enriched spots, whereas in boundary regions, SMTdb emphasized the activity of immune pathways. Users can easily access these results through various types of visualizations, such as volcano plots and heatmaps ( Figure 3B ). Cell types within a high microbiota abundance region The colonization of bacteria can lead to the proliferation of immune cells, such as T cells and B cells, influencing the development of tumors ( Overacre-Delgoffe et al. 2021 ). SMTdb provides the composition of cell types in high microbiota abundance regions (HMAR) ( Figure 3C ). Users can identify the co-occurrence of different immune cells and microbiota within a spatial context, offering guidance for the comprehension of microbial functions within TME ( Figure 3C ). Host microbiota co-expression module and clinical relevance To understand the interactions between the host and microbiota and reveal how the microbiota affects the host’s physiological and immune responses, we studied the co-expression modules of the microbiota and host genes ( Figure 3D ). SMTdb calculates the microbial gene co-expression modules in each slice. Users can identify the gene modules regulated by microbes and the biological functions enriched in these modules in the spatial slice ( Figure 3D ). In addition, to investigate the relationship between microbial abundance and prognosis, we performed Cox regression analysis and log-rank tests to identify outcome-associated microbial taxa at the genus level ( Figure 3E ). 
Case Study Liver metastasis drives the main malignant tumor-progression events of CRC patients, whereas most studies have focused only on the cellular ecosystem of liver tissue. Recent work has revealed the remarkable roles of the microbiome in metastatic cancer. Therefore, we used an ST dataset of metastatic liver in CRC to analyze the relevance of the cellular composition and microbiota ( Garbarino et al. 2023 ). Spatial neighborhood and spot transcriptome clusters were generated from tissue slices. Consistent with the senescent metastatic cancer cells in the original study, SMTdb identified the malignant regions and adjacent boundary regions in the same slice ( Figure 4A,B ). Some spots were annotated as epithelial signature-like metastatic cancer cells (eSMCC) in the original study, and these appeared exactly in spot transcriptome cluster 2 ( Figure 4B and Supplemental Figure 2A). Similar to previous results, eSMCC-accumulated RPL11 was also significantly overexpressed in cluster 2 ( Figure 4C ). Combined with scRNA-seq reference data, we explored the cellular composition of spatial spots. We found that monocytes dominated malignant boundary regions, which may be recruited by tumor cells (Supplemental Figure 2B). Notably, macrophages and monocytes were concentrated mainly at the interface between malignant and stromal regions, which was highly consistent with the original findings ( Figure 4D and Supplemental Figure 2C). Moreover, these cells were enriched in spot transcriptome clusters 6, 7, and particularly 8, which implied that the transcriptional clusters covered important information on the cell type distribution ( Figure 4E and Supplemental Figure 2D). Download figure Open in new tab Figure 4: Case study of colorectal liver metastasis patients in SMTdb. (A) HE image of tissue slice. (B) Spatial neighborhood and transcriptome clusters of tissue slice. (C) Expression of RPL11 between cluster 2 and other clusters (Wilcoxon rank sum tests). 
(D) The distribution of macrophages and monocytes in tissue slices. (E) Proportion of macrophages and monocytes in cluster 8 compared with the other clusters. (F) The composition of microbiota within tissue slices (left) and the abundance of H.pylori in various spatial neighborhoods (right). (G) Cancer hallmarks regulated by microbiota enriched in malignant regions (left) and immune pathways regulated by microbiota enriched in boundary regions (right, * means FDR<0.05 by hypergeometric test). (H) Immune cells co-occurring with H.pylori in spatial context. Furthermore, we performed metagenomic analysis on another CRC liver metastasis slice. Among all the spots, Helicobacter pylori ( H.pylori ), which had the second highest abundance, has been found to be associated with the development of liver disease ( Figure 4F ) ( Sumida et al. 2015 ; Boziki et al. 2021 ; Chen et al. 2023b ). Additionally, as H.pylori is the most prevalent bacterium in the digestive system ( Ralser et al. 2023 ), the spread of colorectal cancer cells may be another cause of the abundance of H.pylori in liver tissue. As expected, H.pylori appeared mainly in the malignant region composed of metastatic cancer cells ( Figure 4F ). To investigate the function of microbiota-enriched spots, we used the specifically upregulated genes in HMAR, which were identified in SMTdb, for malignant and stromal regions separately (Supplemental Figure 2E). Functional enrichment analysis indicated that cell cycle, DNA repair and TGF-β signal pathways were upregulated in the malignant cell region, whereas interferon receptor and antigen-related processes were activated in the adjacent boundary regions ( Figure 4G ). Co-occurrence analysis within SMTdb indicated that H.pylori was closely associated with monocytes and multiple B lymphocyte subtypes in the spatial context ( Figure 4H ), suggesting that their interactions may play a role in the immune response ( Peek et al. 2010 ). 
Besides, a previous study has elucidated that H.pylori induces the formation of tertiary lymphoid organs, leading to liver inflammation ( Shomer et al. 2003 ). In summary, based on the annotated ST datasets and analysis pipeline within SMTdb, we examined the cellular composition and spatial positioning in CRC liver metastasis slices and dissected the distribution of the microbiota and the co-occurrence between the microbiota and cells, providing a possible interpretation for the liver metastasis of cancer cells and immune-microbial ecosystem in TME. DISCUSSION This study integrates single-cell omics, spatial transcriptomics, and metagenomics data to construct SMTdb, the first comprehensive spatial meta-transcriptome resource for human cancer. This user-friendly platform facilitates the integration of multi-omics data, enabling users to explore the expression and spatial distribution of microbiota and cell types with clarity and precision. SMTdb addresses a critical gap in spatial metatranscriptomic data for cancer research and provides a powerful tool to investigate the interactions between microbes and TME. By combining single-cell omics, spatial transcriptomics, and metagenomics, SMTdb offers researchers a comprehensive spatial perspective on the role of microbes in tumor progression and their interactions with host cells. This enables a deeper understanding of microbial niches and functions across the TME and elucidates microbe-host interactions. SMTdb contains 203 tissue sections from 20 different cancer types, covering 334,253 spots, 1,908,646 cells, and 1218 microbiota. This extensive dataset allows users to compare microbiome differences across cancer types and investigate associations between specific microbes and cancer phenotypes. 
SMTdb features seven analytical modules, including slice annotation (e.g., spatially co-occurring gene modules, spot deconvolution, and spatially variable genes), microbiota spatial distribution, spatial neighborhood delineation, functional analysis of microbiota-enriched regions, cell type co-occurrence with bacteria, bacterial-host gene co-expression analysis and clinical relevance of microbiota. These modules provide insights into the spatial distribution of microorganisms and cell types in tissue sections, unveiling their roles and interactions within the TME. Despite the robust capabilities of SMTdb, it currently includes only FF slices due to sequencing technology limitations. With the continuous development of sequencing technology, in the future, we will continuously update SMTdb and expand it to other species. In summary, by enhancing the understanding of host‒microbiota interactions, SMTdb will become a valuable resource, significantly contributing to advancements in biology and medicine. METHODS Data collection We collected raw FF ST data from the Gene Expression Omnibus (GEO) ( Barrett et al. 2013 ) and public studies, including 120 tumor tissue slices and 83 peritumoral tissue slices from 20 cancer types (Supplemental Table S1). Furthermore, 1,908,646 cells from paired or robust scRNA-seq data ( Zhou et al. 2024 ) were matched to each slice as reference data to assess the cell type composition of spots. Abundance of microbiota in spatial slices We employed SMT, a genome sequence-based pipeline, to extract microbiota (Supplemental Table S2) from ST datasets ( Lyu et al. 2023 ). Briefly, reads that did not map to the host genome were first filtered and denoised, followed by BLAST alignment to the NCBI Nucleotide database ( https://www.ncbi.nlm.nih.gov/nucleotide/ ), generating spatially resolved microbial abundance matrices and host gene expression profiles. Next, each spot was annotated with specific taxonomic labels if the UMI was greater than 0. 
The following analyses of the microbiota were based on the abundance matrix. Processing and clustering ST and scRNA-seq data To obtain the spatial transcriptome expression of each spot, the raw fastq files were processed with the spaceranger tool (version 2.0, 10x Genomics) and mapped to the human reference genome (GRCh38). The Seurat R package was used for subsequent analyses ( Butler et al. 2018 ). Only those spots coinciding with tissue slices were retained, whereas spots of low quality, which are identified by either an excessively small or large gene count per spot, were filtered. To normalize the raw counts, a strategy of regularized negative binomial regression, specifically the SC transform ( Hafemeister and Satija 2019 ), was adopted. Principal Component Analysis (PCA) was then applied to achieve dimensionality reduction. The Louvain algorithm (resolution = 0.8) was used to generate transcriptome clusters. To identify the marker genes associated with each cluster, the ‘findAllMarkers’ algorithm was executed, adopting the parameters ‘min.pct = 0.25, logfc.threshold = 0.25’. To identify spatially variable genes (SVGs), we used the function ‘FindSpatiallyVariableFeatures’ in Seurat to measure the complex expression patterns of genes (SVGs; FDR 0). For the analysis of all the scRNA-seq datasets, a uniform analytical pipeline was adopted utilizing the Seurat tool. Cells exhibiting high mitochondrial expression and total unique molecular identifier (UMI) counts were filtered to ensure data quality. Furthermore, normalization of the raw UMI counts was conducted via the SC transform. Following this preprocessing step, highly variable genes were identified to facilitate the distinction of cellular heterogeneity. PCA was then used to enable robust clustering of the cells. Cell type annotation for scRNA-seq data A total of 363 canonical marker genes were manually recorded for 42 cell types from our previous work (Supplemental Table S3) ( Jiang et al. 2023 ). 
The markers were used as reference atlas for a fully automated and ultra-fast cell type identification method, ScType, to assign cell types to each cell ( Ianevski et al. 2022 ). InferCNV (version 1.2.1) ( Patel et al. 2014 ) was employed to identify malignant cells based on copy number variations (CNVs), with immune cells serving as reference cells. Identification of differentially expressed genes To analyze the impact of bacterial presence on gene expression in tumors, we divided the malignant, boundary, and stromal spots into two groups based on whether bacteria were present in the spots and conducted differential expression analysis. The Wilcoxon rank-sum test was utilized to assess the statistical significance of differentially expressed genes, and the false discovery rate (FDR) method was applied for p-value correction. Genes whose FDR value was less than 0.05 and whose fold change (FC) value was greater than 1.5 were considered significantly upregulated. Identification of spatial neighborhood To decode the complex spatial microenvironment of the tumor, we employed Cottrazm ( Xun et al. 2023 ) to map the microenvironment at the tumor boundary. The SME normalization algorithm from the stLearn ( Pham et al. 2023 ) package is used to adjust gene expression based on the spot image matrix, resulting in a morphologically adjusted gene expression matrix (Morph), and spatial spots are clustered via the KNN algorithm from the Seurat package. Immune-related gene signatures were scored in the Morph matrix to define a normal tissue expression score (NormalScore) for each spot. Cottrazm selects a reference based on the highest median NormalScore within the cluster. InferCNV ( Patel et al. 2014 ) was employed to assess CNV levels for the remaining spots. Hierarchical clustering was performed to categorize spatial spots into clusters to distinguish malignant spots and stromal spots. 
The CNV scores of each spot were incorporated into the Seurat object, and spots with high median CNV scores were initially defined as core malignant spots. Cottrazm calculated centroids for malignant and normal clusters and determined the proximity of each spot to these centroids. Spots were labeled as malignant or boundary based on their relative distances. The method arranges spatial spots on hexagonal lattices and defines neighboring spots using Manhattan distances. Identification of spatial co-expression modules To identify spatial co-expression network modules, hdWGCNA ( Morabito et al. 2023 ) was used to analyze spatial transcriptome data. We used the MetaspotsByGroups() function to establish metaspots separately for each Seurat cluster, and the SetDatExpr() function was applied to construct the metaspot expression matrix. The soft power was tested via TestSoftPowers(), and the optimal threshold was determined. The ConstructNetwork() function was employed to construct gene co-expression network modules. Functional analysis To predict the function of microbiota-enriched regions, we identified DEGs by the Wilcoxon rank sum test (FDR < 0.05 and FC > 1.5) for both microbiota-enriched and non-microbiota enriched regions separately. To assess the functions of bacteria, we estimated single-sample gene set enrichment analysis (ssGSEA) scores ( Hanzelmann et al. 2013 ) for immune-related signatures ( Li et al. 2020 ) and cancer hallmarks ( Liberzon et al. 2015 ). Additionally, a hypergeometric test was performed to ascertain the functions significantly upregulated in the microbiota-enriched regions. Spatial correlation between host genes and microbiota We assessed microbiota-gene co-expression modules in slices based on their expression at each spatial spot. For any pair of bacteria $i$ and gene $j$, we first obtained the probabilities $E_i = [e_{i1}, e_{i2}, \ldots, e_{in}]$ and $E_j = [e_{j1}, e_{j2}, \ldots, e_{jn}]$ that were observed across $n$ spots, which represented the expression of bacteria and genes in spots. 
Next, the Spearman correlation coefficient R was calculated based on the following formula: $$R = \frac{\sum_{k=1}^{n} (r_{ik} - \bar{r}_i)(r_{jk} - \bar{r}_j)}{\sqrt{\sum_{k=1}^{n} (r_{ik} - \bar{r}_i)^2}\,\sqrt{\sum_{k=1}^{n} (r_{jk} - \bar{r}_j)^2}}$$ where $\bar{r}_i = \frac{1}{n}\sum_{k=1}^{n} r_{ik}$ and $\bar{r}_j = \frac{1}{n}\sum_{k=1}^{n} r_{jk}$, $r_{ik}$ and $r_{jk}$ denote the ranks of $e_{ik}$ and $e_{jk}$ across spots, and n was the number of spots in the given slice. For each bacterium, all positively correlated genes (R>0.1) were considered as a co-expression module. Furthermore, we investigated the functions that were influenced by the module, using a hypergeometric test on Gene Ontology biological process (GO-BP) terms. Survival analysis To evaluate the association between microbial abundance and clinical prognosis, we collected clinical data from The Cancer Genome Atlas (TCGA) ( Tomczak et al. 2015 ), and the microbial abundance of TCGA samples was collected from BIC ( Chen et al. 2023a ). We used univariate and multivariate Cox regression analyses. Additionally, patients were stratified into high- and low-abundance groups according to the median expression of microbes in each cancer type, and survival differences between the two groups were assessed using the Kaplan-Meier (KM) method. DATA ACCESS All data in the SMTdb can be downloaded from the download page. The source code has been made available on GitHub and can be accessed through the following link: https://github.com/ComputationalEpigeneticsLab/SMTdb . COMPETING INTEREST STATEMENT The authors declare no competing interests. AUTHOR CONTRIBUTIONS Conceptualization: Juan Xu, Yongsheng Li, Tiantongfei Jiang. Data curation: Weiwei Zhou, Qingyi Yang, Jiyu Guo. Formal analysis: Weiwei Zhou, Qingyi Yang, Jiyu Guo. Investigation: Weiwei Zhou, Jiyu Guo, Qingyi Yang, Si Li. Methodology: Weiwei Zhou, Qingyi Yang, Jiyu Guo, Minghai Su, Jingyi Shi, Yueying Gao, Feng Leng, Tingyu Rong. Software: Weiwei Zhou, Qingyi Yang, Jiyu Guo. Validation: Weiwei Zhou, Qingyi Yang, Jiyu Guo, Si Li. Visualization: Weiwei Zhou, Qingyi Yang, Jiyu Guo. Writing – original draft: Weiwei Zhou, Yongsheng Li, Juan Xu, Qingyi Yang, Tiantongfei Jiang. 
Writing – review & editing: Weiwei Zhou, Yongsheng Li, Juan Xu, Qingyi Yang, Tiantongfei Jiang. ACKNOWLEDGMENTS This work was supported by the National Natural Science Foundation of China (32322020, 32170676, 32060152); Natural Science Foundation of Heilongjiang Province (Key Program) (ZD2023C007). We thank the National Natural Science Foundation of China. We would like to thank Natural Science Foundation of Heilongjiang Province (Key Program). We would also like to thank the members of the lab for helpful discussions and suggestions. REFERENCES ↵ Barrett T , Wilhite SE , Ledoux P , Evangelista C , Kim IF , Tomashevsky M , Marshall KA , Phillippy KH , Sherman PM , Holko M et al. 2013 . NCBI GEO: archive for functional genomics data sets--update . Nucleic Acids Res 41 : D991 – 995 . OpenUrl CrossRef PubMed Web of Science ↵ Boziki M , Polyzos SA , Papaefthymiou A , Doulberis M , Bakirtzis C , Sintila SA , Touloumtzi M , Grigoriadis N , Kountouras J . 2021 . Potential impact of Helicobacter pylori-related metabolic syndrome and Galectin-3 on liver, chronic kidney and brain disorders . Metabolism 118 : 154736 . OpenUrl CrossRef PubMed ↵ Bullman S , Pedamallu CS , Sicinska E , Clancy TE , Zhang X , Cai D , Neuberg D , Huang K , Guevara F , Nelson T et al. 2017 . Analysis of Fusobacterium persistence and antibiotic response in colorectal cancer . Science 358 : 1443 – 1448 . OpenUrl Abstract / FREE Full Text ↵ Butler A , Hoffman P , Smibert P , Papalexi E , Satija R . 2018 . Integrating single-cell transcriptomic data across different conditions, technologies, and species . Nat Biotechnol 36 : 411 – 420 . OpenUrl CrossRef PubMed ↵ Chai X , Wang J , Li H , Gao C , Li S , Wei C , Huang J , Tian Y , Yuan J , Lu J et al. 2023 . Intratumor microbiome features reveal antitumor potentials of intrahepatic cholangiocarcinoma . Gut Microbes 15 : 2156255 . OpenUrl CrossRef PubMed ↵ Chen KP , Hsu CL , Oyang YJ , Huang HC , Juan HF . 2023a . 
BIC: a database for the transcriptional landscape of bacteria in cancer . Nucleic Acids Res 51 : D1205 – D1211 . OpenUrl CrossRef PubMed ↵ Chen X , Peng R , Peng D , Xiao J , Liu D , Li R . 2023b . An update: is there a relationship between H. pylori infection and nonalcoholic fatty liver disease? why is this subject of interest? Front Cell Infect Microbiol 13 : 1282956 . OpenUrl CrossRef PubMed ↵ Fu A , Yao B , Dong T , Chen Y , Yao J , Liu Y , Li H , Bai H , Liu X , Zhang Y et al. 2022 . Tumor-resident intracellular microbiota promotes metastatic colonization in breast cancer . Cell 185 : 1356 – 1372 e1326. OpenUrl CrossRef PubMed ↵ Galeano Nino JL , Wu H , LaCourse KD , Kempchinsky AG , Baryiames A , Barber B , Futran N , Houlton J , Sather C , Sicinska E et al. 2022 . Effect of the intratumoral microbiota on spatial and cellular heterogeneity in cancer . Nature 611 : 810 – 817 . OpenUrl CrossRef PubMed ↵ Garbarino O , Lambroia L , Basso G , Marrella V , Franceschini B , Soldani C , Pasqualini F , Giuliano D , Costa G , Peano C et al. 2023 . Spatial resolution of cellular senescence dynamics in human colorectal liver metastasis . Aging Cell 22 : e13853 . OpenUrl CrossRef PubMed ↵ Geller LT , Barzily-Rokni M , Danino T , Jonas OH , Shental N , Nejman D , Gavert N , Zwang Y , Cooper ZA , Shee K et al. 2017 . Potential role of intratumor bacteria in mediating tumor resistance to the chemotherapeutic drug gemcitabine . Science 357 : 1156 – 1160 . OpenUrl Abstract / FREE Full Text ↵ Hafemeister C , Satija R . 2019 . Normalization and variance stabilization of single-cell RNA-seq data using regularized negative binomial regression . Genome Biol 20 : 296 . OpenUrl CrossRef PubMed ↵ Hanzelmann S , Castelo R , Guinney J . 2013 . GSVA: gene set variation analysis for microarray and RNA-seq data . BMC Bioinformatics 14 : 7 . OpenUrl CrossRef PubMed ↵ Ianevski A , Giri AK , Aittokallio T . 2022 . 
Fully-automated and ultra-fast cell-type identification using specific marker combinations from single-cell transcriptomic data . Nat Commun 13 : 1246 . OpenUrl CrossRef PubMed ↵ Jiang T , Zhou W , Sheng Q , Yu J , Xie Y , Ding N , Zhang Y , Xu J , Li Y . 2023 . ImmCluster: an ensemble resource for immunology cell type clustering and annotations in normal and cancerous tissues . Nucleic Acids Res 51 : D1325 – D1332 . OpenUrl CrossRef PubMed ↵ Jiang WG , Sanders AJ , Katoh M , Ungefroren H , Gieseler F , Prince M , Thompson SK , Zollo M , Spano D , Dhawan P et al. 2015 . Tissue invasion and metastasis: Molecular, biological and clinical perspectives . Semin Cancer Biol 35 Suppl: S244 – S275 . OpenUrl CrossRef PubMed ↵ Jin C , Lagoudas GK , Zhao C , Bullman S , Bhutkar A , Hu B , Ameh S , Sandel D , Liang XS , Mazzilli S et al. 2019 . Commensal Microbiota Promote Lung Cancer Development via gammadelta T Cells . Cell 176 : 998 – 1013 e1016. OpenUrl CrossRef PubMed ↵ Jin MZ , Jin WL . 2020 . The updated landscape of tumor microenvironment and drug repurposing . Signal Transduct Target Ther 5 : 166 . OpenUrl CrossRef PubMed ↵ LaCourse KD , Johnston CD , Bullman S . 2021 . The relationship between gastrointestinal cancers and the microbiota . Lancet Gastroenterol Hepatol 6 : 498 – 509 . OpenUrl CrossRef PubMed ↵ Li Y , Jiang T , Zhou W , Li J , Li X , Wang Q , Jin X , Yin J , Chen L , Zhang Y et al. 2020 . Pan-cancer characterization of immune-related lncRNAs identifies potential oncogenic biomarkers . Nat Commun 11 : 1000 . OpenUrl CrossRef PubMed ↵ Liberzon A , Birger C , Thorvaldsdottir H , Ghandi M , Mesirov JP , Tamayo P . 2015 . The Molecular Signatures Database (MSigDB) hallmark gene set collection . Cell Syst 1 : 417 – 425 . OpenUrl CrossRef PubMed ↵ Lyu L , Li X , Feng R , Zhou X , Guha TK , Yu X , Chen GQ , Yao Y , Su B , Zou D et al. 2023 . Simultaneous profiling of host expression and microbial abundance by spatial metatranscriptome sequencing . 
Genome Res 33 : 401 – 411 . OpenUrl Abstract / FREE Full Text ↵ Ma C , Yang C , Peng A , Sun T , Ji X , Mi J , Wei L , Shen S , Feng Q . 2023 . Pan-cancer spatially resolved single-cell analysis reveals the crosstalk between cancer-associated fibroblasts and tumor microenvironment . Mol Cancer 22 : 170 . OpenUrl CrossRef PubMed ↵ Mao X , Xu J , Wang W , Liang C , Hua J , Liu J , Zhang B , Meng Q , Yu X , Shi S . 2021 . Crosstalk between cancer-associated fibroblasts and immune cells in the tumor microenvironment: new findings and future perspectives . Mol Cancer 20 : 131 . OpenUrl CrossRef PubMed ↵ Morabito S , Reese F , Rahimzadeh N , Miyoshi E , Swarup V . 2023 . hdWGCNA identifies co-expression networks in high-dimensional transcriptomics data . Cell Rep Methods 3 : 100498 . OpenUrl CrossRef PubMed ↵ Overacre-Delgoffe AE , Bumgarner HJ , Cillo AR , Burr AHP , Tometich JT , Bhattacharjee A , Bruno TC , Vignali DAA , Hand TW . 2021 . Microbiota-specific T follicular helper cells drive tertiary lymphoid structures and anti-tumor immunity against colorectal cancer . Immunity 54 : 2812 – 2824 e2814. OpenUrl CrossRef PubMed ↵ Parhi L , Alon-Maimon T , Sol A , Nejman D , Shhadeh A , Fainsod-Levi T , Yajuk O , Isaacson B , Abed J , Maalouf N et al. 2020 . Breast cancer colonization by Fusobacterium nucleatum accelerates tumor growth and metastatic progression . Nat Commun 11 : 3259 . OpenUrl CrossRef PubMed ↵ Patel AP , Tirosh I , Trombetta JJ , Shalek AK , Gillespie SM , Wakimoto H , Cahill DP , Nahed BV , Curry WT , Martuza RL et al. 2014 . Single-cell RNA-seq highlights intratumoral heterogeneity in primary glioblastoma . Science 344 : 1396 – 1401 . OpenUrl Abstract / FREE Full Text ↵ Peek RM , Jr. , Fiske C , Wilson KT . 2010 . Role of innate immunity in Helicobacter pylori-induced gastric malignancy . Physiol Rev 90 : 831 – 858 . OpenUrl CrossRef PubMed ↵ Pham D , Tan X , Balderson B , Xu J , Grice LF , Yoon S , Willis EF , Tran M , Lam PY , Raghubar A et al. 
2023 . Robust mapping of spatiotemporal trajectories and cell-cell interactions in healthy and diseased tissues . Nat Commun 14 : 7739 . OpenUrl CrossRef PubMed ↵ Ralser A , Dietl A , Jarosch S , Engelsberger V , Wanisch A , Janssen KP , Middelhoff M , Vieth M , Quante M , Haller D et al. 2023 . Helicobacter pylori promotes colorectal carcinogenesis by deregulating intestinal immunity and inducing a mucus-degrading microbiota signature . Gut 72 : 1258 – 1270 . OpenUrl Abstract / FREE Full Text ↵ Riquelme E , Zhang Y , Zhang L , Montiel M , Zoltan M , Dong W , Quesada P , Sahin I , Chandra V , San Lucas A et al. 2019 . Tumor Microbiome Diversity and Composition Influence Pancreatic Cancer Outcomes . Cell 178 : 795 – 806 e712. OpenUrl CrossRef PubMed ↵ Shomer NH , Fox JG , Juedes AE , Ruddle NH . 2003 . Helicobacter-induced chronic active lymphoid aggregates have characteristics of tertiary lymphoid tissue . Infect Immun 71 : 3572 – 3577 . OpenUrl Abstract / FREE Full Text ↵ Stasiewicz M , Karpinski TM. 2022 . The oral microbiota and its role in carcinogenesis . Semin Cancer Biol 86 : 633 – 642 . OpenUrl CrossRef PubMed ↵ Sumida Y , Kanemasa K , Imai S , Mori K , Tanaka S , Shimokobe H , Kitamura Y , Fukumoto K , Kakutani A , Ohno T et al. 2015 . Helicobacter pylori infection might have a potential role in hepatocyte ballooning in nonalcoholic fatty liver disease . J Gastroenterol 50 : 996 – 1004 . OpenUrl CrossRef PubMed ↵ Tomczak K , Czerwinska P , Wiznerowicz M . 2015 . The Cancer Genome Atlas (TCGA): an immeasurable source of knowledge . Contemp Oncol (Pozn ) 19 : A68 – 77 . OpenUrl ↵ Xun Z , Ding X , Zhang Y , Zhang B , Lai S , Zou D , Zheng J , Chen G , Su B , Han L et al. 2023 . Reconstruction of the tumor spatial microenvironment along the malignant-boundary-nonmalignant axis . Nat Commun 14 : 933 . OpenUrl CrossRef PubMed ↵ Yu T , Guo F , Yu Y , Sun T , Ma D , Han J , Qian Y , Kryczek I , Sun D , Nagarsheth N et al. 2017 . 
Fusobacterium nucleatum Promotes Chemoresistance to Colorectal Cancer by Modulating Autophagy . Cell 170 : 548 – 563 e516. OpenUrl CrossRef PubMed ↵ Zhou W , Su M , Jiang T , Yang Q , Sun Q , Xu K , Shi J , Yang C , Ding N , Li Y et al. 2024 . SORC: an integrated spatial omics resource in cancer . Nucleic Acids Res 52 : D1429 – D1437 . OpenUrl CrossRef PubMed ↵ Zong Z , Zhou F , Zhang L . 2023 . The fungal mycobiome: a new hallmark of cancer revealed by pan-cancer analyses . Signal Transduct Target Ther 8 : 50 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted January 25, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following SMTdb: A comprehensive spatial meta-transcriptome resource in cancer Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share SMTdb: A comprehensive spatial meta-transcriptome resource in cancer Weiwei Zhou , Qingyi Yang , Jiyu Guo , Si Li , Minghai Su , Feng Leng , Tingyu Rong , Jingyi Shi , Yueying Gao , Tiantongfei Jiang , Juan Xu , Yongsheng Li bioRxiv 2025.01.22.634407; doi: https://doi.org/10.1101/2025.01.22.634407 Share This Article: Copy Citation Tools SMTdb: A comprehensive spatial meta-transcriptome resource in cancer Weiwei Zhou , Qingyi Yang , Jiyu Guo , Si Li , Minghai Su , Feng Leng , Tingyu Rong , Jingyi Shi , Yueying Gao , Tiantongfei Jiang , Juan Xu , Yongsheng Li bioRxiv 2025.01.22.634407; doi: https://doi.org/10.1101/2025.01.22.634407 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7616) Biochemistry (17625) Bioengineering (13852) Bioinformatics (41825) Biophysics (21397) Cancer Biology (18524) Cell Biology (25417) Clinical Trials (138) Developmental Biology (13350) Ecology (19858) Epidemiology (2067) Evolutionary Biology (24277) Genetics (15581) Genomics (22459) Immunology (17698) Microbiology (40278) Molecular Biology (17134) Neuroscience (88400) Paleontology (666) Pathology (2823) Pharmacology and Toxicology (4812) Physiology (7632) Plant Biology (15106) Scientific Communication and Education (2042) Synthetic Biology (4281) Systems Biology (9807) Zoology (2266)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00