Representation learning for multi-modal spatially resolved transcriptomics data

doi:10.1101/2024.06.04.24308256

Representation learning for multi-modal spatially resolved transcriptomics data

2024 · doi:10.1101/2024.06.04.24308256

preprint OA: gold CC-BY-NC-ND-4.0

📄 Open PDF Full text JSON View at publisher

Full text 66,920 characters · extracted from preprint-html · click to expand

Representation learning for multi-modal spatially resolved transcriptomics data | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Representation learning for multi-modal spatially resolved transcriptomics data View ORCID Profile Kalin Nonchev , View ORCID Profile Sonali Andani , Joanna Ficek-Pascual , Marta Nowak , Bettina Sobottka , Tumor Profiler Consortium , View ORCID Profile Viktor H Koelzer , View ORCID Profile Gunnar Rätsch doi: https://doi.org/10.1101/2024.06.04.24308256 Kalin Nonchev 1 Department of Computer Science, ETH Zurich , Zurich Switzerland 2 Swiss Institute of Bioinformatics , Zurich Switzerland 
Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kalin Nonchev Sonali Andani 1 Department of Computer Science, ETH Zurich , Zurich Switzerland 2 Swiss Institute of Bioinformatics , Zurich Switzerland 3 Computational and Translational Pathology Group, Department of Pathology and Molecular Pathology, University Hospital Zurich, University of Zürich , Zurich Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sonali Andani Joanna Ficek-Pascual 1 Department of Computer Science, ETH Zurich , Zurich Switzerland 2 Swiss Institute of Bioinformatics , Zurich Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Marta Nowak 3 Computational and Translational Pathology Group, Department of Pathology and Molecular Pathology, University Hospital Zurich, University of Zürich , Zurich Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Bettina Sobottka 3 Computational and Translational Pathology Group, Department of Pathology and Molecular Pathology, University Hospital Zurich, University of Zürich , Zurich Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Viktor H Koelzer 3 Computational and Translational Pathology Group, Department of Pathology and Molecular Pathology, University Hospital Zurich, University of Zürich , Zurich Switzerland 4 Department of Oncology, University of Oxford , Oxford UK 5 Institute of Medical Genetics and Pathology, University Hospital Basel , Basel Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Viktor H Koelzer For correspondence: viktor.koelzer{at}usz.ch gunnar.raetsch{at}inf.ethz.ch Gunnar Rätsch 1 Department of Computer Science, ETH Zurich , Zurich Switzerland 2 
Swiss Institute of Bioinformatics , Zurich Switzerland 6 AI Center, ETH Zurich , Zurich Switzerland 7 Medical Informatics Unit, University Hospital Zurich , Zurich Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Gunnar Rätsch For correspondence: viktor.koelzer{at}usz.ch gunnar.raetsch{at}inf.ethz.ch Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Spatial transcriptomics enables in-depth molecular characterization of samples on a morphology and RNA level while preserving spatial location. Integrating the resulting multi-modal data is an unsolved problem, and developing new solutions in precision medicine depends on improved methodologies. Here, we introduce AESTETIK , a convolutional deep learning model that jointly integrates spatial, transcriptomics, and morphology information to learn accurate spot representations. AESTETIK yielded substantially improved cluster assignments on widely adopted technology platforms (e.g., 10x Genomics™, NanoString™) across multiple datasets. We achieved performance enhancement on structured tissues (e.g., brain) with a 21% increase in median ARI over previous state-of-the-art methods. Notably, AESTETIK also demonstrated superior performance on cancer tissues with heterogeneous cell populations, showing a two-fold increase in breast cancer, 79% in melanoma, and 21% in liver cancer. We expect that these advances will enable a multi-modal understanding of key biological processes. 1 Introduction In multicellular organisms, cells are organized into tissues, groups of cells exhibiting common characteristics related to the biological function [ 1 , 2 ]. Recent advances in spatial transcriptomics enable in-depth molecular characterization of samples, capturing their morphology and RNA composition while retaining the spatial location ( Fig. 1A ). 
The gene expression profiles are usually available per spot, e.g., a 55µm tissue region (10x Genomics™, Visium) covering the whole transcriptome [ 3 ], or at a single-cell resolution but with a limited number of captured genes (CosMx NanoString™). More recent spatial transcriptomics technologies provide whole-transcriptome coverage along with higher resolution (e.g., 2µm, 10x Genomics™, Visium HD). Spatially aligning cell types by molecular phenotypes and morphology is important for understanding tissue-specific properties (e.g., neural organization in the brain [ 4 ]) in a physiological state and in the context of disease progression and treatment [ 5 – 8 ]. Nevertheless, spatial transcriptomics analysis demands manual annotation of multi-modal data, representing a laborious and resource-intensive process. Achieving reliable automation and overcoming limitations in cross-modal expertise will lead to more accurate annotations, offering a comprehensive, multi-modal perspective on biological mechanisms and interactions [ 9 ]. Download figure Open in new tab Fig. 1 AESTETIK integrates spatial, transcriptomics, and morphology information to learn accurate spot representations. A : Spatial transcriptomics enables in-depth molecular characterization of samples on a morphology and RNA level while preserving spatial location. B : Workflow of AESTETIK. Initially, the transcriptomics and morphology spot representations are preprocessed. Next, a dimensionality reduction technique (e.g., PCA) is applied. Subsequently, the processed spot representations are clustered separately to acquire labels required for the multi-triplet loss. Afterwards, the modality-specific representations are fused through concatenation and the grid per spot is built. This is used as an input for the autoencoder. Lastly, the spatial-, transcriptomics-, and morphology-informed spot representations are obtained and used for downstream tasks such as clustering, morphology analysis, etc. 
C : AESTETIK relies on a convolutional encoder-decoder architecture to learn accurate spot representations from the spatial transcriptomics data. D : Employing a multi-triplet loss instead of a single triplet loss adds extra positive and negative instances per class around the anchor point, improving the placement of the anchor in the latent space. Despite recent progress, computational data analysis that integrates all available data modalities, i.e., spatial information, transcriptomics, and morphology, remains challenging. Most existing methods either fall short in effectively integrating all modalities, especially those adapted from single-cell analysis, or are computationally expensive [ 9 ]. For example, BayesSpace employs a Bayesian approach with a prior giving higher weight to physically close spots [ 10 ]; MUSE relies on a multi-view autoencoder to learn a latent space from transcriptomics and morphology [ 11 ]; stLearn quantifies morphological distance through histology image features and incorporates these distances with spatial neighbors to refine gene expression [ 12 ]. Furthermore, alternative methods suggested a different perspective on modeling the spatial transcriptomics data by employing graph neural networks (GNN) [ 13 – 15 ]. However, the expression profiles often suffer from biological variability (e.g., cell-cycle stage) [ 16 , 17 ] or technical noise [ 17 , 18 ]. GNNs’ inherent susceptibility to noise can undermine their robustness and performance in downstream applications [ 19 , 20 ]. Therefore, a new and reliable integration approach is needed to overcome the aforementioned challenges and improve spatial transcriptomics analysis, ensuring adaptability across spatial transcriptomics technologies. To this end, we developed AESTETIK , a model that jointly integrates spatial, transcriptomics, and morphology information to learn accurate spot representations. 
We compared its performance against previous state-of-the-art methods on multiple datasets and widely adopted technology platforms: Brain tissue [ 21 ], breast cancer [ 22 ] and new and yet unreleased metastatic melanoma samples sequenced using Visium from 10x Genomics™; liver from normal and cancer patients using CosMx from NanoString™. We substantially improved the clustering accuracy across all datasets which yielded spatial domains with coherent expression and morphology. Through an ablation study, we showed the enhanced value of utilizing all available data modalities given the specifics of the analyzed tissue. Further, we validated the learned representation by identifying the main biological drivers and characterizing clusters based on morphology and cell-type composition. 2 Results 2.1 AESTETIK integrates spatial, transcriptomics, and morphology information We introduce AESTETIK ( A uto E ncoder for S patial T ranscriptomics E xpression with T opology and I mage K nowledge), a convolutional autoencoder model ( Fig. 1B ). It jointly integrates transcriptomics and morphology information at a spot level and topology at a neighborhood level to learn accurate spot representations that capture biological complexity. Firstly, we preprocess the transcriptomics profiles and apply principal component analysis (PCA) [ 23 ]. Simultaneously, the pre-trained on Imagenet [ 24 ] deep-learning model, Inception v3 [ 25 ], is employed to extract morphology spot features, followed by PCA. After computing clusters separately to preserve the modality-specific structure, we concatenate the top principal components (PC) from both modalities. Next, we construct a square grid for each spot that includes spatially neighboring spots. This grid per spot, along with the precomputed clusters, serves as an input to AESTETIK . The model relies on a convolutional encoder-decoder architecture ( Fig. 
1C ) to learn accurate spatial-, transcriptomics-, and morphology-informed spot representations. Ultimately, the learned representations can be leveraged for various downstream applications, including but not limited to clustering, gene expression, morphology, and pathway analysis. The motivation for the grid construction is to form an image-like representation, with grid encoding for spatial neighborhood and channels for both transcriptomics and morphology modalities. We frame the machine-learning problem as image pattern recognition and compression, with convolutional autoencoders being the state-of-the-art architecture for addressing these challenges [ 26 , 27 ]. The bottleneck layer serves as a constriction for information flow, forcing the model to capture the biological signal. Moreover, AESTETIK ’s loss function ( Eq. 4 ) is designed to optimize multiple objectives simultaneously by combining reconstruction loss for accurate latent representation and multi-triplet loss ( Fig. 1D , Eq. 3 ) for structure preservation across modalities. This dual optimization ensures a comprehensive and informative representation of each data modality. 2.2 AESTETIK improves the identification of spatial domains We benchmarked AESTETIK performance on multiple datasets with available ground truth annotations ( Fig. 2A ). In line with the methodology of [ 10 – 15 ], we adopted the Adjusted Rand Index (ARI) to measure the similarity between predicted cluster labels and ground truth, with the number of clusters set to match that in the ground truth. To avoid hyperparameter tuning on the samples used for testing, we introduced reversed leave-one-out cross-validation. More specifically, we used a single sample and its replicates to select hyperparameters to maximize the median ARI. Then, the optimal hyperparameters were applied to the remaining test samples. This process was iterated over all folds, and the resulting median ARI, along with the standard error, is reported ( Fig. 2A ). 
Download figure Open in new tab Fig. 2 AESTETIK improves the identification of spatial domains with coherent expression and morphology. A : Benchmark of AESTETIK and previous state-of-the-art methods in spatial transcriptomics on 5 datasets across 2 technology platforms. The y-axis represents the ARI between the ground truth and the predicted labels. Models are ordered based on their relative rank across the datasets. The shape represents the modalities the model integrates. B : Histology image and manual annotation of slice 151676 from the LIBD human DLPFC dataset [ 21 ] and C : Comparison of cluster assignments for the same slice. AESTETIK consistently yielded substantially improved cluster assignments closer to the ground truth annotations over previous state-of-the-art methods across all datasets ( Fig. 2A ). For example on the LIBD Human DLPFC dataset [ 21 ], AESTETIK achieved the highest ARI of 0.58 ± 0.02, significantly surpassing the second best model - GraphST - by 21%. The LIBD Human DLPFC dataset comprises 12 tissue slices obtained from the dorsolateral prefrontal cortex (DLPFC) brain region, sequenced using Visium from 10x Genomics™, together with curated manual annotations based on brain cytoarchitecture and known marker genes ( Fig. 2B ). This improvement highlights the superior performance of AESTETIK in effectively integrating the spatial modality and generating accurate cluster assignments in structured brain tissue. STAGATE and GraphST demonstrated lower performance, achieving ARI of 0.48 ± 0.02 and 0.48 ± 0.03, respectively. To qualitatively illustrate the cluster assignments, we compare them for slice 151676 ( Fig. 2C ), using the closest annotations to the ground truth across folds. MUSE (ARI 0.23), Leiden (ARI 0.28), stLearn (ARI 0.36) and SpaGCN (ARI 0.37) mixed the brain layers, accompanied by noise along the boundaries. BayesSpace (ARI 0.40) partitioned the white matter (WM) and layer 6 into multiple groups. 
While GraphST (ARI 0.55) and STAGATE (ARI 0.57) generated mostly well-defined clusters, layers 1, 2 and 3 were inconsistent. Notably, AESTETIK (ARI 0.63) identified the brain architecture, and its clusters displayed clearer definitions at the boundaries, leading to superior performance ( Fig. 2C , S1). Next, we investigate the methods’ performance on the Human Breast Cancer dataset, which comprises 5 tissue slices sequenced using Visium from 10x Genomics™ and annotated independently in two different labs [ 22 ]. This dataset presents unique challenges, primarily stemming from the considerable inter- and intra-sample heterogeneity, including variations in the cancer cell population. AESTETIK achieved the closest clusters to the ground truth labels with an ARI of 0.51 ± 0.13 ( Fig. 2A ), indicating a two-fold increase over the second best model, stLearn (ARI 0.25 ± 0.08). Despite wide standard error intervals observed in all models, AESTETIK exhibited heightened robustness, surpassing the challenges posed by the complexity of breast cancer tissue (Fig. S2). 2.3 AESTETIK effectively incorporates the morphology modality We introduce a new and yet unreleased spatial transcriptomics dataset with 9 distinct tissue regions sequenced using Visium from 10x Genomics™ from the Tumor Profiler study [ 28 ]. Each region has a replicate resulting in 18 samples of size 6.5 × 6.5mm 2 , with data including 10x Genomics™ Space Ranger v3.0.0 outputs and a corresponding H&E image scanned at a high resolution of 0.3µm/pixel. The tissue regions originate from 7 patients with metastatic melanoma each characterized by one of the following immune subtypes: immune desert, immune excluded, or inflamed. The ground truth annotations were obtained using histopathology software (HALO AI™ (Indica Labs, Corrales, NM, USA)), classifying the spots into one of the following categories: tumor, stroma, normal lymphoid, and blood/necrosis. 
Following this, a pathologist manually reviewed the model predictions ( Fig. 3A ). We consider this dataset to be a valuable reference benchmark for evaluating the performance of spatial transcriptomics models, particularly in terms of their ability to integrate morphology effectively. Download figure Open in new tab Fig. 3 AESTETIK effectively incorporates the morphology modality revealing biologically relevant spatial organization of cancer tissue. A : Histology image and pathology annotation of slice MACEGEJ-2-2 from the Tumor Profiler dataset. B : Comparison of cluster assignments for slice MACEGEJ-2-2. C : UMAP plot of the AESTETIK ’s latent space with randomly sampled spot images. D : Most representative cluster spots based on the obtained representations. E : Euclidean distance in latent space of each spot to the tumor centroid plotted in spatial space. Most representative spots are located in the middle of the tumor formations. F : Spatial marker gene expression of TYRP1. G : Pathway analysis of the identified clusters using decoupler [ 29 ]. H Spatial activation of MAPK pathway. On this dataset, AESTETIK achieved a 79% increase in ARI (0.59 ± 0.03) over previous state-of-the-art methods, demonstrating effective use of the morphology modality ( Fig. 2A ). While both stLearn (ARI 0.33 ± 0.02) and MUSE (ARI 0.20 ± 0.02) use the same pre-trained Inception v3 [ 25 ] for extracting morphology features, they fall short in effectively leveraging this information ( Fig. 3B ). On the other hand, AESTETIK not only produced accurate cluster assignments ( Fig. 3B ), but also identified a hemorrhage region in the upper left of the H&E image, that was overlooked during annotation (black box in Fig. 3A, B , S3). Next, we qualitatively explore the latent representations and the identified spatial domains by focusing on slice MACEGEJ-2-2 ( Fig. 3A ). We visualized the latent space using UMAP [ 30 ] with randomly sampled morphology spot representations ( Fig. 3C ). 
We observe an aggregation of tumor spots (cluster 4) on the bottom-right side, showcasing similarities in their characteristics. On the lower left side, there are areas with blood and necrosis. While clusters 1 and 3, representing normal lymphoid tissue and stromal cells, are positioned in the upper part, a closer inspection reveals discernible differences in their underlying structures. Besides, for enhanced explainability of the spot representations, we selected the most representative spots per cluster ( Fig. 3D ). Visually, tumor cells within cluster 4 exhibit distinct characteristics; they appear significantly larger, displaying irregular shapes, and possessing enlarged nuclei. Stromal cells (cluster 3) have an elongated morphology and are noticeably more scattered [ 31 ] compared to the normal lymphoid cells (cluster 1), which are generally smaller and denser [ 32 ]. Furthermore, to illustrate the effect of encoding spatial information in latent space, we computed the Euclidean distance of each spot to the tumor centroid and visualized it spatially ( Fig. 3E ). While stromal cells are the furthest, we observed that tumor cells close in latent space are clustered spatially, with the most representative spots located in the middle of the tumor formations. To provide additional insights, we found TYRP1 and TKTL1 among the top tumor marker genes in the tumor cluster, confirming the model predictions for the spatially resolved identification of melanoma cells ( Fig. 3F , S4). TYRP1 gene is involved in melanocyte pigmentation, associated with melanoma progression and is a target for oncological immunotherapy [ 33 – 35 ]. The second highly upregulated gene, TKTL1, is implicated in the progression of melanoma and contributes to the increased invasion of melanoma cells [ 36 ]. Further, we performed a pathway analysis of the tumor clusters using decoupler [ 29 ] ( Fig. 3G ) which revealed increased activity of the MAPK pathway in the cancer cluster ( Fig. 
3H ), known for promoting cell proliferation, invasion, metastasis, migration, survival, and angiogenesis [ 37 – 39 ]. Furthermore, we observed that hypoxia signatures were predominant in areas of necrosis and hemorrhage and JAK-STAT inflammatory signaling was predominant in the cancer microenvironment clusters 1 and 3. These pathway-level analyses underline the robust associations of the spatially resolved clustering results achieved by AESTETIK and support interpretation in the context of the underlying biology. 2.4 AESTETIK improves cluster assignment in single-cell spatial transcriptomics CosMx NanoString™ released a liver dataset with single-cell resolution, encompassing two tissue regions from normal and cancer patients and capturing 1000 genes. It offers valuable insights into liver biology and cancer characteristics. More specifically, using these datasets, we assess the models’ effectiveness at single-cell resolution by comparing the clusters they produce with the cell types reported by NanoString™. In both normal and cancer liver tissue, AESTETIK exhibited outstanding performance, substantially outperforming the other models by 39% and 21%, with ARI of 0.46 ± 0.02 and 0.23 ± 0.00, respectively ( Fig. 2A , S5, S5). The second best model, stLearn , attained a score of 0.33 ± 0.00 and 0.19 ± 0.00, followed by GraphST with 0.24 ± 0.00 and 0.14 ± 0.00. Overall, the clustering accuracy on the cancer tissue is lower compared to the normal sample. However, the relative trend in the ranking of the models remained consistent. 2.5 Joint integration of multi-modal data enhances computational analysis To pinpoint the benefit of the spatial modality in the LIBD Human DLPFC and Tumor Profiler datasets, we systematically varied the grid’s window size, ranging from 1 (w/o spatial information) to 11, and measured the change in ARI ( Fig. 4A ). The grid size determines the number of spatially adjacent spots to consider. 
Local spatial information proved important, preserving local details and spot-to-spot variability. However, incorporating a more extensive global context through a larger window size (e.g., 11) introduced noise and hampered performance, which was likely due to signal over-smoothing and the loss of spot-specific details. Download figure Open in new tab Fig. 4 Joint integration of multi-modal data enhances computational analysis A : Ablation study on the influence of window size and morphology weight on the ARI. The y-axis represents the ARI, normalized by dataset. B : UMAP visualization of single (transcriptomics, morphology) and combined ( AESTETIK ) modality representations on a simulated tissue slice, colored by the 10 ground truth annotations. C : Cluster assignments based on only a single modality (transcriptomics, morphology) and AESTETIK ’s joint representations. D : Comparing the stability of the single and multi-triplet loss on the loss function of AESTETIK across datasets. The number, following the dataset name, is the median number of clusters present. The y-axis represents the standard deviation computed on the loss difference over successive training epochs. E : Runtime for the evaluated clustering methods. The y-axis represents the time in minutes. Models are ordered based on their relative rank across the datasets. Similarly, we studied the contribution of each modality by varying the morphology weight (0 - no morphology; 1.5 - equal weight between transcriptomics and morphology; 3 - only morphology). As expected, we found that the transcriptomics modality in the brain dataset is informative given the provided ground truth annotations, relying on known cytoarchitecture and marker genes [ 21 ]. In contrast, the ground truth for the Tumor Profiler, derived from histopathology software, was morphology driven ( Fig. 4A ). 
Furthermore, to underscore the significance of methods incorporating all data modalities, we present a scenario illustrating the necessity of both modalities to reveal ground truth annotations ( Fig. 4B ). Following the approach of [ 11 ], we simulated data where both modalities are essential for accurate cluster identification. Our ablation study demonstrated that the optimal ARI was achieved when accounting for both modalities, thus emphasizing the critical significance of multi-modal data integration ( Fig. 4C ). Additionally, the multi-triplet loss demonstrated an enhancement in loss stability during training compared to the single triplet loss ( Fig. 4D ). The refined positioning of clusters in latent space, considering multiple positive and negative spots, becomes crucial, especially when dealing with datasets containing numerous clusters. Lastly, the runtime per tissue slice for our model, incorporating all three modalities ( ∼ 8 min), was either lower or comparable to that of other models ( Fig. 4E ). For example, BayesSpace , MUSE , and STAGATE , incorporating only two modalities, required ∼ 28 min, ∼ 17 min, and ∼ 13 min, respectively. Moreover, we demonstrate that AESTETIK is well-suited for analyzing large spatial transcriptomics datasets, scaling to millions of spots (Fig. S7). 3 Discussion In this work, we propose AESTETIK , a method that jointly integrates spatial, transcriptomics, and morphology information to learn accurate spot representations. Our results consistently showed superior performance to state-of-the-art methods across structured tissues (e.g., brain) and cancer tissues with heterogeneous cell populations (e.g., breast, melanoma, liver) across widely adopted spatial transcriptomics technologies (e.g., 10x Genomics™, Visium, CosMx NanoString™). We systematically demonstrated the significance of jointly integrating multi-modal data to improve spatial transcriptomics analysis and yield more precise spot annotations. 
This improvement in spot representation resulted from modeling the spatial transcriptomics modalities as a grid encoding the spatial spot neighborhood and channels as transcriptomics and morphology modalities. Our approach framed the machine-learning problem as image pattern recognition and compression, where convolution filters jointly learn the importance of neighboring spots and channels. This proved beneficial in both structured and heterogeneous tissues. In contrast, the GNNs, employed by SpaGCN, GraphST and STAGATE demonstrated variations in their performance relative to the other methods across tissue types and spatial transcriptomics technologies. This could be attributed to the inherent susceptibility of GNNs to noise [ 19 , 20 ]. The graph structure ensures connectivity among neighboring spots, which is useful in structured tissues (e.g., brain) with coherent spatial patterns. However, it presents challenges in samples of lower sequencing quality or tissues with higher heterogeneity (e.g., cancer cell populations), where noise, introduced through node perturbations and edge alterations, might affect the graph structure. Consequently, this undermines the robustness and performance of current GNNs in downstream applications. Further, in our ablation study on the brain dataset, we quantitatively demonstrated the significance of the spatial modality in identifying the brain layer structure. We discovered that a relatively small grid’s window size (5-7) sufficiently captures the desired spatial signal. Opting for a larger neighborhood (e.g., window size 11) offers no extra value. Unlike the global tissue context, the local environment better preserves spot-specific signals and nearby variability. 
Ultimately, our ablation results underscore 1) the importance of jointly integrating the available spatial transcriptomics data modalities for accurate spot representation, and 2) the necessity for external knowledge to prioritize the signal of interest, depending on the particular research question at hand. Several paths to further improve model accuracy appear promising. 1) We employed the pre-trained Inception v3 [ 25 ] to extract morphology features. However, adopting a model tailored to a specific task (e.g., cell nuclei segmentation and classification) would likely yield more informative spot features, potentially leading to improved overall performance. 2) AESTETIK randomly selects the positive and negative pairs for each anchor point during training. We believe this process can be improved by utilizing a smarter strategy for triplet mining, which would eventually improve the performance, and robustness to noise. In the future, AESTETIK could be effectively applied to fine-map cell populations in spatial transcriptomics datasets [ 40 – 42 ], to systematically analyze the interplay between different modalities by varying their contribution and to gain a multi-modal understanding of key biological processes. To foster these downstream applications, we have released the code for AESTETIK along with examples demonstrating its usage. Moreover, we anticipate that upon its release, the 10x Genomics™, Visium dataset from the Tumor Profiler study will serve as a valuable reference benchmark for assessing spatial transcriptomics model performance and explainability. Thus, we hope that our model, together with this dataset, will stimulate further improvements in computational spatial transcriptomics analysis. 4 Methods & Materials 4.1 Data preprocessing AESTETIK takes in spatial, transcriptomics, and morphology information. We apply the same preprocessing pipeline across datasets and sequencing technologies. 
For simplicity, we refer to both spot and cell as a spot (a single spot can contain 1 cell). 4.1.1 Transcriptomics modality Starting with raw counts, genes expressed in fewer than 10 spots are removed. Then, the scanpy function `highly_variable_genes` computes normalized variance in Seurat v3 style, removing genes with variance below 1 [ 23 , 43 ]. Each spot undergoes normalization by total counts over all genes, followed by log1p transformation and scaling. Subsequently, PCA is applied to the preprocessed counts, extracting the first 15 PCs [ 10 ]. 4.1.2 Morphology modality The raw RGB image for each tissue slice is divided into tiles, each representing a spot and its defined neighborhood based on the spot diameter. Following the default preprocessing steps of Inception v3 [ 25 ], the tiles are resized to 299, their center is cropped and the RGB channels are normalized. Morphology features are then extracted from the last network layer (with 2,048 dimensions) of the Inception v3 deep-learning model pre-trained on ImageNet [ 24 ]. Finally, PCA reduces the feature dimension from 2,048 to 15. 4.1.3 Grid construction To begin, each spot is represented by two vectors containing the first $n_{d_1}$ and $n_{d_2}$ PCs obtained via PCA from the preprocessed transcriptomics and morphology modalities, along with their spatial coordinates. For simplicity, we assume $n_{d_1} = n_{d_2} = n_{pca}$, but the following workflow also holds for $n_{d_1} \neq n_{d_2}$. These vectors are concatenated and scaled in the range [0, 1]. Then, a square grid for each spot is constructed with the number of spatial neighbors, $N_{grid}$, chosen as an odd number to ensure the center position of the selected spot in the window. This results in a tensor of size $N_{grid} \times N_{grid} \times 2\,n_{pca}$, which can be interpreted as an $N_{grid} \times N_{grid}$ image with $2\,n_{pca}$ channels. For spots that are missing or located on the border, we apply padding by taking the median expression over each channel in spot $i$. 
4.1.4 Clustering The default clustering algorithm is Bayesian Gaussian Mixture with a diagonal covariance matrix from the sklearn package, but we also support K-Means, Leiden, and Louvain. Once the cluster labels are obtained, an additional refinement step can be applied. A K-Neighbors Classifier is fitted using spatial coordinates and the already obtained clusters to refine the cluster assignments in spatial space through majority voting. 4.2 Model architecture AESTETIK utilizes a convolutional deep-learning autoencoder with a standard encoder-decoder architecture and a bottleneck layer. The encoder comprises a convolutional layer, max-pooling layer, batch normalization, ReLU activation, and linear layer. Default hyperparameters include 64 convolutional kernels (size 7), dropout (p = 0.3), max-pooling (stride 3), and a linear layer (size 16). The decoder follows a mirrored architecture, concluding with a sigmoid function to constrain output values in the range [0, 1]. AESTETIK is a Python package implemented in PyTorch. 4.2.1 Autoencoder ensemble To improve the network stability, we employ ensemble architectures for both the encoder and decoder, utilizing random LeCun [ 44 ] initialization. The ensemble’s output is determined by taking the median over predictions. We train an ensemble with 3 encoders and decoders. The final representation is computed by dropout sampling 1,000 times and taking the median value. 4.2.2 Reconstruction loss We employ a reconstruction loss to ensure that the latent space effectively captures the biological complexity of the morphology and transcriptomics modalities. We define it as: $\mathcal{L}_{rec} = \mathcal{L}_{1}^{tr} + \alpha\,\mathcal{L}_{1}^{m}$ (2), where α ∈ [0,3] is a hyperparameter for the morphology weight and $\mathcal{L}_{1}^{mod}$ is the standard L1 reconstruction loss for modality $mod$, with m for morphology and tr for transcriptomics modality. We use L1 loss due to the input-output range being [0, 1]. 4.2.3 Multi-triplet loss We apply triplet loss to preserve the structure across modalities. 
Its primary objective is to learn a spot representation in which similar instances are closer together, while dissimilar instances are farther apart. Define an anchor point A with label $l_i$, then we draw at random a positive point P with label $l_i$ and a negative point N with label $l_j$ such that $l_i \neq l_j$, then the single triplet loss is defined as: In spatial transcriptomics, multiple classes and high noise ratios are typical. Using just one positive and negative point can lead to unstable representation and increase the training time due to alternations. To improve the spot representation robustness, we propose the multi-triplet loss motivated by [ 45 ]. Let L be the number of unique labels in the dataset. Define an anchor point A with label $l_i$, then we draw, with replacement, L − 1 positive points $\{P_1, P_2, \ldots, P_{L-1}\}$ with label $l_i$. Additionally, for each label $l_j$ where j ≠ i, we draw a single negative point, resulting in $\{N_1, N_2, \ldots, N_{L-1}\}$. Then the multi-triplet loss for a single modality can be defined as: which when extended to all spots: The modality-weighted multi-triplet loss is defined as: with α defined as in equation 2 . 4.2.4 Loss function The overall loss function for training combines reconstruction loss to ensure accurate latent representation and multi-triplet loss for preserving structure across modalities. Formally, it is defined as: which can be rewritten as: 4.2.5 Training details The model is trained for 100 epochs using Adam [ 46 ] with a weight decay of 1e-6, a learning rate of 1e-3, and a batch size corresponding to the number of spots in a tissue slice. The run time is approximately 8 minutes on a GPU, with inference time under a minute. Computational data analysis was performed at Leonhard Med ( https://sis.id.ethz.ch/services/sensitiveresearchdata/ ) secure trusted research environment at ETH Zurich. 
4.2.6 Evaluation We propose reversed leave-one-out cross-validation for model evaluation to avoid hyperparameter tuning on test samples. We utilize a single sample and its replicates to select hyperparameters through a grid search, aiming to maximize the median ARI. Subsequently, the optimal hyperparameters are applied to the remaining test samples. For state-of-the-art methods, we consider hyperparameter values suggested by the authors, as well as those discussed in the corresponding paper. Hyperparameter values are provided in the supplement. To ensure comparable conditions across models, the number of clusters is pre-defined based on the provided ground truth. Performance is assessed using the ARI between ground-truth labels and cluster assignments. We bootstrap 10,000 times from the median ARI across the test folds and report the resulting median ARI and its standard error. For most datasets, we generated all possible sample combinations (folds). However, for the larger CosMx NanoString™ Liver dataset we use each FOV to select the hyperparameters, but we evaluate on randomly selected 20 FOVs, not adjacent to the FOVs used for optimization. 4.2.7 Ablation study In the ablation study on AESTETIK , we adhere to the procedure described in Section 4.2.6, where we fix the value for the hyperparameter of interest and assess its impact on the ARI. To ensure comparability across datasets, we compute an ARI z-score. 4.3 Downstream applications 4.3.1 Marker genes For marker genes, we employ the `rank_genes_groups` function from scanpy using the Wilcoxon signed-rank test. Significant marker genes (adjusted p-value < 0.05) are selected and sorted by their average log-fold change. The top 15 marker genes per cluster are reported. 4.3.2 Pathway analysis For pathway analysis, we utilize the multivariate linear model from the decoupler package [ 29 ] to compute regulatory pathway activities from the PROGENy database [ 47 ]. 
4.3.3 Cluster centroids in latent space To determine the centroid for each cluster in the latent space, we employed a method minimizing the sum of Euclidean distances among all samples within that class. Subsequently, we computed the top N spots near each cluster centroid. 4.4 Data availability The LIBD Human DLPFC dataset is available at https://github.com/LieberInstitute/HumanPilot and http://research.libd.org/spatialLIBD ; Human Breast Cancer - Zenodo https://doi.org/10.5281/zenodo.4739739 , Human Liver Normal and Cancer - https://nanostring.com/products/cosmx-spatial-molecular-imager/human-liver-rna-ffpe-dataset/ . The metastatic melanoma dataset with 18 tissue slices from Tumor Profiler samples sequenced using Visium from 10x Genomics™ will be made available upon acceptance of publication. 4.5 Code availability The open-source implementation of AESTETIK along with a tutorial is available at: www.github.com/ratschlab/aestetik The Snakemake pipeline for reproducing the results is available at: www.github.com/ratschlab/st-rep TUMOR PROFILER CONSORTIUM Rudolf Aebersold 5 , Melike Ak 33 , Faisal S Al-Quaddoomi 12,22 , Silvana I Albert 10 , Jonas Albinus 10 , Ilaria Alborelli 29 , Sonali Andani 9,22,31,36 , Per-Olof Attinger 14 , Marina Bacac 21 , Daniel Baumhoer 29 , Beatrice Beck-Schimmer 44 , Niko Beerenwinkel 7,22 , Christian Beisel 7 , Lara Bernasconi 32 , Anne Bertolini 12,22 , Bernd Bodenmiller 11,40 , Ximena Bonilla 9 , Lars Bosshard 12,22 , Byron Calgua 29 , Ruben Casanova 40 , Stéphane Chevrier 40 , Natalia Chicherova 12,22 , Ricardo Coelho 23 , Maya D’Costa 13 , Esther Danenberg 42 , Natalie R Davidson 9 , Monica-Andreea Dragan 7 , Reinhard Dummer 33 , Stefanie Engler 40 , Martin Erkens 19 , Katja Eschbach 7 , Cinzia Esposito 42 , André Fedier 23 , Pedro F Ferreira 7 , Joanna Ficek-Pascual 1,9,16,22,31 , Anja L Frei 36 , Bruno Frey 18 , Sandra Goetze 10 , Linda Grob 12,22 , Gabriele Gut 42 , Detlef Günther 8 , Pirmin Haeuptle 3 , Viola Heinzelmann-Schwarz 
23,28 , Sylvia Herter 21 , Rene Holtackers 42 , Tamara Huesser 21 , Alexander Immer 9,17 , Anja Irmisch 33 , Francis Jacob 23 , Andrea Jacobs 40 , Tim M Jaeger 14 , Katharina Jahn 7 , Alva R James 9,22,31 , Philip M Jermann 29 , André Kahles 9,22,31 , Abdullah Kahraman 22,36 , Viktor H Koelzer 36,41 , Werner Kuebler 30 , Jack Kuipers 7,22 , Christian P Kunze 27 , Christian Kurzeder 26 , Kjong-Van Lehmann 2,4,9,15 , Mitchell Levesque 33 , Ulrike Lischetti 23 , Flavio C Lombardo 23 , Sebastian Lugert 13 , Gerd Maass 18 , Markus G Manz 35 , Philipp Markolin 9 , Martin Mehnert 10 , Julien Mena 5 , Julian M Metzler 34 , Nicola Miglino 35,41 , Emanuela S Milani 10 , Holger Moch 36 , Simone Muenst 29 , Riccardo Murri 43 , Charlotte KY Ng 29,39 , Stefan Nicolet 29 , Marta Nowak 36 , Monica Nunez Lopez 23 , Patrick GA Pedrioli 6 , Lucas Pelkmans 42 , Salvatore Piscuoglio 23,29 , Michael Prummer 12,22 , Prélot, Laurie 9,22,31 , Natalie Rimmer 23 , Mathilde Ritter 23 , Christian Rommel 19 , María L Rosano-González 12,22 , Gunnar Rätsch 1,6,9,22,31 , Natascha Santacroce 7 , Jacobo Sarabia del Castillo 42 , Ramona Schlenker 20 , Petra C Schwalie 19 , Severin Schwan 14 , Tobias Schär 7 , Gabriela Senti 32 , Wenguang Shao 10 , Franziska Singer 12,22 , Sujana Sivapatham 40 , Berend Snijder 5,22 , Bettina Sobottka 36 , Vipin T Sreedharan 12,22 , Stefan Stark 9,22,31 , Daniel J Stekhoven 12,22 , Tanmay Tanna 7,9 , Alexandre PA Theocharides 35 , Tinu M Thomas 9,22,31 , Markus Tolnay 29 , Vinko Tosevski 21 , Nora C Toussaint 12,22 , Mustafa A Tuncel 7,22 , Marina Tusup 33 , Audrey Van Drogen 10 , Marcus Vetter 25 , Tatjana Vlajnic 29 , Sandra Weber 32 , Walter P Weber 24 , Rebekka Wegmann 5 , Michael Weller 38 , Fabian Wendt 10 , Norbert Wey 36 , Andreas Wicki 35,41 , Mattheus HE Wildschut 5,35 , Bernd Wollscheid 10 , Shuqing Yu 12,22 , Johanna Ziegler 33 , Marc Zimmermann 9 , Martin Zoche 36 , Gregor Zuend 37 1 AI Center at ETH Zurich, Andreasstrasse 5, 8092 Zurich, Switzerland, 2 
Cancer Research Center Cologne-Essen, University Hospital Cologne, Cologne, Germany, 3 Cantonal Hospital Baselland, Medical University Clinic, Rheinstrasse 26, 4410 Liestal, Switzerland, 4 Center for Integrated Oncology Aachen (CIO-A), Aachen, Germany, 5 ETH Zurich, Department of Biology, Institute of Molecular Systems Biology, Otto-Stern-Weg 3, 8093 Zurich, Switzerland, 6 ETH Zurich, Department of Biology, Wolfgang-Pauli-Strasse 27, 8093 Zurich, Switzerland, 7 ETH Zurich, Department of Biosystems Science and Engineering, Mattenstrasse 26, 4058 Basel, Switzerland, 8 ETH Zurich, Department of Chemistry and Applied Biosciences, Vladimir-Prelog-Weg 1-5/10, 8093 Zurich, Switzerland, 9 ETH Zurich, Department of Computer Science, Institute of Machine Learning, Universitätstrasse 6, 8092 Zurich, Switzerland, 10 ETH Zurich, Department of Health Sciences and Technology, Otto-Stern-Weg 3, 8093 Zurich, Switzerland, 11 ETH Zurich, Institute of Molecular Health Sciences, Otto-Stern-Weg 7, 8093 Zurich, Switzerland, 12 ETH Zurich, NEXUS Personalized Health Technologies, Wagistrasse 18, 8952 Zurich, Switzerland, 13 F. Hoffmann-La Roche Ltd, Grenzacherstrasse 124, 4070 Basel, Switzerland, 14 F. 
Hoffmann-La Roche Ltd, Grenzacherstrasse 124, 4070 Basel, Switzerland, 15 Joint Research Center Computational Biomedicine, University Hospital RWTH Aachen, Aachen, Germany, 16 Life Science Zurich Graduate School, Biomedicine PhD Program, Winterthurerstrasse 190, 8057 Zurich, Switzerland, 17 Max Planck ETH Center for Learning Systems, 18 Roche Diagnostics GmbH, Nonnenwald 2, 82377 Penzberg, Germany, 19 Roche Pharmaceutical Research and Early Development, Roche Innovation Center Basel, Grenzacherstrasse 124, 4070 Basel, Switzerland, 20 Roche Pharmaceutical Research and Early Development, Roche Innovation Center Munich, Roche Diagnostics GmbH, Nonnenwald 2, 82377 Penzberg, Germany, 21 Roche Pharmaceutical Research and Early Development, Roche Innovation Center Zurich, Wagistrasse 10, 8952 Schlieren, Switzerland, 22 SIB Swiss Institute of Bioinformatics, Lausanne, Switzerland, 23 University Hospital Basel and University of Basel, Department of Biomedicine, Hebelstrasse 20, 4031 Basel, Switzerland, 24 University Hospital Basel and University of Basel, Department of Surgery, Brustzentrum, Spitalstrasse 21, 4031 Basel, Switzerland, 25 University Hospital Basel, Brustzentrum & Tumorzentrum, Petersgraben 4, 4031 Basel, Switzerland, 26 University Hospital Basel, Brustzentrum, Spitalstrasse 21, 4031 Basel, Switzerland, 27 University Hospital Basel, Department of Information- and Communication Technology, Spitalstrasse 26, 4031 Basel, Switzerland, 28 University Hospital Basel, Gynecological Cancer Center, Spitalstrasse 21, 4031 Basel, Switzerland, 29 University Hospital Basel, Institute of Medical Genetics and Pathology, Schönbeinstrasse 40, 4031 Basel, Switzerland, 30 University Hospital Basel, Spital-strasse 21/Petersgraben 4, 4031 Basel, Switzerland, 31 University Hospital Zurich, Biomedical Informatics, Schmelzbergstrasse 26, 8006 Zurich, Switzerland, 32 University Hospital Zurich, Clinical Trials Center, Ramistrasse 100, 8091 Zurich, Switzerland, 33 University Hospital 
Zurich, Department of Dermatology, Gloriastrasse 31, 8091 Zurich, Switzerland, 34 University Hospital Zurich, Department of Gynecology, Frauenklinikstrasse 10, 8091 Zurich, Switzerland, 35 University Hospital Zurich, Department of Medical Oncology and Hematology, Rämistrasse 100, 8091 Zurich, Switzerland, 36 University Hospital Zurich, Department of Pathology and Molecular Pathology, Schmelzbergstrasse 12, 8091 Zurich, Switzerland, 37 University Hospital Zurich, Ramistrasse 100, 8091 Zurich, Switzerland, 38 University Hospital and University of Zurich, Department of Neurology, Frauenklinikstrasse 26, 8091 Zurich, Switzerland, 39 University of Bern, Department of BioMedical Research, Murtenstrasse 35, 3008 Bern, Switzerland, 40 University of Zurich, Department of Quantitative Biomedicine, Winterthurerstrasse 190, 8057 Zurich, Switzerland, 41 University of Zurich, Faculty of Medicine, Zurich, Switzerland, 42 University of Zurich, Institute of Molecular Life Sciences, Winterthurerstrasse 190, 8057 Zurich, Switzerland, 43 University of Zurich, Services and Support for Science IT, Winterthurerstrasse 190, 8057 Zurich, Switzerland, 44 University of Zurich, VP Medicine, Künstlergasse 15, 8001 Zurich, Switzerland Consent for publication This manuscript has been seen and approved by all listed authors. The figures were created using BioRender.com and exported under a paid subscription. Funding We gratefully acknowledge funding from the Tumor Profiler Initiative and the Tumor Profiler Center (to V.H.K., G.R.). The Tumor Profiler study is jointly funded by a public-private partnership involving F. Hoffmann-La Roche Ltd., ETH Zurich, University of Zurich, University Hospital Zurich, and University Hospital Basel. We also acknowledge funding of S.A. from the Swiss Federal Institutes of Technology strategic focus area of personalized health and related technologies project 2021-367 (to G.R., V.H.K.), of K.N. by Swiss National Science Foundation grants 220127 (to G.R.) 
and 201656, and ETH core funding (to G.R.), UZH core funding (to V.H.K) and funding by the Promedica Foundation grant F-87701-41-01 (to V.H.K). Conflict of interest/Competing interests V.H.K reports being an invited speaker for Sharing Progress in Cancer Care (SPCC) and Indica Labs; advisory board of Takeda; and sponsored research agreements with Roche and IAG, all unrelated to the current study. VHK is a participant in a patent application on the assessment of cancer immunotherapy biomarkers by digital pathology; a patent application on multimodal deep learning for the prediction of recurrence risk in cancer patients, and a patent application on predicting the efficacy of cancer treatment using deep learning. GR is a participant in a patent application on matching cells from different measurement modalities which is not directly related to the current work. Moreover, G.R. is a cofounder of Computomics GmbH, Germany, and one of its shareholders. Acknowledgments This work was supported by the Swiss Federal Institutes of Technology (strategic focus area of personalized health and related technologies; 2021–367). The 10x spatial transcriptomics sequencing of the Tumor Profiler samples was made possible through a technology access program by 10x Genomics™, with special acknowledgments to Jacob Stern, James Chell, Rudi Schläfli, Laura Lipka, Mario Werner, Nikhil Rao, and Scott Brouilette for their invaluable contributions. The Tumor Profiler study was supported by a public-private partnership involving Roche Holding AG, ETH Zurich, University of Zurich, University Hospital Zurich, and University Hospital Basel. V.H.K. gratefully acknowledges additional funding by the Promedica Foundation (F-87701-41-01). References [1]. ↵ Rao , A. , Barkley , D. , França , G.S. , Yanai , I .: Exploring tissue architecture using spatial transcriptomics . Nature 596 ( 7871 ), 211 – 220 ( 2021 ) OpenUrl CrossRef PubMed [2]. ↵ Asp , M. , Bergenstråhle , J. 
, Lundeberg , J .: Spatially resolved transcriptomes—next generation tools for tissue exploration . BioEssays 42 ( 10 ), 1900221 ( 2020 ) OpenUrl CrossRef [3]. ↵ Williams , C.G. , Lee , H.J. , Asatsuma , T. , Vento-Tormo , R. , Haque , A .: An introduction to spatial transcriptomics for biomedical research . Genome Medicine 14 ( 1 ), 1 – 18 ( 2022 ) OpenUrl [4]. ↵ Lein , E. , Borm , L.E. , Linnarsson , S .: The promise of spatial transcriptomics for neuroscience in the era of molecular cell typing . Science 358 ( 6359 ), 64 – 69 ( 2017 ) OpenUrl Abstract / FREE Full Text [5]. ↵ Ståhl , P.L. , Salmén , F. , Vickovic , S. , Lundmark , A. , Navarro , J.F. , Magnusson , J. , Giacomello , S. , Asp , M. , Westholm , J.O. , Huss , M. , et al : Visualization and analysis of gene expression in tissue sections by spatial transcriptomics . Science 353 ( 6294 ), 78 – 82 ( 2016 ) OpenUrl Abstract / FREE Full Text [6]. Yoosuf , N. , Navarro , J.F. , Salmén , F. , Ståhl , P.L. , Daub , C.O .: Identification and transfer of spatial transcriptomics signatures for cancer diagnosis . Breast Cancer Research 22 , 1 – 10 ( 2020 ) OpenUrl CrossRef PubMed [7]. Chen , W.-T. , Lu , A. , Craessaerts , K. , Pavie , B. , Frigerio , C.S. , Corthout , N. , Qian , X. , Laláková , J. , Kühnemund , M. , Voytyuk , I. , et al : Spatial transcriptomics and in situ sequencing to study alzheimer’s disease . Cell 182 ( 4 ), 976 – 991 ( 2020 ) OpenUrl PubMed [8]. ↵ Longo , S.K. , Guo , M.G. , Ji , A.L. , Khavari , P.A .: Integrating single-cell and spatial transcriptomics to elucidate intercellular tissue dynamics . Nature Reviews Genetics 22 ( 10 ), 627 – 644 ( 2021 ) OpenUrl CrossRef PubMed [9]. ↵ Zeng , Z. , Li , Y. , Li , Y. , Luo , Y .: Statistical and machine learning methods for spatially resolved transcriptomics data analysis . Genome biology 23 ( 1 ), 1 – 23 ( 2022 ) OpenUrl CrossRef [10]. ↵ Zhao , E. , Stone , M.R. , Ren , X. , Guenthoer , J. , Smythe , K.S. , Pulliam , T. , Williams , S.R. 
, Uytingco , C.R. , Taylor , S.E. , Nghiem , P. , et al : Spatial transcriptomics at subspot resolution with bayesspace . Nature biotechnology 39 ( 11 ), 1375 – 1384 ( 2021 ) OpenUrl [11]. ↵ Bao , F. , Deng , Y. , Wan , S. , Shen , S.Q. , Wang , B. , Dai , Q. , Altschuler , S.J. , Wu , L.F .: Integrative spatial analysis of cell morphologies and transcriptional states with muse . Nature biotechnology 40 ( 8 ), 1200 – 1209 ( 2022 ) OpenUrl CrossRef [12]. ↵ Pham , D. , Tan , X. , Xu , J. , Grice , L.F. , Lam , P.Y. , Raghubar , A. , Vukovic , J. , Ruitenberg , M.J. , Nguyen , Q .: stlearn: integrating spatial location, tissue morphology and gene expression to find cell types, cell-cell interactions and spatial trajectories within undissociated tissues . BioRxiv , 2020 – 05 ( 2020 ) [13]. ↵ Hu , J. , Li , X. , Coleman , K. , Schroeder , A. , Ma , N. , Irwin , D.J. , Lee , E.B. , Shinohara , R.T. , Li , M. : Spagcn: Integrating gene expression, spatial location and histology to identify spatial domains and spatially variable genes by graph convolutional network . Nature methods 18 ( 11 ), 1342 – 1351 ( 2021 ) OpenUrl [14]. Dong , K. , Zhang , S .: Deciphering spatial domains from spatially resolved transcriptomics with an adaptive graph attention auto-encoder . Nature communications 13 ( 1 ), 1739 ( 2022 ) OpenUrl [15]. ↵ Long , Y. , Ang , K.S. , Li , M. , Chong , K.L.K. , Sethi , R. , Zhong , C. , Xu , H. , Ong , Z. , Sachaphibulkij , K. , Chen , A. , et al : Spatially informed clustering, integration, and deconvolution of spatial transcriptomics with graphst . Nature Communications 14 ( 1 ), 1155 ( 2023 ) OpenUrl [16]. ↵ Eldar , A. , Elowitz , M.B .: Functional roles for noise in genetic circuits . Nature 467 ( 7312 ), 167 – 173 ( 2010 ) OpenUrl CrossRef PubMed Web of Science [17]. ↵ Kharchenko , P.V. , Silberstein , L. , Scadden , D.T .: Bayesian approach to single-cell differential expression analysis . Nature methods 11 ( 7 ), 740 – 742 ( 2014 ) OpenUrl [18]. 
↵ Buettner , F. , Natarajan , K.N. , Casale , F.P. , Proserpio , V. , Scialdone , A. , Theis , F.J. , Teichmann , S.A. , Marioni , J.C. , Stegle , O .: Computational analysis of cell-to-cell heterogeneity in single-cell rna-sequencing data reveals hidden subpopulations of cells . Nature biotechnology 33 ( 2 ), 155 – 160 ( 2015 ) OpenUrl CrossRef PubMed [19]. ↵ Dai , H. , Li , H. , Tian , T. , Huang , X. , Wang , L. , Zhu , J. , Song , L. : Adversarial attack on graph structured data . In: International Conference on Machine Learning , pp. 1115 – 1124 ( 2018 ). PMLR [20]. ↵ Zügner , D. , Akbarnejad , A. , Günnemann , S. : Adversarial attacks on neural networks for graph data . In: Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining , pp. 2847 – 2856 ( 2018 ) [21]. ↵ Maynard , K.R. , Collado-Torres , L. , Weber , L.M. , Uytingco , C. , Barry , B.K. , Williams , S.R. , Catallini , J.L. , Tran , M.N. , Besich , Z. , Tippani , M. , et al : Transcriptome-scale spatial gene expression in the human dorsolateral prefrontal cortex . Nature neuroscience 24 ( 3 ), 425 – 436 ( 2021 ) OpenUrl CrossRef PubMed [22]. ↵ Wu , S.Z. , Al-Eryani , G. , Roden , D.L. , Junankar , S. , Harvey , K. , Andersson , A. , Thennavan , A. , Wang , C. , Torpy , J.R. , Bartonicek , N. , et al : A single-cell and spatially resolved atlas of human breast cancers . Nature genetics 53 ( 9 ), 1334 – 1347 ( 2021 ) OpenUrl CrossRef PubMed [23]. ↵ Lun , A.T. , McCarthy , D.J. , Marioni , J.C .: A step-by-step workflow for low-level analysis of single-cell rna-seq data with bioconductor . F1000Research 5 ( 2016 ) [24]. ↵ Deng , J. , Dong , W. , Socher , R. , Li , L.-J. , Li , K. , Fei-Fei , L. : Imagenet: A largescale hierarchical image database . In: 2009 IEEE Conference on Computer Vision and Pattern Recognition , pp. 248 – 255 ( 2009 ). Ieee [25]. ↵ Szegedy , C. , Vanhoucke , V. , Ioffe , S. , Shlens , J. , Wojna , Z. 
: Rethinking the inception architecture for computer vision . In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition , pp. 2818 – 2826 ( 2016 ) [26]. ↵ Pramerdorfer , C. , Kampel , M .: Facial expression recognition using convolutional neural networks: state of the art . arXiv preprint arXiv : 1612.02903 ( 2016 ) [27]. ↵ Voulodimos , A. , Doulamis , N. , Doulamis , A. , Protopapadakis , E. , et al : Deep learning for computer vision: A brief review . Computational intelligence and neuroscience 2018 ( 2018 ) [28]. ↵ Irmisch , A. , Bonilla , X. , Chevrier , S. , Lehmann , K.-V. , Singer , F. , Toussaint , N.C. , Esposito , C. , Mena , J. , Milani , E.S. , Casanova , R. , et al : The tumor profiler study: integrated, multi-omic, functional tumor profiling for clinical decision support . Cancer Cell 39 ( 3 ), 288 – 293 ( 2021 ) OpenUrl [29]. ↵ Badia-i-Mompel , P. , Santiago , J.V. , Braunger , J. , Geiss , C. , Dimitrov , D. , Müller-Dott , S. , Taus , P. , Dugourd , A. , Holland , C.H. , Flores , R.O.R. , Saez-Rodriguez , J .: decoupler: ensemble of computational methods to infer biological activities from omics data . Bioinformatics Advances ( 2022 ) doi: 10.1093/ bioadv/vbac016 OpenUrl Abstract / FREE Full Text [30]. ↵ McInnes , L. , Healy , J. , Melville , J. : Umap: Uniform manifold approximation and projection for dimension reduction . arXiv preprint arXiv : 1802.03426 ( 2018 ) [31]. ↵ Manetti , M. : Molecular morphology and function of stromal cells . MDPI ( 2021 ) [32]. ↵ Van Der Meer , W. , Van Gelder , W. , Keijzer , R. , Willems , H .: The divergent morphological classification of variant lymphocytes in blood smears . Journal of clinical pathology 60 ( 7 ), 838 – 839 ( 2007 ) OpenUrl FREE Full Text [33]. ↵ Ghanem , G. , Fabrice , J .: Tyrosinase related protein 1 (tyrp1/gp75) in human cutaneous melanoma . Molecular oncology 5 ( 2 ), 150 – 155 ( 2011 ) OpenUrl [34]. Qiu , C. , Li , P. , Bi , J. , Wu , Q. , Lu , L. , Qian , G. 
, Jia , R. , Jia , R .: Differential expression of tyrp1 in adult human retinal pigment epithelium and uveal melanoma cells . Oncology Letters 11 ( 4 ), 2379 – 2383 ( 2016 ) OpenUrl [35]. ↵ Journe , F. , Boufker , H.I. , Van Kempen , L. , Galibert , M.-D. , Wiedig , M. , Salès , F. , Theunis , A. , Nonclercq , D. , Frau , A. , Laurent , G. , et al : Tyrp1 mrna expression in melanoma metastases correlates with clinical outcome . British journal of cancer 105 ( 11 ), 1726 – 1732 ( 2011 ) OpenUrl CrossRef PubMed [36]. ↵ Jayachandran , A. , Lo , P.-H. , Chueh , A.C. , Prithviraj , P. , Molania , R. , Davalos-Salas , M. , Anaka , M. , Walkiewicz , M. , Cebon , J. , Behren , A .: Transketolase-like 1 ectopic expression is associated with dna hypomethylation and induces the warburg effect in melanoma cells . BMC cancer 16 , 1 – 15 ( 2016 ) OpenUrl CrossRef PubMed [37]. ↵ Inamdar , G.S. , Madhunapantula , S.V. , Robertson , G.P .: Targeting the mapk pathway in melanoma: why some approaches succeed and other fail . Biochemical pharmacology 80 ( 5 ), 624 – 637 ( 2010 ) OpenUrl CrossRef PubMed [38]. Amaral , T. , Sinnberg , T. , Meier , F. , Krepler , C. , Levesque , M. , Niessner , H. , Garbe , C .: The mitogen-activated protein kinase pathway in melanoma part i–activation and primary resistance mechanisms to braf inhibition . European journal of cancer 73 , 85 – 92 ( 2017 ) OpenUrl [39]. ↵ Shain , A.H. , Yeh , I. , Kovalyshyn , I. , Sriharan , A. , Talevich , E. , Gagnon , A. , Dummer , R. , North , J. , Pincus , L. , Ruben , B. , et al : The genetic evolution of melanoma from precursor lesions . New England Journal of Medicine 373 ( 20 ), 1926 – 1936 ( 2015 ) OpenUrl CrossRef PubMed [40]. ↵ Fan , Z. , Chen , R. , Chen , X .: Spatialdb: a database for spatially resolved transcriptomes . Nucleic acids research 48 ( D1 ), 233 – 237 ( 2020 ) OpenUrl [41]. Xu , Z. , Wang , W. , Yang , T. , Chen , J. , Huang , Y. , Gould , J. , Du , W. , Yang , F. , Li , L. , Lai , T. 
, et al : Stomicsdb: a database of spatial transcriptomic data . bioRxiv , 2022 – 03 ( 2022 ) [42]. ↵ Zheng , Y. , Chen , Y. , Ding , X. , Wong , K.H. , Cheung , E .: Aquila: a spatial omics database and analysis platform . Nucleic Acids Research 51 ( D1 ), 827 – 834 ( 2023 ) OpenUrl [43]. ↵ Wolf , F.A. , Angerer , P. , Theis , F.J .: Scanpy: large-scale single-cell gene expression data analysis . Genome biology 19 , 1 – 5 ( 2018 ) OpenUrl CrossRef PubMed [44]. ↵ LeCun , Y. , Boser , B. , Denker , J.S. , Henderson , D. , Howard , R.E. , Hubbard , W. , Jackel , L.D .: Backpropagation applied to handwritten zip code recognition . Neural computation 1 ( 4 ), 541 – 551 ( 1989 ) OpenUrl CrossRef [45]. ↵ Sohn , K .: Improved deep metric learning with multi-class n-pair loss objective . Advances in neural information processing systems 29 ( 2016 ) [46]. ↵ Kingma , D.P. , Ba , J. : Adam: A method for stochastic optimization . arXiv preprint arXiv : 1412.6980 ( 2014 ) [47]. ↵ Schubert , M. , Klinger , B. , Klünemann , M. , Sieber , A. , Uhlitz , F. , Sauer , S. , Garnett , M.J. , Blüthgen , N. , Saez-Rodriguez , J .: Perturbation-response genes reveal signaling footprints in cancer gene expression . Nature communications 9 ( 1 ), 20 ( 2018 ) OpenUrl View the discussion thread. Back to top Previous Next Posted June 04, 2024. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Representation learning for multi-modal spatially resolved transcriptomics data Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. 
Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Representation learning for multi-modal spatially resolved transcriptomics data Kalin Nonchev , Sonali Andani , Joanna Ficek-Pascual , Marta Nowak , Bettina Sobottka , Tumor Profiler Consortium , Viktor H Koelzer , Gunnar Rätsch medRxiv 2024.06.04.24308256; doi: https://doi.org/10.1101/2024.06.04.24308256 Share This Article: Copy Citation Tools Representation learning for multi-modal spatially resolved transcriptomics data Kalin Nonchev , Sonali Andani , Joanna Ficek-Pascual , Marta Nowak , Bettina Sobottka , Tumor Profiler Consortium , Viktor H Koelzer , Gunnar Rätsch medRxiv 2024.06.04.24308256; doi: https://doi.org/10.1101/2024.06.04.24308256 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (574) Allergy and Immunology (865) Anesthesia (304) Cardiovascular Medicine (4462) Dentistry and Oral Medicine (445) Dermatology (383) Emergency Medicine (611) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1517) Epidemiology (15251) Forensic Medicine (31) Gastroenterology (1132) Genetic and Genomic Medicine (6621) Geriatric Medicine (669) Health Economics (1002) Health Informatics (4564) Health Policy (1372) Health Systems and Quality Improvement (1617) Hematology (544) HIV/AIDS (1272) Infectious Diseases (except HIV/AIDS) (15938) Intensive Care and Critical Care Medicine (1107) Medical Education (624) Medical Ethics (147) Nephrology (670) Neurology (6643) Nursing (346) Nutrition (1001) Obstetrics and Gynecology (1149) Occupational and Environmental Health (957) Oncology (3350) Ophthalmology (981) Orthopedics (369) Otolaryngology (421) Pain Medicine (436) Palliative Medicine (130) Pathology (665) 
Pediatrics (1698) Pharmacology and Therapeutics (694) Primary Care Research (714) Psychiatry and Clinical Psychology (5465) Public and Global Health (9259) Radiology and Imaging (2212) Rehabilitation Medicine and Physical Therapy (1372) Respiratory Medicine (1198) Rheumatology (598) Sexual and Reproductive Health (716) Sports Medicine (533) Surgery (715) Toxicology (100) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a03c83b8be70c13d',t:'MTc4MDEzMTI3MA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (sparse)

Too few in-corpus citations on either side for a chart; here are the lists.

Cited by (1)

Towards Cross-Sample Alignment for Multi-Modal Representation Learning in Spatial Transcriptomics 2026

Cited by (1)

Towards Cross-Sample Alignment for Multi-Modal Representation Learning in Spatial Transcriptomics 2026

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-21T05:10:58.409756+00:00

License: CC-BY-NC-ND-4.0