Full text
73,149 characters
· extracted from
preprint-html
· click to expand
Spatially varying cell-specific gene regulation network inference | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Spatially varying cell-specific gene regulation network inference View ORCID Profile Yurui Li , View ORCID Profile Jin Chen , View ORCID Profile Haohan Wang doi: https://doi.org/10.1101/2025.10.17.683188 Yurui Li 1 School of Information Science, University of Illinois at Urbana-Champaign , Champaign, IL 61820, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yurui Li For correspondence: 
yuruil2{at}illinois.edu haohanw{at}illinois.edu Jin Chen 2 Department of Inflammation and Immunity, Lerner Research Institute , Cleveland Clinic, Cleveland, OH 44195, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jin Chen Haohan Wang 1 School of Information Science, University of Illinois at Urbana-Champaign , Champaign, IL 61820, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Haohan Wang For correspondence: yuruil2{at}illinois.edu haohanw{at}illinois.edu Abstract Full Text Info/History Metrics Preview PDF Abstract Gene regulatory networks (GRNs), involving interactions between large numbers of genes, govern expression levels of mRNA and their resulting proteins to control cell functions. As many new sequencing technologies in single-cell and spatial resolutions are raised, the construction of GRNs gains opportunities to be generalized to the cell-specific level. Here we propose SVGRN, a deep learning model to infer cell-specific GRNs using spatial transcriptomics data. We model the gene expression, GRN matrix, and spatial coordinates of cells in the structural equation model (SEM) to learn gene interactions in an unsupervised way. Conditioned on the target cell position, the model is able to tune the whole tissue GRN to the target cell through also borrowing information from neighborhood cells. Results on simulated datasets show that SVGRN achieves better performance in cases with more noises, genes, or cells, which illustrates its ability to solve more complex situations. Our model was further applied to spatial transcriptomics datasets generated using different technologies and resolutions, including a seqFISH-based mouse embryo dataset and two Visium-based datasets from human cutaneous squamous cell carcinoma (cSCC) and fallopian tube tissues. 
The GRNs predicted by SVGRN from these datasets reveal dynamic gene regulatory patterns in mouse organogenesis, tumor development and the functional organization of the fallopian tube, highlighting the efficiency and broad applicability of our model. 1. Introduction The collaboration between chromatin, transcription factors and genes generates complex regulatory circuits that can be modeled as gene regulatory networks (GRNs) [ 51 ]. Unraveling these interactions plays a critical role in understanding the underlying regulatory crosstalk that drives many cellular processes and diseases [ 36 ]. Traditional GRN inference methods rely on bulk transcriptomics data [ 49 , 40 ] or experimentally validated regulatory events. However, bulk profiling obscures regulatory programs specific to individual cell types or states, as it aggregates data from diverse cells within a tissue sample. This limitation has been overcome by single-cell multi-omics and spatial transcriptomics technologies, which enable GRN inference across distinct cell types, differentiation paths, and conditions. The advent of these technologies has led to the development of novel computational methods that infer GRNs at an unprecedented resolution. The current computational methods employing diverse statistical or machine learning methods aim to reconstruct more comprehensive and precise gene regulatory networks. Many methods are developed to infer common GRNs of all cells or a specific subpopulation of cells through utilizing single-cell multi-omics datasets [ 72 , 29 ] or time series data [ 25 , 76 ]. There are only a few methods that can extend this inference to cell-specific GRNs [ 74 , 73 ]. In CeSpGRN [ 74 ], it models the gene expression data using Gaussian Copula Graphical Model (GCGM) [ 43 , 67 ] to conduct cell-specific inference from single-cell multi-omics and spatial data. 
LocaTE [ 73 ] utilizes an information theoretic approach that leverages single cell dynamical information together with the geometry of the cell state manifold to infer cell-specific GRNs. Although these methods enable people to understand GRNs on a finer-grained level, they often need additional measurements besides single cell gene expression such as scATAC-seq and RNA velocity, which adds more difficulties to the inference. These measurements may also introduce additional noise as they may come from different experiments. Therefore, in this paper, we propose a deep learning method, SVGRN, solely based on spatially resolved gene expression data to infer varying cell-specific GRNs along the space without other measurements or prior knowledge. Relying on the decisive role of cell coordinates to its GRN and utilizing useful information from neighborhood cells, we are able to refine the GRN of the whole tissue to the specific target cell by borrowing information from its spatial position and neighboring cells. We implemented the modeling of gene expression and cells’ layout through conditional variational autoencoder (CVAE), and to the best of our knowledge, SVGRN is the first method based on a deep learning model to construct cell-specific GRNs using spatial transcriptomic data. The introducing of deep learning allows modeling spatially varying GRNs in a more comprehensive nonlinear framework and benefits from its computational power. Compared with CeSpGRN on several simulated datasets with various settings, SVGRN demonstrates advantages on more complex inference situations and maintains affordable running time on a larger number of genes. We applied our model to a cell-based spatial transcriptomics dataset of mouse embryos, demonstrating its ability to capture regional gene interactions during early organogenesis. 
Additionally, the predicted GRNs from the spot-based cSCC and fallopian tube datasets align with existing studies and reveal the functional tissue niches in space, further highlighting the broad applicability of SVGRN and its stable performance across different spatial transcriptomics technologies. 2. Methods 2.1 Gene interaction modeling with cell spatial coordinates The underlying framework of SVGRN is derived from structural equation modeling (SEM), inspired by DeepSEM [ 62 ], which uses SEM for GRN modeling. SEM is a multivariate, hypothesis-driven technique that is based on a structural model representing a hypothesis about the causal relations among several variables [ 64 ]. In the context of GRN inference from spatial transcriptomics, these variables are the genes with available expression data whose interaction relationships we want to explore, and the hypothetical causal relations are based on the regulation mechanism between genes. The structural model of SEM formulates each gene’s expression value as a function of the expression levels of other genes, enabling it to capture the dependencies between genes in GRNs, showing which genes influence others and to what extent. In our GRN inference problem, let $X \in \mathbb{R}^{n \times m}$ be the gene expression matrix with $n$ cells and $m$ genes and $W \in \mathbb{R}^{m \times m}$ be the GRN adjacency matrix that represents the dependencies among different genes. The relationship between $X$ and $W$ can be formalized following the basic linear SEM [ 62 ] as $$X = X W^{T} + Z \tag{1}$$ where $Z \in \mathbb{R}^{n \times m}$ is a matrix following a Gaussian distribution representing the noise introduced during the transcription process. It helps to capture the unobserved variability or measurement errors in the spatial transcriptomics data, providing flexibility and robustness to the model. Equation (1) can be rearranged to $$X = Z (I - W^{T})^{-1} \tag{2}$$ $$Z = X (I - W^{T}) \tag{3}$$ While linear SEM can provide basic gene dependencies, the actual gene regulation mechanisms in biological data are often more complicated.
In transcriptomics data, the gene interactions usually involve diverse combinatorial and context-dependent factors such as external environmental response and cell-cell signaling, which makes it challenging for a linear model to capture this complexity. Therefore, we transform equations (2) and (3) to a nonlinear version of the SEM following DAG-GNN [ 71 ] to fully utilize the computational ability of deep learning: $$X = f_{1}\big(Z (I - W^{T})^{-1}\big) \tag{4}$$ $$Z = f_{2}(X)\,(I - W^{T}) \tag{5}$$ where $f_{1}$ and $f_{2}$ are parameterized functions performing nonlinear transforms on $Z (I - W^{T})^{-1}$ and $X$, respectively. When considering cells in spatially organized environments, their behavior is significantly influenced by spatial context factors. Depending on where a cell is located, specific genes will be turned on or off due to signals received from neighboring cells, effectively dictating its development and function based on its spatial context [ 57 ]. In the developing embryo, positions of cells within the concentration gradients of small diffusible molecules called morphogens can also trigger different gene regulation patterns [ 33 ]. Spatial transcriptomics, capturing transcriptional activity at distinct spatial locations, provides opportunities for us to include the positional context effects in our model. By incorporating spatial coordinates, the model can take into account how a cell’s location affects its gene expression, allowing it to capture microenvironmental effects that shape cellular function and differentiation. Accordingly, based on equations (4) and (5), we further bring $Y \in \mathbb{R}^{n \times 2}$, the spatial coordinates of cells, into the model. The updated equations become $$X = f_{1,1}\big(f_{1,2}(Z, Y)\,(I - W^{T})^{-1}\big) \tag{6}$$ $$Z = f_{2,2}\big(f_{2,1}(X)\,(I - W^{T}),\, Y\big) \tag{7}$$ where $f_{1,1}$, $f_{1,2}$, $f_{2,1}$, $f_{2,2}$ are also nonlinear parameterized functions, implemented as multilayer neural networks in Section 2.2. Including $Y$ in the functions $f_{1,2}$ and $f_{2,2}$ enables the model to learn the nonlinear dependencies between gene expression and the spatial organization of cells, while simultaneously capturing gene regulatory relationships.
This design preserves the general symmetry between these two equations, providing a structured and balanced representation of spatial effects. Furthermore, this symmetry aligns with the architecture of the conditional variational autoencoder, making it well-suited for implementation within this framework, as we will further elaborate in Section 2.2. Involving coordinates in the SEM establishes a bridge between gene expression and spatial location information. We can not only capture the gene-gene dependency from the gene expression data, but also consider the contribution to the GRN from the spatial layout of cells. This provides the opportunity to extend the GRN on the whole tissue to single cells. 2.2 SVGRN framework based on CVAE To integrate both equations (6) and (7), we use a conditional variational autoencoder (CVAE) [ 63 ] to model the relationship among the gene expression matrix, GRN matrix, and cell coordinates. The CVAE structure consists of two components: an encoder compressing the input data into a latent representation and a decoder reconstructing the original data from this latent space, conditioned on additional input information. The encoding and decoding processes can be respectively aligned to the modeling in equations (7) and (6), and they are implemented using neural network structures to capture the non-linear relationships in data. Given the gene expression $X$ and coordinates $Y$ of cells from spatial transcriptomics, the encoder is trained to learn the distribution $q_{\phi}(Z \mid X, Y)$, parameterized by $\phi$, which maps them into the latent space as $Z$. This enables capturing how spatial information and gene expression are interrelated, resulting in a compressed representation of this complexity. Functioning in the opposite way, the decoder $p_{\theta}(X \mid Z, Y)$, parameterized by $\theta$, reconstructs the gene expression $X$ based on the latent representation $Z$ and the spatial coordinates $Y$.
It models how gene expression varies as a function of both the regulatory signals encoded in the latent space and the spatial locations of cells, effectively implementing the SEM model in a non-linear manner. The GRN matrix $W$ in the equations serves as part of the parameters in both the encoder and the decoder, and is learned through training. The more detailed neural network structure of SVGRN is shown in Fig. 1, which also follows the functions in our SEM equations. The gene expression encoder, corresponding to $f_{2,1}(\cdot)$ in equation (7), is the neural network that encodes gene expression, and we additionally add a separate coordinate encoder for cell coordinate encoding to better capture their spatial patterns. The gene expression encoder is followed by a GRN layer to directly capture gene-gene dependency from gene expression embeddings. $E_{2}$, implementing $f_{2,2}(\cdot)$, combines expression embeddings with coordinate features to further project them to latent space. With a symmetrical structure, $f_{1,1}(\cdot)$ and $f_{1,2}(\cdot)$ are respectively implemented as neural network decoders $D_{1}$ and $D_{2}$. $D_{2}$ decodes the features of latent variables and coordinate features, followed by a reverse GRN layer to remove the gene dependency. Then $D_{1}$ is used to finally recover the input gene expression. To let GRN layers fully comprehend interactions between genes, we avoid involving other combinations across genes in the rest of the neural network structures. Therefore, each encoder or decoder shares the same weight across all the genes and only conducts calculations on each single gene. Download figure Open in new tab Fig. 1. The SVGRN model architecture. The model takes the gene expression and its corresponding spatial coordinate for each spot as input from spatial transcriptomics data. The gene expression encoder, the coordinate encoder, and $E_{2}$ encode input data into latent space, and $D_{1}$ and $D_{2}$ are decoders to reconstruct the gene expression data. The GRN layer and inverse GRN layer learn through training to capture gene interactions.
Given the dataset $\mathcal{D} = \{(x_{k}, y_{k})\}_{k=1}^{n}$, where $x_{k} = X_{k,:}$ and $y_{k} = Y_{k,:}$, the goal of CVAE is to fit a model of the conditional probability distribution $p(x \mid y)$. It learns to recover the gene expression of each cell conditioned on its spatial coordinate and therefore relates cell-specific gene expression patterns with their specific places in the cell layout. The loss function of SVGRN extends the loss function for CVAE with an additional $L_{1}$ norm to regularize the adjacency matrix $W$, which is defined as follows: $$\mathcal{L} = -\mathbb{E}_{q_{\phi}(z \mid x, y)}\big[\log p_{\theta}(x \mid z, y)\big] + \beta\,\mathrm{KL}\big(q_{\phi}(z \mid x, y)\,\|\,p(z)\big) + \alpha \|W\|_{1} \tag{8}$$ where $\mathbb{E}$ is the expected value function, KL denotes the KL-divergence function, and $\alpha$ and $\beta$ are hyperparameters. Applying $L_{1}$ regularization encourages sparsity in the learned GRN matrix $W$, reflecting the reality that only a subset of genes directly regulate each other in gene regulatory networks. 2.3 SVGRN training for cell-specific GRNs The training of our SVGRN model can be divided into two stages, which gradually refine the learning from all cells in the tissue to a specific target cell. The pseudocode of this two-stage training is illustrated in Algorithm 1. Stage 1 training establishes a general GRN by using gene expression and spatial coordinates of all cells, providing a foundational network that captures the tissue’s overall regulatory patterns. Stage 2 then refines this network by conditioning on the target cell’s position and leveraging information from neighboring cells to learn a GRN specific to the target cell’s microenvironment. This two-stage approach establishes a biologically meaningful hierarchy by first capturing tissue-wide regulatory signals and then focusing on cell-specific nuances driven by local interactions, enabling precise inference of spatially resolved GRNs at the single-cell level. The following paragraphs will provide further details on each training stage. In many cases, we only have access to gene expression data and spatial information, with limited prior knowledge of TF-gene interactions.
Obtaining such prior knowledge often requires significant additional effort and may not always be applicable, as TF-gene interactions can vary depending on the tissue type, developmental stage, section position, and individual regulatory differences. To address these challenges, the first training stage is designed to avoid reliance on prior interaction data and instead learn a general GRN specific to the tissue being studied. Additionally, this stage incorporates spatial information to capture the relationship between gene expression and cellular coordinates. This allows the trained model to serve as a foundation for the second stage, where it can be fine-tuned to target specific cells by conditioning on the target coordinate. In the first training stage, the training dataset is composed of pairs $(x_{k}, y_{k})$ of gene expression and corresponding coordinates for each cell $k$. The loss function follows the same form as equation (8) and can be further written as follows: $$\mathcal{L}_{1} = \sum_{k=1}^{n} \Big[ -\mathbb{E}_{q_{\phi}(z \mid x_{k}, y_{k})}\big[\log p_{\theta}(x_{k} \mid z, y_{k})\big] + \beta\,\mathrm{KL}\big(q_{\phi}(z \mid x_{k}, y_{k})\,\|\,p(z)\big) \Big] + \alpha \|W\|_{1} \tag{9}$$ By training on all cells equally at this stage, the model learns a general distribution of gene expression of all cells given their spatial positions. In its GRN layers, the model starts with a randomly initialized GRN matrix and obtains an overall GRN representation of the entire tissue. It can serve as a good starting point for the subsequent learning for each single cell. In the second training stage, we further specialize our model to the single-cell level. Since the model has already learned stable encoders and decoders in the previous stage, we only update the parameters of the GRN layers and freeze all other layers in this stage. Here, we adjust training strategies based on two assumptions about the relationship between a cell’s spatial information and its gene expression profile.
Firstly, we assume that the position of a cell can serve as a unique identifier or label for its GRN after the training in the first stage, where the model learns the relationship between gene expression and its specific position across the dataset. By conditioning the model on these spatial positions, it captures the spatially-dependent variations in GRNs, allowing the model to further infer cell-specific GRNs of the target cells effectively based on the cell’s location. According to this assumption, when focusing on learning the GRN for a target cell $t$, we fix all the positions in the dataset as the position of this target cell. The training dataset of Stage 2 for target cell $t$ will become $\mathcal{D}_{t} = \{(x_{k}, y_{t})\}_{k=1}^{n}$. By doing so, the model is conditioned entirely on the target cell’s spatial context, enabling it to learn GRNs specific to the unique microenvironment of that cell. As we are training to model the distribution $p(x \mid y_{t})$, the parameterized GRN matrix will be updated towards the GRN of cell $t$ in the fixed position $y_{t}$ on which the whole model is conditioned. This approach allows the model to focus on regulatory relationships specific to the target cell’s location, refining the general GRN from Stage 1 into a cell-specific network. Secondly, we assume that the GRNs of closer cells will tend to be similar, while cells that are farther apart will have more differences in their GRNs, which is especially apparent in developing tissues. Under this assumption, when training the model to get the GRN for a target cell, we can not only use the data of this single target cell, but also borrow information from its neighboring cells, as they could to some extent provide a reference for the cell we are studying. In developing tissues such as embryos, GRNs exhibit smooth transitions across space due to the coordinated tissue development and gradual patterning along body axes.
During embryogenesis, cell differentiation exhibits spatial continuity as cells progressively transition from progenitors to specialized cell types [ 7 , 5 ]. This progression is often governed by spatially distributed gradients of morphogens, which gradually influence gene regulation patterns and expression throughout the tissue [ 6 ]. Consequently, cells that are spatially close, even if they belong to different types, often share similar GRNs. When applied to fully developed tissues, although cells in mature tissues are more specialized, there is still some information that we can utilize from neighborhood cells to infer the target cell’s GRN, as spatially close cells remain influenced by their shared microenvironment. Factors such as paracrine signaling, cell-cell interactions, and extracellular matrix-mediated communication contribute to maintaining localized regulatory similarities. While GRNs in mature tissues are generally less continuous than in developing tissues, spatial influences can still impose a degree of similarity in regulatory networks for cells in close proximity. This contextual information can be utilized to enhance the inference of a target cell’s GRN in mature tissues. To implement this assumption, we introduce a kernel weight $K_{kt}$ to the loss function, which shows how much information we can utilize from a cell $k$ when focusing on a target cell $t$ according to their similarity in spatial location. The kernel weight $K_{kt}$ is calculated following the radial basis function kernel: $$K_{kt} = \exp\left(-\frac{\|y_{k} - y_{t}\|^{2}}{2\sigma^{2}}\right) \tag{10}$$ where $\sigma$ is the hyper-parameter accounting for the range of neighboring cells that the target cell will refer to. Cells closer to the target cell $t$ will have larger weights in the loss, while the weights of more distant cells will be much smaller. In this way, the neighborhood cells contribute more to model training for this target cell.
Combining the two assumptions on which training stage 2 is based, the loss function for this stage is defined as $$\mathcal{L}_{2}^{(t)} = \sum_{k=1}^{n} K_{kt} \Big[ -\mathbb{E}_{q_{\phi}(z \mid x_{k}, y_{t})}\big[\log p_{\theta}(x_{k} \mid z, y_{t})\big] + \beta\,\mathrm{KL}\big(q_{\phi}(z \mid x_{k}, y_{t})\,\|\,p(z)\big) \Big] + \alpha \|W\|_{1} \tag{11}$$ Algorithm 1 SVGRN Two-Stage Training for Cell-Specific GRNs Download figure Open in new tab 3 Results & Discussion 3.1 Experiments on simulation datasets Due to the lack of datasets with known cell-specific GRNs, we evaluated our model’s GRN inference capabilities using simulated datasets generated by scMultiSim [ 41 ]. scMultiSim enables the simulation of scRNA-seq data given dynamic GRNs across cells and the spatial organization of cells. To approximate real-world heterogeneous spatial transcriptomics data, we conducted a series of experiments, varying critical parameters such as gene numbers, cell numbers, and intrinsic noise levels, which are all crucial factors impacting GRN inference accuracy. In scMultiSim, the parameter intrinsic.noise adjusts the noise in the transcription process, where higher noise levels increase the difficulty of accurately inferring GRNs [ 41 ]. To benchmark our model’s performance in GRN inference, we adopted two metrics from the BEELINE framework: the early precision ratio (EPR) and the area under the precision–recall curve ratio (AUPRC ratio) [ 59 ]. These metrics provide insight into the accuracy of GRN constructions and indicate how well a model performs in predicting edges between genes. The early precision value measures the fraction of true positives among the top $k$ predicted edges, with $k$ set to the number of edges in the actual network. EPR represents the ratio of the early precision value between the model and random predictions, and AUPRC ratio is the ratio of the AUPRC between the two. SVGRN captures dynamic GRNs through cell-specific training In Fig. 2, we show the visualization of the cell layout in a simulated dataset with 2000 cells, 110 genes and 0.1 intrinsic noise.
Given an initial GRN matrix, scMultiSim can randomly pick pairs of genes to add new interactions, enlarge interaction strength, remove existing interactions, or weaken interaction strength to generate cell-specific GRNs cell by cell. To create a spatial layout where cells with similar GRNs are positioned close to each other, we used the “layers” parameter in scMultiSim for dataset simulation. This parameter controls placing cells one by one on a spatial grid, with new added cells more likely to be placed near existing cells with similar GRNs, forming a layered structure of GRN clusters. In Fig. 2 (a), we clustered cells by their GRNs and it demonstrates that the cell-specific GRNs follow a layered changing pattern across the space, which meets with our assumptions about closer cells having more similar GRNs. Download figure Open in new tab Fig. 2. (a) The GRNs in all cells are clustered into 10 classes and the cluster ID for each cell is shown in their spatial layout. (b) For each cell, the difference between the initial EPR and that after the cell-specific training is calculated and shown as a heatmap. With generated single-cell GRNs by scMultiSim, we calculated the EPR of each cell before and after the second training stage for cell-specific GRN refinement. The spatial distribution of EPR improvement values is shown in Fig. 2 (b) . Results demonstrate that starting from a general whole-tissue GRN, our model effectively fine-tunes GRNs for each individual cell. Most cells show EPR improvements, validating the effectiveness of the cell-specific training stage. The pattern of EPR changes also aligns with the layered GRN distribution. The improvement values are similar in each layer shape and follow the real GRN cluster distributed pattern from left to right in the cell layout. 
It means that the model trained in the second stage can capture this layered distribution of cell-specific GRNs since GRNs in each layered cluster are similar and so that will have similar improvement when model is trained started from the same whole-tissue GRN to learn these cell-specific GRNs in all cells. This reflects the model’s ability to leverage spatial layout and nearby information, which enables the separate training on each cell to capture GRN variation across cells, leading to dynamic results in the space. SVGRN shows advantages in more complex settings We compared our model’s performance to CeSpGRN, a statistical method for inferring cell-specific GRNs from scRNA-seq data. Although LocaTE is also capable of inferring single-cell GRNs, we did not compare our model with it because it requires dynamic single-cell profiles, which is not well-suited to the context of our study. The average metrics across all cells are shown in Fig. 3 , demonstrating that our model generally outperforms CeSpGRN, especially in cases with higher noise levels or larger numbers of genes and cells. Download figure Open in new tab Fig. 3. Result comparison of EPR and AUPRC ratio between SVGRN and CeSpGRN on simulation datasets with various cell numbers, gene numbers and noise levels. “-” represents unavailable results within affordable time spending. Real biological datasets from single-cell sequencing often contain substantial noise due to technical factors such as low RNA capture rates [ 34 ], limited sequencing depth [ 4 ], and batch effects [ 58 ], along with biological variability across cell cycle phases [ 35 ]. Spatial transcriptomics introduces additional noise from extra steps that preserve spatial information during sequencing [ 68 ], such as unpredictable spatial noise from cell damage during cryosectioning and exposure to reagents for staining and mRNA release [ 15 ]. 
This noise can obscure subtle, meaningful interactions or amplify irrelevant signals, complicating accurate GRN construction. Thus, it is essential for our model to remain precise and reliable in noisy conditions. To assess this capability, we tested our model on simulated datasets with noise levels of 0.8 and 1.0. At a noise level of 0.8, our model achieved similar results to CeSpGRN, while at a noise level of 1.0, it demonstrated a clear advantage in both EPR and AUPRC ratios. These findings highlight SVGRN’s ability to infer underlying GRN patterns even in highly noisy data. Moreover, in practice, tissues are composed of a vast number of diverse cells, contributing to complex tissue organization. A model that performs well with larger numbers of cells can better capture these complexities to deepen our understanding of cellular behavior. To evaluate this, we tested our model on datasets with increasing cell counts. For datasets with 4000 cells, our model showed higher EPR. It maintained stable performance as the cell number grew and achieved both higher EPR and AUPRC ratio with 6000 cells. We also examined performance on datasets with increasing gene counts, which introduce more potential edges and a more complex interaction environment. As shown in Fig. 4, SVGRN, leveraging a deep learning framework, demonstrates superior computational efficiency. Using four GPUs for training, SVGRN’s runtime remains stable, while CeSpGRN’s runtime for datasets with 510 genes exceeded 48 hours, growing rapidly with the number of genes. We compared EPR and AUPRC ratios for both methods on datasets with 210, 310, 410, 510, and 1020 genes ( Fig. 3 ). As it is hard to obtain the results of CeSpGRN on 510 and 1020 genes within an affordable amount of time, these results are marked as “-” in the figure. On datasets with 210, 310, and 410 genes, our model achieved obviously higher EPR than CeSpGRN and still maintained stable results even with 1020 genes.
This demonstrates SVGRN’s robustness in inferring GRNs when facing increasing cellular diversity and complex gene interactions. Download figure Open in new tab Fig. 4. Running time comparison between SVGRN and CeSpGRN 3.2 Experiments on the cell-based spatial transcriptomics dataset of mouse embryos Unlike simulated data, the biological environment is inherently complex, making it challenging to obtain true cell-specific GRNs through experiments. Using spatial transcriptomics data, the predicted cell-specific GRNs from our model provide a unique view of cell development within spatial contexts. Although some spatial transcriptomics technologies such as Visium [ 50 ] can preserve spatial locations of transcripts using NGS barcoding techniques, they typically measure transcriptomes in barcoded spots of 55μm in diameter, each containing about 1-10 cells, rather than achieving single-cell spatial resolution [ 52 ]. Some multiplexed single-molecule FISH (smFISH) technologies, such as seqFISH [ 46 , 45 ] and multiplexed error-robust FISH (MERFISH) [ 12 ], and in situ sequencing (ISS) methods [ 32 ] offer single-cell and single-molecule resolution. Compared to spot-based approaches with equidistant spatial data points, cell-based methods can better reflect true cellular distributions within tissues. This provides an opportunity to infer cell-specific GRNs in individual cells and incorporate more precise influences from neighboring cells based on actual spatial distances. In this section, we applied our model to a seqFISH-based spatial transcriptomics dataset [ 44 ] of mouse embryos at the 8–12 somite stage to infer GRNs. Early-stage embryonic tissues, such as those in this dataset, exhibit progressive sharpening of gene expression profiles as cells differentiate into specific identities [ 47 ]. 
Intermediate cellular states result in smooth transitions of gene expression and GRNs across spatial regions and cell types, aligning with our model’s assumption that closer cells share similar GRNs under the weight kernel in the second training stage. We trained our model on sagittal section slice 2 of embryo 2, containing 6,880 cells and 351 genes from seqFISH. From the inferred cell-specific GRNs, we calculated the variance of each gene pair’s absolute edge weight across all cells to identify the most variable edges. Those edges represent gene interactions that fluctuate most along the space, potentially uncovering underlying regional gene regulation dynamics. From these edges, we can identify genes with variable functions in different embryo regions. Fig. 5 (a) demonstrates the gene appearance frequency in the top 100 most variable interacting gene pairs across cells. The most frequently shown genes include Hoxb9, Hoxd4, En1, Nepn, etc., all of which have been shown to have regional functions in mouse embryos [ 23 , 48 , 55 , 54 , 3 , 1 ]. Download figure Open in new tab Fig. 5. (a) The gene appearance frequency in the top 100 edges with highest edge weight variance value across all the cells. (b) Hoxb9-Hoxb5 edge weight changing pattern in the mouse embryo tissue. (c) Fgf17-Dkk1 edge weight changing pattern in the mouse embryo tissue. In (b)-(c), largest values of X correspond to the tail region, and lowest values of Y correspond to the head region. By analyzing all the inferred single-cell GRNs, we found that the model is able to capture some regional gene interactions functioning for different body segments of early-stage mouse embryos. Hox genes such as Hoxb9 and Hoxd4 are key regulators of positional identity along the head-to-tail axis, determining body segment identities by controlling the expression of region-specific genes, thus shaping the vertebrae and limb morphology [ 23 , 48 , 55 ]. 
Hox genes can directly regulate the expression of other Hox genes within the same cluster or even across different clusters, functioning in overlapping and sequential domains [ 18 , 60 ]. This kind of interaction is particularly important for neural tube development, affecting the development of different neuronal populations and axial skeleton formation [ 2 , 14 ]. The gene interaction weight-changing pattern shown in Fig. 5 (b) illustrates possible interaction between Hoxb genes and their relationship with the neural tube development at the 8–12 somite stage of the embryo. The higher interaction appearing in the head region of the embryo shows the neural tube formation in the head region, where it then starts differentiating into brain regions and progresses toward the tail at this stage of development. Another pair of gene interactions that has a relationship with a specific region in mouse embryos is between Dkk1 and Fgf8 subfamily. Dkk1 may directly modulate the level of Wnt activity, thereby indirectly regulating Fgf expression [ 8 ] or this may suggest that Dkk1 acts over the Fgf pathway in a Wnt-independent fashion. More importantly, these interactions modulate cell movement inside the forming limb bud and Fgf8 activation in response to Dkk1 can be responsible for the observed limb phenotype [ 53 ]. Fig. 5 (c) demonstrates the captured interaction changing between Dkk1 and Fgf17 which belongs to the Fgf8 subfamily. It shows a relatively stronger interaction in the lumbar region which may show the merging of hindlimb buds located posterior to the developing forelimbs and close to the tail. 3.3 Experiments on the spot-based spatial transcriptomics dataset of cSCC Compared to cell-based spatial transcriptomics data, spot-based data leads to a more complex case as each spot can contain multiple cells. 
Here, we further applied our model to a spot-based ST dataset of human cutaneous squamous cell carcinoma (cSCC) [ 27 ], which provides samples with both tumor and adjacent normal tissue regions. From the results, we demonstrate our model’s capability for spot-specific GRN inference in relatively mature tissue. Although each spot aggregates the expression of multiple cells, these cells are often spatially constrained and belong to related cell types or states. The aggregated transcriptomics data, therefore, reflects the dominant regulatory features of the region, which are also influenced by neighboring spots. It provides opportunities for our model to be used in this case that our model can still learn some useful information from the neighborhood spots and the coordinates of target spots. Our model helps to reveal how regulatory mechanisms gradually evolve from the tumor core to the leading edge and into adjacent normal regions in cancer. This capability is particularly valuable given the common lack of matched normal tissue from the same patient to be compared with tumors, which limits biological comparative experiments at high resolution. By generating GRNs tailored to individual spots in their spatial environment, our model supports the study of tumor development, tumor-local tissue crosstalk, and regulatory shifts that may drive tumorigenesis, offering insights into the spatially dynamic interactions underlying cell behavior and cancer progression. In this dataset, we focused on the skin section from patient 2, which includes 17,139 genes across 1,933 spatial spots. The H&E stained tissue section of this patient is shown in Fig. 6 (d) as a reference of the locations of the tumor and normal regions which comes from this cSCC dataset [ 27 ]. 
For model training, we selected 666 spots located on the tissue, covering both tumor and normal regions, and used the top 40 marker genes of basal, cycling, differentiating, and tumor-specific keratinocyte (TSK) clusters shared between normal skin and cSCC. Download figure Open in new tab Fig. 6. (a)-(c) Gene expression patterns of genes most frequently appeared in predicted high variable edges. (d) The H&E stained tissue section in this experiment with leading edge of tumor manually annotated (dotted lines) (e)-(f) Edge weight changing patterns of highly variable edges. From the ranked highly variable edges, we found that our model is able to discover genes with spatially distinct functions that play key roles in leading to the discrepancy between tumor and normal regions. The most frequently appeared genes in the top 200 predicted highly variable edges are found to include IGFBP5, PTHLH, and CCL2, which have appearance frequencies of 42, 38, and 20 respectively. These genes all exhibit region-specific expression patterns in the tissue, as shown in Fig. 6 (a)-(c), and are supported by current studies indicating their distinct functional roles in normal skin versus tumor regions. IGFBP5, with higher expression in normal regions, has been shown to suppress tumor growth, while its down-regulation in tumors promotes cancer progression [ 69 , 42 ]. CCL2, a chemokine shown enriched at the tumor’s leading edge, may drive tumor spread and pre-metastatic niche formation by transforming non-neoplastic epithelial cells into invasive ones [ 28 ]. PTHLH is a proteinaceous hormone reported to contribute to the pathogenesis of oral squamous cell carcinoma, which exhibits a higher expression level in cSCC tumors than adjacent normal tissues. Studies indicate that PTHLH stimulates SCC cell growth via an autocrine/paracrine manner [ 9 ] and is upregulated in SCC to control epithelial-mesenchymal interactions [ 11 ]. 
The apparent spatial expression variations of these genes are controlled by their corresponding dynamic gene regulations, resulting in the frequent occurrence of these genes in the highly variable edges of our predicted GRNs. We further analyzed the spatial weight patterns of highly variable edges to examine how gene interaction strengths differ across regions, offering insights into abnormal gene regulation in tumor development. Matrix metalloproteinases (MMPs) are zinc-dependent endopeptidases that degrade extracellular matrix (ECM) components and play critical roles in tumor progression, including growth, invasion, metastasis, angiogenesis, and cell survival. They facilitate proteolytic cleavage of IGFBP5, thereby influencing the impact IGFBP5 has in cancer [ 69 ]. We explored the spatial distribution of the edge weight between MMP10 and IGFBP5 across all spots. As visualized in Fig. 6 (e), the interaction strength aligns with gene functions, with shifts corresponding to the spatial organization of tumor and normal regions. Another noteworthy edge is between CCL2 and CYR61. Fig. 6 (f) shows a similar pattern, where interaction weights are stronger in tumor areas and gradually weaken in normal skin. Studies have shown that CYR61 interacts with CCL2 to promote localized inflammation in several diseases [ 17 , 10 ]. In SCC, CYR61 acts as a positive growth regulator [ 37 ], while CCL2 recruits macrophages and other immune cells to tumors, promoting tumor growth and proliferation [ 70 , 30 ]. Through CCL2 upregulation in tumor regions, CYR61 enhances immune cell recruitment and is in turn activated by them, creating a positive feedback loop that intensifies inflammation within tumors. These findings illustrate our model’s ability to capture spatially specific, tumor-associated gene interactions across regions. 
From predicted spot-specific GRNs in real spatial transcriptomics datasets, our model demonstrates its ability to capture genes with region-specific functions that are key to understanding tumor growth and immune response. Additionally, the edge weights in predicted GRNs reveal how gene regulatory relationships shift with cellular position. Such findings highlight the utility of our model in uncovering location-dependent gene interaction mechanisms. This high-resolution GRN construction has important implications for studying development, tumor biology, and microenvironmental impacts on cell behavior, potentially advancing targeted therapies and personalized medicine. 3.4 SVGRN helps identify and understand functional tissue niches through GRNs We applied SVGRN to the Visium fallopian tube sample from the HuBMAP Program [ 26 ], using the predicted spot-specific GRNs to better understand the cell functional identities and their layout in the tissue. This approach directly highlights differential gene regulation relationships across cells, providing clearer insight into cell activities compared with traditional reliance on only gene expression levels and marker genes for cell type identification. The predicted GRNs for each spot were clustered into five groups using the K-means method, as shown in Fig. 7 (b). The clustering results reveal distinct features of groups that correspond to functional roles within the tissue. Download figure Open in new tab Fig. 7. (a) The histology image of the fallopian tube sample. (b) Spots clustered by the predicted spot-specific GRNs. (c) The top differentially interacted TF-Target pairs in each cluster. In-Out Diff is the difference between the average interaction strength of spots in this cluster and out of this cluster, reported in units of $10^{-3}$. 
To characterize the main differential cell activities, we obtained the possible TF-Target regulatory pairs from the TRRUST database [ 19 ], and calculated the average TF–Target interaction strength for each pair inside and outside each cluster. This allowed us to identify the most differentially interacted pairs. As shown in Fig. 7 (c), clusters 0, 2, 3, and 4 display the most differentially highly interacted pairs, while cluster 1 shows no stronger interactions than other clusters. Instead, it is defined by differentially weaker pairs. These top differential TF-Target regulatory relationships reveal biological processes for each cluster and help identify potential cell functions and tissue niches in the fallopian tube. In cluster 0, the top interactions reflect cells near smooth muscle or fibroblast-rich regions at the epithelial boundary. KLF5 and MYH11 both play important roles in epithelial and smooth muscle cells [ 20 , 16 ], and NR4A1-MMP2 interactions occur in vascular smooth muscle cells [ 61 ]. IGF2 is involved in tissue regeneration in the fallopian tube after ovulation, promoting the transformation of fimbrial epithelial cells [ 22 ]. ASH1L–TIMP1 controls ECM remodeling, and NCAM1 contributes to cell–cell adhesion [ 31 ]. In contrast, cluster 1 shows distinct regulatory patterns. Many pairs that are strongly interacted in other clusters appear at lower strength, suggesting a possible identity of homeostatic epithelium which prioritizes ciliogenesis and beating over inflammatory, stress or ECM programs. Cluster 2 highlights the regional inflammation repair processes. In the top interactions, the frequently appearing RELA regulates the inflammatory environment in the fallopian tube [ 66 ], GADD45 is associated with stress response, promoting the repair of DNA damage [ 24 ], and BCL2 supports cell survival in the inflammatory environment [ 39 ]. 
F8 plays a crucial role in the intrinsic pathway of blood coagulation [ 65 ] which may point cluster 2 toward a perivascular or endothelial neighborhood. Cluster 3 reflects calming signals, cleaning up, and rebuilding processes: GCLC protects against oxidative stress [ 38 ], NR4A1 and CDKN1B are involved in cell cycle control [ 21 , 56 ], and DNMT1-TNFRSF10B interaction is related to apoptosis thresholds tuning to remove damaged cells without excessive loss [ 75 ]. Cluster 4 represents a hypoxia-adapted perimuscular or epithelial interface. YY1-EPAS1 and HIF3A-EPAS1 indicate an active oxygen-sensing axis tuned to accommodate low-O$_2$ niches [ 13 ], while HSF1-LDHA captures a heat-shock/proteostasis response coupled to a glycolytic shift typical of hypoxic remodeling. Overall, these results highlight that SVGRN enables the identification of spatially distinct cell groups with unique GRN programs, which reflect their underlying functional identities and tissue roles. This GRN-based perspective provides a more mechanistic understanding of tissue organization, showing that functional niches in the fallopian tube can be uncovered not only by marker expression but also by the differential regulatory logic that drives cell activities. 3.5 Ablation Experiments To assess the impact of our two key assumptions on cell-specific GRN learning, we conducted ablation experiments on two simulated datasets with noise levels of 0.1 and 0.5. In our final model, these assumptions are implemented by fixing each cell’s position to that of the target cell and by adding kernel weights to the loss function. In the ablations, we removed one or both of these components and compared the resulting average loss values, as shown in Table 1 . View this table: View inline View popup Download powerpoint Table 1. Ablation experiment results showing the changes in loss values when removing the components that implement the assumptions in the model. 
Without either assumption, the model cannot construct specific GRNs for each cell, resulting in identical GRNs across cells and the highest loss values in both datasets. As we incrementally reintroduce each assumption, we can observe significant loss reductions. This confirms that our assumptions not only allow for extending the whole-tissue GRN to single cells but also improve the reconstruction of gene expression based on spatial coordinates. By fixing coordinates alone, the model achieves a 0.13% and 6.6% loss reduction for noise levels 0.1 and 0.5, respectively. Adding kernel weights based on cell distances results in even greater reductions of 7.9% and 7.4% in the two cases. 4 Conclusion In this paper, we proposed SVGRN, a deep learning based method for inferring cell-specific GRNs in spatial transcriptomics. It utilizes the gene expression data and spatial coordinates of cells to learn the underlying gene interaction in a self-regression way. The model is based on two assumptions about the relationship of a cell’s position to its GRN and the distribution of all GRNs in the space. These assumptions enable constructing the GRN for a single cell conditioned on its position and borrowing usable information from the cells nearby. From the experiments on simulated datasets, SVGRN shows advantages in more noisy and complex situations with more genes or cells considered. It also illustrates its ability in learning the changing gene regulation pattern from spatial transcriptomics datasets with different technologies and resolutions, such as spot-based and cell-based data. Future research could focus on a more detailed understanding of the causal relationships in gene regulation and on incorporating biological interactions at other levels, such as the cell-cell interaction. Code and Data availability The source code of SVGRN and configurations for CeSpGRN in experiments are available at https://github.com/lyrrrr/SVGRN . 
The seqFISH dataset of mouse embryos used in section 3.2 can be downloaded from https://content.cruk.cam.ac.uk/jmlab/SpatialMouseAtlas2020/ . The cSCC spatial transcriptomics dataset can be downloaded from Gene Expression Omnibus (GEO) ( https://www.ncbi.nlm.nih.gov/geo/ ) with accession number GSE144240. The sample P2_ST_rep1 is used for our experiments in section 3.3 . The fallopian tube data in section 3.4 comes from the HuBMAP Program ( https://hubmapconsortium.org ) and the sample used for our experiment can be found at https://portal.hubmapconsortium.org/browse/dataset/fa7fbcd8ae9219225f5df25e8c5e994e . Funder Information Declared NIH Office of the Director , 1R03OD038389-01 Footnotes Figure 3, the result comparison of EPR and AUPRC ratio between SVGRN and CeSpGRN on simulation datasets is revised. References 1. ↵ Addeo , M. , Buonaiuto , S. , Guerriero , I. , Amendola , E. , Visconte , F. , Marino , A. , De Angelis , M.T. , Russo , F. , Roberto , L. , Marotta , P. , et al : Insight into nephrocan function in mouse endoderm patterning . International Journal of Molecular Sciences 21 ( 1 ), 8 ( 2019 ) OpenUrl PubMed 2. ↵ Ahn , Y. , Mullan , H.E. , Krumlauf , R. : Long-range regulation by shared retinoic acid response elements modulates dynamic expression of posterior hoxb genes in cns development . Developmental Biology 388 ( 1 ), 134 – 144 ( 2014 ) OpenUrl CrossRef PubMed 3. Altieri , S.C. , Jalabi , W. , Zhao , T. , Romito-DiGiacomo , R.R. , Maricich , S.M. : En1 directs superior olivary complex neuron positioning, survival, and expression of foxp1 . Developmental biology 408 ( 1 ), 99 – 108 ( 2015 ) OpenUrl CrossRef PubMed 4. ↵ Angerer , P. , Simon , L. , Tritschler , S. , Wolf , F.A. , Fischer , D. , Theis , F.J. : Single cells make big data: new challenges and opportunities in transcriptomics . Current opinion in systems biology 4 , 85 – 91 ( 2017 ) OpenUrl 5. ↵ Belmonte-Mateos , C. , Pujades , C. 
: From cell states to cell fates: how cell proliferation and neuronal differentiation are coordinated during embryonic development . Frontiers in Neuroscience 15 , 781160 ( 2022 ) OpenUrl PubMed 6. ↵ Bhatla , S.C. , Lal , M.A. : Embryogenesis, growth, and differentiation . In: Plant Physiology, Development and Metabolism , pp. 543 – 563 . Springer ( 2023 ) 7. ↵ Briggs , J.A. , Weinreb , C. , Wagner , D.E. , Megason , S. , Peshkin , L. , Kirschner , M.W. , Klein , A.M. : The dynamics of gene expression in vertebrate embryogenesis at single-cell resolution . Science 360 ( 6392 ), eaar5780 ( 2018 ) OpenUrl Abstract / FREE Full Text 8. ↵ Caneparo , L. , Huang , Y.L. , Staudt , N. , Tada , M. , Ahrendt , R. , Kazanskaya , O. , Niehrs , C. , Houart , C. : Dickkopf-1 regulates gastrulation movements by coordinated modulation of wnt/βcatenin and wnt/pcp activities, through interaction with the dally-like homolog knypek . Genes & development 21 ( 4 ), 465 – 480 ( 2007 ) OpenUrl Abstract / FREE Full Text 9. ↵ Chang , W.M. , Lin , Y.F. , Su , C.Y. , Peng , H.Y. , Chang , Y.C. , Hsiao , J.R. , Chen , C.L. , Chang , J.Y. , Shieh , Y.S. , Hsiao , M. , et al : Parathyroid hormone-like hormone is a poor prognosis marker of head and neck cancer and promotes cell growth via runx2 regulation . Scientific reports 7 ( 1 ), 41131 ( 2017 ) OpenUrl PubMed 10. ↵ Chen , C.Y. , Fuh , L.J. , Huang , C.C. , Hsu , C.J. , Su , C.M. , Liu , S.C. , Lin , Y.M. , Tang , C.H. : Enhancement of ccl2 expression and monocyte migration by ccn1 in osteoblasts through inhibiting mir-518a-5p: implication of rheumatoid arthritis therapy . Scientific Reports 7 ( 1 ), 421 ( 2017 ) OpenUrl PubMed 11. ↵ Chen , H. , Yang , J. , Wu , W. : Seven key hub genes identified by gene co-expression network in cutaneous squamous cell carcinoma . BMC cancer 21 , 1 – 12 ( 2021 ) OpenUrl CrossRef PubMed 12. ↵ Chen , K. , Boettiger , A. , Moffitt , J. , Wang , S. , Zhuang , X. : Rna imaging . 
spatially resolved, highly multiplexed rna profiling in single cells. science 348 , aaa6090 ( 2015 ) OpenUrl CrossRef PubMed 13. ↵ Cheng , B. , Ma , X. , Zhou , Y. , Liu , J. , Fei , X. , Pan , W. , Peng , X. , Wang , W. , Chen , J. : Recent progress in the development of hypoxia-inducible factor 2α (hif-2α) modulators: inhibitors, agonists, and degraders (2009–2024) . European Journal of Medicinal Chemistry 275 , 116645 ( 2024 ) OpenUrl PubMed 14. ↵ Deschamps , J. , van Nes , J. : Developmental regulation of the hox genes during axial morphogenesis in the mouse . Development 132 ( 13 ), 2931 – 2942 ( 2005 ) OpenUrl Abstract / FREE Full Text 15. ↵ Du , L. , Kang , J. , Hou , Y. , Sun , H.X. , Zhang , B. : Spotgf: Denoising spatially resolved transcriptomics data using an optimal transport-based gene filtering algorithm . Cell Systems 15 ( 10 ), 969 – 981 ( 2024 ) OpenUrl PubMed 16. ↵ Durst , K.L. , Lutterbach , B. , Kummalue , T. , Friedman , A.D. , Hiebert , S.W. : The inv (16) fusion protein associates with corepressors via a smooth muscle myosin heavy-chain domain . Molecular and cellular biology 23 ( 2 ), 607 – 619 ( 2003 ) OpenUrl Abstract / FREE Full Text 17. ↵ Emre , Y. , Imhof , B.A. : Matricellular protein ccn1/cyr61: a new player in inflammation and leukocyte trafficking . In: Seminars in immunopathology . vol. 36 , pp. 253 – 259 . Springer ( 2014 ) OpenUrl 18. ↵ Forlani , S. , Lawson , K.A. , Deschamps , J. : Acquisition of hox codes during gastrulation and axial elongation in the mouse embryo . Development 130 , 3807 – 3819 ( 2003 ) OpenUrl Abstract / FREE Full Text 19. ↵ Han , H. , Cho , J.W. , Lee , S. , Yun , A. , Kim , H. , Bae , D. , Yang , S. , Kim , C.Y. , Lee , M. , Kim , E. , et al : Trrust v2: an expanded reference database of human and mouse transcriptional regulatory interactions . Nucleic acids research 46 ( D1 ), D380 – D386 ( 2018 ) OpenUrl CrossRef PubMed 20. ↵ Hayashi , S. , Manabe , I. , Suzuki , Y. , Relaix , F. , Oishi , Y. 
: Klf5 regulates muscle differentiation by directly targeting muscle-specific genes in cooperation with myod in mice . Elife 5 , e17462 ( 2016 ) OpenUrl CrossRef PubMed 21. ↵ Herring , J.A. , Elison , W.S. , Tessem , J.S. : Function of nr4a orphan nuclear receptors in proliferation, apoptosis and fuel utilization across tissues . Cells 8 ( 11 ), 1373 ( 2019 ) OpenUrl CrossRef 22. ↵ Hsu , C.F. , Huang , H.S. , Chen , P.C. , Ding , D.C. , Chu , T.Y. : Igf-axis confers transformation and regeneration of fallopian tube fimbria epithelium upon ovulation . EBioMedicine 41 , 597 – 609 ( 2019 ) OpenUrl PubMed 23. ↵ Hubert , K.A. , Wellik , D.M. : Hox genes in development and beyond . Development 150 ( 1 ) ( 2023 ) 24. ↵ Humayun , A. , Fornace Jr , A.J. : Gadd45 in stress signaling, cell cycle control, and apoptosis . In: Gadd45 Stress Sensor Genes , pp. 1 – 22 . Springer ( 2022 ) 25. ↵ Ishikawa , M. , Sugino , S. , Masuda , Y. , Tarumoto , Y. , Seto , Y. , Taniyama , N. , Wagai , F. , Yamauchi , Y. , Kojima , Y. , Kiryu , H. , et al : Renge infers gene regulatory networks using time-series single-cell rna-seq data with crispr perturbations . Communications Biology 6 ( 1 ), 1290 ( 2023 ) OpenUrl PubMed 26. ↵ lcai@ caltech. edu 21 b Shendure Jay 9 Trapnell Cole 9 Lin Shin shinlin@ uw. edu 2 e Jackson Dana 9, C.U.T.C.L., kzhang@ bioeng. ucsd. edu 15 b Sun Xin 15 Jain Sanjay 24 Hagood James 25 Pryhuber Gloria 26 Kharchenko Peter 8, U.T.Z.K., of Technology TTD Cai Long lcai@ caltech. edu 21 b Yuan Guo-Cheng 35 Zhu Qian 35 Dries Ruben 35, C.I., peng_yin@ hms. harvard. edu 36 37 b Saka Sinem K. 36 37 Kishi Jocelyn Y. 36 37 Wang Yu 36 37 Goldaracena Isabel 36 37, H.T.Y.P., jlaskin@ purdue. edu 10 b Ye DongHye 10 38 Burnum-Johnson Kristin E. 39 Piehowski Paul D. 39 Ansong Charles 39 Zhu Ying 39, P.T.L.J., harbury@ stanford. edu 11 b Desai Tushar 40 Mulye Jay 11 Chou Peter 11 Nagendran Monica 40, S.T.H.P. 
, et al.: The human body at cellular resolution: the nih human biomolecular atlas program . Nature 574 ( 7777 ), 187 – 192 ( 2019 ) OpenUrl CrossRef PubMed 27. ↵ Ji , A.L. , Rubin , A.J. , Thrane , K. , Jiang , S. , Reynolds , D.L. , Meyers , R.M. , Guo , M.G. , George , B.M. , Mollbrink , A. , Bergenstråhle , J. , et al : Multimodal analysis of composition and spatial architecture in human squamous cell carcinoma . Cell 182 ( 2 ), 497 – 514 ( 2020 ) OpenUrl CrossRef PubMed 28. ↵ Jin , J. , Lin , J. , Xu , A. , Lou , J. , Qian , C. , Li , X. , Wang , Y. , Yu , W. , Tao , H. : Ccl2: an important mediator between tumor cells and host cells in tumor microenvironment . Frontiers in oncology 11 , 722916 ( 2021 ) OpenUrl PubMed 29. ↵ Jin , T. , Rehani , P. , Ying , M. , Huang , J. , Liu , S. , Roussos , P. , Wang , D. : scgrnom: a computational pipeline of integrative multi-omics analyses for predicting cell-type disease genes and regulatory networks . Genome Medicine 13 ( 1 ), 95 ( 2021 ) OpenUrl PubMed 30. ↵ Kadomoto , S. , Izumi , K. , Mizokami , A. : Roles of ccl2-ccr2 axis in the tumor microenvironment . International Journal of Molecular Sciences 22 ( 16 ), 8530 ( 2021 ) OpenUrl PubMed 31. ↵ Kasper , C. , Rasmussen , H. , Kastrup , J.S. , Ikemizu , S. , Jones , E.Y. , Berezin , V. , Bock , E. , Larsen , I.K. : Structural basis of cell–cell adhesion by ncam . Nature structural biology 7 ( 5 ), 389 – 393 ( 2000 ) OpenUrl CrossRef PubMed Web of Science 32. ↵ Ke , R. , Mignardi , M. , Pacureanu , A. , Svedlund , J. , Botling , J. , Wählby , C. , Nilsson , M. : In situ sequencing for rna analysis in preserved tissue and cells . Nature methods 10 ( 9 ), 857 – 860 ( 2013 ) OpenUrl PubMed 33. ↵ Kerszberg , M. , Wolpert , L. : Specifying positional information in the embryo: looking beyond morphogens . Cell 130 ( 2 ), 205 – 209 ( 2007 ) OpenUrl CrossRef PubMed Web of Science 34. ↵ Kharchenko , P.V. , Silberstein , L. , Scadden , D.T. 
: Bayesian approach to single-cell differential expression analysis . Nature methods 11 ( 7 ), 740 – 742 ( 2014 ) OpenUrl PubMed 35. ↵ Khozyainova , A.A. , Valyaeva , A.A. , Arbatsky , M.S. , Isaev , S.V. , Iamshchikov , P.S. , Volchkov , E.V. , Sabirov , M.S. , Zainullina , V.R. , Chechekhin , V.I. , Vorobev , R.S. , et al : Complex analysis of single-cell rna sequencing data . Biochemistry (Moscow) 88 ( 2 ), 231 – 252 ( 2023 ) OpenUrl PubMed 36. ↵ Kim , D. , Tran , A. , Kim , H.J. , Lin , Y. , Yang , J.Y.H. , Yang , P. : Gene regulatory network reconstruction: harnessing the power of single-cell multi-omic data . NPJ Systems Biology and Applications 9 ( 1 ), 51 ( 2023 ) OpenUrl 37. ↵ Kok , S.H. , Chang , H.H. , Tsai , J.Y. , Hung , H.C. , Lin , C.Y. , Chiang , C.P. , Liu , C.M. , Kuo , M.Y.P. : Expression of cyr61 (ccn1) in human oral squamous cell carcinoma: An independent marker for poor prognosis . Head & neck 32 ( 12 ), 1665 – 1673 ( 2010 ) OpenUrl PubMed 38. ↵ Krejsa , C.M. , Franklin , C.C. , White , C.C. , Ledbetter , J.A. , Schieven , G.L. , Kavanagh , T.J. : Rapid activation of glutamate cysteine ligase following oxidative stress . Journal of Biological Chemistry 285 ( 21 ), 16116 – 16124 ( 2010 ) OpenUrl Abstract / FREE Full Text 39. ↵ Kucera , E. , König , F. , Tangl , S. , Grosschmidt , K. , Kainz , C. , Sliutz , G. : Bcl-2 expression as a novel immunohistochemical marker for ruptured tubal ectopic pregnancy . Human Reproduction 16 ( 6 ), 1286 – 1290 ( 2001 ) OpenUrl CrossRef PubMed 40. ↵ Langfelder , P. , Horvath , S. : Wgcna: an r package for weighted correlation network analysis . BMC bioinformatics 9 , 1 – 13 ( 2008 ) OpenUrl CrossRef PubMed 41. ↵ Li , H. , Zhang , Z. , Squires , M. , Chen , X. , Zhang , X. : scmultisim: simulation of single-cell multi-omics and spatial data guided by gene regulatory networks and cell-cell interactions .. Nature Methods pp. 1 - 12 ( 2025 ) 42. ↵ Lin , W. , Niu , R. , Park , S.M. , Zou , Y. , Kim , S.S. , Xia , X. 
, Xing , S. , Yang , Q. , Sun , X. , Yuan , Z. , et al : Igfbp5 is an ror1 ligand promoting glioblastoma invasion via ror1/her2-creb signaling axis . Nature communications 14 ( 1 ), 1578 ( 2023 ) OpenUrl PubMed 43. ↵ Liu , H. , Han , F. , Yuan , M. , Lafferty , J. , Wasserman , L. : High-dimensional semiparametric gaussian copula graphical models . The Annals of Statistics 40 ( 4 ), 2293 – 2326 ( 2012 ) OpenUrl 44. ↵ Lohoff , T. , Ghazanfar , S. , Missarova , A. , Koulena , N. , Pierson , N. , Griffiths , J.A. , Bardot , E.S. , Eng , C.H. , Tyser , R.C. , Argelaguet , R. , et al : Integration of spatial and single-cell transcriptomic data elucidates mouse organogenesis . Nature biotechnology 40 ( 1 ), 74 – 85 ( 2022 ) OpenUrl CrossRef PubMed 45. ↵ Lubeck , E. , Cai , L. : Single-cell systems biology by super-resolution imaging and combinatorial labeling . Nature methods 9 ( 7 ), 743 – 748 ( 2012 ) OpenUrl PubMed 46. ↵ Lubeck , E. , Coskun , A.F. , Zhiyentayev , T. , Ahmad , M. , Cai , L. : Single-cell in situ rna profiling by sequential hybridization . Nature methods 11 ( 4 ), 360 – 361 ( 2014 ) OpenUrl PubMed 47. ↵ Madrigal , P. , Deng , S. , Feng , Y. , Militi , S. , Goh , K.J. , Nibhani , R. , Grandy , R. , Osnato , A. , Ortmann , D. , Brown , S. , et al : Epigenetic and transcriptional regulations prime cell fate before division during human pluripotent stem cell differentiation . Nature Communications 14 ( 1 ), 405 ( 2023 ) OpenUrl PubMed 48. ↵ Mallo , M. , Wellik , D.M. , Deschamps , J. : Hox genes and regional patterning of the vertebrate body plan . Developmental biology 344 ( 1 ), 7 – 15 ( 2010 ) OpenUrl CrossRef PubMed 49. ↵ Margolin , A.A. , Nemenman , I. , Basso , K. , Wiggins , C. , Stolovitzky , G. , Favera , R.D. , Califano , A. : Aracne: an algorithm for the reconstruction of gene regulatory networks in a mammalian cellular context . In: BMC bioinformatics . vol. 7 , pp. 1 – 15 . Springer ( 2006 ) OpenUrl 50. ↵ Marx , V. 
: Method of the year: spatially resolved transcriptomics . Nature methods 18 ( 1 ), 9 – 14 ( 2021 ) OpenUrl PubMed 51. ↵ Badia-i-Mompel , P. , Wessels , L. , Müller-Dott , S. , Trimbour , R. , Ramirez Flores , R.O. , Argelaguet , R. , Saez-Rodriguez , J. : Gene regulatory network inference in the era of single-cell multi-omics . Nature Reviews Genetics 24 ( 11 ), 739 – 754 ( 2023 ) OpenUrl CrossRef PubMed 52. ↵ Moses , L. , Pachter , L. : Museum of spatial transcriptomics . Nature methods 19 ( 5 ), 534 – 546 ( 2022 ) OpenUrl PubMed 53. ↵ Mukhopadhyay , M. , Shtrom , S. , Rodriguez-Esteban , C. , Chen , L. , Tsukui , T. , Gomer , L. , Dorward , D.W. , Glinka , A. , Grinberg , A. , Huang , S.P. , et al : Dickkopf1 is required for embryonic head induction and limb morphogenesis in the mouse . Developmental cell 1 ( 3 ), 423 – 434 ( 2001 ) OpenUrl CrossRef PubMed Web of Science 54. ↵ Murcia , C.L. , Gulden , F.O. , Cherosky , N.A. , Herrup , K. : A genetic study of the suppressors of the engrailed-1 cerebellar phenotype . Brain research 1140 , 170 – 178 ( 2007 ) OpenUrl PubMed 55. ↵ Pearson , J.C. , Lemons , D. , McGinnis , W. : Modulating hox gene functions during animal body patterning . Nature Reviews Genetics 6 ( 12 ), 893 – 904 ( 2005 ) OpenUrl CrossRef PubMed Web of Science 56. ↵ Pellarin , I. , Dall’Acqua , A. , Favero , A. , Segatto , I. , Rossi , V. , Crestan , N. , Karimbayli , J. , Belletti , B. , Baldassarre , G. : Cyclin-dependent protein kinases and cell cycle regulation in biology and disease . Signal Transduction and Targeted Therapy 10 ( 1 ), 11 ( 2025 ) OpenUrl 57. ↵ Perrimon , N. , Pitsouli , C. , Shilo , B.Z. : Signaling mechanisms controlling cell fate and embryonic patterning . Cold Spring Harbor perspectives in biology 4 ( 8 ), a005975 ( 2012 ) OpenUrl Abstract / FREE Full Text 58. ↵ Piwecka , M. , Rajewsky , N. , Rybak-Wolf , A. : Single-cell and spatial transcriptomics: deciphering brain complexity in health and disease . 
Nature Reviews Neurology 19 ( 6 ), 346 – 362 ( 2023 ) OpenUrl PubMed 59. ↵ Pratapa , A. , Jalihal , A.P. , Law , J.N. , Bharadwaj , A. , Murali , T. : Benchmarking algorithms for gene regulatory network inference from single-cell transcriptomic data . Nature methods 17 ( 2 ), 147 – 154 ( 2020 ) OpenUrl PubMed 60. ↵ Rancourt , D.E. , Tsuzuki , T. , Capecchi , M.R. : Genetic interaction between hoxb-5 and hoxb-6 is revealed by nonallelic noncomplementation . Genes & Development 9 ( 1 ), 108 – 122 ( 1995 ) OpenUrl Abstract / FREE Full Text 61. ↵ Rodríguez-Calvo , R. , Ferrán , B. , Alonso , J. , Martí-Pàmies , I. , Aguiló , S. , Calvayrac , O. , Rodríguez , C. , Martínez-González , J. : Nr4a receptors up-regulate the antiproteinase alpha-2 macroglobulin (a2m) and modulate mmp-2 and mmp-9 in vascular smooth muscle cells . Thrombosis and Haemostasis 113 ( 06 ), 1323 – 1334 ( 2015 ) OpenUrl CrossRef PubMed 62. ↵ Shu , H. , Zhou , J. , Lian , Q. , Li , H. , Zhao , D. , Zeng , J. , Ma , J. : Modeling gene regulatory networks using neural network architectures . Nature Computational Science 1 ( 7 ), 491 – 501 ( 2021 ) OpenUrl PubMed 63. ↵ Sohn , K. , Lee , H. , Yan , X. : Learning structured output representation using deep conditional generative models . Advances in neural information processing systems 28 ( 2015 ) 64. ↵ Stephan , K.E. , Friston , K.J. : Functional connectivity . In: Squire , L.R. (ed.) Encyclopedia of Neuroscience , pp. 391 – 397 . Elsevier , New York ( 2009 ). doi: 10.1016/B978-008045046-9.00308-9 , https://doi.org/10.5167/uzh-25725 OpenUrl CrossRef 65. ↵ Thompson , A.R. : Structure and function of the factor viii gene and protein . In: Seminars in thrombosis and hemostasis . vol. 29 , pp. 011 – 022 . Thieme Medical Publishers, Inc. ( 2003 ) OpenUrl 66. ↵ Tone , A.A. , Virtanen , C. , Shaw , P. , Brown , T.J. 
: Prolonged postovulatory proinflammatory signaling in the fallopian tube epithelium may be mediated through a brca1/dab2 axis . Clinical cancer research 18 ( 16 ), 4334 – 4344 ( 2012 ) OpenUrl Abstract / FREE Full Text 67. ↵ Wang , H. , Fazayeli , F. , Chatterjee , S. , Banerjee , A. : Gaussian copula precision estimation with missing values . In: Artificial Intelligence and Statistics . pp. 978 – 986 . PMLR ( 2014 ) 68. ↵ Wang , Y. , Song , B. , Wang , S. , Chen , M. , Xie , Y. , Xiao , G. , Wang , L. , Wang , T. : Sprod for de-noising spatially resolved transcriptomics data based on position and image information . Nature methods 19 ( 8 ), 950 – 958 ( 2022 ) OpenUrl PubMed 69. ↵ Waters , J.A. , Urbano , I. , Robinson , M. , House , C.D. : Insulin-like growth factor binding protein 5: Diverse roles in cancer . Frontiers in oncology 12 , 1052457 ( 2022 ) OpenUrl PubMed 70. ↵ Yang , H. , Zhang , Q. , Xu , M. , Wang , L. , Chen , X. , Feng , Y. , Li , Y. , Zhang , X. , Cui , W. , Jia , X. : Ccl2-ccr2 axis recruits tumor associated macrophages to induce immune evasion through pd-1 signaling in esophageal carcinogenesis . Molecular Cancer 19 , 1 – 14 ( 2020 ) OpenUrl PubMed 71. ↵ Yu , Y. , Chen , J. , Gao , T. , Yu , M. : Dag-gnn: Dag structure learning with graph neural networks . In: International conference on machine learning . pp. 7154 – 7163 . PMLR ( 2019 ) 72. ↵ Zhang , S. , Pyne , S. , Pietrzak , S. , Halberg , S. , McCalla , S.G. , Siahpirani , A.F. , Sridharan , R. , Roy , S. : Inference of cell type-specific gene regulatory networks on cell lineages from single cell omic datasets . Nature Communications 14 ( 1 ), 3064 ( 2023 ) OpenUrl PubMed 73. ↵ Zhang , S.Y. , Stumpf , M.P. : Learning cell-specific networks from dynamics and geometry of single cells . bioRxiv pp. 2023 – 01 ( 2023 ) 74. ↵ Zhang , Z. , Han , J. , Song , L. , Zhang , X. : Cespgrn: Inferring cell-specific gene regulatory networks from single cell multi-omics and spatial data . bioRxiv pp. 
2022 – 03 ( 2022 ) 75. ↵ Zhao , X. , Liu , X. , Su , L. : Parthenolide induces apoptosis via tnfrsf10b and pmaip1 pathways in human lung cancer cells . Journal of experimental & clinical cancer research 33 ( 1 ), 3 ( 2014 ) OpenUrl PubMed 76. ↵ Zheng , R. , Li , M. , Chen , X. , Wu , F.X. , Pan , Y. , Wang , J. : Bixgboost: a scalable, flexible boosting-based method for reconstructing gene regulatory networks . Bioinformatics 35 ( 11 ), 1893 – 1900 ( 2019 ) OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted October 23, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Spatially varying cell-specific gene regulation network inference Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Spatially varying cell-specific gene regulation network inference Yurui Li , Jin Chen , Haohan Wang bioRxiv 2025.10.17.683188; doi: https://doi.org/10.1101/2025.10.17.683188 Share This Article: Copy Citation Tools Spatially varying cell-specific gene regulation network inference Yurui Li , Jin Chen , Haohan Wang bioRxiv 2025.10.17.683188; doi: https://doi.org/10.1101/2025.10.17.683188 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17690) Bioengineering (13892) Bioinformatics (41935) Biophysics (21451) Cancer Biology (18587) Cell Biology (25499) Clinical Trials (138) Developmental Biology (13377) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24318) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88601) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15152) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.