Quantifying predictability of gene expression from histology image

doi:10.1101/2025.11.04.686651

Quantifying predictability of gene expression from histology image

2025 · doi:10.1101/2025.11.04.686651

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 29,638 characters · extracted from preprint-html · click to expand

Quantifying predictability of gene expression from histology image | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Quantifying predictability of gene expression from histology image View ORCID Profile Chen-Rui Xia , Jia-Wen Yao , View ORCID Profile Ge Gao doi: https://doi.org/10.1101/2025.11.04.686651 Chen-Rui Xia 1 State Key Laboratory of Gene Function and Modulation Research, School of Life Sciences, Biomedical Pioneering Innovative Center (BIOPIC) and Beijing Advanced Innovation Center for Genomics (ICG), Center for Bioinformatics (CBI), Peking University , Beijing 100871, China 2 
Changping Laboratory , Beijing 102206, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Chen-Rui Xia Jia-Wen Yao 1 State Key Laboratory of Gene Function and Modulation Research, School of Life Sciences, Biomedical Pioneering Innovative Center (BIOPIC) and Beijing Advanced Innovation Center for Genomics (ICG), Center for Bioinformatics (CBI), Peking University , Beijing 100871, China 2 Changping Laboratory , Beijing 102206, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ge Gao 1 State Key Laboratory of Gene Function and Modulation Research, School of Life Sciences, Biomedical Pioneering Innovative Center (BIOPIC) and Beijing Advanced Innovation Center for Genomics (ICG), Center for Bioinformatics (CBI), Peking University , Beijing 100871, China 2 Changping Laboratory , Beijing 102206, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ge Gao For correspondence: gaog{at}mail.cbi.pku.edu.cn Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Histopathological images are indispensable in clinical diagnosis, yet provide limited insight into underlying molecular states. Numerous computational models attempt to predict gene expression from histopathological images. However, a fundamental question remains unresolved: which genes can be accurately predicted and which cannot. Here, we introduce Expression Predictability Score (EPS), a metric that quantifies the predictability of each gene from images through expression-image mutual information. Empirical analyses across more than 500 slices further reveal consistent sets of highly predictable and unpredictable genes, as well as their underlying association with the physicochemical nature of H&E staining. 
Main Histopathological imaging and spatial transcriptome technologies are complementary in terms of molecular information as well as speed and cost-effectiveness 1 , 2 . Thus, efforts have been made to integrate omics data with histology images 3 , and, specifically, to infer transcriptomic profiles from imaging data 4 , 5 , 6 , 7 , 8 . However, both the results of these works and independent benchmarking studies 9 have revealed that the predictive accuracy of models varies markedly across genes, ranging from as low as 0.1 to as high as 0.9. This raises a critical question: which genes can be reliably predicted, and which results can we trust? From an information-theoretic perspective, the mutual information between modalities (e.g., transcriptomic profiles vs histology images) defines the theoretical upper bound of cross-modal predictive performance 10 , 11 . Thus, we propose the Expression Predictability Score (EPS), a metric quantifying the predictability of gene expression from histology images via the mutual information I(X; Y) (Methods). Mathematically, EPS is defined as the negative logarithm of the graph Laplacian quadratic form calculated from the expression of gene a within the image-embedding kNN graph ( Fig. 1a , Methods), and could be interpreted intuitively as a quantitative metric for the expression consistency of gene a across multiple regions with similar histology. We further define the Slice Predictability Score (SPS) as the average EPS across all genes within a given tissue section to quantify the slice-level predictability ( Fig. 1a ). Download figure Open in new tab Fig. 1: EPS quantifies gene expression predictability from histology images accurately. a , Schematic of the EPS and SPS calculation. For each histology image of spots/cells, we first applied an image embedding method to obtain their embeddings, and then constructed a kNN graph 𝒢 Y based on the embeddings. 
For a given gene a , its expression value ( x a ) was assigned as a node attribute on 𝒢 Y . EPS of gene a was defined as the negative logarithm of graph Laplacian quadratic form of 𝒢 Y . L Y denotes the graph Laplacian matrix of 𝒢 Y ( Methods ). SPS was defined as the average EPS across all genes within a slice, K denotes the total gene number. b , Scatter plots showing the relationship between EPS (x-axis) and prediction performance (Pearson correlation, y-axis) for individual genes. Columns correspond to different prediction methods, and rows represent slices from different tissues. Points are colored according to local density. In efforts to validate these metrics, we first assess the relationship between EPS and gene-level predictive accuracy of different models across seven tissues, including brain, bowel, lung, breast, ovary, prostate, and uterus. We carefully selected five representative models, including simple linear approaches such as linear regression and ridge regression, complex nonlinear methods such as multilayer perceptrons (denoted as MLP) and gradient-boosted trees (denoted as Ensemble), as well as the state-of-the-art deep learning–based model ST-NET 4 (Methods). The results showed that EPS consistently quantifies the predictability, independent of tissue type and model ( Fig. 1b , Extended Data Fig. 1 – 2 and Supplementary Table 1). Cross-model comparisons showed that all models performed poorly on genes with low EPS (i.e. low mutual information). Of note, although all models performed well in predicting high EPS genes (i.e. high mutual information), complex models—such as ST-Net, Ensemble, and MLP—achieved higher accuracy for genes with intermediate EPS ( Fig. 1b , Extended Data Fig. 3 ). This observation suggests that overall model performance may be determined by its ability to predict intermediate–mutual-information genes, highlighting a potential avenue for future model improvement. 
Next, we systematically investigated the relationship between genes’ EPS (i.e. predictability) and properties using the HEST dataset, which consists of 510 slices covering XX (number) human tissues. We computed the average EPS for each gene across all slices and identified top 5% vs bottom 5% predictable genes ( Extended Data Fig. 4a , Supplementary Table 2). Gene Ontology (GO) enrichment analysis showed that these highly predictable genes were significantly enriched in extracellular matrix while unpredictable genes were more likely to be associated with biological membranes ( Fig. 2a and 2b ). We propose that such bias could be attributed to the technological nature of current H&E staining: eosin strongly binds to positively charged molecules such as collagen, which is lysine enriched, enabling strong staining of these extracellular structures 1 . In contrast, both hematoxylin and eosin are hydrophilic dyes, and therefore poorly stain hydrophobic, lipid-rich structures such as biological membranes, leading to the low predictability of membrane-associated proteins. Of interest, we noted that gene predictability exhibits tissue specificity ( Fig. 2c , Extended Data Fig. 4b , also see Supplementary Table 3), which may contribute to the well-documented challenge for cross-tissue prediction 3 and highlights the necessity of training models over datasets with comprehensive tissues covered. Download figure Open in new tab Fig. 2: EPS and SPS analysis on HEST dataset a-b , Gene Ontology Cellular Component (CC) analysis for the top predictable genes ( a ) and top unpredictable genes ( b ). Left panel: barplot of enriched GO terms in the Cellular Component category. Right panel: shows the genes and pathways in the form of a graph. c , Heatmap of genes with the highest EPS across different tissues. d , Heatmaps of EPS (left panel) and prediction performance (Pearson correlation, right panel) for known biomarkers across multiple cancer types. 
e , Scatter plots showing the relationship between SPS (x-axis) and prediction performance (Pearson correlation, y-axis) across slices in the HEST dataset. We further focused on the predictability of clinically meaningful genes in cancer 12 . Encouragingly, we found that PSA ( KLK3 ) in prostate cancer, as well as HER2 ( ERBB2 ) and ESR1 in breast cancer, exhibited high predictability (high EPS), and correspondingly strong predictive performance ( Fig. 2d ). However, some other clinically important markers, such as PD-L1 ( CD274 ), BRCA1 , and BRCA2 showed low predictability and poor predictive performance ( Fig. 2d ), suggesting that the histopathological imaging-based inference should be interpreted with caution. Consistently, we also found the slice-level metric SPS is strongly correlated with slice-level prediction accuracy of all models ( Fig. 2e and Extended Data Fig. 4c ). While model performance varied on high-SPS slices, all models showed similarly poor performance on low-SPS slices ( Fig. 2e , Extended Data Fig. 4d ). These findings reflect the lack of mutual information that prevents any model from making accurate predictions, further confirming the decisive role of mutual information in expression predictability. Of note, EPS and SPS are not limited to image–expression data and can be applied to any paired multi-omics datasets. For example, using multi-omics data from the NeurIPS 2021 Open Problem Competition 13 , we employed EPS and SPS to evaluate the predictability of ATAC to RNA, as well as surface protein to RNA. They also demonstrated consistently strong performance ( Extended Data Fig. 5 – 6 ), highlighting their potential as general indicators of cross-modal predictability. 
Owing to the intrinsic complexity of the image distribution, we currently employ the published image model 14 when inferring the kNN graph for EPS calculation ( Methods ). Thus, a suboptimal embedding may introduce noise into the graph structure and further lead to an underestimation of EPS. However, we believe that the rapid development of image models would effectively alleviate this problem. In summary, we have developed the Expression Predictability Score (EPS), a metric that quantifies the predictability of genes from histopathological images. By analyzing a large number of slices, we found that the bias in gene predictability may be attributed to the hydrophilic nature of H&E dyes, and EPS can also serve as a quantitative metric for guiding the development of new tissue-staining methods to enable accurate prediction of specific genes or biomarkers. We recommend that EPS be carefully considered for each gene prior to interpreting the results of any image-based gene expression prediction model. The full code is publicly available at https://github.com/gao-lab/EPS . Methods Expression Predictability Score For spatial omics data with N spots, let x a denote the scaled expression of gene a , and let Y represent the distribution of the paired histology images. Given the complexity of the image distribution Y , we here introduce a spot-wise k-nearest neighbor (kNN) graph 𝒢 Y derived from a pretrained image foundation model (here the state-of-the-art UNI model 14 , with k = 5 by default). Then, for gene a , EPS could be represented as the negative logarithm of the graph Laplacian quadratic form of x a on 𝒢 Y : $$\mathrm{EPS}_a = -\log\left(x_a^{\top} L_Y x_a\right)$$ where L Y denotes the graph Laplacian matrix corresponding to the graph 𝒢 Y : $$L_Y = D_Y - A_Y$$ (with $A_Y$ and $D_Y$ the adjacency and degree matrices of 𝒢 Y ). We now derive the relationship between EPS and the mutual information I ( x a ; Y ) analytically. Specifically, we express the mutual information in terms of conditional entropy: $$I(x_a; Y) = H(x_a) - H(x_a \mid Y)$$ Given that H ( x a ) is a constant, we only need to consider H ( x a | Y ). 
However, the analytical estimation of the conditional entropy H ( x a | Y ) is not trivial. Therefore, we use the Gaussian Markov Random Field (GMRF) probabilistic model 15 to link the spots’ gene expression (Gaussian distribution) with their histological images (in the format of graph). In practice, we assume that x a follows a conditional distribution over 𝒢 Y : $$x_a \mid Y \sim \mathcal{N}\left(\mathbf{0},\, Q^{-1}\right), \qquad Q = \tau L_Y$$ where $Q$ is the precision matrix (the inverse of the covariance matrix) of the GMRF, and is proportional to the graph Laplacian matrix L Y of 𝒢 Y 15 . The maximum likelihood estimate of τ could be given by (see Supplementary Note 1 for details): $$\hat{\tau} = \frac{N}{x_a^{\top} L_Y x_a}$$ where N is the spot number in the slice. The entropy of GMRF P ( x a | Y ) is 15 : $$H(x_a \mid Y) = \frac{N}{2}\log(2\pi e) - \frac{1}{2}\log\lvert Q\rvert$$ For the N-order matrix L Y , we have $$\lvert Q\rvert = \lvert \tau L_Y\rvert = \tau^{N}\lvert L_Y\rvert$$ thus: $$H(x_a \mid Y) = \frac{N}{2}\log(2\pi e) - \frac{N}{2}\log\tau - \frac{1}{2}\log\lvert L_Y\rvert$$ So, the mutual information could be rewritten as: $$I(x_a; Y) = H(x_a) - \frac{N}{2}\log(2\pi e) + \frac{N}{2}\log\tau + \frac{1}{2}\log\lvert L_Y\rvert$$ We omit all constant values, including $H(x_a)$, $\frac{N}{2}\log(2\pi e)$, and $\frac{1}{2}\log\lvert L_Y\rvert$, and obtain: $$I(x_a; Y) \propto \log\tau$$ Substituting the maximum likelihood estimate $\hat{\tau}$, and omitting constant terms, we obtain: $$I(x_a; Y) \propto -\log\left(x_a^{\top} L_Y x_a\right)$$ i.e.: $$I(x_a; Y) \propto \mathrm{EPS}_a$$ Data collection and pre-processing Seven 10x spatial slices were obtained from the official 10x Genomics website. These datasets encompass tissues from diverse organs and disease states, each paired with a corresponding H&E-stained image (Supplementary Table 1). The HEST data were obtained from the HEST dataset 16 , from which we selected human slices and excluded slices containing fewer than 100 spots, leaving a total of 510 slices. All spatial slices were processed using the same pipeline. For H&E images, we first scaled the images to 0.5 μm per pixel (mpp), and then extracted square tiles (224 × 224 pixels, 112 μm in diameter) centered at each spot coordinate. Then, the image of each tile was embedded into a 1024-dimensional vector using the pretrained UNI 14 model. For gene expression data, we selected the top 2,000 highly variable genes shared across all slices, to ensure fair cross-slice comparisons. The expression data were then normalized, log-transformed (log1p), and scaled following the standard Scanpy workflow 17 . 
Single-cell multi-omics datasets were obtained from the NeurIPS 2021 Open Problem competition 13 and include two data types: 10x Multiome (RNA and ATAC) and CITE-seq (RNA and surface protein). The 10x Multiome dataset comprises 13 distinct samples (batches), while CITE-seq comprises 12 batches. For RNA data, we applied the same workflow as described above, including selection of the top 2,000 highly variable genes, normalization, log1p transformation, and scaling. For surface protein data, raw counts were log-normalized and scaled, and principal component analysis (PCA) was then used to reduce dimensionality to 20 components. For ATAC data, dimensionality reduction was performed using the spectral decomposition method described in SnapATAC2 18 with default parameters, yielding 30 components. Prediction models We constructed a variety of baseline models to predict gene expression from histological images: Linear regression (denoted as Linear), implemented using scikit-learn. Ridge regression (denoted as Ridge), also implemented in scikit-learn, with α = 1. Multilayer perceptron regression (denoted as MLP), implemented in PyTorch. Specifically, the MLP consisted of a hidden layer with 256 units and ReLU activation. The mean squared error (MSE) was used as the loss function. Models were trained for 100 epochs using the AdamW optimizer, with a learning rate of 1 × 10⁻³ and a batch size of 128. Gradient-boosted trees (denoted as Ensemble), implemented using LightGBM with 100 trees, trained separately for each gene. For each tissue slice, data were randomly split into training and test sets at an 80:20 ratio. In addition, we included the previously published ST-Net 4 model, trained using the same preprocessing and train/test splits as the baseline models and following the training procedures described by the original authors. 
For single-cell multi-omics data, we trained only the MLP as the baseline model, as it was the most common choice among the winning teams in the original competition 13 . Model performance on the test set was evaluated using Pearson correlation, Spearman correlation, and Root Mean Square Error (RMSE). Then, the sample-level metric was calculated as the average across all genes. Statistical analysis Pearson and Spearman correlation coefficients, as well as the corresponding P -values, were calculated using the “scipy.stats” Python package (v1.16.1). Gene Ontology (GO) enrichment analysis was performed using the “clusterProfiler” R package 19 (v4.6.2), with a P- value threshold of 0.05. P -values were adjusted using the Benjamini–Hochberg method. Data availability statement All datasets used in this study were already published and were obtained from public data repositories. 10x spatial slices used in this study are recorded in Supplementary Table 1, including downloading URLs. HEST dataset 16 is available at Hugging Face ( https://huggingface.co/datasets/MahmoodLab/hest ). NeurIPS 10x Multiome and CITE-seq data 13 is available at GSE194122 ( https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE194122 ). Code availability statement The source code of calculating EPS and SPS, as well as codes to reproduce the results in this paper, can be accessed from https://github.com/gao-lab/EPS under MIT license. Author contributions G.G. conceived the study and supervised the research. C.R.X. designed and implemented the computational framework. C.R.X. and J.W.Y. conducted experiments with guidance from G.G. C.R.X., J.W.Y., and G.G. wrote the manuscript. Competing interests The authors declare that they have no competing interests. Figure legends Download figure Open in new tab Extended Data Fig. 1: Correlation between EPS and Spearman correlation. 
Scatter plots showing the relationship between EPS (x-axis) and prediction performance (Spearman correlation, y-axis) for individual genes. Columns represent different prediction methods, and rows correspond to slices from different tissues. Points are colored according to local density. Download figure Open in new tab Extended Data Fig. 2: Correlation between EPS and RMSE. Scatter plots showing the relationship between EPS (x-axis) and prediction error (RMSE, y-axis) for individual genes. Columns represent different prediction methods, and rows correspond to slices from different tissues. Points are colored according to local density. Download figure Open in new tab Extended Data Fig. 3: Comparing prediction accuracy of prediction methods in 10x slices. a , Boxplot showing the average per-slice prediction performance (Pearson correlation) of each method across seven 10x slices. b-d , Scatter plots showing the per-gene performance differences between MLP ( b ), Ensemble ( c ), and ST-NET ( d ) relative to the Linear model in 10x slices. Points are colored according to local density. Download figure Open in new tab Extended Data Fig. 4: Comparison of prediction accuracy across methods in the HEST dataset. a , Histogram showing the distribution of average EPS across slices in the HEST dataset. Purple and red shadows indicate the “top predictable genes” and “top unpredictable genes”, respectively. b , Heatmap of prediction performance (Pearson correlation) for genes with the highest EPS across different tissues (as shown in Fig. 2d ). c , Scatter plots showing the relationship between SPS (x-axis) and prediction performance (Pearson correlation, y-axis) across slices in the HEST dataset. d , Scatter plots showing the per-slice performance differences of ST-NET, Ensemble and MLP relative to the Linear model in the HEST dataset. e-f , Violin plots showing the distributions of Pearson correlation ( e ) and Spearman correlation ( f ) for each method in the HEST dataset. 
Download figure Open in new tab Extended Data Fig. 5: EPS and SPS results in CITE-seq data. a , Scatter plot showing the relationship between SPS (x-axis) and prediction performance (Pearson correlation, y-axis) in each batch. b , Scatter plots showing the relationship between EPS (x-axis) and prediction performance (Pearson correlation, y-axis) for individual genes, each panel represents a batch. Download figure Open in new tab Extended Data Fig. 6: EPS and SPS results in 10x Multiome data. a , Scatter plot showing the relationship between SPS (x-axis) and prediction performance (Pearson correlation, y-axis) in each batch. b , Scatter plots showing the relationship between EPS (x-axis) and prediction performance (Pearson correlation, y-axis) for individual genes, each panel represents a batch. Acknowledgments We thank Drs. Z. Cao, Z. Zhang, F. Tang, and X.S. Xie at Peking University for their helpful discussions and comments during the study. This work was supported by funds from the National Key Research and Development Program of China (2022ZD0115004), as well as the State Key Laboratory of Gene Function and Modulation Research, the Beijing Advanced Innovation Center for Genomics (ICG) at Peking University, the Changping Laboratory, and the Shaw Foundation Hong Kong Limited. The research of C.R.X. is supported in part by the National Natural Science Foundation of China (grant no. 323B2017). Footnotes https://github.com/gao-lab/EPS References 1. ↵ Bancroft , J. D. Theory and Practice of Histological Techniques. (Elsevier Health Sciences , 2008 ). 2. ↵ Bressan , D. , Battistoni , G. & Hannon , G. J. The dawn of spatial omics . Science 381 , eabq4964 ( 2023 ). OpenUrl CrossRef PubMed 3. ↵ Coleman , K. , Schroeder , A. & Li , M. Unlocking the power of spatial omics with AI . Nat Methods 21 , 1378 – 1381 ( 2024 ). OpenUrl CrossRef PubMed 4. ↵ He , B. et al. Integrating spatial gene expression and breast tumour morphology via deep learning . 
Nat Biomed Eng 4 , 827 – 834 ( 2020 ). OpenUrl PubMed 5. ↵ Xie , R. , Pang , K. , Bader , G. D. & Wang , B. Spatially Resolved Gene Expression Prediction from H&E Histology Images via Bi-modal Contrastive Learning . Preprint at http://arxiv.org/abs/2306.01859 ( 2023 ). 6. ↵ Zeng , Y. et al. Spatial transcriptomics prediction from histology jointly through Transformer and graph neural networks . Briefings in Bioinformatics 23 , bbac297 ( 2022 ). OpenUrl CrossRef PubMed 7. ↵ Comiter , C. et al. Inference of single cell profiles from histology stains with the Single-Cell omics from Histology Analysis Framework (SCHAF) . 2023.03.21.533680 Preprint at doi: 10.1101/2023.03.21.533680 ( 2023 ). OpenUrl Abstract / FREE Full Text 8. ↵ Chen , W. et al. A visual–omics foundation model to bridge histopathology with spatial transcriptomics . Nat Methods 22 , 1568 – 1582 ( 2025 ). OpenUrl PubMed 9. ↵ Wang , C. et al. Benchmarking the translational potential of spatial gene expression prediction from histology . Nat Commun 16 , 1544 ( 2025 ). OpenUrl CrossRef PubMed 10. ↵ Cover , T. M. & Thomas , J. A. ELEMENTS OF INFORMATION THEORY . 11. ↵ Tishby , N. & Zaslavsky , N. Deep learning and the information bottleneck principle . in 2015 IEEE Information Theory Workshop (ITW) 1 – 5 ( 2015 ). doi: 10.1109/ITW.2015.7133169 . OpenUrl CrossRef 12. ↵ Passaro , A. et al. Cancer biomarkers: Emerging trends and clinical implications for personalized treatment . Cell 187 , 1617 – 1635 ( 2024 ). OpenUrl CrossRef PubMed 13. ↵ Lance , C. et al. Multimodal single cell data integration challenge: results and lessons learned . BioRxiv 2022 – 04 ( 2022 ). 14. ↵ Chen , R. J. et al. Towards a general-purpose foundation model for computational pathology . Nat Med 30 , 850 – 862 ( 2024 ). OpenUrl CrossRef PubMed 15. ↵ Rue , H. & Held , L. Gaussian Markov Random Fields: Theory and Applications . ( Chapman and Hall/CRC , 2005 ). 16. ↵ Jaume , G. et al. 
HEST-1k: A Dataset For Spatial Transcriptomics and Histology Image Analysis . Advances in Neural Information Processing Systems 37 , 53798 – 53833 ( 2024 ). OpenUrl 17. ↵ Wolf , F. A. , Angerer , P. & Theis , F. J. SCANPY: large-scale single-cell gene expression data analysis . Genome Biology 19 , 15 ( 2018 ). OpenUrl CrossRef PubMed 18. ↵ Zhang , K. , Zemke , N. R. , Armand , E. J. & Ren , B. A fast, scalable and versatile tool for analysis of single-cell omics data . Nat Methods 21 , 217 – 227 ( 2024 ). OpenUrl CrossRef PubMed 19. ↵ Wu , T. et al. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data . The Innovation 2 , 100141 ( 2021 ). OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted November 05, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Quantifying predictability of gene expression from histology image Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Quantifying predictability of gene expression from histology image Chen-Rui Xia , Jia-Wen Yao , Ge Gao bioRxiv 2025.11.04.686651; doi: https://doi.org/10.1101/2025.11.04.686651 Share This Article: Copy Citation Tools Quantifying predictability of gene expression from histology image Chen-Rui Xia , Jia-Wen Yao , Ge Gao bioRxiv 2025.11.04.686651; doi: https://doi.org/10.1101/2025.11.04.686651 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17697) Bioengineering (13895) Bioinformatics (41951) Biophysics (21456) Cancer Biology (18594) Cell Biology (25520) Clinical Trials (138) Developmental Biology (13381) Ecology (19903) Epidemiology (2067) Evolutionary Biology (24323) Genetics (15612) Genomics (22510) Immunology (17738) Microbiology (40401) Molecular Biology (17184) Neuroscience (88622) Paleontology (667) Pathology (2833) Pharmacology and Toxicology (4825) Physiology (7644) Plant Biology (15158) Scientific Communication and Education (2046) Synthetic Biology (4296) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00