A multimodal cross-attention pathotranscriptome integration for enhanced survival prediction of oral squamous cell carcinoma

doi:10.1101/2025.10.31.25339218

A multimodal cross-attention pathotranscriptome integration for enhanced survival prediction of oral squamous cell carcinoma

2025 · doi:10.1101/2025.10.31.25339218

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 30,200 characters · extracted from preprint-html · click to expand

A multimodal cross-attention pathotranscriptome integration for enhanced survival prediction of oral squamous cell carcinoma | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search A multimodal cross-attention pathotranscriptome integration for enhanced survival prediction of oral squamous cell carcinoma View ORCID Profile Kountay Dwivedi , View ORCID Profile Amirreza Mahbod , View ORCID Profile Rupert C. 
Ecker , View ORCID Profile Klara Janjić doi: https://doi.org/10.1101/2025.10.31.25339218 Kountay Dwivedi 1 Center for Clinical Research, University Clinic of Dentistry, Medical University of Vienna , 1090 Vienna, Austria Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kountay Dwivedi Amirreza Mahbod 2 Research Center for Medical Image Analysis and Artificial Intelligence, Department of Medicine, Faculty of Medicine and Dentistry, Danube Private University , 3500 Krems an der Donau, Austria Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Amirreza Mahbod Rupert C. Ecker 3 TissueGnostics GmbH , 1020 Vienna, Austria Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rupert C. Ecker Klara Janjić 1 Center for Clinical Research, University Clinic of Dentistry, Medical University of Vienna , 1090 Vienna, Austria Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Klara Janjić For correspondence: klara.janjic{at}meduniwien.ac.at Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Oral squamous cell carcinoma (OSCC) accounts for a major part of cancer mortality, with survival outcomes highly dependent on early diagnosis. While many approaches have been proposed for OSCC survival prediction, they often rely on unimodal data, which may be suboptimal. In this study, we introduced a unified cross-attention-based deep learning framework that integrates whole-slide histopathology images (WSIs) and transcriptomic data from OSCC patients for survival prediction. The framework employed an autoencoder for transcriptomic feature extraction and a state-of-the-art pathology foundation model—evaluated across five alternatives—to derive WSI embeddings. 
These embeddings were subsequently integrated using cross-attention and concatenation within a Cox proportional hazards model. The multimodal approach outperformed nearly all unimodal counterparts, achieving a maximum concordance index of 0.780±0.059 with cross-attention and 0.766±0.050 with concatenation. The results indicate that pathotranscriptomic integration could improve survival prediction for OSCC patients. The implementation is available on GitHub at: https://github.com/kountaydwivedi/multimodal_fusion.git . This work has been submitted to the IEEE for possible publication. Copyright may be transferred without notice, after which this version may no longer be accessible . 1. Introduction Oral squamous cell carcinoma (OSCC) accounts for approximately 90% of oral cancers [ 1 ]. The 5-year overall survival rate of OSCC declines from 70% - 80% at early stages to approximately 35% - 45% at later stages [ 2 ]. Early and accurate diagnostics are therefore essential for prognosis in OSCC cases. Deep learning has transformed the landscape of patient prognosis, automating the analysis of H&E-stained histopathological whole-slide images (WSI) for diagnosis [ 3 ]. Particularly, large-scale computational pathology models can perform diverse medical image analysis tasks such as segmentation, classification and risk stratification [ 3 - 10 ]. While WSIs portray the spatial description of the tumor, the gene expression-based transcriptomic profile unveils the molecular-level attributes of the disease. As both the modalities are tightly associated [ 3 , 11 ], pathotranscriptome integration combines both data types, yielding more robust prediction. However, histopathology and transcriptome data are naturally distinct, which poses significant challenges for their integration. A WSI, often being a billion-pixel image, makes direct processing with any conventional deep learning model computationally prohibitive [ 4 ]. 
This issue is mitigated by modeling WSIs using the multiple instance learning (MIL) concept [ 12 ]. In MIL, the WSI is partitioned into tiles or patches to generate patch-level embeddings, which are utilized for downstream tasks [ 4 ]. Contrary to WSI, the transcriptome profile is generally represented as a high-dimensional gene expression vector, often leading to the curse of dimensionality. This issue can be alleviated by employing a dimensionality reduction approach [ 11 ]. In this study, we present a cross-attention-based pathotranscriptomic integration framework for survival prediction of OSCC patients. We initially employed an autoencoder to derive transcriptome-level embeddings. Next, we leveraged and compared five state-of-the-art computational pathology models including CTransPath [ 6 ], Prov-GigaPath or GigaPath [ 7 ], HibouL [ 8 ], Virchow [ 9 ] and UNI [ 10 ] to extract WSI-level embeddings via attention-based MIL (ABMIL) [ 13 ]. These unimodal embeddings were subsequently integrated using cross-attention and concatenation strategies for OSCC survival prediction. This study presents a novel approach that utilizes pathotranscriptomic integration based on pathology foundation model features and dimensionally reduced transcriptomic features via an autoencoder for OSCC survival prediction. We benchmark unimodal and multimodal integration strategies and evaluate the five employed pathology models for extracting WSI-level embeddings suitable for integration. 2. Methods The main workflow of our approach is depicted in Figure 1 . In the following, we describe the dataset used and provide a detailed description of the proposed method. Download figure Open in new tab Figure 1. A cross-attention–based deep learning framework for OSCC survival prediction. Step 1: Transcriptome embeddings were derived via autoencoder. 
Step 2: Patch-level embeddings were computed using distinct pathological models, fusing them to extract WSI-level embedding via attention-based multiple instance learning (ABMIL). Step 3: Transcriptome and WSI embeddings were integrated through concatenation and cross-attention mechanisms to derive pathotranscriptome embedding for subsequent survival analysis. 2.1. Dataset acquisition We utilized the publicly available OSCC dataset provided by The Cancer Genome Atlas (TCGA; https://portal.gdc.cancer.gov/ ). In total, 522 cases were found under the OSCC subcategory of head & neck squamous cell carcinoma cohort with transcriptome and clinical data (accessed through cBio Cancer Genomics Portal [ 14 ]). A subset of n = 38 cases with diagnostic WSIs (DICOM) was selected for experimentation. For clinical attributes, we employed age, gender and the clinical stage of the tumor. The survival outcome was predicted using the overall survival attribute and the performance of the predictive models was assessed by the concordance index (c-index) [ 15 ]. 2.2. AutoFusion-based transcriptome embedding generation The gene expression transcriptomic data , was initially investigated to filter out genes with ≥ 70% missing values across all samples, resulting in a concise G ∈ ℝ n × M ; M = 18375. Next, we z-score normalized G across genes to scale its mean and standard deviation to zero and one, respectively. Given a transcriptome vector g i ∈ ℝ M ; i ∈ [1, n ], we employed an autoencoder f a (·) (pretrained on all 522 OSCC cases) to generate a concise embedding vector . Specifically, a set of four embeddings was derived for g i , each aligned to the dimensionality of the respective computational pathology foundation model x utilized for WSI embedding generation (see section 2.3 .). Subsequently, a transcriptomic matrix for each x was constructed by stacking the respective generated by f a (·) for x . 2.3. 
ABMIL-based WSI embedding generation Initially, each WSI was partitioned into a (512 × 512 × 3) nonoverlapping patches at 40× magnification using the TIAToolbox [ 16 ] library. Only patches with tissue area greater than 20% (measured by white– gray pixel thresholding) were selected. The number of patches per WSI varied between 1,510 and 20,393. For a WSI W i ; i ∈ [1, n ], assume the set of patches . Each p j was subsequently subjected to each of the five computational pathology foundation models f x (·) to generate its vector embedding map where: Subsequently, an embedding matrix of dimension for each xx was constructed by stacking all the patch-level embeddings generated by x . Thereafter, we employed ABMIL to fuse into a single WSI-level embedding vector ABMIL initially utilizes a local attention mechanism on the patch-level and subsequently applies a pooling mechanism to generate a WSI-level embedding. For , the MIL pooling is computed as [ 13 ]: where: where and are parameters and ⊙ is tanh (·) function. Finally, a WSI-level matrix was generated for each x by stacking the respective WSI-level embeddings generated by x . 2.4. Pathotranscriptomic integration Two strategies were used to fuse the embeddings: cross-attention and concatenation. Cross-attention-based fusion For each x with dimension D x , given vectors and , we focused on designing an integration module robust enough to capture the intra-modal and cross-modal correlations. For this, we implemented a modified adaptation of the fusion mechanism proposed in [ 4 ] that utilizes the transformer attention [ 17 ] as the backbone. Mathematically, we initialized six vectors: three equal to h g and three equal to h w as: where q, k and ν denote the query,key and value vectors, respectively, essentially needed to compute the attention. 
For the pathotranscriptomic integration, we subsequently computed the vector-based self-attention and cross-attention values and defined a matrix Attn i as: where σ is the row-wise softmax. The q · k T captures the intramodality and cross-modality correlations and is mapped as: where Attn g → g and Attn W → W represent correlated attentions capturing intra-modal transcriptome-to-transcriptome and histopathology-to-histopathology interactions, respectively, while Attn g → W and Attn W → g represent correlated attentions capturing cross-modal transcriptome-to-histopathology and histopathology-to-transcriptome interactions, respectively. While Jaume et al . [ 4 ] modeled patch-to-patch interactions using Attn W → W and approximated it with −∞ in Equation (1) to reduce memory requirements, we explicitly computed this attention expression by utilizing the WSI-level embeddings. The matrix is thereafter normalized using LayerNorm modality-wise and subsequently concatenated to form a final embedding: Concatenation-based fusion We extended our methodology by employing a concatenation mechanism to integrate the vectors h g and h w . For this, we appended both the vectors to form a resultant vector: 2.5. Survival prediction experiments The computed embeddings Fused i and Concat i ; i ∈ [1, n ] for each x are combined with the clinical attributes of i . Subsequently, the Cox Proportional Hazard (CPH) model was utilized for survival prediction using the PyCox library [ 18 ]. The training was performed using stratified 5-fold cross-validation repeated across 100 different random seed values. Finally, the mean c-index with standard deviation across all seed values was computed for evaluation. 2.6. Implementation Autoencoder The autoencoder utilized in the AutoFusion module comprised two hidden encoder layers of sizes [4096, 2048], a bottleneck layer of size D x corresponding to the embedding dimension of model x and two hidden decoder layers of sizes [2048, 4096], respectively. 
Each hidden layer was cascaded with a batch-norm layer. We used tanh (·) for activation, mean-squared error for loss computation and Adam [ 19 ] for gradient optimization. The model was trained for 150 epochs with a learning rate of 1e−4 and a batch size of 64. PyCox The PyCox-based CPH model was subjected to a vanilla multilayered perceptron (MLP) model with an input layer and a hidden layer of size D x and an output layer with a single node to output the predictive probability. The MLP model was trained for 500 epochs with early stopping. The optimizer used was AdamW [ 20 ] and the batch size was kept to 8. Hardware and Operating System The entire experimentation was performed on a Linux operating system installed on a high performance computing GPU server with a dedicated 40GB NVIDIA DGX A100 GPU, 2x AMD EPYC 7742 CPU and 2048GB RAM. 2.7. Ablation study We performed a series of ablation studies to evaluate our proposed framework. For a sample i ∈ [1, n ]: (i) we concatenated the full-scale transcriptome vector g i ∈ ℝ M with its corresponding WSI-level embeddings; (ii) we upscaled the WSI-level embeddings by employing linear interpolation with L2-normalization [ 21 ] to match the full-scale transcriptome vector g i ; and (iii) we concatenated the autofusion-based transcriptome embeddings with the WSI-level embeddings generated by the two best pathology models: CTransPath and GigaPath. 3. Results All benchmark results are provided in Table 1 . The unimodal and the multimodal approaches were evaluated on the basis of the mean c-index score (± standard deviation). The per-seed results are provided in the GitHub repository as supplementary materials. View this table: View inline View popup Download powerpoint Table 1. Evaluation of cross-attention-based pathotranscriptomic fusion against unimodal approaches for OSCC survival prediction. The results are reported based on mean c-index ± standard deviation. 
Overall, the fusion approach outperformed nearly all unimodal approaches, except GigaPath, where WSI-level embedding exhibited superior results. Further, the box-and-whisker plots in Figure 2 illustrate the comparison of cross-attention-based and concatenation-based pathotranscriptomic fusion with unimodal approaches. Download figure Open in new tab Figure 2. Box-and-whisker plots illustrating the comparison of c-index between cross-attention-based and concatenation-based pathotranscriptomic fusion approaches and unimodal (WSI and transcriptome) models. Except for GigaPath, both fusion mechanisms outperform individual modality-based survival prediction models. Each box-plot summarizes results obtained from 100 independent iterations of the experiment using different seed values. Overall, both integration strategies outperformed nearly all unimodal approaches. The highest performance was achieved with the cross-attention-based strategy (0.780±0.058), followed by the concatenation-based strategy (0.766±0.050), when combining CTransPath-based WSI features with autoencoder-based transcriptome embeddings. The only exception was the GigaPath model, where WSI-based embeddings exhibited slightly better predictive performance (0.762±0.059) than concatenation-based (0.759±0.056) and attention-based integration (0.756±0.059). The results underscore that histopathology and transcriptome profiles of OSCC patients can be complementarily integrated for better prediction. Table 2 summarizes the findings of ablation studies. In general, the concatenation of full-scale transcriptome with WSI-level embeddings yielded better results than upscaling WSI-level embeddings and concatenating them with full-scale transcriptome feature set or concatenating transcriptome embeddings with WSI-level embeddings generated by CTransPath and GigaPath. However, none of the ablation-based experiments outperformed the proposed cross-attention-based pathotranscriptomic fusion approach. 
View this table: View inline View popup Download powerpoint Table 2. Comparison of different ablation studies with the proposed cross-attention-based fusion approach. 4. Conclusion This study proposes a novel cross-attention-based pathotranscriptomic integration framework for OSCC profiles, designed to assist pathologists in achieving early and accurate survival prediction. By effectively integrating two complementary modalities - transcriptomic data and WSIs - the proposed framework exhibited superior predictive performance compared to unimodal approaches. These findings confirm that employing a cross-attention-based integration strategy increases the accuracy of survival prediction in OSCC. Data Availability All data produced in the present study are available upon reasonable request to the authors. https://github.com/kountaydwivedi/multimodal_fusion.git Compliance with ethical standards This study was conducted retrospectively, using publicly available data from the ethically approved TCGA program. Ethical approval for the present study was not required. Acknowledgements This work was supported by the Austrian Research Promotion Agency (FFG), project no. 895420. References [1]. ↵ Yunhan Tan , Zhihan Wang , Mengtong Xu , Bowen Li , Zhao Huang , Siyuan Qin , Edouard C Nice , Jing Tang , and Canhua Huang , “ Oral squamous cell carcinomas: state of the field and emerging directions ,” International journal of oral science , vol. 15 , no. 1 , pp. 44 , 2023 . OpenUrl CrossRef PubMed [2]. ↵ Ziye Xu , Manbin Xu , Zhichen Sun , Qin Feng , Shaowei Xu , and Hanwei Peng , “ A nomogram for predicting overall survival in oral squamous cell carcinoma: a seer database and external validation study ,” Frontiers in Oncology , vol. 15 , pp. 1557459 , 2025 . OpenUrl PubMed [3]. ↵ Andreas Vollmer , Stefan Hartmann , Michael Vollmer , Veronika Shavlokhova , Roman C Brands , Alexander Kübler , Jakob Wollborn , Frank Hassel , Sebastien Couillard-Despres , Gernot Lang , et al. 
, “ Multimodal artificial intelligence-based pathogenomics improves survival prediction in oral squamous cell carcinoma ,” Scientific reports , vol. 14 , no. 1 , pp. 5687 , 2024 . OpenUrl PubMed [4]. ↵ Guillaume Jaume , Anurag Vaidya , Richard J Chen , Drew FK Williamson , Paul Pu Liang , and Faisal Mahmood , “ Modeling dense multimodal interactions between biological pathways and histology for survival prediction ,” in Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition , 2024 , pp. 11579 – 11590 . [5]. Jared L Katzman , Uri Shaham , Alexander Cloninger , Jonathan Bates , Tingting Jiang , and Yuval Kluger , “ Deepsurv: personalized treatment recommender system using a cox proportional hazards deep neural network ,” BMC medical research methodology , vol. 18 , no. 1 , pp. 24 , 2018 . OpenUrl PubMed [6]. ↵ Xiyue Wang , Sen Yang , Jun Zhang , Minghui Wang , Jing Zhang , Wei Yang , Junzhou Huang , and Xiao Han , “ Transformer-based unsupervised contrastive learning for histopathological image classification ,” Medical image analysis , vol. 81 , pp. 102559 , 2022 . OpenUrl CrossRef PubMed [7]. ↵ Hanwen Xu , Naoto Usuyama , Jaspreet Bagga , Sheng Zhang , Rajesh Rao , Tristan Naumann , Cliff Wong , Zelalem Gero , Javier Gonzalez , Yu Gu , et al. , “ A whole-slide foundation model for digital pathology from real-world data ,” Nature , vol. 630 , no. 8015 , pp. 181 – 188 , 2024 . OpenUrl CrossRef PubMed [8]. ↵ Dmitry Nechaev , Alexey Pchelnikov , and Ekaterina Ivanova , “ Hibou: A family of foundational vision transformers for pathology ,” arXiv preprint arXiv: 2406.05074 , 2024 . [9]. ↵ Eugene Vorontsov , Alican Bozkurt , Adam Casson , George Shaikovski , Michal Zelechowski , Kristen Severson , Eric Zimmermann , James Hall , Neil Tenenholtz , Nicolo Fusi , Ellen Yang , Philippe Mathieu , Alexander van Eck , Donghun Lee , Julian Viret , Eric Robert , Yi Kan Wang , Jeremy D. Kunz , Matthew C. H. Lee , Jan H. Bernhard , Ran A. 
Godrich , Gerard Oakley , Ewan Millar , Matthew Hanna , Hannah Wen , Juan A. Retamero , William A. Moye , Razik Yousfi , Christopher Kanan , David S. Klimstra , Brandon Rothrock , Siqi Liu , and Thomas J. Fuchs , “ A foundation model for clinical-grade computational pathology and rare cancers detection ,” Nature Medicine , 2024 . [10]. ↵ Richard J Chen , Tong Ding , Ming Y Lu , Drew FKWilliamson , Guillaume Jaume , Bowen Chen , Andrew Zhang , Daniel Shao , Andrew H Song , Muhammad Shaban , et al. , “ Towards a general-purpose foundation model for computational pathology ,” Nature Medicine , 2024 . [11]. ↵ Kountay Dwivedi , Ankit Rajpal , Sheetal Rajpal , Manoj Agarwal , Virendra Kumar , and Naveen Kumar , “ An explainable ai-driven biomarker discovery framework for non-small cell lung cancer classification ,” Computers in Biology and Medicine , vol. 153 , pp. 106544 , 2023 . OpenUrl CrossRef PubMed [12]. ↵ Thomas G Dietterich , Richard H Lathrop , and Tomás Lozano-Pérez , “ Solving the multiple instance problem with axis-parallel rectangles ,” Artificial intelligence , vol. 89 , no. 1-2 , pp. 31 – 71 , 1997 . OpenUrl CrossRef [13]. ↵ Maximilian Ilse , Jakub Tomczak , and Max Welling , “ Attention-based deep multiple instance learning ,” in International conference on machine learning . PMLR , 2018 , pp. 2127 – 2136 . [14]. ↵ Ethan Cerami , Jianjiong Gao , Ugur Dogrusoz , Benjamin E Gross , Selcuk Onur Sumer , Bülent Arman Aksoy , Anders Jacobsen , Caitlin J Byrne , Michael L Heuer , Erik Larsson , et al. , “ The cbio cancer genomics portal: an open platform for exploring multidimensional cancer genomics data ,” Cancer discovery , vol. 2 , no. 5 , pp. 401 – 404 , 2012 . OpenUrl Abstract / FREE Full Text [15]. ↵ Frank E Harrell , Robert M Califf , David B Pryor , Kerry L Lee , and Robert A Rosati , “ Evaluating the yield of medical tests ,” Jama , vol. 247 , no. 18 , pp. 2543 – 2546 , 1982 . OpenUrl CrossRef PubMed Web of Science [16]. 
↵ Johnathan Pocock , Simon Graham , Quoc Dang Vu , Mostafa Jahanifar , Srijay Deshpande , Giorgos Hadjigeorghiou , Adam Shephard , Raja Muhammad Saad Bashir , Mohsin Bilal , Wenqi Lu , David Epstein , Fayyaz Minhas , Nasir M Rajpoot , and Shan E Ahmed Raza , “ TIAToolbox as an end-to-end library for advanced tissue image analytics ,” Communications Medicine , vol. 2 , no. 1 , pp. 120 , sep 2022 . OpenUrl PubMed [17]. ↵ Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N Gomez , Lukasz Kaiser , and Illia Polosukhin , “ Attention is all you need ,” Advances in neural information processing systems , vol. 30 , 2017 . [18]. ↵ Håvard Kvamme , Ørnulf Borgan , and Ida Scheel , “ Time-to-event prediction with neural networks and cox regression ,” Journal of Machine Learning Research , vol. 20 , no. 129 , pp. 1 – 30 , 2019 . OpenUrl [19]. ↵ Diederik P Kingma and Jimmy Ba , “ Adam: A method for stochastic optimization ,” arXiv preprint arXiv: 1412.6980 , 2014 . [20]. ↵ Ilya Loshchilov and Frank Hutter , “ Decoupled weight decay regularization ,” arXiv preprint arXiv: 1711.05101 , 2017 . [21]. ↵ Byungsoo Ko and Geonmo Gu , “ Embedding expansion: Augmentation in embedding space for deep metric learning ,” in Proceedings of the IEEE/CVF conference on computer vision and pattern recognition , 2020 , pp. 7255 – 7264 . View the discussion thread. Back to top Previous Next Posted November 03, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following A multimodal cross-attention pathotranscriptome integration for enhanced survival prediction of oral squamous cell carcinoma Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share A multimodal cross-attention pathotranscriptome integration for enhanced survival prediction of oral squamous cell carcinoma Kountay Dwivedi , Amirreza Mahbod , Rupert C. Ecker , Klara Janjić medRxiv 2025.10.31.25339218; doi: https://doi.org/10.1101/2025.10.31.25339218 Share This Article: Copy Citation Tools A multimodal cross-attention pathotranscriptome integration for enhanced survival prediction of oral squamous cell carcinoma Kountay Dwivedi , Amirreza Mahbod , Rupert C. Ecker , Klara Janjić medRxiv 2025.10.31.25339218; doi: https://doi.org/10.1101/2025.10.31.25339218 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Oncology Subject Areas All Articles Addiction Medicine (570) Allergy and Immunology (863) Anesthesia (301) Cardiovascular Medicine (4442) Dentistry and Oral Medicine (444) Dermatology (383) Emergency Medicine (609) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1511) Epidemiology (15231) Forensic Medicine (30) Gastroenterology (1126) Genetic and Genomic Medicine (6610) Geriatric Medicine (668) Health Economics (998) Health Informatics (4542) Health Policy (1370) Health Systems and Quality Improvement (1613) Hematology (543) HIV/AIDS (1266) Infectious Diseases (except HIV/AIDS) (15924) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (147) Nephrology (668) Neurology (6608) Nursing 
(346) Nutrition (999) Obstetrics and Gynecology (1146) Occupational and Environmental Health (957) Oncology (3338) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (665) Pediatrics (1693) Pharmacology and Therapeutics (692) Primary Care Research (712) Psychiatry and Clinical Psychology (5450) Public and Global Health (9240) Radiology and Imaging (2203) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (596) Sexual and Reproductive Health (714) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a020a6617a47f047',t:'MTc3OTgzOTA4OA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00