PAH-former: Transfer Learning for Efficient Discovery of Pulmonary Arterial Hypertension-Associated Genes

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 50,907 characters · extracted from preprint-html · click to expand
PAH-former: Transfer Learning for Efficient Discovery of Pulmonary Arterial Hypertension-Associated Genes | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results PAH-former: Transfer Learning for Efficient Discovery of Pulmonary Arterial Hypertension-Associated Genes Toshinaru Kawakami , Sosuke Hosokawa , Masamichi Ito , Atsumasa Kurozumi , Ryohei Tanaka , Shun Minatsuki , Junichi Ishida , Takayuki Isagawa , Satoshi Kodera , Norihiko Takeda doi: https://doi.org/10.1101/2025.06.16.660027 Toshinaru Kawakami 1 Department of Cardiovascular Medicine, Graduate School of Medicine, The University of Tokyo , Tokyo, Japan Find this author on 
Google Scholar Find this author on PubMed Search for this author on this site Sosuke Hosokawa 1 Department of Cardiovascular Medicine, Graduate School of Medicine, The University of Tokyo , Tokyo, Japan 2 Department of Information and Communication Engineering, Graduate School of Information Science and Technology, The University of Tokyo , Tokyo, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Masamichi Ito 1 Department of Cardiovascular Medicine, Graduate School of Medicine, The University of Tokyo , Tokyo, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: mitou.tky{at}gmail.com Atsumasa Kurozumi 1 Department of Cardiovascular Medicine, Graduate School of Medicine, The University of Tokyo , Tokyo, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ryohei Tanaka 1 Department of Cardiovascular Medicine, Graduate School of Medicine, The University of Tokyo , Tokyo, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Shun Minatsuki 1 Department of Cardiovascular Medicine, Graduate School of Medicine, The University of Tokyo , Tokyo, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Junichi Ishida 1 Department of Cardiovascular Medicine, Graduate School of Medicine, The University of Tokyo , Tokyo, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Takayuki Isagawa 3 Division of Bioconvergence, Center for Molecular Medicine, Jichi Medical University , Tochigi, Japan 4 Data Science Center, Jichi Medical University , Tochigi, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Satoshi Kodera 1 Department of Cardiovascular Medicine, Graduate School of Medicine, The University of Tokyo 
, Tokyo, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: mitou.tky{at}gmail.com Norihiko Takeda 1 Department of Cardiovascular Medicine, Graduate School of Medicine, The University of Tokyo , Tokyo, Japan 3 Division of Bioconvergence, Center for Molecular Medicine, Jichi Medical University , Tochigi, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Single-cell RNA sequencing (scRNA-seq) of patient samples holds promise for understanding disease mechanisms, but faces the challenge of excessive cost and effort in acquisition, processing, and data analysis, making it essential to leverage existing data. Pulmonary arterial hypertension (PAH) is a refractory disease characterized by pulmonary vascular remodeling, and access to patient specimens is limited due to difficulties in tissue collection. In this study, we employed transfer learning with Geneformer, a deep learning algorithm pre-trained with scRNA-seq datasets, and fine-tuned it with public PAH lung tissue data to identify the disease-relevant genes. The resulting algorithm, which we named PAH-former, demonstrated that its prediction accuracy varied significantly depending on the dataset used for fine-tuning. PAH-former enabled us to perform in silico perturbation analysis and identified PAH-related genes. Loss of function of PAH-related genes in human pulmonary artery endothelial cells increased the expression of SOX18 , a signature gene of PAH. This integration of artificial intelligence and biological experiments can significantly advance our understanding of molecular mechanisms of PAH. Introduction PAH is a severe and complex disease marked by high pulmonary arterial pressure, causing right heart failure and death 1 . 
Pulmonary vascular remodeling is a common and critical pathogenic feature of PAH. This process, considered largely irreversible, involves the dysfunction of endothelial cells (ECs), vascular smooth muscle cells, and fibroblasts, as well as the participation of immune cells 2 . However, effective treatments are lacking due to its rarity and the absence of accurate in vivo models mimicking human disease. Identifying drug targets is thus challenging, often requiring lengthy knockout animal validation. Moreover, understanding PAH fully requires cell-specific analysis to clarify each cell type’s molecular roles. Therefore, innovative methods are essential to overcome these hurdles and discover effective PAH therapy targets. Notably, SRY-Box Transcription Factor 18 (SOX18) , a key transcription factor, has been identified as a signature gene whose expression is significantly upregulated in PAH, particularly in endothelial cells. Its involvement in angiogenesis and endothelial function highlights its importance in PAH pathogenesis, making it a valuable indicator of disease-like cellular states 3 , 4 . scRNA-seq technologies have emerged as powerful tools for dissecting complex diseases, offering unprecedented resolution to identify key genes and pathways within individual cell populations. However, analyzing these vast datasets and translating them into useful insights is not straightforward. One major hurdle lies in the absence of robust algorithms for prioritizing candidate disease genes from the extensive lists generated by differential gene expression (DEG) analysis. The selection process often relies on subjective criteria and the specific databases employed, potentially introducing bias and limiting reproducibility. Furthermore, DEG analysis alone does not guarantee the identified genes are causally implicated in the disease pathogenesis. 
In many rare diseases like PAH, obtaining large cohorts of human samples for comprehensive validation remains a significant bottleneck due to the severity of the disease and the technical difficulties of tissue collection. Geneformer 5 is a foundational transformer model pre-trained on a large-scale corpus of single-cell transcriptomes to enable context-aware predictions in network biology. It was originally trained on approximately 30 million single-cell transcriptomes in June 2021, and later expanded to about 95 million transcriptomes in April 2024. Some researchers have already applied Geneformer to a variety of downstream tasks. Mellors et al. fine-tuned it with bulk tumor gene expression data and proposed a novel transformer model predicting the tissue of origin for cancers 6 . Chen et al. developed a model to predict tumor-restricting factors in the colorectal tumor microenvironment using a cancer-tuned Geneformer and in silico treatment analysis 6 , 7 . Wang et al. constructed context-specific brain gene regulatory networks. They fine-tuned Geneformer with brain single nucleus RNA-seq data and conducted in silico gene perturbation studies. They further applied these networks to the study of autism spectrum disorder 8 . Geneformer, with its ability to learn complex gene expression patterns and relationships from massive transcriptomic data in various conditions, offers an unbiased and robust approach to identify disease-associated genes beyond DEG analysis. Although applications in understanding clonal pathologies such as cancer have been reported, there is no precedent for its use in elucidating complex systemic conditions like cardiovascular diseases. This is due to the lack of sufficient published data sets for fine-tuning in this area and the absence of studies that have experimentally validated the application of Geneformer. 
In this study, to overcome these challenges and improve scRNA-seq data analysis quality, we built a novel platform based on Geneformer (PAH-former) and trained it based on public data of PAH. We also tested the effectiveness of addition of datasets in improving prediction accuracy and validated the established models by in vitro experiments. Our approach not only avoids the limitations of traditional DEG analysis based methods but also demonstrates the broader applicability of Geneformer based fine-tuning as a powerful strategy for identifying disease associated genes. This study leverages our novel platform to identify and validate new disease associated genes of PAH, promising to advance our understanding of cell specific disease mechanisms and pave the way for novel therapeutic strategies. Results We conducted fine-tuning of Geneformer by public scRNA-seq analysis data to create “PAH-former”, which can efficiently detect PAH associated genes ( Fig. 1A ). PAH-former was trained using publicly available idiopathic pulmonary arterial hypertension (IPAH) datasets and can be utilized for various downstream analyses, such as cell type prediction and in silico perturbation. Single-cell data of IPAH is very limited and we primarily utilized data from GSE169471. First, we re-performed clustering and cell type annotation using the raw data from GSE169471 with CellTypist v2.0 9 . As a result, we achieved clustering and cell type annotation comparable to the t-SNE presented in the original paper 3 ( Fig. 1B ). We proceeded to map the cells, distinguishing between PAH and healthy groups. Our analysis revealed that each cluster contained cells from both control and PAH, aligning with the results presented in the original paper ( Fig 1C ). As will be discussed later, the original paper demonstrated upregulated expression of the transcription factor SOX18 in endothelial cells of the PAH group. 
When we mapped the expression levels of SOX18, we similarly observed its selective expression in endothelial cells ( Fig 1D ). These results collectively indicate the accuracy of our clustering and annotation. Download figure Open in new tab Figure 1. Public single cell RNA-seq data of pulmonary arterial hypertension lung and its reanalysis. (A) Schematic of the PAH-former development pipeline. We created a new AI tool that understands the gene expression network of pulmonary arterial hypertension (PAH) by fine-tuning Geneformer, a transfer learning tool that has been trained on 1 billion single-cell analysis data, with publicly available PAH single-cell analysis data. The aim is to extract the genes that are involved in PAH. scRNA-seq, single-cell RNA sequencing; t-SNE, t-distributed stochastic neighbor embedding; PAH, pulmonary arterial hypertension. (B) t-SNE plot showing cell type prediction. Cell type annotation of the GSE169471 data was performed using CellTypist v2.0. t-SNE, t-distributed stochastic neighbor embedding. (C) t-SNE plot showing sample type distribution. CON, control; IPAH, idiopathic pulmonary arterial hypertension; t-SNE, t-distributed stochastic neighbor embedding. (D) t-SNE plot visualizing SOX18 expression levels. t-SNE, t-distributed stochastic neighbor embedding. Training Dataset Setup According to Geneformer’s in silico perturbation of cardiomyocytes in the original paper, the training data included 93,589 cardiomyocytes (non-failing, n = 9; hypertrophic, n = 11; dilated, n = 9); the test data consisted of 39,006 cardiomyocytes (non-failing, n = 4; hypertrophic, n = 4; dilated, n = 2) 5 . However, publicly available IPAH data is very limited, and its quality also varies, making it potentially difficult to secure a sufficient amount for fine-tuning of Geneformer for IPAH. While using large datasets for fine-tuning carries a risk of overfitting, it has been reported that fine-tuning can be performed efficiently even with small datasets 10 , 11 . 
We compared three distinct training approaches and found that the inclusion of large control data from Human Lung Cell Atlas (HLCA) significantly enhanced the model’s accuracy and F1 score in cell classification. Figure 2A presents a table outlining the datasets used to train the three fine-tuning models (model A, model B, and model C) evaluated in this study. Model A was fine-tuned using only data from GSE169471; its training data included 6 samples (control = 4 samples, 5106 cells; IPAH = 2 samples, 6514 cells). Model A exhibited very poor performance, achieving an accuracy of 0.523 and an F1 score of 0.522 ( Fig. 2B, C ). Download figure Open in new tab Figure 2 . Dataset selection for the fine-tuning of Geneformer. (A) Datasets used for Geneformer model training. This table outlines the composition of training data for each model. HLCA, Human Lung Cell Atlas. (B, C) Prediction likelihood heatmap for cell classification and confusion matrix of Model A. (D, E) Prediction likelihood heatmap for cell classification and confusion matrix of Model B. (F, G) Prediction likelihood heatmap for cell classification and confusion matrix of Model C. CON, control; IPAH, idiopathic pulmonary arterial hypertension. Model B augmented the GSE169471 dataset with a large quantity of healthy control cells sampled from HLCA (102 samples, randomly sampled 10,000 cells). Model B demonstrated a dramatic improvement in performance, achieving an impressive accuracy of 0.847 and an F1 score of 0.748. This substantial increase highlights the effectiveness of augmenting training data with large quantities of relevant control cells to better define the baseline healthy state ( Fig. 2D, E ). Model C represents an extended training approach, incorporating additional IPAH data from other GEO datasets (GSE210248, GSE185479; total 8 samples, randomly sampled 6,000 cells) along with GSE169471 and sampled HLCA data. Model C showed an accuracy of 0.774 and an F1 score of 0.733 ( Fig. 2F, G ). 
While superior to Model A, its performance was slightly lower than that of Model B. This observation suggests that for this specific task, the quality of control data from HLCA had a more significant positive impact than simply increasing the number of IPAH data. In silico perturbation analysis To investigate the impact of specific genes on cell state in PAH, we performed in silico deletion and overexpression analyses using Geneformer fine-tuned by PAH scRNA-seq data (GSE169471) ( Fig. 3A ). To train Geneformer on scRNA-seq data, we performed rank value encoding, a method of ranking all genes in descending order based on their expression levels. In silico deletion refers to removing a specific gene from a list, while in silico overexpression refers to setting the rank of that gene to the first position. We conducted in silico perturbation analysis to investigate the directional shifts in cell state (cell embedding) following the deletion or overexpression of individual genes. Specifically, we assessed whether the cell state shifted from a healthy phenotype towards a PAH phenotype, or conversely, from the PAH phenotype towards the healthy phenotype. This analysis encompassed four distinct perturbation scenarios. As a result, we generated lists of putative disease associated genes for each of the four scenarios, which included genes previously implicated in the disease. Herein, we report the top 40 candidate genes identified for each scenario, along with the corresponding results of GO analysis of genes in each list ( Fig. 3B - E). Download figure Open in new tab Figure 3. In silico perturbation using PAH-former and extraction of disease-related genes. (A) The workflow for in silico perturbation analysis using the fine-tuned Geneformer model (PAH-former). In silico manipulation (deletion or overexpression) results in shifts in cell embedding (representing cell state). IPAH, idiopathic pulmonary artery hypertension. 
(B-E) Gene Ontology (GO) analysis of candidate genes identified by in silico perturbation analysis by PAH-former and top 40 genes that shift cell embedding most for each of the four directions. (B) Gene Ontology (GO) analysis for candidate genes whose in silico deletion shifts the cell state towards IPAH. (C) Gene Ontology (GO) analysis for candidate genes whose in silico overexpression shifts the cell state towards control. (D) Gene Ontology (GO) analysis for candidate genes whose in silico overexpression shifts the cell state towards IPAH. (E) Gene Ontology (GO) analysis for candidate genes whose in silico deletion shifts the cell state towards control. The number of candidate genes extracted by the fine-tuned Geneformer whose in silico deletion shifted the cell embedding from the control to the PAH state was 134 (Supplementary Table X). Among the identified genes, while some were previously reported, the majority of them were novel. Previously reported genes included HMGB2 (high-mobility group box 2). HMGB2 is upregulated in PAH, and it is mentioned as a significant contributor to the pathogenesis of pulmonary hypertension by promoting inflammation and vascular remodeling 12 . In addition, SOD2 (superoxide dismutase 2) was also on the gene list, and its tissue specific, epigenetic downregulation initiates and sustains PAH by impairing redox signaling and promoting a proliferative, apoptosis-resistant pulmonary artery smooth muscle cell phenotype 13 . This result is consistent with the cell embedding shifting from control to PAH state through in silico deletion. As described above, in silico deletion using the fine-tuned Geneformer successfully identified a range of PAH-related genes, including previously known genes. Notably, many genes in this list were not previously linked to pulmonary hypertension, suggesting them as novel candidates for exploring disease-related molecular functions and pathways. 
There was minimal overlap between the gene set identified by our fine-tuned Geneformer and that derived from the DEG analysis in the original article, with only two genes being common to both lists ( Fig 4A , 4B). Download figure Open in new tab Figure 4. In Silico perturbation in pulmonary endothelial cells using PAH-former and in vitro validation. (A) Table showing partial lists of genes identified in endothelial cells by Differential Gene Expression (DEG) analysis (FDR < 10%) in the original article and genes enriched in the GO (gene ontology) pathways of blood vessel development pathway and cardiovascular development pathway. (B) Venn diagram comparing the number of candidate genes identified by Differential Gene Expression (DEG) analysis in the original article and the number of disease-associated candidate genes identified by PAH-former. PAH, pulmonary arterial hypertension. (C) Knockdown experiments of selected candidate genes ( S100A6, HSP90AA1, TXNIP, and MT2A ) using siRNA in Human Pulmonary Artery Endothelial Cells (HPAECs). Top panels show the knockdown efficiency of each target gene relative to control siRNA, displaying relative mRNA expression levels at 48 hours and indicating successful knockdown (data normalized to GAPDH expression, n = 9). Bottom panels show the relative mRNA expression levels of SOX18 at 48 hours after knockdown of each candidate gene compared to control siRNA (data normalized to GAPDH expression, n = 9). Graphs are presented as mean ± standard error of the mean (SEM). Statistical significance is indicated as: * p < 0.05, ** p < 0.01, **** p < 0.0001, ns: not significant. Enrichment analysis Enrichment analysis of 134 candidate genes that shifted cell embedding from control to PAH state after in silico deletion highlighted the enrichment in TNF-α/NF-κB signaling, regulation of inflammatory response, oxidative stress response, and VEGFA/VEGFR2 signaling pathways. 
These findings indicate that our approach successfully identified pathways associated with PAH using PAH-former. Target Gene Knockdown Among the genes listed as PAH-related, we identified four genes ( S100 Calcium Binding Protein A6 [S100A6], Heat Shock Protein 90 Alpha Family Class A Member 1 [HSP90AA1], Thioredoxin Interacting Protein [TXNIP], and Metallothionein 2A [MT2A] ) that have not been previously reported in association with PAH. In the original paper on which our dataset is based, several transcription factors, particularly SOX18 , were found to be upregulated and implicated in regulating the PAH endothelial cell transcriptome. SOX18 is also reported to increase the expression of nicotinamide phosphoribosyltransferase (NAMPT) and is shown to be involved in the pathophysiology of PAH via NAMPT 4 . Therefore, in order to determine whether the cellular state was approaching a PAH-like state after knockdown of candidate genes found by PAH-former, we chose to compare mRNA expression of SOX18 . We performed a knockdown experiment of the four candidate genes using RNA interference with human pulmonary arterial endothelial cells (HPAECs). For each gene targeted, the knockdown was successful. Interestingly, the knockdown of three of these four genes resulted in a significant increase in SOX18 expression 48 hours after siRNA transfection ( Fig. 4C ), ensuring the validity of the candidates extracted by PAH-former. Discussion It has been demonstrated that disruption of certain genes can lead to the development of phenotypes indicative of PAH. These genes include Prolyl Hydroxylase Domain-Containing Protein 2 (PHD2), GATA Binding Protein 6 (GATA-6), Bone Morphogenetic Protein Receptor Type 2 (BMPR2), Tet Methylcytosine Dioxygenase 2 (TET2), NLR Family CARD Domain Containing 3 (NLRC3), and AMP-Activated Protein Kinase (AMPK) 14 – 19 . 
BMPR2 mutations, for example, cause PAH through mechanisms such as endothelial dysfunction, smooth muscle cell abnormalities, mitochondrial dysfunction, and inflammatory responses 20 – 22 . We reanalysed the open-source single cell RNA-seq data of IPAH 3 . Based on the findings by Saygin et al. , the expression level of the transcription factor SOX18 was significantly upregulated in endothelial cells from IPAH patients. SOX18 is known to be involved in angiogenesis and the regulation of endothelial barrier function, suggesting its critical role in IPAH pathogenesis 23 – 25 . Sun et al. elucidated the complex regulatory network governing NAMPT expression in PAH and found that SOX18 plays an important role alongside STAT5, SOX17, and HIF-2α. SOX18, while exhibiting context-dependent effects, is a key regulator of VEGF-induced NAMPT promoter activity, and was shown to act in a manner that is opposite to SOX17 4 . Therefore, we decided to use SOX18 mRNA expression to assess whether the cellular state was approaching an IPAH-like state following knockdown of candidate genes identified by PAH-former. The candidate genes were S100A6 , HSP90AA1 , TXNIP and MT2A . Although these genes have not been previously reported in association with PAH, their known biological functions offer plausible connections to disease mechanisms. For instance, S100A6 (also called Calcyclin, Cacy ) is a Ca2+-binding protein involved in cell proliferation and stress, with previous research suggesting its role in regulating antiproliferative pathways and potential interactions with proteins like Calcyclin-Binding Protein and Siah-1 Interacting Protein (CacyBP/SIP) in PAH models, hypothetically impacting vascular remodeling 26 – 28 . 
HSP90AA1 plays a crucial role in maintenance of endothelial nitric-oxide synthase (eNOS) dimer stability in pulmonary arterial endothelial cells and is an upregulated immune-related gene in PAH, implying its involvement in endothelial dysfunction and inflammatory processes 29 , 30 . TXNIP mediates oxidative stress by inhibiting thioredoxin activity, a system linked to PAH progression, suggesting that its knockdown could enhance pro-PAH conditions 31 – 34 . Lastly, MT2A, a member of the metallothionein family, possesses antioxidant properties and is elevated in PAH patients, indicating its potential role as a biomarker and a defense against oxidative stress; its deletion might compromise this protection, leading to PAH-like pathology 35 . Although these genes have entirely distinct functions, the common upregulation of SOX18 observed upon their knockdown in HPAECs is particularly noteworthy. This consistency across multiple novel genes suggests that our PAH-former approach successfully identified candidates that influence a key PAH-associated cellular phenotype. In this way, fine-tuning Geneformer using the PAH dataset and subsequently conducting in silico perturbation analysis enabled us to identify a significant number of previously unknown disease-associated genes in PAH. Our approach provides a significant benefit over conventional DEG analysis by finding genes that causally affect cellular states, rather than simply indicating changes in expression levels. This ability to pinpoint functionally relevant genes, even from limited patient samples, demonstrates the unique utility of a transfer learning framework like PAH-former. While further validation is required, this approach offers a comprehensive means to explore therapeutic targets and has the potential to enhance the efficiency of fundamental experiments, reducing the effort and cost required for molecular function experiments. 
Moreover, the Geneformer and public database combination (fine-tuning) represents a promising new platform applicable to a wide range of diseases, particularly rare diseases where patient samples are scarce and the underlying pathological mechanisms are poorly understood. We believe it can greatly accelerate the advancement of our understanding of disease molecular mechanisms. While our platform demonstrated promising capabilities, several limitations warrant consideration. First, the gene outputs are inherently dataset dependent. Different datasets, even those examining similar biological contexts, may exhibit variations in gene expression profiles, potentially leading to discrepancies in the identified key genes or pathways. Second, the fine-tuning process of our model is sensitive to the choice of fine-tuning datasets and hyperparameters, such as the learning rate. Variations in these parameters can lead to different model outputs and potentially affect the robustness of our findings. Third, our study’s focus on SOX18 mRNA expression as the primary validation metric is a limitation. While we have analyzed the expression levels of SOX18, we have not yet validated whether these expression changes are directly linked to corresponding changes in cellular phenotypes relevant to PAH. Finally, we must acknowledge the multifactorial nature of PAH. Our gene-centric approach, while informative, may not fully capture the complexity of PAH, which likely involves intricate interactions of multiple genetic and environmental factors beyond single gene mutations, as well as cell-cell interaction change within the organs. In conclusion, our novel Geneformer-based fine-tuning platform provides a powerful and broadly applicable strategy for disease-related gene discovery. This approach enables the identification and validation of new candidate genes, promising to advance cell-specific mechanistic insights and efficient therapeutic development for PAH. 
Methods Creating IPAH Dataset Datasets utilized for fine-tuning and testing the Geneformer model were acquired from a publicly available database in the NCBI Gene Expression Omnibus (GEO). Specifically, we included datasets GSE169471 (six control samples, three idiopathic pulmonary arterial hypertension [IPAH] samples), GSE210248 (three control samples, three IPAH samples), and GSE185479 (three control samples, three IPAH samples). Additionally, we incorporated a subset of the integrated Human Lung Cell Atlas (HLCA) v1.0 core dataset, specifically selecting samples annotated as lung parenchyma to augment the control dataset. Original datasets were obtained in ScanPy AnnData (h5ad) format. Quality control (QC) for GSE169471, GSE210248, and GSE185479 was conducted using the following criteria: total gene counts per cell ranging between 200 and 2500, and mitochondrial gene content below 5%. The HLCA dataset was used without further QC, as it was provided in a pre-processed format. For compatibility with Geneformer tokenization requirements, genes in the GSE210248 and GSE185479 datasets were annotated with their corresponding Ensembl IDs using the MyGene library. The GSE169471 and HLCA datasets already contained Ensembl IDs and were used directly without further modification. The prepared datasets were partitioned into three distinct groupings for downstream analysis: GSE169471 only: four control samples and two IPAH samples for training, and two control samples and one IPAH sample for testing. GSE169471 combined with HLCA: four control samples from GSE169471 plus 102 control samples from HLCA, and two IPAH samples for training; two control samples from GSE169471 plus five control samples from HLCA, and one IPAH sample for testing. 
Combined datasets of GSE169471, GSE210248, GSE185479, and HLCA: For training, four control samples from GSE169471, two control samples each from GSE210248 and GSE185479, plus 102 control samples from HLCA; and two IPAH samples each from GSE169471, GSE210248, and GSE185479. For testing, two control samples from GSE169471, one control sample each from GSE210248 and GSE185479, plus five control samples from HLCA; and one IPAH sample each from GSE169471, GSE210248, and GSE185479. Fine-tuning of Geneformer Fine-tuning was performed to classify PAH versus control cells by leveraging the Geneformer model pre-trained on extensive transcriptional data. Specifically, we obtained the gf-12L-95M-i4096 (12-layer Transformer block, 4,096-token maximum sequence length) model from the ctheodoris/Geneformer repository on the Hugging Face Hub. For implementation, we used PyTorch along with the Hugging Face Transformers library and executed training on an H100 GPU (NVIDIA). We adapted the pre-training setup, which initially employed a masked token prediction head, by replacing it with a sequence classification head suitable for the binary classification task (PAH vs. control). The fine-tuning hyperparameters were set as follows: a learning rate of 2×10 −5 , a batch size of 64, a cosine scheduler with 100 warmup steps, and a total of eight training epochs. To mitigate overfitting, the lower four Transformer layers remained frozen during training, thereby focusing updates on the upper layers while preserving the foundational representational capacity learned during pre-training. Three separate models were created, each corresponding to one of the dataset partitions (A, B, and C) described above. These models are referred to as model A, model B, and model C, respectively. Each model was fine-tuned independently using its respective training split and evaluated on the corresponding test set to assess its classification performance. 
In silico perturbation In silico perturbation was conducted on models A, B, and C, following the approach described previously in the Geneformer study. Briefly, this method perturbed the gene expression ranking to simulate gene inhibition or activation within single-cell transcriptomes. Genes targeted for perturbation were comprehensively selected from those expressed in both control and PAH samples. In silico deletion was simulated by removing targeted genes from the rank encoding, measuring perturbation effects via cosine similarity changes in both cell-level and gene-level embeddings. Conversely, in silico overexpression was simulated by moving the targeted genes to the top of the rank encoding, modeling the activation of these genes. Perturbations were executed using the test splits of datasets corresponding to each model. Two scenarios were explored: perturbations transitioning from control to PAH states, and vice versa, each involving both in silico deletion and overexpression strategies. Genes exhibiting a false discovery rate (FDR) below 0.05 and demonstrating a decreased cosine similarity toward the target state upon perturbation were considered promising candidates. Enrichment analysis To identify potential driver mechanisms underlying PAH pathogenesis, we applied Metascape 36 to genes extracted by our PAH-former. The results of each perturbation output by Geneformer were ranked by the cosine shift towards the goal state ("Shift_to_goal_end") (largest first) and the FDR (smallest first). We defined genes with a positive "Shift_to_goal_end" and an FDR < 0.05 as candidate disease-related genes. In total, 134 candidate genes were identified whose in silico deletion shifted the cell state towards PAH, and all of them were used for enrichment analysis. For each of the other perturbation directions, more than 200 candidate genes were identified, and the top 200 genes were used for enrichment analysis. 
Target Gene Knockdown using RNA Interference HPAECs were commercially obtained (PromoCell, C-12241) and handled according to the provider’s instructions. The cells were seeded in 96-well plates at a density of 2,400 cells/well in complete growth medium without antibiotics and incubated overnight. For the in vitro knockdown experiments, the following siRNAs were purchased from Thermo Fisher Scientific: Silencer™ Select siRNAs for S100A6 (ID: s12418), TXNIP (ID: s12418), HSP90AA1 (ID: s6993), and MT2A (ID: s194629). siRNA was diluted in Opti-MEM I Reduced Serum Medium (Thermo Fisher Scientific, 31985070). For negative control, Silencer™ Select Negative Control No. 2 siRNA (Thermo Fisher Scientific, 4390846) stock solution was diluted with Opti-MEM I. Lipofectamine RNAiMAX reagent (Thermo Fisher Scientific, 13778150) was diluted with Opti-MEM. Equal volumes of diluted siRNA and diluted Lipofectamine RNAiMAX were combined and incubated at room temperature for 15 minutes to allow siRNA-Lipofectamine RNAiMAX complex formation. The volume of diluted Lipofectamine RNAiMAX solution was adjusted to use 0.2 µL of Lipofectamine RNAiMAX per well. The final concentration of each siRNA was 20 nM. Subsequently, siRNA-Lipofectamine RNAiMAX complexes were added to the cells. Cells were incubated with the complexes in a final volume of 120 µL per well at 37°C in a 5% CO₂ incubator for 48 hours post-transfection. Gene knockdown efficiency was evaluated at the indicated time points by quantitative PCR using QuantStudio 6 Flex Real-Time PCR System (Thermo Fisher Scientific). SOX18 mRNA levels were evaluated for each condition at the same time. Data were normalized to the GAPDH expression level. The primer sequences used for the quantitative PCR analysis were as follows: View this table: View inline View popup Statistical information All quantitative data are presented as mean ± standard deviation (SD). 
Statistical significance of differences between two groups was determined using a two-tailed Mann-Whitney U test. A P value of less than 0.05 was considered statistically significant. Specifically, for the RNA interference experiments, the mRNA expression levels of target genes ( S100A6, HSP90AA1, TXNIP, MT2A ) and SOX18 in HPAECs following siRNA-mediated knockdown were compared against control siRNA-treated cells. For the knockdown efficiency assessment: S100A6 knockdown: Control (n = 9) vs. S100A6 KD (n = 9), P < 0.0001. HSP90AA1 knockdown: Control (n = 9) vs. HSP90AA1 KD (n = 9), P < 0.0001. TXNIP knockdown: Control (n = 9) vs. TXNIP KD (n = 9), P < 0.0001. MT2A knockdown: Control (n = 9) vs. MT2A KD (n = 9), P < 0.0001. For the assessment of SOX18 mRNA expression changes following knockdown of candidate genes: S100A6 knockdown: Control (n = 3) vs. S100A6 KD (n = 8), P = 0.0485. HSP90AA1 knockdown: Control (n = 3) vs. HSP90AA1 KD (n = 9), P = 0.0091. TXNIP knockdown: Control (n = 3) vs. TXNIP KD (n = 9), P = 0.0091. MT2A knockdown: Control (n = 3) vs. MT2A KD (n = 8), P = 0.1939. All statistical analyses were performed using GraphPad Prism version 10.4.2 (Dotmatics). Data availability The datasets generated and analyzed during the current study are available in the Figshare repository. Code availability The pretrained PAH-former models, including those trained on three distinct datasets, are publicly available via the Hugging Face Hub repository ( https://huggingface.co/so298/PAH-former ). Each model is published on a separate branch: "base_dataset," "add_hlca," and "add_other_data." Additionally, the source code for the PAH-former analyses is openly accessible through our GitHub repository ( https://github.com/UTcardiology/PAH-former-analysis ). Author contributions T.K. and S.H. designed the study, interpreted the results, and wrote the manuscript. M.I. conceived the study and supervised the project. S.H. 
developed the computational model and performed in silico perturbation analyses. T.K. designed and performed the RNA interference experiments and analyzed the experimental data. S.K. provided the computational resources necessary for the AI model and supervised the project. A.K., R.T., S.M., J.I., T.I. and N.T. supervised the project. All authors reviewed and approved the final manuscript. Competing interests None. Acknowledgements This work was supported by the Cross-ministerial Strategic Innovation Promotion Program (SIP) on “Integrated Health Care System” Grant Number JPJ012425. We would like to thank Yukiko Kaneko for her technical assistance. Funder Information Declared Council for Science, Technology and Innovation, https://ror.org/007tjaq50 , JPJ012425 Footnotes https://doi.org/10.6084/m9.figshare.29321678 References 1. ↵ Humbert , M. et al. Pathology and pathobiology of pulmonary hypertension: state of the art and research perspectives . Eur Respir J 53 , ( 2019 ). 2. ↵ Huertas , A. , Tu , L. , Humbert , M. & Guignabert , C . Chronic inflammation within the vascular wall in pulmonary arterial hypertension: more than a spectator . Cardiovasc Res 116 , 885 – 893 ( 2020 ). OpenUrl CrossRef PubMed 3. ↵ Saygin , D. et al. Transcriptional profiling of lung cell populations in idiopathic pulmonary arterial hypertension . Pulm Circ 10 , ( 2020 ). 4. ↵ Sun , X. et al. Direct Extracellular NAMPT Involvement in Pulmonary Hypertension and Vascular Remodeling. Transcriptional Regulation by SOX and HIF-2α . Am J Respir Cell Mol Biol 63 , 92 – 103 ( 2020 ). OpenUrl CrossRef PubMed 5. ↵ Theodoris , C. V. et al. Transfer learning enables predictions in network biology . Nature 618 , 616 – 624 ( 2023 ). OpenUrl CrossRef PubMed 6. ↵ Mellors , T. & Spitmann , M. Predicting Tissue of Origin from Bulk Tumor Gene Expression using a Pre-trained Transformer Model . bioRxiv 2024.12.01.626105 ( 2024 ) doi: 10.1101/2024.12.01.626105 . OpenUrl Abstract / FREE Full Text 7. ↵ Chen , H. , et al. 
Quantized multi-task learning for context-specific representations of gene network dynamics . bioRxiv ( 2024 ) doi: 10.1101/2024.08.16.608180 . OpenUrl Abstract / FREE Full Text 8. ↵ Wang , L. , Yang , E. , Stolovitzky , G. , Schadt , E. & Zhu , J. Multiple Types of Context-Specific Brain Causal Regulatory Networks and their Applications to Autism Spectrum Disorder . bioRxiv 2025.01.17.633619 ( 2025 ) doi: 10.1101/2025.01.17.633619 . OpenUrl Abstract / FREE Full Text 9. ↵ Xu , C. et al. Automatic cell-type harmonization and integration across Human Cell Atlas datasets . Cell 186 , 5876 – 5891 .e20 ( 2023 ). OpenUrl CrossRef PubMed 10. ↵ Zhang , T. , Wu , F. , Katiyar , A. , Weinberger , K. Q. & Artzi , Y . Revisiting Few-sample BERT Fine-tuning . Preprint at http://arXiv.org/abs/2006.05987 ( 2020 ). 11. ↵ Mosbach , M. , Andriushchenko , M. & Klakow , D . On the Stability of Fine-tuning BERT: Misconceptions, Explanations, and Strong Baselines. Preprint at http://arXiv.org/abs/2006.04884 ( 2020 ). 12. ↵ Kong , D. et al. HMGB2 Release Promotes Pulmonary Hypertension and Predicts Severity and Mortality of Patients With Pulmonary Arterial Hypertension . Arterioscler Thromb Vasc Biol 44 , e172 – e195 ( 2024 ). OpenUrl PubMed 13. ↵ Archer , S. L. et al. Epigenetic attenuation of mitochondrial superoxide dismutase 2 in pulmonary arterial hypertension: a basis for excessive cell proliferation and a new therapeutic target . Circulation 121 , 2661 – 2671 ( 2010 ). OpenUrl Abstract / FREE Full Text 14. ↵ Dai , Z. , Li , M. , Wharton , J. , Zhu , M. M. & Zhao , Y.-Y . Prolyl-4 Hydroxylase 2 (PHD2) Deficiency in Endothelial Cells and Hematopoietic Cells Induces Obliterative Vascular Remodeling and Severe Pulmonary Arterial Hypertension in Mice and Humans Through Hypoxia-Inducible Factor-2α . Circulation 133 , 2447 – 2458 ( 2016 ). OpenUrl Abstract / FREE Full Text 15. Ghatnekar , A. et al. Endothelial GATA-6 deficiency promotes pulmonary arterial hypertension . 
Am J Pathol 182 , 2391 – 2406 ( 2013 ). OpenUrl CrossRef PubMed 16. Hong , K.-H. et al. Genetic ablation of the BMPR2 gene in pulmonary endothelium is sufficient to predispose to pulmonary arterial hypertension . Circulation 118 , 722 – 730 ( 2008 ). OpenUrl Abstract / FREE Full Text 17. Potus , F. et al. Novel Mutations and Decreased Expression of the Epigenetic Regulator in Pulmonary Arterial Hypertension . Circulation 141 , 1986 – 2000 ( 2020 ). OpenUrl CrossRef PubMed 18. Maimaitiaili , N. et al. NLRC3 deficiency promotes hypoxia-induced pulmonary hypertension development via IKK/NF-κB p65/HIF-1α pathway . Exp Cell Res 431 , 113755 ( 2023 ). 19. ↵ Zhang , J. et al. AMP-activated Protein Kinase Phosphorylation of Angiotensin-Converting Enzyme 2 in Endothelium Mitigates Pulmonary Hypertension . Am J Respir Crit Care Med 198 , 509 – 520 ( 2018 ). OpenUrl CrossRef PubMed 20. ↵ Diebold , I. et al. BMPR2 preserves mitochondrial function and DNA during reoxygenation to promote endothelial cell survival and reverse pulmonary hypertension . Cell Metab 21 , 596 – 608 ( 2015 ). OpenUrl CrossRef PubMed 21. Wang , L. et al. Dysregulated Smooth Muscle Cell BMPR2-ARRB2 Axis Causes Pulmonary Hypertension . Circ Res 132 , 545 – 564 ( 2023 ). OpenUrl CrossRef PubMed 22. ↵ Cuthbertson , I. , Morrell , N. W. & Caruso , P . Mutation and Metabolic Reprogramming in Pulmonary Arterial Hypertension . Circ Res 132 , 109 – 126 ( 2023 ). OpenUrl CrossRef PubMed 23. ↵ Cermenati , S. et al. Sox18 and Sox7 play redundant roles in vascular development . Blood 111 , 2657 – 2666 ( 2008 ). OpenUrl Abstract / FREE Full Text 24. Fontijn , R. D. et al. Adipose tissue-derived stromal cells acquire endothelial-like features upon reprogramming with SOX18 . Stem Cell Res 13 , 367 – 378 ( 2014 ). OpenUrl CrossRef PubMed 25. ↵ Downes , M. & Koopman , P . SOX18 and the transcriptional regulation of blood vessel development . Trends Cardiovasc Med 11 , 318 – 324 ( 2001 ). 
OpenUrl CrossRef PubMed Web of Science 26. ↵ Wang , Y. , Kang , X. , Kang , X. & Yang , F . S100A6: molecular function and biomarker role . Biomark Res 11 , 78 ( 2023 ). 27. Lerchenmüller , C. et al. S100A6 Regulates Endothelial Cell Cycle Progression by Attenuating Antiproliferative Signal Transducers and Activators of Transcription 1 Signaling . Arterioscler Thromb Vasc Biol 36 , 1854 – 1867 ( 2016 ). OpenUrl Abstract / FREE Full Text 28. ↵ Zhou , J. , Li , F. & Yang , Y . Protective effects of calcyclin-binding protein against pulmonary vascular remodeling in flow-associated pulmonary arterial hypertension . Respir Res 23 , 223 ( 2022 ). 29. ↵ Sun , X. et al. The mitochondrial redistribution of ENOS is regulated by AKT1 and dimer status . Nitric Oxide 152 , 90 – 100 ( 2024 ). OpenUrl CrossRef PubMed 30. ↵ He , X. et al. Identification of immune-associated signatures and potential therapeutic targets for pulmonary arterial hypertension . J Cell Mol Med 27 , 3864 – 3877 ( 2023 ). OpenUrl CrossRef PubMed 31. ↵ Liyanage , N. P. M. , Fernando , M. R. & Lou , M. F . Regulation of the bioavailability of thioredoxin in the lens by a specific thioredoxin-binding protein (TBP-2) . Exp Eye Res 85 , 270 – 279 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 32. Han , S. H. et al. VDUP1 upregulated by TGF-beta1 and 1,25-dihydorxyvitamin D3 inhibits tumor cell growth by blocking cell-cycle progression . Oncogene 22 , 4035 – 4046 ( 2003 ). OpenUrl CrossRef PubMed Web of Science 33. Shin , K.-H. , Kim , R. H. , Kim , R. H. , Kang , M. K. & Park , N.-H . hnRNP G elicits tumor-suppressive activity in part by upregulating the expression of Txnip . Biochem Biophys Res Commun 372 , 880 – 885 ( 2008 ). OpenUrl CrossRef PubMed 34. ↵ Zimmer , A. et al. Thioredoxin system activation is associated with the progression of experimental pulmonary arterial hypertension . Life Sci 284 , 119917 ( 2021 ). 35. Maarman , G. J . 
Pulmonary arterial hypertension and the potential roles of metallothioneins: A focused review . Life Sci 214 , 77 – 83 ( 2018 ). OpenUrl CrossRef PubMed 36. ↵ Zhou , Y. et al. Metascape provides a biologist-oriented resource for the analysis of systems-level datasets . Nat Commun 10 , 1523 ( 2019 ). OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted June 20, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following PAH-former: Transfer Learning for Efficient Discovery of Pulmonary Arterial Hypertension-Associated Genes Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share PAH-former: Transfer Learning for Efficient Discovery of Pulmonary Arterial Hypertension-Associated Genes Toshinaru Kawakami , Sosuke Hosokawa , Masamichi Ito , Atsumasa Kurozumi , Ryohei Tanaka , Shun Minatsuki , Junichi Ishida , Takayuki Isagawa , Satoshi Kodera , Norihiko Takeda bioRxiv 2025.06.16.660027; doi: https://doi.org/10.1101/2025.06.16.660027 Share This Article: Copy Citation Tools PAH-former: Transfer Learning for Efficient Discovery of Pulmonary Arterial Hypertension-Associated Genes Toshinaru Kawakami , Sosuke Hosokawa , Masamichi Ito , Atsumasa Kurozumi , Ryohei Tanaka , Shun Minatsuki , Junichi Ishida , Takayuki Isagawa , Satoshi Kodera , Norihiko Takeda bioRxiv 2025.06.16.660027; doi: https://doi.org/10.1101/2025.06.16.660027 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17690) Bioengineering (13892) Bioinformatics (41936) Biophysics (21451) Cancer Biology (18588) Cell Biology (25499) Clinical Trials (138) Developmental Biology (13378) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24320) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88603) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15152) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00