A Minimal Plasma Proteome-Based Biomarker Panel for Accurate Prostate Cancer Diagnosis

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 74,906 characters · extracted from preprint-html · click to expand
A Minimal Plasma Proteome-Based Biomarker Panel for Accurate Prostate Cancer Diagnosis | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results A Minimal Plasma Proteome-Based Biomarker Panel for Accurate Prostate Cancer Diagnosis View ORCID Profile Syed Ahsan Shahid , Ahmed Al-Harrasi , View ORCID Profile Adil Al-Siyabi doi: https://doi.org/10.1101/2025.11.05.686712 Syed Ahsan Shahid 1 Natural and Medical Sciences Research Center, University of Nizwa , Birkat Al-Mouz, Nizwa, 616, Oman Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Syed Ahsan Shahid 
Ahmed Al-Harrasi 1 Natural and Medical Sciences Research Center, University of Nizwa , Birkat Al-Mouz, Nizwa, 616, Oman Find this author on Google Scholar Find this author on PubMed Search for this author on this site Adil Al-Siyabi 1 Natural and Medical Sciences Research Center, University of Nizwa , Birkat Al-Mouz, Nizwa, 616, Oman Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Adil Al-Siyabi For correspondence: aalsiyabi{at}unizwa.edu.om Abstract Full Text Info/History Metrics Preview PDF Abstract Early and accurate diagnosis of prostate cancer (PRC) remains a major clinical challenge, particularly with existing biomarker panels relying on invasive sampling or large biomarker panels with limited interpretability. Here, we present a machine learning framework for discovering compact and biologically grounded plasma protein signatures for PRC classification using publicly available pan-cancer proteomic data. We coupled a genetic algorithm-based protein identification method with LASSO-regularized logistic regression to identify minimal protein subsets optimized for diagnostic performance. A 14-protein panel, recurrent across 1,000 genetic algorithm iterations, achieved a mean accuracy of 98.0%, an F1 score of 0.98, and an ROC AUC of 0.997 on a held-out test dataset. This performance exceeded models trained on high dimensionality data (>1,400 proteins) and surpassed published transcriptomic, methylomic, and cfDNA classifiers, many of which reported AUCs less than 0.91. Functional analysis revealed enrichment in protease binding and DNA repair pathways, with known markers such as beta-microseminoprotein (MSMB) and poly(ADP-ribose) polymerase 1 (PARP1) appearing alongside under-characterized proteins like IGSF3 and XG. Models trained only on previously reported PRC-associated proteins showed lower performance, highlighting the added diagnostic value of including novel, data-driven candidates. 
This study outlines a scalable proteomic workflow and demonstrates that high diagnostic performance can be achieved using small, interpretable panels derived from blood-based proteomics. The findings lay the foundation for the development of interpretable, clinically deployable assays for PRC detection and risk stratification. Download figure Open in new tab Graphical abstract Overview of the study workflow: blood-derived proteomic profiles from cancer and normal cohorts were analyzed using machine learning to identify a compact diagnostic panel, enabling clinical decision support for new patients. Introduction Prostate cancer is one of the most commonly diagnosed malignancies among men worldwide and remains a major contributor to cancer-related mortality [ 1 ]. According to the Global Cancer Observatory (GLOBOCAN) and Global Burden of Diseases (GBD) reports, prostate cancer accounted for over 1.4 million new cases and approximately 375,000 deaths globally in 2020 [ 2 , 3 ]. It is the leading cancer among men in more than 100 countries and ranks among the top two cancers in terms of incidence across nearly every continent [ 1 ]. The burden of prostate cancer continues to rise, largely driven by aging populations, lifestyle changes, and improved diagnostic access in high-income and transitional economies [ 4 ]. Beyond its epidemiological footprint, prostate cancer significantly affects the physical, emotional, and social well-being of patients, particularly when diagnosed at advanced stages, where therapeutic options are limited and the risk of metastasis and mortality is higher [ 5 ]. It has been reported by the Lancet Commission on Prostate Cancer that there will be a significant annual increase in new prostate cancer cases from 1.4 million in 2020 to 2.9 million by 2040 [ 6 ]. Early detection plays a pivotal role in reducing the burden of prostate cancer. 
When diagnosed at localized stages, prostate cancer is highly treatable, with five-year survival rates exceeding 95% [ 7 ]. However, survival drops dramatically once the disease metastasizes, highlighting the importance of timely and accurate diagnosis. Early detection not only improves patient prognosis but also helps avoid overtreatment, allowing clinicians to distinguish between indolent and aggressive disease [ 6 ]. Current screening practices, which primarily involve prostate-specific antigen (PSA) testing and digital rectal examination, have improved early detection rates but are fraught with limitations. PSA testing lacks the specificity required to distinguish malignant from benign conditions such as prostatitis or benign prostatic hyperplasia (BPH), leading to high false-positive rates and unnecessary biopsies [ 8 ]. Moreover, PSA levels do not provide insight into the molecular characteristics or aggressiveness of tumors, making it difficult to tailor treatment decisions based on underlying biology [ 9 ]. To address these limitations, there has been growing interest in the use of blood-based proteomic profiling as a non-invasive diagnostic tool [ 10 ]. Blood is an attractive medium for biomarker discovery because it can be obtained with minimal discomfort and reflects both systemic and tumor-specific alterations [ 11 ]. Advances in high-throughput proteomic technologies, particularly the proximity extension assay (PEA), have enabled the simultaneous quantification of over a thousand proteins in small volumes of plasma with high sensitivity and specificity [ 12 ]. This has opened new avenues for identifying cancer-specific protein signatures and monitoring disease progression, treatment response, or recurrence. The integration of proteomics into the diagnostic landscape has the potential to transform clinical workflows by enabling liquid biopsy-based assays that are both minimally invasive and rich in molecular information [ 13 ]. 
Despite these technological advancements, interpreting proteomic data for clinical application remains challenging. Proteomic datasets are inherently high-dimensional, with thousands of variables (proteins) measured across a limited number of samples [ 14 ]. This “large p, small n” scenario complicates traditional statistical analysis and increases the risk of overfitting, especially when building predictive models [ 15 , 16 ]. Furthermore, the complexity of biological systems means that disease signatures often involve intricate interactions among proteins, rather than isolated markers [ 17 ]. As such, sophisticated computational methods are required to uncover meaningful patterns and reduce dimensionality without losing predictive power. Machine learning (ML) has emerged as a powerful tool for analyzing complex biomedical data. ML algorithms are capable of handling nonlinear relationships, interactions, and noise in high-dimensional datasets, making them well-suited for proteomic biomarker discovery [ 18 ]. In cancer research, ML has been applied to identify diagnostic, prognostic, and predictive biomarkers from a variety of omics data types, including genomics, transcriptomics, and proteomics [ 19 ]. ML approaches such as logistic regression (LR), support vector machines (SVM), random forests (RF), and regularized regression models have shown promise in classifying cancer subtypes and predicting treatment outcomes [ 20 ]. However, these models often require extensive feature engineering, hyperparameter tuning, and rely on large feature sets, which limits their interpretability and hinders clinical implementation. Moreover, the selection of informative biomarkers is often carried out manually or using suboptimal methods, reducing reproducibility and scalability [ 21 – 23 ]. To overcome these limitations, evolutionary algorithms such as genetic algorithms (GAs) have been proposed for automated and optimized feature selection. 
GAs mimic the process of natural selection to identify subsets of features that maximize model performance while minimizing redundancy [ 24 ]. When combined with ML frameworks, GAs offer a systematic and scalable approach for identifying minimal, high-performance biomarker panels [ 25 ]. This is particularly important for clinical applications, where simpler models with fewer features are more likely to be adopted due to reduced cost, complexity, and regulatory burden. In this study, we present a GA-based ML pipeline for the discovery of blood-derived protein signatures specific to prostate cancer. Using publicly available proteomic data from a large pan-cancer cohort profiled with PEA [ 12 ], we focused on prostate cancer samples to develop compact, high-accuracy diagnostic models. Our approach iteratively evolved protein subsets and evaluated their predictive performance across bootstrapped splits, enabling both model robustness and feature selection stability. We further assessed the biological relevance of the identified proteins through enrichment analyses, aiming to connect the selected biomarkers to known or emerging pathways involved in prostate tumorigenesis. By integrating blood-based proteomics, evolutionary feature selection, and ML classification, our framework seeks to address key challenges in prostate cancer diagnostics, namely, the need for accurate, interpretable, and scalable biomarker panels. The resulting models not only achieve high diagnostic performance, accuracy: 98.0%, ROC AUC: 0.997 with a minimal number of features (14 proteins) but also offer insights into the biological underpinnings of prostate cancer. These findings lay the groundwork for future efforts to develop clinically deployable assays for early detection and personalized risk stratification in prostate cancer management. 
Methodology Dataset Source and Preprocessing Plasma proteomic data were obtained from a publicly available pan-cancer cohort profiled using the Olink Explore 1536 platform [ 12 ]. This platform combines proximity extension assay (PEA) with next-generation sequencing (NGS), enabling highly sensitive and multiplexed quantification of over 1,400 plasma proteins from minimal sample volumes. Each sample was accompanied by detailed clinical annotations, including diagnosed cancer type, allowing disease-specific stratification. For this study, all available prostate cancer (PRC) samples were extracted and compared against a balanced control cohort derived from other tumor types. To minimize sampling bias and prevent overrepresentation of any single cancer type, non-PRC samples were stratified by cancer type, and equal numbers were randomly selected from each group to match the number of PRC cases. This strategy ensured comparable group sizes and balanced class representation across analyses. All downstream preprocessing, feature selection, and modeling were performed using this curated, balanced dataset, providing a robust foundation for subsequent machine learning and statistical evaluations. Genetic Algorithm Optimization for Protein Selection To identify compact and high-performing plasma protein panels for prostate cancer (PRC) classification, a genetic algorithm (GA)-based protein selection strategy was implemented using the sklearn-genetic framework. GA is a population-based optimization method that iteratively evolves protein subsets by maximizing predictive performance through simulated natural selection. The GA was initialized with a population of 1,000 randomly generated protein sets, each constrained to include a maximum of 40 proteins. Selection pressure was applied using ROC AUC as the fitness function, optimized over 300 generations with 4-fold cross-validation. Mutation and crossover probabilities were set empirically at 0.1 and 0.9, respectively. 
Both logistic regression (L1-penalized) and SVM classifiers were tested, with logistic regression selected as the final model based on interpretability and consistent performance across bootstrap replicates. The GA process was repeated across 1,000 independent iterations. For each iteration, the dataset was rebalanced to include equal numbers of PRC and non-PRC samples, randomly sampled across tumor types to prevent class or tissue-type bias. The best-performing panel from each run was recorded, along with the fitness trajectory and the selected protein set. Two complementary outputs were generated: iteration-specific protein sets, representing the best-performing protein subset per run, and a ranked protein list, quantifying protein selection frequency across all 1,000 iterations. To evaluate protein set stability, protein occurrence frequencies were used for ranking. These top-ranked proteins were then cumulatively tested using bootstrap-aggregated LASSO logistic regression. Machine Learning Model Training and Evaluation To evaluate the diagnostic performance of candidate protein subsets, we implemented a LASSO-regularized logistic regression framework with bootstrap aggregation and incremental feature inclusion. Protein selection was guided by GA-derived sets according to each analytical context. Model training and evaluation were performed using the LogisticRegressionCV class from scikit-learn, with L1 regularization (penalty='l1') and the liblinear solver. Each model was trained on a balanced dataset composed of all PRC samples and an equal number of randomly sampled non-PRC cases. Stratified sampling ensured equal representation across non-PRC cancer types. For each feature set, we performed 100 bootstrap replicates. In each replicate, samples were resampled with replacement, split into 80% training and 20% test sets (stratified by label), and standardized using Z-score normalization. 
Models were evaluated using 5-fold cross-validation within the training set. Performance was assessed on the held-out test data in each replicate. Classification metrics included accuracy, ROC AUC, class-specific precision and recall, F1 scores, predictive values, and macro-averaged F1. Confusion matrices were used to compute class-specific metrics, and results were averaged across bootstrap replicates. Variability was captured using standard deviation estimates for each metric. This approach enabled systematic performance profiling of protein subsets of increasing size and provided stable estimates of classification robustness. All results were saved in tabular format and used to identify the optimal feature panel based on the point at which diagnostic performance plateaued. Differential Expression Analysis for up- and down-regulated proteins We applied the Limma package to identify proteins differentially expressed between PRC and non-PRC groups. P-values were adjusted using the Benjamini-Hochberg method to control for false discovery rate. Proteins with adjusted p < 0.05 were considered significant. The top 20 ranked proteins based on adjusted p-values were selected for initial model development. Results Genetic Algorithm-Driven Protein Selection and Systematic Evaluation of the Protein Panel Performance Iteration-specific optimization and model performance To identify compact protein panels optimized for classification performance, we implemented a genetic algorithm (GA)-based protein selection strategy. The GA was executed across 1,000 independent iterations. In each iteration, a balanced dataset of PRC and non-PRC samples was generated, and the algorithm searched for subsets of up to 40 proteins that maximized classification performance. Protein selection was performed separately using logistic regression (L1-penalized) and support vector machine classifiers, with logistic regression selected for final downstream analysis based on overall performance and interpretability. 
The GA produced two key outputs: (1) iteration-specific protein sets representing the highest-scoring subset from each individual run, and (2) a ranked list of proteins based on their recurrence frequency across all 1,000 iterations. We evaluated the diagnostic performance of the top 100 protein sets identified across runs by training LASSO logistic regression models on each set. For each protein panel, we ran 100 independent bootstrap replicates, each with five-fold cross-validation, to evaluate classification accuracy, ROC AUC, F1 scores, and class-specific precision and recall for both PRC and non-PRC classes (Supplementary Data 1). On average, the best-performing panels achieved an accuracy of 94.4%, an F1 score of 87.6%, and an ROC AUC of 0.980. Approximately 90% of the top-ranked sets achieved ROC AUCs above 0.95 and F1 scores above 0.80. These panels consistently balanced predictive performance between the PRC and non-PRC classes. Standard deviations across the protein sets remained low, indicating stable performance. For example, top sets showed a standard deviation in ROC AUC under 0.01 and F1 standard deviations around 0.03-0.07. Panels with >90% accuracy maintained F1 standard deviations below 0.06 for both classes, supporting reproducibility ( Fig. 2A ; Supplementary Data 1). Download figure Open in new tab Figure 1: Workflow for proteomic biomarker discovery and evaluation in prostate cancer (PRC). (A) Proteomic dataset retrieval and balancing. A multi-cancer proteomic dataset comprising 1,477 samples across 12 cancer types (1,463 proteins) was collected. To avoid bias, equal numbers of PRC and non-PRC samples were selected to generate a balanced proteomic dataset. (B) Feature reduction using a genetic algorithm. The dataset was processed through a genetic algorithm framework, where initial protein subsets underwent evaluation, selection, crossover, and mutation to generate new populations. 
Iteration-specific protein sets were identified, and proteins were ranked according to selection frequency across runs. (C) Machine learning (ML) evaluation of biomarker panels. The balanced dataset was split into training and test cohorts. ML models were trained and evaluated for predictive performance (ROC, sensitivity, specificity, and confusion matrix). The resulting top-performing protein panels were further assessed by enrichment analysis to determine their biological and clinical relevance. Download figure Open in new tab Figure 2. Classification performance evaluation of the identified protein panels. (A) Violin plots of accuracy, F1 score, and ROC AUC distributions across 100 bootstrap replicates for the top 100 iteration-specific protein panels. The narrow distributions highlight the stability and reproducibility of the classification performance. (B) ROC AUC values of the LASSO logistic regression models plotted against the number of top-ranked proteins used as subsets. Error bars represent standard deviation across 100 bootstrap replicates. A sharp increase in ROC AUC is observed up to 14 proteins, after which performance plateaus. (C) This plot presents the ROC curve and its confusion matrix for the 14-protein panel. (D) Radar plot showing average classification metrics for the 14-protein panel model across 100 replicates. Metrics include class-specific precision, recall, F1 scores, predictive values, accuracy, macro-averaged F1, and ROC AUC. The plot demonstrates balanced and high performance across both PRC-positive and PRC-negative classes. (E) This plot shows the ROC curve for the 8-protein panel and its confusion matrix. (F) The Volcano plot shows results from Limma differential expression analysis comparing PRC samples to a balanced set of non-PRC cancer types. Each point represents a plasma protein quantified by PEA. The x-axis indicates log2 fold change, and the y-axis shows -log10 adjusted p-values (Benjamini-Hochberg correction). 
Proteins significantly upregulated in PRC are shown in red, and downregulated proteins in blue (adjusted p < 0.05). Some of the proteins with high fold changes and strong statistical significance are labeled. Importantly, some panels achieved near-optimal classification with as few as 23-30 proteins, suggesting that compact sets can provide reliable diagnostics. While several larger panels also performed well, they offered only marginal improvements, reinforcing the efficiency of smaller, GA-optimized subsets. One additional observation was that, although many iteration-specific protein sets yielded classification accuracies above 90%, they often showed lower precision and recall, particularly for one of the classes. This indicates that while the overall accuracy was high, class-specific discrimination was sometimes suboptimal, reinforcing the need for performance-balanced selection criteria beyond accuracy alone. These results confirm that GA effectively identifies concise, high-performing protein signatures for prostate cancer classification. Incremental ranking and identification of the optimal 14-protein panel In addition to evaluating individual protein sets, we also analyzed the ranked recurrence of proteins across all GA iterations to derive a stable feature importance profile. To evaluate the diagnostic value of the most consistently selected proteins, we ranked all proteins based on how frequently they appeared in the top-performing sets across 1,000 GA iterations. Using this ranked list, we constructed a series of protein subsets starting from the top 1 protein and incrementally adding one protein at a time up to the top 50. Classification performance was evaluated using cumulatively increasing subsets, from the top 1 to the top 50 ranked proteins. For each subset size (1 to 50 proteins), we trained LASSO logistic regression models using 100 bootstrap replicates, each evaluated with five-fold cross-validation. 
Performance metrics included accuracy, ROC AUC, class-specific precision, recall, and F1 score, all performed on a held-out test set. Consistent improvements in classification performance were observed as more top-ranked proteins were incorporated. Accuracy rose from 65.8 % with a single top-ranked protein to 98.7 % with 50 proteins. ROC AUC followed a similar trend, increasing from 0.69 to 0.99. F1 scores also improved, reaching 0.98 for the largest panels. Importantly, inter-replicate variability declined with subset size: ROC AUC standard deviation dropped from ∼0.08 with 1 protein to < 0.003 beyond 25 proteins, underscoring model stability with more robust panels ( Fig. 2B ; Supplementary Data 1). The panel comprising the top 14 most-repeated proteins demonstrated near-optimal classification with minimal performance gains beyond this point. This subset achieved an average accuracy of 98.0 %, F1 of 0.980, and ROC AUC of 0.997. Class-specific precision and recall were well balanced, with F1 scores of 0.981 (PRC) and 0.979 (non-PRC) ( Table 1 ), and standard deviations below 0.02 across all metrics. Notably, beyond 14 proteins, accuracy and ROC AUC curves plateaued ( Fig. 2C ), indicating diminishing returns from adding further proteins. Radar plots confirmed uniform distribution of performance across all evaluated metrics for the 14-protein model ( Fig. 2D ). We also trained classification models using only the top eight most recurrent proteins for comparison. This panel yielded lower classification performance (accuracy 91.3 %, ROC AUC 0.97, F1 0.9) compared to the 14-protein panel ( Fig. 2E ; Table 1 ). In contrast to the GA iteration-specific panels, the ranked-protein approach yielded high metric scores, highlighting the 14-protein panel as an efficient and robust diagnostic signature capable of maintaining high performance while minimizing panel redundancy. 
The distribution of the top 14 recurrent proteins and their structured co-selection patterns are shown in Figure 3A-B , highlighting both individual marker robustness and inter-protein associations. Download figure Open in new tab Figure 3. Genetic algorithm-driven protein selection. (A) Barplot of the top-most recurrent proteins identified across 1,000 GA iterations. Bar length denotes recurrence count, and color intensity represents relative recurrence frequency. These consistently selected proteins highlight robust features favored by the evolutionary search process. (B) Co-selection heatmap of the top 20 recurrent proteins, showing pairwise co-occurrence frequencies across GA-selected feature sets. Warmer colors indicate higher co-selection counts, revealing structured patterns of protein co-selection and potential functional clustering. View this table: View inline View popup Download powerpoint Table 1: Comparative performance of diagnostic protein panels identified via genetic algorithm (GA), and previously published panels by Alvez et al., 2023 [ 12 ] and DEA. Comparison to Differential Expression-Based Protein Sets To benchmark our GA-based diagnostic panel, we compared it against protein sets derived from differential expression analysis (DEA) and to previously published panels. In a recently published work [ 12 ], the authors implemented a classification approach based on glmnet-regularized regression and achieved above 90% accuracy only when training the model on the full panel of 1,463 proteins. However, performance declined substantially as the number of proteins was reduced. The top 14 proteins identified only achieved an ROC AUC of 0.852. We applied our LASSO-based classification framework to both their 14- and 30-protein panels. While the 30-protein set showed moderate performance improvements, our GA-optimized 14-protein panel still outperformed both, achieving an accuracy of 98.0%, ROC AUC of 0.997, and F1 of 0.980. 
Furthermore, we also performed Limma-based DEA on the same dataset ( Fig. 2F ) and trained a LASSO model on the top 20 differentially expressed proteins. This model achieved high classification performance (accuracy: 92.3%, ROC AUC: 0.969, F1: 92.6%), but still fell short of our GA-derived panel. These comparisons highlight that both our protein selection method (GA) and classification approach (bootstrap-aggregated LASSO) yield superior diagnostic performance compared to traditional DEA and glmnet-based models. They also show that optimal performance does not require large panels, reinforcing the utility of compact, biologically informed protein signatures ( Table 1 ). Biological validation and literature-supported interpretation of selected proteins To assess the biological plausibility of the final GA-selected 14-protein panel, we conducted a structured literature survey. Each protein was annotated for its known molecular functions, prior evidence in PRC or other cancers, and putative biological roles ( Table 2 ). The resulting panel includes a combination of well-studied PRC-associated proteins and several less-characterized candidates, offering both validation and novel hypotheses. View this table: View inline View popup Table 2: Functional Overview and Cancer Associations of GA Selected Proteins in Prostate Cancer (PRC). This table summarizes the biological functions, prior evidence in PRC, associations in other cancers, and hypothesized mechanistic roles in prostate tumorigenesis for each of the 14 proteins identified by the GA–LASSO pipeline. Among the 14 proteins, MSMB, PARP1, FAP, COMP, DNER, and TNFRSF10A have strong literature support in the context of PRC. MSMB (beta-microseminoprotein) is consistently downregulated in PRC and has been widely proposed as a diagnostic and prognostic marker [ 26 – 37 ]. PARP1, involved in DNA damage repair, is overexpressed in aggressive PRC and targeted by approved therapies [ 38 – 47 ]. 
FAP and COMP, extracellular matrix components, are linked to tumor microenvironment remodeling and PRC progression [ 48 – 50 ], while TNFRSF10A is a pro-apoptotic receptor associated with androgen-independent PRC [ 51 – 53 ]. DNER is involved in apoptosis signaling and tumor cell adhesion and has been linked to mechanisms of resistance or metastasis in PRC [ 54 , 55 ]. To assess the added value of the remaining 6 proteins not reported in literature to be involved in PRC, we trained classification models using only the eight literature-supported proteins. This restricted panel yielded lower classification performance (mean accuracy: 91.3%, ROC AUC: 0.97, F1: 0.91), compared to the full 14-protein panel (accuracy: 98.4%, ROC AUC: 0.99, F1: 0.98). These reductions were consistent across all metrics, including F1 scores, precision, and recall ( Table 1 ). This highlights that the described approach identifies novel biomarkers which other methods of biomarker discovery cannot find. To contextualize these findings, we provide a structured overview of the molecular functions, prior cancer associations, and prostate cancer-specific roles of the 14 proteins ( Table 2 ). The decline in performance highlights the predictive contribution of the remaining six proteins: XG, GFER, PSPN, CNTN3, IGSF3, and CALB2. While these proteins are not widely characterized in PRC, several have been reported in other malignancies. For example, GFER is involved in mitochondrial redox homeostasis and has emerging roles in tumor growth and stress resistance [ 56 ]. PSPN, a neurotrophic factor, has been linked to aggressive neuroendocrine phenotypes [ 54 ]. CALB2 and CNTN3, though primarily studied in neurological contexts, have shown deregulation in cancers such as glioblastoma and colorectal cancer [ 57 – 69 ]. IGSF3, an immunoglobulin superfamily member, is associated with epithelial-mesenchymal transition and invasion in lung and pancreatic cancers [ 70 – 72 ]. 
XG, the most understudied of the group, is a GPI-anchored protein with potential implications in cell-cell interactions and immune recognition [ 73 – 76 ]. The inclusion of these proteins improved both sensitivity and specificity and enhanced class balance across all classification metrics. Importantly, their mechanistic interplay across apoptosis, extracellular matrix remodeling, calcium signaling, and neurotrophic pathways is illustrated in Figure 4 , which integrates the GA-selected proteins into hallmark processes of prostate cancer progression. Their consistent recurrence across GA iterations, despite limited prior association with PRC, suggests that they may capture non-redundant, biologically meaningful variations related to tumor progression or microenvironmental dynamics. These results support the hypothesis that under-characterized proteins, when identified through data-driven approaches, can contribute significant diagnostic value and may highlight novel biological mechanisms. Overall, this analysis yields a compact and biologically coherent panel of plasma proteins capable of accurately distinguishing PRC from other tumor types, providing a strong basis for biomarker development and mechanistic follow-up studies. Download figure Open in new tab Figure 4: Mechanistic integration of GA-selected proteins into hallmark pathways of prostate cancer. The 14 proteins identified by the GA-LASSO pipeline cluster into five functional groups: apoptosis (XG, MSMB, TNFRSF10A), growth and survival signaling (GFER, PARP1, LEP, DNER, IGSF3), extracellular matrix adhesion and remodeling (FAP, COMP, CNTN3, CD99), neurotrophic signaling (PSPN), and calcium modulation (CALB2). Each group contributes to critical cancer hallmarks, including apoptosis escape, angiogenesis, proliferation, invasion, metastasis, and therapy resistance. 
The diagram illustrates how these proteins converge on PI3K-AKT/MAPK survival signaling and microenvironmental dysregulation, providing a mechanistic rationale for their combined predictive power in distinguishing prostate cancer. Discussion We present a machine learning-based framework that identifies a minimal, high-performing plasma protein panel for prostate cancer (PRC) classification. The pipeline integrates genetic algorithm (GA)-based protein selection with robust model training using bootstrap-aggregated LASSO logistic regression. Using a publicly available pan-cancer proteomic dataset, we demonstrate that compact and meaningful diagnostic signatures can be derived from small protein sets, and that these panels yield superior classification performance compared to high-dimensional or purely statistical approaches. The main contribution of this study is the GA-driven biomarker selection methodology. We performed 1,000 independent GA runs, each using a balanced dataset of PRC and non-PRC samples and a logistic regression fitness function optimized for ROC AUC. This iterative process consistently identified protein subsets of 25-30 proteins that yielded classification accuracies above 94%. By aggregating recurrence patterns across runs, we derived a ranked list and identified a 14-protein panel that achieved an average ROC AUC of 0.997, an F1 score of 0.980, and an accuracy of 98.0%, all with low variance across 100 bootstrapped replicates. Notably, performance plateaued beyond 14 proteins, confirming that the selected set was not only compact but also information-rich. To benchmark our approach, we compared it directly to the published model by Álvez et al. [ 12 ], in which glmnet-regularized classifiers were trained using 1,463 proteins. In that work, the full-protein models reached ∼90% accuracy; however, performance dropped substantially when the number of proteins was reduced. Notably, their top 14-protein panel achieved an ROC AUC of only 0.852. 
These comparisons emphasize that performance gains in our framework stem not only from biomarker selection but also from the robust classification strategy itself. Across recent literature, prostate cancer classification models have shown AUCs typically ranging from 0.73 to 0.91, often using transcriptomic [ 133 ], cfDNA [ 134 ], flow cytometry [ 135 ], or multi-omics inputs. For example, studies using RNA-seq from TCGA cohorts or integrated transcriptomic panels have reported AUCs around 0.91-0.93, using up to 200 genes or combinations of clinical and omics data [ 133 , 136 ]. Other approaches incorporating PSA kinetics, immune phenotyping, or DNA methylation achieved moderate sensitivity and specificity, often below 90% [ 137 – 139 ]. In contrast, our compact plasma protein panel exceeds these performances without the need for invasive sampling or complex multi-modal integration. This positions our framework as a promising foundation for blood-based, high-specificity diagnostic development. Biological enrichment and literature annotation confirmed the plausibility of our findings. The 14-protein panel included both established and under-characterized proteins. While established biomarkers such as MSMB, PARP1, and FAP are well-recognized in PRC, less-characterized proteins including XG, IGSF3, and GFER, rarely discussed in the PRC literature, proved critical for achieving optimal classification. Importantly, models trained only on the eight well-established proteins from the panel showed a clear drop in performance, reinforcing the value of including novel, data-driven candidates. To provide an integrative overview, we present a conceptual model that organizes the 14 proteins into functional axes driving prostate cancer progression ( Figure 5 ). This conceptual framework underscores the biological coherence of the panel, linking statistical signal to mechanistic plausibility. 
Together, these results establish a machine learning framework capable of identifying small, interpretable, and biologically grounded diagnostic signatures. By integrating statistical analysis, evolutionary search, and literature-based validation, we provide a template for biomarker development that is both data-driven and biologically principled. The final 14-protein panel offers a promising basis for targeted assay development and provides a roadmap for extending this approach to other cancers and multi-omics platforms. Download figure Open in new tab Figure 5. Conceptual model of GA-selected proteins in prostate cancer progression. The 14-protein panel is grouped into five functional categories: apoptosis (XG, MSMB, TNFRSF10A), growth/survival signaling (GFER, PARP1, LEP, DNER, IGSF3), extracellular matrix adhesion/remodeling (FAP, COMP, CNTN3, CD99), neurotrophic signaling (PSPN), and calcium modulation (CALB2). These pathways converge on apoptosis resistance, ECM remodeling, proliferative and angiogenic signaling, and Ca²⁺ imbalance, collectively driving tumor progression, invasion, metastasis, recurrence, and therapy resistance. This summary model highlights how under-characterized proteins complement well-established biomarkers in capturing key hallmarks of prostate cancer biology. While the present study demonstrates the effectiveness of genetic algorithm-driven selection in identifying a compact, high-performing plasma protein panel for prostate cancer classification, certain limitations should be acknowledged. First, the analysis was based on a publicly available pan-cancer cohort rather than an independent, prospectively collected PRC-specific dataset. External validation using an independent cohort could not be performed due to the current unavailability of comparable Olink- or PEA-quantified prostate cancer proteomic datasets. 
Although extensive cross-validation and bootstrap aggregation were employed to minimize overfitting, future validation using independent plasma cohorts and clinical samples will be essential to confirm the generalizability of the findings. Additionally, functional characterization of underexplored proteins such as XG, GFER, and IGSF3 is warranted to elucidate their mechanistic roles in PRC biology. Future studies should aim to incorporate such experimental validation and assess clinical assay translation of the proposed 14-protein panel. Conclusion This study presents a performance-optimized, machine learning-based pipeline that identifies a compact plasma protein signature for accurate prostate cancer prediction. Using genetic algorithm-driven protein selection and bootstrap-aggregated LASSO regression, we derived a 14-protein panel that consistently achieves over 98% accuracy and an ROC AUC of 0.99, outperforming previous models trained on much larger protein sets and offering a scalable, non-invasive alternative for clinical application. Unlike previous approaches based solely on differential expression or glmnet-based classification, our method directly optimizes for diagnostic performance and robustness. Importantly, we show that high accuracy can be achieved with minimal protein sets when guided by data-informed and performance-optimized selection strategies. While the final panel includes well-known markers like MSMB and PARP1, it also incorporates novel or under-characterized proteins that contribute unique diagnostic value, providing a basis for investigating their functional relevance in PRC biology. Functional enrichment and literature-based annotation confirm that the panel is not only statistically robust but also biologically meaningful. Compared to multi-omics or high-dimensional transcriptomic models, our compact, proteomics-only approach offers a scalable and clinically practical solution for early detection. 
This compact signature provides a strong foundation for future diagnostic assay development and highlights the value of combining biological insight with evolutionary optimization in biomarker discovery. Although this study was limited by the lack of independent validation owing to the absence of comparable Olink-based PRC proteomic datasets, the reproducible and data-driven framework established here provides a strong foundation for subsequent validation and clinical translation of the identified 14-protein panel. Statistical Analysis and Visualization All statistical analyses were conducted in Python (v3.9) and R (v4.5.0). Machine learning metrics, including accuracy, precision, recall, F1 score, and ROC AUC, were computed using NumPy, pandas, and scikit-learn. LASSO logistic regression and genetic algorithm-based protein selection were implemented using scikit-learn and sklearn-genetic, respectively. Bootstrap resampling was used to assess performance stability across models. All visualizations were generated using matplotlib (v3.8) and seaborn (v0.13). Data Availability Code and scripts for data analysis and visualization can be provided upon request. Author Contributions S.A.S. and A.A.S. designed the study. S.A.S. preprocessed proteomic data, developed and implemented the GA-AutoML pipeline, conducted protein selection and classifier evaluation, performed biological interpretation, and wrote the manuscript. A.A.S. supervised the study, provided conceptual guidance on methodology and biological validation, and critically revised the manuscript. All authors reviewed and approved the final version of the manuscript. Disclosure and Competing Interests Statement The authors declare that they have no conflict of interest. Acknowledgments The authors gratefully acknowledge the University of Nizwa, Natural and Medical Sciences Research Center, for providing computational and infrastructural support. References 1. ↵ Quaye , E ., Prostate Cancer Statistics . 
Cancer Therapy Advisor , 2024 . 2. ↵ Sung , H. , et al. , Global cancer statistics 2020: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries . CA: a cancer journal for clinicians , 2021 . 71 ( 3 ): p. 209 – 249 . OpenUrl CrossRef PubMed 3. ↵ Zhang , W. , et al. , Global Burden of Prostate Cancer and Association with Socioeconomic Status, 1990-2019: A Systematic Analysis from the Global Burden of Disease Study . J Epidemiol Glob Health , 2023 . 13 ( 3 ): p. 407 – 421 . OpenUrl 4. ↵ Berenguer , C.V. , et al. , Underlying Features of Prostate Cancer—Statistics, Risk Factors, and Emerging Methods for Its Diagnosis . Current Oncology , 2023 . 30 ( 2 ): p. 2300 – 2321 . OpenUrl 5. ↵ Sekhoacha , M. , et al. , Prostate Cancer Review: Genetics, Diagnosis, Treatment Options, and Alternative Approaches . Molecules , 2022 . 27 ( 17 ): p. 5730 . OpenUrl CrossRef PubMed 6. ↵ James , N.D. , et al. , The Lancet Commission on prostate cancer: planning for the surge in cases . The Lancet , 2024 . 403 ( 10437 ): p. 1683 – 1722 . OpenUrl 7. ↵ John , S . and J. Broggio , Cancer survival in England: adult, stage at diagnosis and childhood-patients followed up to 2018 . Office for National Statistics [accessed 2023 June 17 ]. https://www.ons.gov.uk/peoplepopulationandcommunity/healthandsocialcare/conditionsanddiseases/bulletins/cancersurvivalinengland/stageatdiagnosisandchildhoodpatientsfollowedupto2018 , 2019 . 8. ↵ Mensah , J.E. , et al. , Prostate-specific antigen, digital rectal examination, and prostate cancer detection: A study based on more than 7000 transrectal ultrasound-guided prostate biopsies in Ghana . Transl Oncol , 2025 . 51 : p. 102163 . OpenUrl 9. ↵ Zhou , Y. , et al. , Tumor biomarkers for diagnosis, prognosis and targeted therapy . Signal Transduction and Targeted Therapy , 2024 . 9 ( 1 ): p. 132 . OpenUrl 10. ↵ Chacko , N. and R. Ankri , Non-invasive early-stage cancer detection: current methods and future perspectives . 
Clinical and Experimental Medicine , 2024 . 25 ( 1 ): p. 17 . OpenUrl 11. ↵ Ma , L. , et al. , Liquid biopsy in cancer: current status, challenges and future prospects . Signal Transduction and Targeted Therapy , 2024 . 9 ( 1 ): p. 336 . OpenUrl 12. ↵ Álvez , M.B. , et al. , Next generation pan-cancer blood proteome profiling using proximity extension assay . Nature Communications , 2023 . 14 ( 1 ): p. 4308 . OpenUrl 13. ↵ Ding , Z. , et al. , Proteomics technologies for cancer liquid biopsies . Molecular Cancer , 2022 . 21 ( 1 ): p. 53 . OpenUrl 14. ↵ Zhou , Z. , et al. , Proteomics appending a complementary dimension to precision oncotherapy . Comput Struct Biotechnol J , 2024 . 23 : p. 1725 – 1739 . OpenUrl 15. ↵ Kirpich , A. , et al. , Variable selection in omics data: A practical evaluation of small sample sizes . PloS one , 2018 . 13 ( 6 ): p. e0197910 . OpenUrl CrossRef PubMed 16. ↵ Salmanpour , M.R. , et al. , Enhanced Lung Cancer Survival Prediction Using Semi-Supervised Pseudo-Labeling and Learning from Diverse PET/CT Datasets . Cancers , 2025 . 17 ( 2 ): p. 285 . OpenUrl 17. ↵ Yue , R. and A. Dutta , Computational systems biology in disease modeling and control, review and perspectives . npj Systems Biology and Applications , 2022 . 8 ( 1 ): p. 37 . OpenUrl 18. ↵ Saadh , M.J. , et al. , Advanced machine learning framework for enhancing breast cancer diagnostics through transcriptomic profiling . Discover Oncology , 2025 . 16 ( 1 ): p. 334 . OpenUrl 19. ↵ Ozaki , Y. , et al. , Integrating Omics Data and AI for Cancer Diagnosis and Prognosis . Cancers (Basel) , 2024 . 16 ( 13 ). 20. ↵ Alwazy , A.S.H. , et al. , Evaluating machine learning and statistical learning techniques for cancer classification and diagnosis . Iran Journal of Computer Science , 2025 . 21. ↵ Bang , S. , Y.J. Ahn , and K.C. Koo , Harnessing machine learning to predict prostate cancer survival: a review . Front Oncol , 2024 . 14 : p. 1502629 . OpenUrl 22. Akinnuwesi , B.A. , et al. 
, Application of support vector machine algorithm for early differential diagnosis of prostate cancer . Data Science and Management , 2023 . 6 ( 1 ): p. 1 – 12 . OpenUrl 23. ↵ Passaro , A. , et al. , Cancer biomarkers: Emerging trends and clinical implications for personalized treatment . Cell , 2024 . 187 ( 7 ): p. 1617 – 1635 . OpenUrl CrossRef PubMed 24. ↵ Ali , W. and F. Saeed , Hybrid filter and genetic algorithm-based feature selection for improving cancer classification in high-dimensional microarray data . Processes , 2023 . 11 ( 2 ): p. 562 . OpenUrl 25. ↵ Shahreen , N. , et al. , Minimal gene signatures enable high-accuracy prediction of antibiotic resistance in Pseudomonas aeruginosa . npj Systems Biology and Applications , 2025 . 11 ( 1 ): p. 108 . OpenUrl 26. ↵ Chen , X. , et al. , Identification and validation of MSMB as a critical gene for prostate cancer development in obese people . Am J Cancer Res , 2023 . 13 ( 4 ): p. 1582 – 1593 . OpenUrl 27. Zhao , W. , et al. , Single-cell analysis of gastric signet ring cell carcinoma reveals cytological and immune microenvironment features . Nat Commun , 2023 . 14 ( 1 ): p. 2985 . OpenUrl 28. Lee , I.-S. , et al. , A blood-based transcriptomic signature for noninvasive diagnosis of gastric cancer . British Journal of Cancer , 2021 . 125 ( 6 ): p. 846 – 853 . OpenUrl CrossRef PubMed 29. Bergström , S.H. , et al. , Prostate tumors downregulate microseminoprotein-beta (MSMB) in the surrounding benign prostate epithelium and this response is associated with tumor aggressiveness . Prostate , 2018 . 78 ( 4 ): p. 257 – 265 . OpenUrl CrossRef PubMed 30. Whitaker , H.C. , et al. , The rs10993994 risk allele for prostate cancer results in clinically relevant changes in microseminoprotein-beta expression in tissue and urine . PLoS One , 2010 . 5 ( 10 ): p. e13363 . OpenUrl CrossRef PubMed 31. Huang , S. and H. 
Yin , A Multi-Omics-Based Exploration of the Predictive Role of MSMB in Prostate Cancer Recurrence: A Study Using Bayesian Inverse Convolution and 10 Machine Learning Combinations . Biomedicines , 2025 . 13 ( 2 ): p. 487 . OpenUrl 32. Waters , K.M. , et al. , A common prostate cancer risk variant 5’ of microseminoprotein-beta (MSMB) is a strong predictor of circulating beta-microseminoprotein (MSP) levels in multiple populations . Cancer Epidemiol Biomarkers Prev , 2010 . 19 ( 10 ): p. 2639 – 46 . OpenUrl Abstract / FREE Full Text 33. Sjöblom , L. , et al. , Microseminoprotein-Beta Expression in Different Stages of Prostate Cancer . PLOS ONE , 2016 . 11 ( 3 ): p. e0150241 . OpenUrl 34. FitzGerald , L.M. , et al. , Investigation of the relationship between prostate cancer and MSMB and NCOA4 genetic variants and protein expression . Hum Mutat , 2013 . 34 ( 1 ): p. 149 – 56 . OpenUrl CrossRef PubMed 35. Lin , C. , et al. , Causal associations between plasma proteins and prostate cancer: a Proteome-Wide Mendelian Randomization . 2025 , eLife Sciences Publications, Ltd . 36. Wu , L. , et al. , Analysis of Over 140,000 European Descendants Identifies Genetically Predicted Blood Protein Biomarkers Associated with Prostate Cancer Risk . Cancer Res , 2019 . 79 ( 18 ): p. 4592 – 4598 . OpenUrl CrossRef PubMed 37. ↵ Zhong , H. , et al. , Identification of blood protein biomarkers associated with prostate cancer risk using genetic prediction models: analysis of over 140,000 subjects . Human Molecular Genetics , 2023 . 32 ( 22 ): p. 3181 – 3193 . OpenUrl 38. ↵ Deshmukh , D. and Y. Qiu , Role of PARP-1 in prostate cancer . Am J Clin Exp Urol , 2015 . 3 ( 1 ): p. 1 – 12 . OpenUrl CrossRef PubMed 39. Teyssonneau , D. , et al. , Prostate cancer and PARP inhibitors: progress and challenges . Journal of Hematology & Oncology , 2021 . 14 ( 1 ): p. 51 . OpenUrl 40. Asim , M. , et al. , Synthetic lethality between androgen receptor signalling and the PARP pathway in prostate cancer . 
Nature Communications , 2017 . 8 ( 1 ): p. 374 . OpenUrl 41. Krishnan , A. , et al. , Analysis of the PARP1, ADP-Ribosylation, and TRIP12 Triad With Markers of Patient Outcome in Human Breast Cancer . Modern Pathology , 2023 . 36 ( 7 ): p. 100167 . OpenUrl 42. Kulkarni , S. , et al. , PARP inhibitors in ovarian cancer: Mechanisms of resistance and implications to therapy . DNA Repair , 2025 . 149 : p. 103830 . OpenUrl 43. Yuan , K. , et al. , PARP-1 regulates resistance of pancreatic cancer to TRAIL therapy . Clin Cancer Res , 2013 . 19 ( 17 ): p. 4750 – 9 . OpenUrl Abstract / FREE Full Text 44. Wang , S.S.Y. , et al. , PARP Inhibitors in Breast and Ovarian Cancer . Cancers , 2023 . 15 ( 8 ): p. 2357 . OpenUrl 45. Longoria , O. , N. Beije , and J.S. de Bono , PARP inhibitors for prostate cancer . Seminars in Oncology , 2024 . 51 ( 1 ): p. 25 – 35 . OpenUrl 46. Wang , J. , B.B. Freeman , and P. Mathew , The Emerging Role of PARP Inhibitors in the Treatment of Prostate Cancer, in Prostate Cancer-Leading-edge Diagnostic Procedures and Treatments . 2016 , IntechOpen . 47. ↵ Aldakheel , F.M. , S.A. Alduraywish , and K.H. Dabwan , Integrating machine learning driven virtual screening and molecular dynamics simulations to identify potential inhibitors targeting PARP1 against prostate cancer . Scientific Reports , 2025 . 15 ( 1 ): p. 12764 . OpenUrl 48. ↵ Fitzgerald , A.A. and L.M. Weiner , The role of fibroblast activation protein in health and malignancy . Cancer Metastasis Rev , 2020 . 39 ( 3 ): p. 783 – 803 . OpenUrl CrossRef PubMed 49. Caramella-Pereira , F. , et al. , Overexpression of fibroblast activation protein (FAP) in the stroma of proliferative inflammatory atrophy (PIA) and primary adenocarcinoma of the prostate . Pathology . 50. ↵ Wang , F.T. , et al. , Cancer-associated fibroblast regulation of tumor neo-angiogenesis as a therapeutic target in cancer . Oncol Lett , 2019 . 17 ( 3 ): p. 3055 – 3065 . OpenUrl PubMed 51. 
↵ Thorburn , A ., Tumor Necrosis Factor-Related Apoptosis-Inducing Ligand (TRAIL) Pathway Signaling . Journal of Thoracic Oncology , 2007 . 2 ( 6 ): p. 461 – 465 . OpenUrl 52. Langsenlehner , T. , et al. , The Glu228Ala polymorphism in the ligand binding domain of death receptor 4 is associated with increased risk for prostate cancer metastases . The Prostate , 2008 . 68 ( 3 ): p. 264 – 268 . OpenUrl PubMed 53. ↵ Wolf , S. , et al. , Ala228 variant of trail receptor 1 affecting the ligand binding site is associated with chronic lymphocytic leukemia, mantle cell lymphoma, prostate cancer, head and neck squamous cell carcinoma and bladder cancer . International journal of cancer , 2006 . 118 ( 7 ): p. 1831 – 1835 . OpenUrl CrossRef PubMed Web of Science 54. ↵ Tellman , T.V. , et al. , Cleavage of the Perlecan-Semaphorin 3A-Plexin A1-Neuropilin-1 (PSPN) Complex by Matrix Metalloproteinase 7/Matrilysin Triggers Prostate Cancer Cell Dyscohesion and Migration . Int J Mol Sci , 2021 . 22 ( 6 ). 55. ↵ Li , Q. , Z. Cao , and S. Zhao , The Emerging Portrait of Glial Cell Line-derived Neurotrophic Factor Family Receptor Alpha (GFRα) in Cancers . Int J Med Sci , 2022 . 19 ( 4 ): p. 659 – 668 . OpenUrl 56. ↵ Polimeno , L. , et al. , Decreased expression of the Augmenter of Liver Regeneration results in increased apoptosis and oxidative damage in human-derived glioma cells . Cell Death & Disease , 2012 . 3 ( 4 ): p. e289 – e289 . OpenUrl 57. ↵ Schwaller , B ., Calretinin: from a “simple” Ca(2+) buffer to a multifunctional protein implicated in many biological processes . Front Neuroanat , 2014 . 8 : p. 3 . OpenUrl CrossRef PubMed 58. Camp , A.J. and R. Wijesinghe , Calretinin: modulator of neuronal excitability . The international journal of biochemistry & cell biology , 2009 . 41 ( 11 ): p. 2118 – 2121 . OpenUrl 59. Blum , W. , et al. , Regulation of calretinin in malignant mesothelioma is mediated by septin 7 binding to the CALB2 promoter . BMC Cancer , 2018 . 18 ( 1 ): p. 
475 . OpenUrl CrossRef 60. Doglioni , C. , et al. , Calretinin: a novel immunocytochemical marker for mesothelioma . Am J Surg Pathol , 1996 . 20 ( 9 ): p. 1037 – 46 . OpenUrl CrossRef PubMed Web of Science 61. Kresoja-Rakic , J. , et al. , Posttranscriptional regulation controls calretinin expression in malignant pleural mesothelioma . Frontiers in genetics , 2017 . 8 : p. 70 . OpenUrl 62. Stevenson , L. , et al. , Calbindin 2 (CALB2) regulates 5-fluorouracil sensitivity in colorectal cancer by modulating the intrinsic apoptotic pathway . PLoS One , 2011 . 6 ( 5 ): p. e20276 . OpenUrl CrossRef PubMed 63. Ojasalu , K. , et al. , Upregulation of mesothelial genes in ovarian carcinoma cells is associated with an unfavorable clinical outcome and the promotion of cancer cell adhesion . Molecular Oncology , 2020 . 14 ( 9 ): p. 2142 – 2162 . OpenUrl 64. Tao , J. , et al. , CALB2 drives pancreatic cancer metastasis through inflammatory reprogramming of the tumor microenvironment . J Exp Clin Cancer Res , 2024 . 43 ( 1 ): p. 277 . OpenUrl CrossRef PubMed 65. Matsuda , M. , et al. , Calretinin-expressing lung adenocarcinoma: Distinct characteristics of advanced stages, smoker-type features, and rare expression of other mesothelial markers are useful to differentiate epithelioid mesothelioma . Pathol Res Pract , 2020 . 216 ( 3 ): p. 152817 . OpenUrl CrossRef PubMed 66. Chatterjee , M. , D. Schild , and C.E. Teunissen , Contactins in the central nervous system: role in health and disease . Neural Regen Res , 2019 . 14 ( 2 ): p. 206 – 216 . OpenUrl 67. Zhou , J. , et al. , SLC1A1, SLC16A9, and CNTN3 Are Potential Biomarkers for the Occurrence of Colorectal Cancer . Biomed Res Int , 2020 . 2020 : p. 1204605 . OpenUrl 68. Zhu , Y.F. , et al. , Prognostic significance of contactin 3 expression and associated genes in glioblastoma multiforme . Oncol Lett , 2019 . 18 ( 2 ): p. 1863 – 1871 . OpenUrl 69. ↵ Chandran , U.R. , et al. 
, Gene expression profiles of prostate cancer reveal involvement of multiple molecular pathways in the metastatic process . BMC Cancer , 2007 . 7 : p. 64 . OpenUrl CrossRef PubMed 70. ↵ Usardi , A. , et al. , The immunoglobulin-like superfamily member IGSF3 is a developmentally regulated protein that controls neuronal morphogenesis . Dev Neurobiol , 2017 . 77 ( 1 ): p. 75 – 92 . OpenUrl CrossRef 71. Curry , R.N. , et al. , Glioma epileptiform activity and progression are driven by IGSF3-mediated potassium dysregulation . Neuron , 2023 . 111 ( 5 ): p. 682 – 695 .e9. OpenUrl PubMed 72. ↵ Sheng , P. , et al. , The immunoglobulin superfamily member 3 (IGSF3) promotes hepatocellular carcinoma progression through activation of the NF-κB pathway . Ann Transl Med , 2020 . 8 ( 6 ): p. 378 . OpenUrl 73. ↵ Tippett , P. and N.A. Ellis , The Xg blood group system: a review . Transfusion Medicine Reviews , 1998 . 12 ( 4 ): p. 233 – 257 . OpenUrl PubMed 74. Johnson , N.C ., XG: the forgotten blood group system . Immunohematology , 2011 . 27 ( 2 ): p. 68 – 71 . OpenUrl PubMed 75. Orentas , R.J. , et al. , Identification of cell surface proteins as potential immunotherapy targets in 12 pediatric cancers . Frontiers in oncology , 2012 . 2 : p. 194 . OpenUrl 76. ↵ Meynet , O. , et al. , Xg expression in Ewing’s sarcoma is of prognostic value and contributes to tumor invasiveness . Cancer research , 2010 . 70 ( 9 ): p. 3730 – 3738 . OpenUrl Abstract / FREE Full Text 77. Chen , J.-Y. , et al. , Biomarkers for Prostate Cancer: From Diagnosis to Treatment . Diagnostics , 2023 . 13 ( 21 ): p. 3350 . OpenUrl 78. Hartmann , K.P. , et al. , FAP-retargeted Ad5 enables in vivo gene delivery to stromal cells in the tumor microenvironment . Mol Ther , 2023 . 31 ( 10 ): p. 2914 – 2928 . OpenUrl CrossRef PubMed 79. Tchou , J. , et al. , Fibroblast activation protein expression by stromal cells and tumor-associated macrophages in human breast cancer . Hum Pathol , 2013 . 44 ( 11 ): p. 2549 – 57 . 
OpenUrl CrossRef PubMed 80. Shi , M. , et al. , Expression of fibroblast activation protein in human pancreatic adenocarcinoma and its clinicopathological significance . World J Gastroenterol , 2012 . 18 ( 8 ): p. 840 – 6 . OpenUrl CrossRef PubMed 81. Cheng , C.-s. , et al. , Fibroblast activation protein-based theranostics in pancreatic cancer . Frontiers in Oncology , 2022 . 12 : p. 969731 . OpenUrl 82. Liao , Y. , et al. , Evaluation of the circulating level of fibroblast activation protein α for diagnosis of esophageal squamous cell carcinoma . Oncotarget , 2017 . 8 ( 18 ): p. 30050 – 30062 . OpenUrl 83. Tabola , R. , et al. , Fibroblast activating protein-α expression in squamous cell carcinoma of the esophagus in primary and irradiated tumors: the use of archival FFPE material for molecular techniques . Eur J Histochem , 2017 . 61 ( 2 ): p. 2793 . OpenUrl 84. Zhou , X. , et al. , FAP positive cancer-associated fibroblasts promote tumor progression and radioresistance in esophageal squamous cell carcinoma by transferring exosomal lncRNA AFAP1-AS1 . Molecular Carcinogenesis , 2024 . 63 ( 10 ): p. 1922 – 1937 . OpenUrl 85. Dziadek , S. , et al. , Comprehensive analysis of fibroblast activation protein expression across 23 tumor indications: insights for biomarker development in cancer immunotherapies . Front Immunol , 2024 . 15 : p. 1352615 . OpenUrl PubMed 86. Chen , L. , et al. , FAP positive fibroblasts induce immune checkpoint blockade resistance in colorectal cancer via promoting immunosuppression . Biochem Biophys Res Commun , 2017 . 487 ( 1 ): p. 8 – 14 . OpenUrl CrossRef 87. Kalluri , R ., The biology and function of fibroblasts in cancer . Nature Reviews Cancer , 2016 . 16 ( 9 ): p. 582 – 598 . OpenUrl CrossRef PubMed 88. Ma , C. , et al. , Identifying the oncogenic roles of FAP in human cancers based on systematic analysis . Aging (Albany NY) , 2023 . 15 ( 14 ): p. 7056 – 7083 . OpenUrl 89. Wang , D.D. , et al. 
, Long noncoding RNA TNFRSF10A-AS1 promotes colorectal cancer through upregulation of HuR . World J Gastroenterol , 2022 . 28 ( 20 ): p. 2184 – 2200 . OpenUrl 90. Zhang , S. , et al. , Downregulation of death receptor 4 is tightly associated with positive response of EGFR mutant lung cancer to EGFR-targeted therapy and improved prognosis . Theranostics , 2021 . 11 ( 8 ): p. 3964 . OpenUrl CrossRef PubMed 91. Lapierre , M. , et al. , Histone deacetylase 9 regulates breast cancer cell proliferation and the response to histone deacetylase inhibitors . Oncotarget , 2016 . 7 ( 15 ): p. 19693 – 708 . OpenUrl CrossRef 92. Valladares , A. , et al. , Genetic expression profiles and chromosomal alterations in sporadic breast cancer in Mexican women . Cancer Genet Cytogenet , 2006 . 170 ( 2 ): p. 147 – 51 . OpenUrl CrossRef PubMed 93. Ma , Y. , et al. , SOX9 Is Essential for Triple-Negative Breast Cancer Cell Survival and Metastasis . Mol Cancer Res , 2020 . 18 ( 12 ): p. 1825 – 1838 . OpenUrl Abstract / FREE Full Text 94. Karamanos , N.K. , et al. , A guide to the composition and functions of the extracellular matrix . The FEBS journal , 2021 . 288 ( 24 ): p. 6850 – 6912 . OpenUrl CrossRef PubMed 95. Huang , Y. , et al. , Proteomic profiling of prostate cancer reveals molecular signatures under antiandrogen treatment . Clinical Proteomics , 2024 . 21 ( 1 ): p. 44 . OpenUrl 96. Englund , E. , et al. , Cartilage oligomeric matrix protein contributes to the development and metastasis of breast cancer . Oncogene , 2016 . 35 ( 43 ): p. 5585 – 5596 . OpenUrl CrossRef 97. Norman , G.L. , et al. , Cartilage oligomeric matrix protein: A novel non-invasive marker for assessing cirrhosis and risk of hepatocellular carcinoma . World J Hepatol , 2015 . 7 ( 14 ): p. 1875 – 83 . OpenUrl CrossRef 98. Chen , J. , et al. 
, Identification, prioritization, and evaluation of glycoproteins for aggressive prostate cancer using quantitative glycoproteomics and antibody-based assays on tissue specimens . Proteomics , 2013 . 13 ( 15 ): p. 2268 – 77 . OpenUrl CrossRef PubMed Web of Science 99. Liu , T.T. , et al. , Cartilage oligomeric matrix protein is a prognostic factor and biomarker of colon cancer and promotes cell proliferation by activating the Akt pathway . J Cancer Res Clin Oncol , 2018 . 144 ( 6 ): p. 1049 – 1063 . OpenUrl CrossRef 100. Englund , E. , et al. , Cartilage oligomeric matrix protein promotes prostate cancer progression by enhancing invasion and disrupting intracellular calcium homeostasis . Oncotarget , 2017 . 8 ( 58 ): p. 98298 – 98311 . OpenUrl CrossRef 101. Zhong , W. , et al. , Cartilage Oligomeric Matrix Protein promotes epithelial-mesenchymal transition by interacting with Transgelin in Colorectal Cancer . Theranostics , 2020 . 10 ( 19 ): p. 8790 – 8806 . OpenUrl CrossRef 102. Kim , E. , et al. , Paracrine influence of human perivascular cells on the proliferation of adenocarcinoma alveolar epithelial cells . Korean J Physiol Pharmacol , 2017 . 21 ( 2 ): p. 161 – 168 . OpenUrl 103. Lee , K. , et al. , Proteome-wide discovery of mislocated proteins in cancer . Genome Res , 2013 . 23 ( 8 ): p. 1283 – 94 . OpenUrl Abstract / FREE Full Text 104. Wang , L. , et al. , Delta/notch-like epidermal growth factor-related receptor (DNER) orchestrates stemness and cancer progression in prostate cancer . Am J Transl Res , 2017 . 9 ( 11 ): p. 5031 – 5039 . OpenUrl 105. Wang , L. , et al. , Quantum dot-based immunofluorescent imaging and quantitative detection of DNER and prognostic value in prostate cancer . Cancer Biomark , 2018 . 22 ( 4 ): p. 683 – 691 . OpenUrl 106. Wang , Z. , et al. , DNER promotes epithelial–mesenchymal transition and prevents chemosensitivity through the Wnt/β-catenin pathway in breast cancer . Cell Death & Disease , 2020 . 11 ( 8 ): p. 642 . 
OpenUrl 107. He , R. , et al. , Transcriptional Profiling Reveals the Regulatory Role of DNER in Promoting Pancreatic Neuroendocrine Neoplasms . Front Genet , 2020 . 11 : p. 587402 . OpenUrl 108. Manara , M.C. , et al. , CD99: A Key Regulator in Immune Response and Tumor Microenvironment . Biomolecules , 2025 . 15 ( 5 ): p. 632 . OpenUrl 109. Pasello , M. , M.C. Manara , and K. Scotlandi , CD99 at the crossroads of physiology and pathology . Journal of cell communication and signaling , 2018 . 12 : p. 55 – 68 . OpenUrl 110. Guerzoni , C. , et al. , CD99 triggering in Ewing sarcoma delivers a lethal signal through p53 pathway reactivation and cooperates with doxorubicin . Clinical Cancer Research , 2015 . 21 ( 1 ): p. 146 – 156 . OpenUrl Abstract / FREE Full Text 111. Buxton , D. , et al. , Frequent expression of CD99 in anaplastic large cell lymphoma: a clinicopathologic and immunohistochemical study of 160 cases . American journal of clinical pathology , 2009 . 131 ( 4 ): p. 574 – 579 . OpenUrl CrossRef PubMed 112. Richardson , A.I. , et al. , p53 expression in large B-cell lymphomas with MYC extra copies and CD99 expression in large B-cell lymphomas in relation to MYC status . Human pathology , 2019 . 86 : p. 21 – 31 . OpenUrl 113. Huang , X. , et al. , CD99 triggers upregulation of miR-9-modulated PRDM1/BLIMP1 in Hodgkin/Reed–Sternberg cells and induces redifferentiation . International Journal of Cancer , 2012 . 131 ( 4 ): p. E382 – E394 . OpenUrl CrossRef PubMed 114. Edlund , K. , et al. , CD99 is a novel prognostic stromal marker in non-small cell lung cancer . International journal of cancer , 2012 . 131 ( 10 ): p. 2264 – 2273 . OpenUrl CrossRef PubMed 115. Ali , A. , V.P. Vaikari , and H. Alachkar , CD99 in malignant hematopoiesis . Experimental hematology , 2022 . 106 : p. 40 – 46 . OpenUrl CrossRef PubMed 116. Chung , S.S. , et al. , CD99 is a therapeutic target on disease stem cells in acute myeloid leukemia and the myelodysplastic syndromes . 
Blood , 2013 . 122 ( 21 ): p. 2891 . OpenUrl CrossRef 117. Manara , M.C. , M. Pasello , and K. Scotlandi , CD99: A Cell Surface Protein with an Oncojanus Role in Tumors . Genes (Basel) , 2018 . 9 ( 3 ). 118. Scotlandi , K. , et al. , CD99 isoforms dictate opposite functions in tumour malignancy and metastases by activating or repressing c-Src kinase activity . Oncogene , 2007 . 26 ( 46 ): p. 6604 – 6618 . OpenUrl CrossRef PubMed 119. Friedman , J.M. , Leptin and the endocrine control of energy balance . Nature metabolism , 2019 . 1 ( 8 ): p. 754 – 764 . OpenUrl 120. Zhang , Y. and S. Chua Jr. , Leptin function and regulation . Comprehensive physiology , 2018 . 8 ( 1 ): p. 351 – 369 . OpenUrl 121. Kim , H.J. , et al. , Association between LEP Gene Polymorphisms and Prostate Cancer Susceptibility in Korean Men . 대한비뇨기종양학회지 , 2011 . 9 ( 3 ): p. 112 – 118 . OpenUrl 122. Gade-Andavolu , R. , et al. , Molecular interactions of leptin and prostate cancer . The Cancer Journal , 2006 . 12 ( 3 ): p. 201 – 206 . OpenUrl 123. Xu , C.-J. , et al. , Leptin promotes proliferation and inhibits apoptosis of prostate cancer cells by regulating ERK1/2 signaling pathway . European Review for Medical & Pharmacological Sciences , 2020 . 24 ( 16 ). 124. Hu , M.B. , et al. , Genetic polymorphisms in leptin, adiponectin and their receptors affect risk and aggressiveness of prostate cancer: evidence from a meta-analysis and pooled-review . Oncotarget , 2016 . 7 ( 49 ): p. 81049 – 81061 . OpenUrl CrossRef PubMed 125. Cleveland , R.J. , et al. , Common genetic variations in the LEP and LEPR genes, obesity and breast cancer incidence and survival . Breast cancer research and treatment , 2010 . 120 : p. 745 – 752 . OpenUrl CrossRef PubMed 126. Jin , T.Y. , et al. , LEP as a potential biomarker in prognosis of breast cancer: systemic review and meta analyses (PRISMA) . Medicine , 2021 . 100 ( 33 ): p. e26896 . OpenUrl 127. Partida-Pérez , M. , et al. 
, Association of LEP and ADIPOQ common variants with colorectal cancer in Mexican patients . Cancer Biomarkers , 2010 . 7 ( 3 ): p. 117 – 121 . OpenUrl 128. Du , M. , et al. , Associations between polymorphisms in leptin and leptin receptor genes and colorectal cancer survival . Cancer Biology & Medicine , 2023 . 20 ( 6 ): p. 438 – 451 . OpenUrl 129. Boroń , D. , et al. , Expression pattern of leptin and its receptors in endometrioid endometrial cancer . Journal of Clinical Medicine , 2021 . 10 ( 13 ): p. 2787 . OpenUrl 130. Bianco , B. , et al. , Endometrial cancer: a genetic point of view . Translational Cancer Research , 2020 . 9 ( 12 ): p. 7706 . OpenUrl 131. Dąbruś , D. , et al. , Evaluation of the impact of cisplatin on variances in the expression pattern of leptin-related genes in endometrial cancer cells . International Journal of Molecular Sciences , 2020 . 21 ( 11 ): p. 4135 . OpenUrl 132. Zhang , S. , et al. , Investigation of LEP and LEPR polymorphisms with the risk of hepatocellular carcinoma: a case–control study in Eastern Chinese Han population . OncoTargets and therapy , 2018 : p. 2083 – 2089 . 133. ↵ Zhong , X. , et al. , Integrating multi-cohort machine learning and clinical sample validation to explore peripheral blood mRNA diagnostic biomarkers for prostate cancer . Cancer Cell Int , 2025 . 25 ( 1 ): p. 158 . OpenUrl 134. ↵ Fang , S. , et al. , Multi-Omic Integration of Blood-Based Tumor-Associated Genomic and Lipidomic Profiles Using Machine Learning Models in Metastatic Prostate Cancer . JCO Clin Cancer Inform , 2023 . 7 : p. e2300057 . OpenUrl 135. ↵ Cosma , G. , et al. , Prostate Cancer: Early Detection and Assessing Clinical Risk Using Deep Machine Learning of High Dimensional Peripheral Blood Flow Cytometric Phenotyping Data . Front Immunol , 2021 . 12 : p. 786828 . OpenUrl 136. ↵ Ramírez-Mena , A. , et al. , Explainable artificial intelligence to predict and identify prostate cancer tissue by gene expression . 
Comput Methods Programs Biomed , 2023 . 240 : p. 107719 . OpenUrl 137. ↵ Antunes , M.E. , et al. , Machine learning models for predicting prostate cancer recurrence and identifying potential molecular biomarkers . Front Oncol , 2025 . 15 : p. 1535091 . OpenUrl 138. Aldakheel , F.M. , et al. , Analysing DNA methylation and transcriptomic signatures to predict prostate cancer recurrence risk . Discov Oncol , 2025 . 16 ( 1 ): p. 110 . OpenUrl 139. ↵ Perera , M. , et al. , Advancing Traditional Prostate-specific Antigen Kinetics in the Detection of Prostate Cancer: A Machine Learning Model . Eur Urol Focus , 2022 . 8 ( 5 ): p. 1204 – 1210 . OpenUrl View the discussion thread. Back to top Previous Next Posted November 06, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following A Minimal Plasma Proteome-Based Biomarker Panel for Accurate Prostate Cancer Diagnosis Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share A Minimal Plasma Proteome-Based Biomarker Panel for Accurate Prostate Cancer Diagnosis Syed Ahsan Shahid , Ahmed Al-Harrasi , Adil Al-Siyabi bioRxiv 2025.11.05.686712; doi: https://doi.org/10.1101/2025.11.05.686712 Share This Article: Copy Citation Tools A Minimal Plasma Proteome-Based Biomarker Panel for Accurate Prostate Cancer Diagnosis Syed Ahsan Shahid , Ahmed Al-Harrasi , Adil Al-Siyabi bioRxiv 2025.11.05.686712; doi: https://doi.org/10.1101/2025.11.05.686712 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Cancer Biology Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41911) Biophysics (21436) Cancer Biology (18578) Cell Biology (25482) Clinical Trials (138) Developmental Biology (13371) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15599) Genomics (22483) Immunology (17728) Microbiology (40364) Molecular Biology (17163) Neuroscience (88537) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15129) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper — AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00