Full text
101,940 characters
· extracted from
preprint-html
· click to expand
Survival Prediction Landscape: An In-Depth Systematic Literature Review on Activities, Methods, Tools, Diseases, and Databases | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Survival Prediction Landscape: An In-Depth Systematic Literature Review on Activities, Methods, Tools, Diseases, and Databases Ahtisham Fazeel Abbasi , Muhammad Nabeel Asim , Sheraz Ahmed , Sebastian Vollmer , Andreas Dengel doi: https://doi.org/10.1101/2024.01.05.24300889 Ahtisham Fazeel Abbasi 1 Deutsches Forschungszentrum für Künstliche Intelligenz (DFKI) , 67663, Kaiserslautern, Germany 2 Rhineland-Palatinate Technical University of Kaiserslautern-Landau , 67663, Kaiserslautern, 
Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: ahtisham.abbasi{at}dfki.de Muhammad Nabeel Asim 1 Deutsches Forschungszentrum für Künstliche Intelligenz (DFKI) , 67663, Kaiserslautern, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sheraz Ahmed 1 Deutsches Forschungszentrum für Künstliche Intelligenz (DFKI) , 67663, Kaiserslautern, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sebastian Vollmer 1 Deutsches Forschungszentrum für Künstliche Intelligenz (DFKI) , 67663, Kaiserslautern, Germany 2 Rhineland-Palatinate Technical University of Kaiserslautern-Landau , 67663, Kaiserslautern, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Andreas Dengel 1 Deutsches Forschungszentrum für Künstliche Intelligenz (DFKI) , 67663, Kaiserslautern, Germany 2 Rhineland-Palatinate Technical University of Kaiserslautern-Landau , 67663, Kaiserslautern, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Data/Code Preview PDF ABSTRACT Survival prediction integrates patient-specific molecular information and clinical signatures to forecast the anticipated time of an event, such as recurrence, death, or disease progression. Survival prediction proves valuable in guiding treatment decisions, optimizing resource allocation, and interventions of precision medicine. The wide range of diseases, the existence of various variants within the same disease, and the reliance on available data necessitate disease-specific computational survival predictors. The widespread adoption of artificial intelligence (AI) methods in crafting survival predictors has undoubtedly revolutionized this field. 
However, the ever-increasing demand for more sophisticated and effective prediction models necessitates the continued creation of innovative advancements. To catalyze these advancements, the need of the hour is to bring existing survival predictors knowledge and insights into a centralized platform. The paper in hand thoroughly examines 22 existing review studies and provides a concise overview of their scope and limitations. Focusing on a comprehensive set of 74 most recent survival predictors across 44 diverse diseases, it delves into insights of diverse types of methods that are used in the development of disease-specific predictors. This exhaustive analysis encompasses the utilized data modalities along with a detailed analysis of subsets of clinical features, feature engineering methods, and the specific statistical, machine or deep learning approaches that have been employed. It also provides insights about survival prediction data sources, open-source predictors, and survival prediction frameworks. Introduction According to World Health Organization (WHO), around ten thousand diseases have been discovered and each disease has unique symptoms, characteristics, and implications on human health 1 . Millions of people died from such diseases in the span of years 2000 to 2019, while cancers, cardiovascular, and infectious diseases persisted as the leading causes of mortality 2 , 3 . Extensive research on the intersection of life and technology has yielded a wide range of therapies and medications for various well-known diseases. However, the core idea behind traditional therapies and medications is based on the “one-size-fits-all” 4 . In this paradigm, a single drug is supposed to effectively treat a medical condition across a variety of patient cohorts i.e., children, old and young populations 4 , 5 . 
In-depth exploration and understanding of living organisms’ inherent biological processes reveal that high variability in genetics and drug responses make one-size-fits-all medication ineffective 4 , 5 . The groundbreaking discoveries of the factors contributing to the limited effectiveness of generalized medications marked the inception of the era of precision medicine 6 , 7 . Precision medicine offers customization in tailored medical treatments based on an individual’s unique genetic makeup, and optimization in drug selection and dosage based on the individual’s lifestyle, and environmental factors 8 . Precision medicine’s adoption and effectiveness have been significantly enhanced by the accurate, cost-effective, and large-scale analysis of molecular information obtained through next-generation sequencing 9 . In the realm of precision medicine, survival prediction plays a pivotal role in tailoring medical treatments to individual needs 10 , 11 . Survival prediction categorizes patients into distinct risk groups that enhance the efficiency of resource allocation for the patients who are likely to gain the most benefit from specific treatments 10 , 11 . It also enables counseling of patients and their families by predicting the expected course of the disease and potential challenges 10 . In addition to medical treatments, survival prediction offers multiple advantages in research, particularly in the area of biomarker discovery and disease understanding 12 , 13 . Survival prediction models provide useful information about the correlation between different features and clinical outcomes. This correlation information enables the identification of novel biomarkers associated with disease prognosis 12 . Moreover, researchers leverage survival prediction to unravel disease heterogeneity which helps to identify distinct subtypes with different survival profiles 14 . 
This knowledge not only aids in the stratification of homogeneous patients in clinical trials but also validates therapeutic targets by assessing their relevance in predicting patient outcomes 15 . Furthermore, it enables the longitudinal monitoring of disease progression that helps to explore critical time points and progression patterns 16 . To expedite advancements in survival prediction research, researchers are harnessing the capabilities of AI algorithms by utilizing extensive survival-related data from public databases such as the Cancer Genome Atlas Program (TCGA) 17 , and NCI Genomic Data Commons (GDC) 18 , 19 – 24 . In addition, the diversity and heterogeneity of diseases hinder the development of a universally applicable survival prediction pipeline 14 , 25 . Following the need for disease-specific predictors, there is a marathon for the development of more accurate and powerful predictors 26 – 28 . Figure 1 illustrates that for the advancement of survival predictors, public databases provide a spectrum of clinical data 29 , 30 and encompass 9 diverse omics data modalities, including gene expression (mRNA), micro RNA (miRNA), DNA methylation, copy number variation (CNV), long non-coding RNA (lncRNA), proteomics, metabolic, whole exome sequencing (WES) and mutation 23 , 26 , 31 , 32 . In each data modality, there exists an array of missing values that hinder survival predictors learning. Extensive research is being conducted to impute missing values by using different techniques such as deletion, multiple, K-nearest neighbor (KNN), and median imputation 33 – 35 . In addition, various normalization methods are also being used to normalize feature space such as quantile, variance threshold, and rank normalizations 36 . Download figure Open in new tab Figure 1. An end-to-end survival prediction pipeline. 
In the development of survival prediction pipelines, researchers are trying to unlock the potential of various data modalities by assessing predictor performance with individual modalities and combinations of multiple data modalities across diverse types of diseases. When data from different modalities is combined, survival predictors’ input feature space becomes very large which impedes the performance of AI approaches 37 . Researchers are trying to explore feature engineering approaches such as random forest importance (RFI), recursive feature elimination (RFE) 38 , principal component analysis (PCA) 31 , 39 , non-negative matrix factorization (NMF) 40 , and autoencoders (AEs) 41 – 43 . Moreover, in an end-to-end survival predictive pipeline, apart from the selection of appropriate data and feature engineering strategy, designing appropriate survival prediction models is also an active area of research. Under different aforementioned directions, the recent 3 years have witnessed around 74 different survival predictors for different diseases. To further accelerate and expedite the development of more powerful predictors, in the last 10 years, from time to time, researchers have published 22 different review articles. These articles primarily aim to summarize the latest trends and developments in data modalities, feature engineering methods, and AI models specifically related to survival prediction. However, the focus of these reviews is often constrained to either a singular disease or multiple subtypes of cancer, highlighting a limited scope within the broader landscape of survival prediction research 37 , 44 – 48 . More comprehensive details about the scope of existing review articles in terms of contributions and drawbacks are summarised in Table 1 and in the section “A Look-back Into Existing Review Studies”. Following the need for a comprehensive review article for survival prediction, the contributions of this paper are manifold: View this table: View inline View popup Table 1.
The scope and limitations of current survey papers. It consolidates a diverse array of 22 survival prediction review papers, bringing together their scopes and limitations under a unified umbrella. This compilation serves as a valuable resource for researchers seeking high-level insights and pertinent information in the field. It provides comprehensive insights into 74 survival prediction articles published between 2020 and 2023. The objective is to delve into diverse aspects of the field, extract and furnish useful information from these articles under the following different research questions and objectives: i) What is the distribution of 74 research articles across 44 different diseases, and how does it vary among cancer subtypes and other diseases? ii) How do studies address the spectrum of survival prediction, from a broader perspective covering multiple cancer subtypes to individual subtypes? iii) What are the predominant survival endpoints used in studies, and how are studies distributed across four endpoints overall survival (OS), disease-free survival (DFS), progression-free survival (PFS), and biochemical recurrence (BC)? iv) What are the most commonly used public and private data sources in existing survival prediction studies and the types of data they encompass? v) What are the most commonly used omics data modalities and their associations with different diseases and survival endpoints? vi) Which clinical features are most commonly employed in survival prediction studies? vii) How have feature engineering techniques evolved across different data modalities, diseases, and survival endpoints in survival prediction studies? viii) Which specific statistical, machine learning (ML), and deep learning (DL) survival prediction algorithms have been applied to diverse diseases and survival endpoints? 
ix) Which survival prediction studies have made their source codes publicly available, and what types of methods are available in open-source survival prediction frameworks? x) What are the most commonly utilized survival prediction evaluation measures? xi) Which conferences and journals predominantly publish survival prediction studies? Background Survival prediction makes use of patient-specific molecular information and clinical signatures to forecast a wide range of events at particular time intervals. The most common events include recurrence, metastasis, response, recovery, hospitalization, and progression of a disease. Some of these events represent similar contexts, i.e., metastasis and progression both contribute to the overall progression of the condition/cancer. Survival prediction events are generally categorized into 4 different survival endpoints namely, overall survival (OS), disease-free survival (DFS), progression-free survival (PFS), and biochemical recurrence (BC). Survival endpoints serve as crucial measures for assessing the outcomes of interventions, indicating the duration until specific events occur. Therefore, events are essentially the occurrences that contribute to the survival endpoints. These endpoints are critical to examine the trajectory of a particular disease. Survival prediction is a time-to-event approach with two distinct aspects, i.e., the survival function and the hazard function. The survival function describes the probability that a subject survives longer than some specified time t . Mathematically, it is expressed as: $$S(t) = P(T > t),$$ where T is the random variable for survival time, t is a specific value of interest for T . For instance, S (10) represents the probability of survival beyond 10 years without experiencing a specific event. As time passes, S ( t ) decreases, reflecting the reduction in the probability of surviving without the occurrence of event E up to time t .
In comparison, the hazard function illustrates the probability of an event E occurring at a specific time interval (Δ t ) with a prior assumption that the event has not taken place. The probability that the event E occurs within a very small time interval Δ t around time t is given by the conditional probability: $$P(t \le T < t + \Delta t \mid T \ge t).$$ Dividing this probability by the length of the time interval (Δt) gives the rate of occurrence of the event at time t . The limit as the time interval (Δt) approaches zero gives the instantaneous rate of occurrence at time t . Mathematically, this is represented as: $$h(t) = \lim_{\Delta t \to 0} \frac{P(t \le T < t + \Delta t \mid T \ge t)}{\Delta t} = \frac{f(t)}{S(t)},$$ where f ( t ) represents the probability density function of survival time. Thus, survival function S ( t ) shows that the subject survives beyond a specific time point and hazard function h(t) complements this by providing a risk rate that a patient does not survive in a specific time interval conditioned on having survived thus far. Moreover, S ( t ) is always monotonically non-increasing in nature; however, h ( t ) is classically assumed to follow increasing Weibull, decreasing Weibull, or lognormal survival curves. A Look-back Into Existing Review Studies In recent years multiple review papers have been published and the objective of each review revolves around summarising fundamental concepts in survival prediction and identifying trends in statistical, ML, and DL algorithms that have been utilized in the development of survival predictors. Table 1 illustrates a high-level overview of the existing 22 review articles in terms of their review scope and limitations. This comprehensive summary aims to assist researchers in locating specific information within relevant articles more effectively. In Table 1 , a comprehensive analysis of the scope of review articles indicates that existing studies can be classified into three distinct groups.
I) 9 review papers primarily focus on the application of DL algorithms in survival prediction 47 , 49 – 56 , II) 7 review papers summarise the application of ML algorithms in survival prediction 37 , 48 , 57 – 61 , and III) 6 review papers summarise survival prediction methods from three different categories namely statistical, ML, and DL methods 44 – 46 , 62 – 64 . On the other hand, in the realm of disease-specific survival predictors, the scope of existing review papers is limited. For instance, 8 papers only summarize survival predictors on a single disease or subtype of cancer, i.e., cervical cancer 46 , glioblastoma 63 , esophageal adenocarcinoma 48 , esophageal and gastroesophageal junction cancer 57 , head and neck squamous cell carcinoma 61 , palliative cancer patients 47 , cardiovascular diseases (CVD) 64 , 65 , and schizophrenia 60 . Although 4 papers cover multiple subtypes of cancer, they cover only a handful of 8 different subtypes, such as breast, lung, gastric, colon, esophageal, ovarian cancers and so on. While the scope of survival prediction extends beyond multiple diseases, existing review papers fall short of summarizing current trends of data modalities, feature engineering approaches and survival prediction models. For example, Deepa et al. 44 specifically address the primary categories of data modalities used for survival prediction, namely multiomics and clinical data. However, the review does not extensively explore trends and patterns related to the 9 different omics types i.e., gene expression (mRNA), micro RNA (miRNA), methylation, copy number variation (CNV), whole exome sequencing (WES), long noncoding RNA (lncRNA), mutation, metabolic, and proteomics, or clinical features associated with distinct cancer subtypes. Similarly, Westerlund et al. 64 do not explore the potential of multiomics data in terms of cardiovascular diseases.
In addition, various review papers completely neglect to address feature engineering in survival prediction 46 , 47 , 52 , 56 , 57 , 62 . For instance, Feldner et al. 37 , despite their focus on dimensionality reduction, fall short in providing a comprehensive summary of current trends in feature engineering approaches with respect to diseases and data modalities. Furthermore, a small portion of these review papers cover details of a few state-of-the-art survival prediction models 49 , 52 , 56 . While current review papers summarize survival prediction pipelines partially, there is a necessity to bring diverse information into a unified platform which offers comprehensive insights into patterns and trends associated with survival prediction pipelines. Results RQ I, II, III: Survival predictors distribution analysis across diseases and survival endpoints The primary aim of this section is to summarise the distribution of survival predictors across various diseases and survival endpoints. Predictors distribution analysis under individual diseases offers insights into the most active trends of predictors associated with specific diseases. This consolidated distribution provides a centralized platform to access valuable information about their disease of interest. Similarly, examining the distribution of articles across survival endpoints is valuable for identifying current trends in forecasting multiple events. This approach not only enhances our understanding of the current state of predictive modeling but also facilitates researchers in efficiently accessing information specific to their desired endpoints. Through this exploration, we aim to contribute to a deeper understanding of the diverse landscape of survival prediction research and its applications across various diseases and endpoints. Table 2 illustrates disease specific predictors distribution for both cancer and other diseases respectively.
In the last 3 years, 60 predictors have been designed for different cancer subtypes related survival prediction 24 , 104 , 108 while only 14 predictors have been designed for other diseases such as cardiovascular diseases, COVID-19, and trauma 29 , 112 , 119 , 120 . View this table: View inline View popup Download powerpoint Table 2. Distribution of survival predictors across individual diseases. To date, more than 100 different cancer subtypes have been identified 121 . However, a deeper analysis of the last 3 years reveals that survival prediction models have been developed for only 36 distinct cancer subtypes, as outlined in Table 2 . Among 36 different subtypes, most of the predictors have been designed for breast cancer, lung adenocarcinoma, ovarian cancer, and glioblastoma. On the other hand, 7 different predictors have been designed for pancancer. Notably, there is a difference between other cancer types and pancancer because under this paradigm predictors simultaneously deal with multiple cancer subtypes. For the development of pancancer based predictors, there exists public data having more than 30 distinct cancer subtypes. However, researchers are utilizing different subsets for the development of predictors. Figure 3 provides an overview of multiple survival prediction studies that encompass a range of cancer subtypes, either within a pancancer context or within the context of predicting survival for different subtypes. A total of 14 studies have taken into account multiple cancer subtypes whereas the majority of the studies have covered only a single cancer subtype such as colorectal cancer 109 , lymphoma 85 , colon adenocarcinoma 39 , gastric cancer 42 and so on. Figures 2 and 4 illustrate predictors distribution across survival endpoints. A majority of studies, 54 (79%), have OS as an endpoint of survival prediction 28 , 82 , 101 , 120 , whereas 7 studies have incorporated multiple survival endpoints in their analysis.
Out of 7 studies, 3 studies have incorporated DFS and BC 22 , 26 , 122 . Two studies have incorporated OS, DFS, and PFS 40 , 108 and 2 studies have OS, and PFS as the survival end-points 31 , 81 . A single study has focused on DFS only 95 , and 2 only on BC 94 , 117 . The rest of studies either did not explicitly specify their endpoints for survival prediction or predominantly concentrated on predicting patients’ survival outcomes without a specific focus on distinct survival endpoints. Download figure Open in new tab Figure 2. Survival endpoint distribution across diverse studies. Download figure Open in new tab Figure 3. Cancer subtypes coverage based on pancancer or individual subtype settings. Download figure Open in new tab Figure 4. Distribution of explored survival prediction streams from existing literature. DFS: disease-free survival, PFS: progression-free survival, OS: overall survival, and BC: biochemical recurrence. RQ IV: Survival prediction data availability in public and private sources and opportunities for development of predictors The development of survival prediction models relies on the quality and quantity of annotated data, which is generated through extensive wet lab experiments. Experimental findings are stored in different types of databases that open new doors for the development of survival prediction applications. However, there exist multiple databases and each database encompasses particular diseases and modality-specific survival data. For instance, CGGA 124 focuses on brain tumors, and MESA 128 contains data related to atherosclerosis. To accelerate the development of more competent survival predictors, it is essential to summarise which database contains which type of disease and what data modalities. In light of research question IV, Table 3 illustrates public databases’ details in terms of the diseases and data modalities they offer. View this table: View inline View popup Download powerpoint Table 3.
The ample collection of survival data within diverse public databases. A deeper analysis of existing survival predictors reveals that among the 74 studies, 54 utilized publicly accessible data from three key databases: the Cancer Genome Atlas Program (TCGA) 17 , NCI Genomic Data Commons (GDC) 18 , and the Gene Expression Omnibus (GEO) 31 , 32 , 72 , 73 , 80 , 82 , 87 , 90 , 91 , 130 , 131 . Apart from public databases, there also exist private databases that have been utilized in existing survival prediction studies 66 , 75 , 81 , 112 , 113 , 117 , 118 . However, these private databases often restrict data access and may require extensive research proposals for data retrieval. Among these, commonly used private databases are Heidelberg University Hospital 30 , COMBO-01 71 , Life cohort 115 , and UNOS 119 . The reliance on private databases for survival prediction creates significant hurdles for research in several ways. Firstly, limited accessibility to such data impedes the reproducibility and verification of study findings by other researchers, hindering the validation and robustness of predictive models. Secondly, the lack of transparency and standardized access procedures for private datasets introduces challenges in benchmarking and comparing different survival prediction models. Lastly, the exclusivity of private databases may contribute to a potential bias in research outcomes, as the diversity and representativeness of the data are often compromised which impacts the generalizability of survival predictions to broader patient cohorts. Public access to databases enables researchers to create survival benchmark datasets that foster the development of survival prediction models. However, many researchers develop datasets without making them public, which hinders transparency and the progress of the broader scientific community. The lack of shared data and presence of multiple datasets associated with a single disease pose a notable challenge in survival prediction.
For instance, it hinders the establishment of standardized testing and benchmarking procedures for newly proposed survival prediction methods, leading to ambiguities in identifying the most advanced techniques. Moreover, recognizing the need for standardization in benchmarking survival prediction models, Wissel et al. 58 introduced benchmark survival datasets tailored for both individual cancer subtypes and pancancer settings. These datasets are accessible at https://survboard.vercel.app/ , contributing to a more uniform and transparent benchmarking framework within the survival prediction landscape. Particularly, here we emphasize the use of these datasets for benchmarking in addition to newly created datasets to have unified benchmarking for cancer-specific survival prediction models. RQ V, VI: Survival prediction data modalities and utilization of their combinations for disease and survival endpoints specific predictors development Following the objective of research question V, the primary focus of this section is to investigate and provide a comprehensive summary of the various data modalities utilized in the development of diverse survival predictors. To address research question V, it describes the distribution of data modalities across predictors associated with four distinct survival endpoints, and 44 different diseases. Furthermore, in response to research question VI, it furnishes information regarding the specific clinical features utilized by various survival prediction studies. Out of 74 different studies, data modalities details of only 68 studies are available. Within this subset, 14 studies exclusively used clinical data, 39 studies utilized multiomics data, and 15 studies investigated the combined potential of both clinical and multiomics data modalities. 
Moreover, based on characteristics of molecular information, omics data is generally categorized into 9 different classes namely gene expression (mRNA), micro RNA (miRNA), methylation, copy number variation (CNV), whole exome sequencing (WES), long noncoding RNA (lncRNA), mutation, metabolic, and proteomics. The specifics of different predictors, in terms of variations in the combinations of clinical and various omics data modalities, are outlined in Table 4 . Among 54 survival prediction studies based on multiomics, 49 studies utilized different combinations of four distinct omics types: mRNA, methylation, miRNA, and CNV 14 , 26 , 27 , 42 , 43 , 69 , 72 , 73 , 77 , 82 , 84 , 89 , 96 , 97 , 100 , 101 , 106 , 108 . Only 7 studies utilized additional modalities such as whole exome sequencing (WES) 26 , 31 , long noncoding RNA (lncRNA) 31 , proteomics 22 , 23 , 108 , 113 , 115 , and mutation data 22 , 23 , 108 , 115 . View this table: View inline View popup Download powerpoint Table 4. Distribution of data modalities across diverse survival prediction studies. The choice of omics type hinges on the specific disease under investigation, as indicated by the disease-wise distribution of omics types in Figure 5 . Out of 9 omics types, mRNA, CNV, miRNA, and methylation have been the most commonly utilized modalities for 33 cancer subtypes i.e., breast cancer 14, 23, 68, 74, 90, 98–100 , pancancer 24 , 91 , 105 – 108 , 131 , colon cancer 39 , 74 – 77 , lung adenocarcinoma 27 , 101 , 102 , and ovarian cancer 72,84,88–90,103 . In addition, mutation data has been utilized for 7 cancer subtypes namely, adult diffuse glioma 69 , breast cancer 23 , cervical cancer 73 , non-small cell lung cancer 95 , ovarian cancer 103 , and pancreatic cancer 32 .
Among 10 data modalities, 3 modalities namely, proteomic, lncRNA and WES have been utilized the least having limited applicability to clear renal cell cancer 31 , pancreatic cancer 26 , breast cancer 23 , localized prostate cancer 22 , and pancancer 107 . In terms of other diseases i.e., COVID-19 and heart diseases, proteomics, methylation, mRNA, metabolic, and methylation have been the only omics types utilized for survival prediction 113 , 115 , 117 . Download figure Open in new tab Figure 5. Distribution of omics data modalities across a diverse set of diseases. The variability in omics-type selection is not solely bound to diseases but notably varies across a wide spectrum of survival endpoints. Figure 6 shows the counts of different omics types that have been utilized for different survival endpoints prediction. In the context of OS prediction, mRNA, miRNA, methylation, and CNV have been primarily utilized in more than 30 studies, with 10 studies based on proteomics, mutation, and metabolic data. However, in terms of DFS and PFS the selection of omics types appears less distinct. These endpoints have been frequently studied in conjunction with OS, predominantly utilizing mRNA, miRNA, and methylation data. This combination suggests a commonality in the predictive factors across these survival endpoints, indicating potential interconnections or shared biological processes. Download figure Open in new tab Figure 6. Distribution of different omics modalities with respect to survival endpoints. Clinical data modality has been utilized in 29 different studies. However, in this modality number of features varied from study to study and it is still unclear which particular set of features is most important. To perform an in-depth analysis, which study utilized which subset of features across diverse cancer subtypes and heart diseases, a comprehensive collection of clinical features is presented in Table 5 . 
In order to better understand and discern the trends in clinical features across diverse diseases, hereby they are placed in 7 different categories i.e., demographic features (6), disease-specific clinical markers (71), treatment-related features (17), laboratory and biomarkers (48), comorbidity and lifestyle factors (18), and other factors (15). View this table: View inline View popup Download powerpoint Table 5. Diverse collection of clinical features utilized in various survival prediction studies. A closer look at the clinical features across diverse diseases reveals a consistent set of fundamental demographic features i.e., age and gender which are prevalent in nearly all studies 85,86,91,111,112,115 . Beyond demographic features, disease-specific features also play a critical role in disease-specific survival prediction. For instance, cancer-related studies invariably focus on tumor stage, histological type, and treatment specifics, underlining the critical role of disease-specific clinical markers in prognosis 22 , 75 . Treatment-related features such as chemotherapy, radiotherapy, and immunotherapy, are particularly evident in cancer subtypes specific studies which reflect the profound influence of therapeutic interventions on survival outcomes 86 , 98 . Moreover, the recurrent inclusion of lifestyle and comorbidity factors ranging from smoking history and BMI to hypertension and diabetes across multiple diseases underlines their pervasive impact on prognostic modeling 101 , 111 . These lifestyle and comorbidity features show the complex relationship between individual health choices and their potential influence on survival outcomes. RQ VII: Feature engineering trends across data modalities and disease-specific survival predictors This section addresses research question VII by investigating the application of feature engineering methods in survival prediction studies across a variety of diseases.
This will help researchers to analyze and understand trends of feature engineering techniques in disease or endpoint specific survival prediction pipelines. Additionally, it delves into the trends in diverse feature engineering methods and their relevance to clinical and multiomics data modalities. This investigation aims to reveal trends and patterns in the dynamic interplay between feature engineering methods and the specific characteristics of different data modalities, and survival endpoints. Table 6 illustrates 26 different feature engineering methods that have been utilized in diverse survival prediction studies. These methods are broadly categorized into five categories, namely supervised methods, incorporating L1 regularized Cox regression 29 , RSF algorithm 29 , Cox regression 103 , least absolute shrinkage and selection operator (lasso) regression 120 , cascaded Wx 105 , recursive feature elimination 38 , Boruta 31 , Akaike information criterion (AIC) regression 114 , variance 72 , lasso analysis 40 , multivariate regression 40 , Chi-squared 118 , mutual information 118 , and ANOVA 39 , 118 . Additionally, network based methods include network based stratification (NBS) 83 , weighted correlation network analysis (WGCNA) 86 , canonical correlation analyses (CCA) 67 , patient similarity networks 38 , and neighborhood component analysis (NCA) 23 . Dimensionality reduction methods include non-negative matrix factorization (NMF) 40 , autoencoders (AEs) 28 , variational autoencoders (VAEs) 43 , principal component analysis (PCA) 39 , and dominant effect of the cancer driver genes (DEOD) 75 , 132 . Moreover, clustering methods comprise Kruskal-Wallis and Gaussian clustering 131 , hierarchical clustering 82 , and Gaussian clustering 131 . View this table: View inline View popup Download powerpoint Table 6. Diverse feature engineering methods for survival prediction. View this table: View inline View popup Download powerpoint Table 7.
Survival analysis libraries, models, and evaluation metrics A comprehensive analysis of feature engineering methods across a range of disease-specific survival prediction studies unveils that supervised methods, such as Cox regression, L1 regularized Cox regression, and RSF algorithm, have been prevalent in diseases like ASCVD, trauma, and ovarian cancer 103 , 120 . On the other hand, network based methods including NBS and WGCNA, have been applied in diseases like KIRP, and hepatocellular carcinoma, which shows the significance of network structures in certain medical contexts 86 . Univariate analyses, including ANOVA, chi-squared, and univariate Cox regression, have been prevalent in diseases such as pancreatic cancer and heart failure, underscoring the significance of statistical testing in identifying relevant features 71 , 118 . Furthermore, dimensionality reduction methods such as PCA, and NMF have been consistently used across various diseases namely, ovarian cancer 103 , lower grade glioma 80 , colon adenocarcinoma 39 , bladder and breast cancers 40 , 70 . In addition, the potential of AEs, and VAEs have also been explored in diseases like glioblastoma multiforme, breast cancer, pancancer, and Lung Adenocarcinoma for feature integration and dimensionality reduction 14 , 28 , 101 . While feature engineering methods exhibit specificity tailored to distinct diseases, their efficacy is influenced by the inherent characteristics of the utilized data. This raises the pertinent question of which particular feature engineering method proves most effective in the context of clinical and multiomics datasets. A thorough analysis of feature engineering methods and their applicability with respect to clinical and multiomics datasets reveals that methods like Cox regression, CCA, AIC, and ANOVA have been quite widely utilized in studies involving only clinical data 29 , 67 , 114 , 118 . 
These methods have been applied to clinical data for multiple reasons; for instance, such methods are interpretable, which is important to gain meaningful insights for healthcare professionals. Clinical data is always multifactorial, which means that multiple features of the data can lead to a specific event, and methods like ANOVA are quite efficient in analyzing such contributors. Although such models have shown promising performance with clinical data, one of the drawbacks of such models is their inability to handle non-linear data, which is the case in terms of multiomics data. Considering similar limitations, multiple methods such as cascaded Wx 105 , RFE 38 , PSN 31 , NMF 40 , Boruta 31 , PCA 82 , variance 72 , DEOD 75 , have been utilized to handle multiomics to capture important interactions among the features and to integrate cross modalities properly. Particularly, here methods such as AEs and VAEs play a significant role and recent studies also show a growing interest in using such methods for dimensionality reduction and feature integration of multiomics and clinical datasets i.e., AEs 26 , 27 , 31 , 39 , 41 – 43 , 76 , 90 , and VAEs 28 , 89 , 100 , 106 . Although the selection of a feature engineering method is tied to the characteristics of the disease and the nature of the data, there is no significant evidence to suggest that it is substantially impacted by survival endpoints such as DFS, PFS, BC, and OS. This assumption arises due to the absence of a consistent pattern in feature engineering method selection across different survival endpoints. Studies, such as 95 , 39 , and 40 , demonstrate a varied use of feature engineering techniques irrespective of the specific survival endpoints (DFS, PFS, BC, or OS). This lack of uniformity implies that feature engineering method selection is driven more by the unique characteristics of the data and disease than by the nature of the survival endpoint itself.
On the basis of various trends and patterns it can be concluded that for heart diseases, univariate analyses and supervised feature engineering methods have been utilized. Conversely, in terms of cancer subtypes a mixture of dimensionality reduction methods is observed with a recent trend toward the AEs. In terms of survival datasets, the prime focus has been to use supervised methods for clinical data and multiple dimensionality reduction methods for multiomics data. Moreover, there are no conclusive remarks that feature engineering methods get affected by the survival endpoints, as the current literature also suggests a varied use of feature engineering methods regardless of the survival endpoints. RQ VIII: Survival Prediction Methods Insights and Distribution Across Disease Types and Survival Endpoints In pursuit of addressing research question VIII, this section presents an overview and insights about statistical, ML, and DL algorithms that have been utilized in existing survival prediction pipelines. It succinctly examines their emerging trends across diseases and survival endpoints. This exploration aims to empower researchers in identifying gaps within disease-specific and survival endpoint-focused studies, ultimately contributing to the enhancement of survival predictive pipelines. Table 8 provides information about 44 diseases and the corresponding survival prediction algorithms utilized in these diseases. 
A deeper analysis of Table 8 shows that Cox-PH and lasso Cox-PH models have been extensively utilized for disease specific survival prediction i.e., ASCVD 29 , 111 , bladder cancer 40 , 82 , colorectal cancer 74 – 77 , hepatocellular carcinoma 43 , 86 , 87 , ovarian cancer 88 – 90 , 103 , lung adenocarcinoma 101 , heart failure 118 , HER2-negative metastatic breast cancer 67 , pancreatic cancer 26 , 71 , trauma 120 , nasopharyngeal carcinoma 66 , triple-negative breast cancer 68 , lymphoma 85 , breast cancer 40 , 81 , 82 , ovarian cancer 88 – 90 , 103 , and lower-grade glioma 80 , cardiovascular disease 112 , 114 – 117 , invasive ductal carcinoma 70 , liver transplantation 119 , gastric cancer 42 , lung cancer 27 , esophageal squamous cell carcinoma 79 , glioma 69 , and liver cancer 41 . RSF has been employed in 13 studies for 6 diseases namely, ASCVD 29 , bladder cancer 82 , gastrointestinal cancer 30 , cervical cancer 73 , liver transplantation 119 , and heart failure 118 . DL model DeepSurv, has been utilized in 5 studies related to gastrointestinal cancer 30 , ASCVD 111 , NSCLC 97 . On the other hand, in the analyzed survival predictive pipelines less frequently utilized methods are i.e., survival SVM 79 , 95 , 120 , partial logistic regression 70 , 75 , log hazard net 75 , 104 , boosting 41 , 112 , stepCox 86 , elastic net 95 , CNNcox 104 , DeepOmix 104 , ordinal Cox-PH 78 , DeepHit 112 , and linear multitask logistic regression (MTLR) 112 . View this table: View inline View popup Download powerpoint Table 8. Distribution of survival predictors across diverse diseases. Furthermore, supplementary Table S3 provides details about predictors distribution with respect to survival endpoints. A detailed analysis reveals, out of 74 predictors, 31, 8, 1, and 6 models have been utilized for OS, DFS, PFS, and BC survival endpoints respectively. Unlike disease-specific predictors, here a mixture of methods is utilized and no particular trend exists. 
To provide a high-level overview of multiple methods that have been utilized in all four survival endpoints we have provided a graphical representation of methods in Figure 7 . As can be seen in Figure 7 , the diverse types of methods that have been utilized in survival predictive pipelines can be categorized into three different categories i.e., statistical, ML, and DL. Statistical methods are broadly classified into three different categories i.e., parametric, semi-parametric, and non-parametric models. Parametric methods make assumptions about the survival time distribution 122 , 143 . Parametric methods include exponential, Weibull, log-normal, gamma models, and so on 143 , 144 . Comparatively, semiparametric methods make no assumptions about the shape of the baseline hazard function (non-parametric). Rather, these methods assume a specific functional form for the effect of covariates (parametric) 145 . In comparison, non-parametric methods do not take into account assumptions about the underlying distribution of survival times and the shape of the hazard function. These methods include Kaplan-Meier, Nelson-Aalen, Breslow, Gehan-Wilcoxon, and life table methods 146 . Statistical methods have certain disadvantages with multiomics based survival prediction 59 . For instance, statistical models assume linear relationships among variables and fail to capture complex and non-linear data patterns 147 . These methods perform poorly on high dimensional data where the number of features is larger than the number of samples. This specific gap is filled by the emergence of AI based models. Various ML models are utilized for survival analysis such as random survival forest 148 , and boosting-based methods 149 . Belle et al., Shivaswamy et al., and Khan et al., 150 – 152 proposed ranking and regression-based survival SVM for survival prediction while handling right censored data.
Particularly, survival SVM is used in three ways for survival prediction i.e., ranking, regression, and combined. Ishwaran et al., 148 proposed RSF where the log-rank test is utilized for splitting, as compared to the Gini impurity of the classical random forest models. Download figure Open in new tab Figure 7. Hierarchical illustration of survival prediction methods under three different categories. DL methods are utilized in two ways to model survival prediction tasks i.e., continuous and discrete time. Models like CoxCC and CoxTime 137 , piecewise constant hazard or PEANN 153 , and DeepSurv 154 are utilized for continuous survival time prediction. Whereas, Nnet-survival 155 , Nnet-survival probability mass function (PMF) 156 , DeepHit and DeepHit Single 157 , multi-task logistic regression (MTLR) 158 , 159 , and BCESurv 160 are utilized to predict survival in a discrete-time setting. RQ IX: Open source tools and libraries potential for development of survival prediction pipelines Following the objective of research question IX, this section summarizes details of open-source libraries and source codes of existing survival predictors. This comprehensive information will facilitate researchers to build upon existing work, fostering a collaborative environment and accelerating the development of robust and effective survival prediction models. Table 9 presents an overview of open-source survival prediction models. Among the 74 distinct survival prediction studies, only 26 have provided publicly accessible source code. Among these studies, 6 studies have utilized R 91 , 94 , 96 , 103 , 109 , 119 and 20 have opted for Python 14 , 24 , 28 , 38 , 72 , 74 , 82 , 83 , 89 , 90 , 95 , 100 , 105 , 111 , 116 , 118 , 131 , 23 , 27 , 106 . A comprehensive analysis of open source codes reveals that a majority of these tools have been developed from scratch without utilizing any specific survival prediction library 14 , 28 , 83 , 95 .
View this table: View inline View popup Download powerpoint Table 9. Summary of open-source survival prediction methods in existing studies. Approximately 10 different survival prediction packages or libraries have been developed. Each library offers a diverse set of preimplemented statistical, ML, and DL survival prediction models. For instance, Pycox 137 primarily focuses on continuous and discrete DL survival prediction models such as CoxTime, CoxCC, MTLR, and so on. Lifelines 134 , scikit-survival 133 , and pysurvival 138 cover a wide range of statistical and ML survival prediction models like Cox-PH, RSF, survival support vector machine, and gradient boosting survival 133 , 134 , 138 . Notably, addressing the lack of interpretability or explainability in the previously discussed libraries, Spytek et al. 142 introduced Survex. This library allows researchers to analyze the features responsible for a specific event by offering different methods for both local and global explanations of various survival prediction models. The selection of a specific library is inherently subjective and depends on factors such as the preferred development platform, choice of survival prediction models, and the specific research question in hand. Therefore, recommendations are made based on the number of survival prediction models and evaluation measures each library offers. For Python, Lifelines 134 and Pycox 137 are recommended, with Lifelines 134 providing a diverse range of statistical and ML models, while Pycox 137 is specialized in DL models. Additionally, for R, mlr3proba 140 is recommended, as it offers a variety of statistical and ML models for survival prediction. Ultimately, selecting a library aligned with individual research needs not only streamlines the development process but also contributes to the overall reliability of survival prediction models. 
RQ X: Strategies for assessing survival predictors: unveiling common evaluation measures The main objective of this section is to provide a concise overview of research question X, which focuses on the commonly employed evaluation measures for survival predictive pipelines. Table 10 shows a compilation of 18 distinct evaluation measures that have been commonly used to evaluate survival prediction pipelines. The survival prediction pipelines can be categorized into two distinct classes namely survival outcome prediction and survival prediction. Details related to these categories is provided in the background section. Out of 18 evaluation measures mentioned in Table 10 , a set of 10 evaluation measures have been employed to assess the performance of survival outcome prediction models. In addition to the aforementioned measures, 8 other evaluation measures have been utilized to assess the performance of survival prediction models. View this table: View inline View popup Download powerpoint Table 10. A summary of evaluation measures used in survival prediction and survival outcome prediction pipelines. In survival prediction category based evaluation measures the objective is to capture two distinct characteristics namely, discrimination and calibration. Specifically, calibration refers to how well the predicted probabilities of survival align with the actual observed survival rates over time. Under this paradigm most widely used evaluation measures are BS, IBS, TD-ROC, and DCA. Discrimination paradigm based evaluation measures capture differentiation between individuals with different survival times. Under this paradigm most widely used measures are C-index, AUC-ROC, and likelihood ratio. On the other hand objective of survival outcome prediction evaluation measures is to assess diverse characteristics of a model i.e., efficacy of the model, overall accurate predictions, biasness towards type I or type II errors. 
Specifically, accuracy and F1 score are used to measure overall accurate predictions, while precision and recall examine the model’s biasness with respect to type I and type II errors. Additionally, MCC provides a comprehensive assessment, taking into account overall accurate predictions, and errors. In addition, AUC-ROC assesses the predictive potential of a model by analyzing the true positive rate (TPR) and true negative rate (TNR) at different thresholds. RQ XI: Publisher and journal-wise distribution of research papers This section addresses research question XI by presenting the distribution of survival prediction literature across diverse journals and publishers. Overall, this analysis not only enables researchers to strategically position their work but also offers opportunities for interdisciplinary collaboration, promoting a more interconnected and dynamic research landscape within the domain of survival prediction. In Figures 8 and 9 , the distribution of survival prediction literature is presented based on journals and publishers. The studies have been published by 16 different publishers, including but not limited to Springer, Elsevier, Oxford Press, and BioMed Central. Notably, around 30 out of 74 survival prediction studies have been disseminated through Springer and BioMed Central. Furthermore, Elsevier has contributed to the field by publishing 10 relevant papers in recent years. Particularly, these studies have been published in more than 50 different conferences/journals, which shows the diversity of the survival prediction landscape. Download figure Open in new tab Figure 8. Journal-wise distribution of articles. Download figure Open in new tab Figure 9. Publisher-wise distribution of articles. Discussion The field of disease survival prediction has become a pivotal aspect of effective healthcare, especially within the domain of precision medicine.
Recognizing the significant variability present among patients within specific diseases, there is an increasing demand and development for disease specific survival predictors. Our analysis reveals that researchers place a profound emphasis on predicting survival in cancer as compared to other diseases, and there are compelling reasons behind this focus. First, cancer exhibits significant variability from one patient to another as compared to other diseases, which highlights the imperative need for cancer survival prediction to explore and comprehend the heterogeneity of cancer. Second, cancer is a leading cause of death worldwide, and effective survival prediction can aid in early detection and intervention, potentially saving lives. Third, a large number of data sources have been developed to make cancer-related data publicly available to accelerate and optimize cancer-related research. Furthermore, to analyze the trajectory of the disease, researchers place great focus on studying different survival endpoints that suit the respective research setting i.e., treatment, progression, recurrence, and death. Among 4 different survival endpoints i.e., OS, DFS, BC, and PFS, OS is often emphasized more in survival prediction studies. Despite the prime focus on OS, the significance of other survival endpoints in understanding disease trajectories cannot be overstated. These survival endpoints help to analyze different characteristics of diseases such as understanding treatment efficacy and durability, treatments that not only extend life but also effectively manage the course of the illness, and markers responsible for disease recurrence. The lack of research in other survival endpoints opens up new research avenues for the AI experts to develop novel methods that can help explore various characteristics related to disease.
Although both public and private databases have been utilized in survival prediction studies, yet the preference for public databases stems from their accessibility and the wealth of information they provide. For instance, TCGA 17 offers a vast array of genomic and clinical data across different cancer types. This invaluable resource aids researchers in developing accurate survival prediction models. Likewise, GDC 18 and GEO 130 offer comprehensive datasets that encompass a wide range of diseases, making them appealing choices for various research endeavors. Furthermore, a crucial observation regarding private data sources is that they are not universally accessible. This argument is supported by the limited accessibility of omics datasets related to cardiovascular diseases. Despite a singular study employing omics data for survival prediction in cardiovascular diseases, the challenge lies in the difficulty of retrieving the original data. Authors often refrain from sharing their datasets, and obtaining access to databases requires extensive proposals, adding a layer of complexity to the development of novel survival prediction pipelines for cardiovascular diseases. This obstacle may impede the advancement of innovative survival prediction pipelines for cardiovascular disease. Overall, the use of omics and clinical data in survival prediction tools marks a significant stride toward precision medicine. The distribution of omics types in survival prediction studies reveals a preference for mRNA, methylation, microRNA, and CNV across various cancer subtypes. In addition, the limited number of multiomics based survival prediction studies in cardiovascular diseases hinders definitive conclusions on the importance of specific omics types. Disease-specific patterns highlight the importance of tailored clinical markers, prominently seen in cancer studies with a focus on tumor stage and histological type. 
Treatment-related features, notably chemotherapy and radiotherapy, underscore the impact of therapeutic interventions on survival predictions. Moreover, clinical features along with omics data with diverse molecular aspects are utilized together to improve the performance of survival prediction models. Diverse survival prediction research accentuates the pivotal role of leveraging patient information, such as medical history, demographics, disease-related features, and diagnostic records. This trend reflects an increasing recognition of the potential of clinical data in not only understanding disease progression but also in guiding personalized treatment strategies and enhancing patient care. A recent benchmark study on survival prediction models with multiomics and clinical data also shows the significant role of clinical data in survival prediction across multiple cancer subtypes 45 . In addition, our analysis reveals that increasing the total number of data modalities does not necessarily offer improved survival predictions, yet data modalities are quite specific to the disease and survival endpoints. Therefore, the selection of data modalities should be made very carefully as, rather than improving the overall performance, it can induce undesirable noise in the analysis. One of the common problems in survival analysis is data censoring. Censoring arises when there is incomplete information about the time points and/or events of some subjects in a study. There are different types of censoring i.e., I) Right Censoring is the most common type of data censoring, where an event does not occur for some subjects by the end of study or by the last time point at which data is collected. For example, a subject withdraws from the study or there is a loss to follow-up for a specific subject. II) Left Censoring is the least common type of censoring where the event may occur before the start of the study or during the data collection phase.
III) Interval Censoring arises when the event of interest occurs in a time interval but the exact time point is not known. In survival analysis, three assumptions are taken into account to infer censored data i.e., I) Independent Censoring: assumes that the censoring times for multiple subjects are independent of each other. II) Random censoring assumes that the time t at which individuals are censored must be random and the failure rate for subjects who are censored is assumed to be equal to the failure rate for subjects who remained in the risk set who are not censored. III) Non-informative censoring occurs if the distribution of survival times (T) provides no information about the distribution of censorship times (C), and vice versa. Although data censoring is quite important in terms of survival prediction, it has not been discussed and dealt with properly in the existing studies. We recommend incorporating comprehensive details of data censoring in future survival prediction studies. Particularly, details on how each type of data censoring is handled should not be neglected. Our analysis of the utilization of feature engineering methods raises two crucial points. First, even though a plethora of methods have been already tested for various survival prediction studies, autoencoder based methods tend to reduce the dimensionality of omics data modalities more efficiently. Second, the rest of the methods work much better with clinical features. The success of feature engineering approaches is contingent upon how well the chosen technique matches the inherent properties of the data. This highlights the importance of large-scale benchmark studies in guiding the selection of feature engineering strategies for the development of accurate predictive pipelines. With an aim to evaluate the performance of predictive pipelines, diverse types of evaluation measures have been developed.
Each evaluation measure addresses a specific aspect of survival prediction models, precluding the possibility of any single metric being universally ideal for a comprehensive evaluation of survival prediction. For instance, C-index estimates the robustness and discriminatory power of the survival prediction model. In addition, BS and IBS measure the accuracy of a model on time distribution. Moreover, log-rank p-value evaluates the potential of the model by testing the differences in different survival groups. Although these measures are the most commonly utilized, there are diverse other evaluation measures for similar purposes i.e., restricted mean survival time (RMST), odds ratio 22 , Kappa for inter-rater reliability 107 , integrated absolute error (IAE), integrated square error (ISE), mean absolute error (MAE), integrated AUC (IAUC), time-dependent integrated discrimination improvement, and time-dependent net reclassification improvement (NRI). Furthermore, while these individual measures provide valuable insights, it is noteworthy to mention that their collective application offers a more comprehensive evaluation. Therefore, we recommend utilizing multiple evaluation measures to assess discrimination and calibration of survival prediction models. Methodology This section explains different steps or stages of preferred reporting items for systematic review and meta-analyses (PRISMA) strategy 161 , which is used to gather relevant papers on survival analysis. Figure 10 provides a visual representation of various stages from PRISMA that are summarised in the following subsections. Download figure Open in new tab Figure 10. A step-by-step process for articles search and their inclusion or exclusion criteria to generate a set of studies for further in-depth trends analysis Search Strategy In Figure 10 , the identification stage illustrates combinations of different keywords that are used to search research articles.
The keywords block has two different types of operators, ‘∧’ and ‘∨’. On the basis of these operators one keyword from each block is selected and various search queries are formulated, such as “SURVIVAL PREDICTION AND AI AND OMICS”, “SURVIVAL PREDICTION AND AI AND Multiomics”, “SURVIVAL PREDICTION AND Machine Learning AND OMICS”, and so on. These queries are utilized in literature search engines like Lens ( https://www.lens.org/ ) and Google Scholar for literature search from Jan 2020 to Jul 2023. Screening Strategy With an aim to retain literature related to survival prediction, two different screenings are performed, and articles are excluded on the basis of the following criteria: Articles that make use of only image-based datasets for survival prediction. Articles that do not make use of ML, DL, or statistical methods for survival prediction. Articles with closed access. Initially, guided by the title and abstract of the articles, more than 900 studies are discarded. Subsequently, at the final step, based on a comprehensive review of the full text a second screening is performed, resulting in the exclusion of an additional 20 studies. Ultimately, 74 papers are selected for the final comparison and discussion of survival prediction. Data Availability The data is present in different GitHub repositories and public databases. Additional information Supplementary tables can be found in additional files. Table S1 entails information about survival prediction studies. In addition, Table S2 presents distribution of data modalities across survival prediction studies. Table S3 shows distribution of survival endpoints across different studies. Table S4 provides a short summary of each study included in this review paper. Tables S5, S6, and S7 present information about evaluation measures and the journal- and publisher-wise distribution of survival prediction literature. Author contributions statement A.F.A. and M.N.A. conducted the literature review; S.V., A.D., and S.A. analyzed the results. 
All authors reviewed the manuscript. Competing interests The authors declare no competing interests. References 1. ↵ Haendel , M. et al. How many rare diseases are there? Nat. reviews drug discovery 19 , 77 – 78 ( 2020 ). OpenUrl 2. ↵ Jamison , D. T. Disease control priorities: improving health and reducing poverty . The Lancet 391 , e11 – e14 ( 2018 ). OpenUrl 3. ↵ World Health Organization . The top 10 causes of death . https://www.who.int/news-room/fact-sheets/detail/the-top-10-causes-of-death ( 2020 ). Accessed: January 4, 2024 . 4. ↵ Sellin , J. A. Does one size fit all? patents, the right to health and access to medicines . Neth. Int. Law Rev . 62 , 445 – 473 ( 2015 ). OpenUrl 5. ↵ Al-Lazikani , B. , Banerji , U. & Workman , P. Combinatorial drug therapy for cancer in the post-genomic era . Nat. biotechnology 30 , 679 – 692 ( 2012 ). OpenUrl CrossRef PubMed 6. ↵ Kosorok , M. R. & Laber , E. B. Precision medicine . Annu. review statistics its application 6 , 263 – 286 ( 2019 ). OpenUrl 7. ↵ Ashley , E. A. Towards precision medicine . Nat. Rev. Genet . 17 , 507 – 522 ( 2016 ). OpenUrl CrossRef PubMed 8. ↵ Farrokhi , M. et al. Role of precision medicine and personalized medicine in the treatment of diseases . Kindle 3 , 1 – 164 ( 2023 ). OpenUrl 9. ↵ Kamps , R. et al. Next-generation sequencing in oncology: genetic diagnosis, risk prediction and cancer classification . Int. journal molecular sciences 18 , 308 ( 2017 ). OpenUrl 10. ↵ Billheimer , D. , Gerner , E. W. , McLaren , C. E. & LaFleur , B. Combined benefit of prediction and treatment: a criterion for evaluating clinical prediction models . Cancer informatics 13 , CIN – S13780 ( 2014 ). OpenUrl 11. ↵ Tsimberidou , A.-M. et al. Long-term overall survival and prognostic score predicting survival: the impact study in precision medicine . J. Hematol. & Oncol . 12 , 1 – 12 ( 2019 ). OpenUrl CrossRef 12. ↵ Sarma , A. , Calfee , C. S. & Ware , L. B. Biomarkers and precision medicine: state of the art . 
Critical care clinics 36 , 155 – 165 ( 2020 ). OpenUrl 13. ↵ Chen , Y.-C. , Lee , U. J. , Tsai , C.-A. & Chen , J. J. Development of predictive signatures for treatment selection in precision medicine with survival outcomes . Pharm. Stat . 17 , 105 – 116 ( 2018 ). OpenUrl 14. ↵ Hao , Y. , Jing , X.-Y. & Sun , Q. Cancer survival prediction by learning comprehensive deep feature representation for multiple types of genetic data . ( 2023 ). 15. ↵ Glare , P. et al. A systematic review of physicians’ survival predictions in terminally ill cancer patients . Bmj 327 , 195 ( 2003 ). OpenUrl Abstract / FREE Full Text 16. ↵ Carobbio , A. et al. A multistate model of survival prediction and event monitoring in prefibrotic myelofibrosis . Blood cancer journal 10 , 100 ( 2020 ). OpenUrl 17. ↵ Tomczak , K. , Czerwińska , P. & Wiznerowicz , M. Review the cancer genome atlas (tcga): an immeasurable source of knowledge . Contemp. Oncol. Współczesna Onkologia 2015 , 68 – 77 ( 2015 ). OpenUrl 18. ↵ Jensen , M. A. , Ferretti , V. , Grossman , R. L. & Staudt , L. M. The nci genomic data commons as an engine for precision medicine . Blood, The J. Am. Soc. Hematol . 130 , 453 – 459 ( 2017 ). OpenUrl 19. ↵ Arjmand , B. et al. Machine learning: a new prospect in multi-omics data analysis of cancer . Front. Genet . 13 , 824451 ( 2022 ). OpenUrl 20. Shen , J. et al. Artificial intelligence versus clinicians in disease diagnosis: systematic review . JMIR medical informatics 7 , e10010 ( 2019 ). OpenUrl 21. Mirbabaie , M. , Stieglitz , S. & Frick , N. R. Artificial intelligence in disease diagnostics: A critical review and classification on the current state of research guiding future direction . Heal. Technol . 11 , 693 – 731 ( 2021 ). OpenUrl 22. ↵ Pellegrini , M. Accurate prognosis for localized prostate cancer through coherent voting networks with multiomic and clinical data . Sci. Reports 13 , 7875 ( 2023 ). OpenUrl 23. ↵ Malik , V. , Kalakoti , Y. & Sundar , D. 
Deep learning assisted multi-omics integration for survival and drugresponse prediction in breast cancer . Bmc Genomics 22 , 1 – 11 ( 2021 ). OpenUrl CrossRef 24. ↵ Fan , Z. , Jiang , Z. , Liang , H. & Han , C. Pancancer survival prediction using a deep learning architecture with multimodal representation and integration . Bioinforma. Adv . 3 , vbad006 ( 2023 ). OpenUrl 25. ↵ Kourou , K. , Exarchos , T. P. , Exarchos , K. P. , Karamouzis , M. V. & Fotiadis , D. I. Machine learning applications in cancer prognosis and prediction . Comput. structural biotechnology journal 13 , 8 – 17 ( 2015 ). OpenUrl 26. ↵ Baek , B. & Lee , H. Prediction of survival and recurrence in patients with pancreatic cancer by integrating multiomics data . Sci. reports 10 , 18951 ( 2020 ). OpenUrl 27. ↵ Jiang , Y. , Alford , K. , Ketchum , F. , Tong , L. & Wang , M. D. Tlsurv: Integrating multi-omics data by multistage transfer learning for cancer survival prediction . In Proceedings of the 11th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics , 1 – 10 ( 2020 ). 28. ↵ Benkirane , H. , Pradat , Y. , Michiels , S. & Cournède , P.-H. Customics: A versatile deep-learning based strategy for multi-omics integration . PLoS Comput. Biol . 19 , e1010921 ( 2023 ). OpenUrl 29. ↵ Qian , X. et al. Study on the prediction model of atherosclerotic cardiovascular disease in the rural xinjiang population based on survival analysis . BMC Public Heal . 23 , 1 – 11 ( 2023 ). OpenUrl 30. ↵ Jung , J.-O. et al. Machine learning for optimized individual survival prediction in resectable upper gastrointestinal cancer . J. Cancer Res. Clin. Oncol . 149 , 1691 – 1702 ( 2023 ). OpenUrl 31. ↵ Jiang , A. et al. Establishment of a prognostic prediction and drug selection model for patients with clear cell renal cell carcinoma by multiomics data analysis . Oxidative Medicine Cell. Longev . 2022 ( 2022 ). 32. ↵ Han , M. , He , J. & Jiao , X. 
Research on prognostic risk assessment algorithm based on graph neural net-works and attention mechanisms . In 2022 International Conference on Information Technology, Communication Ecosystem and Management (ITCEM) , 101 – 106 ( IEEE , 2022 ). 33. ↵ Chai , H. et al. Integrating multi-omics data through deep learning for accurate cancer prognosis prediction . Comput. biology medicine 134 , 104481 ( 2021 ). OpenUrl 34. García-Laencina , P. J. , Abreu , P. H. , Abreu , M. H. & Afonoso , N. Missing data imputation on the 5-year survival prediction of breast cancer patients with unknown discrete values . Comput. biology medicine 59 , 125 – 133 ( 2015 ). OpenUrl 35. ↵ Van Buuren , S. , Boshuizen , H. C. & Knook , D. L. Multiple imputation of missing blood pressure covariates in survival analysis . Stat. medicine 18 , 681 – 694 ( 1999 ). OpenUrl 36. ↵ Ni , A. & Qin , L.-X. Performance evaluation of transcriptomics data normalization for survival risk prediction . Briefings Bioinforma . 22 , bbab257 ( 2021 ). OpenUrl 37. ↵ Feldner-Busztin , D. et al. Dealing with dimensionality: the application of machine learning to multi-omics data . Bioinformatics 39 , btad021 ( 2023 ). OpenUrl 38. ↵ Wang , C. , Lue , W. , Kaalia , R. , Kumar , P. & Rajapakse , J. C. Network-based integration of multi-omics data for clinical outcome prediction in neuroblastoma . Sci. Reports 12 , 15425 ( 2022 ). OpenUrl 39. ↵ Lv , J. , Wang , J. , Shang , X. , Liu , F. & Guo , S. Survival prediction in patients with colon adenocarcinoma via multiomics data integration using a deep learning algorithm . Biosci. Reports 40 , BSR20201482 ( 2020 ). OpenUrl 40. ↵ Tang , C. , Yu , M. , Ma , J. & Zhu , Y. Metabolic classification of bladder cancer based on multi-omics integrated analysis to predict patient prognosis and treatment response . J. translational medicine 19 , 1 – 15 ( 2021 ). OpenUrl 41. ↵ Wang , Z. et al. 
An integration framework for liver cancer subtype classification and survival prediction based on multi-omics data . In Intelligent Computing Methodologies: 16th International Conference, ICIC 2020, Bari, Italy, October 2–5, 2020, Proceedings, Part III 16 , 247 – 257 ( Springer , 2020 ). 42. ↵ Li , Y. , Sun , R. , Zhang , Y. , Yuan , Y. & Miao , Y. A methylation-based mrna signature predicts survival in patients with gastric cancer . Cancer Cell Int . 20 , 1 – 10 ( 2020 ). OpenUrl CrossRef 43. ↵ Owens , A. R. , McInerney , C. E. , Prise , K. M. , McArt , D. G. & Jurek-Loughrey , A. Novel deep learning-based solution for identification of prognostic subgroups in liver cancer (hepatocellular carcinoma) . BMC bioinformatics 22 , 1 – 22 ( 2021 ). OpenUrl CrossRef 44. ↵ Deepa , P. & Gunavathi , C. A systematic review on machine learning and deep learning techniques in cancer survival prediction . Prog. Biophys. Mol. Biol . ( 2022 ). 45. ↵ Herrmann , M. , Probst , P. , Hornung , R. , Jurinovic , V. & Boulesteix , A.-L. Large-scale benchmark study of survival prediction methods using multi-omics data . Briefings bioinformatics 22 , bbaa167 ( 2021 ). OpenUrl 46. ↵ Rahimi , M. , Akbari , A. , Asadi , F. & Emami , H. Cervical cancer survival prediction by machine learning algorithms: a systematic review . ( 2023 ). 47. ↵ Pobar , I. , Job , M. , Holt , T. , Hargrave , C. & Hickey , B. Prognostic tools for survival prediction in advanced cancer patients: A systematic review . J. Med. Imaging Radiat. Oncol . 65 , 806 – 816 ( 2021 ). OpenUrl 48. ↵ Boshier , P. R. et al. Systematic review and validation of clinical models predicting survival after oesophagectomy for adenocarcinoma . Br. J. Surg . 109 , 418 – 425 ( 2022 ). OpenUrl 49. ↵ Wiegrebe , S. , Kopper , P. , Sonabend , R. & Bender , A. Deep learning for survival analysis: A review . arXiv preprint arXiv:2305.14961 ( 2023 ). 50. Salerno , S. & Li , Y. High-dimensional survival analysis: Methods and applications . Annu. 
review statistics its application 10 , 25 – 49 ( 2023 ). OpenUrl 51. Bakasa , W. & Viriri , S. Pancreatic cancer survival prediction: a survey of the state-of-the-art . Comput. Math. Methods Medicine 2021 , 1 – 17 ( 2021 ). OpenUrl 52. ↵ Ahmed , F. E. Artificial neural networks for diagnosis and survival prediction in colon cancer . Mol. cancer 4 , 1 – 12 ( 2005 ). OpenUrl CrossRef PubMed Web of Science 53. Altuhaifa , F. A. , Win , K. T. & Su , G. Predicting lung cancer survival based on clinical data using machine learning: A review . Comput. Biol. Medicine 107338 ( 2023 ). 54. Wekesa , J. S. & Kimwele , M. A review of multiomics data integration through deep learning approaches for disease diagnosis, prognosis, and treatment . Front. Genet . 14 ( 2023 ). 55. Kvamme , H. & Borgan , ø. Continuous and discretetime survival prediction with neural networks . Lifetime data analysis 27 , 710 – 736 ( 2021 ). OpenUrl CrossRef 56. ↵ Kantidakis , G. , Hazewinkel , A.-D. , Fiocco , M. et al. Neural networks for survival prediction in medicine using prognostic factors: A review and critical appraisal . Comput. Math. Methods Medicine 2022 ( 2022 ). 57. ↵ Gupta , V. et al. Survival prediction tools for esophageal and gastroesophageal junction cancer: A systematic review . The J. thoracic cardiovascular surgery 156 , 847 – 856 ( 2018 ). OpenUrl 58. ↵ Wissel , D. et al. Survboard: standardised benchmarking for multi-omics cancer survival models . bioRxiv 2022 – 11 ( 2022 ). 59. ↵ Lee , S. & Lim , H. Review of statistical methods for survival analysis using genomic data . Genomics & informatics 17 ( 2019 ). 60. ↵ Guan , F. et al. Integrative omics of schizophrenia: from genetic determinants to clinical classification and risk prediction . Mol. Psychiatry 27 , 113 – 126 ( 2022 ). OpenUrl 61. ↵ Mo , L. et al. Comparisons of forecasting for survival outcome for head and neck squamous cell carcinoma by using machine learning models based on multi-omics . Curr. 
Genomics 23 , 94 ( 2022 ). OpenUrl 62. ↵ Bashiri , A. , Ghazisaeedi , M. , Safdari , R. , Shahmoradi , L. & Ehtesham , H. Improving the prediction of survival in cancer patients by using machine learning techniques: experience of gene expression data: a narrative review . Iran. journal public health 46 , 165 ( 2017 ). OpenUrl 63. ↵ Tewarie , I. A. et al. Survival prediction of glioblastoma patients—are we there yet? a systematic review of prognostic modeling for glioblastoma and its clinical potential . Neurosurg. review 44 , 2047 – 2057 ( 2021 ). OpenUrl 64. ↵ Westerlund , A. M. , Hawe , J. S. , Heinig , M. & Schunkert , H. Risk prediction of cardiovascular events by exploration of molecular data with explainable artificial intelligence . Int. J. Mol. Sci . 22 , 10291 ( 2021 ). OpenUrl 65. ↵ Kresoja , K.-P. , Unterhuber , M. , Wachter , R. , Thiele , H. & Lurz , P. A cardiologist’s guide to machine learning in cardiovascular disease prognosis prediction . Basic research cardiology 118 , 10 ( 2023 ). OpenUrl 66. ↵ Miao , S. et al. Development and validation of a risk prediction model for overall survival in patients with nasopharyngeal carcinoma: a prospective cohort study in china . Cancer Cell Int . 22 , 1 – 11 ( 2022 ). OpenUrl 67. ↵ Wang , J. et al. Multi-omics fusion analysis models with machine learning predict survival of her2-negative metastatic breast cancer: a multicenter prospective observational study . Chin. Med. J . 136 , 863 – 865 ( 2023 ). OpenUrl 68. ↵ Zhang , J. , Zhang , M. , Tian , Q. & Yang , J. A novel model associated with tumor microenvironment on predicting prognosis and immunotherapy in triple negative breast cancer . Clin. Exp. Medicine 1 – 15 ( 2023 ). 69. ↵ Yang , Q. et al. Integrating genomic data with transcriptomic data for improved survival prediction for adult diffuse glioma . J. Cancer 11 , 3794 ( 2020 ). OpenUrl 70. ↵ Lin , Z. et al. A multi-omics signature to predict the prognosis of invasive ductal carcinoma of the breast . 
Comput. Biol. Medicine 151 , 106291 ( 2022 ). OpenUrl 71. ↵ Zhou , H.-F. et al. Survival prediction for patients with malignant biliary obstruction caused by pancreatic cancer undergoing biliary drainage: the combo-pas model . Surg. Endosc . 37 , 1943 – 1955 ( 2023 ). OpenUrl 72. ↵ Zhao , L. et al. Deepomix: A scalable and interpretable multi-omics deep learning framework and application in cancer survival analysis . Comput. structural biotechnology journal 19 , 2719 – 2725 ( 2021 ). OpenUrl 73. ↵ Hu , Q. et al. A new hpv score system predicts the survival of patients with cervical cancers . Front. Genet . 12 , 747090 ( 2021 ). OpenUrl 74. ↵ Zhang , J. Z. , Xu , W. & Hu , P. Tightly integrated multiomics-based deep tensor survival model for time-to-event prediction . Bioinformatics 38 , 3259 – 3266 ( 2022 ). OpenUrl 75. ↵ Lee , C.-J. et al. Machine learning with in silico analysis markedly improves survival prediction modeling in colon cancer patients . Cancer Medicine 12 , 7603 – 7615 ( 2023 ). OpenUrl 76. ↵ Yang , H. et al. A novel prognostic model based on multiomics features predicts the prognosis of colon cancer patients . Mol. Genet. & Genomic Medicine 8 , e1255 ( 2020 ). OpenUrl 77. ↵ Tong , D. et al. Improving prediction performance of colon cancer prognosis based on the integration of clinical and multi-omics data . BMC Med. Informatics Decis. Mak . 20 , 1 – 15 ( 2020 ). OpenUrl 78. ↵ Bichindaritz , I. & Liu , G. Adaptive multi-omics survival analysis in cancer . In Innovation in Medicine and Healthcare: Proceedings of 10th KES-InMed 2022 , 51 – 62 ( Springer , 2022 ). 79. ↵ Yu , J. , Wu , X. , Lv , M. et al. A model for predicting prognosis in patients with esophageal squamous cell carcinoma based on joint representation learning . oncol lett 20 ( 6 ): 1 – 10 ( 2020 ). OpenUrl CrossRef PubMed 80. ↵ Wu , W. et al. A novel multi-omics analysis model for diagnosis and survival prediction of lower-grade glioma patients . Front. Oncol . 12 , 729002 ( 2022 ). 
OpenUrl 81. ↵ Chauhan , P. S. et al. Urine cell-free dna multi-omics to detect mrd and predict survival in bladder cancer patients . npj Precis. Oncol . 7 , 6 ( 2023 ). OpenUrl 82. ↵ Chai , H. , Zhang , Z. , Wang , Y. & Yang , Y. Predicting bladder cancer prognosis by integrating multi-omics data through a transfer learning-based cox proportional hazards network . CCF Transactions on High Perform. Comput . 3 , 311 – 319 ( 2021 ). OpenUrl 83. ↵ Shetty , K. S. , Jose , A. , Bani , M. & Vinod , P. Network diffusion-based approach for survival prediction and identification of biomarkers using multi-omics data of papillary renal cell carcinoma . Mol. Genet. Genomics 1 – 12 ( 2023 ). 84. ↵ Tong , L. , Wu , H. & Wang , M. D. Integrating multiomics data by learning modality invariant representations for improved prediction of overall survival of cancer . Methods 189 , 74 – 85 ( 2021 ). OpenUrl 85. ↵ Li , X. et al. Development and validation of prediction model for overall survival in patients with lymphoma: a prospective cohort study in china . BMC Med. Informatics Decis. Mak . 23 , 1 – 11 ( 2023 ). OpenUrl 86. ↵ Wang , X. et al. Machine learning integrations develop an antigen-presenting-cells and t-cells-infiltration derived lncrna signature for improving clinical outcomes in hepatocellular carcinoma . BMC cancer 23 , 1 – 16 ( 2023 ). OpenUrl 87. ↵ Zhang , R. et al. Using integrated multi-omics data analysis to identify 5-gene signature for predicting survival of patients with hepatocellular carcinoma . ( 2022 ). 88. ↵ Pawar , A. , Chowdhury , O. R. , Chauhan , R. , Talole , S. & Bhattacharjee , A. Identification of key gene signatures for the overall survival of ovarian cancer . J. Ovarian Res . 15 , 1 – 13 ( 2022 ). OpenUrl 89. ↵ Hira , M. T. et al. Integrated multi-omics analysis of ovarian cancer using variational autoencoders . Sci. reports 11 , 6265 ( 2021 ). OpenUrl 90. ↵ Wu , X. & Fang , Q. 
Stacked autoencoder based multiomics data integration for cancer survival prediction . arXiv preprint arXiv:2207.04878 ( 2022 ). 91. ↵ Redekar , S. S. , Varma , S. L. & Bhattacharjee , A. Identification of key genes associated with survival of glioblastoma multiforme using integrated analysis of tcga datasets . Comput. Methods Programs Biomed. Up-dat . 2 , 100051 ( 2022 ). OpenUrl 92. Kazerooni , A. F. et al. Multi-omic prediction of overall survival in patients with glioblastoma: Additive and synergistic value of clinical measures, radiomics, and genomics . ( 2021 ). 93. Du , J. et al. Identification of prognostic model and biomarkers for cancer stem cell characteristics in glioblastoma by network analysis of multi-omics data and stemness indices . Front. cell developmental biology 8 , 558961 ( 2020 ). OpenUrl 94. ↵ Li , R. et al. Extended application of genomic selection to screen multiomics data for prognostic signatures of prostate cancer . Briefings Bioinforma . 22 , bbaa197 ( 2021 ). OpenUrl 95. ↵ Manganaro , L. et al. Non-small cell lung cancer survival estimation through multi-omic two-layer svm: A multi-omics and multi-sources integrative model . Curr. Bioinforma . 18 , 658 – 669 ( 2023 ). OpenUrl 96. ↵ Ellen , J. G. , Jacob , E. , Nikolaou , N. & Markuzon , N. Autoencoder-based multimodal prediction of non-small cell lung cancer survival . Sci. Reports 13 , 15761 ( 2023 ). OpenUrl 97. ↵ Zhang , Z.-S. , Xu , F. , Jiang , H.-J. & Chen , Z.-H. Prognostic prediction for non-small-cell lung cancer based on deep neural network and multimodal data . In Intelligent Computing Theories and Application: 17th International Conference, ICIC 2021, Shenzhen, China, August 12–15, 2021, Proceedings, Part III 17 , 549 – 560 ( Springer , 2021 ). 98. ↵ Othman , N. A. , Abdel-Fattah , M. A. & Ali , A. T. A hybrid deep learning framework with decision-level fusion for breast cancer survival prediction . Big Data Cogn. Comput . 7 , 50 ( 2023 ). OpenUrl 99. Zhou , L. 
, Rueda , M. & Alkhateeb , A. Identifying biomarkers of nottingham prognosis index in breast cancer survivability . In Proceedings of the 12th ACM Conference on Bioinformatics, Computational Biology, and Health Informatics , 1 – 9 ( 2021 ). 100. ↵ Tong , L. , Mitchel , J. , Chatlin , K. & Wang , M. D. Deep learning based feature-level integration of multi-omics data for breast cancer patients survival analysis . BMC medical informatics decision making 20 , 1 – 12 ( 2020 ). OpenUrl 101. ↵ Bhat , A. R. & Hashmy , R. Hierarchical autoencoderbased multi-omics subtyping and prognosis prediction framework for lung adenocarcinoma . Int. J. Inf. Technol . 1 – 9 ( 2023 ). 102. ↵ Lee , T.-Y. , Huang , K.-Y. , Chuang , C.-H. , Lee , C.-Y. & Chang , T.-H. Incorporating deep learning and multiomics autoencoding for analysis of lung adenocarcinoma prognostication . Comput. Biol. Chem . 87 , 107277 ( 2020 ). OpenUrl 103. ↵ Zhang , S. , Zeng , X. , Lin , S. , Liang , M. & Huang , H. Identification of seven-gene marker to predict the survival of patients with lung adenocarcinoma using integrated multi-omics data analysis . J. Clin. Lab. Analysis 36 , e24190 ( 2022 ). OpenUrl 104. ↵ Majji , R. , Rajeswari , R. , Vidyadhari , C. & Cristin , R. Squirrel search deer hunting-based deep recurrent neural network for survival prediction using pan-cancer gene expression data . The Comput. J . 66 , 245 – 266 ( 2023 ). OpenUrl 105. ↵ Yin , Q. , Chen , W. , Zhang , C. & Wei , Z. A convolutional neural network model for survival prediction based on prognosis-related cascaded wx feature selection . Lab. Investig . 102 , 1064 – 1074 ( 2022 ). OpenUrl 106. ↵ Zhang , X. , Xing , Y. , Sun , K. & Guo , Y. Omiembed: a unified multi-task deep learning framework for multiomics data . Cancers 13 , 3047 ( 2021 ). OpenUrl 107. ↵ Zheng , X. , Amos , C. I. & Frost , H. R. Pan-cancer evaluation of gene expression and somatic alteration data for cancer prognosis prediction . BMC cancer 21 , 1 – 11 ( 2021 ). 
OpenUrl CrossRef 108. ↵ Tan , K. , Huang , W. , Hu , J. & Dong , S. A multi-omics supervised autoencoder for pan-cancer clinical outcome endpoints prediction . BMC Med. Informatics Decis. Mak . 20 , 1 – 9 ( 2020 ). OpenUrl 109. ↵ Willems , A. , Panchy , N. & Hong , T. Using single-cell rna sequencing and microrna targeting data to improve colorectal cancer survival prediction . Cells 12 , 228 ( 2023 ). OpenUrl 110. Hathaway , Q. , Yanamala , N. , Budoff , M. , Sengupta , P. & Zeby , I. Cardiovascular risk stratification through deep neural survival networks-the multi-ethnic study of atherosclerosis (mesa) . J. Am. Coll. Cardiol . 77 , 561 – 561 ( 2021 ). OpenUrl 111. ↵ Hathaway , Q. A. , Yanamala , N. , Budoff , M. J. , Sengupta , P. P. & Zeb , I. Deep neural survival networks for cardiovascular risk prediction: The multi-ethnic study of atherosclerosis (mesa) . Comput. Biol. Medicine 139 , 104983 ( 2021 ). OpenUrl 112. ↵ Feng , Y. et al. Personalized prediction of incident hospitalization for cardiovascular disease in patients with hypertension using machine learning . BMC Med. Res. Methodol . 22 , 1 – 11 ( 2022 ). OpenUrl CrossRef 113. ↵ Richard , V. R. et al. Early prediction of covid-19 patient survival by targeted plasma multi-omics and machine learning . Mol. & Cell. Proteomics 21 ( 2022 ). 114. ↵ Zeng , W. , Wang , X. , Xu , K. , Zhang , Y. & Fu , H. Predic-tion of cardiovascular disease survival based on artificial neural network . In 2021 IEEE International Conference on Computer Science, Electronic Information Engineering and Intelligent Control Technology (CEI) , 219 – 224 ( IEEE , 2021 ). 115. ↵ Unterhuber , M. et al. Proteomics-enabled deep learning machine algorithms can enhance prediction of mortality . J. Am. Coll. Cardiol . 78 , 1621 – 1631 ( 2021 ). OpenUrl CrossRef 116. ↵ Xu , Z. et al. Prediction of cardiovascular disease risk accounting for future initiation of statin treatment . Am. journal epidemiology 190 , 2000 – 2014 ( 2021 ). OpenUrl 117. 
↵ Vahabi , N. et al. Cox-smbpls: An algorithm for disease survival prediction and multi-omics module discovery incorporating cis-regulatory quantitative effects . Front. Genet . 12 , 701405 ( 2021 ). OpenUrl 118. ↵ Moreno-Sanchez , P. A. Improvement of a prediction model for heart failure survival through explainable artificial intelligence . Front. Cardiovasc. Medicine 10 ( 2023 ). 119. ↵ Kantidakis , G. et al. Survival prediction models since liver transplantation-comparisons between cox models and machine learning techniques . BMC Med. Res. Methodol . 20 , 1 – 14 ( 2020 ). OpenUrl CrossRef PubMed 120. ↵ Abdelhamid , S. et al. Multi-omic admission-based biomarkers predict 30-day survival and persistent critical illness in trauma patients after injury . J. Am. Coll. Surg . 235 , S95 ( 2022 ). OpenUrl 121. ↵ Grever , M. R. , Schepartz , S. A. & Chabner , B. A. The national cancer institute: cancer drug discovery and development program . In Seminars in oncology , vol. 19 , 622 – 638 ( 1992 ). OpenUrl 122. ↵ Lee , E. T. & Wang , J. Statistical methods for survival data analysis , vol. 476 ( John Wiley & Sons , 2003 ). 123. Voet , D. et al. Firehose: An analysis infrastructure . Firehose Broad GDAC, Broad Inst . 1 . 124. ↵ Zhao , Z. et al. Chinese glioma genome atlas (cgga): a comprehensive resource with functional genomic data from chinese glioma patients . Genomics, proteomics & bioinformatics 19 , 1 – 12 ( 2021 ). OpenUrl 125. Zhang , W. et al. Comparison of rna-seq and microarraybased models for clinical endpoint prediction . Genome biology 16 , 1 – 12 ( 2015 ). OpenUrl CrossRef PubMed 126. Liberzon , A. et al. Molecular signatures database (msigdb) 3.0 . Bioinformatics 27 , 1739 – 1740 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 127. Stanfill , A. G. & Cao , X. Enhancing research through the use of the genotype-tissue expression (gtex) database . Biol. research for nursing 23 , 533 – 540 ( 2021 ). OpenUrl 128. ↵ Bild , D. E. et al. 
Multi-ethnic study of atherosclerosis: objectives and design . Am. journal epidemiology 156 , 871 – 881 ( 2002 ). OpenUrl 129. Goldman , M. et al. The ucsc xena platform for public and private cancer genomics data visualization and interpretation . biorxiv 326470 ( 2018 ). 130. ↵ Clough , E. & Barrett , T. The gene expression omnibus database . Stat. Genomics: Methods Protoc . 93 – 110 ( 2016 ). 131. ↵ Poirion , O. B. , Jing , Z. , Chaudhary , K. , Huang , S. & Garmire , L. X. Deepprog: an ensemble of deep-learning and machine-learning models for prognosis prediction using multi-omics data . Genome medicine 13 , 1 – 15 ( 2021 ). OpenUrl 132. ↵ Amgalan , B. & Lee , H. Deod: uncovering dominant effects of cancer-driver genes based on a partial covariance selection method . Bioinformatics 31 , 2452 – 2460 ( 2015 ). OpenUrl CrossRef PubMed 133. ↵ Pölsterl , S. scikit-survival: A library for time-to-event analysis built on top of scikit-learn . The J. Mach. Learn. Res . 21 , 8747 – 8752 ( 2020 ). OpenUrl 134. ↵ Davidson-Pilon , C. lifelines: survival analysis in python . J. Open Source Softw . 4 , 1317 ( 2019 ). OpenUrl 135. Therneau , T. M. & Lumley , T. Package ‘survival’ . R Top Doc 128 , 28 – 33 ( 2015 ). OpenUrl 136. McKinney , W. , Perktold , J. & Seabold , S. Time series analysis in python with statsmodels . Jarrodmillman Com 96 – 102 ( 2011 ). 137. ↵ Kvamme , H. , Borgan , ø. & Scheel , I. Time-to-event prediction with neural networks and cox regression . arXiv preprint arXiv:1907.00825 ( 2019 ). 138. ↵ Fotso , S. et al. PySurvival: Open source package for survival analysis modeling ( 2019 –). 139. Jackson , C. H. flexsurv: a platform for parametric survival modeling in r . J. statistical software 70 ( 2016 ). 140. ↵ Sonabend , R. , Király , F. J. , Bender , A. , Bischl , B. & Lang , M. mlr3proba: an r package for machine learning in survival analysis . Bioinformatics 37 , 2789 – 2791 ( 2021 ). OpenUrl 141. Clements , M. Introduction to the rstpm2 package . 
Karolinska Institutet ( 2019 ). 142. ↵ Spytek , M. et al. survex: an r package for explaining machine learning survival models . arXiv preprint arXiv:2308.16113 ( 2023 ). 143. ↵ Kubi , M. G. , Lasisi , K. & Rasheed , B. A. Parametric and semi-parametric survival models with application to diabetes data . Sci J Biomed Eng Biomed Sci 3 , 001 – 010 ( 2022 ). OpenUrl 144. ↵ Ishak , K. J. , Kreif , N. , Benedict , A. & Muszbek , N. Overview of parametric survival analysis for healtheconomic applications . Pharmacoeconomics 31 , 663 – 675 ( 2013 ). OpenUrl PubMed 145. ↵ Sinha , D. & Dey , D. K. Semiparametric bayesian analysis of survival data . J. Am. Stat. Assoc . 92 , 1195 – 1212 ( 1997 ). OpenUrl CrossRef Web of Science 146. ↵ Stevenson , M. & EpiCentre , I. An introduction to survival analysis . EpiCentre, IVABS, Massey Univ . ( 2009 ). 147. ↵ Therneau , T. M. , Grambsch , P. M. , Therneau , T. M. & Grambsch , P. M. The cox model ( Springer , 2000 ). 148. ↵ Ishwaran , H. , Kogalur , U. B. , Blackstone , E. H. & Lauer , M. S. Random survival forests . ( 2008 ). 149. ↵ Binder , H. & Schumacher , M. Allowing for mandatory covariates in boosting estimation of sparse highdimensional survival models . BMC bioinformatics 9 , 1 – 10 ( 2008 ). OpenUrl CrossRef PubMed 150. ↵ Van Belle , V. , Pelckmans , K. , Suykens , J. A. & Van Huffel , S. Support vector machines for survival analysis . In Proceedings of the third international conference on computational intelligence in medicine and healthcare (cimed2007) , 1 – 8 ( 2007 ). 151. Shivaswamy , P. K. , Chu , W. & Jansche , M. A support vector approach to censored targets . In Seventh IEEE international conference on data mining (ICDM 2007) , 655 – 660 ( IEEE , 2007 ). 152. ↵ Khan , F. M. & Zubek , V. B. Support vector regression for censored data (svrc): a novel tool for survival analysis . In 2008 Eighth IEEE International Conference on Data Mining , 863 – 868 ( IEEE , 2008 ). 153. ↵ Fornili , M. , Ambrogi , F. 
, Boracchi , P. & Biganzoli , E. Piecewise exponential artificial neural networks (peann) for modeling hazard function with right censored data . In Computational Intelligence Methods for Bioinformatics and Biostatistics: 10th International Meeting, CIBB 2013, Nice, France, June 20-22, 2013, Revised Selected Papers 10 , 125 – 136 ( Springer , 2014 ). 154. ↵ Katzman , J. L. et al. Deepsurv: personalized treatment recommender system using a cox proportional hazards deep neural network . BMC medical research methodology 18 , 1 – 12 ( 2018 ). OpenUrl 155. ↵ Gensheimer , M. F. & Narasimhan , B. A scalable discrete-time survival model for neural networks . PeerJ 7 , e6257 ( 2019 ). OpenUrl 156. ↵ Kvamme , H. & Borgan, ø. Continuous and discretetime survival prediction with neural networks . arXiv preprint arXiv:1910.06724 ( 2019 ). 157. ↵ Lee , C. , Zame , W. , Yoon , J. & Van Der Schaar , M. Deephit: A deep learning approach to survival analysis with competing risks . In Proceedings of the AAAI conference on artificial intelligence , vol. 32 ( 2018 ). 158. ↵ Yu , C.-N. , Greiner , R. , Lin , H.-C. & Baracos , V. Learning patient-specific cancer survival distributions as a sequence of dependent regressors . Adv. neural information processing systems 24 ( 2011 ). 159. ↵ Fotso , S. Deep neural networks for survival analysis based on a multi-task framework . arXiv preprint arXiv:1801.05512 ( 2018 ). 160. ↵ Kvamme , H. & Borgan, ø. The brier score under administrative censoring: Problems and solutions . arXiv preprint arXiv:1912.08581 ( 2019 ). 161. ↵ Moher , D. et al. Preferred reporting items for systematic reviews and meta-analyses: the prisma statement . Int. journal surgery 8 , 336 – 341 ( 2010 ). OpenUrl View the discussion thread. Back to top Previous Next Posted January 07, 2024. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. 
Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Survival Prediction Landscape: An In-Depth Systematic Literature Review on Activities, Methods, Tools, Diseases, and Databases Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Survival Prediction Landscape: An In-Depth Systematic Literature Review on Activities, Methods, Tools, Diseases, and Databases Ahtisham Fazeel Abbasi , Muhammad Nabeel Asim , Sheraz Ahmed , Sebastian Vollmer , Andreas Dengel medRxiv 2024.01.05.24300889; doi: https://doi.org/10.1101/2024.01.05.24300889 Share This Article: Copy Citation Tools Survival Prediction Landscape: An In-Depth Systematic Literature Review on Activities, Methods, Tools, Diseases, and Databases Ahtisham Fazeel Abbasi , Muhammad Nabeel Asim , Sheraz Ahmed , Sebastian Vollmer , Andreas Dengel medRxiv 2024.01.05.24300889; doi: https://doi.org/10.1101/2024.01.05.24300889 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (574) Allergy and Immunology (866) Anesthesia (304) Cardiovascular Medicine (4463) Dentistry and Oral Medicine (446) Dermatology (383) Emergency Medicine (611) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1517) Epidemiology (15254) Forensic Medicine (31) Gastroenterology (1132) Genetic and Genomic Medicine (6625) Geriatric Medicine (669) Health Economics (1002) Health Informatics (4565) Health Policy (1372) Health Systems and Quality Improvement (1617) Hematology (544) HIV/AIDS 
(1272) Infectious Diseases (except HIV/AIDS) (15938) Intensive Care and Critical Care Medicine (1107) Medical Education (624) Medical Ethics (147) Nephrology (670) Neurology (6643) Nursing (346) Nutrition (1001) Obstetrics and Gynecology (1149) Occupational and Environmental Health (958) Oncology (3352) Ophthalmology (983) Orthopedics (369) Otolaryngology (421) Pain Medicine (436) Palliative Medicine (130) Pathology (665) Pediatrics (1699) Pharmacology and Therapeutics (694) Primary Care Research (714) Psychiatry and Clinical Psychology (5465) Public and Global Health (9259) Radiology and Imaging (2213) Rehabilitation Medicine and Physical Therapy (1372) Respiratory Medicine (1199) Rheumatology (598) Sexual and Reproductive Health (717) Sports Medicine (535) Surgery (715) Toxicology (100) Transplantation (289) Urology (266) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a03ecf7c6aa9e2c5',t:'MTc4MDE1NTM0NQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.