Shared genetics across 178 phenotypes predicts novel drug therapeutic and side effects

doi:10.1101/2025.05.13.25327511

Shared genetics across 178 phenotypes predicts novel drug therapeutic and side effects

2025 · doi:10.1101/2025.05.13.25327511

preprint OA: closed CC-BY-4.0

📄 Open PDF Full text JSON View at publisher

Full text 65,626 characters · extracted from preprint-html · click to expand

Genetic similarity among 178 disease phenotypes predicts therapeutic and side effects for 1,711 drugs | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Genetic similarity among 178 disease phenotypes predicts therapeutic and side effects for 1,711 drugs Panagiotis N. Lalagkas , Rachel D. Melamed doi: https://doi.org/10.1101/2025.05.13.25327511 Panagiotis N. Lalagkas 1 Department of Biological Sciences, University of Massachusetts Lowell , Lowell MA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Rachel D. 
Melamed 1 Department of Biological Sciences, University of Massachusetts Lowell , Lowell MA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: Rachel_Melamed{at}uml.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Human genetics demonstrates great potential for drug discovery, but challenges in identifying causal genes limit its clinical translation. Pleiotropy, the phenomenon where genetic variants or genes influence multiple traits, has been previously used to identify drug targets shared between phenotypically similar diseases. Here, we expand the use of pleiotropy to develop and evaluate a gene-agnostic method that predicts novel drug therapeutic and side effects across the phenome. We hypothesize that diseases with high genetic similarity to a drug’s known indications can point to new drug uses. To test this, we develop five metrics to quantify the genetic similarity between pairs of 178 diseases integrating genome-wide genetic correlation, gene-level associations and tissue-specific gene regulation. Comparing these metrics with data on indications and side effects of 1,711 common drugs, we find that more genetically similar diseases tend to share more drugs. We then use genetic similarity to predict drug therapeutic effects: our predictions with probability >0.1 show a 2.03-fold increased likelihood of progressing from Phase I clinical trials to regulatory approval. As well, predictions with probability >0.2 are 1.42 times more likely to correspond to true adverse effects. Notably, the indications model predicts side effects better than expected by chance, and vice-versa, implying a shared genetic basis for therapeutic and adverse drug effects. Together, our results suggest that genetic similarity can reveal new drug-disease links, putting forward a new use of genetics that bypasses the need for disease and drug target identification. 
Introduction Human genetics is increasingly used to identify effective drug gene targets for diseases through genome-wide association studies (GWAS) ( 1 , 2 ). Notably, drugs targeting genes pointed by GWAS disease-associated variants show higher success rates in clinical trials than those lacking such genetic support ( 3 – 5 ). Recently, this signal has been shown to predict drug side effects as well ( 6 ). However, it is challenging to link variants to causal genes that could be potential drug targets, and this approach must conservatively discard a substantial fraction of the genetic results ( 7 , 8 ). Therefore, there is a need for methods that can leverage the wealth of existing genetic data to accelerate drug discovery. Pleiotropy, the phenomenon where genetic variants or genes influence multiple traits through same or distinct biological processes, is widespread in the human genome, encompassing 90% of all GWAS trait-associated loci ( 9 – 11 ). This shared genetic risk has been shown to explain clinical co-occurrence of diseases ( 12 , 13 ). Additionally, pleiotropic effects of drug gene-targets have been previously used to guide drug repurposing. Woodward et al. review methods that identify gene-targets of antipsychotic drugs to be shared between pairs of psychiatric disorders, uncovering potential drug repurposing opportunities ( 14 ). A recent study used evidence of genetic similarity between Mendelian and complex diseases to show that Mendelian disease causal genes are potential drug targets for genetically similar complex diseases ( 15 ), even if those gene-targets have not been linked to the complex disease, and even if the diseases appear unrelated phenotypically ( 16 , 17 ). Other work showed that shared genetics underlying clinical co-occurrence of breast cancer and complex diseases can inform drug repurposing ( 18 ). 
Building on these findings, here we propose a broader hypothesis: that genetic similarity between a pair of diseases can point to novel effects of drugs that are known to act on only one of the two diseases. We test this hypothesis both in terms of therapeutic and side drug effects. Results Genetically similar diseases share more drugs We use publicly available data and develop five metrics to quantify the genetic similarity between 178 diseases ( Fig 1 shows an overview of the study framework). Each metric captures different aspects of the human genetic architecture including genome-wide genetic correlation, gene-level associations, tissue-specific gene regulation, and molecular QTL colocalization (see Methods). Consistent with prior studies, we find that diseases affecting the same body system (such as myocardial infarction and hypertension; both cardiovascular) have higher genetic similarity scores than those affecting different body systems (such as Alzheimer’s disease (neurological) and ulcerative colitis (gastrointestinal)) (p<0.05, Wilcoxon rank-sum test, two-sided; S1A-E Figs ) ( 19 ). We define two types of drug similarity: 1) the overlap of drugs indicated for both diseases and 2) the overlap of drugs causing both diseases as side effects. Download figure Open in new tab Fig 1. Overview of the study framework. Step 1: data collection. Step 2: construction of genetic similarity and drug-overlap metrics. Step 3: analytical workflow for integrating multiple genetic similarity metrics to predict drug-disease indications or side effects. Step 4: assessment of shared genetic architecture between indications and side effects. After harmonizing drug and disease identifiers across multiple sources, we have 1,643 drugs indicated for 159 diseases and 1,140 drugs causing 149 diseases as side effects ( Table 1 ). 
As expected, we find that diseases from the same body system share significantly more indicated drugs than those from different body systems, and a similar pattern is observed for side effect drugs (p<0.05, Wilcoxon rank-sum test, two-sided; S1F-G Figs ) ( 20 ). View this table: View inline View popup Table 1. Summary of the genetic and pharmacological data used. Next, we test whether pairs of diseases with higher genetic similarity have greater drug similarity. A confounding factor here is phenotypic similarity: pairs of diseases with more similar etiology, often affecting the same body system, share genetics and more drugs ( 21 ). While phenotypic similarity is well known to present opportunities for repurposing drugs for diseases with shared features, our goal is to identify the distinct potential of genetic similarity to identify drug indications that would not be obvious based only on phenotypic similarity. To isolate the signal from genetic similarity, we first stratify our analysis by annotated body system. Across nearly all genetic similarity metrics, we observe a positive correlation between genetic and drug similarity, even when analysis is restricted to diseases from different body systems ( Figure S2 ). To stringently focus only on genetic similarity among phenotypically dissimilar diseases, we also use disease embeddings from the ClinGraph knowledge graph model that summarize the phenotypic similarity between pairs of diseases ( S3 Fig ) ( 22 ). We then apply a range of thresholds for identifying pairs of diseases without phenotypic similarity. Even among pairs with low phenotypic similarity, genetic similarity remains associated with drug similarity ( Fig 2 ). While this pattern holds for both drug indications and side effects, the strength of association varies across genetic similarity metrics and drug similarity types. 
Particularly, the L2G-based metric shows weak or non-significant association with drug similarity, especially for drug side effect similarity ( Fig 2 , S2 Fig ). This may be a result of the conservative design of L2G prioritizing causal genes only at genome-wide significant GWAS loci, resulting in sparser data for assessing genetic similarity ( 23 ). These results support our overall hypothesis that genetics holds useful signal for understanding drug-disease connections beyond the strongest causal genes. Download figure Open in new tab Fig 2. Diseases with higher genetic similarity scores share more drugs even after stratification by phenotypic similarity. Each point represents the Spearman correlation between a genetic similarity metric (columns) and drug sharing across disease pairs (top row: sharing of drug indications; bottom row: sharing of drug side effects). The size of the points is proportional to the number of disease pairs contributing to each estimate. Shaded areas denote the 95% confidence interval of the observed correlation (estimated by z-transformation; R package DescTools, function SpearmanRho()). Colors denote disease-pair subsets defined by phenotypic similarity (varying x-axis threshold), defined as ClinGraph embedding cosine similarity between two diseases. The blue solid horizontal line indicates the estimated correlation using all disease pairs (no phenotypic similarity threshold applied). The red color indicates disease pairs with phenotypic similarity greater than or equal to an x-axis threshold. The yellow color indicates disease pairs with phenotypic similarity below a threshold. Genetic similarity distinguishes known drug indications and side effects As we wish to build a model that can predict drug-indication pairs based on genetic similarity, we next test whether genetic similarity can distinguish a drug’s known indications from other diseases not known to be treated with that drug. 
For each drug-disease pair and for each genetic similarity metric, we find the greatest genetic similarity score between that disease and any of the drug’s known indications. We use the maximum similarity based on the assumption that if a disease is genetically similar to any existing drug indication, it may be a promising candidate for repurposing. Using all drug indication-disease pairs, we find that diseases known to be treated by the drug are genetically more similar to the drug’s other indications (p<0.05, Wilcoxon rank-sum test, two-sided) ( Fig 3 ). This relationship holds even after analyzing disjoint sets of drug indication-disease pairs affecting the same or different body systems ( S4 Fig ) or using different thresholds of phenotypic similarity ( S5 Fig ). We observe similar results for drug side effects ( Fig 3 ), which also persist after stratification by body system ( S6 Fig ) or phenotypic similarity ( S7 Fig ). Together, these findings support the utility of shared genetic architecture as a predictor for drug indication and side effects. Download figure Open in new tab Fig 3. Genetic similarity distinguishes known drug indications and side effects. Top-panel: drug indications. Bottom-panel: drug side effects. Y-axis: maximum genetic similarity between a drug’s known indications (or side effects) and a tested disease. Color denotes whether the disease is a known indication (or side effect) for the drug (green) or not (gray). Outliers, estimated as values below Q1 − 1.5×IQR or above Q3 + 1.5×IQR, are not shown but included in the statistical analysis (Wilcoxon rank-sum test, two-sided). 
IQR: Inter-quartile range; Q1: first quartile (25th percentile); Q3: third quartile (75th percentile) Integrating multiple measures of genetic similarity in a predictive model for drug indications Building on the above findings, we next integrate our measures of genetic similarity in a unified model to predict the probability that a given drug-disease pair represents a potential therapeutic indication. As above, we predict that diseases highly genetically similar to known indications of a drug are promising drug indications. But, as not all genetic similarity measures are available for all pairs of a drug indication and disease, we develop a Bayesian logistic regression model that allows us to make predictions for any drug-disease pair using all available genetic similarity data. This strategy accommodates missing information on genetic similarity of a disease to some indications of a drug, while leveraging complementary information across genetic similarity metrics. We assess the model performance for predicting drug therapeutic effects with and without removing phenotypically similar disease pairs. Without removing phenotypically similar indications, our model is able to predict indications for held-out drug-disease pairs with an AUROC of 0.747. To disentangle genetic similarity from confounding phenotypic similarity, as above, we remove drug indication-disease pairs with phenotypic similarity (see Methods) and re-train the model. Although this approach removes genetic similarity that overlaps with phenotypic similarity, excluding informative signal, our model is still predictive of known indications (AUROC=0.559, individual disease AUROCs shown in Fig 4A ). We note that fewer diseases are included in this analysis because the exclusion of phenotypically similar drug indication-disease pairs results in certain diseases having no indicated drugs remaining in the test set. Download figure Open in new tab Fig 4. Evaluation of the indications model. A. 
AUROC (x-axis) for each tested disease (y-axis). Diseases are colored by body system (Open Targets labels). B. Odds ratio (y-axis) for drug-disease pairs with predicted probability above a given threshold (x-axis) compared to pairs below the baseline probability of being an indication in the test set (0.004), indicating enrichment for testing in a clinical trial phase (color) versus never tested in clinical trials. Shaded regions indicate 95% confidence intervals calculated by Fisher’s exact test (two-sided). Numbers above points show the calculated odds ratio (only when p-value<0.05). The size of the point is proportional to the number of pairs with predicted probability exceeding the threshold (x-axis). In total, 66,395 drug-disease pairs are included in this analysis (124 pairs tested in clinical trials but lacking phase information are excluded). C . Relative success (y-axis) of progressing from Phase I to Approval for drug-disease pairs with predicted probability above a threshold (x-axis) compared to pairs below the baseline probability (0.004). Values above each point indicate the estimated relative success (fold-change; only when significantly different from 1). Values below points show the number of progressing drug-disease pairs relative to the total evaluated for pairs with predicted probability above the x-axis threshold versus those below the baseline probability. Shaded regions indicate Katz 95% confidence intervals. We next evaluate the generalizability of our predictions by comparing them with evidence from clinical trials. Although clinical trials data is not used during model training, we find that drug-disease pairs with predicted probabilities higher than the baseline probability of being an indication in the test set (0.004) are more likely to be tested in clinical trials, with the degree of enrichment increasing with predicted probability ( Fig 4B ). 
When looking at clinical trial progression, we find that drug-disease pairs with higher predicted probabilities are more likely to advance from Phase I to Approval than those with predicted probabilities below the baseline ( Fig 4C ). Here we report relative success (analogous to relative risk) because it captures the proportional difference in trial progression probabilities between groups, which is more interpretable for predicting outcomes conditional on trial initiation. Specifically, pairs with predicted probabilities >0.1 are 2.03 times more likely to progress from Phase I to regulatory approval compared to pairs with probabilities ≤0.004 ( Fig 4C ). While Phase I trials test compounds already deemed safe and biologically active in humans, most ultimately fail due to insufficient efficacy; our model is able to distinguish the subset that succeeds. We observe a similar pattern for progression from Phase I to Phase II, Phase II to Phase III, and Phase III to Approval, with the highest relative success being from Phase III to regulatory approval, where tested drugs are compared against standard-of-care treatment ( S8 Fig ). We provide the full list of drug-disease predicted probabilities of being an indication from both phenotypically dissimilar drug indication-disease pairs, and from all disease pairs in Table S1 , along with the corresponding drug indications used for each prediction. Integrating multiple measures of genetic similarity in a predictive model for drug side effects We apply the same approach to estimate the probability that a given drug-disease pair is a side effect. The side effects model using all pairs achieves an AUROC of 0.582 on held-out drug-disease pairs. To evaluate the contribution of genetic similarity as opposed to phenotypic similarity confounding, as previously, we remove drug side effect-disease pairs with phenotypic similarity, re-train our model and re-calculate predicted probabilities. 
This new model is still predictive of known side effects (AUROC=0.548; individual disease AUROCs shown in Fig 5A ). Download figure Open in new tab Fig 5. Evaluation of the side effects models. A. AUROC (x-axis) per tested disease (y-axis). Color indicates the body system of each disease based on the Open Targets classification. B. Odds ratio (y-axis) for drug-disease pairs with predicted probability greater than a given threshold (x-axis) being a side effect (SIDER data) compared to those with predicted probability below the baseline probability of being a side effect in the test set (0.054). Shaded regions indicate 95% confidence intervals calculated by Fisher’s exact test (two-sided). The total number of drug-disease pairs included in this analysis is 43,510 (drugs: 430; diseases: 158). The size of the points is proportional to the number of pairs with predicted probability greater than the x-axis threshold (actual number of pairs is printed above each point). Next, we assess the generalizability of the side effect predictions derived from the model using phenotypically dissimilar pairs, as our goal is to evaluate the predictive power of genetic similarity. We first use SIDER, a database that extracts drug-side effect relationships from drug labels and black-box warnings( 24 ). After keeping drugs and diseases appearing in both SIDER and onSIDES (our training set) and removing overlapping true labels, we find that pairs with predicted probabilities >0.2 are 1.42 times (95% CI: 1.20-1.66) more likely to be true side effects than those with predicted probabilities below the baseline probability of being a side effect in the test set (0.054). We also compare our predictions to offSIDES, a database that identifies putative drug side effects by mining spontaneous adverse event reports from the FDA Adverse Event Reporting System (FAERS). 
We observe a similar enrichment, although not significant, potentially due to the low number of predicted drug-disease pairs with offSIDES data available ( S9 Fig ). We provide the full list of drug-candidate disease predicted probabilities of being a side effect from both approaches (one based on phenotypically dissimilar drug side effect-disease pairs, and one based on all pairs) in the Table S2 , along with the corresponding drug side effects used for each prediction. Genetic similarity points to a shared basis of drug indications and side effects Finally, we investigate whether indications and side effects of a drug share a common genetic basis. We hypothesize that genetic similarity of a disease to current drug indications can predict whether that disease is a side effect of the drug, and, conversely, similarity to current drug side effects can predict a new indication for that drug. This analysis is motivated by the concept that drugs affect a finite number of biological processes through the genes they target, and these processes may result in either indications or side effects. This concept is also supported by previous studies showing that both drug indications and side effects are enriched for human genetic associations, suggesting a potential genetic link between them( 3 , 4 , 6 ). However, this connection remains largely unexplored. To test this hypothesis, we re-train our indication and side effect models using only drugs and diseases with available information for both. We also exclude drug-disease pairs that are both an indication and a side effect to ensure that any observed signals are not driven by overlapping labels. This results in 667 drugs and 178 diseases (111,474 pairs) available for this analysis. Using these refitted models, we find that the indications model predicts true side effects significantly better than expected by chance (AUROC=0.525, p permutation <0.001) ( Fig 6A ). 
Additionally, we find that the side effects model predicts true drug indications better than random expectations (AUROC=0.542, p permutation <0.001) ( Fig 6B ). As side effects are by definition due to a drug’s indication mechanism of action adversely affecting another disease, it is reasonable that side effects could be more informative of indications than the reverse. Notably, since these models only use information about genetic similarity between diseases, these findings indicate that genetic similarity can suggest which drugs have biological effects relevant to the biology of a disease. Download figure Open in new tab Fig 6. The indication model predicts side effects (A) and vice versa (B). Drug-disease pairs that are both indications and side effects are excluded from model training and evaluation. Models are refitted on common drugs and diseases (total pairs =111,474 pairs; 667 drugs; 178 diseases). Red, vertical dashed lines indicate the observed AUROC in each case. Permutation tests (n=1,000) are conducted by shuffling the true drug-disease labels. Discussion In this work, we develop a genetics-informed framework that leverages pleiotropy to predict new drug indications and side effects. Pleiotropy has previously been used to understand disease etiology ( 25 ), and to suggest drug repurposing opportunities among similar diseases. Here, we extend this concept by systematically evaluating whether genetic similarity across a broad range of diseases can uncover drug therapeutic and adverse effects, while accounting for phenotypic similarity confounding. We believe that our work advances the use of genetics for both drug discovery and safety evaluation. Recent approaches show that GWAS-identified causal genes often represent promising drug targets ( 3 , 4 ). One such approach is the Open Targets L2G machine learning model, which combines different types of data to find the most likely causal gene at a genome-wide significant GWAS locus (p-value<5e-08). 
Although such approaches have growing support, they are limited because not all drug targets have strong GWAS signals, resulting in missed drug repurposing opportunities. Our method addresses this by enabling predictions even for diseases with weak or missing GWAS signals at a drug’s gene-targets. For instance, 29.5% of predicted drug-disease indication pairs with probability p≥0.1 involve drugs whose gene-targets contain at least one subthreshold variant for the disease (GWAS p-value<5e-04), but only 11.3% of our predictions are significant at a genome-wide level ( Supplementary fig S10 ). This supports the ability of genetic similarity to identify potential drug-disease indications even when genome-wide significant GWAS hits are absent. Overall, we believe that our approach complements rather than replaces approaches that rely on genome-wide significant GWAS signals. To illustrate the strength of our approach, we provide an example of a predicted drug-disease pair that is biologically supported but would be overlooked by approaches relying solely on genetic data of the candidate disease. We identify naltrexone as a potential treatment for Irritable Bowel Syndrome (IBS) (predicted probability=0.283) because IBS is genetically similar to alcoholism, a known indication of naltrexone. Naltrexone is an antagonist of the μ- ( OPRM1 ), κ-( OPRK1 ) and δ- ( OPRD1 ) opioid receptors ( 26 ). These receptors, besides in the brain, are also expressed in the gastrointestinal tract, where they play a role in regulating motility, secretion, and visceral sensation, all processes believed to be implicated in IBS pathophysiology ( 27 ). This biological rationale supports our prediction. Additionally, there is evidence from prior studies, including one pilot clinical study, suggesting that naltrexone may treat IBS by reducing neuroinflammation and visceral hypersensitivity ( 28 – 30 ). 
Notably, this repurposing candidate would have been missed by approaches looking solely at the IBS GWAS since there is currently no genetic evidence linking IBS to any of naltrexone’s known gene-targets ( OPRM1 , OPRK1 , OPRD1 ; based on data from the Open Targets Platform v25.09, https://platform.opentargets.org/ ). A similar insight applies to side effects. We predict that anastrozole may cause malabsorption syndrome (predicted probability=0.61) based on its genetic similarity to anastrozole’s known side effects, osteoporosis and phlebitis. Anastrozole is an aromatase inhibitor that blocks CYP19A1 and thereby prevents the conversion of androgens to estrogens ( 31 ). This reduces systemic estrogen levels, which explains anastrozole’s established adverse effects on bone and vascular health ( 32 – 34 ). Notably, estrogen receptors are also expressed in the gastrointestinal tract, where they regulate nutrient absorption, including calcium and vitamin D ( 35 – 37 ). Consequently, suppression of estrogen signaling by anastrozole could impair intestinal absorption. Together, these reflect a shared genetic link between osteoporosis, phlebitis and malabsorption syndrome tied to estrogen signaling, highlighting the ability of our genetics-informed side effect model to uncover mechanistically coherent drug-disease relationships across diverse clinical contexts. We also find evidence for a shared genetic basis between drug indications and side effects, as our indication model can predict side effects, and vice versa, better than expected by chance. This suggests that a drug’s therapeutic benefits and side effects may be due to the drug’s effect on biological pathways that influence multiple disease phenotypes. For instance, our side effect model predicts mercaptopurine, a purine antagonist, to have an effect on non-Hodgkin lymphoma (predicted probability=0.52) based on its genetic similarity to mercaptopurine’s known side effects, such as anemia and alopecia. 
Mercaptopurine impairs DNA replication and induces apoptosis in rapidly dividing cells by inhibiting purine synthesis ( 38 ). While this mechanism of action underlies its known toxicities (anemia due to suppression of hematopoietic progenitors; alopecia due to inhibition of hair follicle keratinocytes), it also suggests efficacy against other highly proliferative cell populations, such as malignant lymphocytes in non-Hodgkin lymphoma. Therefore, this prediction reflects a cross-system biological pleiotropy involving a shared proliferative and purine-dependent axis, in which modulation of the same molecular pathway confers side effects in normal tissues while providing therapeutic benefit in a proliferative malignancy. Our study has several limitations. First, while we use five complementary metrics to define genetic similarity and predict novel drug therapeutic and adverse effects, many drugs treat disease symptoms rather than underlying biological causes. Such symptomatic treatments may not be captured by genetic associations and could therefore be missed by our approach. Second, the incompleteness of publicly available genetic datasets limits our ability to compute genetic similarity scores for all drug-candidate disease pairs, thereby preventing a fair comparison of the relative predictive power of each of the developed metrics. Although imputation could address this limitation, we chose not to pursue it due to variation in the extent and nature of missingness across datasets. Third, the GWAS sample size varies across diseases and even for the same disease between different genetic databases. For example, GWAS Atlas and PhenomeXcan may use different GWASs for the same disease. Diseases with smaller GWAS may yield less confident similarity estimates, which could potentially cause us to overlook some drug-disease links. Fourth, our developed measures do not capture all genetic similarity between disease pairs. 
Some of our measures rely on significant genes or loci, which, as mentioned above, must be conservative and can miss some signal. Genome-wide measures like LD-score correlation, conversely, can miss more localized similarity. Future work could identify a wider range of measures of similarity. Fifth, there is a confounding association between genetic similarity and phenotypic similarity, and the latter is already commonly exploited to repurpose drugs for physiologically similar diseases. To isolate the utility of genetic similarity, we stringently remove pairs of diseases with obvious clinical similarity using both disease annotations and knowledge graph similarities. This also addresses potential sample overlap between GWAS as phenotypically similar diseases tend to be clinically associated. As a result, we are able to show that genetic similarity enriches for uses of drugs in phenotypically dissimilar diseases. We also supply the full results in the supplement. Therefore, we believe that this limitation does not invalidate the identified associations. In conclusion, our work emphasizes the potential of genetic similarity to inform drug discovery and overcome known limitations of single disease GWAS studies. To our knowledge, this is the first study that evaluates the use of genetic similarity to inform drug discovery at a phenome-wide scale. Future work could use our predictions as a resource for experimental validation, as well as could use complete genetic data to fairly compare the predictive performance of individual genetic similarity metrics and reveal which data sources are most informative for making drug-disease predictions. Furthermore, future studies can build on our findings of shared genetic basis between drug indications and side effects to identify biological pathways that drive both drug efficacy and toxicity. 
Ultimately, such insights could help reduce clinical trial attrition rates by prioritizing candidate drugs with a more favorable side effect profile. Materials and Methods Genetic data and development of genetic similarity metrics We define genetic similarity between two diseases as the extent to which they share genetic variants or genes. To quantify this, we develop five genetic similarity metrics, each derived from a different type of genetic data. Below, we describe each metric and its data source. LDSC-based metric: our first metric captures genome-wide genetic correlation between disease pairs using LD Score Regression (LDSC) ( 25 ). We obtain precomputed LDSC genetic correlation estimates from the GWAS Atlas ( https://atlas.ctglab.nl/ ; release 3, v20191115; gwasATLAS_v20191115_GC.txt.gz), covering 2,415 pairs across 70 diseases ( 10 ). These values are used as is without further processing. MAGMA-based metric: our second metric uses output statistics from MAGMA, a tool that aggregates SNP-level GWAS signals within genes to generate gene-level p-values for association with a disease (MAGMA gene analysis) ( 39 ). We obtain precomputed MAGMA results from the GWAS Atlas ( https://atlas.ctglab.nl/ ; release 3, v20191115; gwasATLAS_v20191115_magma_P.txt.gz) for 81 diseases and 18,680 genes ( 10 ). However, this file includes missing values for some disease-gene pairs. To address this without losing the majority of information, we exclude the top-10 diseases with the most missing values (>10%) and keep genes with complete data for all the remaining diseases. This results in a dataset of 71 diseases and 15,297 genes. To retain informative genes, we keep only those significantly associated with at least one disease (BH-adjusted p<0.05), yielding 10,302 genes. Then, for each disease pair, we calculate the Spearman correlation between their gene-level association p-values.
S-MultiXcan-based metric: our third metric is based on S-MultiXcan, a tool that summarizes genetically predicted gene expression across 44 GTEx tissues to estimate gene-disease associations ( 40 ). We obtain precomputed S-MultiXcan p-values from PhenomeXcan ( https://zenodo.org/records/3911190 ; smultixcan-mashr-pvalues.tsv.gz), covering 137 diseases and 22,215 genes. We convert p-values into z-scores, as described elsewhere ( 41 ). Then, for each disease pair, we compute the Spearman correlation between their gene regulation profiles. L2G-based metric: our fourth metric uses data from the Open Targets Locus-to-Gene (L2G) model ( 23 ). L2G is a machine learning model that assigns a probability to each gene to be causal for a disease based on variants in a genome-wide significant GWAS locus and evidence from gene proximity, QTL colocalization, chromatin interactions, and variant pathogenicity. We download L2G scores for 137 diseases and 14,951 genes from the Open Targets Genetics platform (v22.09; https://ftp.ebi.ac.uk/pub/databases/opentargets/genetics/22.09/l2g/ ). In case of duplicated disease-gene entries, we keep the one with the highest L2G score. For disease-gene pairs with no available L2G score, we assign a value of 0, reflecting no evidence of causality. Finally, for each disease pair, we compute the Spearman correlation between their L2G gene-level profiles. COLOC-based metric: our fifth and final metric uses colocalization data between GWAS loci and molecular QTLs (eQTLs, pQTLs, sQTLs). We obtain this data from the Open Targets Genetics platform (v22.09; https://ftp.ebi.ac.uk/pub/databases/opentargets/genetics/22.09/v2d_coloc/ ). For each disease, we identify genes with strong colocalization support (posterior probability for shared causal variant, H4 ≥ 0.8, coloc), yielding colocalization-based gene sets for 39 diseases (total of 1,758 genes).
Then, for each disease pair, we quantify genetic similarity using the overlap coefficient, defined as: $$\mathrm{overlap}(A, B) = \frac{|A \cap B|}{\min(|A|, |B|)} \qquad (1)$$ where $A$ and $B$ are the colocalization-based gene sets of the two diseases. To ensure consistent disease mapping across all five genetic similarity metrics, we perform manual curation of disease identifiers. For each of the 178 diseases, we retrieve the corresponding MeSH ID and then match it to the relevant identifier in each genetic data source. In cases where multiple GWAS entries correspond to the same disease, we select the most recent GWAS or the one with the largest sample size. Each data source quantifies association of diseases with a variable portion of the genome. Open Targets computes L2G scores only for genome-wide significant GWAS loci for a disease and COLOC analysis is performed only when these loci overlap with known molecular QTLs. If no such overlap exists, the locus is excluded from COLOC analysis. Due to differences in data availability across sources, coverage varies by disease. Nevertheless, the vast majority of diseases (173/178; 97.2%) are represented in at least two of the three sources. Within each source, coverage also varies across data types which is a result of source-specific data processing pipelines (explained in the documentation of each resource). For example, within Open Targets, we have L2G data for 137 diseases, but we have COLOC data for 51 of them, depending on the number of GWAS loci passing genome-wide significance. Disease annotations and phenotypic similarity data We make extensive efforts to disentangle genetic similarity from clinical similarity in order to assess the additive value of genetic similarity for drug discovery. To this end, we compile disease annotations from Open Targets and complement them with manually curated labels from ICD10 and the UK HRCS. For ICD10, we map each disease to its broadest diagnostic category. For example, we assign type 2 diabetes (ICD10 code: E11) to the E00-E89 category, corresponding to “Endocrine, nutritional and metabolic diseases”.
For UK HRCS, we manually assign each disease to the most relevant health category based on online guidelines ( https://hrcsonline.net/health-categories/ ). Using these annotations, we classify each disease pair as affecting the same body system if the two diseases share a category in at least one of the annotation sources or affecting different body systems otherwise. Diseases affecting different body systems can still be phenotypically similar. For instance, hypercholesterolemia (metabolic disease) and myocardial infarction (cardiovascular disease) are classified as affecting different body systems using standard disease annotations, yet they have a well-established causal relationship. Therefore, we also use ClinGraph, a large-scale clinical knowledge graph that integrates eight standardized medical vocabularies to represent relationships among diseases, symptoms, drugs, and clinical concepts. We use pre-trained disease embeddings from ClinGraph ( https://zitniklab.hms.harvard.edu/projects/Clinical-knowledge-embeddings/ ) and quantify phenotypic similarity between disease pairs as the cosine similarity of these embeddings. For diseases that map to multiple ClinGraph identifiers, we compute their phenotypic similarity to another disease as the mean cosine similarity across all corresponding identifier pairs. Supplementary figure 3 shows the distribution of ClinGraph phenotypic similarity values across disease pairs affecting same or different body systems according to standard disease annotations. Drug indications data We compile a comprehensive dataset of approved drug indications by combining data from three publicly available resources: ChEMBL, RxNORM, and SIDER. ChEMBL: we download drug-disease indication data using the ChEMBL API ( https://www.ebi.ac.uk/chembl/api/data/drug_indication.json ) and filter for drug-disease pairs with “max_phase_for_ind” equal to 4 or labeled as “Approved”. This ensures that only already approved indications are included. 
RxNORM: we first retrieve a list of drugs mapping RxNORM names to ChEMBL IDs from the UniChem EBI ( https://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/wholeSourceMapping/src_id1/src1src47.txt.gz ). Using the RxNORM names of these drugs, we query the RxNORM API (byDrugName function) to obtain diseases labeled as “may_treat” or “may_prevent” for each drug. Finally, we map the disease IDs to MeSH IDs using the UMLS API for consistency across data sources. SIDER: we download drug indication data from the SIDER database ( http://sideeffects.embl.de/media/download/meddra_all_indications.tsv.gz ). In SIDER, drug IDs are in PubChem and disease IDs are UMLS CUIs. We convert drug IDs to ChEMBL IDs using the corresponding UniChem mapping file ( https://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/wholeSourceMapping/src_id1/src1src5.txt.gz ) and disease IDs to MeSH IDs using the UMLS API. We combine drug indication data across all three resources, remove duplicate entries, and exclude overly broad disease categories (top MeSH headers), such as “cardiovascular diseases” and “nervous system diseases”. Drug side effects data from drug labels We obtain drug side effects data from the onSIDES (v3.1.0; https://github.com/tatonetti-lab/onsides/releases/download/v3.1.0/onsides-v3.1.0.zip ), a database that extracts adverse events from drug labels using natural language processing. We join the “product_adverse_effect.csv” and “product_label.csv” files to create a table with drug-side effects, and filter for those predicted to be side effects (pred1>pred0). Then, we use UniChem EBI to convert drug RxNORM names to ChEMBL IDs ( https://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/wholeSourceMapping/src_id1/src1src47.txt.gz ) and the UMLS API (crosswalk function) to convert disease MedDRA IDs to MeSH IDs. We also obtain drug side effects data from SIDER (v4.1; http://sideeffects.embl.de/ ). 
Using a similar approach, we map drug PubChem IDs to ChEMBL IDs (UniChem EBI) and disease UMLS CUIs to MeSH IDs (UMLS API). We use these drug-disease side effects as an independent dataset to evaluate our side effects model predictions, after removing overlapping labels with onSIDES. Drug overlap metrics among diseases To quantify the overlap of indicated or side effect drugs among diseases, we use the aforementioned compiled drug datasets, and, for each disease pair, we estimate the overlap coefficient of the drugs that treat them or cause them as a side effect ( equation 1 ). Drug-disease pairs in clinical trials To find which of the analyzed drug-candidate disease pairs in our sample are currently being, or have been, tested in clinical trials, we download clinical trial data from ChEMBL and the Aggregate Analysis of https://ClinicalTrials.gov (AACT) database. AACT is a publicly available relational database that contains information about all the trials registered in https://ClinicalTrials.gov ( 42 ). We obtain information for all clinical trials that were registered in https://ClinicalTrials.gov as of November 4, 2022. For each drug-disease pair, we keep the maximum clinical trial phase reached. We group clinical trial phases into Phase I (Phase I and Early Phase I), Phase II (Phase II and Phase I/Phase II), Phase III (Phase III and Phase II/Phase III) or unknown phase (no phase information provided). Clinical trials in Phase 0 are at the pre-clinical stage and therefore excluded. Then, we map all drug MeSH IDs to ChEMBL IDs through DrugBank IDs using the UMLS API (crosswalk function) and a mapping file provided by UniChem EBI ( https://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/wholeSourceMapping/src_id1/src1src2.txt.gz ). We also map all disease IDs to MeSH IDs using the UMLS API. We use this data to evaluate our indication model predictions.
Drug side effect data from spontaneous reports To evaluate our side effect model predictions, we use data from offSIDES ( http://github.com/tatonetti-lab/offsides/ ), a resource that identifies putative drug-side effect associations by detecting disproportionality signals in spontaneous adverse event reports from FAERS while adjusting for potential confounders ( 43 ). For each drug-disease pair, offSIDES reports a Proportional Reporting Ratio (PRR) and an associated p-value, quantifying whether a side effect is reported more frequently for a given drug compared with a matched background of reports for other drugs. We filter the offSIDES data for drug-side effect pairs with PRR>1, indicating an increased reporting frequency for the tested drug. We further filter for drugs and side effects present in our study. Drug-disease pairs with a Bonferroni adjusted p-value<0.05 are considered potentially true side effects. Finally, we convert drug PubChem IDs to ChEMBL IDs using the PubChem Exchange Identifier and disease UMLS CUIs to MeSH IDs using the UMLS API. This yields 20,057 pairs of 362 drugs and 159 diseases, of which 11,505 pairs are identified as significant side effects (Bonferroni p.adjust <0.05). Bayesian logistic regression model incorporating all genetic similarity data To integrate our measures of genetic similarity in a model to predict the probability that a given drug-disease pair represents a potential therapeutic indication, we train a Bayesian logistic regression model using Stan (rstan v2.32.7). We choose this modeling approach because: 1) its probabilistic framework allows the number of disease-indication pairs with genetic similarity to vary across genetic evidence, drugs, and diseases and 2) it incorporates all genetic similarity metrics along with the similarity of a disease to all indications of a drug, rather than only the most genetically similar drug indication. 
Our model predicts for a disease $i$, drug $j$ pair ($Y_{ij}$), the probability $p_{ij}$ of the drug treating the disease. The prediction is modeled using the set of genetic similarities for each evidence type $e$ available, assessing similarity of disease $i$ to the $N$ annotated indications $\{g_{ije1}, \ldots, g_{ijeN}\}$. The genetic similarity scores are approximately normally distributed, and the known drug-treatment labels are of course binary. Therefore, we model: [model equation lost in text extraction; see the Stan model code linked below or the publisher copy]. The learned parameters are [two symbols lost in text extraction, the second indexed by evidence type $e$] and $p_{ij}$, while $\sigma$ is treated as a hyperparameter. To find the best $\sigma$, we test several values by training the model on a set of drug-indication pairs and evaluating its performance on held-out drug-indication labels, in a cross validation-like setup. Ultimately, only the predicted probabilities for held-out drug-disease pairs are used for evaluating our model. The full Stan model code is available on our GitHub repository ( https://github.com/lalagkaspn/genetic_similarity_drug_discovery ). We train one Bayesian logistic regression model using selected hyperparameters and genetic similarity data across all drug indication-disease pairs (indications model). To account for confounding by phenotypic similarity between diseases and isolate the signal from genetic similarity, we also train a second model using the same hyperparameters but restricted to phenotypically dissimilar drug indication-disease pairs. We define a pair of diseases as phenotypically dissimilar if they do not affect the same body system in any disease annotation and have ClinGraph phenotypic similarity <-0.1 (see “Disease annotations and phenotypic similarity data” for more details). We follow the same process to train the side effect model. Permutation tests for the cross-model prediction analysis To evaluate whether the observed associations in our cross-prediction analysis are due to chance, we perform permutation tests.
Specifically, we randomly shuffle the true drug-disease labels (indications or side effects) to generate new pairings that disrupt any real associations and calculate the AUROC. We repeat this process 1,000 times to estimate a null distribution of AUROCs. We then compare the observed AUROC to the null distribution and calculate a permutation p-value as the proportion of permuted AUROCs that are greater than or equal to the observed AUROC. Observed associations are considered statistically significant if the permutation p-value is less than 0.05. Declarations Ethics approval and consent to participate Not applicable Consent for publication Not applicable Data availability Data and code for reproducing the results and figures presented in this study can be found in this GitHub repository: https://github.com/lalagkaspn/genetic_similarity_drug_discovery . All drug-disease predictions are included within the article and its Supplementary Tables S1 and S2. Competing interests Not applicable Funding NIGMS R35 GM151001-01 Authors’ contributions PNL curated and analyzed the genetic and drug data. RDM conceptualized and supervised the research. PNL and RDM interpreted the results, and drafted and revised the manuscript. All authors read and approved the final manuscript. Acknowledgements Not applicable Footnotes Edits mainly adjust for the issue of phenotypic similarity, creating a more conservative analysis that removes phenotypically similar diseases from the main results. References 1. ↵ Uffelmann E , Huang QQ , Munung NS , de Vries J , Okada Y , Martin AR , et al. Genome-wide association studies . Nat Rev Methods Primers . 2021 Dec; 1 ( 1 ): 59 . doi: 10.1038/s43586-021-00056-9 OpenUrl CrossRef 2. ↵ Visscher PM , Wray NR , Zhang Q , Sklar P , McCarthy MI , Brown MA , et al. 10 Years of GWAS Discovery: Biology , Function, and Translation. The American Journal of Human Genetics . 2017 Jul; 101 ( 1 ): 5 – 22 . doi: 10.1016/j.ajhg.2017.06.005 OpenUrl CrossRef PubMed 3.
↵ Nelson MR , Tipney H , Painter JL , Shen J , Nicoletti P , Shen Y , et al. The support of human genetic evidence for approved drug indications . Nat Genet . 2015 Aug; 47 ( 8 ): 856 – 60 . doi: 10.1038/ng.3314 OpenUrl CrossRef PubMed 4. ↵ King EA , Davis JW , Degner JF . Are drug targets with genetic support twice as likely to be approved? Revised estimates of the impact of genetic support for drug mechanisms on the probability of drug approval. Marchini J, editor. PLoS Genet . 2019 Dec 12; 15 ( 12 ): e1008489 . doi: 10.1371/journal.pgen.1008489 OpenUrl CrossRef PubMed 5. ↵ Minikel EV , Painter JL , Dong CC , Nelson MR . Refining the impact of genetic evidence on clinical success . Nature . 2024 Apr 17. doi: 10.1038/s41586-024-07316-0 OpenUrl CrossRef 6. ↵ Minikel EV , Nelson MR. Human genetic evidence enriched for side effects of approved drugs [Internet ]. 2023 [cited 2024 Apr 28]. Available from: http://medrxiv.org/lookup/doi/10.1101/2023.12.12.23299869 doi: 10.1101/2023.12.12.23299869 OpenUrl Abstract / FREE Full Text 7. ↵ Gallagher MD , Chen-Plotkin AS . The Post-GWAS Era: From Association to Function . The American Journal of Human Genetics . 2018 May; 102 ( 5 ): 717 – 30 . doi: 10.1016/j.ajhg.2018.04.002 OpenUrl CrossRef PubMed 8. ↵ Chen Z , Boehnke M , Wen X , Mukherjee B . Revisiting the genome-wide significance threshold for common variant GWAS. De Koning DJ, editor . G3 Genes|Genomes|Genetics . 2021 Apr 12; 11 ( 2 ): jkaa056 . doi: 10.1093/g3journal/jkaa056 OpenUrl CrossRef 9. ↵ Mackay TFC , Anholt RRH . Pleiotropy, epistasis and the genetic architecture of quantitative traits . Nat Rev Genet . 2024 Sep; 25 ( 9 ): 639 – 57 . doi: 10.1038/s41576-024-00711-3 OpenUrl CrossRef 10. ↵ Watanabe K , Stringer S , Frei O , Umićević Mirkov M , De Leeuw C , Polderman TJC , et al. A global overview of pleiotropy and genetic architecture in complex traits . Nat Genet . 2019 Sep; 51 ( 9 ): 1339 – 48 . doi: 10.1038/s41588-019-0481-0 OpenUrl CrossRef PubMed 11. 
↵ Chesmore K , Bartlett J , Williams SM . The ubiquity of pleiotropy in human disease . Hum Genet . 2018 Jan; 137 ( 1 ): 39 – 44 . doi: 10.1007/s00439-017-1854-z OpenUrl CrossRef PubMed 12. ↵ Zhang J , Fang X , Ye Q , Yin X , Ye D . The shared genetic architecture underlying the autoimmune and cardiovascular disease: a multivariate genome-wide analysis . Cardiovasc Diabetol . 2026 Feb 11; 25 ( 1 ): 44 . doi: 10.1186/s12933-025-03041-8 OpenUrl CrossRef PubMed 13. ↵ Rubio-Perez C , Guney E , Aguilar D , Piñero J , Garcia-Garcia J , Iadarola B , et al. Genetic and functional characterization of disease associations explains comorbidity . Sci Rep . 2017 Dec; 7 ( 1 ): 6207 . doi: 10.1038/s41598-017-04939-4 OpenUrl CrossRef PubMed 14. ↵ Woodward DJ , Thorp JG , Middeldorp CM , AkóLílè W , Derks EM , Gerring ZF . Leveraging pleiotropy for the improved treatment of psychiatric disorders . Mol Psychiatry . 2025 Feb; 30 ( 2 ): 705 – 21 . doi: 10.1038/s41380-024-02771-7 OpenUrl CrossRef PubMed 15. ↵ Lalagkas PN , Melamed RD . Shared etiology of Mendelian and complex disease supports drug discovery . BMC Med Genomics . 2024 Sep 10; 17 ( 1 ): 228 . doi: 10.1186/s12920-024-01988-3 OpenUrl CrossRef PubMed 16. ↵ Blair DR , Lyttle CS , Mortensen JM , Bearden CF , Jensen AB , Khiabanian H , et al. A Nondegenerate Code of Deleterious Variants in Mendelian Loci Contributes to Complex Disease Risk . Cell . 2013 Sep; 155 ( 1 ): 70 – 80 . doi: 10.1016/j.cell.2013.08.030 OpenUrl CrossRef PubMed Web of Science 17. ↵ Melamed RD , Emmett KJ , Madubata C , Rzhetsky A , Rabadan R . Genetic similarity between cancers and comorbid Mendelian diseases identifies candidate driver genes . Nat Commun . 2015 Nov; 6 ( 1 ): 7033 . doi: 10.1038/ncomms8033 OpenUrl CrossRef PubMed 18. ↵ Lalagkas PN , Melamed RD . Shared genetics between breast cancer and predisposing diseases identifies novel breast cancer treatment candidates . Hum Genomics . 2024 Nov 14; 18 ( 1 ): 124 . 
doi: 10.1186/s40246-024-00688-4 OpenUrl CrossRef PubMed 19. ↵ Dong G , Feng J , Sun F , Chen J , Zhao XM . A global overview of genetically interpretable multimorbidities among common diseases in the UK Biobank . Genome Med . 2021 Dec; 13 ( 1 ): 110 . doi: 10.1186/s13073-021-00927-6 OpenUrl CrossRef PubMed 20. ↵ Rodriguez-Esteban R . A Drug-Centric View of Drug Development: How Drugs Spread from Disease to Disease. Searls DB, editor . PLoS Comput Biol . 2016 Apr 28; 12 ( 4 ): e1004852 . doi: 10.1371/journal.pcbi.1004852 OpenUrl CrossRef PubMed 21. ↵ Chen Y , Zhang X , Zhang G qiang , Xu R. Comparative analysis of a novel disease phenotype network based on clinical manifestations . Journal of Biomedical Informatics . 2015 Feb; 53 : 113 – 20 . doi: 10.1016/j.jbi.2014.09.007 OpenUrl CrossRef PubMed 22. ↵ Johnson R , Gottlieb U , Shaham G , Eisen L , Waxman J , Devons-Sberro S , et al. ClinVec: Unified Embeddings of Clinical Codes Enable Knowledge-Grounded AI in Medicine [Internet] . Health Informatics ; 2024 [cited 2026 Mar 4]. Available from: http://medrxiv.org/lookup/doi/10.1101/2024.12.03.24318322 doi: 10.1101/2024.12.03.24318322 OpenUrl Abstract / FREE Full Text 23. ↵ Mountjoy E , Schmidt EM , Carmona M , Schwartzentruber J , Peat G , Miranda A , et al. An open approach to systematically prioritize causal variants and genes at all published human GWAS trait-associated loci . Nat Genet . 2021 Nov; 53 ( 11 ): 1527 – 33 . doi: 10.1038/s41588-021-00945-5 OpenUrl CrossRef 24. ↵ Kuhn M , Letunic I , Jensen LJ , Bork P . The SIDER database of drugs and side effects . Nucleic Acids Res . 2016 Jan 4; 44 ( D1 ): D1075 – 9 . doi: 10.1093/nar/gkv1075 OpenUrl CrossRef PubMed 25. ↵ Schizophrenia Working Group of the Psychiatric Genomics Consortium , Bulik-Sullivan BK , Loh PR , Finucane HK , Ripke S , Yang J , et al. LD Score regression distinguishes confounding from polygenicity in genome-wide association studies . Nat Genet . 2015 Mar; 47 ( 3 ): 291 – 5 . 
doi: 10.1038/ng.3211 OpenUrl CrossRef PubMed 26. ↵ DrugBank . Naltrexone . https://go.drugbank.com/drugs/DB00704 [Internet]. Available from: https://go.drugbank.com/drugs/DB00704 27. ↵ Oświęcimska J , Szymlak A , Roczniak W , Girczys-Połedniok K , Kwiecień J . New insights into the pathogenesis and treatment of irritable bowel syndrome . Advances in Medical Sciences . 2017 Mar; 62 ( 1 ): 17 – 30 . doi: 10.1016/j.advms.2016.11.001 OpenUrl CrossRef PubMed 28. ↵ Li X , Li B , Zhang J , Chen T , Wu H , Shi X , et al. Efficacy of opioid receptor modulators in patients with irritable bowel syndrome: A systematic review and meta-analysis . Medicine . 2021 Jan 29; 100 ( 4 ): e24361 . doi: 10.1097/md.0000000000024361 OpenUrl CrossRef PubMed 29. Breslin HJ , Diamond CJ , Kavash RW , Cai C , Dyatkin AB , Miskowski TA , et al. Identification of a dual δ OR antagonist/μ OR agonist as a potential therapeutic for diarrhea-predominant Irritable Bowel Syndrome (IBS-d) . Bioorganic & Medicinal Chemistry Letters . 2012 Jul; 22 ( 14 ): 4869 – 72 . doi: 10.1016/j.bmcl.2012.05.042 OpenUrl CrossRef PubMed Web of Science 30. ↵ Kariv R , Tiomny E , Grenshpon R , Dekel R , Waisman G , Ringel Y , et al. Low-Dose Naltreoxone for the Treatment of Irritable Bowel Syndrome: A Pilot Study . Dig Dis Sci . 2006 Dec; 51 ( 12 ): 2128 – 33 . doi: 10.1007/s10620-006-9289-8 OpenUrl CrossRef PubMed 31. ↵ DrugBank . Anastrozole . https://go.drugbank.com/drugs/DB01217 [Internet]. DrugBank . Available from: https://go.drugbank.com/drugs/DB01217 32. ↵ Cauley JA . Estrogen and bone health in men and women . Steroids . 2015 Jul; 99 : 11 – 5 . doi: 10.1016/j.steroids.2014.12.010 OpenUrl CrossRef PubMed 33. Manolagas SC , O’Brien CA , Almeida M . The role of estrogen and androgen receptors in bone health and disease . Nat Rev Endocrinol . 2013 Dec; 9 ( 12 ): 699 – 712 . doi: 10.1038/nrendo.2013.179 OpenUrl CrossRef PubMed 34. ↵ Usselman CW , Stachenfeld NS , Bender JR . 
The molecular actions of oestrogen in the regulation of vascular health . Experimental Physiology . 2016 Mar; 101 ( 3 ): 356 – 61 . doi: 10.1113/EP085148 OpenUrl CrossRef PubMed 35. ↵ Wu Y , Guo X , Jiang A , Bai J , Nie X . Estrogen regulates duodenal calcium absorption and improves postmenopausal osteoporosis by the effect of ERβ on PMCA1b . Sci Rep . 2025 May 8; 15 ( 1 ): 16053 . doi: 10.1038/s41598-025-00605-2 OpenUrl CrossRef PubMed 36. Nie X , Xie R , Tuo B . Effects of Estrogen on the Gastrointestinal Tract . Dig Dis Sci . 2018 Mar; 63 ( 3 ): 583 – 96 . doi: 10.1007/s10620-018-4939-1 OpenUrl CrossRef PubMed 37. ↵ Chen C , Gong X , Yang X , Shang X , Du Q , Liao Q , et al. The roles of estrogen and estrogen receptors in gastrointestinal disease (Review) . Oncol Lett . 2019 Oct 11. doi: 10.3892/ol.2019.10983 OpenUrl CrossRef PubMed 38. ↵ DrugBank . Mercaptopurine . https://go.drugbank.com/drugs/DB01033 [Internet]. Available from: https://go.drugbank.com/drugs/DB01033 39. ↵ De Leeuw CA , Mooij JM , Heskes T , Posthuma D . MAGMA: Generalized Gene-Set Analysis of GWAS Data. Tang H, editor . PLoS Comput Biol . 2015 Apr 17; 11 ( 4 ): e1004219 . doi: 10.1371/journal.pcbi.1004219 OpenUrl CrossRef PubMed 40. ↵ Barbeira AN , Pividori M , Zheng J , Wheeler HE , Nicolae DL , Im HK . Integrating predicted transcriptome from multiple tissues improves association detection. Plagnol V, editor . PLoS Genet . 2019 Jan 22; 15 ( 1 ): e1007889 . doi: 10.1371/journal.pgen.1007889 OpenUrl CrossRef PubMed 41. ↵ Habib M , Lalagkas PN , Melamed RD . Mapping drug biology to disease genetics to discover drug impacts on the human phenome . Gromiha M, editor. Bioinformatics Advances . 2024 Jan 5; 4 ( 1 ): vbae038 . doi: 10.1093/bioadv/vbae038 OpenUrl CrossRef 42. ↵ Tasneem A , Aberle L , Ananth H , Chakraborty S , Chiswell K , McCourt BJ , et al. The Database for Aggregate Analysis of https://ClinicalTrials.gov (AACT) and Subsequent Regrouping by Clinical Specialty . PLoS ONE . 
2012 Mar 16; 7 ( 3 ): e33677 . doi: 10.1371/journal.pone.0033677 OpenUrl CrossRef PubMed 43. ↵ Tatonetti NP , Ye PP , Daneshjou R , Altman RB . Data-Driven Prediction of Drug Effects and Interactions . Sci Transl Med . 2012 Mar 14; 4 ( 125 ). doi: 10.1126/scitranslmed.3003377 OpenUrl Abstract / FREE Full Text View the discussion thread. Back to top Previous Next Posted May 02, 2026. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Genetic similarity among 178 disease phenotypes predicts therapeutic and side effects for 1,711 drugs Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Genetic similarity among 178 disease phenotypes predicts therapeutic and side effects for 1,711 drugs Panagiotis N. Lalagkas , Rachel D. Melamed medRxiv 2025.05.13.25327511; doi: https://doi.org/10.1101/2025.05.13.25327511 Share This Article: Copy Citation Tools Genetic similarity among 178 disease phenotypes predicts therapeutic and side effects for 1,711 drugs Panagiotis N. Lalagkas , Rachel D. 
Melamed medRxiv 2025.05.13.25327511; doi: https://doi.org/10.1101/2025.05.13.25327511 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4536) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (541) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9232) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00ab13bbe85dfa9',t:'MTc3OTYwODg0Ng=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-23T02:00:01.238055+00:00

License: CC-BY-4.0