Full text
35,552 characters
· extracted from
preprint-html
· click to expand
Comparison of Software for Prediction of Fraction Absorbed and Unbound in Humans | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Comparison of Software for Prediction of Fraction Absorbed and Unbound in Humans View ORCID Profile Urban Fagerholm doi: https://doi.org/10.1101/2025.09.07.674727 Urban Fagerholm 1 Prosilico AB , Lännavägen 7, SE-141 45 Huddinge, Sweden Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Urban Fagerholm For correspondence: urban.fagerholm{at}prosilico.com Abstract Full Text Info/History Metrics Preview PDF 
ABSTRACT The main objective of this study was to evaluate and compare the performance of 5 PK software, ANDROMEDA by Prosilico 2.0, and 4 free, web-based prediction tools, PkCSM, Swiss ADME, ADMETlab 3.0 and DruMAP 2.0, for predictions of fraction absorbed (f a ) and unbound (f u ) in humans. Sets with compounds with available and undisclosed estimates were selected (n=140). The risk that test compounds have been used in training sets for model building (and thereby, influenced and exaggerated the predictive performances) was minimized. At least, these were not included in training sets for ANDROMEDA. One set consisted of compounds that have not been marketed and for which pharmacokinetic information has not been publicly disclosed. Quantitative and qualitative evaluations and comparisons were done. For both f a and f u , ANDROMEDA was clearly more accurate and balanced than the others, with higher Q 2 (0.69 vs 0.35 for f a ; 0.94 vs 0.62-0.76 for f u ), lower mean errors (15 % vs 28 %; 2.3- vs 4.1- to 36-fold), lower maximum errors (54 % vs 92 %; 10- vs 30- to 524-fold), more correct predicted classes (70-77 % vs 13-54 %), no failed, inconclusive or poor predictions (as found for 3 of the other software), wider application range, and minimal skewness at low values. It had intercepts on f a - and f u -prediction axes that were ca 1/3 and 1/175 to 1/23 compared to those found for the other software, which is of particular importance. Two software, PkCSM and Swiss ADME, were considered inappropriate, whereas ADMETlab took an intermediate performance position. Apparently, DruMAP was the second best performing software. Overall, there was poor performance overlap between the software (7-24 %), with many contradictory predictions. Advantages with ANDROMEDA suggest that this is the software of choice for those that desire adequate predictions of f a and f u in humans and estimates of certainty. The findings are of particular interest for the 3R-process.
Introduction Several software for prediction of human pharmacokinetics (PK) are available. These include free, web-based products/services. The applicability of such software depends on factors such as coverage and clinical validity of parameters, compound and estimate ranges, prediction accuracy and precision, and security. Software of choice is also based on desired quality standards. A major obstacle for external validations and performance evaluations of prediction software is lack of transparency and user-knowledge of which compounds have been used in training sets for model building. If test compounds in evaluations already have been used in training sets, prediction results will be misleading and exaggerated. Other obstacles include few published and peer-reviewed cross-validations and validations using hold-out sets, and limited disclosure of performance statistics. For example, a Q 2 -value could be good/high, and indicate that a model is robust and well-performing, despite skewness and large errors. Major errors could be hidden by the use of RMSE (instead of mean and maximum errors), and the use of % of compounds with <2-fold error could be a way to hide poor performance of a model with estimates ranging from 0 to 100 %. For example, a default value of 40 % would result in a large portion of prediction errors of maximally 2- to 3-fold. Lack of clinical validity, low accuracy and skewness are highlighted issues with some in silico prediction models and software [ 1 ].
Since solubility (S) is poorly correlated to in vivo dissolution potential (R 2 =0.13) and seems to exaggerate absorption limitations in humans, and intrinsic metabolic clearance (CL int ) measured with hepatocytes and microsomes correlates poorly with corresponding in vivo parameter (R 2 =0.1-0.4) and has relatively narrow range (LOQ of the same magnitude as median CL int for marketed drugs), in vitro data based models for these have limited clinical validity and applicability (Q 2 approximated to ca 0.05-0.15) [ 1 ]. They appear more suited for developing drugs and optimizing compounds for an in vitro human. Fraction absorbed (f a ) and fraction unbound in plasma (f u ) are essential parameters for prediction of overall PK, exposure profiles and doses. Prediction models for f a and f u often show moderately high Q 2 , but are typically skewed, with extensive overprediction at low values. An example for f a is the in silico model by Suenderhauf et al. [ 2 ], with an R 2 of 0.6 and RMSE of 26 %, but an intercept on the predicted f a -axis (at 0 % observed f a ) of ca 50 % f a . Many poorly absorbed compounds (f a <1-2 %) were predicted to have f a of 80-100 %. Thus, this model (and also others) seems applicable for highly permeable compounds only/mainly (which cannot be evaluated and confirmed by the model). It was approximated that the software ADMET Predictor (by Simulations Plus Inc.) has a Q 2 of ca 0.25 for f a [ 1 ], and with no apparent ability to distinguish between low to moderately high f a . On the contrary, Rule of 5 underestimates the absorption potential to a great extent (ca 60 % false negatives and ca 40 % average f a for a selection of rule-violating compounds [ 3 ]). Based on these findings and various systematic errors it is difficult to predict f a in humans with high accuracy using in silico models, unless a compound is highly permeable and soluble.
A large extent (ca 1/3 to 1/2) of modern small compounds has limited permeation and dissolution (incomplete gastrointestinal uptake in humans), and this makes accurate predictions of f a difficult and decision-making uncertain and problematic. Skewness has also been shown for in silico -prediction models and software for f u . Yun et al. [ 4 ] used 3 different in silico f u -models/software, ADMET Predictor and models by Watanabe et al. and Ingle et al., and found Q 2 (or R 2 ; unknown how many test compounds were already used in training sets) of ca 0.4-0.5 and intercepts of ca 4-7 % predicted f u at 1 % observed f u (apparently much greater overpredictions at f u <1 %). Many low f u compounds (f u 50 %. There were also underprediction trends at moderate to high f u , with average predicted f u of ca 45-60 % at observed f u of 100 %. Some compounds that do not bind to plasma proteins were predicted to be more than 97 % bound. Thus, the applicability domain of these models is compounds with moderate f u . A significant portion (ca 1/3 in 2021) of modern small drugs have f u of maximally 2 % [ 5 ]. Skewed and uncertain f u -prediction models are anticipated to jeopardize PK-prediction of many drugs under development. Ultimately, human PK prediction models and software are based on reliable and valid clinical (and/or preclinical) data and produce results with overall high accuracy, wide range and with minimal skewness. We have previously shown that it is possible to (compared to in vitro data-based in silico models) enhance the accuracy and coverage/range by approximately 3-fold with our software ANDROMEDA by Prosilico [ 1 , 5 ]. This software (available as SaaS and OnPrem; mainly for compounds with 150 to 750 g/mol molecular weight) is based on a large, unique human clinical PK-data base, machine learning and conformal prediction (CP) methodology. With this approach, valid confidence intervals are produced for each predicted numerical estimate.
This study was undertaken with the main aim to evaluate and compare the performance of 5 PK software (ANDROMEDA 2.0, and 4 free, web-based prediction tools; PkCSM, Swiss ADME, ADMETlab 3.0 and DruMAP 2.0) for predictions of f a and f u in humans. Compounds and data sets were selected in order to minimize the risk that test compounds have been used in training sets for model building (and thereby, influenced and exaggerated the predictive performances). Methods The following software were used and compared for prediction of human f a and f u for sets of selected small compounds: - ANDROMEDA by Prosilico 2.0 ([ 6 ] https://prosilico.com/andromeda/ ) – Only accessible by Prosilico and customers. Predicts f a (numerical; considering passive permeability, efflux and dissolution), f u , steady-state volume of distribution (V ss ), hepatic clearance (CL H ), intrinsic hepatic metabolic CL (CL int ), renal CL (CL R ), biliary CL (CL bile ) and total CL, MDR-1, BCRP, MRP2, oral bioavailability (F), half-life (t ½ ), and many more parameters. Based on machine learning and conformal prediction (CP) methodology, which gives valid confidence in predictions (including confidence intervals for each predicted numerical estimate). Mainly applicable for compounds with MW 150 to 700 g/mol and for non-saturated conditions. Not applicable for metals and quaternary amines, and has limited use for hydrolysis sensitive compounds and covalently bound drugs. - PkCSM ([ 7 ] https://biosig.lab.uq.edu.au/pkcsm/prediction ) – Predicts f a (numerical), f u , aqueous solubility, V ss , CL and MDR-1, but not parameters such as CL H , CL R , CL bile , F and t ½ . - SwissADME ([ 8 ] http://www.swissadme.ch/ ) – Predicts S (insoluble to highly soluble), good or poor f a (without defining what percentage is considered good) and MDR-1, but not parameters such as f u , V ss , CL H , CL R , CL bile , CL, F and t ½ .
- ADMETlab 3.0 ([ 9 ] https://admetlab3.scbdd.com/ ) – Predicts f a (> or < 30 %), f u , V ss , in vitro CL int , MDR-1, CL and t ½ , but not parameters such as CL H , CL R and CL bile . - DruMAP 2.0 ([ 10 ] https://drumap.nibiohn.go.jp/ ) – Predicts f a (low 70 %), f u , MDR-1, S, in vitro CL int , CL R and f e -class, but not parameters such as CL H , CL bile , CL, V ss , F and t ½ . The following data sets were selected and used: A f a -data set with 44 compounds with varying permeability and dissolution (from very low to very high; including 9 compounds with f a between ca 0 and 10 %, 16 compounds with f a <30 %, 15 compounds with limited dissolved fraction, and two apparent in vivo BCS IV-compounds), used for internal validation of ANDROMEDA by Prosilico (thus, not included in ANDROMEDA training sets) ( Supplementary Table 1 ). Data were collected from the literature and internet. A f u -data set with 54 compounds with very low f u to 100 % f u , used for internal validation of ANDROMEDA by Prosilico (thus, not included in ANDROMEDA training sets) ( Supplementary Table 2 ). Data were collected from the literature and internet. A data set of 42 compounds (a majority not approved as medicines) for which human PK (apparently/probably) have not yet been disclosed and published ( Supplementary Table 3 ; selected based on compounds highlighted by Drug Hunter). This reduces the risk of exaggerated prediction performance of the evaluated software. View this table: View inline View popup Supplementary Table 1. 44 compounds from Prosilico’s internal validation data set for f a . View this table: View inline View popup Supplementary Table 2. 54 compounds from Prosilico’s internal validation data set for f u . View this table: View inline View popup Download powerpoint Supplementary Table 3. 42 compounds without published f a and f u -values. Both quantitative (numerical) and qualitative (classes) evaluations and comparisons were done.
For sets a and b, predicted results were compared to observed/measured estimates, which enabled evaluation of both actual and relative performances. For set c, for which measurements were unavailable, the relative performances of the software were evaluated. The following classes and limits were selected: f a – low (<30 %; 80 %:>0.8), f u – very low (<1 %; 60 %; >0.6). In cases where values were below a certain level, 70 % of that was used for calculations of errors. For example, 0.007 was used for a compound with f u <0.01. Results Fraction absorbed (f a ) The f a -results are shown in Table 1 , Figure 1 and Supplementary Tables 4 and 5 . View this table: View inline View popup Download powerpoint Supplementary Table 4. Predicted vs observed f a (numerical and classifications) for the Prosilico validation data set using the 5 software. f a 0.8 (green). View this table: View inline View popup Download powerpoint Supplementary Table 5. Predicted f a (numerical and classifications) for the data set without available and published f a -values using the 5 software. f a 0.8 (green). View this table: View inline View popup Download powerpoint Table 1. Prediction results for f a . Download figure Open in new tab Figure 1. Observed f a vs predicted f a for the Prosilico test validation data set using ANDROMEDA and PkCSM (n=44). Quantitative (numerical) predictions – Prosilico test validation data set Q 2 -values (predicted vs observed f a ) for ANDROMEDA and PkCSM were 0.69 and 0.35, respectively ( Figure 1 ). Corresponding mean (median, maximum) absolute prediction errors were 15 (11, 54) % and 28 (17, 92) %, respectively. When using a default value of 50 % f a , mean, median and maximum absolute prediction errors were 31, 34 and 50 %, respectively. Intercepts on the y-axis (predicted f a ) were 13 and 44 %, respectively. For all 8 compounds with f a close to zero (0-1 %), ANDROMEDA predicted zero f a , showing that it can predict poor absorption.
PkCSM predicted zero uptake for 5 of them, but also 18 and 92 % f a for two and failed prediction for one. For a compound with molecular weight below the domain for ANDROMEDA (AB13), f a was underpredicted (1.5 vs 15-18 %). Qualitative (classifications) predictions – Prosilico test validation data set 77 % correct predicted classes was reached with ANDROMEDA (low/moderate/high classes predicted). With PkCSM (low/moderate/high classes predicted) 45 % correct class predictions and 1 failure were found. Swiss ADME predicted classes of f a and S, and for 32 % of compounds high f a was predicted for insoluble or poorly soluble compounds. Thus, there was a high degree of inconsistency/uncertainty in these predictions. There were also 5 failed predictions with this software. At least 23 %, and maximally 34 %, of the class predictions were correct with Swiss ADME. ADMETlab predicts > or <30 % f a , and 16-84 % (not possible to distinguish between moderate and high f a ; can be approximated to average ca 50 %) of class predictions were correct. 3 of the 8 compounds with f a =0-1 % were predicted to have at least 30 % uptake. DruMAP predicted low f a (<20 %) for these 8, but was unable to distinguish between zero and 19 % f a . With this software, 64 % of the predicted classes were correct and failed predictions were found for 1 compound. For ANDROMEDA and DruMAP there were no predictions of low f a for compounds with high f a , or vice versa. For PkCSM and Swiss ADME this occurred for 9 % and 8-24 % of predictions, respectively. Overall, the 5 software predicted the same f a -class for 9 % of compounds only. Qualitative (classifications) predictions – Data set without available observed f a -data Median predicted f a for ANDROMEDA and PkCSM were 66 (moderate) and 86 (high) %, respectively. Median predicted f a -class for Swiss ADME, ADMETlab and DruMAP were low/inconclusive, moderate/high (>30%) and moderate, respectively.
For 24 % of compounds, the software predicted the same f a -class. 80 % of ANDROMEDA predictions were 35-78 % f a . Only one compound was predicted to have low f a (28 %). PkCSM also predicted low f a for one compound only (8 %). ADMETlab predicted >30 % f a for all compounds. For DruMAP, 14 % of compounds were predicted to have low f a (<20 %). Swiss ADME generated more low f a -predictions, 38 % (potentially up to 71%, when considering the 33 % inconsistent/uncertain predictions). Fraction unbound (f u ) The f u -results are shown in Table 2 , Figures 2 and 3 , and Supplementary Tables 6 and 7 . View this table: View inline View popup Download powerpoint Supplementary Table 6. Predicted vs observed f u (numerical and classifications) for the Prosilico validation data set using 4 of the 5 software. f u 0.6 (blue); f u >0.6 (green). View this table: View inline View popup Download powerpoint Supplementary Table 7. Predicted f u (numerical and classifications) for the data set without available and published f u -values using 4 of the 5 software. f u 0.6 (blue); f u >0.6 (green). View this table: View inline View popup Download powerpoint Table 2. Prediction results for f u . Download figure Open in new tab Figure 2. Observed f u vs predicted f u (left; full scale, right; 0.05-0.15 scale; below; logarithmic scale) for the Prosilico test validation data set using (n=54). Download figure Open in new tab Figure 3. ANDROMEDA predicted f u vs predicted f u (left; full scale, right 0.05-0.15-scale) using PkCSM, ADMETlab 3.0 and DruMAP for all compounds (n=96). Quantitative (numerical) predictions – Prosilico test validation data set Q 2 -values (predicted vs observed f u ) for ANDROMEDA, PkCSM, ADMETlab and DruMAP were 0.94, 0.62, 0.75 and 0.76, respectively ( Figure 2 ). Corresponding mean (median, maximum) prediction errors were 2.3- (1.5-, 10-), 36- (4.9-, 524-), 4.3- (2.0-, 37-) and 4.1- (2.0-, 30-) fold, respectively.
Intercepts on the y-axis (predicted f u ) for the 4 software were 0.08, 14, 2.5 and 1.8 %, respectively. R 2 -values for predictions with ANDROMEDA vs PkCSM, ADMETlab and DruMAP (all 96 compounds) were 0.63, 0.70 and 0.67, respectively ( Figure 3 ). Corresponding intercepts on the y-axis (predicted f u ) were 15, 2.0 and 1.5 %, respectively. PkCSM, ADMETlab and DruMAP also produced lower predictions than ANDROMEDA for compounds with higher f u , which was demonstrated by average predicted f u of ca 65-80 % at 100 % with ANDROMEDA, respectively. Qualitative (classifications) predictions – Prosilico test validation data set Correct class was predicted for 70, 13, 54 and 48 % of compounds with ANDROMEDA, PkCSM, ADMETlab and DruMAP, respectively. Corresponding numbers for large class prediction errors (≥3 classes difference between predicted and observed f u ) were 0, 20 (and 15 % failed predictions), 6 and 2 %, respectively. The prediction overlap between software was 7 % (21 % for both data sets combined and when dividing f u into 3 classes; 50 %). Qualitative (classifications) predictions – Data set without available observed f u -data For the data set without available observed f u -data there was 10 % prediction overlap between software. Median predicted f u for ANDROMEDA, PkCSM, ADMETlab and DruMAP were 6.9, 20, 4.0 and 3.6, respectively. PkCSM had 7 % failed predictions. Overall (using both data sets), ANDROMEDA was the software with the highest percentage of predictions of lowest and highest f u -classes, 35 %, followed by DruMAP (26 %), ADMETlab (25 %) and PkCSM (4 %). Discussion Five software were evaluated with regard to the ability to predict f a and f u for 140 test compounds in humans, and compared. Selected test compounds (including many with challenging absorption characteristics and very low f u ) were not included in training sets for ANDROMEDA.
It was not possible to explore whether this was also the case for the other software, but one can assume that at least parts of selected test compounds have been used in training sets for these. In part, test compounds were selected in order to minimize this risk. For both f a and f u , ANDROMEDA was clearly more accurate than the other investigated software, with higher Q 2 (0.69 vs 0.35 and 0.94 vs 0.62-0.76), lower mean errors (15 % vs 28 % and 2.3- vs 4.1- to 36-fold), lower maximum errors (54 % vs 92 % and 10- vs 30- to 524-fold), more correct predicted classes (70-77 % vs 13-54 %), no failed, inconclusive or poor predictions (as found for 3 of the other software), wider application range, and minimal skewness at low values. It had intercepts on f a - and f u -prediction axes that were ca 1/3 and 1/175 to 1/23 compared to those found for the other software. It correctly predicted the f a of the 8 compounds with poorest absorption, and shows 1 % mean absolute prediction error for all new compounds with 0-5 % f a added to the upcoming software version (ANDROMEDA 3.0). The performance was in agreement with previous studies. For example, corresponding Q 2 , mean error and intercept obtained in a study with 136 compounds, including 59 that violate the Rule of 5, were 0.72, 14 % and 13 %, respectively [ 3 ]. Somewhat higher Q 2 (ca 0.8) and lower intercept (6 %) have been found in other validation studies with ANDROMEDA. ANDROMEDA 3.0 has 6 % better performance (higher Q 2 , lower mean error and more narrow confidence intervals) than version 2.0 for the prediction of both f a and f u in humans, and has ca ¼ lower f u -intercept, which further increases the lead. Another advantage with this software is the production of true confidence intervals for each compound and parameter estimate. Advantages with ANDROMEDA suggest that this is the software of choice for those that desire adequate predictions of f a and f u in humans and estimates of certainty.
Reasons to the better performance of ANDROMEDA are believed to include its validated prediction models for passive permeability-based uptake, efflux and fraction dissolved, numerical scale, unique algorithms and large amount of biopharmaceutical data used for model building. Poorest overall performance was found for PkCSM , with large overpredictions (up to 524-fold; also for its V ss -model) and many failed predictions. It predicted near complete to complete absorption for 11 compounds with 0 to 30 % uptake, and reached a mean prediction error for f a that was of same magnitude as that obtained when assuming 50 % f a for all compounds. Thus, with this software there is high risk to overpredict absorption and CL, be misled in the predictions of bioavailability and t ½ , and make inadequate decisions. Swiss ADME was associated with many failed and inconclusive/contradictory predictions for f a , and was not applicable for f u -predictions. It produced even weaker f a -predictions than PkCSM, with high portions of inconclusive, low and failed predictions, and the lowest percentage of correct class predictions, only 23-34 %. A significant proportion of compounds were predicted to have both high f a and poor solubility or insolubility, and this creates confusion and uncertainty. Two compounds were predicted to be insoluble with Swiss ADME, but to have high f a according to predictions with all 5 software and confirmed high in vivo f a . It was, however, relatively accurate in predicting low f a for 10 of 18 poorly absorbed compounds. For the remaining 8 it predicted high f a . The results speak against Swiss ADME as an accurate, consistent and trustworthy software for prediction of f a in humans. 
ADMETlab took a middle position performance-wise, and is limited by its 2.5 % f u -intercept (overprediction potential of highly bound compounds; see below), a case with poor prediction (37-fold underprediction of f u for one compound), and dichotomous f a -model (> or < 30 %; without ability to distinguish between very poor and moderately low f a and between moderately high to very high f a ). A major limitation with ADMETlab is the extensive underprediction trend for t ½ , with predicted t ½ ranging from ca 0.2 to ca 2 h (a majority <1 h) for a large set of compounds with adequate t ½ in humans. Apparently, DruMAP was the second best performing software. Drawbacks with this software include the 3-class model for f a (low, moderate & high; without ability to distinguish between very poor and moderately low f a and between moderately high to very high f a ) and cases with poor f a - and f u - predictions (up to 30-fold error). It is not known whether this was due to comparably large number of training data set compounds among test compounds. ADMET Predictor was not included in this evaluation since it is not available as a freeware on the web. It has previously been shown to have an intercept of ca 6 % for f u -predictions (implying risk of extensive overpredictions for highly bound compounds) [ 4 ]. Published f a -prediction results for ADMET Predictor are limited. The intermediate parameter used for ADMET Predictor predictions of absorption, effective human intestinal permeability (P eff ), has been shown to have a LOQ in the moderately high permeability zone (corresponding to ca 70-90 % f a ), which is a considerable limitation [ 1 ]. Intercepts on prediction axes are expected with in silico models, and the greater they are, the more impact they will have on the predictions (of for example, CL, t ½ and F) and decision-making. Many modern small drugs have low f u and limited f a [ 5 ], implying that skewed models are likely to mislead significantly.
For a compound with 1 % f u , the general overprediction trend is 1.3-, 40-, 2.6- and 20-fold (estimated using logged values) with ANDROMEDA, PkCSM, ADMETlab and DruMAP, respectively. Corresponding estimates for a compound with a f u of 0.1 % are 1.9-, 394-, 7.0- and 194-fold, respectively. Overall, there was only 7-24 % performance overlap between the 5 software. For example, 60 % of investigated compounds were predicted to have both poor and good f a . There were cases where one software predicted very low f u and another moderate f u (with 233-fold difference) and a case where low and high f u was predicted with different software (2.4 and 79 %). This will, of course, be confusing for those who use all these for predictions of the selected parameters. The discrepancy could partly be explained by inclusion of many test compounds with complex absorption and low f a and very low f u (=difficult to predict), and partly by poor performances among tested software. With the use of only one validated software with adequate performance (such as ANDROMEDA) such a problem could be removed/reduced. For those interested in using free or purchased tools or internally developed software, and concerned about clinical accuracy, validity, trustworthiness and application domains, it is recommended to request performance results, including a broad range of statistics obtained in cross-validations and hold-out set tests relevant for humans in vivo . Software developers concerned about proprietary rights (as we are) are unlikely to reveal their training data sets and algorithms, and this is a major issue for external investigations, including this one. In this study we selected test sets that we have not used for model building, which enabled us to present true own predictions. Prediction results for the 42 compounds for which PK are not yet available (to our knowledge) can easily be checked by the developers of these compounds and by others.
Conclusion A handful of software were evaluated with regard to the ability to predict human f a and f u for 140 test compounds with varying characteristics, and also compared. Test compounds were not included in training sets for ANDROMEDA. It was, however, not possible to explore whether this was also the case for the other software. For both f a and f u , ANDROMEDA was clearly more accurate and balanced than the others. The higher performance, in particular in the lower ranges of f a and f u , is of great value and importance. Two software, PkCSM and Swiss ADME, were considered inappropriate, whereas ADMETlab took an intermediate performance position. Apparently, DruMAP was the second best performing software. Overall, there was poor performance overlap between the software, with many contradictory predictions. Challenging PK-properties and software with poor performance are among explanations. Advantages with ANDROMEDA suggest that this is the software of choice for those that desire adequate predictions of f a and f u in humans. References 1. ↵ Fagerholm U , Hellberg S. Human ADME/PK is lost in translation and prediction from in silico to in vitro to in vivo . bioRxiv , Feb 2025 . 2. ↵ Suenderhauf , C , Hammann F , Maunz A , Helma C , Huwyler J. 2011 . Combinatorial QSAR modeling of human intestinal absorption . Mol. Pharmaceut . 2011; 8 : 213 – 224 . OpenUrl CrossRef 3. ↵ Fagerholm U , Hellberg S , Alvarsson J , Ekmefjord M , Spjuth O. Comparing Lipinski’s Rule of 5 and machine learning based prediction of fraction absorbed for assessing oral absorption in humans . bioRxiv , Aug 2024 . 4. ↵ Yun YE , Tornero-Velez R , Purucker ST , Chang DT , Edginton AN . Evaluation of quantitative structure property relationship algorithms for predicting plasma protein binding in humans . Comput. Toxicol . 2021 ; 17 : 100142 . OpenUrl CrossRef PubMed 5. ↵ Fagerholm U , Hellberg S , Alvarsson J , Spjuth O.
In silico prediction of human clinical pharmacokinetics with ANDROMEDA by Prosilico: Predictions for an established benchmarking data set, a modern small drug data set, and a comparison with laboratory methods. Altern . Lab. Anim . 2023 ; 51 : 39 – 54 . OpenUrl 6. ↵ ANDROMEDA by Prosilico 2.0 https://prosilico.com/andromeda/ . 7. ↵ PkCSM https://biosig.lab.uq.edu.au/pkcsm/prediction . 8. ↵ SwissADME http://www.swissadme.ch/ . 9. ↵ ADMETlab 3.0 https://admetlab3.scbdd.com/ . 10. ↵ DruMAP 2.0 https://drumap.nibiohn.go.jp/ . View the discussion thread. Back to top Previous Next Posted September 16, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Comparison of Software for Prediction of Fraction Absorbed and Unbound in Humans Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Comparison of Software for Prediction of Fraction Absorbed and Unbound in Humans Urban Fagerholm bioRxiv 2025.09.07.674727; doi: https://doi.org/10.1101/2025.09.07.674727 Share This Article: Copy Citation Tools Comparison of Software for Prediction of Fraction Absorbed and Unbound in Humans Urban Fagerholm bioRxiv 2025.09.07.674727; doi: https://doi.org/10.1101/2025.09.07.674727 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Pharmacology and Toxicology Subject Areas All Articles Animal Behavior and Cognition (7618) Biochemistry (17635) Bioengineering (13859) Bioinformatics (41846) Biophysics (21401) Cancer Biology (18534) Cell Biology (25422) Clinical Trials (138) Developmental Biology (13352) Ecology (19860) Epidemiology (2067) Evolutionary Biology (24285) Genetics (15582) Genomics (22463) Immunology (17700) Microbiology (40298) Molecular Biology (17141) Neuroscience (88424) Paleontology (666) Pathology (2825) Pharmacology and Toxicology (4813) Physiology (7633) Plant Biology (15107) Scientific Communication and Education (2042) Synthetic Biology (4284) Systems Biology (9808) Zoology (2267)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.