Bayesian Ordered Lattice Design For Phase I Clinical Trials

doi:10.1101/2025.03.23.25324471

Bayesian Ordered Lattice Design For Phase I Clinical Trials

2025 · doi:10.1101/2025.03.23.25324471

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 70,160 characters · extracted from preprint-html · click to expand

Bayesian Ordered Lattice Design For Phase I Clinical Trials | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Bayesian Ordered Lattice Design For Phase I Clinical Trials Gi-Ming Wang , Curtis Tatsuoka doi: https://doi.org/10.1101/2025.03.23.25324471 Gi-Ming Wang 1 Case Comprehensive Cancer Center, Case Western Reserve University. Find this author on Google Scholar Find this author on PubMed Search for this author on this site Curtis Tatsuoka 2 Department of Epidemiology and Public Health, Division of Biostatistics and Bioinformatics, University of Maryland-Baltimore Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: ctatsuoka{at}som.umaryland.edu Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract We develop a new framework specifically for early Phase I clinical trials called Bayesian Ordered Lattice Design (BOLD). This study is motivated by two key factors. First, Phase I clinical trials typically involve relatively small sample sizes, which can make the use of prior information on dose-limiting toxicity (DLT) rates highly significant. To address this challenge, the proposed Bayesian methodology incorporates prior information and posterior updating to guide dose selection, toxicity monitoring, early stopping and identification of the maximum tolerable dose (MTD). Second, a natural ordering among toxicity probabilities across different dose levels can be utilized, with the idea being that analysis of dose-level posterior probabilities can and should acquire insights from data obtained at other dose levels, by leveraging their order relationship. Our proposed approach employs straightforward dose-level Bayesian specifications and relies on intuitive and clinically interpretable DLT rate posterior probabilities for decision making. Importantly, we show that it can often outperform popular methods, in terms of accuracy in determining the MTD. This Bayesian approach is also computationally simple and avoids simulation. 1. Introduction Phase I clinical trials are designed to determine the maximum tolerable dose (MTD), which is the dose that has a probability of dose-limiting toxicity (DLT) closest to a predetermined target rate. 1 Designs are comprised of two primary elements. The first element involves a set of criteria for making decisions regarding the allocation of dose level to each participant. The second element entails methods for determining when to stop and the identification of an MTD at the end of the trial. 2 The process of identifying the appropriate dose for targeted therapies presents several challenges, including the need to enhance the accuracy of dose-finding efforts in order to reduce both human and financial costs, as well as to shorten development timelines. 3 , 4 From both practical and ethical perspectives, it is crucial to minimize decision-making errors and reduce as possible the risk of exposing patients to sub-therapeutic or excessively toxic doses. 1 A variety of Phase I trial designs have been established to determine MTD for single-agent studies. These designs can be categorized into three distinct types based on their statistical principles and methods of execution: algorithm-based, model-based, and model-assisted designs. 5 The traditional 3+3 design is the most widely utilized algorithm-based approach in clinical trials, employing a straightforward set of predetermined rules to guide dose escalation and de-escalation. 6 This design continues to be a predominant choice for Phase I trials, primarily due to its ease of use. 7 Nevertheless, the 3+3 design has faced significant criticism for its inadequate operating characteristics, 8 and its limited effectiveness in accurately identifying MTD and estimating DLT rates. Its heightened likelihood of administering sub-therapeutic doses to patients has also been noted. 9 Model-based designs utilize a statistical model, such as a logistic model, to characterize the dose-toxicity relationship and facilitate dose adjustments. 8 A prominent example of this design is the Continuous Reassessment Method (CRM). 8 As new data is collected, the CRM method iteratively refines the model estimates after each cohort, aiding in the determination of appropriate dose selection. 9 Compared to the traditional 3+3 design, CRM demonstrates superior accuracy in identifying and assigning a greater number of patients to MTD. 10 However, it is important to note that the implementation of CRM can involve inherent statistical and computational complexities, such as in the estimation of the model. 9 Model-assisted designs were created to integrate the benefits of both algorithm-based and model-based designs. Like model-based designs, model-assisted designs utilize a statistical model (e.g., the binomial model to estimate DLT rate) to aid in formulating decision rules. In a manner akin to algorithm-based designs, model-assisted designs can outline the rules for dose escalation and de-escalation, thereby simplifying implementation. 6 Common examples of model-assisted designs include TPI (toxicity probability interval design), mTPI (modified toxicity probability interval design), 11 Keyboard design (specifically, mTPI-2), 12 and Bayesian Optimal Interval Design (BOIN). 7 A notable characteristic of BOIN, one of the designs with which we will make a comparison, is that empirical proportions serve as the basis for dose selection. Its boundaries for escalation/de-escalation are derived from a Bayesian 3-class problem posed at each dose level, but Bayesian posteriors for the specific DLT rate values play no explicit role in dose selection. Instead, boundaries for the empirical proportion values determine dose selection decisions. We propose a Bayesian framework aiming to enhance the accuracy in determining the MTD, a challenging issue in dose escalation trials. The primary motivation behind the proposed design is that Phase I clinical trials typically involve relatively small sample sizes, making prior information regarding dose toxicity highly significant. In clinical trials, the majority of conventional (frequentist) statistical techniques typically incorporate data from prior studies solely during the design phase, rather than as integral to the formal analysis. 13 By utilizing prior information in a Bayesian framework, the accuracy of dose selection can be enhanced. Furthermore, since a natural ordering of toxicity rates emerges across different dose levels, this can be recognized and utilized to enhance the precision of determining MTD. The underlying premise is that higher dose levels are anticipated to have higher DLT rates. 14 The associated order constraints may be explicitly recognized through the posterior probabilities related to DLT rates across different doses. 15 This linkage of posterior probabilities to reflect the natural order among dose DLT values allows for “learning” from response data across dose levels. The objective of this research is to create a Bayesian Ordered Lattice Design (BOLD) that integrates prior knowledge and ordering constraints with empirical toxicity data for the purposes of adaptive dose selection, early stopping and classification of MTD in Phase I clinical trials. For the proposed BOLD, the incorporation of even weakly informative prior knowledge regarding dose toxicity in an order-constrained Bayesian manner can improve accuracy in scenarios where sample sizes are small. The term “lattice” originates from the idea that the dose levels lie within a formal order structure. Dose levels in a single-drug trial can be perceived as a linearly ordered lattice. Web Figure 1 of the Supplementary Material illustrates the lattice diagram featuring five dose levels, which also includes top and bottom states. The bottom state signifies that an MTD is lower than the minimum dose. The top state signifies that the MTD is higher than the maximum dose. Classification involves selecting a state within the lattice which indicates the dose level believed to be the MTD. Related work on Bayesian sequential classification on lattice models includes Tatsuoka and Ferguson (2003), 16 Tatsuoka (2014), 17 and Tatsuoka, Chen and Lu (2022). 18 2. Methodology Notation and Terminology Suppose J > 1 dose levels. Denote DLT rate for dose j as π j , 1 ≤ j ≤ J . We assume the natural order constraints π j ≤ π k if j < k . Terminology from lattice theory that we use includes the notion of a cover and anti-cover of a dose level. Note that the cover for dose j is dose j + 1 , j 1. These are the least upper bound and greatest lower bounds of dose j in the lattice. Let φ be the target DLT rate. N max is the maximum number of overall patients in the trial before stopping is invoked; is the upper limit of patients at dose j before stopping the trial is considered; n c is the number of patients per cohort at a given stage, where a stage is defined as the period of administration and observation for DLTs after a dose has been selected; n j represents the total number of patients who have received dose level j , and x j denotes the number of those patients with observed DLTs, with 0 ≤ x j ≤ n j . Toxicity thresholds for dose j are denoted by γ j , in that if the posterior probability of π j > φ exceeds γ j , then the administration for all doses k , j ≤ k , will no longer be considered. Finally, τ is a parameter reflecting a target probability value used in dose selection. Prior and conjugate posterior distribution The implementation of BOLD requires assigning dose-level prior distributions for the respective DLT rates. We employ Beta distributions, which are represented as π j ∼ Beta ( α j , β j ) for dose level j . Importantly, Beta distributions have conjugate posteriors π j | x j , n j ∼ Beta ( α j + x j , β j + n j − x j ). Bayesian updating is conducted after each stage. In prior specification, we adopt the following guiding principles: 1) It is assumed that the prior distributions of respective DLT rates reflect the order of the dose levels, such as in terms of the prior means; 2) Priors should not be excessively informative, in that each dose must have the potential to be identified as MTD, even when only limited sample sizes are available. This principle supports equipoise. Specifically, this requires that the variances of the prior DLT rates should not be too small. 3) The prior means should also be within reasonable proximity to the target rate φ . In the case of trials assuming non-informative priors, we define the parameters of the prior distribution α and β by setting the prior mean equal to the target rate, while the prior effective sample size (PESS), α + β , is suggested to match the standard cohort size at a given stage of administration. This will lead to a relatively weak prior in the sense that PESS is equal to one stage of dose administrations. It is interesting to note that if we were to assume a prior is a uniform distribution Beta (1, 1), the prior mean would be 0.5, which exceeds standard target toxicity values, although PESS = 2, which is weak. For informative priors, we will for instance specify the PESS of the dose considered most likely to be the MTD to be larger than other doses, which is equivalent to assigning a smaller prior variance. Order constraints on posterior probabilities A key idea of BOLD is to impose order constraints to “share” updated information between dose levels after each stage. Another important feature of BOLD is that inferences for dose selection, toxicity monitoring, and early stopping rely on the same statistical basis, which we refer to as the Conjugate Posterior Probability of the DLT rate being Above the Target (CPAT), expressed as CPAT j represents the posterior probability that dose j is too toxic. Note that this posterior probability value provides the most direct and up-to-date insight on the most pertinent clinical question: Is the dose’s DLT rate safe or not, relative to the target φ ? Larger values of (1) indicate that the DLT rate is more likely to be unacceptably high. Order constraints on CPAT values can be implemented after each stage using the Pool-Adjacent-Violators Algorithm (PAVA), 19 an isotonic regression method. 20 , 21 PAVA substitutes any neighboring values that violate the non-decreasing ordering with a weighted average, with the number of treated patients as a weighting factor. This ensures that the resulting estimates exhibit monotonicity. We apply PAVA in a “local” manner, in the sense that only the CPATs of the current dose and its cover and anti-cover doses are included. This minimizes the constraints to those relevant for decision-making, since we only consider escalation/de-escalation in one dose level increments. Inclusion of doses in constraints with little or no observed data is thus minimized. A corresponding constrained CPAT j value following PAVA is referred to as the PAVA-adjusted Posterior Probability of the DLT rate being Above the Target ( PPAT j ). PPAT values are the basis for dose selection. Dose selection BOLD jointly considers multiple dose levels in selection decisions based on “local” PPAT values. We restrict dose selection to either escalation, de-escalation, or maintaining the current dose. The proposed selection criterion involves identifying the dose for which the PPAT is nearest to τ among the current dose and its cover/anti-cover doses. If we denote this current set of dose levels as J ∗ , then dose j ∗ ∈ J ∗ is selected if it minimizes (2), where 0 < τ ≤ 0.5 is the PPAT threshold parameter: The default value of τ is 0.5. A PPAT value of 0.5 reflects the highest level of uncertainty as to the safety of that dose level (akin to a coin flip). Following observation from this stage, no matter the responses, we should gain improved insights one way or another as to whether the dose DLT rate is above or below the target. When there is a tie in minimizing (2), with | PPAT j − τ | = | PPAT k − τ | , j ≤ k , the decision rule is as follows: Case 1: If both PPAT values are below τ , choose dose k . Case 2: If both PPAT values exceed τ , choose j . Case 3: If PPAT j ≤ τ while PPAT k > τ , choose dose j . Toxicity control During experimentation, we will dynamically remove doses that are considered excessively toxic. Given a threshold γ j > 0, a dose j is excessively toxic if CPAT j > γ j . Note that if dose j is deemed too toxic, all doses k , j ≤ k ≤ J , are removed from further consideration. Furthermore, each dose can have its own threshold. Overdose control Note that if τ in (2) is reduced from the default value of 0.5, this discourages escalation, as it is more attractive to select dose levels that have a lower probability that the DLT rate exceeds target than 0.5; in other words, a higher probability of being “safe”. Hence, overdose control can be specified intuitively and simply by adjusting the PPAT threshold parameter τ . Stopping rule Stopping of the trial is invoked when: Case 1: The minimum dose level (i.e., dose 1) is excessively toxic, as indicated by CPAT 1 > γ 1 , suggesting that a MTD is lower than this minimum dose level. Case 2: The number of patients receiving treatment in the trial reaches N max . Case 3: The number of patients at dose j has reached and dose j is the next stage selection. MTD identification The MTD will be determined upon stopping. MTD identification relies on isotonically-regressed posterior means of DLT rates. As noted, if the lowest dose is found to be excessively toxic ( CPAT 1 > γ 1 ), all doses are considered too toxic and no MTD is selected. Otherwise, we implement PAVA “locally” on the conjugate posterior means of the final selected dose, along with its cover and/or anti-cover doses and regard these doses as the potential MTD candidates. We have found that this can improve accuracy versus considering all posterior means after isotonic regression, which involves imposing more order constraints. Candidates for MTD must also have observed trial data. Let J ∗ be the collection of candidates, and be the PAVA-adjusted posterior mean for dose k ∈ J ∗ . The dose k with smallest PAVA-adjusted posterior mean difference with the target DLT rate, is selected as MTD. If a tie occurs among , identification is as follows: Case 1: If , then dose j is selected as MTD. Case 2: If , then dose k is selected. Case 3: If , then dose j is selected. Design Inputs In sum, the following are the design inputs: 1) the target DLT rate φ ; 2) dose-level prior Beta distribution parameters for DLT rates for each π j ; 3) N max , for each dose j , and n c ; 4) toxicity thresholds γ j for each dose j ; 5) PPAT threshold parameter τ . We assume that the initial dose level being administered is the lowest one, but this can also be specified. Flowchart of BOLD The flowchart that outlines the procedures and guidelines for dose selection is presented in Figure 1 . A summarized description of the flowchart is as follows: 1) Specify the target DLT rate φ and other inputs. 2) Assess prior information, determining whether the prior distribution will be informative or non-informative, and subsequently assign the Beta prior parameters that determine prior mean and variance, as well as toxicity threshold for each dose level. 3) Initiate the trial at the lowest dose. 4) Administer the dose level to n c patients and observe for DLTs. 5) Update the CPAT values, which represent the posterior probability that a dose is overly toxic. 6) Assess toxicity control based on CPAT 1 exceeding γ 1 or not, and consider early stopping of the trial. 7) Otherwise, continue monitoring if the overall sample size ceiling, N max , is reached. 8) Consider dose elimination of the currently administered dose j along with all the higher doses ≥ j by assessing the CPAT value of the current dose using its corresponding threshold γ j , and proceed to de-escalate the dose. 9) Otherwise, implement order constraints on the updated CPAT values to obtain PPAT values locally (current and cover/anti-cover dose(s)). 10) Select the next dose to either escalate, de-escalate or maintain the current dose based on minimizing the PPAT criterion in equation (2) . 11) Monitor the current dose j for early stopping of the trial if the number of patients administered at dose j is at least and if the decision in Step 10 is to maintain the current dose. 12) Repeat another stage of experimentation from Steps 4 to 11 until the trial is stopped. 13) Once stopping occurs, determine the MTD by considering PAVA-adjusted posterior mean DLT rates. Download figure Open in new tab Figure 1: Flowchart of BOLD design for single-drug Phase I clinical trials. 3. Simulation Design and demonstration The performance of BOLD for Phase I clinical trials can be evaluated via simulations conducted with R (code available at https://github.com/hiddenmanna1996/BOLD ). In general, our default prior distribution, Beta ( α , β ), is set for all doses to have prior mean equal to the target rate φ , which reflects equipoise, and PESS (i.e. α + β ) = 3, the typical standard cohort size ( n c ), which reflects a weak prior. For φ = 0.2, this is equivalent to Beta (0.6, 2.4); when φ = 0.25, it is Beta (0.75, 2.25). The upper limit of patient accrual during a trial (i.e., N max ) is set at either 21 or 30. We focus on these smaller sample sizes since in our practice, timelines and duration are very much a concern for these trials, as well as the strong interest in moving forward to a dose expansion phase. Stopping occurs when the number of patients receiving any dose j reaches 12 (i.e. = 12) and concurrently dose j also is the next-stage dose selection decision. For BOLD, an exception is made for the lowest dose, where is set to 15 in order to enhance classification to the bottom state, which reflects that all doses are too toxic. Toxicity thresholds are set to γ j = 0.95, j > 1. For the lowest dose, γ 1 is set to 0.9, again to enhance classification to the bottom state. Note that the bottom state is only classified to indirectly, since it is not an actual dose, and its classification is conducted by the toxicity threshold being exceeded for the lowest dose. Lastly, we set PPAT threshold parameter τ = 0.5, the default value. As an illustration, assume that a trial considers 5 distinct dose levels, and the target response rate φ is set at 0.25. The MTD is assumed to be dose level 4. The prior mean DLT rate is assumed to be 0.25, the same as the target rate, for all the doses. Also assume the true DLT rates to be 0.1, 0.11, 0.12, 0.25 and 0.5 for doses 1 through 5, respectively. The updating of PPAT values after hypothetical toxicity responses and the corresponding dose selections are presented in Table 1 . Note the preservation of order among the PPAT values. The final step of MTD identification is also illustrated with the final PAVA-adjusted posterior means. View this table: View inline View popup Download powerpoint Table 1: Demonstration of dose selections for a 5-dose model with non-informative prior, target rate φ = 0.25, and maximum total sample size N max = 30, whereas the process of dose selection utilizing PPAT involves escalation (E), de-escalation (D), or maintaining the current dose (M). Comparative analysis for non-informative prior We begin a comparative analysis versus BOIN and CRM designs through simulations using non-informative priors. We first consider 5 doses, so that the possible MTD selections encompass “ 5”, where “ 5” signifies that all doses have DLT rates below the target. The simulations are carried out based on random scenarios of true DLT rates. 6 In situations when the MTD corresponds to a dose level j < 5, the difference in DLT rates between the j -th value and the ( j + 1)-th value, referred to as upper δ , is fixed at values of either 0.1, 0.15, 0.2, or 0.25. We carefully stratify DLT rate scenarios by these upper δ values, in order to carefully demarcate when performance differences between methods clearly emerge. Larger upper δ values reflect greater discriminability. Per scenario, 5 distinct DLT rates are generated and organized in ascending order within the range of (0.01, 0.7), imposing that the j -th value is the target rate φ and the ( j + 1)-th value is equal to φ + upper δ . If j > 1, the difference in DLT rate between the j -th value and the ( j − 1)-th value is restricted to be at least 0.05. Under these circumstances, the non-MTD DLT rate values are selected randomly from a uniform distribution and arranged in ascending order to correspond to the linear order among dose levels. In scenarios where all dose levels have DLT rates above MTD target, we randomly select 5 unique DLT rate values that are organized in ascending order within the range between 1.4 times the target rate and 0.7. In scenarios where all dose level rates are below MTD target, we randomly select 5 unique values organized in ascending order between 0.01 to 0.6 times the target rate. For every one of the 4 upper δ values (i.e., 0.10, 0.15, 0.20, and 0.25), a total of 50 random scenarios are generated for each of the 7 MTD levels (i.e., 5), culminating in a grand total of 1,400 random scenarios. For each random scenario, 100 simulations of response sequences are generated. This number of random scenarios per dose level and the corresponding number of simulations per scenario are the same throughout all examples. The simulations for BOIN and CRM are conducted utilizing R functions get.oc (Generate operating characteristics for single agent trials) and bcrm (Bayesian continual reassessment method for Phase I dose-escalation trials), respectively. The dose-toxicity response curve associated with the CRM design is assumed to follow a one-parameter logistic model with uniform prior on the interval (0, 10). 22 The prior probabilities of toxicity in the CRM design for each dose level (i.e., skeletons) are generated by the R function getprior() according to default specifications suggested by Lee and Cheung (2009). 23 The value of γ 1 , the toxicity threshold for dose 1, is set at 0.9 for all the three designs (BOLD, BOIN and CRM). For dose level j > 1, γ j for BOLD and BOIN is established at 0.95 (the bcrm code currently does not offer this option). The value of for all j is set to be 12 for BOLD, BOIN and CRM, except where = 15 for BOLD as previously indicated. More attention is given to the BOLD versus BOIN comparison, due to their methodological similarity. Accuracy is quantified as the proportion of trials that successfully identify the MTD, with the standard deviation of accuracy rate computed from the simulations across each of the scenarios. Efficiency is assessed by the mean and standard deviation of number of patients treated from across the scenarios. Figure 2 depicts the bar charts of accuracy by MTD levels with non-informative priors, stratified by N max and upper δ , with φ = 0.25. Bar charts illustrating efficiency (mean patient count and corresponding standard deviation) are presented in Web Figures 2 and 3 . Download figure Open in new tab Figure 2: Accuracy of BOLD, BOIN, and CRM design, represented by the average of accurate MTD identifications (%) across scenarios, for 5-dose model with non-informative prior by MTD level and stratified by N max and upper δ , assuming random scenarios of true DLT rates, at target rate φ = 0.25. The legend denotes the average and SD (in parenthesis) for all the 7 MTD levels (1st row) and for the MTD levels that are ≥ 1 (2nd row). In these figures, averages are reported based on all 7 scenarios, as well as for all scenarios except the case when all DLT rates exceed the target (the bottom state). The latter select set of scenarios would be of interest if an investigator was confident that at least one of the dose levels has a DLT rate at or below target. Hence, these average values are also of practical interest. Average accuracy rates for all designs at the upper δ value of 0.10 are around 50% or a bit higher, which underscores the limitations of small sample analysis when there is no clear discrimination between dose level DLT rates. Overall, these figures suggest that in many scenarios, the average accuracy across dose levels of BOLD exceeds BOIN and CRM designs. Specifically, at the dose level, this advantage becomes more pronounced when the MTD is at higher dose levels, and as the upper δ value increases. Moreover, the accuracy rates across dose levels seem to be more consistent for BOLD, in terms of maximizing the lowest of the dose-level accuracy rates (maximin property). Compared to BOIN, BOLD and CRM appear to leverage larger DLT rate differences more effectively as accuracy improvements are more pronounced. This is due to BOLD and CRM selecting dose across different levels at each stage of selection, as opposed to focusing on a specific dose until an escalation/de-escalation decision is made, as does BOIN. On the other hand, BOIN’s approach does lead to superior identification of the bottom state when it is true. These respective advantages hold at N max = 21 and 30. Note that for all these methods, accuracy improves as N max increases. Standard deviations of accuracy rates across scenarios are also tracked in Figure 2 . CRM has smaller standard deviations of accuracy when N max = 21, but the differences decrease at N max = 30. Compared to BOIN, BOLD generally has smaller standard deviations in accuracy rates across simulations. Figure 3A displays the plots of true DLT rates across dose levels from 50 random scenarios for each MTD level of a 5-dose model with upper δ = 0.15, MTD levels 1 to 5, φ = 0.25, and N max = 21. The plots also display accuracy comparisons between BOLD and BOIN for each scenario, demarcating > 10% differences in mean accuracy (computed from the larger value) across 100 simulations within each scenario. Similar plots when upper δ = 0.2 are shown in Figure 3B . These plots indicate that BOLD’s relative advantage in accurate MTD identification increases at higher dose levels. The cases when the bottom ( 5) are true are not shown. However, as mentioned, BOIN has clear advantages when all dose levels are overly toxic for the bottom state. BOIN also seems to have an advantage specifically at the middle level doses in the scenarios when the lower dose levels have DLT rates closer to zero. This leads the algorithm to quickly escalate out of the lower dose levels, which can be an issue more generally for BOIN when higher doses are the MTD. Download figure Open in new tab Figure 3: Plots of true DLT rates of a 5-dose model with 50 random scenarios for each MTD level, whereas upper δ = 0.15 and 0.2 at MTD levels 1 to 5, φ = 0.25, and N max = 21, alongside an accuracy comparison between BOLD and BOIN for each scenario, where the 10% difference of accuracy is defined as from the larger value. The plots for MTD = 5 are identical across both upper δ values. Although not displayed, there are zero blue and 47 reds for MTD 5. Efficiency is also considered. In Web Figure 2 , CRM requires a smaller number of patients for φ = 0.25. We do note that when N max is larger, differences between BOIN and CRM in average number of patients have not arisen in other simulations. 1 In other settings, there are no clear differences between BOLD and BOIN, and BOLD can have lower average patient counts when not including the bottom state. In terms of standard deviations in the number of patients across scenarios, as in Web Figure 3 , BOLD generally has similar or smaller values than CRM, and BOIN clearly has the highest values. This is an indication of the consistency and stability of the designs in terms of patient numbers. Web Figures 4 through 6 illustrate accuracy and Web Tables 1 and 2 present efficiency results at target φ = 0.15, 0.20 and 0.30 for N max = 21 and 30. Generally, patterns and trends in accuracy and efficiency appear similar as when φ = 0.25. In general, BOLD performs relatively poorly when the bottom state is true. For higher dose levels, BOIN performs relatively poorly for MTD levels 4 and 5, especially for the smaller study sample size when N max = 21. The selected CRM model can perform relatively poorly for the second highest dose level (dose J − 1). CRM also has a reduction in the average number of patients as φ increases, and BOLD often has smaller average standard deviations. Web Table 3 shows evaluations of overdosing rates (the proportion of patients treated with doses higher than the actual MTD), underestimation rates (the proportion of trials with identified MTD being lower than the actual MTD), and overestimation rates (the proportion of trials with identified MTD being higher than the actual MTD) with non-informative priors and 5-dose lattice. The assumed φ values are 0.25 and 0.3, for N max = 21 and 30 and upper δ = 0.15. Note that when dose level 5 is the true MTD, overdosing and overestimation do not occur. Similarly, there is no possibility of underestimating when the bottom state is true and every dose is toxic. The table indicates that the overdosing and overestimation rates for BOLD are clearly higher than those for BOIN, while the underestimation rates of BOLD are comparatively lower. However, note that accuracy rates for the higher dose levels are much higher for BOLD, and that the maximin rates for BOLD are clearly higher as well. To some degree, relatively higher overdose rates are a necessary cost to more quickly investigate higher dose levels and to achieve more balanced accuracy performance across doses. This is especially important in smaller sample settings. There is no clear advantage for BOLD versus CRM in lower overdose rates. To explore the impact of larger sample sizes, a sample of scenarios with design parameters as described in Figure 2 was simulated for N max = 36. Some adjustment in the -values was made to take advantage of the higher ceiling, so that = 18, = 15 , j > 1. Respective differences in accuracy, efficiency, and overdosing between the methods remain about the same as for the smaller sample cases. Relative advantage and disadvantages of BOLD versus BOIN should extend to larger sample sizes as well. Results are not presented here and we refer to the R app for the recreation of results. Simulations are also conducted for 2, 3, and 4-dose lattices. Web Tables 4 and 5 illustrate the accuracy, efficiency, and overdosing rates of BOLD, BOIN and CRM with non-informative priors by lattice size for N max = 21 and 30 respectively, utilizing random scenarios of true DLT rates with upper δ = 0.15 and 0.20 at φ = 0.25. The overall average performance across methods is similar for all of these lattice sizes, although by the 4-dose lattice, an advantage for BOLD emerges at N max = 21. The reduction in the relative advantage for BOLD in overall average accuracy for fewer dose levels is due in part to averaging over a smaller number of scenarios that still include the bottom state, where BOLD performs relatively poorly. When only non-bottom state scenarios are considered, BOLD continues on average to outperform BOIN and CRM in terms of accuracy. Note that for all designs, there is an increase in the accuracy rate as the number of dose levels decreases, accompanied by a lower mean number of patients administered treatment before stopping is invoked. Overdose control is explored in Web Table 6 . As indicated in the Methodology section, adjusting the PPAT threshold parameter ( τ ) may assist in reducing the overdosing rate. A summary is reported for the accuracy, efficiency, and overdosing rates of BOLD at a 5-dose model for φ = 0.25 and 0.3 and for N max = 21 and 30. The τ values considered are 0.5 (default), 0.48, and 0.45. Random scenarios of true DLT rates are generated with upper δ = 0.15. The table indicates that for φ = 0.3, the overdosing rates with τ = 0.48 are significantly reduced in comparison to τ = 0.5. Additionally, it demonstrates that both the accuracy rates and patient counts experience only a slight decline with lower τ values for φ = 0.3. For φ = 0.25, this approach has less dramatic impact (see Discussion). Comparative analysis with a titration design An important instance of Phase I clinical trial designs is the use of titration, which involves initially administering sub-therapeutic doses that are expected to be safe. This is prudent especially for first-in-human studies. If toxicity events arise with these first doses, the drug is perceived as potentially hazardous and it may be unwise to proceed further. Consequently, setting a lower toxicity threshold for more aggressive stopping and a lower prior toxicity rate to reflect the expected safety of a sub-therapeutic dose aligns with this principle. To compare the performance of BOLD under titration, we select iBOIN (app) as the benchmark. The iBOIN design maintains the same decision-making framework as BOIN; however, it distinguishes itself by incorporating historical data into the dose selection boundaries, which can be tailored to specific doses. The historical information used by iBOIN regarding DLT rates is termed the “skeleton”, 6 representing the anticipated DLT probability for each of the dose levels with a corresponding PESS value. Importantly, this step does not involve specifying associated prior distributions for dose-level DLT rate values and hence does not directly employ standard Bayesian updating. Table 2 illustrates the simulated accuracy, efficiency, overdosing, percentage of trials reaching toxicity threshold at dose 1, and average number of treated patients at dose 1 for BOLD and iBOIN under a titration model involving 3, 4, or 5 doses, including a single sub-therapeutic dose, along with select values of φ , N max , and upper δ . The PPAT threshold parameter ( τ ) is set at 0.45 for overdose control, and γ j = 0.95, j > 1. The toxicity threshold γ 1 for the sub-therapeutic dose is 0.6, reflecting a high sensitivity to safety concerns. Additionally, its prior mean toxicity rate for BOLD and the skeleton of iBOIN are both fixed at 0.05, while the true DLT rate in simulations for this lowest dose is restricted to be less than 0.05. Given that the sub-therapeutic dose is not considered as a possible MTD, results for the single sub-therapeutic dose are not included in the table. Instead, we use the notation “ < 2” to denote that the MTD is less than dose 2, where dose 2 is the lowest possible dose that can be identified as MTD for a next phase trial. The state “ < 2” is treated as the bottom state in this setting. This state can be classified to if: 1) dose 1 is found to be too toxic; 2) dose 2 is too toxic; 3) is reached and the next stage dose selection is dose 1, either through maintenance or by de-escalation from dose 2. Here, the value of for BOLD is established at 9, as we want to limit the number of patients administered this dose, and = 15, the value previously used for the first dose in non-titration cases. The iBOIN app does not allow for dose-level values of , so the value is 12 for all doses. View this table: View inline View popup Download powerpoint Table 2: Summary of performance of BOLD and iBOIN under the titration assumption with sub-therapeutic dose 1, including accuracy, efficiency, overdose, percentage of trials reaching toxicity threshold at dose 1, and average number of treated patients at dose 1. Models involve 3, 4, or 5 doses, along with a selection of φ , N max , and upper δ of random scenarios, where γ 1 = 0.6 and γ j = 0.95 for j > 1, prior mean toxicity rate for BOLD and the skeleton of iBOIN are both 0.05 for dose 1 and φ otherwise. The true DLT rate for dose 1 is below 0.05. PPAT threshold parameter τ is set to be 0.45 for BOLD. The state “ < 2” represents the bottom state and includes the cases in which the MTD level is “ < 1” or 1. “Avg.” denotes the average for all the MTD levels and “( ≥ 2)” denotes the average for the MTD levels that are ≥ 2. The table shows that for these design configurations, BOLD clearly surpasses iBOIN in terms of accuracy at the various dose levels (lattice sizes) in a titration design. This holds across a range of φ , N max and upper δ values, although for upper δ = 0.10 the advantage for BOLD is less. BOLD still generally does relatively worse in correctly identifying the bottom state. The average number of patients treated with the sub-therapeutic dose, and the percentage of times it is mistakenly classified as too toxic are also reported. For BOLD in particular, the impact of erroneous early stopping, as well as the number of patients receiving the sub-therapeutic dose, is small. Comparative analysis for informative priors We also conduct a comparative analysis with iBOIN for trials with informative priors favoring a specific dose level. Two types of informative prior specifications that we consider are: 1) A favored dose is associated with a greater PESS value, such as 6 versus 3, in contrast to the other doses, while the prior means (skeletons) are all equal to φ . Note that larger PESS values indicate relatively smaller posterior variance and hence is more informative. 2) Assume that the prior means (skeletons) are mildly distinct, with the favored dose having the prior mean (skeleton) equal to φ while other prior means (skeletons) are within a close range (e.g., ±0.05 from φ ), with values satisfying the linear order and PESS = 3 for all doses. Both approaches preserve equipoise and are only mildly informative. For each type of informative prior specifications, simulations for 5 dose levels are conducted. The settings for toxicity thresholds and random scenarios remain consistent with those employed for the trials with non-informative priors. First, the MTD is assumed to correspond to the favored dose level, to assess how “correctly” specified informative priors enhance performance. Second, we also perform sensitivity analyses related to prior specifications. In trials with informative priors, specifications may incorrectly favor the wrong dose in real-world applications. We thus evaluate the robustness of the proposed priors under misspecification. Two possible situations are examined: either the anti-cover dose or the cover dose of the favored dose is the true MTD. Table 3 summarizes the accuracy, efficiency, overdosing, underestimation, and overestimation rates of BOLD and iBOIN by averaging performance across randomly generated scenarios with informative priors for a 5-dose lattice, and with φ = 0.25, N max = 21 and 30, and the lower and higher range of upper δ values, 0.1 and 0.25. The table demonstrates that BOLD generally exhibits higher accuracy compared to iBOIN across scenarios, even when the MTD diverges from the favored dose. There is also greater gain in incorporating this information when the favored dose coincides as the MTD, as reflected by even greater advantages in accuracy favoring BOLD compared with non-informative settings. The mean patient counts for BOLD are often slightly lower than for iBOIN, and again the corresponding standard deviations of number of patients for BOLD are lower. Web Tables 7 and 8 present a summary of accuracy, efficiency, overdosing, underestimation, and overestimation rates of BOLD and iBOIN by MTD level using informative prior PESS specifications for N max = 21 and 30 respectively, with a 5-dose lattice, φ = 0.25, and upper δ = 0.1 and 0.25. Web Tables 9 and 10 present the summary of results from informative prior mean/skeletion specifications. For clarification, in these tables, if the true MTD is dose 1 while also being the anti-cover of the favored dose, then the favored dose is dose 2. Conversely, if the true MTD is dose 1 while also being the cover dose of the favored dose, then the favored dose is the bottom state. If the true MTD is dose 5 and it also is the anti-cover of the favored dose, then the favored dose is the top state; if it is the cover dose, then the favored dose is dose 4. Note that specification of informative prior means (skeletons) to favor a dose compared to specifying a higher PESS value (smaller prior variance) is more robust when the favored dose is not MTD, in that the cover and anti-cover dose accuracy rates when they are the MTD are higher. View this table: View inline View popup Download powerpoint Table 3: Summary of average accuracy, efficiency, overdose, underestimation, and overestimation rates of BOLD and iBOIN across MTD levels 1 to 5, utilizing informative priors with 5-dose lattice, φ = 0.25, N max = 21 and 30, and chosen upper δ values of 0.1 and 0.25 for random scenarios, with prior specifications (spec.) of PESS and prior mean. Either the favored dose is the MTD, the cover dose of the favored dose is MTD, or the anti-cover dose is MTD, whereas the cover dose for dose 5 corresponds to the top state in Web Figure 1 and the anti-cover dose for dose 1 to the bottom state. 4. Discussion BOLD incorporates Beta DLT rate posteriors into all decision-making processes, and employs order constraints throughout each stage. Wang and Ivanova (2014) employed constraints at each stage across sampled dose-level means as well in a Bayesian continuous response model, 24 and their approach involves simulating order-constrained posteriors using PAVA for each sampled set of values. We instead only apply PAVA to the dose-level posterior probability values for the event that the DLT rate exceeds threshold (CPATs), as in (1). Importantly, this provides a computational advantage. PAVA is only applied to a minimum set of values, and extensive simulation is averted. Thus, computations are quite fast. Note that CPAT values encompass information from the full posterior distribution in the sense that all of the posterior mass is reflected (the complement is known as well). This appears to be more informative than posterior probabilities of an interval around the target, which reflect only a portion of the posterior mass, such as in Keyboard-based methods. 11 , 12 Hence, our approach is novel. Delineating performance differences A series of simulations under different conditions have been studied to delineate scenarios where there are clear accuracy and efficiency differences between the proposed BOLD approach versus established methods, BOIN and CRM. There are many design parameters for Phase I trials, so we acknowledge that there are a number of parameter combinations that we have not incorporated into the scope of this work. Focus has been given on small to moderate sample size ceilings, N max ≤ 30, which we believe is a sweet spot for most investigator initiated trials. We also consider only the lowest dose as the first dose, which is the safest choice in terms of minimizing overdosing risk. There are certain key conditions that distinguish BOLD’s performance relative to BOIN: higher accuracy at higher dose levels, more balanced accuracy across dose levels from a maximin perspective, improved relative accuracy when discrimination of the MTD versus other doses is greater (as reflected by upper δ values), more consistent performance as reflected by the smaller standard deviation in accuracy rate estimation, the stability of the designs in terms of smaller standard deviation of the number of patients, and the more efficient leveraging of informative prior information to improve performance, including for a simple titration design. In informative contexts, BOLD involves specifying prior Beta probability distributions on specific DLT rate values, which is intuitive and in line with formal Bayesian analysis. In contrast, iBOIN uses skeletons with PESS values to reflect prior information, and prior probability distributions are not explicitly specified. Skeletons affect the decision boundaries for the empirical dose-level proportion. The benefits in accuracy for BOLD are apparent ( Tables 2 - 3 and Web Tables 7 - 10 ). A situation where BOIN clearly performs better is in identifying when all dose levels exceed MTD. By making escalation/de-escalation decisions based only on one dose level’s toxicity data, this can lead to slower dose switching, and helps in correctly identifying that dose 1 may be too toxic. When the lattice sizes are smaller, overall average accuracy is similar between BOLD and BOIN. For the select averages that exclude the bottom state, BOLD still has an advantage. In terms of the number of required patients, there is no clear advantage between BOLD and BOIN when considering all settings ( Web Figure 2 and Web Tables 1 - 2 ). Any differences in mean patient number are not large, and depend on which dose level is MTD, the toxicity target, and N max . The CRM comparison is not as extensive, and we note that a limitation of our analysis is that only the one-parameter CRM model was fit. For non-informative priors, it appears that BOLD consistently outperforms this CRM model in terms of overall average accuracy, and in a maximin sense across the dose-level accuracy rates. On the other hand, the average number of patients for CRM can be lower as the target φ value increases ( Web Tables 1 - 2 ). Overdose rates appear comparable. Given the consistent accuracy advantage, a critical criterion, we believe that the use of BOLD is appealing. Overdose control Overdosing, underestimation, and overestimation rates were monitored. In particular, as noted, the overdosing rate is higher for BOLD, especially for certain situations such as when φ = 0.30. Interestingly, when τ = 0.50 and φ = 0.3, if one patient in an initial cohort of 3 has a DLT at dose 1, CPAT 1 = 0.49. After order constraints are applied, PPAT 1 = PPAT 2 = 0.49 < 0.50, so escalation to dose 2 is selected at the next stage. Setting τ = 0.48, while the PPAT values don’t change, now by the selection criterion dose 1 is maintained. The PPAT threshold parameter value can thus be adjusted to alter key dose selection decisions that impact overdosing. Note that for φ = 0.25, this approach has less dramatic impact, as the first dose will already be maintained after 1 out of 3 patients has a DLT after stage 1, or 1 or 2 out of 6 patients after stage 2. In terms of overall accuracy after adjustment of τ , note that accuracy for the higher dose levels decreases. Still, overall accuracy is only slightly affected downwards. Recommendations In sum, a general set of recommendations that can be gleaned from the scenarios we considered are: 1) If there is an elevated concern that every one of the dose DLT rates could be above target (i.e. the bottom state is true), BOIN can detect this situation more accurately and quickly. If there is a belief that the MTD may reside among higher dose levels, or that at least one of the dose level DLT rates is below or equal to target rate, then BOLD is the preferable choice. 3) For smaller lattices, there is less difference in average accuracy, although this is due in part to the greater weight placed on the bottom state, when BOIN does better. 4) For smaller sample sizes, BOLD is generally more accurate, as it can switch dose levels faster. As the maximum sample size is set higher, the accuracy gap can decrease somewhat, but adjustments to values can help maintain BOLD’s accuracy advantage. 5) If the MTD is clearly distinguished in terms of the difference in DLT rate values from the dose above it, BOLD can leverage this more effectively to improve accuracy. If the MTD is not well distinguished, all the methods considered here do not perform well. 6) BOIN has less overdosing. However, this is in part due to a trade-off of BOLD having higher accuracy at higher dose levels. Overdose control through adjusting the PPAT threshold parameter τ can be helpful, depending on φ and the prior specifications for DLT rate parameters. 7) If it is of interest to incorporate prior information, BOLD can perform decisively better. We encourage the use of the respective R codes to help guide design adoption and inputs. In a Bayes risk sense, averages of dose-level accuracy and other performance statistics can be weighted by prior belief as to which dose level is the MTD. Note that our selective averaging that excludes the bottom state is an example of this approach. 6. Conclusion We propose a new and computationally fast approach that relies on prior distribution specifications at the DLT rate parameter level. The primary characteristic of the proposed Bayesian Ordered Lattice Design (BOLD) is its integration of posterior information through order constraints that are imposed at each decision stage. This enables “shared learning” about toxicity across dose levels. We delineate when advantages through improved accuracy in MTD identification can arise in Bayesian dose finding trials. The posterior probability of fundamental interest relates to whether a dose DLT rate exceeds the toxicity threshold. BOLD focuses on this posterior probability as a criterion for both dose selection and early stopping. Extensive simulation studies support that the accuracy performance of BOLD can often surpass BOIN and CRM. In an adaptation for titration and when Bayesian informative priors are utilized, BOLD demonstrates superior performance. Lastly, the BOLD framework is flexible, so the proposed methods can be extended to more general Bayesian phase I design settings that incorporate more complex information. Such designs will be elaborated upon in forthcoming work. Data Availability All data produced in the present work are contained in the manuscript Supplementary Material Download figure Open in new tab Web Figure 1: Hasse diagram of 5-dose lattice with linear order, where the bottom state represents that a MTD is below the minimum dose, and the top state signifies that the MTD exceeds the maximum dose. Classification is to one state, and classification to a specific dose level indicates that the corresponding dose is identified as the MTD. Download figure Open in new tab Web Figure 2: Efficiency of BOLD, BOIN and CRM design, represented by the average patient count, at 5-dose model with non-informative prior by MTD level and stratified by N max and upper δ , assuming random scenarios of true DLT rates, at target rate φ = 0.25. The legend denotes the average for all the 7 MTD levels (1st row) and for the MTD levels that are ≥ 1 (2nd row). Download figure Open in new tab Web Figure 3: Standard deviation (SD) of patient count of BOLD, BOIN and CRM design across scenarios at 5-dose model with non-informative prior by MTD level and stratified by N max and upper δ , assuming random scenarios of true DLT rates, at target rate φ = 0.25. The legend denotes the average for all the 7 MTD levels (1st row) and for the MTD levels that are ≥ 1 (2nd row). Download figure Open in new tab Web Figure 4: Accuracy of BOLD, BOIN, and CRM design, represented by the average of accurate MTD identifications (%) across scenarios, at 5-dose model with non-informative prior by MTD level and stratified by N max and upper δ , assuming random scenarios of true DLT rates, at target rate φ = 0.15. The legend denotes the average and SD (in parenthesis) for all the 7 MTD levels (1st row) and for the MTD levels that are ≥ 1 (2nd row). Download figure Open in new tab Web Figure 5: Accuracy of BOLD, BOIN, and CRM design, represented by the average of accurate MTD identifications (%) across scenarios, at 5-dose model with non-informative prior by MTD level and stratified by N max and upper δ , assuming random scenarios of true DLT rates, at target rate φ = 0.2. The legend denotes the average and SD (in parenthesis) for all the 7 MTD levels (1st row) and for the MTD levels that are ≥ 1 (2nd row). Download figure Open in new tab Web Figure 6: Accuracy of BOLD, BOIN, and CRM design, represented by the average of accurate MTD identifications (%) across scenarios, at 5-dose model with non-informative prior by MTD level and stratified by N max and upper δ , assuming random scenarios of true DLT rates, at target rate φ = 0.3. The legend denotes the average and SD (in parenthesis) for all the 7 MTD levels (1st row) and for the MTD levels that are ≥ 1 (2nd row). View this table: View inline View popup Download powerpoint Web Table 1: Summary of efficiency, represented by the average patient count and its standard deviation (in parenthesis), of BOLD, BOIN and CRM by upper δ value of random scenarios for φ = 0.15, 0.2 and 0.3 and N max = 21 at 5-dose model with non-informative prior, where “Avg.” denotes the average for all the 7 MTD levels and “( ≥ 1)” denotes the average for the MTD levels that are ≥ 1. View this table: View inline View popup Download powerpoint Web Table 2: Summary of efficiency, represented by the average patient count and its standard deviation (in parenthesis), of BOLD, BOIN and CRM by upper δ value of random scenarios for φ = 0.15, 0.2 and 0.3 and N max = 30 at 5-dose model with non-informative prior, where “Avg.” denotes the average for all the 7 MTD levels and “( ≥ 1)” denotes the average for the MTD levels that are ≥ 1. View this table: View inline View popup Download powerpoint Web Table 3: Summary of overdose, underestimation, and overestimation rates of BOLD, BOIN and CRM at 5-dose model with non-informative prior for φ = 0.25 and 0.3 with N max = 21 and 30, assuming random scenarios of true DLT rates with upper δ = 0.15, where “Avg.” denotes the average for all the MTD levels and “( ≥ 1)” denotes the average for the MTD levels that are ≥ 1. View this table: View inline View popup Download powerpoint Web Table 4: Summary of accuracy, efficiency, and overdosing rates of BOLD, BOIN and CRM by lattice (2, 3, 4-dose model) with non-informative prior for φ = 0.25 and N max = 21, utilizing random scenarios with upper δ = 0.15 and 0.2, where “Avg.” denotes the average for all the MTD levels and “(≥1)” denotes the average for the MTD levels that are ≥ 1. View this table: View inline View popup Download powerpoint Web Table 5: Summary of accuracy, efficiency, and overdosing rates of BOLD, BOIN and CRM by lattice (2, 3, 4-dose model) with non-informative prior for φ = 0.25 and N max = 30, utilizing random scenarios with upper δ = 0.15 and 0.2, where “Avg.” denotes the average for all the MTD levels and “(≥1)” denotes the average for the MTD levels that are ≥ 1. View this table: View inline View popup Download powerpoint Web Table 6: Summary of accuracy, efficiency and overdosing rates of BOLD, BOIN and CRM by PPAT threshold parameter τ at 5-dose model with non-informative prior for φ = 0.25 and 0.3 with N max = 21 and 30, assuming random scenarios of true DLT rates with upper δ = 0.15, where “Avg.” denotes the average for all the MTD levels and “( ≥ 1)” denotes the average for the MTD levels that are ≥ 1. View this table: View inline View popup Download powerpoint Web Table 7: Summary of accuracy, efficiency, overdose, underestimation, and overestimation rates of BOLD and iBOIN, utilizing prior PESS specification with 5-dose lattice, φ = 0.25, N max = 21, and chosen upper δ values of 0.1 and 0.25 with random scenarios. View this table: View inline View popup Download powerpoint Web Table 8: Summary of accuracy, efficiency, overdose, underestimation, and overestimation rates of BOLD and iBOIN, utilizing prior PESS specification with 5-dose lattice, φ = 0.25, N max = 30, and chosen upper δ values of 0.1 and 0.25 with random scenarios. View this table: View inline View popup Download powerpoint Web Table 9: Summary of accuracy, efficiency, overdose, underestimation, and overestimation rates of BOLD and iBOIN, utilizing prior mean/skeleton specification with 5-dose lattice, φ = 0.25, N max = 21, and chosen upper δ values of 0.1 and 0.25 with random scenarios. View this table: View inline View popup Download powerpoint Web Table 10: Summary of accuracy, efficiency, overdose, underestimation, and overestimation rates of BOLD and iBOIN, utilizing prior mean/skeleton specification with 5-dose lattice, φ = 0.25, N max = 30, and chosen upper δ values of 0.1 and 0.25 with random scenarios. Acknowledgements This work was supported in part by grants NSF 2333326, P30 CA134274-10 (University of Maryland, Baltimore), and P30 CA043703-34 (Case Comprehensive Cancer Center). Footnotes Postal address: 2103 Cornell Rd, Cleveland, OH 44106; Email: gxw174{at}case.edu We expanded the simulations and provided a much clearer delineation of when our proposed approach has relative advantages. We also have expanded the discussion and the methodology to include a simple titration design and greater attention to overdose rates, including an intuitive overdose control approach. The revised manuscript has also been prepared in LaTeX. References 1. ↵ Liu S , Yuan Y . Bayesian optimal interval designs for Phase I clinical trials . Journal of the Royal Statistical Society , Series C , 2015 . 64 : p. 507 – 523 . OpenUrl 2. ↵ Braun TM The current design of oncology Phase I clinical trials: progressing from algorithms to statistical models . Chin Clin Oncol. 2014 Mar ; 3 ( 1 ): 2 . OpenUrl PubMed 3. ↵ Ji Y , Jin JY , Hyman DM , et al. Challenges and Opportunities in Dose Finding in Oncology and Immuno-oncology . Clin Transl Sci , 2018 . 11 ( 4 ): p. 345 – 351 . OpenUrl CrossRef PubMed 4. ↵ Xu J , Zhang D , Mu R . A dose-finding design for Phase I clinical trials based on Bayesian stochastic approximation . BMC Med Res Methodol , 2022 . 22 ( 1 ): p. 258 . OpenUrl PubMed 5. ↵ Zhou H , Murray TA , Pan H , et al. Comparative review of novel model-assisted designs for Phase I clinical trials . Stat Med , 2018 . 37 ( 14 ): p. 2208 – 2222 . OpenUrl PubMed 6. ↵ Zhou Y , Lee JJ , Wang S , et al. Incorporating historical information to improve Phase I clinical trials . Pharm Stat . 2021 Nov; 20 ( 6 ): 1017 – 1034 . OpenUrl PubMed 7. ↵ Yuan Y , Hess KR , Hilsenbeck SG , et al. Bayesian Optimal Interval Design: A Simple and Well-Performing Design for Phase I Oncology Trials . Clin Cancer Res , 2016 . 22 ( 17 ): p. 4291 – 301 . OpenUrl Abstract / FREE Full Text 8. ↵ O’Quigley J , Pepe M , Fisher L . Continual reassessment method: a practical design for phase 1 clinical trials in cancer . Biometrics , 1990 . 46 ( 1 ): p. 33 – 48 . OpenUrl CrossRef PubMed Web of Science 9. ↵ Yuan Y , Wu J , Gilbert MR . BOIN: a novel Bayesian design platform to accelerate early phase brain tumor clinical trials . Neurooncol Pract , 2021 . 8 ( 6 ): p. 627 – 638 . OpenUrl PubMed 10. ↵ Iasonos A , Wilton AS , Riedel ER , et al. A comprehensive comparison of the continual reassessment method to the standard 3 + 3 dose escalation scheme in Phase I dose-finding studies . Clin Trials , 2008 . 5 ( 5 ): p. 465 – 77 . OpenUrl CrossRef PubMed Web of Science 11. ↵ Ji Y , Liu P , Li Y , et al. A modified toxicity probability interval method for dose-finding trials . Clin Trials , 2010 . 7 ( 6 ): p. 653 – 63 . OpenUrl CrossRef PubMed Web of Science 12. ↵ Yan F , Mandrekar SJ , Yuan Y . Keyboard: A Novel Bayesian Toxicity Probability Interval Design for Phase I Clinical Trials . Clin Cancer Res , 2017 . 23 ( 15 ): p. 3994 – 4003 . OpenUrl Abstract / FREE Full Text 13. ↵ Gupta SK . Use of Bayesian statistics in drug development: Advantages and challenges . Int J Appl Basic Med Res . 2012 Jan ; 2 ( 1 ): 3 – 6 . OpenUrl PubMed 14. ↵ Conaway MR , Dunbar S , Peddada SD . Designs for single- or multiple-agent Phase I trials . Biometrics , 2004 . 60 ( 3 ): p. 661 – 9 . OpenUrl CrossRef PubMed Web of Science 15. ↵ Yin G , Yuan Y . Bayesian dose finding in oncology for drug combinations by copula regression . Royal Statistical Society Series C: Applied Statistics , 2009 . 58 : p. 211 – 224 . OpenUrl 16. ↵ Tatsuoka C , Ferguson T . Sequential classification on partially ordered sets . Journal of the Royal Statistical Society Series B . 2003 . 65 . p. 143 – 157 . OpenUrl CrossRef Web of Science 17. ↵ Tatsuoka C . Sequential Classification on Lattices with Experiment-Specific Response Distributions , Sequential Analysis . 2014 . 33 ( 3 ), p. 400 – 420 . OpenUrl 18. ↵ Tatsuoka C , Chen W , Lu X . Bayesian Group Testing with Dilution Effects . Biostatistics . 2022 . doi: 10.1093/biostatistics/kxac004 . OpenUrl CrossRef 19. ↵ Barlow RE , Bartholomew DJ , Bremner JM , et al. Statistical Inference under Order Restrictions . 1972 : London, New York : John Wiley and Sons . 20. ↵ Leung DH , Wang Y . Isotonic designs for Phase I trials . Control Clin Trials . 2001 Apr ; 22 ( 2 ): 126 – 38 . OpenUrl CrossRef PubMed Web of Science 21. ↵ Ivanova A , Flournoy N . Comparison of Isotonic Designs for Dose-Finding . Stat Biopharm , Res . 2009 Feb 1; 1 ( 1 ): 101 . OpenUrl 22. ↵ Sweeting M , Mander A , Sabin T. bcrm: Bayesian Continual Reassessment Method Designs for Phase I Dose-Finding Trials . Journal of Statistical Software , 2013 . 54 ( 13 ), 1 – 26 . OpenUrl 23. ↵ Lee SM , Cheung YK . Model calibration in the continual reassessment method . Clin Trials . 2009 Jun ; 6 ( 3 ): 227 – 38 . OpenUrl CrossRef PubMed Web of Science 24. ↵ Wang Y , Ivanova A . Dose finding with continuous outcome in Phase I oncology trials . Pharm Stat . 2014 Mar-Apr ; 14 ( 2 ): 102 – 7 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted January 03, 2026. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Bayesian Ordered Lattice Design For Phase I Clinical Trials Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Bayesian Ordered Lattice Design For Phase I Clinical Trials Gi-Ming Wang , Curtis Tatsuoka medRxiv 2025.03.23.25324471; doi: https://doi.org/10.1101/2025.03.23.25324471 Share This Article: Copy Citation Tools Bayesian Ordered Lattice Design For Phase I Clinical Trials Gi-Ming Wang , Curtis Tatsuoka medRxiv 2025.03.23.25324471; doi: https://doi.org/10.1101/2025.03.23.25324471 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Oncology Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15227) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6597) Geriatric Medicine (668) Health Economics (997) Health Informatics (4534) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9230) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00256ef1a18e2c5',t:'MTc3OTUyMTI2MQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00