Optimizing Contingency Management with Reinforcement Learning

doi:10.1101/2024.03.28.24305031

Optimizing Contingency Management with Reinforcement Learning

2024 · doi:10.1101/2024.03.28.24305031

preprint OA: closed CC-BY-NC-ND-4.0

📄 Open PDF Full text JSON View at publisher

Full text 47,551 characters · extracted from preprint-html · click to expand

Optimizing Contingency Management with Reinforcement Learning | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Optimizing Contingency Management with Reinforcement Learning Young-geun Kim , Laura Brandt , Ken Cheung , Edward V. Nunes , John Roll , Sean X. Luo , Ying Liu doi: https://doi.org/10.1101/2024.03.28.24305031 Young-geun Kim 1 Mailman School of Public Health, Department of Biostatistics , New York, NY, USA 2 Columbia University Irving Medical Center, Department of Psychiatry , New York, NY, USA Ph.D. 
Find this author on Google Scholar Find this author on PubMed Search for this author on this site Laura Brandt 3 City College of New York, Department of Psychology , New York, NY, USA Ph.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ken Cheung 1 Mailman School of Public Health, Department of Biostatistics , New York, NY, USA Ph.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site Edward V. Nunes 2 Columbia University Irving Medical Center, Department of Psychiatry , New York, NY, USA 4 New York State Psychiatric Institute, Division on Substance Use Disorders , New York, NY, USA M.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site John Roll 5 Washington State University, Department of Community and Behavioral Health , Spokane, WA, USA 6 Washington State University, Program of Excellence in Addictions Research , Spokane, WA, USA Ph.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sean X. Luo 2 Columbia University Irving Medical Center, Department of Psychiatry , New York, NY, USA 4 New York State Psychiatric Institute, Division on Substance Use Disorders , New York, NY, USA M.D., Ph.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: ying.liu{at}nyspi.columbia.edu Ying Liu 2 Columbia University Irving Medical Center, Department of Psychiatry , New York, NY, USA Ph.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: ying.liu{at}nyspi.columbia.edu Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF ABSTRACT Contingency Management (CM) is a psychological treatment that aims to change behavior with financial incentives. 
In substance use disorders (SUDs), deployment of CM has been enriched by longstanding discussions around the cost-effectiveness of prize-based and voucher-based approaches. In prize-based CM, participants earn draws to win prizes, including small incentives to reduce costs, and the number of draws escalates depending on the duration of maintenance of abstinence. In voucher-based CM, participants receive a predetermined voucher amount based on specific substance test results. While both types have enhanced treatment outcomes, there is room for improvement in their cost-effectiveness: the voucher-based system requires enduring financial investment; the prize-based system might sacrifice efficacy. Previous work in computational psychiatry of SUDs typically employs frameworks wherein participants make decisions to maximize their expected compensation. In contrast, we developed new frameworks in which clinical decision-makers choose actions (CM structures) to reinforce the substance abstinence behavior of participants. We consider the choice of the voucher or prize to be a sequential decision, where there are two pivotal parameters: the prize probability for each draw and the escalation rule determining the number of draws. Recent advancements in Reinforcement Learning, more specifically, in off-policy evaluation, afforded techniques to estimate outcomes for different CM decision scenarios from observed clinical trial data. We searched for CM schemas that maximized treatment outcomes with budget constraints. Using this framework, we analyzed data from the Clinical Trials Network to construct unbiased estimators of the effects of new CM schemas. Our results indicated that the optimal CM schema would be to strengthen reinforcement rapidly in the middle of the treatment course. Our estimated optimal CM policy improved treatment outcomes by 32% while maintaining costs. 
Our methods and results have broad applications in future clinical trial planning and translational investigations on the neurobiological basis of SUDs. INTRODUCTION Substance use disorders (SUDs) are behavioral pathologies that involve common underlying neurobiological substrates involving the dopaminergic circuitry in the midbrain ( 1 , 2 ). This circuit has been hypothesized to allow the brain to learn from and process rewarding experiences ( 3 – 5 ). A substantial body of literature has accumulated around interrogating animal models to elucidate the role of dopamine and prediction error in SUDs ( 6 ). However, translating these insights to clinical settings represents a substantial challenge, particularly because the meaning of these neural signatures has limited known behavioral correlates in the real world. With an increasing number of lives affected by SUD-related complications and overdoses ( 7 ), there is an urgency in developing novel SUD treatments ( 8 ). Contingency Management (CM), which uses financial incentives to promote substance abstinence, stands out as a promising approach ( 9 – 15 ) with the potential to enhance public health through broad dissemination. Furthermore, CM represents a natural translational foundation to connect preclinical neuroscience to clinical applications. There is ample evidence supporting CM’s effectiveness in treating a variety of SUDs, including alcohol ( 16 ), benzodiazepines ( 17 ), stimulants ( 18 , 19 ), cannabis ( 20 , 21 ), and opioid ( 22 , 23 ) use disorders. Its efficacy extends to diverse populations, including pregnant women ( 24 , 25 ) and adolescents ( 26 ). Common CM schemas can be classified into voucher-based ( 27 – 29 ) and prize-based ( 16 , 30 , 31 ) approaches. Voucher-based methods reward participants with vouchers of escalating value to redeem goods or services for submitting substance-free biological samples ( 32 ). 
Prize-based approaches, designed to mitigate the high incremental costs associated with vouchers, let participants draw for prizes using a lottery based on their urine test results ( 33 ). Meta-analyses demonstrated that both approaches are effective in encouraging SUD remission ( 34 – 39 ). Broad dissemination of CM treatment requires a balance between cost and efficacy. The high cost of voucher-based CM is a barrier to its widespread clinical adoption ( 40 ) and generates organizational ( 41 ) and legal challenges ( 42 ). A lower-cost prize-based schema, however, might sacrifice efficacy ( 43 ): in general, higher financial reinforcement correlates with better outcomes ( 44 – 46 ). Clinical trials examine one or a few alternative treatment schemas at a time, with the schemas themselves determined by the clinicians and researchers ad hoc based on clinical feasibility and historical designs. Computationally, voucher-based and prize-based systems represent two small regions in a high dimensional space that captures all possible CM schemas in a treatment process. No existing study has compared a treatment-as-usual schema with the theoretically optimal schema. This is due to the lack of a mathematically precise method to capture the schemas and make computational inferences, as well as practical limitations in conducting clinical studies to evaluate all possible CM schemas. In response to this knowledge gap, we developed a new computational psychiatry framework using Reinforcement Learning to assess different classes of CM schemas using a CM clinical trial dataset. Reinforcement Learning is a powerful data science framework to analyze such dynamic treatment regimens in precision medicine ( 47 ), which typically focuses on identifying optimal rules to maximize treatment outcomes ( 48 ). 
While existing frameworks in computational psychiatry usually model participants’ decision-making to understand the mechanism of receiving compensation ( 5 ), our approach models the clinical treatment process to directly target cost-effectiveness and imposes budget limitations on CM schemas to identify optimal ones within these. We applied the off-policy evaluation techniques ( 49 ) to predict the outcomes of new CM schemas. We used a pragmatic clinical trial from the National Drug Treatment Clinical Trials Network (CTN-0007) trial ( 50 ) data, which contained detailed, week-by-week urine toxicology and prize-based rewards. We hypothesized that this method would allow us to identify the most cost-effective strategies that meet specified budget constraints. METHODS Participants and Study Design The CTN-0007 trial employed a prize-based CM schema targeting stimulant abstinence in individuals undergoing methadone maintenance treatment. Table 1 summarized the trial structure and cohort characteristics. The CTN-0007 trial is a community-based methadone maintenance treatment with 388 participants from six clinics, enhancing the main treatment outcomes. Participants visited local clinics or treatment programs twice weekly, yielding 24 visits over 12 weeks. At each visit, they provided biological samples such as breathalyzer tests for alcohol and urine tests for opioids. Participants who achieved substance abstinence since their last visit earned draws and had the opportunity to receive motivational incentives based on a prize structure. The prizes included: (i) 50% of tickets with a “good job” message without tangible incentive ($0 value), (ii) 41.8% of tickets with small incentives ($1 value), (iii) 8% of tickets with large incentives ($20 value), and (iv) 0.2% of tickets with jumbo incentives ($80 value). The primary substance use test was considered negative if all primary substances (alcohol, amphetamine, cocaine, and methamphetamine) yielded negative results. 
The escalation of prizes was linear: the number of draws increased by one as the abstinence period increased by one week, and it was reset to zero if the primary substance test result was positive. Two bonus draws were provided if test results for all substances were negative. Fully anonymized CTN-0007 data were obtained through the publicly accessible CTN data repository: http://datashare.nida.nih.gov/study/nida-ctn-0007 . Reinforcement Learning Framework for CM Schemas The CTN-0007 CM schema, which was applied to several other studies ( 21 , 33 ), sequentially offered incentives to reinforce substance abstinence based on prior substance testing records. This CM schema was, therefore, a dynamic treatment program ( 51 – 53 ), a sequence of decision rules tailored to individuals’ time-varying substance use history at each visit time. We formulated elements in CM from a Reinforcement Learning perspective: The CM schema was converted into a set of decision rules (corresponding to “policy” in Reinforcement Learning terminology ( 54 )) that took the individuals’ substance use histories (“state”) at each visit and mapped them to the incentives (“actions”). CM schema varied based on the probabilities of winning each prize and the escalation rule for the number of draws according to substance use histories. Our framework modeled voucher-based approaches by setting the probability of getting a specific prize to 1. Substance use test results at the next visit were either positive or negative (corresponding to “reward” in the Reinforcement Learning framework, not to be confused with reward in financial incentives). The efficacy of CM during the whole treatment program was evaluated by summing the total number of negative substance test samples (“discounted cumulative reward” ( 54 )). We summarized our framework in Figure 1. We provided basic notations with assumptions. 
Our framework denoted the incentive and substance use history at the t -th visit by $A_t$ and $S_t$ , respectively. We assumed that $A_t$ and $S_t$ had finite supports and that the patient trajectories $\{(S_t^{(i)}, A_t^{(i)})_{t}\}_{i}$ were independent and identically distributed, where i denoted the patient index. The CM schema applied at the t -th visit was denoted by $\pi_\theta(A_t \mid S_t)$ , where $\theta = (p, d)$ represented the time-invariant CM parameter and $p$ was the vector of prize probabilities. Additionally, $d$ was the vector of the number of draws given the longest duration of substance abstinence. We described details on the CM parameterization in the next subsection. The reward was denoted by $R_t$ . We provided further details on our Reinforcement Learning framework in Appendix 1. Parameterization of CM Schemas We parameterized a range of CM schemas and analyzed the performance of these hypothetical CM schemas. CM schema parameters included (i) prize probabilities $p$ and (ii) the number of draws given the longest duration of substance abstinence $d$ . Reflecting the CTN-0007 study that had four prize types and was a twelve-week program, the $p = (p_1, \ldots, p_4)$ was a four-dimensional vector whose elements were non-negative and summed to 1, and $d = (d_1, \ldots, d_{12})$ was a twelve-dimensional vector whose k -th element $d_k$ was the number of tickets when the longest duration of substance abstinence was k weeks. With these parameters, the $\pi_\theta(A_t \mid S_t)$ was expressed as follows: $$\pi_\theta(A_t \mid S_t) = \sum_{N \in \mathcal{N}_d(A_t, S_t)} \frac{\left(\sum_{j=1}^{4} N_j\right)!}{\prod_{j=1}^{4} N_j!} \prod_{j=1}^{4} p_j^{N_j},$$ where $L(S_t)$ denoted the longest duration of primary substance abstinence, $N = (N_1, \ldots, N_4)$ denoted the four-dimensional vector consisting of the number of drawn prizes, and $\mathcal{N}_d(A_t, S_t)$ denoted the set of lottery events $N$ that yielded the incentive $A_t$ with the total number of draws determined by $d_{L(S_t)}$ , $PTR_t$ , and $STR_t$ . Additionally, the $PTR_t$ and $STR_t$ indicated the primary and secondary substance test results at the t -th visit, respectively, determined by $S_t$ . In this parameterization, the $p$ affected the probability mass for given lottery events and the $d$ affected the set of lottery events whose probability masses were summed up. For example, the set was restricted to lottery events with $\sum_{j=1}^{4} N_j = d_{L(S_t)} + 2$ when both primary and secondary substance test results were negative, reflecting the $d_{L(S_t)}$ and two draws for primary and secondary substance abstinences, respectively. 
Evaluating New CM Schemas using Off-policy Evaluation We used Off-Policy Evaluation, a set of techniques in Reinforcement Learning that estimates the effectiveness of new policies with data collected from existing fixed policies ( 49 ), to evaluate parameterized hypothetical CM schemas. The CM schema $\pi_\theta$ described incentive structures given substance use records and its performance was evaluated with the value function ( 54 ), the expected total discounted reward, defined as $$V(\pi_\theta) = \mathbb{E}_{\pi_\theta}\left[ G \right], \qquad G = \sum_{t=1}^{T} \gamma^{t-1} R_t .$$ Here, T was the total number of visits, $G$ was the total discounted reward, and γ was the discount factor to balance rewards from early and late stages. Given that all the negative test results equally contributed to the number of substance-free samples, which is the same as the proportion of substance-free samples up to constant multiplication, we set the value of γ to 1. We used the importance sampling estimator ( 55 ), an off-policy evaluation method, to evaluate hypothetical CM schemas with the CTN-0007 data. For a given target CM schema with parameter $\theta_{Tar}$ (e.g., previously unstudied prize probabilities), the importance sampling estimator for the value of the target schema was unbiased and was expressed as follows: $$\hat{V}(\pi_{\theta_{Tar}}) = \frac{1}{n} \sum_{i=1}^{n} w^{(i)} \sum_{t=1}^{T} \gamma^{t-1} R_t^{(i)}, \qquad w^{(i)} = \prod_{t=1}^{T} \frac{\pi_{\theta_{Tar}}(A_t^{(i)} \mid S_t^{(i)})}{\pi_{\theta_{CTN\text{-}0007}}(A_t^{(i)} \mid S_t^{(i)})},$$ where $\{(S_t^{(i)}, A_t^{(i)}, R_t^{(i)})_{t=1}^{T}\}_{i=1}^{n}$ was collected under the existing schema $\pi_{\theta_{CTN\text{-}0007}}$ , the CM schema used in CTN-0007, $\theta_{CTN\text{-}0007}$ was the parameter for the existing schema, n was the sample size, and $w^{(i)}$ was the importance weight, representing the relative likelihood of data under the target CM schema compared to the existing schema. Note that both target and existing schemas were tractable, so no further model approximation was required to compute the importance weights. Optimizing CM Schema Under Cost Constraint We imposed a specified budget and the set of CM parameters to optimize. We defined the estimated optimal CM parameter under the budget level B with data $\mathcal{D}$ as follows: $$\hat{\theta}(B) = \arg\max_{\theta \in \Theta(B)} \hat{V}(\pi_\theta; \mathcal{D}),$$ where Θ( B ) indicated the set of feasible parameters under the budget B . 
We first specified a grid for parameters, described in the next subsection, and then filtered the grid using bootstrap confidence intervals (CIs) of treatment costs. With 1,000 bootstrapped datasets from CTN-0007, we calculated 95% bootstrap CIs of cost for a particular CM schema and removed the schema if the upper bound was larger than the budget. Among CM parameters in Θ( B ), we searched for the one that maximized the value function, the total number of primary substance-free samples (up to constant multiplication), estimated with the bootstrap mean for importance sampling estimators using the incentive group in CTN-0007. Search Grid We considered a sixteen-dimensional search grid for parameters θ ; the first four were for prize probabilities and the last twelve were for the number of draws given the longest duration of substance abstinence. The search grid was illustrated in Figure 2(a). For the prize probabilities, we considered 1,000 points on an intuitive curve connecting prize probabilities for usual care (UC) and for the CM schema used in CTN-0007. For the number of draws, we considered three options for escalation of prizes: (i) linear escalation rule in CTN-0007, (ii) logarithmic escalation, and (iii) logistic escalation. The rules were parameterized to have total ticket numbers comparable to the linear rule. The linear escalation rule increased the number of draws at a constant speed. The logarithmic escalation rule rapidly increased the number of draws early in treatment and kept decreasing the speed of increment. The logistic escalation rule gradually increased prize draws early in treatment, rapidly increased them in the middle of treatment, and then gradually increased again to converge to the number of draws by the linear rule in the late treatment. We provided details on the search grid in Appendix 2. RESULTS Lower Prize Expectation Does Not Sacrifice Treatment Effect We examined the marginal effect of prize probabilities. 
The comparison between CM schemas with diverse prize probabilities while fixing the escalation rule as the existing linear rule was shown in Figure 2(b). Hypothetical CM schemas with prize probabilities closer to the value for usual care (UC) had lower prize expectations per draw. The CM schema with such reduced prize probabilities had significantly lower treatment costs (p<0.001 [see footnote 1]; mean difference=-$117.8; 95% CI=[-$149.13, -$91.80]) without notable changes in levels of treatment outcomes (p=0.119; mean difference=-1.61; 95% CI=[-4.08, 1.22]). Logistic Escalation Rule Is More Cost Effective Than Existing Linear Rule We examined the marginal effect of escalation rules for the number of draws given the longest duration of substance abstinence. The comparison between CM schemas with diverse escalation rules while fixing the prize probabilities as those of the CM schema in CTN-0007 was shown in Figure 2(c). The hypothetical CM schema using the logistic escalation rule achieved superior treatment results (p<0.001; mean difference=3.72; 95% CI=[1.34, 6.15]) without a significant cost increase relative to the existing linear escalation rule (p=0.075; mean difference=$30.92; 95% CI=[-$12.15, $73.66]): the average number of primary substance-free samples under the logistic escalation rule increased by 43% compared to the existing linear escalation rule. However, the budget, the upper bound of 95% CIs of costs, was larger than that of the existing schema. Conversely, the logarithmic escalation rule exhibited similar treatment outcomes but incurred higher costs. Optimal Policies under Different Budget Constraints We identified the optimal CM schema under the budget used in CTN-0007. The comparison between existing and optimal CM schemas was shown in Figure 2(d). The optimal schema achieved superior treatment results with comparable costs relative to the existing schema while satisfying the budget constraint. Detailed numerics were provided in Table 2. 
Table 2 described the comparative analysis results among three distinct CM schemas: (i) existing CM schema implemented in CTN-0007, (ii) the optimal CM constrained by the CTN-0007 budget ($149), and (iii) the optimal CM bounded by the annual cap established by Washington State ($100) ( 42 ). Our results showed that schema (ii) had a significantly larger number of primary substance-free samples (p=0.007; mean difference=2.77; 95% CI=[0.53, 5.06]), increasing by 32%, without a significant increase in costs (p=0.419; mean difference=$3.74; 95% CI=[-$36.02, $42.12]) compared to (i). Notably, schema (iii), despite its tighter budget, outperformed schema (i) in terms of treatment costs (p=0.013; mean difference=-$37.38; 95% CI=[-$73.33, -$4.31]) without significant sacrifice in treatment outcomes (p=0.099; mean difference=1.38; 95% CI=[-0.70, 3.51]). With the same budget used in schema (i), schema (ii) and (iii) modified the incentive allocation by lowering the prize expectation and by shifting more lottery tickets from early on in treatment to later in treatment. DISCUSSION In this study, we leveraged Reinforcement Learning techniques to assess CM schemas in treating SUDs and identified optimal CM schemas under budget constraints. We found that the CM schema using the logistic escalation rule was estimated to have a 43% improvement in negative urine drug screens and negative breathalyzer samples over a three-month study period compared to the existing CM schema in CTN-0007 using a linear program. Imposing a cost constraint, we showed that an optimal CM schema with logistic rule and the probability of getting $0, $1, $20, and $80 prizes of 55.40%, 37.28%, 7.14%, and 0.18%, respectively, was estimated to have a 32% improvement in total number of negative urine drug screens and negative breathalyzer over a three-month study period. 
Our findings on the marginal effect of prize probabilities align with established literature, illustrating that potent reinforcers boost treatment outcomes and raise costs. Moreover, our analysis highlighted the superior cost-effectiveness of the logistic escalation rule compared to its linear and logarithmic counterparts. The CM schemas we identified optimize cost-effectiveness, maximizing outcomes within budgetary constraints. Considering more CM parameters may further enhance the efficacy of our approach. For example, we can add parameters for the maximum prize amount and the choice of primary and secondary substances for future research. We assumed that the patients were not sensitive to the subtle difference between the informed and actually applied CM schemas. We can deliver the optimal CM schema identified by our approach in clinical settings. To validate our findings, conducting a Randomized Controlled Trial (RCT) is an essential future direction. Implementing previously unstudied CM schema, such as the logistic escalation rule, potentially enhances treatment efficacy. Additionally, our approach applies to broader clinical settings, allocating resources depending on the patient’s status. For example, the CTN-0013 study ( 56 ) provided Motivational Enhancement Therapy for pregnant substance users (MET-PS). We can extend our Reinforcement Learning framework and analyze the effectiveness of MET-PS by viewing the schema of providing MET-PS as a policy using participants’ substance use histories. Our Reinforcement Learning framework also allows time-varying personalized CM schemas, promising to provide tailored treatment interventions. For example, participants who abstain from opioid use in the first three weeks were less likely to return to opioid use ( 57 ). We can apply two CM parameters: one is for these early stages, and the other is for the remaining stages to implement time-varying CM schemas. 
Another example is the CTN-0013 study, where the efficacy of MET-PS varied between sites and between majority and minority groups. We can cluster participants according to their demographic or economic characteristics and apply cluster-specific CM parameters to implement personalized CM schemas. Data Availability All data produced are available online at http://datashare.nida.nih.gov/study/nida-ctn-0007 . CONFLICT OF INTEREST/DISCLOSURE STATEMENT Authors have no conflict to disclose. ACKNOWLEDGEMENT Young-geun Kim and Ying Liu were supported by NIH R01MH124106. APPENDIX Appendix 1. Details on Reinforcement Framework for CM Schemas Clinical trial data were encoded using our framework, illustrated in Table 3. An example of a patient’s trajectory under a CM schema was shown: on their 4 th visit, A 4 = $21 was provided based on the lottery results whose number of draw was determined by S 4 . The “reward” R 4 was 1 since the primary substance test result at the 5th visit was negative. This example suggested that the action A 4 of $21 strengthened the primary substance abstinence, resulting in R 4 of 1. In contrast, in the 2nd and 3rd visits, incentives of $2 and $1, respectively, were enough to maintain primary substance abstinence, and considering this, incentives provided at the 1st and 4th visits were excessive. Further technical details, including statistical assumptions required for the encoding framework and further examples were included in the Appendix 3. A Python implementation including example code to calculate off-policy evaluation estimators for any user-specified CM parameter was provided at our GitHub repository: https://github.com/kyg0910/Optimizing-Contingency-Management-with-Reinforcement-Learning . Appendix 2. 
Details on the Search Grid For the prize probabilities, we considered points on convex combinations between prize probabilities for usual care (UC) and for the CM schema used in CTN-0007, $p_m = \lambda_m\, p_{UC} + (1 - \lambda_m)\, p_{CTN\text{-}0007}$ with weights $\lambda_m \in [0, 1]$ increasing in the grid index m , where $p_{UC}$ was the vector for UC, i.e., probability of 1 for no prize, and $p_{CTN\text{-}0007}$ was for the CM schema used in CTN-0007. A larger m indicates allocating a higher probability to no prize. Figure 2(a) visualized example values of prize probabilities. The choice of values in logarithmic and logistic rules was to approximate $3.2 \ln L + 1$ and $13 / (1 + \exp(-0.69 (L - 6.5)))$ , respectively, where L denoted the longest duration of substance abstinence in weeks. The logarithmic curve passed the initial point of the linear escalation, ( 1 , 1 ), and the logistic curve was rotationally symmetric with respect to (6.5, 6.5), the midpoint of the linear escalation curve, to have ticket numbers comparable to the linear rule in the middle stage and to move tickets in the early stage to the late stage. Figure 3(b) visualized escalation rules. Appendix 3. Data Preprocessing Data Acquisition : The dataset for this study was obtained from the CTN data repository. Three primary data files were used: Intake assessment questionnaire (corresponding to “qs.csv” data file), inclusion/exclusion criteria (“sc.csv”), and study termination (“suppds.csv”) forms. Inclusion and Exclusion Criteria : Our study adhered to the inclusion and exclusion criteria outlined in the CTN-0007 study protocol. We considered patients who had completed a minimum of four weeks of methadone treatment. Specifically, we included patients who met one of two conditions: (i) Those who had tested positive for stimulants in their urine within two weeks of the study entry date and (ii) those who came directly to the program from a controlled environment and had a stimulant-positive urine in the two weeks before being enrolled in that environment. 
Participants who failed to provide informed consent during the simple consent quiz or had gambling problems were excluded. Additionally, we excluded participants with other issues, including those who ran in a methadone clinic before the research assistant’s arrival and cases where there was a lack of documentation to ascertain whether participants had completed the program. The final sample sizes of the control and incentive groups were 188 and 195, respectively, and we used the incentive group in the analysis. Data Encoding : Data encoding was performed as follows. For substance use histories, we used urine drug screens for amphetamine, cocaine, methamphetamine, and opioids. Alcohol status was determined based on breathalyzer readings, with readings smaller than or equal to 0.0009 considered negative, in accordance with the CTN-0007 study protocol. We encoded the primary test result as negative when all primary substances (alcohol, amphetamine, cocaine, and methamphetamine) tested negative. The secondary test result was considered negative when the opioid result was negative. All the other cases, including missing data, were encoded as positive. For the lottery results, we recorded the number of draws for each of the four prize categories and the number of incentives provided. Footnotes ↵ 1 For each pair of estimators from two groups, say A j from the first group and B k from the second, we computed the difference A j − B k . Since there were 1,000 bootstrap importance sampling estimators in each group, this resulted in 1,000,000 computed differences. The p-value was the proportion of positive differences. REFERENCES 1. ↵ Volkow ND , Wang GJ , Fowler JS , Tomasi D , Telang F. Addiction: beyond dopamine reward circuitry . Proc Natl Acad Sci . 2011 ; 108 ( 37 ): 15037 – 42 . OpenUrl Abstract / FREE Full Text 2. ↵ Koob GF , Volkow ND . Neurobiology of addiction: a neurocircuitry analysis . Lancet Psychiatry . 2016 ; 3 ( 8 ): 760 – 73 . OpenUrl 3. ↵ Glimcher PW . 
Understanding dopamine and reinforcement learning: the dopamine reward prediction error hypothesis . Proc Natl Acad Sci . 2011 ; 108 ( Supplement_3 ): 15647 – 54 . OpenUrl Abstract / FREE Full Text 4. Redish AD . Addiction as a computational process gone awry . Science . 2004 ; 306 ( 5703 ): 1944 – 7 . OpenUrl Abstract / FREE Full Text 5. ↵ Huys QJ , Maia TV , Frank MJ . Computational psychiatry as a bridge from neuroscience to clinical applications . Nat Neurosci . 2016 ; 19 ( 3 ): 404 – 13 . OpenUrl CrossRef PubMed 6. ↵ Maia TV , Frank MJ . From reinforcement learning models to psychiatric and neurological disorders . Nat Neurosci . 2011 ; 14 ( 2 ): 154 – 62 . OpenUrl CrossRef PubMed Web of Science 7. ↵ Gupta S. Contingency management: Why it pays to quit . Nature . 2015 ; 522 ( 7557 ): S57 – 9 . OpenUrl 8. ↵ Hedegaard H , Miniño AM , Spencer MR , Warner M. Drug overdose deaths in the United States , 1999 – 2020 . 2021 ; 9. ↵ Stitzer ML , Iguchi MY , Felch LJ . Contingent take-home incentive: effects on drug use of methadone maintenance patients . J Consult Clin Psychol . 1992 ; 60 ( 6 ): 927 . OpenUrl CrossRef PubMed Web of Science 10. Carroll KM , Rounsaville BJ . A perfect platform: combining contingency management with medications for drug abuse . Am J Drug Alcohol Abuse . 2007 ; 33 ( 3 ): 343 – 65 . OpenUrl CrossRef PubMed Web of Science 11. Carroll KM , Ball SA , Jackson R , Martino S , Petry NM , Stitzer ML , et al. Ten take home lessons from the first 10 years of the CTN and 10 recommendations for the future . Am J Drug Alcohol Abuse . 2011 ; 37 ( 5 ): 275 – 82 . OpenUrl PubMed 12. Higgins ST , Silverman K , Sigmon SC , Naito NA . Incentives and health: an introduction . Prev Med . 2012 ; 55 : S2 – 6 . OpenUrl CrossRef PubMed 13. Petry NM , Alessi SM , Marx J , Austin M , Tardif M. Vouchers versus prizes: contingency management treatment of substance abusers in community settings . J Consult Clin Psychol . 2005 ; 73 ( 6 ): 1005 . 
OpenUrl CrossRef PubMed Web of Science 14. Petry NM , Alessi SM , Ledgerwood DM . A randomized trial of contingency management delivered by community therapists . J Consult Clin Psychol . 2012 ; 80 ( 2 ): 286 . OpenUrl CrossRef PubMed 15. ↵ Petry NM , Alessi SM , Ledgerwood DM . Contingency management delivered by community therapists in outpatient settings . Drug Alcohol Depend . 2012 ; 122 ( 1–2 ): 86 – 92 . OpenUrl PubMed 16. ↵ Petry NM , Martin B , Cooney JL , Kranzler HR . Give them prizes and they will come: Contingency management for treatment of alcohol dependence . J Consult Clin Psychol . 2000 ; 68 ( 2 ): 250 . OpenUrl CrossRef PubMed Web of Science 17. ↵ Stitzer ML , Bigelow GE , Liebson IA , Hawthorne JW . Contingent reinforcement for benzodiazepine-free urines: Evaluation of a drug abuse treatment intervention . J Appl Behav Anal . 1982 ; 15 ( 4 ): 493 – 503 . OpenUrl CrossRef PubMed Web of Science 18. ↵ Ghitza UE , Epstein DH , Preston KL . Contingency management reduces injection-related HIV risk behaviors in heroin and cocaine using outpatients . Addict Behav . 2008 ; 33 ( 4 ): 593 – 604 . OpenUrl PubMed 19. ↵ Roll JM , Petry NM , Stitzer ML , Brecht ML , Peirce JM , McCann MJ , et al. Contingency management for the treatment of methamphetamine use disorders . Am J Psychiatry . 2006 ; 163 ( 11 ): 1993 – 9 . OpenUrl CrossRef PubMed Web of Science 20. ↵ Budney AJ , Stanger C , Tilford JM , Scherer EB , Brown PC , Li Z , et al. Computer-assisted behavioral therapy and contingency management for cannabis use disorder . Psychol Addict Behav . 2015 ; 29 ( 3 ): 501 . OpenUrl 21. ↵ Campbell AN , Nunes EV , Matthews AG , Stitzer M , Miele GM , Polsky D , et al. Internet-delivered treatment for substance abuse: a multisite randomized controlled trial . Am J Psychiatry . 2014 ; 171 ( 6 ): 683 – 90 . OpenUrl CrossRef PubMed 22. ↵ Silverman K , Wong CJ , Higgins ST , Brooner RK , Montoya ID , Contoreggi C , et al. 
Increasing opiate abstinence through voucher-based reinforcement therapy . Drug Alcohol Depend . 1996 ; 41 ( 2 ): 157 – 65 . OpenUrl CrossRef PubMed Web of Science 23. ↵ Bickel WK , Amass L , Higgins ST , Badger GJ , Esch RA . Effects of adding behavioral treatment to opioid detoxification with buprenorphine . J Consult Clin Psychol . 1997 ; 65 ( 5 ): 803 . OpenUrl CrossRef PubMed Web of Science 24. ↵ Higgins ST , Washio Y , Heil SH , Solomon LJ , Gaalema DE , Higgins TM , et al. Financial incentives for smoking cessation among pregnant and newly postpartum women . Prev Med . 2012 ; 55 : S33 – 40 . OpenUrl CrossRef PubMed 25. ↵ Tappin D , Bauld L , Purves D , Boyd K , Sinclair L , MacAskill S , et al. Financial incentives for smoking cessation in pregnancy: randomised controlled trial . Bmj . 2015 ; 350 . 26. ↵ Stanger C , Budney AJ , Kamon JL , Thostensen J. A randomized trial of contingency management for adolescent marijuana abuse and dependence . Drug Alcohol Depend . 2009 ; 105 ( 3 ): 240 – 7 . OpenUrl CrossRef PubMed Web of Science 27. ↵ Higgins ST , Delaney DD , Budney AJ , Bickel WK , Hughes JR , Foerg F , et al. A behavioral approach to achieving initial cocaine abstinence . Am J Psychiatry . 1991 ; 148 ( 9 ): 1218 – 24 . OpenUrl CrossRef PubMed Web of Science 28. Higgins ST , Budney AJ , Bickel WK , Foerg FE , Donham R , Badger GJ . Incentives improve outcome in outpatient behavioral treatment of cocaine dependence . Arch Gen Psychiatry . 1994 ; 51 ( 7 ): 568 – 76 . OpenUrl CrossRef PubMed Web of Science 29. ↵ Higgins ST , Budney AJ , Bickel WK , Hughes JR , Foerg F , Badger G. Achieving cocaine abstinence with a behavioral approach . Am J Psychiatry . 1993 ; 150 ( 5 ): 763 – 9 . OpenUrl CrossRef PubMed Web of Science 30. ↵ Petry NM , Martin B. Low-cost contingency management for treating cocaine-and opioid-abusing methadone patients . J Consult Clin Psychol . 2002 ; 70 ( 2 ): 398 . OpenUrl CrossRef PubMed Web of Science 31. 
↵ Petry NM , Martin B , Finocche C. Contingency management in group treatment: A demonstration project in an HIV drop-in center . J Subst Abuse Treat . 2001 ; 21 ( 2 ): 89 – 96 . OpenUrl CrossRef PubMed 32. ↵ Lussier JP , Heil SH , Mongeon JA , Badger GJ , Higgins ST . A meta-analysis of voucher-based reinforcement therapy for substance use disorders . Addiction . 2006 ; 101 ( 2 ): 192 – 203 . OpenUrl CrossRef PubMed Web of Science 33. ↵ Petry NM , Peirce JM , Stitzer ML , Blaine J , Roll JM , Cohen A , et al. Effect of prize-based incentives on outcomes in stimulant abusers in outpatient psychosocial treatment programs: a national drug abuse treatment clinical trials network study . Arch Gen Psychiatry . 2005 ; 62 ( 10 ): 1148 – 56 . OpenUrl CrossRef PubMed Web of Science 34. ↵ Benishek LA , Dugosh KL , Kirby KC , Matejkowski J , Clements NT , Seymour BL , et al. Prize-based contingency management for the treatment of substance abusers: A meta-analysis . Addiction . 2014 ; 109 ( 9 ): 1426 – 36 . OpenUrl 35. Prendergast M , Podus D , Finney J , Greenwell L , Roll J. Contingency management for treatment of substance use disorders: A meta-analysis . Addiction . 2006 ; 101 ( 11 ): 1546 – 60 . OpenUrl CrossRef PubMed Web of Science 36. Bentzley BS , Han SS , Neuner S , Humphreys K , Kampman KM , Halpern CH . Comparison of treatments for cocaine use disorder among adults: a systematic review and meta-analysis . JAMA Netw Open . 2021 ; 4 ( 5 ): e218049 – e218049 . OpenUrl 37. Bolívar HA , Klemperer EM , Coleman SR , DeSarno M , Skelly JM , Higgins ST . Contingency management for patients receiving medication for opioid use disorder: a systematic review and meta-analysis . JAMA Psychiatry . 2021 ; 78 ( 10 ): 1092 – 102 . OpenUrl 38. Regier PS , Redish AD . Contingency management and deliberative decision-making processes . Front Psychiatry . 2015 ; 6 : 76 . OpenUrl 39. ↵ Petry NM . 
Contingency management for substance abuse treatment: A guide to implementing this evidence-based practice . Routledge ; 2013 . 40. ↵ Kirby KC , Benishek LA , Dugosh KL , Kerwin ME . Substance abuse treatment providers’ beliefs and objections regarding contingency management: Implications for dissemination . Drug Alcohol Depend . 2006 ; 85 ( 1 ): 19 – 27 . OpenUrl CrossRef PubMed Web of Science 41. ↵ Becker SJ , Kelly LM , Kang AW , Escobar KI , Squires DD . Factors associated with contingency management adoption among opioid treatment providers receiving a comprehensive implementation strategy . Subst Abuse . 2019 ; 40 ( 1 ): 56 – 60 . OpenUrl 42. ↵ Glass JE , Nunes EV , Bradley KA . Contingency management: a highly effective treatment for substance use disorders and the legal barriers that stand in its way . Health Aff Forefr . 2020 ; 43. ↵ Petry NM , Bohn MJ . Fishbowls and candy bars: using low-cost incentives to increase treatment retention . Sci Pract Perspect . 2003 ; 2 ( 1 ): 55 . OpenUrl PubMed 44. ↵ Olmstead TA , Sindelar JL , Petry NM . Clinic variation in the cost-effectiveness of contingency management . Am J Addict . 2007 ; 16 ( 6 ): 457 – 60 . OpenUrl PubMed 45. Halpern SD , French B , Small DS , Saulsgiver K , Harhay MO , Audrain-McGovern J , et al. Randomized trial of four financial-incentive programs for smoking cessation . N Engl J Med . 2015 ; 372 ( 22 ): 2108 – 17 . OpenUrl CrossRef PubMed 46. ↵ Sindelar J , Elbel B , Petry NM . What do we get for our money? Cost-effectiveness of adding contingency management . Addiction . 2007 ; 102 ( 2 ): 309 – 16 . OpenUrl CrossRef PubMed Web of Science 47. ↵ Tsiatis AA , Davidian M , Holloway ST , Laber EB . Dynamic treatment regimes: Statistical methods for precision medicine . CRC press ; 2019 . 48. ↵ Murphy SA . Optimal dynamic treatment regimes . J R Stat Soc Ser B Stat Methodol . 2003 ; 65 ( 2 ): 331 – 55 . OpenUrl 49. ↵ Liao P , Klasnja P , Murphy S. 
Off-policy estimation of long-term average outcomes with applications to mobile health . J Am Stat Assoc . 2021 ; 116 ( 533 ): 382 – 91 . OpenUrl 50. ↵ Peirce JM , Petry NM , Stitzer ML , Blaine J , Kellogg S , Satterfield F , et al. Effects of lower-cost incentives on stimulant abstinence in methadone maintenance treatment: A National Drug Abuse Treatment Clinical Trials Network study . Arch Gen Psychiatry . 2006 ; 63 ( 2 ): 201 – 8 . OpenUrl CrossRef PubMed Web of Science 51. ↵ Chakraborty B , Murphy SA . Dynamic treatment regimes . Annu Rev Stat Its Appl . 2014 ; 1 : 447 – 64 . OpenUrl 52. Lavori PW , Dawson R. A design for testing clinical strategies: biased adaptive within-subject randomization . J R Stat Soc Ser A Stat Soc . 2000 ; 163 ( 1 ): 29 – 38 . OpenUrl 53. ↵ Liu Y , Wang Y , Kosorok MR , Zhao Y , Zeng D. Augmented outcome-weighted learning for estimating optimal dynamic treatment regimens . Stat Med . 2018 ; 37 ( 26 ): 3776 – 88 . OpenUrl 54. ↵ Sutton RS , Barto AG . Reinforcement learning: An introduction . MIT press ; 2018 . 55. ↵ Precup D. Eligibility traces for off-policy policy evaluation . Comput Sci Dep Fac Publ Ser . 2000 ; 80 . 56. ↵ Winhusen T , Kropp F , Babcock D , Hague D , Erickson SJ , Renz C , et al. Motivational enhancement therapy to improve treatment utilization and outcome in pregnant substance users . J Subst Abuse Treat . 2008 ; 35 ( 2 ): 161 – 73 . OpenUrl PubMed 57. ↵ Luo SX , Feaster DJ , Liu Y , Balise RR , Hu MC , Bouzoubaa L , et al. Individual-level risk prediction of return to use during opioid use disorder treatment . JAMA Psychiatry . 2023 ; View the discussion thread. Back to top Previous Next Posted March 29, 2024. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. 
Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Optimizing Contingency Management with Reinforcement Learning Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Optimizing Contingency Management with Reinforcement Learning Young-geun Kim , Laura Brandt , Ken Cheung , Edward V. Nunes , John Roll , Sean X. Luo , Ying Liu medRxiv 2024.03.28.24305031; doi: https://doi.org/10.1101/2024.03.28.24305031 Share This Article: Copy Citation Tools Optimizing Contingency Management with Reinforcement Learning Young-geun Kim , Laura Brandt , Ken Cheung , Edward V. Nunes , John Roll , Sean X. Luo , Ying Liu medRxiv 2024.03.28.24305031; doi: https://doi.org/10.1101/2024.03.28.24305031 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Psychiatry and Clinical Psychology Subject Areas All Articles Addiction Medicine (574) Allergy and Immunology (865) Anesthesia (304) Cardiovascular Medicine (4460) Dentistry and Oral Medicine (445) Dermatology (383) Emergency Medicine (611) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1517) Epidemiology (15251) Forensic Medicine (31) Gastroenterology (1132) Genetic and Genomic Medicine (6621) Geriatric Medicine (669) Health Economics (1002) Health Informatics (4564) Health Policy (1372) Health Systems and Quality Improvement (1617) Hematology (544) HIV/AIDS (1272) Infectious Diseases (except HIV/AIDS) (15938) Intensive Care and Critical Care Medicine (1107) Medical Education (624) Medical Ethics (147) Nephrology (670) Neurology (6642) 
Nursing (346) Nutrition (1001) Obstetrics and Gynecology (1148) Occupational and Environmental Health (957) Oncology (3350) Ophthalmology (981) Orthopedics (369) Otolaryngology (421) Pain Medicine (436) Palliative Medicine (130) Pathology (665) Pediatrics (1698) Pharmacology and Therapeutics (694) Primary Care Research (714) Psychiatry and Clinical Psychology (5464) Public and Global Health (9259) Radiology and Imaging (2212) Rehabilitation Medicine and Physical Therapy (1372) Respiratory Medicine (1198) Rheumatology (598) Sexual and Reproductive Health (716) Sports Medicine (533) Surgery (715) Toxicology (100) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a0393e31fe4552ad',t:'MTc4MDA5Njk2NQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2024) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-23T02:00:01.238055+00:00

License: CC-BY-NC-ND-4.0