Context-Dependent Interaction Between Goal-Directed and Habitual Control Under Time Pressure

doi:10.1101/2024.09.28.615575

Context-Dependent Interaction Between Goal-Directed and Habitual Control Under Time Pressure

2024 · doi:10.1101/2024.09.28.615575

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 64,466 characters · extracted from preprint-html · click to expand

Context-Dependent Interaction Between Goal-Directed and Habitual Control Under Time Pressure | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Context-Dependent Interaction Between Goal-Directed and Habitual Control Under Time Pressure View ORCID Profile Sascha Frölich , View ORCID Profile Ben J. Wagner , View ORCID Profile Michael N. Smolka , View ORCID Profile Stefan J. 
Kiebel doi: https://doi.org/10.1101/2024.09.28.615575 Sascha Frölich 1 Department of Psychology, Technische Universität Dresden , Dresden, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sascha Frölich For correspondence: sascha.froelich{at}tu-dresden.de Ben J. Wagner 1 Department of Psychology, Technische Universität Dresden , Dresden, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ben J. Wagner Michael N. Smolka 2 Department of Psychiatry and Psychotherapy, Technische Universität Dresden , Dresden, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Michael N. Smolka Stefan J. Kiebel 1 Department of Psychology, Technische Universität Dresden , Dresden, Germany 3 Centre for Tactile Internet with Human-in-the-Loop Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Stefan J. Kiebel Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Habits are an important aspect of human behaviour. Habits are reflexive, inflexible, and fast, in contrast to goal-directed behaviour which is reflective, flexible, and slow. Current theories assume that habits and goal-directed actions are controlled by two separate but interacting systems. However, it is not clear how these two systems interact when actions must be made under time pressure. Here we use a task which induces habitual behaviour in the form of action sequences, while concurrently requiring participants to perform goal-directed actions that are either congruent or incongruent with the habit. This task thus allows for concurrent measurement of both goal-directed and habitual behaviour, thereby permitting a nuanced analysis of the interaction between these two control modes. 
Using computational modelling, we find that models where the influence of the habit depends on the number of repetitions explain participant behaviour better than models that assume the habit to be constant. We further show that roughly half of the participants modulate their use of the habit depending on the context, i.e. they selectively inhibit the habit’s influence when it is incongruent with their explicit goals, but not when both are congruent and the influence of the habit is adaptive. Additional drift-diffusion modelling of choice and reaction time data shows that proactive control is mobilized in the congruent task context whereas reactive control is mobilized in the incongruent task context. The present study thus indicates that habitual control is context-dependent and can be adaptively deployed via proactive and reactive control, rather than being a fixed or isolated mechanism. I. Introduction Habits are an essential aspect of human behaviour. Some studies estimate that a third to one half of human behaviour is habitual ( Wood et al., 2002 ). Habits are fast and reflexive, and allow us to efficiently select actions that are repeated often within the same context ( Wood and Rünger, 2016 ). Because of their significance and influence on behaviour, habits have been an active field of research in psychology and neuroscience for decades. Moreover, maladaptive habits represent pivotal mechanisms underlying a range of mental disorders, including obsessive-compulsive disorder and addiction ( Everitt and Robbins, 2005 ; Everitt et al., 2016 ). One hallmark of habitual behaviour is that it is faster than goal-directed behaviour ( Keramati et al., 2011 ). Hardwick et al. (2019) show that habitual behaviour dominates action selection when actions have to be performed fast, and is replaced by goal-directed behaviour when enough time is available ( Hardwick et al., 2019 ). 
Classical habit theory further posits habits as stimulus-response (S-R) associations that are initially executed as part of a stimulus-response-outcome (S-R-O) chain ( Dolan and Dayan, 2013 ; Bouton, 2019 ). In this view, an agent first tries to obtain a given outcome (O) by calculating and performing a goal-directed response (R) in presence of a given stimulus (S). With extensive repetition and reinforcement, the S-R associations become strengthened, until the stimulus alone triggers the habitual response ( Yin and Knowlton, 2006 ). While S-R learning is able to explain many experimental findings, it has also become clear that acquisition and expression of habits strongly depend on the given context ( Bouton, 2019 ; Wood and Rünger, 2016 ). In computational approaches to behaviour, habitual and goal-directed control are usually modeled as two distinct controllers ( Daw et al., 2005 , 2011 ; Lee et al., 2014 ; Miller et al., 2019 ; Keramati et al., 2016 ; Pezzulo et al., 2013 ). This seems in line with neuroscientific evidence showing that habitual and goal-directed behaviour are subserved by distinct corticostriatal connections ( Yin and Knowlton, 2006 ; Dolan and Dayan, 2013 ). Some of these computational approaches assume the output of the two controllers to be mixed or arbitrated during behavioural selection ( Daw et al., 2005 , 2011; Lee et al., 2014 ; Miller et al., 2019 ). In a mixing model, it is assumed that both the habitual and goal-directed controller perform their computations in parallel. This would mean that the goal-directed controller always performs its computations, which makes the speed of habit execution, relative to goal-directed control, hard to explain. Other studies have described different types of interactions between goal-directed and habitual control ( Keramati et al., 2016 ; Pezzulo et al., 2013 ; Balleine and Dezfouli, 2019 ; Dezfouli and Balleine, 2012 ; Cushman and Morris, 2015 ). 
For example, in ( Dezfouli and Balleine, 2012 ), habits are modelled as action sequences, where the goal-directed controller selects an individual action, or a habitual action sequence. Congruent with experimental findings, the execution of the action sequence is characterised by fast and inflexible performance of the individual constituent actions. Another option for computational modelling, which has so far received far less attention, is to harness the experimentally established context-specificity of habits ( Bouton, 2019 ; Wood and Rünger, 2016 ). Following the modelling approach of Schwöbel et al. (2021) , the key idea is to make the interaction between the goal-directed controller and the habitual controller context-specific. In such a framework, an agent infers the current context to apply a previously learned context-specific arbitration between goal-directed and habitual behaviour. For example, when in the context of standing in the bathroom in the morning, the brain infers the context of ’I just got up, walked into the bathroom and need to prepare for the day’ and starts the habit stored in that context. Later during the day, when in the bathroom again at a different time, the brain infers a different context and brushing one’s teeth is not a habit but just one possible option. Such a simple but rapid context-specific arbitration moves the heavy lifting of the underlying computations from an arbitration between controllers to the inference of context. This may explain how the brain can minimize the computations required for the assumed arbitration between goal-directed and habitual control, especially under demanding response time deadlines ( Schwöbel et al., 2024 ). In the present study, we aimed to test for a context-specific and rapid interaction of habitual and goal-directed behaviour. 
To do this, we used a revised and refined variant of the Action Sequence Task (AST), which we have used previously ( Frölich et al., 2023 ), and which is built on the serial reaction time task ( Robertson, 2007 ; Lewicki et al., 1988 ; Nissen and Bullemer, 1987 ). In the AST, participants extensively repeat an action sequence under time pressure, since participants are instructed to respond within a tight deadline of a few hundred milliseconds (e.g., 600ms). Through repetition, and probably due to the need for fast responses, these action sequences become automatic quickly ( Nissen and Bullemer, 1987 ; Frölich et al., 2023 ). Sequences are executed mostly without explicit awareness of a sequential pattern, and can be considered habitual. Crucially, in the AST, common trials in which the execution of the learned action sequence was beneficial were sometimes followed by similarly appearing trials which required a goal-directed response. We implemented two different contexts of such trials. In the congruent context, the learned habitual action was the same as the goal-directed response (goal-directed action is congruent with habitual action), while in the incongruent trial context, the habitual action was different from the goal-directed response (goal-directed action is incongruent with habitual action). These two contexts enabled us to test whether participants learned to modify the strength of the habitual action in a context-specific fashion. To investigate the role of repetition on choice behaviour, we modelled choice data by comparing two model families. The first model family explained choices by the influence of a reward-learning goal-directed controller, and a value-free repetition-learning based habitual controller. The goal-directed controller learned Q-values of actions using temporal-difference learning based on received reward, similar to ( Miller et al., 2019 ). 
The habitual controller learned a repetition-value for each action based on action sequences a participant has performed in the past, building on the concept of habits as action sequences ( Dezfouli and Balleine, 2012 ). In the second model family, behaviour was purely based on reward-learning using temporal-difference learning of Q-values. Within each model family, we further differentiated between models where the influence of the habit is equally strong in all trials that required a goal-directed response, and where that influence is modulated depending on the congruency of the habit with the participants’ explicit task goals (that is, dependent on the context). We further used drift-diffusion modelling (DDM) of both choice and reaction time data in addition to modelling only choice behaviour ( Pedersen et al., 2017 ; Wagner et al., 2022 ). We used DDM to investigate whether the learned habitual action sequences impact participants’ prior response tendencies (proactive control), or whether they affect participants’ choices at the level of information processing, that is, evidence accumulation (reactive control). Results from previous studies have been mixed as to whether prior information, before entering a trial, mainly affects response bias ( van Ravenzwaaij et al., 2012 ; Mulder et al., 2012 ), or whether the influence of prior information modulates both response bias and drift rate, depending on the task at hand ( Hanks et al., 2011 ). Recently, Zhang et al. (2024) proposed a sequential sampling model in which habitual response tendencies map onto the response bias, whereas goal-directed responses influence the drift rate. Here, we test whether learned habitual action sequences influence prior response tendencies by modulating the DDM’s starting point bias, or whether they influence the decision-making process itself by modulating evidence accumulation. II. Methods i. Participants We recruited participants on the online platform prolific.com. 
Exclusion criteria for study registration were age (below 25 or above 60 years), current behavioural or pharmacological treatment of a mental disorder, regular intake of psychotropic medication, impairments that make the use of a computer keyboard hard or impossible, and substance use disorders (except tobacco). Participants performed the experiment online and had to pass a criterion test between the instructions and the main experiment, which excluded ca. 14% of participants (see supplementary material for details). The high fail rate is likely in part due to the online nature of the experiment. Of 81 participants who completed the experiment, 16 were excluded for experimental reasons (see supplementary material), resulting in 65 eligible participants. In order to have the same number of participants in each counterbalancing group, data analysis was performed with a total of 60 participants. Of the 60 participants, 29 were male and 31 female. Average age was 37.9 ± 9.0 years (min 25 years, max 58 years). ii. Task We used a revised and refined version of the Action-Sequences Task ( Frölich et al. (2023) ; Fig. 1 ; see also supplementary material). Participants saw one or two stimuli appear in four possible locations on the computer screen ( Fig. 1 A ). Each position was mapped to one of four keys on the computer keyboard (s, x, k, and m). In each trial, participants had 600 ms to respond to the appearance of one (single-target trial, STT) or two stimuli (dual-target trial, DTT), with a single press of the associated key. In case of a DTT, participants could choose which one of the two mapped keys to press. If participants responded correctly in time, they were probabilistically rewarded with a point. In case participants won a point, a green smiley appeared in the center of the screen. If they did not win a point, a red frowney was presented instead. In the case of errors, i.e. 
timeouts or double key presses, a penalty screen appeared for 1200 ms with a corresponding text (either “Too slow”, or “Please only press one key at a time”). The experiment had two different conditions, a repeating-sequence (Rep) condition, and a random-sequence condition (Rand), each consisting of 480 trials (408 single-target trials and 72 dual-target trials) ( Fig. 1 B ). In the Rep condition, stimuli were presented as a repeating sequence of twelve stimulus positions. In the Rand condition, stimuli appeared pseudo-randomly and did not follow a repeating sequence. In case of a DTT, participants had to choose which of the two stimuli to respond to, but were instructed to try to choose the one with the higher reward probability ( Fig. 1 C ). During instruction, participants were explicitly told which two locations had the highest reward probabilities, but were not told the exact values (80% and 20%, respectively). Each DTT contained one stimulus with low reward probability, and one stimulus with high reward probability. Crucially, in some DTTs of the Rep condition, the explicit instruction of choosing the higher reward probability conflicted with the repeating stimulus sequence. Specifically, a DTT of the Rep condition was considered congruent if the sequence element coincided with the location of the higher reward probability. Conversely, it was considered incongruent if the sequence element was in the location with lower reward probability. Participants were not told about the existence of the Rep and the Rand conditions, and were instructed to collect as many points as possible. They were told that their monetary reward at the end of the experiment depended on the total number of collected points. The experiment was performed over the course of two consecutive days, and participants had to perform both parts of the experiments around the same time of day (within the same four-hour time window). 
See the supplementary methods for more details, including a description of changes of the task since ( Frölich et al., 2023 ). Download figure Open in new tab Figure 1: Experimental design. ( A ) In each trial, participants saw one or two green target stimuli placed in four different boxes. Participants had to press one of four corresponding keys on the keyboard in response to the target position(s), within a deadline of 600 ms after stimulus onset. When pressing the correct key, a point reward was indicated probabilistically. If the trial was rewarded, a smiley was shown in the centre of the screen. In the case of no reward, a frowney appeared instead. This feedback phase lasted 400 ms. After the feedback, the intertrial interval (ITI) lasted 100 ms. ( B ) There were two experimental conditions: In the repeating-sequence condition (Rep), stimuli kept repeating the same twelve-item sequence (here purple for illustration), while in the random-sequence condition (Rand), targets appeared pseudo-randomly. The experiment was performed over the course of two consecutive days with alternating sequential and random blocks, six blocks on day 1 and eight blocks on day 2. Each block consisted of 480 trials. Block order was counterbalanced (see supplementary material). ( C ) Two of the four stimulus positions were associated with a high reward probability of 0.8. The other two stimuli were associated with a low reward probability of 0.2. reward probabilities did not change over the course of the experiment and the positions of high and low reward probability were counterbalanced (see supplementary material). dual-target trials (two green targets) appeared pseudo-randomly, with a frequency of 15% of all trials, i.e. 72 dual-target trials per block. In the repeating-sequence (Rep) condition, one of the two targets always corresponded to the current sequence element (here purple for illustration). 
In the repeating-sequence condition, dual-target trials (DTT) were either congruent if the sequence element was in a location with high reward probability (7.5% of all trials), or incongruent if the sequence element was in a location with low reward probability (7.5% of all trials). iii. Computational modelling iii.1 Generative modelling of choice behaviour Computational modelling allows us to test hypothesised cognitive mechanisms involved in response selection, and measure the strength of those effects by making use of within-subject response variances. Using computational modelling, we modelled responses in DTTs to test whether responses in dual-target trials (DTT) are influenced by the action sequence in the preceding single-target trials (STT), and whether there is evidence for a dynamical adaptation of habitual responding, depending on the type of DTT (i.e., congruent or incongruent context, see Glossary). We compared six different models of two distinct model families. We implemented the models with hierarchical stochastic variational inference in pyro 1.8.6 with python 3.11.6, followed by formal model comparison. We compared models of two model families. Model Family 1 consists of models with two distinct action controllers, a temporal-difference learning-based goal-directed action controller, and a repetition-based habit controller. Models in Model Family 2 were modelled analogously to the models in Model Family 1, but do not contain a repetition-based habitual controller. Instead, their behaviour is controlled by temporal-difference learning of Q-values, weighting the Q-values differently depending on the type of dual-target trial. View this table: View inline View popup Download powerpoint Table 1: Glossary of terms. Model Family 1: Models with Goal-Directed and Habitual Action Controllers Model Family 1 modelled choice behaviour as the result of the interaction between a goal-directed behavioural controller and a repetition-based habitual controller. 
The goal-directed controller was modelled as a simple temporal-difference learning algorithm for Q-value learning, $$Q_{t+1} = Q_t + \alpha\,(O_t - Q_t).$$ Here $O_t$ is the outcome (0 or 1) at trial $t$, and $\alpha$ a free learning rate parameter. One Q-value was learned for each of the four possible stimulus positions. The habitual controller was modelled based on performed action sequences. In addition to a goal-directed Q-value, each action $a$ was associated with a repetition-bias $R(a)$. This value was computed as the ratio of the number of times action sequence $a_{t-2}, a_{t-1}, a_t = a$ was performed since the beginning of the experiment, to the number of all performed action sequences beginning with $a_{t-2}, a_{t-1}$: $$R(a) = \frac{\#(a_{t-2} = i,\; a_{t-1} = j,\; a_t = a)}{\sum_{k=1}^{4} \#(a_{t-2} = i,\; a_{t-1} = j,\; a_t = k)},$$ where $i, j \in \{1, 2, 3, 4\}$ denote the two preceding actions. All actions in STTs and DTTs were considered for the computation of the repetition-bias. The counts of the previously performed action sequences $\#(a_{t-3}, a_{t-2}, a_{t-1}, a_t)$ were tallied separately for the Rand and the Rep condition. Model 1 (3 parameters) Simple repetition-bias model. This model computes response options based on learned Q-values and repetition-bias values $R(a)$ and assumes no qualitative difference between congruent and incongruent DTTs. The probability of a goal-directed response is modeled as a sigmoid choice rule: $$P(a_{opt}) = \sigma\big(\theta_Q\,\Delta Q + \theta_{Rep}\,\Delta R\big), \qquad \Delta Q = Q(a_{opt}) - Q(a_{subopt}), \qquad \Delta R = R(a_{opt}) - R(a_{subopt}),$$ where $\sigma(x) = 1/(1 + e^{-x})$ is the logistic sigmoid, and $\theta_Q$ and $\theta_{Rep}$ are free model parameters. We call the response option with higher reward probability the optimal response ($a_{opt}$), and the one with low reward-probability the suboptimal response option ($a_{subopt}$). Note that Q-values for $a_{opt}$ do not always have to be higher than those for $a_{subopt}$. After learning, however, $Q(a_{opt}) > Q(a_{subopt})$, and thus $\Delta Q > 0$. In congruent DTTs, $a_{opt}$ corresponds to the repeating action sequence, while in incongruent DTTs, $a_{subopt}$ corresponds to the repeating action sequence. 
After learning, the difference between the repetition-bias values for the two actions, $\Delta R$, is positive in the case of congruent DTTs, and negative in incongruent DTTs and thus intrinsically differentiates between congruent and incongruent contexts. $\theta_Q$ and $\theta_{Rep}$ are fitted across the Rep and the Rand conditions (see Fig. 1 B ). Note that all model parameters were fitted separately for days 1 and 2. Model 2 (4 free parameters) Repetition-bias model with context-switch I. This model is an extension of the simple repetition-bias model (model 1, Eq. 4 ) and formalizes the assumption that a conflict between goal-directed and habitual controller modulates the influence of the habitual action sequence, using an additional term that allows for a dynamic adaptation of the repetition-bias, depending on the DTT context (congruent or incongruent): $$P(a_{opt}) = \sigma\big(\theta_Q\,\Delta Q + (\theta_{Rep} - \eta_{Inc}\,\theta_{Inc})\,\Delta R\big),$$ where $\eta_{Inc}$ indicates an inferred incongruent context in a DTT. We approximate this inference by setting $\eta_{Inc}$ to 1 only if $\Delta R$ and $\Delta Q$ (see Eqs. 5 and 4 ) are of opposite sign, and otherwise to 0. Hence, the influence of the habitual action sequence on choice behaviour is $\theta_{Rep}\,\Delta R$ in congruent DTTs, and can be modulated as $(\theta_{Rep} - \eta_{Inc}\,\theta_{Inc})\,\Delta R$ in incongruent DTTs. Model 3 (4 free parameters) Repetition-bias model with context-switch II. The previous model assumes a constant change of the effect of the habitual action sequence in case of an incongruent DTT, namely a reduction of $\theta_{Rep}$ by $\theta_{Inc}$ ( Eq. 6 ). However, it is reasonable to assume that conflict varies over trials. To implement this, we assumed that if at least one of the two terms $\Delta Q$ and $\Delta R$ is small, the conflict between goal-directed controller and habitual controller will be small as well. 
If the reduction of the effect of the habitual action sequence is triggered by a conflict between the goal-directed and the habitual controllers, this conflict is likely large when both $|\Delta Q|$ and $|\Delta R|$ are large, but small when one of them is almost zero, since in such a case the controller with the small value has no strong preference for one response option over the other. We can account for such a dynamic with the switch term $\min\{|\Delta R|, |\Delta Q|\}$: Again, $\eta_{Inc}$ is only 1 in inferred incongruent DTTs, see the description of the Context-Switch model above. Model Family 2: Pure Q-Value-Learning Models To test for the presence of the repetition-bias $R(a)$, we fitted models that are analogous to the models in Model Family 1, but without the mechanisms based on repetition-bias $R(a)$. These models predict responses purely by weighting the learned Q-values depending on the current DTT. Model 4 (3 free parameters) Q-weighting model with identical effect strengths for congruent and incongruent trials. This model learns $\theta_Q$ like the simple repetition-bias model (model 1). Instead of a repetition-bias weighted by $\theta_{Rep}$, this model learns an additional term $\theta_{Qdiff}\,\Delta Q$, which is added in a congruent DTT and subtracted in an incongruent DTT. Conceptually, $\theta_Q\,\Delta Q$ should thus encode the effect of the goal-directed controller, while $\theta_{Qdiff}\,\Delta Q$ encodes the supporting (congruent), or hindering (incongruent) influence of the habitual action sequence in goal-directed responding: $$P(a_{opt}) = \sigma\big(\theta_Q\,\Delta Q + \eta\,\theta_{Qdiff}\,\Delta Q\big).$$ Since this model is not endowed with a repetition-bias $R(a)$, we have to directly provide it with knowledge of the DTT type ($\eta$): $\eta$ is 1 in congruent DTTs, −1 in incongruent DTTs, and 0 in the Rand condition. Model 5 (4 free parameters) Q-weighting model with context-switch. This model is similar to model 4, but with a qualitative distinction between congruent and incongruent context. 
In model 4, the deviation of goal-directed responding from random DTTs is of size $\theta_{Qdiff}$ ($\theta_{Qdiff}\,\Delta Q$), albeit with opposite directions for congruent and incongruent DTTs. Model 5 allows for different sizes of those deviations for congruent and for incongruent DTTs, which makes it analogous to models 2 and 3: Here $\theta_{Cong}$ encodes the difference in optimal responding between the congruent context and random DTTs, and $\theta_{Inc}$ the modulation in the incongruent context. Similar to Model 4, to enable the model to differentiate between the different DTT types, we set $\eta_{Rep}$ to 1 in DTTs of the Rep condition and $\eta_{Inc}$ to 1 only in incongruent DTTs. Model 6 (4 parameters) Q-weighting model with independent weights for all DTT types. As in Model 5, this model discriminates between all three DTT types, and predicts responses by weighting Q-value differences with a different parameter for each DTT type: $$P(a_{opt}) = \sigma\big(\theta_{Q,i}\,\Delta Q\big),$$ where $\theta_{Q,i}$ is one of three different model parameters $\theta_Q$, depending on the DTT type, where information about the DTT type is again explicitly given to the model. Theoretically, of all models, this model should be best able to track the mean value of optimal responses for all DTT types. iii.2 Drift-Diffusion Modelling The previous models with softmax-decision rules for choice behaviour showed the existence of a repetition-bias of the habitual controller, and that around half of participants attenuated the influence of the habitual controller in the incongruent context compared to the congruent context. However, those models give no insight into the cognitive processes that underlie habitual responding in the congruent context and response inhibition in the incongruent context. For example, attenuated habitual responding in the incongruent context could be the result of a reduced prior response tendency for the habitual response, or of a cognitive process that involves information processing, like context inference. 
Furthermore, the differences in reaction times in congruent, incongruent, and random DTTs ( Fig. 3 ) suggest that in each of those trial types, slightly different processes are at play. We therefore modeled choice data and reaction times jointly in a drift-diffusion model with collapsing decision boundaries. In contrast to the six models above with softmax choice-rule, the DDM can break down choice behaviour into different cognitive processes, including evidence accumulation and prior response tendency ( Forstmann et al., 2016 ; Peters and D’Esposito, 2020 ) (see Fig. 2 for an illustration of a DDM with collapsing boundaries). We examined whether the influence of the habitual action sequence on behaviour manifests at the level of evidence accumulation (drift rate) or whether habitual responses only affect the response bias (starting point). In the first case, the habit influences the choice process itself, while in the second case, it only impacts the prior response tendency. Implementation was done using the HSSM toolbox in python (Fengler et al., prep). Download figure Open in new tab Figure 2: illustration of a DDM with collapsing decision boundaries. a Boundary separation, z starting point, τ non-decision time, v drift rate, θ boundary angle, RT Reaction Time. The upper and lower response boundaries represent the two possible response options in a forced alternative-choice task (like a DTT). The model assumes that a response for the upper (lower) response option is made as soon as the evidence accumulation process, which is formalised as a random walk, reaches the corresponding response boundary. Red illustrates one possible evidence accumulation trajectory. Download figure Open in new tab Figure 3: Evidence for reward contingency learning and habitual motor sequence. A In single-target trials (STT) of the repeating-sequence condition (Rep), participants displayed shorter reaction times than in the random (Rand) condition. 
B In STTs of the Rep condition, participants made fewer errors than in the Rand condition. C Proportion of optimal responses for dual-target trial (DTT) types random, congruent and incongruent. In all three conditions, participants responded optimally at above-chance levels. The significant difference between congruent and incongruent DTTs indicates that participants learned a repetition bias. D Reaction times are different between all three types of DTTs. ***: p < 0.001, **: p < 0.01, *: p < 0.05, error bars indicate standard errors of the mean. Here, we defined the upper response boundary as the optimal response option (80% reward probability), whereas the lower boundary was defined as the suboptimal response option (20% reward probability). We furthermore used an absolute cutoff, such that for each participant, response times below 150ms were excluded from the analysis. Collapsing boundaries simulate time pressure ( Fig. 2 ). The parameter a is the boundary separation (i.e., the amount of evidence required before committing to a decision), τ the non-decision time (i.e., components of the RT not related to evidence accumulation, like motor preparation), z the starting point of the evidence accumulation process (i.e., a bias towards one of the response boundaries, with z > 0.5 reflecting a bias towards the optimal response boundary, and z < 0.5 reflecting a bias towards the suboptimal boundary), and v the rate of evidence accumulation (the drift rate). Given that the standard DDM likelihood has a long tail and is poorly suited for modeling data with strict time constraints, the decision threshold a was allowed to collapse within a trial via the angle parameter θ . iv. Code and Data Availability The experimental paradigm with which data collection was done (in javascript), raw data, and analysis code (in python and R) are available on the open science framework: https://osf.io/67txz/?view_only=5dec53c3a89043fc954ce8d512d7bcfc III. 
Results We first describe results obtained by standard linear inference tools, such as ANOVA, t-tests, and Pearson correlations. In the second part, we describe the results obtained by computational modelling. i. Behavioural Analysis i.1 Optimal Responding and Influence of Action Sequence Behavioural analysis was performed similar to our first study with a previous version of the AST, and results replicate our initially reported findings ( Frölich et al., 2023 ). A paired two-sample t-test between reaction times in STTs of the Rand and Rep conditions revealed a highly significant difference (t(59) = 11.0, p < 0.0001, Cohen’s d= 1.4) ( Fig. 3 A ). Participants responded faster in the Rep condition than in the Rand condition, with an average ΔRT= 10.1 ± 7.0ms. Furthermore, a paired two-sample t-test of error rates between the Rand and Rep conditions also showed a highly significant difference (t(59) = 6.7, p < 0.0001, Cohen’s d= 0.9), with lower error rates in the Rep condition ( Fig. 3 B ), and an average ΔER= 0.7 ± 0.8%. We tested the relationship between the differences ΔRT and ΔER (which are the differences in reaction times (error rates) of STTs between Rep condition and Rand condition, Fig. 3 A and B ) and the difference in optimal responding in congruent and incongruent DTTs (we denote this difference as C − I ). Since ΔRT and ΔER measure the strength of the habit outside of choice situations, a positive correlation between those measures and the difference of optimal responding between congruent and incongruent DTTs ( C − I ) indicates that they serve as positive measures of habit (as described in ( Frölich et al., 2023 )). Indeed, we find such a significant positive correlation for both ΔRT (Pearson r = 0.62, p < 0.0001), and ΔER (Pearson r = 0.40, p = 0.002) ( Fig. 4 ). Download figure Open in new tab Figure 4: Behavioural differences in single-target trials predict choice differences in dual-target trials. 
A Reaction-time differences in STTs (ΔRT) correlate with the difference in optimal responding between congruent and incongruent DTTs. B Error rate differences in STTs (ΔER) correlate with the difference in optimal responding between congruent and incongruent DTTs. These results show that reaction-time differences and error-rate differences in STTs induced by the repeating action sequence are predictive of choice behaviour in DTTs. ***: p < 0.001, **: p < 0.01. The results reported here replicate results reported in ( Frölich et al., 2023 ). For each DTT type (congruent, incongruent, random) we computed the proportion of optimal responses (choices of the response option with high reward probability) for each participant as the ratio of high-probability reward responses to all valid responses. A response was counted as valid if participants pressed the key of one (and only one) of the presented stimuli within 600 ms . Average optimal responding across participants and all DTTs was 84.5% ( Fig. 3 C ), showing that participants were able to do the task and generally acted in a goal-directed manner. Note that optimal responding cannot necessarily be interpreted as goal-directed responding in all DTTs: In congruent DTTs, an optimal response might arise from habitual or from goal-directed responding, while in random and incongruent DTTs, optimal responding can be considered equivalent to goal-directed responding. A repeated-measures ANOVA was performed to analyze the effect of DTT type on optimal responding in DTTs ( Fig. 3 C ). The results indicate a highly significant main effect of DTT type (F(2, 118) = 65.2, p < 0.0001, partial η 2 = 0.53). Paired t-tests of logit-transformed optimal response proportions between the different DTT types demonstrate significant differences between all DTT types with most optimal responding in congruent DTTs, followed by random DTTs, and least optimal responding in incongruent DTTs ( Fig. 3 C ). 
These results show that the underlying action sequence influences choice behaviour, increasing optimal responding in congruent DTTs and reducing it in incongruent DTTs, compared to random DTTs. A repeated-measures ANOVA was next performed to analyze the effect of DTT type on reaction times ( Fig. 3 D ). We found a highly significant main effect of DTT type (F(2, 118) = 109.1, p < 0.0001, partial η 2 = 0.65). Paired two-sample t-tests revealed significant pairwise differences between all DTT types. ii. Modelling Results ii.1 Hierarchical Generative Modelling of Behaviour When using standard linear inference statistical techniques as above, the analysis relies on averages of observed choices. For example, above we interpreted the difference of optimal responding between congruent and incongruent context as evidence that the learned habit has an influence on optimal responding ( Fig. 3 C ). The question then arises whether we can learn more about the underlying interaction between habitual and goal-directed controllers by more careful and fine-grained computational modelling. We used computational modelling to test the assumption that the influence of the habitual action varies throughout the experiment and depending on the type of DTT, since habits strengthen through repetition. Furthermore, since here the habit is an action sequence, the effect of the habit in a DTT should depend on the actions performed leading up to that trial, and how often the corresponding sequence of actions was performed in the past ( Schwöbel et al., 2021 ). Lastly, since the repeating action sequence is sometimes congruent and sometimes incongruent with the participants’ goals of responding optimally, we hypothesized that this might lead to different impacts of the habit depending on the context (i.e. congruent or incongruent). We used two concrete ways to model varying habit influence. 
First, we allowed the strength of the learned habitual action sequence to grow over the course of the experiment by implementing a repetition-bias R ( a ) whose value for a given action sequence increases the more often the sequence was executed (see iii). Second, we expected participants to try and maximize their performance in a context-specific fashion. We assumed that participants use the habitual action sequence in STTs and congruent DTTs to increase performance, but suppress the influence of the habit in incongruent DTTs. When analyzing average optimal responding in DTTs at group-level ( Fig. 3 C ), we did not see such a suppression of habitual responding. However, this might have been due to averaging effects. More precise computational modelling of choice behaviour allows us to test this hypothesis at a more fine-grained participant-level while taking advantage of inter-trial variations of responses, and circumventing the ceiling effect of optimal responding around 100% by using a softmax choice rule. We implemented these two aspects (incremental increase of habitual controller strength, and modulation of habitual responding depending on context) with six different models in two model families. Three different models in Model Family 1 computed a repetition-bias based on the action sequences a participant has actually performed in the past, while Model Family 2 only models mean goal-directedness for individual DTT types. We find that models that compute repetition-biases outperform those without a repetition-bias (average WAIC 43 298 vs. 44 777, WAIC: widely applicable information criterion) ( Fig. 5 ). We further find that the three best-performing models allow for different effect strengths of the habit in the congruent and the incongruent context (models 2 (WAIC 43 369 ± 289) and 3 (43 057 ± 289) vs model 1 (WAIC 43 468 ± 290)). However, this effect is weak since the WAICs of models 1 through 3 are all within two standard errors from each other. 
We therefore also computed Bayes-Factors for individual participants based on the ELBOs (ELBO: evidence lower bound) of the inferred models. Bayes Factor analysis corroborated the findings that models 2 and 3 outperform model 1, indicating substantial evidence (Bayes Factor > 3.2) in favor of model 2 over model 1 for 25 of 60 participants (opposed to 17 participants for model 1), and 37 of 60 participants for model 3 over model 1 (opposed to 11 participants for model 1). Download figure Open in new tab Figure 5: Model Comparison. WAIC across both days. Models with repetition-bias (M1-M3) outperform the models without repetition-bias (M4-M6). Within models M1-M3 Error bars denote standard errors for WAIC. We hypothesized that participants for whom we found evidence that they adapt the use of the habit in a context-specific fashion might be able to do so either because they have more cognitive resources at their disposal, or because they are more motivated to perform well in the task. In the first case, we would expect to see an effect of age, that is, participants for whom model 3 outperforms model 1 in the model comparison should be of lower age than the other participants. In both cases we would expect to see more optimal responding in random DTTs in participants for whom model 3 outperforms model 1. Interestingly, we see no effect of age ( t 58 = − 0.2, p = 0.85), and even an opposite effect of optimal responding, that is, participants for whom model 3 outperforms model 1 show lower θ Q on day 2 ( t 58 = 2.3, p = 0.03, Cohen’s d= 0.6). ii.2 Drift-Diffusion Modelling Model comparison showed that roughly half of participants modulate the influence of the habitual action sequence depending on the task context. This modulation could be achieved by two means: Either participants increase the habit’s influence before entering a congruent DTT (proactive control), or conversely, that they decrease the habit’s influence within an incongruent DTT (reactive control). 
Proactive control is an option since the habitual action sequence allows participants to anticipate whether the upcoming habitual response will be in a high reward-probability or a low reward-probability position. The second possibility for modulating the habit’s influence depending on the task context is entering each trial unprepared and then deciding upon a choice depending on the context (reactive control). Since the drift-diffusion model allows for the breakdown of a choice process into prior response tendency and evidence accumulation (an umbrella term for all cognitive processes involved in choosing an action), we can test whether participants modulate the influence of the habit before entering a DTT (changes in prior response tendency; proactive control), or whether this modulation occurs once a trial was started (changes in evidence accumulation; reactive control). We fitted a DDM choice-rule in a DDM with collapsing bounds to account for time pressure (<600 ms). Since there is no repeating action sequence in the random condition, we tested the difference in starting point bias and drift rate in both congruent and incongruent DTTs compared to random DTTs ( Fig. 6 ). We present the analysis of the DDM on day 2 in order to avoid the influence of learning effects at the beginning of the task on day 1 (results for day 1 are however qualitatively similar, see supplementary material). We find that the drift-rate in congruent DTTs is only barely significantly increased compared to random DTTs ( µ (Δ v ) = 0.12 ± 0.48, t (59) = 2.0, p = 0.05, Cohen’s d= 0.26), while it is significantly decreased with a large effect size in incongruent DTTs ( µ (Δ v ) = − 0.39 ± 0.42, t (59) = 7.2, p = 0.0002, Cohen’s d= − 0.93). 
The effect on the starting point bias is reversed: Whereas it is significantly increased for congruent DTTs ( µ (Δ z ) = 0.07 ± 0.06, t (59) = 9.6, p < 0.0001, Cohen’s d= 1.24), the change for incongruent DTTs is not significant ( µ (Δ z ) = 0.009 ± 0.04, t (59) = 1.7, p = 0.09, Cohen’s d= 0.22), compared to random DTTs. These results suggest that congruent and incongruent DTTs are treated differently by participants, again supporting our previous findings that participants adapt their response strategy depending on the DTT context. Since the habitual action sequence can be considered a prior response bias, the results further support the above hypothesis that participants exploit the habitual action sequence in congruent DTTs (starting point bias shifts towards optimal response option). This would imply that participants have learned to exploit their habit proactively in those DTTs where the upcoming sequence element is in a high-reward location. Conversely, in incongruent DTTs, they engage an additional cognitive process (e.g., for interference control). Download figure Open in new tab Figure 6: DDM results of day 2 of the experiment. Distribution of posterior means. A v ( Rand ), Drift Rate in random DTT; B Δ v ( Cong ) & C Δ v ( Inc ): Difference of drift rate in congruent and incongruent DTTs compared to random DTTs. D z ( Rand ), Starting point bias in random DTTs; E Δ z ( Cong ) & F Δ z ( Inc ): Difference of starting point bias in congruent and incongruent DTTs compared to random DTTs. Orange dots denote posterior means for individual participants. A starting point bias of z ( Rand ) = 0.5 means no preference for one of the two responses, with z ( Rand ) > 0.5 indicating a preference for the optimal response option. Here, z ( Rand ) is slightly shifted towards the suboptimal response option. Solid (dashed) lines denote group means (standard deviations). ***: p < 0.001, **: p < 0.01, *: p < 0.05. 
p-values derived from one-sample t-tests against 0.5 (z (Rand)) or 0 (Δv/ Δz). IV. Discussion In this study we investigated the interaction between a habitual action sequence and goal-directed behaviour, using a task where habitual and goal-directed actions are either congruent or incongruent with each other. Crucially, we imposed a tight response time limit of 600ms for responding. We showed that computational models assuming a varying repetition-based habit strength outperform models that do not use this assumption. This result indicates that the influence of the learned habit is not constant throughout the experiment but varies depending on the number of performed sequence repetitions. Computational modelling further showed that ca. 60 % of participants (according to model 3) adaptively modulate the influence of the habit on action selection depending on the task context, that is, whether the habit is congruent or incongruent with the goal in dual-target trials (DTTs). Further, using drift-diffusion modelling, we found that the habit differentially influences prior response tendencies and evidence accumulation (information processing), depending on the context. i. Habitual Action Sequences Strengthen Through Repetition A well-established computational model of habitual behaviour models goal-directed behaviour as model-based reinforcement learning and habitual behaviour as model-free reinforcement learning ( Daw et al., 2005 , 2011 ). This approach assumes that habits are reward-based, and disregards the possibility that habits might be strengthened by repetition alone ( Miller et al., 2019 ; Schwöbel et al., 2021 ). However, some researchers have proposed that habits are strengthened only through repetition ( Miller et al., 2019 ; Schwöbel et al., 2021 ). Further, Thorndike’s law of exercise proposes that the mere execution of an action increases its probability of being chosen again in the future ( Thorndike, 1911 ). 
Here, using computational modelling, we find evidence that habits are learned through repetition only, and that the habit strength varies with the number of repetitions performed in the past. In the model, we explicitly described the habit as a sequence. This is in line with the formulation of habits as action sequences ( Dezfouli and Balleine, 2012 ; Schwöbel et al., 2021 ), and with experimental evidence that the basal ganglia, which are heavily involved in habitual behaviour, chunk individual actions into sequences ( Graybiel, 1998 ). For example, ( Dezfouli and Balleine, 2012 ) proposed a theoretical framework for habits as action sequences, where individual actions are chunked into one habitual action sequence. In that framework, action sequences are executed to increase reward per unit time, since habitual action sequences are executed faster than their constituent actions individually. In agreement with that framework, we found that the learned action sequence leads to reduced error rates and reduced reaction times in the repeating-sequence condition, therefore implicitly increasing the reward per unit time in single-target trials. Future research will have to elucidate how reinforcement and sequence repetition combine to create habitual action sequences like the one observed here. ii. Habits Are Modulated Depending on Context Model comparison showed that according to the best model 3, for ca. 60 % of participants the influence of a habit is modulated by context in DTTs, that is, the modulation depends on whether the habit is congruent or incongruent with the participants’ instructed goals. For those participants who show evidence of such a modulation, habit influence in incongruent DTTs is reduced, compared to congruent DTTs. As congruent and incongruent DTTs are rare (15% of all trials) and are interspersed with STTs, this means that the habit influence is modulated adaptively in a trial-specific fashion. 
It is likely that one dominant factor for limited context-specific responding was the rather demanding response time limit of 600ms. Using drift-diffusion modelling, we further investigated mechanisms that may explain the trial-wise modulation of habit influence. We found a context-specific influence of the learned action sequence on choices. In congruent trials, the congruency between goal and habit increases proactive control, which manifests as a shift of the prior response tendency towards the optimal response. The fact that no prior response tendency towards the optimal response is observed in random DTTs suggests that goal-directed processes only create a prior response tendency when interacting with the habitual response, but are not used to create a prior response tendency on their own. In incongruent trials, the incongruency between goal and habit appears to lead to a reactive control process, which is evidenced by a decreased drift rate. This might be the result of an adaptive inhibition of the habitual response, in favor of increased optimal responding. Such suppression is well known in the field of executive function research as interference control ( Diamond, 2013 ). In summary, the present findings show that ca. 60 % of participants modulate their learned habit in a context-specific fashion in order to increase optimal responding under time pressure. This rapid and adaptive modulation allows them to use their habit in a goal-directed manner when it is beneficial to performance, and minimize its impact on performance when acting habitually would be detrimental. In the present case, the context is defined by the congruency between habitual and goal-directed action, which raises the question of the nature of interaction between goal-directed and habitual control. Further research is warranted to investigate whether such context-specific interaction underlies the balancing between habitual and goal-directed control in general. 
Given that only around 60% of participants were able to adaptively allocate inhibitory control over the habit in the incongruent context, future research should investigate whether such individual differences are related to mental disorders of maladaptive habits, such as addiction. A uthor C ontributions SF, MS, and SK conceptualized experiment and data analysis (except drift-diffusion modelling). SF collected the data and programmed the data analysis (except drift-diffusion model). SF and BW conceptualized data analysis with drift-diffusion model. BW programmed data analysis with drift-diffusion model. SF, BW, MS, and SK interpreted the results and wrote the manuscript. V. F unding Funded by the German Research Foundation (DFG, Deutsche Forschungsgemeinschaft), SFB 940- Project number 178833530, TRR 265 - Project number 402170461, GRK 2773 - Project number 454245598, and as part of Germany’s Excellence Strategy – EXC 2050/1 – Project number 390696704 – Cluster of Excellence “Centre for Tactile Internet with Human-in-the-Loop” (CeTI) of Technische Universität Dresden. Footnotes https://osf.io/67txz/?view_only=5dec53c3a89043fc954ce8d512d7bcfc R eferences ↵ Balleine , B. W. and Dezfouli , A. ( 2019 ). Hierarchical action control: Adaptive collaboration between actions and habits . Frontiers in Psychology , page 2735 . ↵ Bouton , M. E. ( 2019 ). Extinction of instrumental (operant) learning: interference, varieties of context, and mechanisms of contextual control . Psychopharmacology , 236 ( 1 ): 7 – 19 . OpenUrl CrossRef PubMed ↵ Cushman , F. and Morris , A. ( 2015 ). Habitual control of goal selection in humans . Proceedings of the National Academy of Sciences , 112 ( 45 ): 13817 – 13822 . OpenUrl Abstract / FREE Full Text ↵ Daw , N. D. , Gershman , S. J. , Seymour , B. , Dayan , P. , and Dolan , R. J. ( 2011 ). Model-based influences on humans’ choices and striatal prediction errors . Neuron , 69 ( 6 ): 1204 – 1215 . 
OpenUrl CrossRef PubMed Web of Science ↵ Daw , N. D. , Niv , Y. , and Dayan , P. ( 2005 ). Uncertainty-based competition between prefrontal and dorsolateral striatal systems for behavioral control . Nature neuroscience , 8 ( 12 ): 1704 – 1711 . OpenUrl CrossRef PubMed Web of Science ↵ Dezfouli , A. and Balleine , B. W. ( 2012 ). Habits, action sequences and reinforcement learning . European Journal of Neuroscience , 35 ( 7 ): 1036 – 1051 . OpenUrl CrossRef PubMed ↵ Diamond , A. ( 2013 ). Executive functions . Annual review of psychology , 64 ( 1 ): 135 – 168 . OpenUrl CrossRef PubMed Web of Science ↵ Dolan , R. J. and Dayan , P. ( 2013 ). Goals and habits in the brain . Neuron , 80 ( 2 ): 312 – 325 . OpenUrl CrossRef PubMed Web of Science ↵ Everitt , B. J. and Robbins , T. W. ( 2005 ). Neural systems of reinforcement for drug addiction: from actions to habits to compulsion . Nature neuroscience , 8 ( 11 ): 1481 – 1489 . OpenUrl CrossRef PubMed Web of Science ↵ Everitt , B. J. , Robbins , T. W. , et al. ( 2016 ). Drug addiction: updating actions to habits to compulsions ten years on . Annu Rev Psychol , 67 ( 1 ): 23 – 50 . OpenUrl CrossRef PubMed Fengler , A. , Xu , P. , Bera , K. , Omar , A. , and Frank , M. J. (“in prep”). Hssm: A generalized toolbox for hierarchical bayesian estimation of computational models in cognitive neuroscience . Manuscript in preparation . ↵ Forstmann , B. U. , Ratcliff , R. , and Wagenmakers , E.-J. ( 2016 ). Sequential sampling models in cognitive neuroscience: Advantages, applications, and extensions . Annual review of psychology , 67 : 641 – 666 . OpenUrl CrossRef PubMed ↵ Frölich , S. , Esmeyer , M. , Endrass , T. , Smolka , M. N. , and Kiebel , S. J. ( 2023 ). Interaction between habits as action sequences and goal-directed behavior under time pressure . Frontiers in Neuroscience , 16 : 996957 . OpenUrl ↵ Graybiel , A. M. ( 1998 ). The basal ganglia and chunking of action repertoires . 
Neurobiology of learning and memory , 70 ( 1-2 ): 119 – 136 . OpenUrl CrossRef PubMed Web of Science ↵ Hanks , T. D. , Mazurek , M. E. , Kiani , R. , Hopp , E. , and Shadlen , M. N. ( 2011 ). Elapsed decision time affects the weighting of prior probability in a perceptual decision task . Journal of Neuroscience , 31 ( 17 ): 6339 – 6352 . OpenUrl Abstract / FREE Full Text ↵ Hardwick , R. M. , Forrence , A. D. , Krakauer , J. W. , and Haith , A. M. ( 2019 ). Time-dependent competition between goal-directed and habitual response preparation . Nature Human Behaviour , 3 ( 12 ): 1252 – 1262 . OpenUrl ↵ Keramati , M. , Dezfouli , A. , and Piray , P. ( 2011 ). Speed/accuracy trade-off between the habitual and the goal-directed processes . PLoS computational biology , 7 ( 5 ): e1002055 . OpenUrl ↵ Keramati , M. , Smittenaar , P. , Dolan , R. J. , and Dayan , P. ( 2016 ). Adaptive integration of habits into depth-limited planning defines a habitualgoal–directed spectrum . Proceedings of the National Academy of Sciences , 113 ( 45 ): 12868 – 12873 . OpenUrl Abstract / FREE Full Text ↵ Lee , S. W. , Shimojo , S. , and O’doherty , J. P. ( 2014 ). Neural computations underlying arbitration between model-based and model-free learning . Neuron , 81 ( 3 ): 687 – 699 . OpenUrl CrossRef PubMed Web of Science ↵ Lewicki , P. , Hill , T. , and Bizot , E. ( 1988 ). Acquisition of procedural knowledge about a pattern of stimuli that cannot be articulated . Cognitive psychology , 20 ( 1 ): 24 – 37 . OpenUrl CrossRef PubMed Web of Science ↵ Miller , K. J. , Shenhav , A. , and Ludvig , E. A. ( 2019 ). Habits without values . Psychological review , 126 ( 2 ): 292 . OpenUrl CrossRef ↵ Mulder , M. J. , Wagenmakers , E.-J. , Ratcliff , R. , Boekel , W. , and Forstmann , B. U. ( 2012 ). Bias in the brain: a diffusion model analysis of prior probability and potential payoff . Journal of Neuroscience , 32 ( 7 ): 2335 – 2343 . OpenUrl Abstract / FREE Full Text ↵ Nissen , M. J. and Bullemer , P. 
( 1987 ). Attentional requirements of learning: Evidence from perfor-mance measures . Cognitive psychology , 19 ( 1 ): 1 – 32 . OpenUrl CrossRef Web of Science ↵ Pedersen , M. L. , Frank , M. J. , and Biele , G. ( 2017 ). The drift diffusion model as the choice rule in reinforcement learning . Psychonomic bulletin & review , 24 : 1234 – 1251 . OpenUrl ↵ Peters , J. and D’Esposito , M. ( 2020 ). The drift diffusion model as the choice rule in inter-temporal and risky choice: A case study in medial orbitofrontal cortex lesion patients and controls . PLoS computational biology , 16 ( 4 ): e1007615 . OpenUrl ↵ Pezzulo , G. , Rigoli , F. , and Chersi , F. ( 2013 ). The mixed instrumental controller: using value of information to combine habitual choice and mental simulation . Frontiers in psychology , 4 : 32689 . OpenUrl ↵ Robertson , E. M. ( 2007 ). The serial reaction time task: implicit motor skill learning? Journal of Neuroscience , 27 ( 38 ): 10073 – 10075 . OpenUrl FREE Full Text ↵ Schwöbel , S. , Marković , D. , Smolka , M. N. , and Kiebel , S. ( 2024 ). Joint modeling of choices and reaction times based on bayesian contextual behavioral control . PLOS Computational Biology , 20 ( 7 ): e1012228 . OpenUrl ↵ Schwöbel , S. , Marković , D. , Smolka , M. N. , and Kiebel , S. J. ( 2021 ). Balancing control: a bayesian interpretation of habitual and goaldirected behavior . Journal of mathematical psychology , 100 : 102472 . OpenUrl CrossRef ↵ Thorndike , E. ( 1911 ). Animal intelligence: Experimental studies. Macmillan . ↵ van Ravenzwaaij , D. , Mulder , M. J. , Tuerlinckx , F. , and Wagenmakers , E.-J. ( 2012 ). Do the dynamics of prior information depend on task context? an analysis of optimal performance and an empirical test . Frontiers in psychology , 3 : 132 . OpenUrl ↵ Wagner , B. , Mathar , D. , and Peters , J. ( 2022 ). Gambling environment exposure increases temporal discounting but improves model-based control in regular slot-machine gamblers . 
Computational Psychiatry , 6 ( 1 ): 142 . OpenUrl ↵ Wood , W. , Quinn , J. M. , and Kashy , D. A. ( 2002 ). Habits in everyday life: thought, emotion, and action . Journal of personality and social psychology , 83 ( 6 ): 1281 . OpenUrl CrossRef PubMed Web of Science ↵ Wood , W. and Rünger , D. ( 2016 ). Psychology of habit . Annual review of psychology , 67 ( 1 ): 289 – 314 . OpenUrl CrossRef PubMed ↵ Yin , H. H. and Knowlton , B. J. ( 2006 ). The role of the basal ganglia in habit formation . Nature Reviews Neuroscience , 7 ( 6 ): 464 – 476 . OpenUrl CrossRef PubMed Web of Science ↵ Zhang , C. , van Wissen , A. , Dotsch , R. , Lakens , D. , and IJsselsteijn , W. A. ( 2024 ). A sequential sampling approach to the integration of habits and goals . Computational Brain & Behavior , pages 1 – 22 . View the discussion thread. Back to top Previous Next Posted September 30, 2024. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Context-Dependent Interaction Between Goal-Directed and Habitual Control Under Time Pressure Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Context-Dependent Interaction Between Goal-Directed and Habitual Control Under Time Pressure Sascha Frölich , Ben J. Wagner , Michael N. Smolka , Stefan J. 
Kiebel bioRxiv 2024.09.28.615575; doi: https://doi.org/10.1101/2024.09.28.615575 Share This Article: Copy Citation Tools Context-Dependent Interaction Between Goal-Directed and Habitual Control Under Time Pressure Sascha Frölich , Ben J. Wagner , Michael N. Smolka , Stefan J. Kiebel bioRxiv 2024.09.28.615575; doi: https://doi.org/10.1101/2024.09.28.615575 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Neuroscience Subject Areas All Articles Animal Behavior and Cognition (7644) Biochemistry (17728) Bioengineering (13917) Bioinformatics (42038) Biophysics (21489) Cancer Biology (18637) Cell Biology (25553) Clinical Trials (138) Developmental Biology (13401) Ecology (19941) Epidemiology (2067) Evolutionary Biology (24367) Genetics (15622) Genomics (22547) Immunology (17764) Microbiology (40475) Molecular Biology (17208) Neuroscience (88749) Paleontology (667) Pathology (2842) Pharmacology and Toxicology (4834) Physiology (7659) Plant Biology (15175) Scientific Communication and Education (2047) Synthetic Biology (4304) Systems Biology (9835) Zoology (2272)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2024) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-06-17T06:32:23.968882+00:00