Single-Neuron Encoding of Learnability in the Dorsal Anterior Cingulate Cortex

doi:10.1101/2025.09.29.679390

Single-Neuron Encoding of Learnability in the Dorsal Anterior Cingulate Cortex

2025 · doi:10.1101/2025.09.29.679390

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 61,401 characters · extracted from preprint-html · click to expand

Single-Neuron Encoding of Learnability in the Dorsal Anterior Cingulate Cortex | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Single-Neuron Encoding of Learnability in the Dorsal Anterior Cingulate Cortex Yuhao Jin , Greg Jensen , View ORCID Profile Vincent Ferrera , Jacqueline Gottlieb doi: https://doi.org/10.1101/2025.09.29.679390 Yuhao Jin 1 Department of Biological Sciences, Columbia University , New York, NY 10027 5 Mortimer B. 
Zuckerman Mind Brain Behavior Institute, Columbia University , New York, NY 10027 Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: yj2525{at}columbia.edu Greg Jensen 2 Department of Psychology, Reed College , Portland, OR 97202 3 Department of Neuroscience, Columbia University , New York, NY 10027 5 Mortimer B. Zuckerman Mind Brain Behavior Institute, Columbia University , New York, NY 10027 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Vincent Ferrera 3 Department of Neuroscience, Columbia University , New York, NY 10027 4 Kavli Institute for Brain Science, Columbia University , New York, NY 10027 5 Mortimer B. Zuckerman Mind Brain Behavior Institute, Columbia University , New York, NY 10027 Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Vincent Ferrera For correspondence: vpf3{at}cumc.columbia.edu Jacqueline Gottlieb 3 Department of Neuroscience, Columbia University , New York, NY 10027 4 Kavli Institute for Brain Science, Columbia University , New York, NY 10027 5 Mortimer B. Zuckerman Mind Brain Behavior Institute, Columbia University , New York, NY 10027 Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: jg2141{at}columbia.edu Abstract Full Text Info/History Metrics Preview PDF Abstract In natural environments, associations that indicate true learnable regularities are intermixed with those that arise from random and ultimately unlearnable relationships between events. To efficiently allocate cognitive resources and avoid inferring spurious patterns, organisms must distinguish learnable from unlearnable associations, but the mechanisms underlying this ability are not understood. 
We recently showed that monkeys performing a transitive inference task, while discovering the true hidden order in learnable image sets, also behaved to varying degrees as if they inferred subjective order in objectively random (unlearnable) image sets. Here, we show that the ability to detect learnability is encoded by neurons in the dorsal anterior cingulate cortex (dACC, area 24c). dACC neurons responded strongly after a decision outcome as reported in previous studies and, additionally, signaled whether a trial was from a learnable vs unlearnable set before outcome delivery, and showed interactions whereby their selectivity for the outcome (reward vs lack of reward) was stronger for learnable versus unlearnable sets. Learnability and interaction responses were independent of sensory or reward cues (which were equated for learnable and unlearnable sets) but their strength correlated with the monkeys’ ability to avoid inferring false order in unlearnable sets. The findings suggest that the dACC is part of a network that monitors learnability and enables animals to appropriately focus learning on true patterns while avoiding false inferences about spurious and random associations. Introduction Cognitive neuroscience has made significant progress in understanding the theoretical and neural basis of associative and rule-based learning in environments in which participants are exposed to predictable rules, and are rewarded for learning responses that conform to these rules. Such environments, however, differ greatly from natural settings, in which animals face not only consistent associations that can be fruitfully learned but also many coincidental associations that are random and unlearnable. To learn efficiently in such settings individuals need not only learn but decide when to learn (i.e., distinguish true from random associations and direct learning accordingly). 
This ability is crucial both to avoid laboring in vain on ultimately unlearnable tasks, and to avoid erroneously inferring fictitious structures in objectively random events. Despite its importance, however, the behavioral and neural mechanisms of learnability detection are not well understood. Using learnability for controlling behavior poses a considerable computational challenge because it requires animals to estimate whether a true structure exists before fully learning the structure. In experiments that provide explicit learnability cues, participants readily react to these cues (e.g., humans modulate evidence accumulation and learning rates depending on whether they are instructed that rewards are random or that rewards are action-contingent, learnable and controllable) 1 , 2 , and mice slow their choices to boost learning earlier in a session, but only if they view discriminable (learnable) and not blurred and non-discriminable visual cues 3 . However, studies using more ecological settings in which learnable and unlearnable stimuli are randomly interleaved and uncued produced mixed results. On one hand, infants attend more to unlabeled learnable vs random linguistic grammars 4 and pigeons prefer otherwise identical sequences that differ only in the presence or absence of consistent relationships between stimuli and rewards 5 . On the other hand, humans who could freely allocate study time tended to focus on unlearnable games at the expense of mastering learnable games 6 , and humans often suffer from illusions of control: the erroneous belief that one can predict stochastic events 7 – 9 . Thus, humans and animals show variable capacity to autonomously estimate learnability in natural settings, which may depend on the specific individual, environment and task. A crucial question concerns the neural basis of learnability detection. 
A recent computational model, building on the proposed roles of the anterior cingulate cortex (ACC) in monitoring and control 14 – 18 , proposes that the ACC acts as a “meta reinforcement learning controller” that recruits cognitive processes (attention, memory and/or learning rates) based on the context and demands of a task 11 – 13 . Consistent with this view, studies using fMRI and ERP have shown that the human executive network is sensitive to controllability when it is explicitly cued 1 , 2 , 8 – 10 . It remains unclear, however, whether or how this network encodes autonomous estimates of learnability independently of such cues. Here we examined this question by recording dACC neuronal activity during a transitive inference (TI) task in which monkeys learned to recognize abstract structures in ordered versus random pictorial sets 19 , 20 . In each session, monkeys made choices between image pairs that could be drawn from one of two sets: a learnable (L) set in which choices were rewarded according to a consistent hierarchical order (i.e., A was better than B, B was better than C, etc.), or an unlearnable (U) set in which choices were rewarded randomly. Importantly, L and U trials were fully unsignaled, being randomly interleaved and equated in terms of appearance and reward rates, and differed only in the presence or absence of a latent hierarchical structure. We recently showed that, while monkeys reliably discovered the true hierarchy of the L sets, they often chose as if they inferred a subjective order in the U sets, and this behavior reflected the use of an internal model of real or fictitious ordering that contravened simple reward-based associative learning 21 . Here, we show that individual ACC neurons encode learnability independently of rewards and correlate with variability in the monkeys’ ability to distinguish L and U sets. 
The findings illuminate the neural mechanisms by which animals autonomously estimate learnability and avoid inferring fictitious structures in random events. Results Monkeys showed variable ability to distinguish between learnable and unlearnable sets Adult male rhesus monkeys ( N =2) performed a task in which they chose one of two pictorial stimuli and were rewarded either randomly or according to a learnable rule. Each trial began with 500 msec of central fixation, after which the monkeys were shown two images at symmetric locations in the right and left visual field and, after a 400 ms delay, made a saccade to the stimulus of their choice ( Fig. 1A, B ). During each behavioral session, monkeys were tested with 20 novel image pairs organized in two sets. Images in the “learnable” (L) set had an objective (hidden) ranking ( Fig. 1C ) and monkeys were rewarded if they chose the item with the superior rank in each pair (e.g., given image pair B, D, reward was delivered for B but not D; Fig. 1A ). In contrast, images in the unlearnable (U) set had no objective ordering ( Fig. 1D ), and monkeys were rewarded probabilistically and independently of their choices ( Fig. 1B ). Each session introduced new L and U pictorial sets, and monkeys interacted with them over a minimum of 784 trials, including an initial training phase of 384 trials presenting only 4 adjacent-rank pairs from the L set and 4 randomly chosen pairs from the U set, and a subsequent testing phase randomly interleaving all 20 possible pairs from both sets ( Fig. 1E, F ). Monkeys G and F completed, respectively, 40 and 41 sessions with concurrent recording of dACC cells. Download figure Open in new tab Fig. 1 Task A-F: task paradigm. A-D : Task consisted of two pictorial lists with five items each where one list faithfully observed a predictable order (Learnable L list, C ) whereas the other was completely unordered (Unlearnable U list D ). 
Outcomes were fully contingent on the relative ordering for the L list and independent of it for the U list. E-F : The task was divided into a training phase (only the adjacent pairs in L and four random pairs in U) followed by a testing phase with all the pairs presented. G, H: the evolution of preference levels over all stimuli over trials for the L set ( Fig G left, according to the objective rank ), strong ordering U set ( Fig H left ) and weak ordering U set ( Fig H right, according to the subjective rank ), which was calculated as the sliding window averages of chosen frequencies (n= 32 and 40 for the training and testing, stepped by 1 trial). I : The evolution of reward rates across trials. Traces show the reward rates for L (red) and U (blue) stimuli as a function of trials (sliding window of 10 trials stepped by 1 trial, N = 41 in monkey F and N = 40 in monkey G). Reward rates on the L set increased across trials, indicating that the monkeys learned the correct ordering, and were dynamically yoked with those on the U set as explained in the text. Error bars in G-I denote standard error (SE). Monkeys showed subjective ordering on the U set whose strength differed by session Consistent with our previous report on this task 21 , the monkeys reliably learned the correct ordering for the L sets during the training phase and quickly generalized it to the new pairs at the start of the testing phase ( Fig. 1G ). Importantly, the monkeys also developed robust ordering preferences on the objectively random U set, consistently choosing as if some stimuli had higher ranks relative to others, although the strength of this preference differed by session ( Fig. 1H ). These preferences could not be ascribed to visual cues, as images from the L and U sets were visually equivalent and randomly interleaved. 
Importantly, we dynamically adjusted the reward probability on the U set to be equal to that on the previous 10 trials on the L set, and verified that the reward rates were identical on the two sets, both on average and in their trial by trial dynamics ( Fig. 1I , Comparison of averaged reward rates between L and U, Wilcoxon test, Pooled: p = 0.68; Monkey F: p = 0.72; Monkey G: p = 0.74). Thus, reward rates provided no explicit cues to the ordering that was in effect in a trial. As in our previous study 21 , we quantified the monkeys’ ordering preferences (OP), by fitting choice data during the testing phase with a hierarchical Bayesian model that assumed a noisy internal representation of stimulus rank ( Fig. 2A , top; Methods ). Using this fitting procedure, we derived a z -score indicating the monkeys’ preference for each particular image, such that more dissimilar/similar z -scores indicated stronger/weaker OP for a particular set ( Fig. 2A , bottom; Methods ). We then plotted the z -scores as a function of stimulus rank (the objective rank for the L set and subjective rank for the U set) and used the slope of the linear fit as a measure of OP (respectively, OP L and OP U ; Fig. 2B ), to determine if the empirically measured OP exceeded the baseline expected from random responding ( Methods ). Download figure Open in new tab Fig. 2 Analysis of ordering preference A. Pipeline for subjective ordering analysis. The chosen frequencies for each stimulus were fit by a mechanistic model assuming that stimuli had an ordered internal representation, which produced a set of z -scores indicating weaker ( left ) or stronger ( right ) subjective ordering. B . Example sessions with strong and weak OP Diff . The top and bottom rows show two representative sessions with, respectively, strong versus weak behavioral differentiation between L and U sets. 
In each session, the colored numbers show the OP for the L and U sets, and the colored traces show the corresponding linear fits of z -scores as a function of rank. Shading shows 95% CI of the fitted slopes. Gray traces indicate baseline slopes inferred by simulating an agent that emits random responses (average and 95% CI of fitted slopes over 81 simulations). C. Comparison of OP L and OP U . Each point is one session. The dashed trace is the equality diagonal. The gray distribution shows OP Diff . Within it, the dashed trace indicates equal ordering for L and U sets (OP Diff = 0), and positive values (plotted to the left) indicate greater behavioral differentiation. OP L values had an overall mean and standard error (SE) of 0.79 +/- 0.012, which was significantly greater than the baseline OP in each monkey ( Fig 2B ; monkey F: 0.82 +/- 0.018, p < 0.0001 vs baseline of 0.18 +/- 0.016; N = 41, paired t-test; monkey G, 0.77 +/- 0.014, p < 0.0001 vs baseline of 0.172 +/- 0.014, N = 40, paired t-test). OP L values were significantly above baseline in each individual session (95% CI monkey F: [0.79,0.86]; monkey G: [0.75,0.8]), confirming that both monkeys reliably inferred the true order of all the L sets. Remarkably, OP was also greater than expected by chance for U sets, indicating that monkeys had a significant tendency to respond as if these sets had a hidden order (OP U , monkey F: 0.51 +/- 0.04, p < 0.0001 vs baseline of 0.18 +/- 0.016, N = 41, paired t-test; monkey G, 0.53 +/- 0.04, p < 0.0001 vs baseline of 0.172 +/- 0.014). However, OP U was more variable relative to OP L (standard deviations of 0.27 vs 0.10; Fligner-Killeen Test, N = 81, p < 0.0001) suggesting that subjective ordering preferences for the U sets varied by session. Because we were interested in the monkeys’ ability to distinguish between the two sets, we computed the differential OP (OP Diff ) by taking the difference between OP L and OP U for each session. As shown by the histogram in Fig. 
2C , OP Diff formed a broad distribution ranging from values close to 1.0 (indicating sessions in which the monkeys showed clear ordering for the L but not U sets) to near-zero for sessions in which monkeys treated both sets as being equally ordered. dACC Neurons Encode Learnability Independently of Reward To examine the neural mechanisms underlying the detection of learnability, we recorded neural activity in the dorsal anterior cingulate sulcus (dACC), focusing on the dorsal portion of the anterior third of the sulcus (Brodmann area 24c) that has been reported to respond to uncertainty and conflict 22 , 23 ( Fig 3A ; Methods ). We report the activity of 1072 neurons (436 from monkey F) that were recorded using multi-contact probes and classified as single-units based on offline sorting, but were not otherwise pre-selected for specific task responses ( Methods ). The neurons had sustained elevated responses between the onset of the choice stimuli until after the monkeys’ saccades, and additional phasic responses to the delivery of the outcome (reward or no reward accompanied by, respectively, high and low pitch tones; Fig 3B ). Download figure Open in new tab Fig 3 Summary of recording sites and neural response A. fMRI images showing the approximate extent of the recording areas for each monkey. The top images show the coronal plane 26 mm anterior to anterior commissure in each monkey; the bottom images show the sagittal plane, 3.5 mm to the left of midline. B. Average neural response aligned on stimulus onset. Traces show mean and SE of the peri-stimulus time histograms across all neurons for each subject (Monkey F: circle, N = 436 neurons; Monkey G: cross, N = 636 neurons) as well as combined (solid. N = 1072). The small gray histograms on the x-axis show the distributions of saccade onsets and outcomes delivery across all trials. 
To determine whether and how the cells encoded task variables, we fit firing rates using a GLM model that included trial-by-trial regressors for learnability (L vs U), outcome (reward/no-reward) and their interaction, alongside a nuisance regressor for saccade direction to rule out directional selectivity confounds, and evaluated the model for each cell in 3 task epochs (800 ms pre-choice, post-choice and post-outcome; Methods ). Over one third of the cells (36.8% (395/1072); monkey F, 47% (205/436); monkey G, 30% (190/636)) significantly encoded learnability in at least one task epoch (Wald z test, p < 0.05 after Benjamini-Hochberg multiple comparison correction). Of this subset, slightly over half (monkey F: 53% (108/205); monkey G, 58%, (111/190)), had positive coefficients indicating stronger responses for L vs U sets, while the remainder had significantly stronger responses for U vs L sets, as illustrated in Fig. 4A , left (top vs bottom). To estimate the time-course and strength of the learnability effects, we fit the GLM model ( Methods ) to time-resolved firing rates (average firing rate in 300 ms moving windows with a 50 ms step) and computed the coefficient of partial determination (CPD), which measures the fraction of each neuron’s firing rate variability attributable to each factor ( Methods ). Learnability accounted for a significantly larger fraction of variance than expected by chance (after shuffling trial labels; Methods ) throughout the pre-choice, post-choice and post-outcome epochs. This was the case in both the subset of responsive cells ( Fig. 4B , left, thin trace) and across the entire population of cells ( Fig. 4B , left, thick trace). Download figure Open in new tab Fig 4 dACC neurons encoded learnability, outcome and their interactions. A . PSTHs for the subsets of neurons encoding Learnability, Outcome and their Interactions. Neurons are separated by the signs of the GLM coefficients as explained in the text. 
Each trace shows the average and SE of the PSTH over the indicated number of cells (constructed after z-scoring relative to the averaged firing rates over 500ms before stimulus onset). Other conventions as in Fig. 3B . B . CPD for Learnability, Outcome and Interaction. Each trace shows the mean and SE of the CPD computed in a sliding window (size = 300ms, stride = 50ms) and averaged over the cells encoding the respective variable (black trace) or the entire population of cells (dark gray trace). Thick points in each trace show windows in which the CPD differed from the shuffled control. Dashed traces show the shuffled control (mean and SE over 50 iterations). C. Cross-validated decoding accuracy of Learnability and Outcome . The traces show the average of the excess decoding accuracy, defined as the accuracy in decoding each variable based on the firing rates of all cells minus the average accuracy after shuffling trial labels. Error bars show 95% CI, and darker symbols show significant decoding (CI’s do not overlap 0). The large panel on the left shows the pooled data and the smaller panels on the right show individual monkeys. D. CPD for outcome encoding is larger in L vs U sets. Each point is the averaged CPD for the outcome (reward vs no reward) across neurons for each session and set. A box-and-whisker plot is overlayed on the points; large black symbols show mean and SE. Independently of their learnability modulation, nearly 2/3 of the cells encoded the outcome, distinguishing between a reward and lack of reward (Wald z test, p < 0.05, Benjamini-Hochberg correction; 66% ( 703 / 1072 ) overall; monkey F, 65% ( 282 / 436 ), monkey G, 66%, ( 421 / 636 )). 
Among the reward-encoding cells, similar fractions showed positive and negative coefficients, responding significantly more strongly after a reward vs no-reward outcome (overall: 53% ( 369 / 703 ); monkey F 52% ( 146 / 282 ), monkey G 53%, ( 223 / 421 )), with the remainder showing the opposite modulation as depicted in the top and bottom rows in Fig. 4A (middle column). Interestingly, some positive reward-coding was present before reward delivery ( Fig. 4A , top row middle column) reflecting the fact that, on L sets, the reward outcome was strongly associated with the rank of the chosen stimulus and could thus be predicted before the choice ( Methods ). However, time-resolved CPD analysis showed that the fraction of variance explained by the outcome was significant only after outcome delivery, in both the reward-responsive cells and the whole population ( Fig. 4B , middle column). Together, the results show that dACC neurons had robust responses to learnability that were distinct from their previously-documented outcome responses. To verify the robustness of this finding and better understand the relative time-course of the two signals, we used the alternative method of logistic decoding to classify each variable based on the firing rates of all cells. Cross-validated decoding accuracy (in sessions with more than 10 simultaneously recorded cells) significantly exceeded the expected null-level performance for both learnability and outcome, showing that both variables could be reliably decoded from the overall dataset and each individual monkey ( Fig. 4C ). In the pre-choice, post-choice and post-outcome epochs, learnability was decoded with average accuracy of, respectively, 54.0% [53%, 54.6%], 55.1% [54.3%, 56%], and 58.3% [57.2%, 59.3%],significantly exceeding the levels expected by chance in each case (respectively, xx% [51%, 51.2%], xx% [51%, 51.2%], and xx% [50.7%, 50.9%]). 
Interestingly, learnability decoding persisted and even increased in the post-choice and post-outcome epochs relative to the pre-choice epoch, suggesting that the neurons conveyed sustained signals of learnability rather than merely distinguishing the L vs U image sets. Moreover, while learnability was decoded before the monkey’s decision, reward decoding became marginally significant only after the decision and was strong only after outcome delivery ( Fig. 4C ). Given that the neurons encoded both learnability and the outcome, we examined if these responses interacted – i.e., if learnability modulated the neurons’ ability to distinguish between a reward and no-reward outcome. Consistent with this hypothesis, the GLM analysis showed that 20% of the cells had significant interaction coefficients in at least one task epoch (Wald test, p < 0.05, Benjamini-Hochberg corrected; overall: 20%, ( 213 / 1072 ); monkey F: 19%, ( 85 / 436 ); monkey G: 20%, ( 128 / 636 )). Interaction coefficients could be positive or negative based on whether the cells responded more to a reward or lack of reward, as noted above ( Fig. 4A , right). Time-resolved CPD analysis showed that interaction effects accounted for a significant fraction of firing rate variance during the post-choice and post-outcome epochs, both in the subset of sensitive cells and in the full population ( Fig. 4B , right panel). Interestingly, whether the coefficients were positive or negative, the interactions reflected a stronger neural differentiation between reward and no-reward outcomes on L than U sets. Thus, neurons with positive interactions showed a stronger preference for reward over lack of reward in L vs U sets ( Fig. 4A , top right), and neurons with negative interactions showed a stronger preference for no-reward vs reward outcomes in L vs U sets ( Fig. 4A , bottom right). 
This was confirmed by a follow-up analysis in which we computed the CPD for encoding reward/no-reward during the post-outcome epoch for each stimulus set, and found that CPD was significantly larger in L vs U sets ( Fig. 4D , Methods , pooled data p = 0.04, monkey F, p < 0.0001; monkey G, p = 0.05, Wilcoxon rank-sum test). In sum, dACC neurons encoded learnability independently of rewards and showed stronger reward-related responses in L vs U sets. Neural Learnability and Interaction Effects Correlate with Behavior To determine how the neural responses were related to behavior, we examined the correlations between the strength of neuronal encoding and the monkeys’ ability to show differential ordering of L vs U lists (denoted by OP diff ). We reasoned that, if the neurons contributed to behavior, on sessions in which the monkey was better able to differentiate between the L and U lists, the neurons would also show stronger encoding of Learnability, Outcome and/or Interactions. We found that OP diff showed significant positive correlations with the neural encoding of Learnability and Interaction but not Outcome. Fig. 5A illustrates the results from a session-level analysis in which we correlated the OP diff for each session with the average CPD across the cells in that session. Spearman rho values were significantly positive for Learnability (0.29, p = 0.01), and Learnability x Outcome interaction (0.41, p = 0.0002) but not for Outcome CPD by itself (-0.02, p = 0.88). The results were robust in each monkey individually (monkey F: respective rho = 0.32 ( p = 0.05), 0.35 ( p = 0.02) and -0.11 ( p = 0.48); monkey G: 0.39 ( p = 0.013), 0.58 ( p < 0.0001) and 0.06 ( p = 0.69)). Results were also robust in an alternative analysis that correlated session-level OP diff with the CPD for individual cells in a session (full dataset, rho = 0.2 ( p < 0.0001), 0.15 ( p < 0.0001) and 0.11 ( p = 0.12) for, respectively, Learnability, Interaction and Outcome, replicated in each monkey). 
Download figure Open in new tab Fig 5 dACC activity correlates with behavior A : OP diff correlates with CPD for Learnability and Interaction, not Outcome. Each point shows the OP diff for one session (abscissa) and the CPD for the respective variable, measured in the post-outcome epoch and averaged across all the neurons recorded in that session (ordinate). The lines and shading show the best fitted regression line and its 95% CI over N = 81 sessions. B . CPD for Learnability and Interaction, not Outcome, differed between sessions with above-median vs below-median OP diff (respectively, purple and yellow). The traces show mean and SE of the time-resolved CPD across the neurons in each group ( N indicated in the legend above). Dashed horizontal traces show the averaged CPD with the corresponding labels shuffled 50 times. Thicker points show time bins with CPD significantly different from its own shuffled control and horizontal bars at the top show whether CPD differed between the two groups by independent t -test. All other conventions as in Fig. 3B . To illustrate the time-course of the behavioral differentiation, we plotted the time-resolved CPD on sessions with above- and below-median OP diff ( Fig. 2C ; performing the median split within monkey to rule out subject-specific confounds). As shown in Fig 5B (middle panel) CPD for the Outcome was high and indistinguishable in the two behavior groups. In contrast, CPD for Learnability and Interactions was near-chance in sessions with below-median OP diff , but was significantly stronger, and significantly above-chance, in sessions with above-median OP diff throughout the course of the trial ( Fig 5B , right and left panels). Thus, the dACC responses to learnability, and the differential encoding of the outcome for L vs U sets, were significantly correlated with the monkeys’ ability to distinguish between the two sets. 
Discussion Natural environments contain a practically infinite number of potential associations between disparate features and events. However, only a tiny minority of these associations signal consistent, learnable structures, while the vast majority are spurious and unlearnable. Distinguishing learnable from unlearnable contexts is therefore crucial, both to avoid laboring in vain and to prevent erroneous inferences about the presence of structures in objectively random events. Using learnability to control learning, however, is a significant challenge, because true and random associations are not labeled as such, and animals must infer hidden differences between these sets of associations before fully learning the structure. Here, we show that monkeys have some – albeit variable and imperfect – ability to distinguish structured vs unstructured pictorial sets, and crucially, this ability is correlated with the ability of dACC cells to distinguish these contexts. We show that individual dACC neurons and populations of cells encoded learnability independently of reward outcomes, and these responses varied in strength in a manner that was correlated with the monkeys’ ability to differentially learn in L versus U sets. Regression analysis, combined with the precisely-matched reward rates on L and U sets, showed that the learnability responses were clearly distinct from the trial’s rewards, and thus could not be explained by quantities such as reward prediction errors/surprise 24 , reward uncertainty 25 or reward expectation 26 that had been shown to be conveyed by dACC cells. The neural encoding of learnability persisted after the monkey’s decisions and interacted with outcome-related responses, such that the neuronal differentiation between a reward and a lack of reward was stronger in L vs U sets. 
This result is consistent with a substantial body of evidence from fMRI and ERP studies in humans showing reward-related responses are dampened in uncontrollable vs controllable tasks 1 , 34 , 35 . In our task, the dampened response to reward on one trial may have reduced the extent to which monkeys adjusted their choices on the following trial. Thus, in sessions in which this differential dampening was stronger (i.e., with stronger interactions between learnability and outcome-related responses) animals may have been better able to learn the true structure based on reward feedback on L sets but ignore the feedback on U sets, explaining the correlations with OP diff we found in the data. This possibility, as well as how such an incremental reward-based mechanism may interact with internal representations, can be fruitfully explored in future experiments using detailed computational modeling of behavior in humans and monkeys. Our results are consistent with a role of the dACC in executive control and monitoring 14 – 16 , 27 , 28 , and with a neurocomputational model of meta-level control of attention and learning based on the task context and goals 12 , 13 . However, our findings suggest that executive regulation relies on more than external rewards or sensory cues, as has been assumed in previous work. In our task, the rates of reward on L and U sets were dynamically and precisely equated. Thus, L and U sets could not be distinguished in terms of previously proposed reward metrics, including the rates of reward, the uncertainty (volatility) of these rates 12 , 13 , or learning progress defined as the derivative of the rates over time 6 . Similarly, learnability responses could not be explained by visual cues. 
While the animals may have detected the presence of two pictorial sets (i.e., the fact that images from L and U sets did not co-occur in a trial), this cannot explain the dACC learnability responses, which persisted and grew after the choices (when the images disappeared) and, crucially, correlated with behavioral preferences. Our findings thus suggest that the dACC detects learnability based on mechanisms that are independent of both external rewards and sensory cues. What might be the nature of these mechanisms? One possibility is that the learnability responses in the dACC reflected, at least in part, the monitoring of internal representations of a hierarchical structure. This is consistent with the evidence that such representations, whether they reflected a veridical structure on an L set or a fictitious structure on a U set, were necessary to explain behavioral choices 29 , 30 . An alternative possibility is that the monkeys inferred learnability based on prior beliefs, simply assuming that U sets have a learnable order based on their prior experience with various learnable tasks. A final, not mutually exclusive, possibility is that animals monitor the predictability between simulated operations and corresponding outcomes (e.g., the mutual information between their choices and rewards). This quantity is a measure of “empowerment” in agent-centric learning and AI 31 – 33 , and is emerging as a strong candidate for a type of intrinsic motivation that guides learning independent of local reward. Thus, a critical question for future research concerns the types of intrinsic motivations – be they based on prior beliefs, internal representations or empowerment – that contribute to the regulation of learning and other cognitive functions. 
In sum, our results show that dACC neurons detect learnability independently of sensory cues and rewards, and may underlie the intrinsically-motivated control of learning in complex environments containing unknown mixtures of structured and random associations. Materials and Methods Subjects We trained two adult male rhesus macaques ( Macaca mulatta , F and G, weighing 8.1 and 10 kg during the data collection) for our task. Besides basic training on eye fixation and visually guided saccades, subject F had prior training on the transitive inference task but neither was pretrained on our dual list paradigm. Subjects collected liquid rewards from the task. Our study was approved by the Institutional Animal Care and Use Committees (IACUCs) at Columbia University. Research was also conducted according to the guidelines from the Guide for the Care and Use of Laboratory Animals of the National Institutes of Health (NIH). Behavior control We implanted each subject with a metal bar over the acrylic headpost situated around the posterior midline, by which they were head fixed in the primate chair while interacting with the task by making the eye saccades. In order to infer the monkey’s eye position on the screen, we used an infrared camera for capturing video of one eye (Flea 3 FL3-U3, Point Grey, Wilsonville, OR, 600Hz frame rates), after which software (fly capture, FLIR, Richmond, British Columbia, Canada) automatically detected the center and outline of eyes, and output the x and y coordinates. Before the experiments, eye position was calibrated by requiring subjects to fixate on the small white square in the center and perimeters of the display for 500ms, during which we aligned monkeys’ eyes with the square locations. Stimuli were displayed by a CRS VSG 2/3F video frame buffer on a CRT high resolution monitor (sample frequency 60Hz, 1280×1024 pixels). 
We sized each stimulus at 140×140 pixels, located them symmetrically around the center (eccentricity of 200 pixels from the picture center) and set the viewing distance at 60cm. After the subject made the saccade, eye coordinates were sent to the behavior computer that proceeded to the next phase of the trial. To deliver the reward, the computer sent out two pulse signals to a solenoid valve resulting in two drops of water (0.1cc each) being pumped through the sipper tubes installed on the primate chair. We mounted another camera 45 degrees left to the chair and recorded the monkey’s face during the whole session by which we would be able to extract the time and frequency of licking on each trial for future use. Task design In each session, 10 pictorial stimuli were selected from a database of over 2500 images, ensuring that they had not been presented before. Those 10 images were randomly assigned to one of the two lists with 5 stimuli each. Of the 2 lists, one was learnable (L) where each stimulus was arbitrarily assigned with its unique rank and subjects were required to deduce the veridical order by trial and error. The other set was unlearnable (U), meaning pictures within were unordered by which subjects couldn’t obtain any predictable relationships among stimuli under such context. Each trial started with a small solid white square appearing at the center of the screen, after which the monkey was required to move his eyes towards it within 1500ms. After that they had to fixate on the square for another 500ms to proceed with the trial. Not fixating in time or breaking from fixation aborted the trial immediately. On average, monkeys broke about 25% of trials (Finish rate, Monkey F: 0.8 ± 0.03; Monkey G: 0.71± 0.04) each session, suggesting good motivation. After the fixation period, 2 pictures were randomly drawn from one list and appeared equidistant from the screen center in opposite directions. 
Within three seconds, monkeys needed to respond by making a visual saccade to the left or right picture and hold their eyes in the acceptance window of the picture (150 by 150 pixels square) for another 500ms to confirm the choice. Feedback depended on the trial learnability. If the pairs were from the L set, the subject received two drops of water and a simultaneous high frequency tone (880 Hz) if the choice followed the correct order, or otherwise two seconds of the screen being completely dark as the punishment together with a low frequency tone (440 Hz). On the other hand, if the trial was unlearnable, feedback was independent of subject’s choice so that reward probability for choosing either stimulus was equal to the mean reward rate over the past ten learnable trials relative to the current trial. Sessions were composed of training blocks and testing blocks. Each training block contained 48 trials, including four adjacent pairs (AB, BC, CD, DE) from two stimulus lists and each pair was presented 6 times with spatial counterbalance applied. Testing blocks followed a similar structure, presenting each of all 10 possible stimulus pairs for the two lists, each presented twice to counterbalance their positions on screen, resulting in 40 trials every block. L and U trials were interleaved by randomly shuffling the presenting sequence to keep the animal from predicting the trial learnability and certain image pair. Both monkeys needed to complete eight training blocks before proceeding to the testing block. They were allowed to finish as many testing blocks as they could until satiated. 
In summary, both monkeys completed about 840 trials per session (Monkey F: 843 ± 48; Monkey G: 841± 37). Neurophysiology Subjects were implanted with recording chambers at the front left, targeting posterior dACC (Brodmann area 24c, subject F: 23 mm anterior to the interaural plane, 13 mm medial to the sagittal midline, 0 degree tilted; subject G: 27.8 mm anterior, 14.6 mm medial, 40 degrees tilted), which has been reported to respond to more cognitive aspects of learning compared to the anterior part 36 . Single unit activities were recorded using linear multichannel probes (Poly 2 or edge probe, Neuronexus, Ann Arbor, Michigan) with 32 channels (spanning two lanes (16 each) per line) with average spacing of 100 μm between every adjacent channel (1 MΩ ). To determine the recording location, we performed structural imaging using a 3T MRI scanner (Siemens, Munich, Germany) prior to the chamber implant. The resulting scan of the brain structure allowed us to plan the implant trajectory and stereotaxic coordinates from the Brainsight (Rogue Research, Montreal, Quebec, Canada). After the surgery, we performed an additional structural MRI scan to verify the location of the chamber as well as match each location in the chamber with the targeting part of the brain. Every session, a recording probe was penetrated into the brain by the motorized micro drive system (NaN Instruments, Nazareth, Israel, recording range: anterior to AP monkey F: 22.8-26.9mm, monkey G: 22.9-29.2mm). Depth of penetration to dACC was determined based on MRI, further verified by empirical observation of high frequency spikes first, then silence and lastly medium frequency spikes when the probe went through the cortical area (FEF or premotor cortex), then white matter and dACC. 
Raw spikes were amplified and digitized (RHD 32 channels processor, Intan Technologies, Los Angeles, California), high-pass filtered (300-6000 Hz) and then sampled at 30 kHz by the Open Ephys data acquisition system (version 2.2, Cambridge, Massachusetts), which synchronized the data stream from brain, licking and all behavior variables. Both the raw LFP and filtered neural signals were saved in binary data format. We used Kilosort (version 2.5 37 and version 4 38 ) for spike sorting. The procedure is as follows: filtered spike binary files were imported to both versions of Kilosort in parallel. Each cluster was then manually inspected, merged or split based on overall spikes count, waveform, inter-spike interval (ISI) histogram and spikes feature projected in the low dimensional space. Then, we plotted a raster map for each cluster and did final examination based on if firing rates and spike amplitudes drifted during the whole session. Only the units that passed all criteria were included. Lastly, we compared sorting performance between two Kilosort versions and picked the one that yielded more neurons. Overall, we collected 1072 units from dACC (F: 426; G:636). Behavioral analysis To quantify how strongly subjects ordered stimuli under two environments, we implemented the procedure as before 21 . Briefly, we assumed preferences towards each stimulus were distributed along a vectoral continuum, parametrized by its z -score μ and uncertainty σ . The probability of choosing X when paired with Y was given by: $$ p(X \mid X, Y) = \frac{\theta}{2} + (1 - \theta)\,\Phi\!\left( \frac{\mu_X - \mu_Y}{\sqrt{\sigma_X^2 + \sigma_Y^2}} \right) $$ where Φ is the standard normal cumulative distribution function. This shows that chosen probability depends on the difference and overall uncertainty between z -scores of both stimuli, as well as the degree of being insensitive to the preferences by randomly responding (θ). We constructed the model within the Bayesian updating framework and fitted the model to the data only from the testing phase. Best-fit estimates of μ and σ were obtained using Markov chain Monte Carlo (MCMC) and implemented in Stan 29 . 
Afterwards, we performed linear regression over all z -scores sorted in an ascending order. This can let us use the slope as a metric to reflect the ordering preference (OP) and denote the subjective rank E-A for each pictorial set, particularly the U set. Meanwhile we can contrast the OP for L versus U to calculate OP diff to examine the strength of behavioral differentiation between the two sets. We also simulated data of random responses 80 times for each subject to acquire the “baseline” slopes, which served as the null model against which the actual slopes were compared. This provided a more conservative null hypothesis, as even random responding is expected to favor some items over others by pure chance during any given trial, resulting in slightly positive baseline slope. Statistical analysis on neural data Neural activity visualization To directly see the neural activity, we constructed the PSTH for each neuron with the activity aligned on the time point of stimulus presentation, smoothed by a Gaussian kernel (kernel width: 240ms) and then normalized by the baseline activity defined by the averaged firing rates within the 500ms before the stimulus display. Time resolved Poisson General linear model (Poisson GLM) To look at learnability modulation, we constructed the linear mixed-effect model as depicted: $$ \log \mathbb{E}[y] = \beta_0 + \beta_L L + \beta_R R + \beta_{L \times R}\,(L \times R) + \beta_D D $$ where y is the spike count in a given epoch or time window, L is the learnability of the trial, R is the reward outcome and D is the saccade direction. We first extracted the spikes from the testing phase only since behavior was more stable compared to training ( Fig 1G, H , S1 ). Then we prepared the spike data in two ways. First way is to count the spikes from four major epochs of the trial: 800ms before stimulus on; stimulus on to choice; choice to feedback and 2000ms after the feedback. Second way is to better visualize the dynamics of encoding. We slid a time window (window size=300ms) from 800ms before stimulus on until 3700ms after with 50ms per stride. 
We regressed the spike counts within each epoch/window on the learnability(L), reward outcome(R), their interaction and saccade direction, using the sm.GLM(family=sm.families.Poisson()) function from the statsmodels python module. Since in the learnable set, choosing higher rank always incurred higher reward, we did not include rank to prevent collinearity among predictors. Besides reporting the β and p -value from each regressor, we also calculated the coefficient of partial determination (CPD) 39 , which was computed for the i th predictor in the k th time window as: $$ \mathrm{CPD}_{i}(k) = \frac{r_i - r_k}{r_i} $$ where r k denotes the squared norm of the residual vector of the full model and r i denotes the same over the reduced model when regressor i is removed. In other words, CPD measures the normalized contribution of each regressor to the variance of spike counts across trials. We also performed the random permutation test 50 times by shuffling each regressor, running Poisson GLM and calculating CPD independently. We compared the CPDs from the true and shuffled models using a non-paired t test to verify if the CPD is significant. Before regression, we constructed a confusion map over all regressors to ensure that no two regressors were highly correlated. Indeed, there were no two predictors that showed correlation strength higher than 0.7 in more than 10% of the sessions. We also implemented similar Poisson GLM within each list to examine effects by outcome: $$ \log \mathbb{E}[y] = \beta_0 + \beta_R R + \beta_D D $$ All the other procedures followed the previous regression. Decoding analysis To investigate how the population as a whole represented learnability and outcome, we first selected sessions with more than 10 units simultaneously recorded. Then we counted the spikes from four major epochs of the trial similarly to the epoch-wise GLM. For each epoch, we trained a logistic classifier on the trial types and outcome separately from the population activities within each monkey as well as all data pooled, using Matlab built-in function fitclinear (Logistic learner, L2 regularization). 
We evaluated the decoding performance based on five-fold cross validation accuracy. Performance was compared with averaged accuracies from the control group where labels were shuffled across trials 100 times. Funding This work was supported by grant NIH-R01MH111703 from the National Institutes of Health (VPF) and NIH-R34NS137420 (VPF and JG). Data sharing plan Code and data are available from the first author upon request. Author contributions The study was designed and the manuscript was prepared by YJ, GJ, JG & VPF. Data were collected and analyzed by YJ. The authors declare no competing interest. Acknowledgments We thank Dr. Fabian Munoz and Yvoone Li for technical help with electrophysiology. We also thank Dr. Liam Paninski and Dr. Stuart Firestein for valuable discussions on analysis and interpretation of the project. Funder Information Declared NIH Common Fund, https://ror.org/001d55x84 , NIH-R01MH111703 , NIH-R34NS137420 References 1. ↵ Wurm , F. et al. Task Learnability Modulates Surprise but Not Valence Processing for Reinforcement Learning in Probabilistic Choice Tasks . Journal of Cognitive Neuroscience 34 , 34 – 53 ( 2022 ). OpenUrl 2. ↵ Grahek , I. , Frömer , R. , Prater Fahey , M. & Shenhav , A . Learning when effort matters: neural dynamics underlying updating and adaptation to changes in performance efficacy . Cerebral Cortex 33 , 2395 – 2411 ( 2023 ). OpenUrl CrossRef PubMed 3. ↵ Masís , J. , Chapman , T. , Rhee , J. Y. , Cox , D. D. & Saxe , A. M . Strategically managing learning during perceptual decision making . eLife 12 , e64978 ( 2023 ). OpenUrl CrossRef PubMed 4. ↵ Gerken , L. , Balcomb , F. K. & Minton , J. L . Infants avoid ‘labouring in vain’ by attending more to learnable than unlearnable linguistic patterns: Infants attend more to learnable patterns . Developmental Science 14 , 972 – 979 ( 2011 ). OpenUrl CrossRef PubMed 5. ↵ Locurto , C. et al. 
There’s something about a pattern: Choice between pattern and random sequences in implicit learning . Journal of Experimental Psychology: Animal Learning and Cognition 49 , 62 – 74 ( 2023 ). OpenUrl 6. ↵ Ten , A. , Kaushik , P. , Oudeyer , P.-Y. & Gottlieb , J . Humans monitor learning progress in curiosity-driven exploration . Nat Commun 12 , 5972 ( 2021 ). OpenUrl PubMed 7. ↵ Leotti , L. A. , Iyengar , S. S. & Ochsner , K. N . Born to choose: the origins and value of the need for control . Trends in Cognitive Sciences 14 , 457 – 463 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 8. ↵ Mühlberger , C. , Angus , D. J. , Jonas , E. , Harmon-Jones , C. & Harmon-Jones , E . Perceived control increases the reward positivity and stimulus preceding negativity . Psychophysiology 54 , 310 – 322 ( 2017 ). OpenUrl PubMed 9. ↵ Zheng , Y. , Yang , C. , Jiang , H. & Gao , B . Neural dynamics underlying the illusion of control during reward processing . Soc Cogn Affect Neurosci 19 , nsae063 ( 2024 ). 10. ↵ Rodriguez , P. F . Stimulus-outcome learnability differentially activates anterior cingulate and hippocampus at feedback processing . Learn. Mem . 16 , 324 – 331 ( 2009 ). OpenUrl Abstract / FREE Full Text 11. ↵ Shenhav , A. , Botvinick , M. M. & Cohen , J. D . The Expected Value of Control: An Integrative Theory of Anterior Cingulate Cortex Function . Neuron 79 , 217 – 240 ( 2013 ). OpenUrl CrossRef PubMed Web of Science 12. ↵ Silvetti , M. , Vassena , E. , Abrahamse , E. & Verguts , T . Dorsal anterior cingulate-brainstem ensemble as a reinforcement meta-learner . PLoS Comput Biol 14 , e1006370 ( 2018 ). OpenUrl CrossRef PubMed 13. ↵ Vriens , T. , Vassena , E. , Pezzulo , G. , Baldassarre , G. & Silvetti , M . Meta-Reinforcement Learning reconciles surprise, value, and control in the anterior cingulate cortex . PLOS Computational Biology 21 , e1013025 ( 2025 ). OpenUrl 14. ↵ Akam , T. et al. 
The Anterior Cingulate Cortex Predicts Future States to Mediate Model-Based Action Selection . Neuron 109 , 149 – 163 .e7 ( 2021 ). OpenUrl CrossRef PubMed 15. Fu , Z. , Sajad , A. , Errington , S. P. , Schall , J. D. & Rutishauser , U . Neurophysiological mechanisms of error monitoring in human and non-human primates . Nat Rev Neurosci 24 , 153 – 172 ( 2023 ). OpenUrl CrossRef PubMed 16. ↵ Fleming , S. M. , Van Der Putten , E. J. & Daw , N. D . Neural mediators of changes of mind about perceptual decisions . Nat Neurosci 21 , 617 – 624 ( 2018 ). OpenUrl PubMed 17. Ebitz , R. B. & Platt , M. L . Neuronal Activity in Primate Dorsal Anterior Cingulate Cortex Signals Task Conflict and Predicts Adjustments in Pupil-Linked Arousal . Neuron 85 , 628 – 640 ( 2015 ). OpenUrl CrossRef PubMed 18. ↵ González , V. V. et al. A Common Stay-on-Goal Mechanism in the Anterior Cingulate Cortex for Information and Effort Choices . eNeuro 12 , ( 2025 ). 19. ↵ Munoz , F. et al. Learned Representation of Implied Serial Order in Posterior Parietal Cortex . Sci Rep 10 , 9386 ( 2020 ). OpenUrl CrossRef PubMed 20. ↵ Ciranka , S. et al. Asymmetric reinforcement learning facilitates human inference of transitive relations . Nat Hum Behav 6 , 555 – 564 ( 2022 ). OpenUrl PubMed 21. ↵ Jin , Y. , Jensen , G. , Gottlieb , J. & Ferrera , V . Superstitious learning of abstract order from random reinforcement . Proceedings of the National Academy of Sciences 119 , e2202789119 ( 2022 ). OpenUrl PubMed 22. ↵ Modirrousta , M. & Fellows , L. K . Dorsal Medial Prefrontal Cortex Plays a Necessary Role in Rapid Error Prediction in Humans . J. Neurosci . 28 , 14000 – 14005 ( 2008 ). OpenUrl Abstract / FREE Full Text 23. ↵ Kolling , N. , Behrens , T. , Wittmann , M. & Rushworth , M . Multiple signals in anterior cingulate cortex . Current Opinion in Neurobiology 37 , 36 – 43 ( 2016 ). OpenUrl CrossRef PubMed 24. ↵ Wallis , J. D. & Rich , E. L . 
Challenges of Interpreting Frontal Neurons during Value-Based Decision-Making . Front Neurosci 5 , 124 ( 2011 ). 25. ↵ Monosov , I. E. , Haber , S. N. , Leuthardt , E. C. & Jezzini , A . Anterior cingulate cortex and the control of dynamic behavior in primates . Curr Biol 30 , R1442 – R1454 ( 2020 ). OpenUrl CrossRef PubMed 26. ↵ Chang , S. W. C. , Gariépy , J.-F. & Platt , M. L . Neuronal reference frames for social decisions in primate frontal cortex . Nat Neurosci 16 , 243 – 250 ( 2013 ). OpenUrl CrossRef PubMed 27. ↵ Cohen , Y. , Schneidman , E. & Paz , R . The geometry of neuronal representations during rule learning reveals complementary roles of cingulate cortex and putamen . Neuron 109 , 839 – 851 .e9 ( 2021 ). OpenUrl CrossRef PubMed 28. ↵ Jahn , C. I. et al. Neural responses in macaque prefrontal cortex are linked to strategic exploration . PLoS Biol 21 , e3001985 ( 2023 ). OpenUrl CrossRef PubMed 29. ↵ Jensen , G. , Alkan , Y. , Ferrera , V. P. & Terrace , H. S . Reward associations do not explain transitive inference performance in monkeys . SCIENCE ADVANCES ( 2019 ). 30. ↵ Jensen , G. , Ferrera , V. P. & Terrace , H. S . Positional inference in rhesus macaques . Anim Cogn 25 , 73 – 93 ( 2022 ). OpenUrl CrossRef PubMed 31. ↵ Klyubin , A. S. , Polani , D. & Nehaniv , C. L . All Else Being Equal Be Empowered . in Advances in Artificial Life (eds Capcarrère , M. S. , Freitas , A. A. , Bentley , P. J. , Johnson , C. G. & Timmis , J. ) 744 – 753 ( Springer, Berlin, Heidelberg , 2005 ). doi: 10.1007/11553090_75 . OpenUrl CrossRef 32. Salge , C. , Glackin , C. & Polani , D. Empowerment -- an Introduction . Preprint at doi: 10.48550/arXiv.1310.1863 ( 2013 ). OpenUrl CrossRef 33. ↵ Elelimy , E. , Szepesvari , D. , White , M. & Bowling , M. Rethinking the Foundations for Continual Reinforcement Learning . Preprint at doi: 10.48550/arXiv.2504.08161 ( 2025 ). OpenUrl CrossRef 34. ↵ Ernst , B. & Steinhauser , M . 
Top-down control over feedback processing: The probability of valid feedback affects feedback-related brain activity . Brain Cogn 115 , 33 – 40 ( 2017 ). OpenUrl CrossRef PubMed 35. ↵ Ernst , B. & Steinhauser , M . Effects of feedback reliability on feedback-related brain activity: A feedback valuation account . Cogn Affect Behav Neurosci 18 , 596 – 608 ( 2018 ). OpenUrl CrossRef PubMed 36. ↵ Shenhav , A. , Botvinick , M. M. & Cohen , J. D . The expected value of control: An integrative theory of anterior cingulate cortex function . Neuron 79 , 217 – 240 ( 2013 ). OpenUrl CrossRef PubMed Web of Science 37. ↵ Pachitariu , M. , Sridhar , S. & Stringer , C. Solving the spike sorting problem with Kilosort . 2023.01.07.523036 Preprint at doi: 10.1101/2023.01.07.523036 ( 2023 ). OpenUrl Abstract / FREE Full Text 38. ↵ Pachitariu , M. , Sridhar , S. , Pennington , J. & Stringer , C . Spike sorting with Kilosort4 . Nat Methods 21 , 914 – 921 ( 2024 ). OpenUrl CrossRef PubMed 39. ↵ Chien , J. M. , Wallis , J. D. & Rich , E. L . Abstraction of Reward Context Facilitates Relative Reward Coding in Neural Populations of the Macaque Anterior Cingulate Cortex . J. Neurosci . 43 , 5944 – 5962 ( 2023 ). OpenUrl Abstract / FREE Full Text View the discussion thread. Back to top Previous Next Posted September 30, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Single-Neuron Encoding of Learnability in the Dorsal Anterior Cingulate Cortex Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. 
Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Single-Neuron Encoding of Learnability in the Dorsal Anterior Cingulate Cortex Yuhao Jin , Greg Jensen , Vincent Ferrera , Jacqueline Gottlieb bioRxiv 2025.09.29.679390; doi: https://doi.org/10.1101/2025.09.29.679390 Share This Article: Copy Citation Tools Single-Neuron Encoding of Learnability in the Dorsal Anterior Cingulate Cortex Yuhao Jin , Greg Jensen , Vincent Ferrera , Jacqueline Gottlieb bioRxiv 2025.09.29.679390; doi: https://doi.org/10.1101/2025.09.29.679390 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Neuroscience Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17690) Bioengineering (13892) Bioinformatics (41936) Biophysics (21451) Cancer Biology (18588) Cell Biology (25499) Clinical Trials (138) Developmental Biology (13378) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24320) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88603) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15152) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00