Output-Contingent Working Memory and Decision-Making in Economic Choices

doi:10.1101/2025.08.29.673193

Output-Contingent Working Memory and Decision-Making in Economic Choices

2025 · doi:10.1101/2025.08.29.673193

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 72,340 characters · extracted from preprint-html · click to expand

Output-Contingent Working Memory and Decision-Making in Economic Choices | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Output-Contingent Working Memory and Decision-Making in Economic Choices Jintao Gu , View ORCID Profile Kuntan Ni , View ORCID Profile Xinying Cai , View ORCID Profile Sukbin Lim doi: https://doi.org/10.1101/2025.08.29.673193 Jintao Gu 1 Neural Science, NYU Shanghai , 567 West Yangsi Road, Shanghai, 200126, People’s Republic of China 2 NYU-ECNU Institute of Brain and Cognitive Science at NYU Shanghai , 3663 Zhongshan Road North, Shanghai, 200062, People’s Republic of China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kuntan Ni 1 Neural Science, NYU Shanghai , 567 West Yangsi Road, Shanghai, 200126, People’s Republic of China 2 NYU-ECNU Institute of Brain and Cognitive Science at NYU Shanghai , 3663 Zhongshan Road North, Shanghai, 200062, People’s Republic of China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kuntan Ni Xinying Cai 1 Neural Science, NYU Shanghai , 567 West Yangsi Road, Shanghai, 200126, People’s Republic of China 2 NYU-ECNU Institute of Brain and Cognitive Science at NYU Shanghai , 3663 Zhongshan Road North, Shanghai, 200062, People’s Republic of China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Xinying Cai Sukbin Lim 1 Neural Science, NYU Shanghai , 567 West Yangsi Road, Shanghai, 200126, People’s Republic of China 2 NYU-ECNU Institute of Brain and Cognitive Science at NYU Shanghai , 3663 Zhongshan Road North, Shanghai, 200062, People’s Republic of China 3 NYU Shanghai Center for Data Science , 567 West Yangsi Road, Shanghai, 200126, People’s Republic of China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sukbin Lim For correspondence: sukbin.lim{at}nyu.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract In daily life, economic decisions often unfold sequentially. The brain is thought to compute the subjective value of each option, and comparisons can occur in different reference frames, for example, based on the commodity or presentation order. While primate prefrontal recordings have identified various reference frames for economic choice, it remains unclear how distinct neural mechanisms support them even in similar tasks. To address this, we trained recurrent neural networks (RNNs) on two sequential economic decision-making tasks differing only in output contingencies. Analysis of RNN activity, combined with latent connectivity inference, revealed distinct regimes: commodity-based choices with attractor dynamics and order-based choices with rotational dynamics. Moreover, value and choice representations in the order-based tasks aligned with neural data from a novel experiment where reference frames were not explicitly constrained. Our results suggest that different reference frames emerge depending on task demands and engage distinct working memory and decision-making mechanisms. Introduction Decision-making often involves comparing stimuli that are not simultaneously presented, requiring the temporary storage of relevant information until a choice is made. A classic neurophysiological study comes from perceptual decision-making tasks, where monkeys compared the frequency of two vibrotactile stimuli separated by a delay 1 , 2 . To meet task demands, the brain maintained a working memory representation of the first stimulus frequency along a continuous scale, enabling fine discrimination when the second stimulus arrived. Since only a single attribute of the stimuli was compared, the choice was based on the order of presentation – monkeys indicated whether the second stimulus had a higher or lower frequency than the first by pressing a corresponding button. On the other hand, economic decision-making is usually studied with simultaneously presented offers. Theories in this field suggest that subjective values – integrating multiple attributes relevant to the decision – are computed and compared within prefrontal regions, particularly the orbitofrontal cortex (OFC) 3 , 4 . For instance, in experiments where monkeys chose between juices varying in taste and quantity, distinct OFC neurons encoded the value of each option and the identity and value of the chosen option, with the option defined by juice type 5 . However, economic choices aren’t always presented at the same time. When offers appear sequentially, studies have shown that different reference frames can be used to represent the choice process 6 – 9 . In gambling tasks using two sequentially presented offers, neural recordings across multiple reward regions suggested that instead of a distinct representation for each offer, the same neurons encoded the values of both offers similarly 10 – 14 . Here, decisions could be made by comparing the current offer’s value to a threshold determined by prior offers 7 . The identity of options need not be maintained until the choice report; instead, choice can be represented based on the order of presentation, similar to mechanisms observed in perceptual decision-making 2 . Interestingly, presenting offers in sequence does not always guarantee an order-based representation. One study, using a design similar to the monkey juice experiment but with sequential offers, found that value and choice signals were represented in a juice-based reference frame 15 , 16 . In contrast, a more recent study with sequential juice offers – but without fixed choice contingencies – showed that order-based representations were more dominant than juice-based ones in the OFC (K.N. and X.C., in preparation). Given the diverse reference frames observed in economic choices, we hypothesized that the same network could adopt different reference frames depending on task demands, by engaging distinct working memory and decision-making processes. To test this, we trained recurrent neural networks (RNNs) on sequential economic decision-making tasks, and manipulated the output contingency, requiring choices to be reported based on either offer identity or presentation order. Our findings confirm that these reporting schemes elicit distinct reference frames for choice signals. Analyses of single-unit and population activity, along with recent techniques for inferring low-dimensional latent circuits, revealed distinct dynamics underlying value encoding, maintenance, and decision-making. Specifically, commodity-based reference frames exhibited attractor-like dynamics, whereas order-based reference frames were characterized by rotational dynamics. Furthermore, the value and choice representations observed in order-based tasks were consistent with experimental data. Overall, our work suggests that economic choices can flexibly engage different dynamic regimes, reinforcing the link between reference frames and the underlying neural mechanisms of decision-making. Results RNNs exhibit distinct reference frames with changes in behavioral reporting To investigate how offers with multiple attributes are maintained and compared, we trained recurrent neural networks (RNNs) on economic decision tasks between two juices having different tastes and quantities 5 , 15 ( Fig. 1 ) We considered the sequential presentation of two offers with an intermediate delay when the value of the first offer should be memorized for later comparison. Thus, the value of each offer has multiple attributes: juice type, amount, and presentation order. Download figure Open in new tab Figure 1. Task-optimized RNNs generate reference-frame-dependent choices. a Schematic of offer presentation and choice reporting. Two juice offers, differing in taste and quantity, were presented asynchronously. Trials were classified as AB or BA, with Juice A preferred. Choices were reported in distinct reference frames: left/right actions were associated with juice identity in the juice task or with presentation order in the order task. b Input and output channels of the RNNs. c Task phase segmentation: Phase 1 for encoding, Phase 2 for comparison, and Phase 3 for choice-to-action mapping. In Phases 1 and 2, a delay without stimuli followed the offer presentation. d RNN choice behavior in the juice task. The example RNN (blue) replicates the teaching psychometric curve (orange). While the instructed indifference point ρ teaching was fixed, the value of juice A in each RNN was calculated based on the indifference point inferred from its fitted curve (green). Similar results hold for the order-task RNN (not shown). In this panel, the RNN was tested on an extended set of offer pairs beyond those used during training. e-f Classifier performance of example RNNs in decoding choices ( e : juice task, f : order task). Gray and blue shaded areas indicate offer presentation and mapping cue presentation, respectively. To accommodate different reference frames of choice observed experimentally, we implemented two types of choice-to-action mapping ( Fig. 1a,b ). The first is a juice-based mapping, which depends on the juice type: for example, the preferred juice maps to the left action, and the less preferred to the right. The second is an order-based mapping, which depends on the presentation order: for example, first/second offer maps to left/right, regardless of juice type. We refer to these as the “juice task” and “order task”, respectively. Note that there is no spatial association during offer presentation, like two offers shown at the center of the screen. Both tasks consist of three phases: Phase 1 presents the first offer (Offer 1) followed by a delay (inter-offer period), Phase 2 presents the second offer (Offer 2) followed by another delay (wait period), and Phase 3 requires a left or right action ( Fig. 1c ). Offers 1 and 2, having no spatial association, are given through two input channels corresponding to juice types A and B, with A being preferred ( Fig. 1b ). We randomized the presentation order of each juice to prevent any consistent relationship between juice identity and order. In Phase 3, one of two possible choice-to-action mapping channels is activated in each task – e.g., juice task associating Juice A-Action L/Juice B-Action R, or vice versa. We trained RNNs to mimic the behavior choice pattern in monkeys ( Fig. 1d ). As the relative amount of juice B increases, the percentage of choosing B rises, forming a psychometric curve modelled as a logistic function of q B / q A , the ratio between juice amounts, q A and q B 17 . The indifference point –50 % choosing B –was set at 1.7 in both tasks, denoted by ρ teaching . The amounts of Juices A and B range from 0 to 4 and 0 to 8, respectively. For each session, the task type and psychometric curve were fixed. For each juice ratio, the RNN’s target output was randomly sampled to match the correct choice frequency, introducing fluctuations in the teaching signal. Trained RNN reproduced psychometric curves with indifference points close to the original one. We defined the value of each juice based on each RNN’s inferred indifference point – for instance, in Fig. 1d , the indifference point is 1.67, so the values of Juices A and B are 1.67 q A and q B . We next analyzed RNN activity to decode choice variables across reference frames using linear discriminant analysis ( Fig. 1e,f ). When Offer 2 is presented, juice-based and order-based choice signals emerge and persist in juice and order tasks, respectively, while order signals appear only transiently in the juice tasks. Spatial reference frames representing left/right choice, on the other hand, emerge only in Phase 3 after the choice-to-action mapping cue is given. Thus, different choice-to-action mappings impose distinct reference frames for choice. Distinct single-unit activity under juice-and order-based choices To investigate the mechanisms of working memory and decision-making underlying different choice reference frames, we first analyzed single-unit activity in RNNs trained for each task. Specifically, we examined how the tuning changes before and after the presentation of Offer 2, which illustrates encoding and maintenance schemes of Offer 1 and its comparison to Offer 2. Juice and order tasks exhibit drastically different single-unit tuning patterns. In RNNs trained for the juice task, unit activity can be distinctly segregated by juice types. In Figure 2 , an example unit shows positive tuning for Juice B and negative tuning for Juice A in Phase 1 ( Fig. 2a,c ). This juice identity encoding persists through Phase 2 when the choice is made. When we further segregated activity by trial type and final choice, units preferring Juice B during encoding also tend to prefer choosing Juice B ( Fig. 2b,d ). Download figure Open in new tab Figure 2. Single units in juice tasks encode offer values and choices based on juice identity. a Trial-averaged Phase 1 activity of an example unit, grouped by juice type (orange: Juice A, blue: Juice B) and Offer 1 quantity (darker color for larger amounts). Shaded areas indicate standard deviation. b Phase 2 activity of the same unit, grouped by juice choices (solid: choose A, dashed: choose B) and offer presentation order (orange: AB, blue: BA). c Tuning to Offer 1 value during the last 200ms of Phase 1. As in Ballesta & Padoa-Schioppa 15 , offer values are defined in Juice B units using the indifferent point. d Tuning to the chosen value during the last 20ms of Phase 2. e Positive correlation between encoding and choice preference based on juice type across units in an example RNN (example unit in A-D highlighted in red). The horizontal axis measures juice choice preference (1: prefer A, 0: prefer B), while the vertical axis represents encoding preference based on the difference between the slopes of two regression lines in ( c ). f Positive correlation across RNN ensembles trained for the juice task with different initialization. This pattern is also reflected in the similarity of tuning between encoded values (Phase 1) and chosen values (Phase 2; Fig. 2c,d ). To quantify this, we measured the preference for encoding and choice ( Fig. 2e,f ). Encoding preference was quantified as the difference between the tuning slopes for Juices A and B during the inter-offer period. Choice preference was quantified at the end of the wait period using receiver operating characteristic (ROC) analysis and an area under the curve (AUC) score, which differentiates between juice choices. As in the example unit, units with strong choice preference tend to inherit their encoding preference from Phase 1, showing a positive correlation between encoding and choice ( Fig. 2e ). Such a positive relationship was consistent across an RNN ensemble trained for the same juice tasks but with different initializations ( Fig. 2f ). In contrast, in most RNNs trained for the order task, unit activity does not exhibit segregation based on juice types ( Fig. 3 ). During Phase 1, nearly all offer-responsive units exhibited the same tuning sign for both juices. In the example unit, the value tuning for Juices A and B even have a similar slope ( Fig. 3a,c ). Phase 2 activity was also indistinguishable for choosing A and B ( Fig. 3b ). These findings align with the decoding analysis in Figure 1f , showing that juice identity cannot be decoded in order tasks. Download figure Open in new tab Figure 3. Single units in order tasks encode juice-agnostic offer values with phase-dependent tuning. a-d Same layout as Fig. 2a-d for an example RNN unit in the order task. Unlike juice-task units that segregate responses by juice type, this unit responds similarly to both juices but differentiates choices based on the presentation order, such that choosing B in the AB trial or choosing A in the BA trial shows higher firing rates than the opposite case. e Negative correlation between the Offer 1 encoding sign and order choice preference across units in the example RNN. AUC_order in the horizontal axis measures order choice preference (1: prefer Offer 1, 0: prefer Offer 2). f Consistent negative correlation across RNN ensembles, with a few outliers. Instead, order signals emerged during Phase 2, particularly with a negative correlation to the value tuning observed during encoding ( Fig. 3b,d-f ). For instance, units positively tuned to Offer 1’s value tended to prefer choosing Offer 2 ( Fig. 3b ). Tuning to the chosen value in Phase 2 also showed higher overall firing rates and positive tuning when choosing Offer 2 ( Fig. 3d ). We quantified order-based choice preference using AUC scores, which revealed a negative correlation with the tuning slope for Offer 1 value ( Fig. 3e,f ). Stable and distinct representations in juice-task RNNs We further investigated the relationship between value encoding/maintenance and decision-making processes in each task by analyzing population activity. Specifically, we applied principal component analysis (PCA) to Phases 1 and 2 to capture most of the activity variability arising from temporal dynamics and differences in conditions, such as trial type and offer values. We found that across both phases, RNN activity was largely restricted to two dimensions. In example RNNs, the first two principal components accounted for 91.6 % of the variance in the juice task and 95.7% in the order task. In the juice task, we identified two distinct activity patterns during Phase 1 that encode and maintain the values of Juice A and Juice B, referred to as Axes A and B, respectively ( Fig. 4a ). These axes formed an “L” shape, with offer values encoded monotonically along their respective directions and converging at the origin. During the Offer 1 presentation, activity magnitude increases while maintaining alignment along the corresponding axis. During the inter-offer period, the representation of Offer 1 remains largely stable (Supplementary Movie 1). Download figure Open in new tab Figure 4. Comparison of population analysis in juice and order tasks. a , b Example snapshots of juice-task RNN activity at the end of Phase 1 ( a ) and Phase 2 ( b ), projected onto the top two principal components (PCs). Colors indicate presented or chosen juice type (orange: Juice A, blue: Juice B) with darker shades representing larger Offer 1 quantities in ( a ). Background vector fields (gray arrows) show RNN dynamics in the absence of input. c Distribution of angles between juice axes across juice-task RNNs. d-f Population analysis for the order task with the same layout as ( a-c ). g Trial-averaged Phase 1 activity across time. Only AB trials are shown for clarity. Notably, these encoding axes are repurposed to represent choice signals ( Fig. 4b ). Following the presentation of Offer 2, the choice representation for each juice aligns with the same axes used for encoding and maintaining offer values (Supplementary Movie 2). Note that the Offer 2 presentation can perturb activity outside the plane defined by these two axes before it relaxes back, explaining the transient emergence of order-based choice signal ( Fig. 1e ). These stable representations of value and choice observed in Phases 1 and 2 are orthogonal to the output vectors for left or right actions, meaning no association with them (Supp Fig. 1). In Phase 3, when the choice-to-action mapping cue is on, each choice begins to align with the corresponding output vector. Stable working memory and choice representation in the juice task is reminiscent of attractor dynamics, where each juice axis acts as a distinct attractor, supporting both working memory and decision-making. Consistently, the analysis of vector fields during the delay period reveals attraction toward the two axes, resembling line attractors for each axis ( Fig. 4a,b ). In the space between the two axes, the vector field diverges toward one of the axes, enforcing a choice between the two juices. This behavior can be achieved by mutual inhibition between the representations of the two juices. Note that if the attractors strongly inhibit each other, their directions would be nearly opposite, forming an angle close to 180 degrees. However, in the trained RNN, the angle between the two axes is wider than 90 degrees, with a median around 110 degrees ( Fig. 4c ). This suggests that the two attractors exhibit moderate mutual inhibition. Trials are grouped by q A . States were pushed upwards by the Offer 1 stimulus and underwent clockwise rotation in the inter-offer period. Each point represents an activity snapshot taken every 100 ms. Gray lines connect snapshots across conditions. h Trial-averaged Phase 2 activity, with traces starting from empty circles and ending at filled circles. A subset of conditions from ( g ) is shown ( q A =2: dashed, q A = 4: solid, with three Offer 2 values, pale/normal/dark: q B =2,4,8). With sufficiently strong Offer 2 stimulus, the state underwent another round of clockwise rotation; otherwise, they decayed to the base activity. i Rotation index, measured by the imaginary gyration number 18 from RNN activity. Dynamic and overlapping representations in order-task RNNs Unlike juice tasks, the order task does not require maintaining information about the identity of the juice. Despite separate input channels for each juice, single-unit activity revealed that the values of the two juices are encoded and maintained almost identically ( Fig. 3a,c ). Similarly, population activity in the example RNN showed overlapping value representations for the two juices during the inter-offer period ( Fig. 4d ; Supplementary Movie 3), supported by the near-zero angle between the axes encoding the two juices’ values across RNNs ( Fig. 4f ). Choice representations also align across juices and orders, forming a unified “choice axis” ( Fig. 4e ; Supplementary Movie 4). Yet, the specific order remains decodable based on the relative position along the choice axis. Notably, representations of choosing Offer 2 spread broadly, while Offer 1 choices are concentrated at the tip, consistent with higher activity for Offer 2 choices in single-unit analysis ( Fig. 3d ). These value and choice representations in Phases 1 and 2 remain independent of the output vectors for specific actions, as in juice tasks (Supp Fig. 2). The overlap in value encoding and choice for different juices or orders suggests that order-task dynamics are not governed by distinct attractors, unlike the juice task. Instead, vector field analysis during the delay period revealed rotation-like dynamics ( Fig. 4d,e ). Upon the Offer 1 presentation, the activity grows proportionally to its value. During the inter-offer period, activity rotates following the vector field ( Fig. 4g ). Snapshots of activity for different Offer 1 values form a line, with value information preserved in the radius of rotation. Such rotation-like dynamics are more prominent in order-task RNNs than in juice-task RNNs ( Fig. 4i ). Rotation-like dynamics also contribute to decision-making ( Fig. 4h ). Without external input, activity decays back to the center of the rotation. For smaller Offer 2 values, such decay dominates, converging activity near the rotation center. In contrast, larger Offer 2 values counteract decay, and the trajectories rotate again during the second delay (wait period) as in the first delay (inter-offer period). Thus, Offer 1 choices stay near the rotation center, while Offer 2 choices extend along the choice axis, allowing order decoding by distance from the rotation center ( Fig. 4e ). Latent circuit inference reveals attractor dynamics with mutual inhibition in the juice task To gain mechanistic insight into how different dynamics arise during value encoding and comparison across tasks, we inferred a low-dimensional connectivity structure from RNN activity 19 . Using activity from Phases 1 and 2, we derived a two-dimensional circuit, yielding matrices that embed a latent-circuit activity to RNN activity and capture the strengths of input, output, and recurrent connections (Methods). In the juice task, the latent circuit revealed two competing nodes, each receiving strong input from one of the two juices ( Fig. 5a,b ). These nodes correspond to the A and B axes from PCA ( Fig. 4a,b ). Each node features strong recurrent connections that lead to positive feedback within itself. Weak mutual inhibition between the nodes enables competition and choice. Additionally, each node receives inhibition from the opposite juice’s input channel, further reinforcing this competition. Download figure Open in new tab Figure 5. Latent circuit inference in the juice task. a Reduced circuit diagram showing competitive nodes responsible for encoding and choosing different juices. b Inferred recurrent (left) and input (right) connectivity from an example RNN. c Consistency of choice representations between the RNN and its reduced circuit: RNN activity projected onto the A and B nodes using the inferred embedding matrix Q (Methods), with vector fields obtained from the reduced circuit’s dynamics. d Trial-averaged activity in Phases 1 and 2, focusing on the projection onto the A node for AB trials. The left panel shows the RNN activity projected with the embedding matrix Q T x . The right panel shows the reduced circuit y . Color darkness indicates Offer 1 values in Phase 1 (orange) and Offer 2 values in Phase 2 (blue). In Phase 2, solid and dashed lines represent trials where the RNN shows a higher probability of choosing A and B, respectively. The latent circuit’s dynamics closely resemble those of the RNN ( Fig. 5c ). Two attractors formed through positive feedback appear similar to lines, with choice representations for each juice aligned along them. Vector fields in the intermediate region push activity toward one attractor through mutual inhibition. Activity along each attractor qualitatively resembles the original RNN ( Fig. 5d ). For example, in AB trials, activity along the A axis increases with the value of Juice A during Phase 1, and is suppressed in Phase 2 by higher values of Juice B, resulting in high activity when choosing A and low activity when choosing B. Thus, two attractors with mutual inhibition support value encoding and comparison in juice tasks. We noticed moderate inhibition from the angle of two axes in PCA ( Fig. 4a-c ). Indeed, the mutual inhibition strength, w inh , plays a critical role in determining this angle (Supp Fig. 3). In RNN ensembles, wider angles between the two nodes corresponded with stronger w inh . A similar relationship appeared in a simplified circuit, where we assumed symmetry between the nodes and approximated the recurrent feedback strengths as 1. Notably, the moderate level of mutual inhibition w inh coordinates with input inhibition µw input to mitigate order bias. Too strong w inh suppresses Offer 2’s influence, biasing toward Offer 1. To counteract undesirable order bias, strong input inhibition µ from Offer 2 is required to relax suppression by Offer 1. This positive relationship between w inh and µ was evident across RNN ensembles and simplified circuits optimized to reduce order bias. Rotational dynamics and feedback inhibition in the latent circuit of order-task RNNs As in the juice tasks, the two nodes in the inferred circuit receive strong self-recurrent connections, supporting the memory of encoded values and choices during the delay periods ( Fig. 6 ). However, unlike the symmetric structure in juice tasks, the reduced circuit structure for the order task is asymmetric ( Fig. 6a,b ). First, the two nodes do not differentiate between inputs from different juices. Instead, one node receives excitation, the other inhibition, from both juice inputs. These nodes are referred to as the sensory and decision nodes, respectively. Second, the sensory node excites the decision node, while the decision node inhibits the sensory node in the form of feedback. Download figure Open in new tab Figure 6. Latent circuit inference in the order task. a Reduced circuit diagram with a sensory node for incoming offers and a decision node for memorized offers, independent of juice identity. b-d Same layout as Fig. 5b-d. In ( d ), a subset of trials was considered with a fixed Offer 2 value ( q B = 4). Color darkness indicates varying Offer 1 values ( q A =0,1,2,3,4), while solid and dashed lines represent trials where the RNN shows a higher probability of choosing Offer 1 and Offer 2, respectively. Such asymmetric structures support working memory and choice representations distinct from those in juice-task RNNs ( Fig. 6c,d ). Feedforward excitation from the sensory to the decision node enables a dynamic representation of working memory – under asymmetric input, Offer1 first activates the sensory node and then propagates to the decision node ( Fig. 6d ). Strong self-recurrent excitation prolongs the overall activity time constant, slowing the propagation and producing rotational trajectories in the state space ( Fig. 4g ). Feedback inhibition from the decision to the sensory node supports value comparison. By the end of Phase 1, decision node activity reflects Offer1 value, which suppresses the sensory node ( Fig. 6d ). In Phase 2, the sensory node becomes active only if the Offer 2 input overcomes this suppression. Thus, depending on the Offer 2 values relative to the Offer 1 values, sensory node activity becomes segregated. Because decision node activity eventually follows sensory node activity, the network shows lower activity for choosing Offer 1, and higher for choosing Offer 2 ( Fig. 6c-d and Fig. 4e ). Rotation dynamics are evident in vector fields of the inferred circuit, similar to those of the full RNN. We further examined how network parameters shape this rotation through eigenvector decomposition of the recurrent connections (Supp Fig. 4). Across RNN ensembles, the real part of eigenvalues related to the sum of the self-recurrent connections nearly cancels intrinsic leakage, yielding spiral dynamics without net convergence or divergence. The strengths of feedforward excitation and feedback inhibition modulate the imaginary part, which determines the rotational speed. Again, this part remains consistent across RNN ensembles, with trajectories rotating about 90 degrees over the 500 ms delay. The correspondence between rotation speed and delay durations suggests that the rotational dynamics may be sensitive to variations in delay periods. Nevertheless, the solution identified by the RNNs remains robust across a modest range of delays (Supp Fig. 5). When we trained RNNs with varying inter-offer periods, the dynamics remained rotational, with consistent choice output. However, varying the delay period between Offer 2 and choice-to-action mapping distorted the choice output, creating a bias toward Offer 1 with longer second delays and vice versa (Supp Fig. 6). Regression analysis and distinct correlation patterns in juice and order tasks Previous experimental studies have investigated the relationship between the encoding of asynchronously presented stimuli using bivariate linear regression in both perceptual 2 and economic decision-making 10 – 15 . Building on insights from population analysis and latent circuit inference, we performed a similar regression analysis on our task-optimized RNNs using the values of Offers 1 and 2. RNN activity in the last 200ms of each phase was averaged and regressed against the offer values – in Phase j (where j = 1 or 2, denoted by superscripts P1 and P2), the activity of unit was modeled as where v 1 and v 2 represent the values of Offer 1 and Offer 2, and β terms denote regression weights. To avoid spurious relationships arising from the correlation between offer values 15 , we used a test set of offer pairs where there is no correlation between v 1 and v 2 (Methods). We examined correlations among and ( Fig. 7 ). In the order task, and were positively correlated, indicating similar encoding of incoming offer values across phases ( Fig. 7b,c ). In contrast, was negatively correlated with both and , suggesting that the remembered value was encoded oppositely to the incoming one ( Fig. 7e,f,h,i ). This correlation pattern aligns with latent circuit inference, where the sensory node encodes incoming values identically across juices or orders, while the decision node, representing the memorized value, inhibits the sensory node ( Fig. 6 ). Note that the transformation of the Offer 1 representation across phases also affects the regression vector , which can attenuate its negative correlation with (Supp Fig. 7). Download figure Open in new tab Figure 7. Correlation patterns of regression weights for Offer 1 and Offer 2 values. a,d , g Example juice-task RNNs; b , e , h Example order-task RNNs; c , f , i Correlation coefficients across RNN ensembles, with results from example RNNs marked by red dots. a-c Correlation between Offer 1 representation in Phase 1 and Offer 2 representation in Phase 2. d-f Correlation between Offer 1 and Offer 2 representations within Phase 2. g-i Correlation between Offer 1 representations across Phases 1 and 2 ( n =50; Pearson correlation with P < 0.001 in a, b , d , g , h except for g where P = 0.351). Unlike the order-task RNNs, the correlation between and is positive in the juice-task RNNs ( Fig. 7g,i ). This positive correlation reflects stable value representations for each juice, highlighting a key distinction from the order task. Another notable feature is the discrepancy between correlation patterns obtained separately for AB and BA trials versus those obtained by averaging β terms across them (Supp Fig. 8). Specifically, correlations between and are positive when AB and BA trials are combined, but negative when analyzed separately ( Fig. 7a,c , Supp Fig. 8a). The latter reflects competition between the two value representations in the juice-task RNNs, whereas averaging β terms across AB and BA trials mixes activity along two distinct axes associated with each juice. This negative correlation between and in the segregated trials contrasts with the juice- and phase-agnostic encoding of incoming offer values in the order-task RNNs, which may further help differentiate among choice reference frames. Download figure Open in new tab Figure 8. Monkey experiment and neural data consistent with order-task RNN. a Sequential economic decision-making task where choice can be represented in spatial, order, or juice reference frames. Colors and point numbers represent juice type and quantity. b Classifier performance in decoding choices from orbitofrontal cortex (OFC) activity across reference frames. Solid lines show mean accuracy ± standard deviations of the mean (dotted), while dashed lines represent shuffle controls. c Correlation between Offer1 tuning in Phase 1 and order choice preference in Phase 2. d-f Correlations between Offer 1 tuning in Phase 1 and Offer 2 tuning in Phase 2 ( d ), Offer 1 and Offer 2 in Phase 2 ( e ), Offer 1 tuning in Phase 1 and Phase 2 ( f ) from OFC activity during the offer presentation period ( n = 437; Pearson correlation with P < 0.001in c-f ). Order-based choice signatures in experimental data We next examined the consistency of correlation patterns between the RNN and experimental data. Previous monkey studies on juice tasks with sequential offer representations had a similar structure to our RNN’s juice task, requiring the maintenance of juice identity without association to spatial locations until the action 15 , 16 . Recently, a modified juice task (K.N. and X.C., in preparation) was proposed in which Offers 1 and 2 were presented sequentially on different sides of the screen, with randomized sequences of juices A and B and left/right configurations ( Fig. 8a ). This allowed choices to be made within three reference frames: order, space, and juice. Decoding analysis in the OFC showed that the order reference frame emerges first, followed by a transformation into the spatial reference frame 20 ( Fig. 8b ). Given the dominance of the order reference frame, we explored the relationship between encoding and choice signals, as well as the correlation patterns among the weights for offer values from a restricted set of offer pairs with no correlation between offer values. We found that monkey data exhibited qualitative signatures of the order-task RNNs, both when pooled across subjects ( Fig. 8c-f ) and when analyzed individually (Supp Fig. 9). First, value tuning in Phase 1 was anti-correlated with choice preference, such that neurons with positive tuning to Offer 1 value in Phase 1 tended to prefer choosing Offer 2 ( Fig. 8c ; cf. Fig. 3e ). Second, encoding of incoming signals remained similar, as indicated by the positive correlation between and ( Fig. 8d ; cf. Fig 7b ). Lastly, the representation of the Offer 1 value in Phase 2 was negatively correlated with both and ( Figs. 8d,e ; cf. Figs. 7e,h ). Note that the correlation between and is weak — in one monkey, nearly zero—suggesting a strong influence of the transformation of the Offer 1 representation across phases (Supp Fig. 7,9h). Overall, analysis of neural data suggests a transformation in the encoding of Offer 1 across phases and a potential suppression of the value representation of Offer 2, in line with our predictions for order tasks ( Fig. 8g ). Discussion In this work, we used task-optimized RNNs 21 , 22 to investigate economic choices under sequential offer presentation, requiring memory of the first offer for comparison. Remarkably, a simple change in the choice-to-action mapping led to distinct reference frames of choices – commodity-based and order-based – despite identical offer presentations. Each exhibited distinct encoding, choice schemes, and memory representations. Commodity/juice-based choices relied on two attractors representing the value of each juice, supporting stable memory representation and decision-making through modest mutual inhibition. By contrast, order-based choices that do not require memorizing juice identity used juice-agnostic value representations that were dynamically transformed to distinguish the order in which offers were presented. These dynamics emerged from an asymmetric circuit structure with feedforward excitation, feedback inhibition, and strong self-recurrent excitation – all supporting rotation dynamics. Correlation analyses further revealed notable differences between juice- and order-based choices in how the first offer’s value was represented across task phases and related to choice signals. These two reference frames of economic choices are reminiscent of distinct mechanisms proposed for perceptual choices: one with mutual inhibition 23 and another with feedback inhibition 24 . Despite these parallels, these previous models relied on attractor dynamics for working memory. Even in the feedback-inhibition model 24 , memory maintenance was supported by strong recurrent excitation onto a so-called memory node – after information from the first offer is transferred to this node, the memory representation remains stable, rendering rotational dynamics unnecessary. Notably, in attractor-based memory, single-neuron activity may not appear constant due to task-irrelevant signals 25 – 27 . Recent studies have also cautioned that sequential firing patterns or data preprocessing can lead to a false interpretation of rotational dynamics when applying dimensionality reduction techniques 28 , 29 . In our work, single-unit activity during the delay period was similarly non-constant (e.g., Figs. 2a and 3a ), and some juice-task RNNs exhibited modest rotational indices ( Fig. 4i ). However, analysis of the reduced circuit suggested that stable attractors underpinned working memory, while the observed dynamic signals likely reflected transitions into attractor states or task-irrelevant dynamics orthogonal to the subspace spanned by the attractors. Rotational dynamics have gained interest in systems neuroscience 30 , linked to movement execution 31 , associative learning 32 , evidence accumulation 33 , and even processes occurring prior to task training 34 . Rotation dynamics in our order-based RNNs were functional, supporting working memory and decision-making. As in models with non-normal network connectivity 35 , memory representation evolved over time, ultimately rotating to an orthogonal direction to the sensory representation in the state space. Such orthogonal transformations have previously been highlighted for their roles in working memory in amplifying input signals 36 , filtering out distractors 37 , or supporting cognitive demand such as attention 38 , or decision-making 39 . Similarly, we show that orthogonal transformation is essential for comparing two sequential offers that converge on the same sensory node. Notably, the rotation dynamics also enable maintenance of the choice signal during the second delay period – a feature not achievable in feedback-inhibition models with attractor dynamics. In this study, we dissociated abstract decision formation from motor actions by introducing a postponed and randomized choice-to-action mapping. While similar paradigms have been used in juice-based choice tasks 8 , 15 , 40 , many studies did not impose such dissociation, allowing both non-spatial/abstract and spatial/action-based reference frames to emerge 6 . Still, some prefrontal regions, including the orbitofrontal cortex and ventromedial prefrontal cortex, have been proposed to form abstract decisions first 41 – juice-based under simultaneous presentation 5 and order-based under sequential presentation 10 , 12 . Consistently, Figure 8 shows that order-based signals emerged earlier than space-based ones in a task that did not constrain output contingency. Compelling theoretical frameworks proposed that decisions arise through distributed consensus across abstract and action-based representations 42 , 43 . How neural mechanisms supporting abstract decisions, as explored in this study, interact with action-based representations – possibly in distinct circuits 44 or layers 45 – remains to be explored. Several RNN studies have demonstrated that the mechanisms underlying cognitive functions can vary drastically depending on task demands. For example, variants of working memory tasks with different conditions on delay durations 46 , degrees of memory manipulation 47 , presence of network degradation 48 , or multiplexing time information 49 , have led to the emergence of distinct mechanisms – including attractor-based persistent activity, feedforward propagation, silent memory, and oscillatory coding. In line with these findings, our results show that when solving economic choice tasks with slight variations in output contingencies, RNNs can develop fundamentally different dynamics to support working memory and decision-making. These results underscore the flexibility of recurrent computations and support the view of decision-making as a task-dependent process spanning multiple representational formats. Methods RNN task definition We used the Python package PsychRNN 22 to define the “juice” and “order” task, create and train RNNs. We used a basic vanilla RNN architecture for all tasks, consisting of N = 50 recurrent units, 7 input channels, and 2 output channels. The input channels included two juice channels receiving juice quantity normalized by the maximum quantity, four choice-to-action mapping channels (two per task; Fig. 1b ), and one fixation channel which is active until the go signal ( Fig. 1c ). The RNNs dynamics followed the equation: , where g ( x ) = max( x , 0), and the intrinsic time constant τ = 100ms, and the simulation timestep dt = 10ms. White Gaussian noise was added to both external input u (std = 0.01) and synaptic input x (std = 0.1). The juice task was adapted from Ballesta & Padoa-Schioppa 15 and involved choice-to-action mappings linking juice types to actions (e.g., Juice A-Left/Juice B-Right). For the order task, we introduced two additional mappings linking offer order to actions (e.g. Offer 1-Left/Offer 2-Right). In each training trial, a pair of juice quantities ( q A , q B ) was sampled from the14 offer pairs used in one typical experiment session, with q A ∈ [0,4] and q B ∈ [0,8]. The 14 pairs are [(0, 1), (0, 2), (1, 0), (1, 3), (1, 4), (2, 1), (2, 2), (2, 3), (2, 4), (2, 6), (3, 2), (3, 3), (3, 8), (4, 4)]. Juice quantities were normalized by their respective maximal values before being fed into RNNs: and . As in the experiments, the presentation order of juices (AB or BA) was randomized across trials. In AB trials, was fed through the juice A channel in phase 1 and through the juice B channel in phase 2, and vice versa in BA trials. In phase 3, one of the four choice-to-action mapping channels (two for each task) was activated and remained on until the end of the trial. The fixation channel was active from the start of the trial until the go signal ( Fig. 1c ). RNN training procedure The RNNs were trained to produce an action output (L/R) following the go signal presented at 3000ms. The “correct” choice on each trial was stochastically sampled from a teaching psychometric curve and mapped to an action based on the trial-specific choice-to-action mapping cue. Throughout the study, we used a fixed teaching psychometric curve defined as: where the indifference point ρ teaching = 1.7 and the slope η teaching = 13. For instance, in a BA trial with ( q A , q B )=(2,4), the curve yields an 89.2% probability of choosing B. In an order-task trial where the choice-to-mapping cue is (Offer 1-L, Offer 2-R), the “correct” output of choosing B in BA trial would be translated into an activated L output channel and inactivated R output channel. In each training epoch, a batch of 112 trials was generated along with their corresponding stochastically generated “correct” outputs. The recurrent weight matrix W rec , input weight matrix W input , constant input b rec , output weight matrix W out , output bias b out and initial conditions x [0] were updated once every epoch by TensorFlow’s Adam optimizer to minimize the squared error between the target and actual outputs after the go signal. All trainable parameters were initialized using the default method (random Gaussian weight with radius 1.1). Empirically, we found that training 100000 trials (∼890 epochs) with a default learning rate (0.001) would normally converge. We determined convergence by both the mean squared errors and psychometric curves (described in the following paragraph). We used custom stopping criteria based on the psychometric curve to prevent overtraining. For each trained RNN, an offer was defined to be “selected” if its associated output channel had a greater time-averaged activity than the other channel after the go signal (3000-4000 ms). We calculated the frequency with which juice B was chosen under each offer pair condition and compared it to the expected choice probabilities. A psychometric curve was fit using logistic regression to estimate the RNN’s behavior indifference point ρ and slope η . Neurophysiology and data pre-processing The method for neurophysiological recordings, surgery, and behavioral task control for Figure 8 and Supp Figure 9 will be described in (K.N. and X.C., in preparation). Briefly, all experimental procedures conformed to the NIH Guide for the Care and Use of Laboratory Animals and were approved by the Institutional Animal Care and Use Committee at NYU Shanghai. Two adult male rhesus monkeys ( Macaca mulatta ; monkey N, 11.5 kg; monkey F, 17.0 kg) were trained to choose between two juices sequentially offered in variable amounts ( Fig. 8a ). The subjects were head-fixed, presented with visual stimuli on a computer monitor, and their eye positions were monitored to collect their responses. After the animals achieved stable task performance, craniotomy surgeries were performed to open rectangle recording windows – on the left hemisphere of monkey N and at the right hemisphere of monkey F. Acute extracellular recordings were performed with single or double 16-channel linear arrays within or across OFC and lateral prefrontal cortex (LPFC). This study includes only OFC data (824 neurons for monkey N and 402 neurons for monkey F). Recording sites varied across sessions. In each session, the relative value of the juices was estimated by logistic regression with a sliding window. Offer values were calculated in the same way as for the RNN units, but using session-specific indifference points. Offline sorted spike trains were preprocessed as follows. To remove drifting effects from spike sorting, the baseline activity of each trial (mean firing rate during the center fixation period) was subtracted from the firing rates at each time. Then, firing rates were binned with a sliding window (width = 100ms, step = 10ms) and z-scored by subtracting the mean firing rate across time and trials for each cell and dividing by the corresponding standard deviation. Decoding analysis in RNN and neural data In each trial, the RNN’s choice could be interpreted in terms of juice identity or order identity. For example, consider a trial in the order task trial with mapping cue = (offer1-L, offer2-R) and BA presentation order. If the RNN selected the Left output, the choice corresponds to juice B, and offer 1. In the experimental task, each offer was characterized by juice type, presentation order, and spatial locations, allowing choices to be labeled under three different reference frames. To quantify the choice representation in RNN activity, we trained linear discriminators to decode choice labels from population activity snapshots ( Fig. 1e-f ). Decoding performances are evaluated using 5-fold cross-validation, implemented with cross_val_score() and LinearDiscriminantAnalysis() function from Python package Scikit-learn . In Fig. 8b , the preprocessed experimental data underwent pseudo-population decoding in MATLAB. For each resampling, a pseudo population was constructed by randomly selecting 20 trials of each label for all cells, splitting into 5 cross-validators with 4 trials each, and building training-test sets through the leave-one-fold-out cross-validation. Linear discriminant classification ( classify in MATLAB) was then run to achieve the accuracy of label prediction. The resampling was repeated 100 times. Significance of decoding accuracies (DA) at each time bin was tested, in which raw DA (100 resamples) were compared to shuffled DA (100 resamples) at the significance level of p<0.001 by one-sided Wilcoxon rank sum tests. RNN single unit analysis In Figures 2 and 3 , we examined how RNN units encode the value of Offer 1 in Phase 1 and represent choices in Phase 2. Offer 1 values were defined as q B for Juice B and ρq A for Juice A, where ρ is each RNN’s behavior indifference point. To determine the tuning of each unit to Offer 1 value, we performed linear regression separately for trials involving each juice type. Time-averaged activities in the last 200ms of Phase 1 were used for linear regression. To determine choice preference in each reference frame, we conducted Receiver Operating Characteristic (ROC) analysis and computed Area Under the ROC Curves (AUC) for each single unit. We used roc_auc_score() function from scikit-learn . For the order reference frame, classifiers were defined such that the higher activity was associated with a choice of Offer 2, so an AUC > 0.5 indicates preference for choosing Offer 2, and vice versa. For the juice reference frame, classifiers associated high activity with a choice of Juice A, so units preferring choosing Juice A have AUC>0.5, and vice versa. RNN population analysis To examine the low-dimensional representation and dynamics of trained RNNs ( Fig. 4 ), we conducted principal component analysis (PCA) on population activity. To capture task-related variability across time and conditions, RNN activity from Phases 1 and 2 was concatenated across trials to form a data matrix X . We used the PCA() function from scikit-learn to estimate the principal components (PCs). We projected population activity onto the first two PCs for visualization. To visualize recurrent dynamics, we computed the derivatives of the input-free RNN dynamics at a grid of points in the PC1-PC2 plane (with all other PC dimensions fixed to zero) and plotted the resulting vector field. To assess the geometric relationship between juice representations in Phase 1 ( Figs. 4c,f ), we measured the angle between two juice coding axes. These axes were defined using the regression coefficients: for each juice type, we extracted the linear regression slopes with respect to the Offer 1 value across 50 units, forming two 50-dimensional vectors. The angle between them was measured as the inverse cosine of their normalized inner product. To quantify the rotational strength of population dynamics ( Fig. 4i ), we used the method of Kuzmina et al. 18 . The method utilizes the covariance matrix between RNN activity X and its derivative , where X is the matrix of concatenated RNN activity across trials. The time differential covariance matrix captures how each unit’s activity covaries with the derivative of other units’ activity. The rotational index was defined as , where λ i is the i -th eigenvalue of and Im( λ ) denotes the imaginary part. Latent circuit inference and analysis To gain mechanistic insight into RNN dynamics ( Figs. 5 , 6 ), we adapted the method from Langdon & Engel 19 . The method approximates high-dimensional RNN activity (time t within Phases 1 and 2) using a two-dimensional dynamical system and an orthonormal embedding matrix Q ∈ ℝ N ×2 such that The model was trained to minimize the sum of mean squared error . The dynamics of the reduced circuit are governed by , where w ij is the recurrent weight matrix, u ij is the input weight matrix, is the normalized input, and the nonlinearity function g ( y ) = max( y , 0). Parameters Q, w ij , and u ik ( i, j ∈ 1,2, k ∈ A, B ) were optimized using custom Python code with PyTorch with Adam optimizer (learning rate = 0.001, weight decay = 0.001). Optimization was stopped when the loss had not improved by more than 0.001 over 25 epochs. For each RNN, we repeatedly initialized parameters and optimized them until the reconstruction loss fell below the variability of RNN activity. For further analysis in the juice task, we parameterized a constrained model with symmetric recurrent connectivity and input matrix (Supplementary Figure 3): In Supplementary Figure 3, Supplementary Figure 5, and Supplementary Figure 6, we examined the effect of perturbing network or task parameters on the choice order bias 17 . We fit RNN choices separately for AB and BA trials to obtain indifference points ρ AB and ρ BA . The order bias ϵ is defined as: A bias towards Offer 1 yields a lower ρ AB and higher ρ BA , resulting in a negative ϵ . Correlation between regression coefficients in RNN and neural data In Figures 7 and 8 , as well as associated Supplementary Figures, we calculated the Pearson correlation coefficient between regression weights. Time-averaged activities in the last 200ms of the offer presentation period in each Phase were used for linear regression. As noted by Ballesta and Padoa-Schioppa 15 , spurious value tuning can be an artifact arising from unequal value ranges across juices and correlation between offer values v 1 and v 2 . To address this issue, we used a restricted set of offer pairs with no correlation between v 1 and v 2 in Figures 7 , 8 , and Supplementary Figure 9. Specifically, we included two trial types, either q A = 2 with varying q B , or q B = 4 with varying q A , noting that 2A is approximately equivalent in value to 4B. This design yields a cross-shaped distribution of offers without correlations. For monkey D, in sessions where the indifference points ρ deviated substantially from 2, we instead fixed q B at the value most comparable to 2A, rather than at 4. In Figure 8 , we further restricted the analysis to cells that were task-selective in at least one consecutive 500-ms task epoch, based on an ANOVA test (p<0.01). In Figure 7 and Supplementary Figure 7, correlations were computed between the pooled regression coefficients from all trials combining AB and BA trials. In Supplementary Figure 8, we estimated the β terms separately for AB and BA trials, and computed their average: Note that the results were qualitatively similar even when we estimated β from all trials. Acknowledgment We thank John Rinzel, Jeffrey Erlich, and Alex Reyes for their constructive suggestions throughout the research and for providing feedback on the manuscript. J.G. and S. L. were supported by STI2030-Major 38 Projects,No.2021ZD0203700/2021ZD0203705 and NYU Shanghai Boost Fund. K.N. and X.C.were supported by STI2030-Major 38 Projects, No.2021ZD0203700/2021ZD0203702 and NYU Shanghai Boost Fund. X. C. and S.L. acknowledge the support of the NYU-ECNU Institute of Brain and Cognitive Science at NYU Shanghai S. L. also acknowledges the support of the NYU Shanghai Center for Data Science. Funder Information Declared National Natural Science Foundation of China, https://ror.org/01h0zpd94 , 2021ZD0203700/2021ZD0203705 , 2021ZD0203700/2021ZD0203702 New York University Shanghai, https://ror.org/02vpsdb40 , NYU Shanghai Boost Fund Footnotes Declaration of interests: The authors declare no competing interests. References 1. ↵ Romo , R. , Brody , C. D. , Hernández , A. & Lemus , L. Neuronal correlates of parametric working memory in the prefrontal cortex . Nature 399 , 470 – 473 ( 1999 ). OpenUrl CrossRef PubMed Web of Science 2. ↵ Romo , R. & Salinas , E. Flutter Discrimination: Neural codes, perception, memory and decision making . Nat. Rev. Neurosci . 4 , 203 – 218 ( 2003 ). OpenUrl CrossRef PubMed Web of Science 3. ↵ Padoa-Schioppa , C. Neurobiology of Economic Choice: A Good-Based Model . Annu. Rev. Neurosci . 34 , 333 – 359 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 4. ↵ Levy , D. J. & Glimcher , P. W. The root of all value: A neural common currency for choice . Curr. Opin. Neurobiol . 22 , 1027 – 1038 ( 2012 ). OpenUrl CrossRef PubMed 5. ↵ Padoa-Schioppa , C. & Assad , J. A. Neurons in the orbitofrontal cortex encode economic value . Nature 441 , 223 – 226 ( 2006 ). OpenUrl CrossRef PubMed Web of Science 6. ↵ Hunt , L. T. , Woolrich , M. W. , Rushworth , M. F. S. & Behrens , T. E. J. Trial-Type Dependent Frames of Reference for Value Comparison . PLOS Comput. Biol . 9 , e1003225 ( 2013 ). OpenUrl CrossRef PubMed 7. ↵ Hayden , B. Y. & Moreno-Bote , R. A neuronal theory of sequential economic choice . Brain Neurosci. Adv . 2 , 239821281876667 ( 2018 ). OpenUrl 8. ↵ Cai , X. & Padoa-Schioppa , C. Neuronal evidence for good-based economic decisions under variable action costs . Nat. Commun . 10 , 393 ( 2019 ). OpenUrl CrossRef PubMed 9. ↵ Cai , X. Abstract Representations of Economic Choice in the Primate Orbitofrontal Cortex. SSRN Scholarly Paper at doi: 10.2139/ssrn.4984901 ( 2024 ). OpenUrl CrossRef 10. ↵ Strait , C. E. , Blanchard , T. C. & Hayden , B. Y. Reward value comparison via mutual inhibition in ventromedial prefrontal cortex . Neuron 82 , 1357 – 1366 ( 2014 ). OpenUrl CrossRef PubMed Web of Science 11. Strait , C. E. , Sleezer , B. J. & Hayden , B. Y. Signatures of value comparison in ventral striatum neurons . PLoS Biol . 13 , e1002173 ( 2015 ). OpenUrl CrossRef PubMed 12. ↵ Blanchard , T. C. , Hayden , B. Y. & Bromberg-Martin , E. S. Orbitofrontal Cortex Uses Distinct Codes for Different Choice Attributes in Decisions Motivated by Curiosity . Neuron 85 , 602 – 614 ( 2015 ). OpenUrl CrossRef PubMed 13. Azab , H. & Hayden , B. Y. Correlates of decisional dynamics in the dorsal anterior cingulate cortex . PLOS Biol . 15 , e2003091 ( 2017 ). OpenUrl CrossRef PubMed 14. ↵ Azab , H. & Hayden , B. Y. Correlates of economic decisions in the dorsal and subgenual anterior cingulate cortices . Eur. J. Neurosci . 47 , 979 – 993 ( 2018 ). OpenUrl CrossRef PubMed 15. ↵ Ballesta , S. & Padoa-Schioppa , C. Economic Decisions through Circuit Inhibition . Curr. Biol . 29 , 3814 - 3824 .e5 ( 2019 ). OpenUrl CrossRef PubMed 16. ↵ Shi , W. , Ballesta , S. & Padoa-Schioppa , C. Economic Choices under Simultaneous or Sequential Offers Rely on the Same Neural Circuit . J. Neurosci. Off. J. Soc. Neurosci . 42 , 33 – 43 ( 2022 ). OpenUrl Abstract / FREE Full Text 17. ↵ Padoa-Schioppa , C. Logistic analysis of choice data: A primer . Neuron 110 , 1615 – 1630 ( 2022 ). OpenUrl CrossRef PubMed 18. ↵ Kuzmina , E. , Kriukov , D. & Lebedev , M. Neuronal travelling waves explain rotational dynamics in experimental datasets and modelling . Sci. Rep . 14 , 3566 ( 2024 ). OpenUrl PubMed 19. ↵ Langdon , C. & Engel , T. A. Latent circuit inference from heterogeneous neural responses during cognitive tasks . Nat. Neurosci . 28 , 665 – 675 ( 2025 ). OpenUrl PubMed 20. ↵ Ni , K. & Cai , X. Prefrontal mechanisms of value working memory mediated economic decisions . ( 2024 ). 21. ↵ Barak , O. Recurrent neural networks as versatile tools of neuroscience research . Curr. Opin. Neurobiol . 46 , 1 – 6 ( 2017 ). OpenUrl CrossRef PubMed 22. ↵ Ehrlich , D. B. , Stone , J. T. , Brandfonbrener , D. , Atanasov , A. & Murray , J. D. PsychRNN: An Accessible and Flexible Python Package for Training Recurrent Neural Network Models on Cognitive Tasks . eneuro 8 , |pENEURO.0427-20.2020 ( 2021 ). 23. ↵ Machens , C. K. , Romo , R. & Brody , C. D. Flexible control of mutual inhibition: A neural model of two-interval discrimination . Science 307 , 1121 – 1124 ( 2005 ). OpenUrl Abstract / FREE Full Text 24. ↵ Miller , P. & Wang , X.-J. Inhibitory control by an integral feedback signal in prefrontal cortex: a model of discrimination between sequential stimuli . Proc. Natl. Acad. Sci. U. S. A . 103 , 201 – 206 ( 2006 ). OpenUrl Abstract / FREE Full Text 25. ↵ Brody , C. D. , Romo , R. & Kepecs , A. Basic mechanisms for graded persistent activity: discrete attractors, continuous attractors, and dynamic representations . Curr. Opin. Neurobiol . 13 , 204 – 211 ( 2003 ). OpenUrl CrossRef PubMed Web of Science 26. Murray , J. D. et al. Stable population coding for working memory coexists with heterogeneous neural dynamics in prefrontal cortex . Proc. Natl. Acad. Sci. U. S. A . 114 , 394 – 399 ( 2017 ). OpenUrl Abstract / FREE Full Text 27. ↵ Cueva , C. J. et al. Low-dimensional dynamics for working memory and time encoding . Proc. Natl. Acad. Sci . 117 , 23021 – 23032 ( 2020 ). OpenUrl Abstract / FREE Full Text 28. ↵ Lebedev , M. A. et al. Analysis of neuronal ensemble activity reveals the pitfalls and shortcomings of rotation dynamics . Sci. Rep . 9 , 18978 ( 2019 ). OpenUrl CrossRef PubMed 29. ↵ Shinn , M. Phantom oscillations in principal component analysis . Proc. Natl. Acad. Sci . 120 , e2311420120 ( 2023 ). OpenUrl CrossRef PubMed 30. ↵ Vyas , S. , Golub , M. D. , Sussillo , D. & Shenoy , K. V. Computation Through Neural Population Dynamics . Annu. Rev. Neurosci . 43 , 249 – 275 ( 2020 ). OpenUrl CrossRef PubMed 31. ↵ Churchland , M. M. et al. Neural population dynamics during reaching . Nature 487 , 51 – 56 ( 2012 ). OpenUrl CrossRef PubMed Web of Science 32. ↵ Libby , A. & Buschman , T. J. Rotational dynamics reduce interference between sensory and memory representations . Nat. Neurosci . ( 2021 ) doi: 10.1038/s41593-021-00821-9 . OpenUrl CrossRef PubMed 33. ↵ Aoi , M. C. , Mante , V. & Pillow , J. W. Prefrontal cortex exhibits multidimensional dynamic encoding during decision-making . Nat. Neurosci . 23 , 1410 – 1420 ( 2020 ). OpenUrl CrossRef PubMed 34. ↵ Pu , S. , Dang , W. Qi , X.-L. & Constantinidis , C. Prefrontal neuronal dynamics in the absence of task execution . Nat. Commun . 15 , 6694 ( 2024 ). OpenUrl PubMed 35. ↵ Goldman , M. S. Memory without Feedback in a Neural Network . Neuron 61 , 621 – 634 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 36. ↵ Stroud , J. P. , Watanabe , K. , Suzuki , T. , Stokes , M. G. & Lengyel , M. Optimal information loading into working memory explains dynamic coding in the prefrontal cortex . Proc. Natl. Acad. Sci . 120 , e2307991120 ( 2023 ). OpenUrl CrossRef PubMed 37. ↵ Parthasarathy , A. et al. Time-invariant working memory representations in the presence of code-morphing in the lateral prefrontal cortex . Nat. Commun . 10 , 1 – 11 ( 2019 ). OpenUrl CrossRef PubMed 38. ↵ Panichello , M. F. & Buschman , T. J. Shared mechanisms underlie the control of working memory and attention . Nature 592 , 601 – 605 ( 2021 ). OpenUrl CrossRef PubMed 39. ↵ Yoo , S. B. M. & Hayden , B. Y. The Transition from Evaluation to Selection Involves Neural Subspace Reorganization in Core Reward Regions . Neuron 105 , 712 - 724 .e4 ( 2020 ). OpenUrl CrossRef PubMed 40. ↵ Cai , X. & Padoa-Schioppa , C. Contributions of Orbitofrontal and Lateral Prefrontal Cortices to Economic Choice and the Good-to-Action Transformation . Neuron 81 , 1140 – 1151 ( 2014 ). OpenUrl CrossRef PubMed Web of Science 41. ↵ Padoa-Schioppa , C. & Cai , X. The orbitofrontal cortex and the computation of subjective value: consolidated concepts and new perspectives . Ann. N. Y. Acad. Sci . 1239 , 130 – 137 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 42. ↵ Cisek , P. Making decisions through a distributed consensus . Curr. Opin. Neurobiol . 22 , 927 – 936 ( 2012 ). OpenUrl CrossRef PubMed 43. ↵ Hunt , L. T. & Hayden , B. Y. A distributed, hierarchical and recurrent framework for reward-based choice . Nat. Rev. Neurosci . 18 , 172 – 182 ( 2017 ). OpenUrl CrossRef PubMed 44. ↵ Yim , M. Y. , Cai , X. & Wang , X.-J. Transforming the choice outcome to an action plan in monkey lateral prefrontal cortex: A neural circuit model . Neuron 103 , 520 – 532 ( 2019 ). OpenUrl PubMed 45. ↵ Livi , A. et al. Laminar Architecture of a Decision Circuit in Orbitofrontal Cortex. 2025.03.03.641234 Preprint at doi: 10.1101/2025.03.03.641234 ( 2025 ). OpenUrl Abstract / FREE Full Text 46. ↵ Orhan , A. E. & Ma , W. J. A diverse range of factors affect the nature of neural representations underlying short-term memory . Nat. Neurosci . 22 , 275 – 283 ( 2019 ). OpenUrl CrossRef PubMed 47. ↵ Masse , N. Y. , Yang , G. R. , Song , H. F. Wang , X.-J. & Freedman , D. J. Circuit mechanisms for the maintenance and manipulation of information in working memory . Nat. Neurosci . 22 , 1159 – 1167 ( 2019 ). OpenUrl CrossRef PubMed 48. ↵ Kozachkov , L. , Lundqvist , M. Slotine , J.-J. & Miller , E. K. Achieving stable dynamics in neural circuits . PLOS Comput. Biol . 16 , e1007659 ( 2020 ). OpenUrl PubMed 49. ↵ Zhou , S. , Seay , M. , Taxidis , J. , Golshani , P. & Buonomano , D. V. Multiplexing working memory and time in the trajectories of neural networks . Nat. Hum. Behav . 7 , 1170 – 1184 ( 2023 ). OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted September 04, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Output-Contingent Working Memory and Decision-Making in Economic Choices Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Output-Contingent Working Memory and Decision-Making in Economic Choices Jintao Gu , Kuntan Ni , Xinying Cai , Sukbin Lim bioRxiv 2025.08.29.673193; doi: https://doi.org/10.1101/2025.08.29.673193 Share This Article: Copy Citation Tools Output-Contingent Working Memory and Decision-Making in Economic Choices Jintao Gu , Kuntan Ni , Xinying Cai , Sukbin Lim bioRxiv 2025.08.29.673193; doi: https://doi.org/10.1101/2025.08.29.673193 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Neuroscience Subject Areas All Articles Animal Behavior and Cognition (7618) Biochemistry (17635) Bioengineering (13859) Bioinformatics (41846) Biophysics (21401) Cancer Biology (18534) Cell Biology (25422) Clinical Trials (138) Developmental Biology (13352) Ecology (19860) Epidemiology (2067) Evolutionary Biology (24285) Genetics (15582) Genomics (22463) Immunology (17700) Microbiology (40298) Molecular Biology (17141) Neuroscience (88424) Paleontology (666) Pathology (2825) Pharmacology and Toxicology (4813) Physiology (7633) Plant Biology (15107) Scientific Communication and Education (2042) Synthetic Biology (4284) Systems Biology (9808) Zoology (2267)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00