Sex differences in task engagement and lapse rate during reward learning plateaus

doi:10.1101/2025.10.29.685451

Sex differences in task engagement and lapse rate during reward learning plateaus

2025 · doi:10.1101/2025.10.29.685451

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 64,639 characters · extracted from preprint-html · click to expand

Sex differences in task engagement and lapse rate during reward learning plateaus | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Sex differences in task engagement and lapse rate during reward learning plateaus C.G. Aguirre , J.H. Woo , L. Alhabbal , T. Fujioka , R. Moore , View ORCID Profile T. Ye , J.J. Castrellon , A. Soltani , View ORCID Profile A. Izquierdo doi: https://doi.org/10.1101/2025.10.29.685451 C.G. 
Aguirre 1 Department of Psychology, University of California , Los Angeles, Los Angeles, CA 90095 Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: cgaguirre{at}ucla.edu aizquie{at}psych.ucla.edu J.H. Woo 2 Department of Psychological and Brain Sciences, Dartmouth College , Hanover, NH 03755 Find this author on Google Scholar Find this author on PubMed Search for this author on this site L. Alhabbal 1 Department of Psychology, University of California , Los Angeles, Los Angeles, CA 90095 Find this author on Google Scholar Find this author on PubMed Search for this author on this site T. Fujioka 1 Department of Psychology, University of California , Los Angeles, Los Angeles, CA 90095 Find this author on Google Scholar Find this author on PubMed Search for this author on this site R. Moore 1 Department of Psychology, University of California , Los Angeles, Los Angeles, CA 90095 Find this author on Google Scholar Find this author on PubMed Search for this author on this site T. Ye 1 Department of Psychology, University of California , Los Angeles, Los Angeles, CA 90095 Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for T. Ye J.J. Castrellon 1 Department of Psychology, University of California , Los Angeles, Los Angeles, CA 90095 Find this author on Google Scholar Find this author on PubMed Search for this author on this site A. Soltani 2 Department of Psychological and Brain Sciences, Dartmouth College , Hanover, NH 03755 Find this author on Google Scholar Find this author on PubMed Search for this author on this site A. Izquierdo 1 Department of Psychology, University of California , Los Angeles, Los Angeles, CA 90095 Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for A. 
Izquierdo For correspondence: cgaguirre{at}ucla.edu aizquie{at}psych.ucla.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Our understanding of sex differences in reward learning has been limited due to the predominant study of males, yet recent studies have uncovered significant differences in the use of adaptive strategies, sensitivity to negative feedback, and impulsivity. Here, we evaluated sex differences in flexible learning in two domains: the learning of stimulus- and action-based associations and their reversals. During action-based learning, rats selected between two identical visual stimuli presented on a touchscreen, where the spatial location predicted a higher probability of reward. For stimulus-based learning, rats chose between two distinct visual stimuli presented in pseudorandom spatial locations, one of which was associated with a higher probability of reward. Reversal phases involved switching reward contingency between the two actions or stimuli. To gain a detailed understanding of differences across conditions, we modeled animals’ trial-by-trial choices using reinforcement learning (RL) models and examined their steady-state behavior to capture transitions between distinct behavioral states. We found that female rats were more likely to omit trials and take longer to initiate trials in both domains. The omissions were more frequent in late-stage action-based reversal learning, once learning had plateaued. Moreover, although the estimated parameters of the best-fitting RL model revealed some sex differences, the model that incorporated transitions between different behavioral states provided a better overall fit to the data. This model also revealed that across all reversal phases, females exhibited a higher transition-specific lapse rate than males, indicating greater task disengagement once there was no need for further learning. 
Together, our fine-grained analysis of behavior adds to a growing literature on sex differences in flexible reward learning. Introduction There are persistent gaps in our understanding of sex differences in the context of reward-based learning and decision-making due to the sparsity of studies that include female animals. The emergence of sex-dependent effects in the few studies that have included both sexes has put into question the generalizability and interpretations of previous findings that only included males. There have been reports of sex differences in other measures that contribute to the decision-making process including patterns of exploration, use of adaptive strategies ( 1 , 2 ), impulsivity ( 3 ), and sensitivity to negative feedback ( 4 , 5 ). Importantly, studies that have included both sexes have not uncovered sex differences in learning accuracy in either action(spatial)-based ( 2 ) or stimulus-based ( 1 , 4 ) learning tasks. A common way to study flexible reward learning and decision-making is through reversal learning, which measures a subject’s ability to form flexible associations between stimuli and/or actions and their outcomes ( 6 – 8 ). Importantly, reversal learning paradigms can be used to probe learning under uncertain conditions in a variety of modalities (e.g. visual, spatial, olfactory). Additionally, when fine-grained trial-by-trial data is collected, such behavioral methods allow us to assess adaptive strategies like Win-stay and Lose-shift, gather information about attention, deliberation, and motivation, and compute sensitivity to positive and negative feedback ( 9 , 10 ). These performance measures may increase our sensitivity to detect sex differences by allowing us to capture dynamic behavioral changes within a session and across time. 
Sex differences have been described in several components of learning, such as the employment of adaptive strategies, with female mice displaying more Win-stay behaviors than males on a spatial-based two-armed bandit probabilistic task ( 2 ). Some groups have also uncovered that male and female rodents differ in explore/exploit behaviors, with female mice exploring less overall, but exploiting the better option much earlier in learning compared to males. Conversely, males tend to explore more overall and exploit the better option later in learning than females ( 1 , 2 ). Additionally, females tend to be more sensitive to negative feedback ( 4 , 5 , 11 , 12 ), exhibiting longer trial initiation latencies following unrewarded outcomes, in response to negative feedback ( 4 ). Altogether, these findings demonstrate that males and females may differ in their employment of adaptive strategies, as well as sensitivity to positive and negative feedback, which can influence learning. Here, we evaluated sex differences in the context of flexible learning using stimulus- and action-based reversal learning tasks. Animals were required to learn to associate either a spatial side (i.e. right or left) or a visual stimulus (i.e. fan or marble) with reward or no reward. After meeting criterion, the reward contingencies were reversed, such that the previously rewarded side or stimulus was no longer rewarded, and vice versa. Additionally, we varied the level of uncertainty in the environment by testing them on both deterministic (100/0) and probabilistic (90/10) reward outcomes. Overall learning and accuracy were assessed across trials (for action learning) and sessions (for stimulus learning), along with performance measures such as use of adaptive strategies (i.e., win-stay, lose-shift), omissions, and latencies, to better assess potential sex differences. 
To capture the mechanism underlying sex differences in action-based reversal learning, we utilized computational models based on reinforcement learning (RL) and also adapted a computational method based on a hidden Markov model ( 13 ) to characterize behavioral transitions, and modified it to include distinct lapse rates, both prior to and following the transition in reversal. Overall, the combination of these methods enabled a more detailed assessment of sex differences in the post-reversal dynamics of learning behavior. Materials and Methods Timeline of experimental procedures is presented in Figure 1A . Download figure Open in new tab Figure 1. Study timeline and the reversal learning tasks. ( A) Study Timeline. ( B ) Action-based reversal learning task structure. Animals initiated trials by nosepoking the white center square, after which two identical visual stimuli (fan or marble) were presented on the left and right side of the screen and whose spatial side was associated with either a 100/0 (deterministic) or 90/10 (probabilistic) reward probability. ( C ) Stimulus-based reversal learning task structure. Animals initiated trials by nosepoking the white center square, after which two different visual stimuli (fan and marble) were presented on the left or right side of the screen, with each stimulus associated with either a 100/0 (deterministic) or 90/10 (probabilistic) reward probability. Subjects Animals were adult (N=32, 15 females) Long-Evans rats (Charles River Laboratories) average age post-natal-day (PND) 65-85 at the start of experiments, with a 240g body weight minimum for females and 280g body weight minimum for males at the time of surgery. Subjects for these experiments were male and female surgerized null virus control animals from Aguirre et al. ( 14 ) (n=14) and additional cohorts of animals of the same age (n=18) (see details on viral surgeries below). 
All rats underwent a 3-day acclimation period immediately upon arrival, in which they were pair-housed and given food and water ad libitum , with no experimenter interference. After the 3-day acclimation period, animals were handled over 5 consecutive days for 10 min each, and continued to be provided food and water ad libitum . Following the handling period, animals were individually-housed in standard vivarium housing conditions (room temperature 22–24° C) with a reverse 12 h light/dark cycle (lights off at 6am). Animals underwent surgery and then were first tested on pre-training schedules after a week of post-operative care. Following pre-training, animals were tested on discrimination learning followed by reversal learning, at which point they were at the minimum 3-week expression time for Designer Receptors Exclusively Activated by Designer Drugs (DREADDs) and could be administered clozapine-N-oxide (CNO) or saline as vehicle (VEH). To include all animal data, we entered drug as a covariate in order to account for any potential behavioral effects following CNO administration. All procedures were conducted in accordance with the recommendations in the Guide for the Care and Use of Laboratory Animals of the National Institutes of Health and with the approval of the Chancellor’s Animal Research Committee at the University of California, Los Angeles. Surgery Viral Constructs Rats were singly-housed and remained in their home cages for 3-4 weeks prior to testing. As part of a viral control group, they were prepared with an enhanced Green Fluorescent Protein (eGFP) on a CaMKIIa promoter (AAV8-CaMKIIa-eGFP, Addgene #176015), infused bilaterally into regions of the frontal cortex or amygdala. The eGFP vector was injected in BLA neurons (n=8) [AP = −2.5; ML= ±5; DV = −7.8 (Vol. 0.1 µl), −8.1 (Vol. 0.2 µl)], vlOFC neurons (n=6) [AP = +3.7; ML= ±2.5; DV = −4.6, (Vol. 2 µl); AP= 4; ML= ±2.5; DV= −4.4, (Vol. 
0.15 µl)], or ACC neurons (n=18) [AP = +3.7; ML= ±0.8; DV =-2.6 (Vol. 0.3 µl)], measured from bregma and infused at a rate of 0.1 µl/min. Surgical Procedure Infusions of eGFP control virus were performed under isoflurane gas (1-5% in O 2 ) anesthesia using aseptic stereotaxic techniques prior to any behavioral testing. During surgery, all animals were administered 5 mg/kg s.c. carprofen (NADA #141–199, Pfizer, Inc., Drug Labeler Code: 000069) and 1cc saline. After being placed in the stereotaxic apparatus (David Kopf; model 306041), an incision was made on the scalp and the scalp was retracted. The skull was leveled with a +/− 0.3 mm tolerance on the anterior-posterior axis in order to ensure that bregma and lambda were in the same horizontal plane. Small holes were drilled in the skull above the infusion target. Virus was bilaterally infused at a rate of 0.1 µl per minute in target regions (see coordinates above), after which, 5 min were allowed to elapse before withdrawing the needle containing the virus. Histology After completing the experiment, rats were euthanized with an overdose of Euthasol (Euthasol, 0.8 mL, 390 mg/mL pentobarbital, 50 mg/mL phenytoin; Virbac, Fort Worth, TX), were transcardially perfused, and their brains removed for histological processing. Brains were fixed in 10% buffered formalin acetate for 24 h followed by 5 days in a 30% sucrose solution. Afterward, the brains were sectioned into 40-µm coronal sections, mounted onto slides, and cover-slipped with DAPI. To visualize eGFP expression in cell bodies, slices were visualized using a BZ-X710 microscope (Keyence, Itasca, IL), and analyzed with BZ-X Viewer and analysis software. Food Restriction Rats were placed on food-restriction five days prior to any behavioral testing, with males given 12-14 grams of chow a day, and females given 10-12 grams/ day. Food restriction level was maintained throughout behavioral testing. Water remained freely available in the home cage. 
Animals were weighed at least every other day, and their weight was monitored so as to not fall below 85% of their maximum, free-feeding weight. Drug administration Thirty min prior to behavioral testing, clozapine-N-oxide (CNO; 3 mg/kg in 95% saline, 5% DMSO) or saline vehicle (VEH) was administered systemically via intraperitoneal (i.p.) injection. We followed a within-subject design for reversal learning, such that all rats received CNO and VEH injections in counterbalanced order. Thus, if a rat received VEH on the first reversal (R1), it was administered CNO on the second reversal (R2), with the same drug order for the third reversal (R3) and the fourth reversal (R4), or vice versa, as reported in Aguirre et al. 2024 ( 14 ). Behavioral Testing Pretraining Behavioral testing was conducted in operant conditioning chambers containing an LCD touchscreen and a sucrose pellet dispenser on the opposing side. All chamber equipment was operated using customized ABET II TOUCH software (Lafayette Instrument Co., Lafayette, IN). The pretraining protocol was adapted from established procedures ( 15 ), consisting of a series of training schedules: Habituation, Initiation Touch to Center (ITC), and Immediate Reward (IM). These pretraining schedules were designed to train rats to collect sucrose pellet rewards from the magazine, nose poke a center stimulus to initiate a trial, and to select a stimulus located on either the left or right side of the screen to obtain a reward. Pretraining schedules and criterion have been reported in more detail elsewhere ( 14 ). After completing all pretraining schedules, rats were advanced to the discrimination (initial) phase of either the action- and/or stimulus-based reversal learning task, with the task order counterbalanced ( Fig. 1 ). Action-based deterministic discrimination learning After completion of all pretraining schedules, rats were advanced to the discrimination (initial) phase of the action-based task ( Fig. 1B ). 
Rats were required to touch a white square stimulus on the center of the screen (40 seconds) to initiate a trial, after which they were presented with the same two visual stimuli (i.e., marble or fan) on the left and right side of the screen (60 seconds). Rats could nosepoke either the spatial side that was rewarded with a sucrose pellet, or the spatial side that was unrewarded, followed by a 10 s inter-trial interval (ITI). If the trial was unrewarded, a time-out of 5 s occurred prior to the ITI. To meet criterion, rats had to choose the correct side on 75% or more of the trials and collect at least 60 rewards during a 60 min testing session on two consecutive days. Animals were not administered any CNO or VEH injections during discrimination learning. After meeting criterion, rats were advanced to the first reversal phase in the next testing session. Action-based reversal learning After the discrimination learning phase, the rats underwent four reversals. Rats were injected intraperitoneally with either 3 mg/kg of CNO or VEH 30 min before each reversal testing session. The side previously associated with the higher reward probability (p R (B)=1.0) was now associated with a lower reward probability (p R (W)=0.0), and vice versa. The criterion was the same as the deterministic discrimination phase. After reaching the criterion for the first deterministic reversal phase (i.e., R1), the rats advanced to the second deterministic reversal phase (i.e., R2) beginning on the next testing session. Rats that had previously received VEH during the first reversal would now receive CNO injections during the second reversal, and vice versa. After completing two deterministic reversals, rats underwent two more reversals under probabilistic conditions (reversals 3 and 4), where the side with the highest reward probability was associated with p R (B)=0.9, and the side with the lower reward probability was associated with p R (W)=0.1. 
Stimulus-based deterministic discrimination learning After completion of all pretraining schedules, rats were advanced to the discrimination (initial) phase of learning of the stimulus-based task ( Fig. 1C ). Rats were required to touch a white square stimulus on the center of the screen to initiate a trial, after which they were presented with two different visual stimuli (i.e., marble and fan). Stimuli were randomly assigned as the correct or incorrect stimulus, associated with either a p R (B)=1.0 or p R (W)=0.0 probability of reward, respectively. Rats would nosepoke the stimulus of their choosing and depending on its reward probability, received a sucrose pellet reward or 5 s time-out period, followed by a 10 s ITI. The criterion was the same as the action-based task. Rats were given a maximum of 10 days to achieve criterion and then were advanced to the first reversal learning phase. Animals were not administered either CNO or VEH injections during discrimination learning. Stimulus-based reversal learning After the discrimination learning phase, the rats underwent four reversals. Rats were injected intraperitoneally with either 3 mg/kg of CNO or VEH 30 min before each reversal testing session. The stimulus previously associated with the higher reward probability (p R (B)=1.0), was now associated with a lower reward probability (p R (W)=0.0), and vice versa. The criterion was the same as the deterministic discrimination phase. After reaching the criterion for the first deterministic reversal phase (i.e., R1), the rats advanced to the second deterministic reversal phase (i.e., R2) beginning on the next testing session. Rats that had previously received VEH during the first reversal would now receive CNO injections during the second reversal, and vice versa. 
After completing two deterministic reversals, rats underwent two more reversals under probabilistic conditions (reversals 3 and 4), where the stimulus with the highest reward probability was associated with p R (B)=0.9, and the stimulus with the lower reward probability was associated with p R (W)=0.1. Data Analyses MATLAB (MathWorks, Natick, Massachusetts; Version R2022b) was used for all statistical analyses and figure preparation. For each Action and Stimulus-based learning dataset, we fitted Generalized Linear Models (GLMs; fitglme ) to examine the effects of sex on the probability of choosing the better option across trials or sessions. Mixed-effects GLMs were conducted for each task separately, for each outcome variable, and with trials (action-based) or sessions (stimulus-based) as a within-subject factor, sex as a between-subject factor, and individual rat as a random factor using one of the following formulas: Action-based task: γ ∼ [1 + sex + trial + (1 + trial | rat)]; Stimulus-based task: γ ∼ [1 + sex + session + (1 + session | rat)]. Since learning reached asymptote after 5 days for stimulus-based reversal learning, only the first 5 testing sessions were included in the GLM for learning in that domain. For action-based learning, only trials included in the first two testing sessions were included in the GLM analyses, since most rats only required two days to meet criterion. Significant reversal number and/or sex interactions were further analyzed with a narrower set of fixed factors and Bonferroni-corrected post-hoc comparisons. 
Dependent measures for learning included probability of choosing the correct or better option (for which we fit the raw choice data to a binomial distribution), number of rewards collected, median initiation latencies and number of omissions (latency to initiate a trial and failure to initiate a trial, respectively), median correct and median incorrect choice latencies (latency to select the correct or better stimulus or spatial side and latency to select the incorrect or worse stimulus or spatial side, respectively), median reward latencies (latency to collect the reward), probability of Win-stay, and probability of Lose-shift. Probability of Win-stay and Lose-shift adaptive strategies were calculated such that each trial was classified as a win , when the animal received a sucrose pellet and as a loss if there was no reward delivered. Statistical significance was noted when p-values were less than 0.05. All Bonferroni post-hoc tests were corrected for number of comparisons. Outliers greater than two standard deviations above the mean were removed from the dataset. Analysis of choice behavior using reinforcement learning (RL) models To capture the difference in learning and choice behavior between male and female rats, we utilized two simple conventional RL models as investigated in our previous study ( 14 ). The subjective estimate of reward for each choice option (Q i ) was updated on a trial-by-trial basis based on the discrepancy between actual and expected reward values. In the first model, referred to as RL , the value estimate of the chosen option ( Q C ) was updated as follows: $$Q_C(t+1) = Q_C(t) + \alpha \left( R(t) - Q_C(t) \right) \tag{1}$$ where R (t) indicates reward outcome on trial t (1 if rewarded, 0 otherwise), and α is the learning rate controlling the amount of update. The second model, referred to as RL decay , additionally updated the value of the unchosen option ( Q U ) as follows ( 16 ): $$Q_U(t+1) = (1 - \gamma)\, Q_U(t) \tag{2}$$ where γ is the decay rate controlling the amount of passive decay in the value of the unchosen option. 
For this model, on omitted trials where no choice was made, both left and right options were considered unchosen, and their value estimates decayed passively following this equation. Both models used the following soft-max function to compute the probability of choosing a leftward option ( P L ): $$P_L(t) = \frac{1}{1 + \exp\left( -\beta \left( Q_L(t) - Q_R(t) \right) \right)} \tag{3}$$ where Q L and Q R indicate the value estimates of left and right option, respectively, and β is the inverse temperature or choice sensitivity governing the extent to which higher-valued options are consistently selected. The log-likelihood for each session was given by: $$LL = \sum_{t=1}^{N} \left[ C_L(t) \log P_L(t) + C_R(t) \log P_R(t) \right] \tag{4}$$ where C L and C R indicate whether the animal has chosen left or right option, respectively (1 if chosen, 0 otherwise), and P L and P R indicate choice probability obtained from Eq. (3) on trial t ( P R = 1 − P L ). To assess the goodness-of-fit, we computed the Akaike information criterion (AIC) for each session as follows: $$\mathrm{AIC} = \frac{2k - 2\,LL}{N} \tag{5}$$ where k is the number of free parameters in the model (two for RL and three for RL decay ), and LL is the best (i.e., maximum) log-likelihood value selected from the iteration for the given session as computed in Eq. (4) . N indicates the number of trials in the given session. Since each session differed in the number of total trials, we normalized the AIC value by the number of trials. We used the standard maximum likelihood estimation method (using fmincon in MATLAB) to fit choice data and estimate the parameters for each session of the experiment. For each run, we repeated the fit from 100 different initial conditions selected from an evenly spaced search space to avoid local minima. Capturing behavioral state transitions In addition to the RL models above, we also examined behavioral transitions following reversals using a model based on a Hidden Markov Model ( 13 ). Specifically, we considered two sigmoidal transition functions that involved the latency, the speed, and the lapse rate of transition after reversal in each session. 
The first sigmoidal curve, referred to as SC1 , described the probability of choosing the better rewarding option ( P Better ) as follows: $$P_{Better}(t) = \epsilon + \frac{1 - 2\epsilon}{1 + \exp\left( -\sigma (t - s) \right)} \tag{6}$$ where σ is the slope of the sigmoidal curve controlling the steepness (speed) of transition, s is the offset of the curve specifying the latency of transition, and ε is the lapse rate specifying the rate of error before and after transition. Because the performance could potentially differ before and after the choice transition, we also tested the second sigmoidal curve, referred to as SC2, to dissociate the lapse rates before and after the transition has occurred (as illustrated in Fig. 5A ). This two-lapse function was written as: $$P_{Better}(t) = \epsilon_1 + \frac{1 - \epsilon_1 - \epsilon_2}{1 + \exp\left( -\sigma (t - s) \right)} \tag{7}$$ where ε 1 and ε 2 represent the rates of error before and after the choice transition, respectively. As was done for the RL models, we fitted the sigmoidal curves to the choice data and estimated the parameters for each session of the experiment. To obtain the goodness-of-fit comparable with that of the RL models, we defined the log-likelihood with each session as: $$LL = \sum_{t=1}^{N} \left[ C_B(t) \log P_B(t) + C_W(t) \log \left( 1 - P_B(t) \right) \right] \tag{8}$$ where C B and C W indicate whether the animal has chosen better or worse option, respectively (1 if chosen, 0 otherwise) on trial t . Instead of excluding omitted trials in which no choice was made, which are crucial to our analysis, we coded those trials as 0.5 such that probability of choosing better and worse options were weighted equally. P B ( t ) is the choice probability for better option as calculated above in Eq. (6) or ( 7 ). Note that this definition is equivalent to the log-likelihood obtained from the RL model in Eq. (4) . To quantify the goodness-of-fit, we also computed the AIC normalized by number of trials per session, for sigmoid functions according to Eq. (5) ( k = 3 for SC1 , and k = 4 for SC2 ). 
Results Females and males learn initial discriminations at a comparable rate though they differ in trial initiations Both males and females learned initial discriminations comparably across trials and sessions: There were no sex differences in discrimination learning accuracy (i.e., the probability of choosing the correct side) across trials in the action-based task (β sex = −0.02, p = 0.55; Fig. 2A ) or across sessions in the stimulus-based task (i.e., probability of choosing the correct visual stimulus; β sex = −0.03, p = 0.50; Fig. 2D ). Download figure Open in new tab Figure 2. Females are slower to adjust to stimulus-based reversals than males. (A) Action-based discrimination learning (i.e. the probability of choosing the correct side) is plotted for the first 200 trials, with males (purple) and females (grey) exhibiting comparable learning across trials. (B) There were no sex differences in deterministic (100/0) reversal learning across trials in the action-based task for R1 and R2, only a significant main effect of trials. (C) There were no sex differences in probabilistic (90/10) reversal learning across trials in the action-based task for R3 and R4, only a significant main effect of trials. (D) Stimulus-based discrimination learning (i.e. the probability of choosing the correct stimulus) is plotted for the first 5 days, with males (orange) and females (grey) exhibiting comparable learning across days. (E-F) There was a significant sex difference in reversal learning across sessions in the stimulus-based task, with females exhibiting slower learning than males following both deterministic (E) and probabilistic (F) reversals. Note: Solid lines depict group averages and shading represents the Standard Error of the Mean (SEM). Action-based discrimination learning Averaged across two sessions, females omitted more trials (β sex = −21.16, p = 0.001), and collected fewer rewards (β sex = 78.85, p = 0.02) than males in action-based discrimination learning. 
There were no sex differences in any of the latencies measured, including initiation latencies ( p = 0.57), correct choice latencies ( p = 0.52), incorrect choice latencies ( p = 0.17), or reward latencies ( p = 0.54). Stimulus-based discrimination learning Averaged across five sessions, females took longer to initiate trials (β sex = −1.22, p = 0.03) than males in stimulus-based discrimination learning, but there were no sex differences in the number of initiation omissions (β sex = 6.10, p = 0.70), or rewards collected (β sex = 38.81, p = 0.52). There were also no sex differences in any of the other latencies measured, including correct choice latencies ( p = 0.70), incorrect choice latencies ( p = 0.27), or reward latencies ( p = 0.64). Females and males learn action-based, but not stimulus-based, reversals at a similar rate Action-based reversal learning Both females and males learned action-based reversals at a similar rate (β sex = −0.01, p = 0.93), with no significant effect of reversal number (p=0.15), or sex*reversal interaction (p=0.90), indicating females and males learned deterministic and probabilistic reversals, similarly. There was also a significant effect of session (β session = 0.32, p =1.60e-07) and trial (β trial = 0.001, p = 6.62e-64), suggesting animals improved both within and across sessions ( Figs. 2B and 2C ). We found that on average animals achieved the 75% criterion for both session 1 (M±SEM: 0.81±0.08) and session 2 (0.93±0.10), indicating that learning of the new contingency following a reversal happened within the first session. Hence, only early learning during the first session is depicted in the learning curves across reversals ( Figs. 2B and 2C ). Stimulus-based reversal learning Females were slower than males to learn following a reversal in the stimulus-based task (β sex = 0.07, p = 0.02), in line with previous findings ( 17 – 20 ). 
Animals still exhibited small improvements across sessions (β session = 0.02, p = 0.003), yet there was no significant effect of reversal ( p = 0.14), or sex*reversal interaction ( p = 0.06), indicating deterministic and probabilistic reversals had a similar effect on learning in males and females ( Figs. 2E and 2F ). Unlike action-based reversals, we found that animals on average were below chance levels for session 1 (M±SEM: 0.42±0.01) and only slightly above chance for session 5 (0.53±0.07), indicating that animals exhibited poor learning during both sessions, but did show modest improvements in accuracy from session 1 to session 5. Increased trial omissions in females occur in late-stage action-based reversal Action-based reversal learning There was a sex difference in initiation omissions (β sex = - 17.48, p = 0.04), and in initiation latencies (β sex = −1.18, p = 0.01), with females omitting more trials and taking longer to initiate than males across two sessions ( Figs. 3A and 3B ). There were no sex differences in any other latencies measured, including correct choice latencies ( p = 0.52), incorrect choice latencies ( p = 0.17), or reward latencies ( p = 0.54), only an effect of reversal number ( Fig. S1 A-C ). Download figure Open in new tab Figure 3. Females omit more trials, exhibit longer initiation latencies, and collect fewer rewards than males in both action- and stimulus-based reversals. ( A ) Average number of initiation omissions are plotted for males (purple) and females (grey) for the action-based task. Females omitted more trials than males in action-based reversal learning. (B ) Females take longer to initiate trials on average than males in action-based reversal learning. ( C ) Males collected more rewards than females on average in action-based reversal learning. ( D ) Average number of initiation omissions are plotted for males (orange) and females (grey) for the stimulus-based task. 
Females omitted more trials than males in stimulus-based reversal learning. ( E ) Females take longer to initiate trials on average than males in stimulus-based reversal learning. ( F ) Males collected more rewards on average than females in stimulus-based reversal learning. Note: Bar graphs represent overall group means and data points overlaid on bar plots indicate mean of individual animals. Asterisks indicate significant difference between male and female rats, *p<.05, **p<.01, ***p<.001. To assess whether these sex differences emerged during early or late reversal learning, we further analyzed initiation omissions and latencies during session 1 (early) and session 2 (late). Females omitted more trials than males in session 2 (β sex = −11.63, p = 0.01), but there were no differences in session 1 (β sex = −4.03, p = 0.30) ( Fig. 4A ). Females took longer to initiate trials than males on both session 1 (β sex = −11.63, p = 0.01) and session 2 (β sex = −4.03, p = 0.30), regardless of stage of learning ( Fig. 4B ). Thus, females were more likely to omit trials in late learning, after they had learned the new contingency well, but took longer to initiate trials regardless of stage of learning. Download figure Open in new tab Figure 4. Females omit more trials and collect fewer rewards than males in late-stage action-based reversals. (A) Average number of initiation omissions are plotted for males (purple) and females (grey) for sessions 1 and 2 of the action-based task. Females omitted more trials than males in session 2 of action-based reversal learning. ( B ) Females took longer to initiate trials on average than males in both sessions of action-based reversal learning. ( C ) Males collected more rewards than females overall and in session 2 of action-based reversal learning. ( D ) Average number of initiation omissions are plotted for males (orange) and females (grey) for sessions 1 and 5 of the stimulus-based task. 
Females omitted more trials than males in both session 1 (first) and session 5 (last) of stimulus-based reversal learning. ( E ) Females took longer to initiate trials than males in both session 1 and session 5 of stimulus-based reversal learning. ( F ) Males collected more rewards than females overall and in session 2 of stimulus-based reversal learning. Note: Bar graphs represent overall group means and data points overlaid on bar plots indicate mean of individual animals. Asterisks over bar plots indicate significant difference between male and female rats, *p<.05, **p<.01, ***p<.001. Stimulus-based reversal learning Females omitted more trials (β sex = −59.30, p = 0.0002) and took longer to initiate trials than males (β sex = −2.28, p = 0.005) in stimulus-based reversal learning ( Figs. 3D and 3E ). There was a significant sex*reversal interaction (β sex*reversal = 0.49, p =0.02), revealing that females took longer to initiate trials than males for reversal 1 (p=0.02), reversal 2 (p=0.004), reversal 3 (p=0.03), but not reversal 4 (p=0.89). There were no sex differences in any other type of latency measure: correct choice latencies ( p = 0.70), incorrect choice latencies ( p = 0.27), or reward latencies ( p = 0.64). There was only an effect of reversal number on correct and incorrect choice latencies ( Fig. S1D-F ). We compared sessions 1 and 5 to assess whether sex differences on initiation omissions and latencies were emerging during early or later reversal learning (note: “later” in the stimulus-based case still constituted chance performance, and not mastery-level performance, as in action-based reversal). In this case, females omitted more trials than males in both session 1 (β sex = −23.99, p = 1.40e-05) and session 5 (β sex = −12.72, p = 0.001) ( Fig. 4D ). Females also took longer to initiate trials than males in both session 1 (β sex = −4.56, p = 0.003) and session 5 (β sex = −3.30, p = 0.003) ( Fig. 4E ). 
Females omitted more trials and exhibited longer latencies than males regardless of early or later learning. Use of Win-Stay in action-based reversal learning is influenced by sex Action-based reversal learning To capture animals’ sensitivity to positive and negative feedback, we assessed their use of win-stay and lose-shift adaptive strategies in action-based reversal learning. Males were more likely to employ the Win-Stay strategy than females (β sex = 0.04, p = 0.04) in action-based reversal learning ( Fig. S2A ), but there were no sex differences in the use of Lose-shift (β sex = −0.07, p = 0.30; Fig. S2B ), indicating that males were more sensitive to positive, but not negative, feedback than females. Stimulus-based reversal learning There were no sex differences in the use of adaptive strategies, either Win-stay ( p = 0.99) or Lose-shift ( p = 0.21), in stimulus-based reversal learning ( Fig. S2C-D ), suggesting that males and females used both strategies similarly. Differences in reversal learning identified through fitting of choice data Action-based reversal learning To capture the difference in the mechanism underlying action-based reversal learning between male and female rats, we next utilized computational models based on reinforcement learning (RL), and also examined behavioral transitions following reversals using a model based on a Hidden Markov Model. We did not include stimulus-based reversal learning in subsequent analyses as reinforcement learning models proved to be a poor fit due to much slower learning compared to the action-based task. For RL models, we found that the model with a passive decay parameter ( RL decay ) had a significantly lower AIC than the simplest model ( RL ), indicating a better fit to the choice data ( p = 1.2267e-04, signed-rank test). 
Similarly, the sigmoidal curves with an additional lapse parameter ( SC2 ) better accounted for the choice data than the simpler function ( SC1 ) based on AIC ( p = 1.2911e-21, signed-rank test). Comparing the goodness-of-fit between two types of models ( RL decay vs. SC2 ), we found that sigmoidal transition functions better account for the animals’ choice behavior than RL models ( p = 4.2398e-07, signed-rank test). The results were consistent when tested among male ( p = .00484) or female ( p = 1.8427e-05) rats separately. This suggests that the sigmoidal curve, although simpler and lacking trial-by-trial integration of reward feedback into choice probability, better accounted for the rats’ transition behavior on each day of the reversal. Consistent with this interpretation, we found overall small values of the estimated learning rates in the RL model ( RL decay ), revealing slow rates of learning in both male and female rats ( Fig. S3 ), especially during the first day of the reversal (M±SEM across subjects: male: 0.0549±0.0167; female: 0.0664±0.0236; Fig. S4 ). Additionally, we found females had a reduction in the decay parameter for the unchosen options for the first reversals across all sessions compared to males (p=0.037; Fig. S3A ), suggesting females took longer to forget the unchosen option’s value. Comparing the goodness-of-fit in the best model ( SC2 ) between male and female rats, we found that male choice behavior was overall more predictable as indicated by lower AIC ( p = 1.0302e-06, rank sum test). Given these results, we next investigated the estimated parameter of the best-fitting sigmoidal function ( Fig. 5A ). Overall, the lapse rates were significantly larger prior to the transition point compared to after the transition for both male ( p = 1.5991e-32) and female rats ( p = 5.4109e-41; signed-rank test comparing ε 1 vs. 
ε 2 ), consistent with the notion that rats commit more errors before transitioning to the other choice option after reversal. Yet, analysis using mixed-effects GLMs revealed that there was a significant sex difference in the second lapse rate (ε 2 ), with female rats having a higher lapse rate after the transition point compared to males (β sex = 0.155, p = .0017; Fig. 5B ). No significant main effects of sex were observed for the other parameters (offset s : p = .238; slope α: p = .198; first lapse rate ε 1 : p = .308). These results indicate that the behavioral tendency that most distinguishes males and females is the after-transition period. To confirm this possibility directly, we next compared the improvement in the choice accuracy after the transition point, estimated by the offset s (i.e., comparing $P(\text{correct} \mid \text{Trials} < s)$ with $P(\text{correct} \mid \text{Trials} > s)$ within each session). We found a significant main effect of sex (β sex = −0.183, p = .0212), with the female rats showing reduced improvement in the choice accuracy after transitioning to the better choice option. Repeating the same analysis for the number of initiation omissions (i.e., comparing $P(\text{omit} \mid \text{Trials} < s)$ with $P(\text{omit} \mid \text{Trials} > s)$), we again found a significant sex difference (β sex = 0.187, p = .0014). Download figure Open in new tab Figure 5. Female rats exhibit higher lapse rates after transition compared to male rats during action-based reversal learning. (A) Illustration of sigmoidal transition curve with two lapse rates. To characterize the animals’ switching dynamics, we fitted a logistic regression model with four parameters ( s , α, ε 1 , ε 2 ) to observed choices. These four parameters represent the latent transition between actions (e.g. switching from left to right) following a reversal: the latency offset s (i.e. how many trials until switching to the other side), slope α (i.e. the rate at which the switch happens), first lapse ε 1 (i.e. error rate before transition, immediately following a reversal), and second lapse ε 2 (i.e. 
error rate after transition, once learning of reversal has occurred). ( B-E ) Fitted parameters of the choice curves during the four reversal phases. For the deterministic (100/0) reversal R1 and R2 ( B-C ), there was a significant sex difference in the second lapse rate parameter ε 2 , such that females had a higher error rate after the transition once they had learned the reversal, compared to male rats. During probabilistic reversal (90/10) R3 and R4 ( D-E ), females also tended to have higher second lapse ε 2 compared to males. In R4 ( E ), females also had a sharper transition (i.e. faster switching) than males. Data points overlaid on top of the bar plots indicate mean of individual rats. Asterisks indicate significance from Wilcoxon rank sum test. Stimulus-based reversal learning We next investigated the fit to the animals’ choice data during the stimulus-based task using the same RL models and sigmoidal transition curves. The results were overall consistent with the action-based task, such that the sigmoid function with two lapse parameters ( SC2 ) accounted for the choice behavior better than both the single-lapse sigmoid function ( SC1 ) and the better RL model based on AIC (signed-rank test; SC2 vs. SC1 : p = .0195; SC2 vs. RL decay : p = .002). However, when comparing the quality of fit to the action-based task, we note that the fit to the stimulus-based task was significantly worse even for the best model, offering minimal benefit over the null model with chance prediction (McFadden’s R 2 for SC2 in stimulus-based task: R 2 = 0.1313; Supplementary Table 1 ). For this reason, we did not follow up with the analysis on the estimated parameters of the model for the stimulus-based task. Discussion We evaluated sex differences in the context of flexible learning using stimulus- and action-based reversal learning tasks, which required animals to associate either a visual stimulus or spatial side to an outcome. 
Animals experienced multiple reversals that varied in probabilistic reward outcomes (100/0, 90/10) and were tested on several measures, including learning and accuracy, task engagement (e.g. attention), motivation, and sensitivity to positive and negative feedback. Many of these outcome measures were collected on a trial-by-trial basis, allowing us to capture dynamic behavioral changes across time (e.g. trials, sessions). We found consistent sex differences in trial initiation omissions and trial initiation latencies across both stimulus- and action-based reversal learning. Using a reinforcement learning model to fit choice data and characterization of transitions between different behavioral states, we found females had a higher transition-specific lapse rate and poorer choice accuracy after the transition point, once there was no need for further learning. We also found that females had a lower decay rate during the first reversal, such that they took longer to forget the unchosen option’s value compared to males. Consistent with prior literature we did not find a sex difference in accuracy during discrimination learning ( 3 , 13 , 21 , 22 ) irrespective of domain (i.e., actions or stimuli). Although females were slower to adjust following a stimulus reversal, all animals exhibited more difficulty learning this task indicated by learning curves around chance level. Thus, this sex difference may be a phenomenon of subthreshold learning. Only a handful of groups have conducted cross-modal comparisons of discrimination on reversal learning, finding that generally rats acquire olfactory associations faster than visual ones ( 23 , 24 ). Our finding that females generally omit more trials than males has been corroborated in several decision-making tasks, including the 5-Choice Serial Reaction Time Task (5-CSRTT) and the Rat Gambling Task (rGT). Previous reports—one in the 5-CSRTT ( 25 ) and another using the rGT ( 26 )—find that females omit more trials than males. 
That we observed this pattern in both stimulus and action-based reversal learning suggests it is a generalizable phenomenon. To our knowledge there has only been one study that has investigated omissions in the context of flexible learning, using an operant paradigm featuring both spatial and visual light-based cues, which reported that females omitted more trials and committed more perseverative errors relative to males ( 27 ). Interestingly, we found that females omitted more trials during late reversal learning (i.e., session 2), after they had already learned the new contingency, compared to males, with no sex difference observed during early reversal learning (i.e., session 1) for action-based reversals. There are likely a variety of factors that influence an animal’s ability to engage in the task (e.g., fatigue, boredom, satiety, hunger, hormones). One should consider how these internal states influence motivation and task engagement since they can also impact task performance. The role of fatigue has been mostly studied in tasks involving effort-based decision-making ( 28 ), in which animals have to exert physical effort, or in sleep studies where the animals are deliberately sleep deprived prior to being tested on decision-making paradigms ( 29 ). However, given that we did not find sex differences in either choice or reward latencies, which are reliable measures of processing speed and motivation, fatigue and a lack of motivation are not likely explanations. Another set of performance measures that are commonly used to assess sensitivity to positive and negative feedback are the adaptive strategies of win-stay and lose-shift, respectively. We found that males employ the win-stay strategy more than females for the action-based reversal learning task, which is consistent with prior studies that males exhibited more win-stay, and less lose-shift, compared to females using the Iowa Gambling Task ( 12 , 30 ). 
Differences in modality, as well as uncertainty and volatility of the environment can influence use of adaptive strategies. For example, Chen et al. (2021) found no sex differences in win-stay or lose-shift when using a visual-based restless bandit task, but found that females employed win-stay more than males during exploratory states, in a spatial-based restless bandit task. Taken together, the sex differences we found in initiation omissions and initiation latencies cannot be fully explained by sex differences in learning accuracy, satiety, or use of adaptive strategies, but this does not rule out the possibility of boredom or ennui, leading to task disengagement. In order to evaluate this further, we used computational methods based on two conventional reinforcement learning (RL) models ( 14 ) to fit the animals’ choice behavior and estimate parameters (e.g., learning rate, inverse temperature, decay rate). The fit to the choice behavior revealed an overall slow rate of reversal learning, suggesting that the shift in choice preference after reversals occurred gradually in both male and female rats. Consistent with our previous findings ( 14 ), we also found females had a lower decay rate compared to males, suggesting they retained the memory of the unchosen option’s past value for a longer time. Accordingly, we tested sigmoidal functions to characterize animals’ choice as a gradual shift in preferred options. In particular, inspired by block Hidden Markov Models ( 13 ), which utilized sigmoidal transition curves to characterize post-reversal dynamics, we tested the extension of the sigmoidal curve function that accommodates distinct lapse rates prior to and following the transition in reversal. We found that this generalized model (SC2) best accounted for choice behavior in both males and females. 
The first lapse rate prior to the sigmoidal transition point was larger for both males and females, given that rats tend to commit more errors immediately following a reversal as they learn the new contingency and then eventually transition to the other choice option. Interestingly, we found that females had a higher second lapse rate than males, after they had already learned the reward contingency associated with a reversal. Thus, we focused on this after-transition period and found that females omitted more trials and exhibited poorer choice accuracy later in the reversal. This provides insight into our previous findings because these omissions occur later in the reversal, after learning of the new contingency has already occurred. Altogether, this finding suggests that females become more disengaged from the task than males when there is not much to learn, i.e., when reaching a learning asymptote. Conclusions These findings highlight the importance of continuing to report sex differences that emerge in the context of learning and decision-making. Large gaps in knowledge remain as to why these sex differences exist due to the sparsity of studies conducted in female subjects. Future studies should consider the possibility of sex-dependent changes in neuronal plasticity and how this may impact learning. Factors like chronic stress exposure can also differentially affect dendritic and spine morphology in prefrontal cortex and amygdala in males and females ( 31 , 32 ). Several studies have also reported that females tend to be more sensitive to negative feedback ( 4 , 5 , 12 ), but it remains unclear whether sex modulates neural activity associated with feedback sensitivity. Our findings suggest that males and females differ in their engagement during learning plateaus. Acknowledgements This work was supported by NIH 2R01DA047870. 
We acknowledge the contributions of undergraduate research assistants who assisted with behavioral testing: Kanak Das, Johnny Perez, and Saisriya Kolli. We also thank Madeline Goldfarb for her assistance with data management. Funder Information Declared NIDA , 2R01DA047870 References 1. ↵ Chen CS , Ebitz RB , Bindas SR , Redish AD , Hayden BY , Grissom NM . Divergent Strategies for Learning in Males and Females . Curr Biol . 2021 ; 31 ( 1 ): 39 – 50 .e4. OpenUrl CrossRef PubMed 2. ↵ Chen CS , Knep E , Han A , Ebitz RB , Grissom NM . Sex differences in learning from exploration . Elife . 2021 ; 10 . 3. ↵ Grissom NM , Reyes TM . Let’s call the whole thing off: evaluating gender and sex differences in executive function . Neuropsychopharmacology . 2019 ; 44 ( 1 ): 86 – 96 . OpenUrl CrossRef PubMed 4. ↵ Cox J , Minerva AR , Fleming WT , Zimmerman CA , Hayes C , Zorowitz S , et al. A neural substrate of sex-dependent modulation of motivation . Nat Neurosci . 2023 ; 26 ( 2 ): 274 – 84 . OpenUrl CrossRef PubMed 5. ↵ Orsini CA , Truckenbrod LM , Wheeler AR . Regulation of sex differences in risk-based decision making by gonadal hormones: Insights from rodent models . Behav Processes . 2022 ; 200 : 104663 . OpenUrl CrossRef PubMed 6. ↵ Dalton GL , Wang NY , Phillips AG , Floresco SB . Multifaceted Contributions by Different Regions of the Orbitofrontal and Medial Prefrontal Cortex to Probabilistic Reversal Learning . J Neurosci . 2016 ; 36 ( 6 ): 1996 – 2006 . OpenUrl Abstract / FREE Full Text 7. ↵ Izquierdo A , Brigman JL , Radke AK , Rudebeck PH , Holmes A . The neural basis of reversal learning: An updated perspective . Neuroscience . 2017 ; 345 : 12 – 26 . OpenUrl CrossRef PubMed 8. ↵ Schoenbaum G , Setlow B , Nugent SL , Saddoris MP , Gallagher M . Lesions of orbitofrontal cortex and basolateral amygdala complex disrupt acquisition of odor-guided discriminations and reversals . Learn Mem . 2003 ; 10 ( 2 ): 129 – 40 . OpenUrl Abstract / FREE Full Text 9. 
↵ Harris C , Aguirre C , Kolli S , Das K , Izquierdo A , Soltani A . Unique features of stimulus-based probabilistic reversal learning . Behav Neurosci . 2021 ; 135 ( 4 ): 550 – 70 . OpenUrl CrossRef PubMed 10. ↵ Trepka E , Spitmaan M , Bari BA , Costa VD , Cohen JY , Soltani A . Entropy-based metrics for predicting choice behavior based on local response to reward . Nat Commun . 2021 ; 12 ( 1 ): 6567 . OpenUrl CrossRef PubMed 11. ↵ Pellman BA , Schuessler BP , Tellakat M , Kim JJ . Sexually Dimorphic Risk Mitigation Strategies in Rats . eNeuro . 2017 ; 4 ( 1 ). 12. ↵ van den Bos R , Jolles J , van der Knaap L , Baars A , de Visser L. Male and female Wistar rats differ in decision-making performance in a rodent version of the Iowa Gambling Task . Behav Brain Res . 2012 ; 234 ( 2 ): 375 – 9 . OpenUrl CrossRef PubMed 13. ↵ Le NM , Yildirim M , Wang Y , Sugihara H , Jazayeri M , Sur M . Mixtures of strategies underlie rodent behavior during reversal learning . PLoS Comput Biol . 2023 ; 19 ( 9 ): e1011430 . OpenUrl PubMed 14. ↵ Aguirre CG , Woo JH , Romero-Sosa JL , Rivera ZM , Tejada AN , Munier JJ , et al. Dissociable Contributions of Basolateral Amygdala and Ventrolateral Orbitofrontal Cortex to Flexible Learning Under Uncertainty . J Neurosci . 2024 ; 44 ( 2 ). 15. ↵ Stolyarova A , Izquierdo A . Complementary contributions of basolateral amygdala and orbitofrontal cortex to value learning under uncertainty . Elife . 2017 ; 6 . 16. ↵ Farashahi S , Rowe K , Aslami Z , Lee D , Soltani A . Feature-based learning improves adaptability without compromising precision . Nat Commun . 2017 ; 8 ( 1 ): 1768 . OpenUrl CrossRef PubMed 17. ↵ Aarde SM , Genner RM , Hrncir H , Arnold AP , Jentsch JD . Sex chromosome complement affects multiple aspects of reversal-learning task performance in mice . Genes, Brain and Behavior . 2021 ; 20 ( 1 ): e12685 . OpenUrl PubMed 18. Bissonette GB , Lande MD , Martins GJ , Powell EM . 
Versatility of the mouse reversal/set-shifting test: Effects of topiramate and sex . Physiology & Behavior . 2012 ; 107 ( 5 ): 781 – 6 . OpenUrl PubMed 19. Branch CL , Sonnenberg BR , Pitera AM , Benedict LM , Kozlovsky DY , Bridge ES , et al. Testing the greater male variability phenomenon: male mountain chickadees exhibit larger variation in reversal learning performance compared with females . Proceedings of the Royal Society B: Biological Sciences . 2020 ; 287 ( 1931 ): 20200895 . OpenUrl PubMed 20. ↵ LaClair M , Lacreuse A . Reversal learning in gonadectomized marmosets with and without hormone replacement: are males more sensitive to punishment? Animal Cognition . 2016 ; 19 ( 3 ): 619 – 30 . OpenUrl CrossRef PubMed 21. ↵ Bernstein BJ , Kendricks DR , Fry S , Wilson L , Koopmans B , Loos M , et al. Sex differences in spontaneous behavior and cognition in mice using an automated behavior monitoring system . Physiol Behav . 2024 ; 283 : 114595 . OpenUrl PubMed 22. ↵ Palmer JA , White SR , Chavez Lopez K , Laubach M . The Role of the Rat Prefrontal Cortex and Sex Differences in Decision-Making . J Neurosci . 2024 ; 44 ( 46 ). 23. ↵ Brushfield AM , Luu TT , Callahan BD , Gilbert PE . A comparison of discrimination and reversal learning for olfactory and visual stimuli in aged rats . Behav Neurosci . 2008 ; 122 ( 1 ): 54 – 62 . OpenUrl CrossRef PubMed 24. ↵ Gilbert PE , Pirogovsky E , Brushfield AM , Luu TT , Tolentino JC , Renteria AF . Age-related changes in associative learning for olfactory and visual stimuli in rodents . Ann N Y Acad Sci . 2009 ; 1170 : 718 – 24 . OpenUrl CrossRef PubMed Web of Science 25. ↵ Bayless DW , Darling JS , Stout WJ , Daniel JM . Sex differences in attentional processes in adult rats as measured by performance on the 5-choice serial reaction time task . Behav Brain Res . 2012 ; 235 ( 1 ): 48 – 54 . OpenUrl CrossRef PubMed 26. ↵ Georgiou P , Zanos P , Bhat S , Tracy JK , Merchenthaler IJ , McCarthy MM , et al. 
Dopamine and Stress System Modulation of Sex Differences in Decision Making . Neuropsychopharmacology . 2018 ; 43 ( 2 ): 313 – 24 . OpenUrl PubMed 27. ↵ Gargiulo AT , Hu J , Ravaglia IC , Hawks A , Li X , Sweasy K , et al. Sex differences in cognitive flexibility are driven by the estrous cycle and stress-dependent . Front Behav Neurosci . 2022 ; 16 : 958301 . OpenUrl PubMed 28. ↵ Iodice P , Ferrante C , Brunetti L , Cabib S , Protasi F , Walton ME , et al. Fatigue modulates dopamine availability and promotes flexible choice reversals during decision making . Sci Rep . 2017 ; 7 ( 1 ): 535 . OpenUrl PubMed 29. ↵ van Enkhuizen J , Acheson D , Risbrough V , Drummond S , Geyer MA , Young JW . Sleep deprivation impairs performance in the 5-choice continuous performance test: similarities between humans and mice . Behav Brain Res . 2014 ; 261 : 40 – 8 . OpenUrl PubMed 30. ↵ Donovan CH , Wong SA , Randolph SH , Stark RA , Gibb RL , Gruber AJ . Sex differences in rat decision-making: The confounding role of extraneous feeder sampling between trials . Behav Brain Res . 2018 ; 342 : 62 – 9 . OpenUrl PubMed 31. ↵ Farrell MR , Gruene TM , Shansky RM . The influence of stress and gonadal hormones on neuronal structure and function . Horm Behav . 2015 ; 76 : 118 – 24 . OpenUrl 32. ↵ Shansky RM , Hamo C , Hof PR , McEwen BS , Morrison JH . Stress-induced dendritic remodeling in the prefrontal cortex is circuit specific . Cereb Cortex . 2009 ; 19 ( 10 ): 2479 – 84 . OpenUrl CrossRef PubMed Web of Science View the discussion thread. Back to top Previous Next Posted October 31, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following Sex differences in task engagement and lapse rate during reward learning plateaus Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Sex differences in task engagement and lapse rate during reward learning plateaus C.G. Aguirre , J.H. Woo , L. Alhabbal , T. Fujioka , R. Moore , T. Ye , J.J. Castrellon , A. Soltani , A. Izquierdo bioRxiv 2025.10.29.685451; doi: https://doi.org/10.1101/2025.10.29.685451 Share This Article: Copy Citation Tools Sex differences in task engagement and lapse rate during reward learning plateaus C.G. Aguirre , J.H. Woo , L. Alhabbal , T. Fujioka , R. Moore , T. Ye , J.J. Castrellon , A. Soltani , A. Izquierdo bioRxiv 2025.10.29.685451; doi: https://doi.org/10.1101/2025.10.29.685451 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Animal Behavior and Cognition Subject Areas All Articles Animal Behavior and Cognition (7618) Biochemistry (17636) Bioengineering (13859) Bioinformatics (41847) Biophysics (21401) Cancer Biology (18535) Cell Biology (25423) Clinical Trials (138) Developmental Biology (13353) Ecology (19860) Epidemiology (2067) Evolutionary Biology (24287) Genetics (15582) Genomics (22463) Immunology (17701) Microbiology (40300) Molecular Biology (17141) Neuroscience (88432) Paleontology (666) Pathology (2825) Pharmacology and Toxicology (4813) Physiology (7633) Plant Biology (15107) Scientific Communication and Education (2042) Synthetic Biology (4285) Systems Biology (9808) Zoology (2267)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00