Full text
78,938 characters
· extracted from
preprint-html
· click to expand
The effects of task similarity during representation learning in brains and neural networks | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results The effects of task similarity during representation learning in brains and neural networks View ORCID Profile N. Menghi , View ORCID Profile W. J. Johnston , S. Vigano’ , M. A. B. Hinrichs , B. Maess , View ORCID Profile S. Fusi , C. F. Doeller doi: https://doi.org/10.1101/2025.01.20.633896 N. 
Menghi 1 Max Planck Institute for Human Cognitive and Brain Sciences, Department of Psychology , Leipzig, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for N. Menghi For correspondence: menghi{at}cbs.mpg.de W. J. Johnston 2 Center for Theoretical Neuroscience, Columbia University , New York, NY, USA Mortimer B. Zuckerman Mind, Brain and Behavior Institute, Columbia University, New York, NY, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for W. J. Johnston S. Vigano’ 1 Max Planck Institute for Human Cognitive and Brain Sciences, Department of Psychology , Leipzig, Germany 3 Center for Mind/Brain Sciences, University of Trento , Rovereto 38068, Italy Find this author on Google Scholar Find this author on PubMed Search for this author on this site M. A. B. Hinrichs 1 Max Planck Institute for Human Cognitive and Brain Sciences, Department of Psychology , Leipzig, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site B. Maess 1 Max Planck Institute for Human Cognitive and Brain Sciences, Department of Psychology , Leipzig, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site S. Fusi 2 Center for Theoretical Neuroscience, Columbia University , New York, NY, USA Mortimer B. Zuckerman Mind, Brain and Behavior Institute, Columbia University, New York, NY, USA 4 Kavli Institute for Brain Sciences, Columbia University , New York, NY, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for S. Fusi C. F. 
Doeller 1 Max Planck Institute for Human Cognitive and Brain Sciences, Department of Psychology , Leipzig, Germany 5 Kavli Institute for Systems Neuroscience, Center for Neural Computation, The Egil and Pauline Braathen and Fred Kavli Center for Cortical Microcircuits, Jebsen Center for Alzheimer’s Disease, Norwegian University of Science and Technology , Trondheim 7491, Norway Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Preview PDF Abstract The complexity of our environment poses significant challenges for adaptive behavior. Recognizing shared structures across tasks can theoretically improve learning through generalization. However, how such shared representations emerge and influence performance remains poorly understood. Contrary to expectations, our findings revealed that individuals trained on tasks with similar low-dimensional structures performed worse than those trained on dissimilar tasks. Magnetoencephalography revealed correlated neural representations in the same-structure group and anticorrelated ones in the different-structure group. Crucially, practice reduced this performance gap and shifted the neural representations of the tasks in the same-structure group towards anticorrelation, like those in the different-structure group. A neural network model trained on similar tasks replicated these findings: tasks with similar structures require more iterations to orthogonalize their representations. These results highlight a complex interplay between task similarity, neural dynamics, and behavior, challenging traditional assumptions about learning and generalization. 1 Introduction To efficiently interact with the environment, intelligent agents create and use internal models of the world that represent the association between sensory inputs and selected actions or decisions [ 1 , 2 ]. 
These representations might be built by simply selecting a single task-relevant feature (and thus suppressing or ignoring irrelevant ones) or by implementing more complex operations to extract the abstract structure of the task at hand [ 3 – 6 ]. The task structure defines a set of parameters that organize one or more tasks. It creates a compressed space with lower dimensionality than the original sensory input while preserving a similar amount of information [ 7 ]. Utilizing this structure enables a more efficient mapping between inputs and actions [ 8 , 9 ]. We can, for example, learn that there is a relationship between the amount of light and water and the growth of the plants in our garden without having to memorize all the possible combinations of features and related outcomes. Given the extraordinary load on the cognitive system created by the multitude of tasks we learn, a particularly effective strategy is to capitalize on the shared similarities between different task representations, thus helping the brain to efficiently learn and generalize through a wide range of situations and experiences. This identification of commonalities operates at varying levels of abstraction. For example, there is evidence of shared representations for number and space [ 10 ], imagery and visual processing [ 11 ], face and object perception [ 12 ], sequences in working memory [ 13 ], associative learning [ 14 , 15 ] and for the representations of self and others in social cognition [ 16 ]. Such mechanisms have been observed in machine learning as well. Neural networks trained to learn multiple tasks also exploit similarities by creating a low-dimensional representation common to different tasks benefiting learning and generalization [ 17 – 21 ]. Going back to our garden, we can infer that plants that appear similar to known plants might also have similar water and light requirements. 
Recently, the investigation of how task representations are acquired and influence generalization, usually referred to as “representation” and “transfer” learning, respectively, has gained momentum in cognitive neuroscience and machine learning [ 14 , 19 , 21 – 24 ]. Few studies have started to elucidate the underlying brain mechanisms in humans, for instance observing neural (EEG) signatures of the emergence of low-dimensional, task-relevant, representations [ 25 ], or how their alignment can facilitate transfer learning across domains that are based on magnitude or linearly ordered structures [ 26 ]. However, a clear understanding of how the human brain creates and uses these representations remains elusive and a central topic of current research [ 27 ]: To fill this gap, we designed an experiment where we investigated the formation and use of shared representations across different tasks during learning, while at the same time monitoring the neural activity of participants using magnetoencephalography (MEG). Participants learned two tasks (named “Conceptual” and “Spatial”, see Methods) with an interleaved training regime. In both tasks, they had to decide whether a fictitious plant seed would grow or die based on either the relative amount of water and light (“Seed1”, Conceptual task, Fig. 1A top, where the opacity of the corresponding symbol indicates the amount of a feature [i.e., water or sun]) or the X-Y spatial position in visual space on a computer screen (“Seed2”, Spatial task, Fig. 1A bottom). To efficiently perform this classification, participants had to discover the underlying low-dimensional, task-relevant, hidden structure capturing the correct ratios of the two conceptual (water and light) or spatial (x and y) features ( Fig. 1B ). Crucially, participants were divided into two groups. In one group, the underlying low-dimensional, task-relevant hidden structure of the two tasks was the same (“Same Structure” or SameSt group, Fig. 
1B top row), while in the other group, they were orthogonal (“Different Structure” or DiffSt group, Fig. 1B bottom row)(see Methods). Both groups performed a training phase first, in which participants received trial-wise feedback in a classification task, followed by a test phase, where they were presented with both old and new stimuli to classify without feedback to test their ability to generalize. Download figure Open in new tab Figure 1: Stimuli, Task Structures and Trial Structure. Panel A shows the experimental stimuli and cues. Two seeds were used to cue for the two different tasks, spatial and conceptual. In the conceptual task, we used a sun and a water drop symbol as features composing a stimulus configuration. In the spatial task, we used a position (x,y) on a square map as a stimulus. Panel B shows the different structures used. In the conceptual task, each vertical (sun opacity) and horizontal (a drop of water opacity) position can be combined, creating a continuous map or structure from which we draw stimuli. In these structures, black colour is associated with the outcome ”die”, and white colour is associated with the outcome ”grow”. The structures of the two tasks were the same in one group and orthogonal in the other group of participants. Panel C shows the different phases of the experiment. In each phase, conceptual and spatial tasks were organized in an interleaved regime. Panel D shows the schematic of a trial structure during the training phase. A fixation cross was shown for 0.5 seconds, and then the seed cueing the task domain appeared for 1 second. Brown seed cued participants to pay attention to the conceptual features and ignore the spatial ones and vice-versa, the grey seed cued participants to pay attention to the spatial features and ignore the conceptual ones. Afterward, both spatial and conceptual configurations appeared on screen and stayed there for 2.5 seconds maximum, or until response. 
This is when participants used a button box to predict if, based on the relevant features presented, the seed was associated with ”grow” or ”die”. Finally, feedback appeared and stayed on screen for 1 second. Panel E shows the schematic of a trial structure during the testing phase. A fixation cross was shown for 0.5 seconds, and then the seed cueing the task domain appeared for 1 second. Afterward, spatial and conceptual configurations appeared on screen and stayed there for 2.5 seconds maximum, or until response. Differently from the training phase, no feedback was provided. We expected the SameSt group to show higher performance than the DiffSt group in learning to perform the two tasks (training phase), as they share the same underlying hidden structure. We predicted that this advantage should also lead to better generalization performance in the test phase. In parallel, we analyzed the MEG signal to gain insight into the emergence and use of task-relevant representations and their differences across the two groups. Finally, we trained neural networks to perform tasks analogous to the experimental tasks. Comparing the performance and learning dynamics of the trained networks with those of human participants allowed us to draw conclusions about the cognitive mechanisms involved. 2 Results We trained participants to perform an associative learning task across two contexts in an interleaved regime. They had to predict if a seed was going to ”grow” or ”die” based on two distinct sets of features, where the relevant features were determined by the identity of the seed (see Fig. 1A-B ). Decisions about the first seed relied on what we called the ”conceptual features”: participants had to learn the association between configurations of the opacity of a sun and a drop of water images and the outcome (grow or die). 
Decisions about the second seed relied on ”spatial features”: participants had to learn the association between configurations of position in space in which the seed was planted and an outcome (grow or die). Two different seeds cued the different contexts (See Fig. 1A ). Participants were randomly assigned to either a ”Same structure” (SameSt) or a ”Different structure” (DiffSt) group, which differed based on the similarity between the feature to outcome mappings (see Fig.1B ). Participants in the SameSt group had the same feature-outcome structure in both contexts, while participants in the DiffSt group had a flipped structure across the two contexts ( Fig.1B ). Each participant was first trained via feedback learning (see Fig.1D ), and then tested (see Fig.1E ) during two separate sessions, within the same day, separated by a navigation task, that pertained to a different question and will be analysed separately (See Methods). To assess the emergence of representation of and across tasks, we divided the analysis into two parts. First, in our behavioural data analysis, we compared participant accuracies between the two distinct groups and assessed their generalization performance on a novel set of configurations. Second, for our analysis of the MEG data, we employed Representation Similarity Analysis (RSA) to establish links between the neural representations and participants’ behavioural results. 2.1 Behavioural Results We started by analyzing behavioral performance during the training phase, where accuracy was computed as the proportion of correct responses over all trials across the two tasks (Spatial and Conceptual). A 2x2 mixed-design analysis of variance (ANOVA) revealed a main effect of group (DiffSt vs SameSt; F(1,52) = 8.455, p = 0.005), but no main effect of task (Conceptual vs Spatial; F(1,1) = 0.145, p = 0.704) nor interaction (F(1,52) = 0.246, p = 0.621). 
Post-hoc tests revealed that, contrary to our expectations, participants belonging to the DiffSt group performed better compared to those in the SameSt group (t(52) = 2.908, p = 0.005; t-test)( Fig. 2A , left). This difference however was reduced and no longer statistically significant during the test phase (t(52) = 1.426, p = 0.159)( Fig. 2A , right). Download figure Open in new tab Figure 2: General performance and Generalization Panel A shows participants’ accuracies during the training and test phases. Participants are divided into SameSt and DiffSt group conditions. Panel B shows participants’ performance for old and new stimuli during the testing phase. Participants are divided into SameSt and DiffSt following the color scheme of Panel A. A detailed analysis of performance during the test session through a three-way (2x2x2) mixed-design ANOVA revealed no main effect of group (DiffSt vs SameSt; F(1,52) = 2.494, p = 0.120) or domain (Conceptual vs Spatial; F(1,52) = 1.250, p = 0.268), but a strong main effect of novelty (New vs Old; F(1,52) = 37.14, p < 0.001), indicating that classification of already seen stimuli was more accurate (confirmed with a post-hoc t-test, t(26) = 5.864, p < 0.001). No significant interaction was reported, except for the one between Group and Novelty (F(1,26) = 5.237, p = 0.026), indicating that the DiffSt group classified old stimuli better than the SameSt group (approaching significance in a post-hoc t-test t(52) = 2.040, p = 0.051). Additional post-hoc t-tests, however, revealed that both groups could correctly classify new stimuli above chance (mean classification accuracy: 0.66; t(53) = 11.898, p < 0.001) and that such generalization ability did not differ between them (t(26) = -0.376, p = 0.709). These results are summarized in Fig. 2B . 
In short, contrary to our expectations, we found that similar structures between tasks did not facilitate learning, but rather likely created interference reflected in lower classification accuracy. This interference seemed to be reduced with practice, as both groups reached equivalent classification and generalization performance during the test phase. To gain a better understanding of this observation, we analysed the corresponding MEG data. 2.2 MEG Results While participants were performing the experiment, we measured their brain activity non-invasively using MEG. To gain insight into how task representations emerge and support behavior, we could analyze the representational geometry of task structure by means of Representational Similarity Analysis (RSA) [ 28 ]. In particular, we asked whether, how, and when representational models of how the different stimuli related to each other emerged in neural activity. By looking at such relational representations, we aimed to elucidate how task information was structured in the brain and understand what generated the observed interference in behavior. We generated six models encapsulating two distinct representations of the task: stimulus-bound and task-relevant (see Fig. 3A ). The first model, stimulus-bound, captured the similarity between stimuli in the bi-dimensional stimulus space of relevant features, and was calculated by computing the 2D Euclidean distances between stimuli, either conceptual (defined by the amount of water and sun) or spatial (defined by x and y coordinates on the screen); the second model, task-relevant, reflected the similarity between stimuli in the compressed, low-dimensional space that captures the correct ratio between the two features necessary for classification, and it was computed as the 1D Euclidean distance between stimuli after they have been compressed by projecting them according to their subspace. 
These models were constructed for both the stimuli within the spatial and conceptual domains ( Fig. 3B top and bottom left panels), as well as for their cross-domain or “abstract” correspondence (see Fig. 3B top and bottom right panels, for a description of these procedures see the Methods section). These models were correlated with neural matrices where dissimilarities were expressed as 1-Pearson’s r between the activity patterns across MEG channels associated with the presentation of each stimulus at a given time point. A positive correlation between neural activity and one of these models means that the brain’s response patterns correspond to the model structure. For example, a positive correlation with the stimulus-bound model would indicate that neural activity is organized according to the physical properties of the stimuli (such as their spatial or conceptual features). In contrast, a positive correlation with the task-relevant model would suggest that neural activity is more closely aligned with the compressed representation capturing the task structure. Download figure Open in new tab Figure 3: Model Distances Panel A illustrates an example of distance computations for four points, according to Stimulus-Bound and Task-Relevant representations. The four points are selected here solely for visualization purposes. Stimulus-bound distances are calculated within a two-dimensional feature space, accurately capturing the configurations of conceptual and spatial features. Grey dashed lines denote distances between points. Task-relevant distances, however, are computed within a compressed space optimized for learning the task structure. These distances are measured along a one-dimensional manifold, and points are projected onto the red dashed line, which retains essential task information critical for generalization. Here, the blue and green points overlap, reducing their distance to zero. 
Panel B shows the calculation of distances for spatial, conceptual, and cross-domain tasks (example from the SameSt group). Each white and black square in the four square maps represents different configurations, with white indicating ”grow” and black indicating ”die.” Within a domain, Euclidean distances were calculated for spatial or conceptual tasks. Across domains, distances between the spatial and conceptual task stimuli were computed as if both lay in the same space. This approach yielded six model matrices in total: three stimulus-bound and three task-relevant. We then computed correlations between these model matrices and empirical matrices, where distance was calculated as the correlation distance between the Event-Related Fields (ERF) of different configurations at each time point. Statistical significance was assessed non-parametrically at the group level using a cluster-based permutation approach with a cluster-forming threshold of p < 0.05 (two-tailed), and a corrected significance level of p < 0.05 (two-tailed) [ 29 ]. Condition labels were randomly permuted 1000 times, following the default method implemented in MNE [ 30 ], see methods. 2.2.1 Training Phase Stimulus-bound representation First, we analysed the neural similarity between stimulus-bound representations of the stimuli during the training phase ( Fig. 3A , top panel), focusing on both within-domain ( Fig.3B , left panel) and across-domains similarities ( Fig. 3B , right panel). In both contexts, before and just after stimulus presentation, correlation fluctuated around the chance level. Within the spatial context, neural activity was significantly correlated to the one predicted by the stimulus-bound configuration model shortly after stimulus onset for both the SameSt (160-870 ms and 880-1050 ms) and the DiffSt groups (140-1020 ms) (See Supplementary Materials 9). 
This pattern was repeated within the conceptual context, where neural activity was similar to the stimulus-bound representations of the stimuli early after stimulus onset for both the SameSt group (80-540 ms, 90-250 ms and 90-320 ms) and the DiffSt group (70-570 ms, 90-580 ms and 560-820 ms) (See Supplementary Materials 9). All the clusters showed positive correlations. Interestingly however, in the cross-domain analysis, where the two feature spaces were equated with each other as if they were aligned to a common embedding ( Fig. 3B right panel), we found a positive significant cluster in the SameSt group (630-810 ms) (See Fig. 4A ) and a trend towards a negative cluster (p = 0.1) in the DiffSt group (740-880 ms) (See Supplementary Materials 9). The positive correlation here indicates that the more aligned the two configurations were in the hypothetical common or abstract cross-domain space (as if the two feature sets were identical), the more similar the neural activity patterns they elicited. Conversely, the negative correlation indicates that the more aligned the two configurations were in such abstract cross-domain space, the more diverse or dissimilar their neural activity patterns. Consistent results were found when we divided participants into good and bad performers in both DiffSt and SameSt groups (Supplementary Fig. 14). Download figure Open in new tab Figure 4: Panel A shows the results of the cluster-permutation correction of stimulus-bound representations for cross-domain space during training for participants in the SameSt group. Clusters are colour-coded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with one colour. Panel B shows the results of the cluster-permutation correction of task-relevant representations for cross-domain space during training for participants in the DiffSt group. 
Clusters are colour-coded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with one colour. To summarize, we observed that patterns of neural activity evoked by stimuli configurations in the SameSt group reflected their relational structure in 2D feature spaces both when we considered the domains separately as well as when we considered them aligned to a common space. This finding was interesting as the two domains in the SameSt group indeed share a similar structure. Thus, the brain seems to reflect this in its neural activity. In the DiffSt group on the contrary, we observed that patterns of evoked neural activity were reflecting (positive correlations) stimulus-based models within each domain, but when we compared similarities across domains we observed a negative correlation, mirroring the representational structure of the two task spaces which are, indeed, anti-correlated. 2.2.2 Training Phase Task-Relevant representation Then, we analyzed the neural similarity between task-relevant representations of the stimuli ( Fig. 3A bottom panel) for both within-domain and across-domain conditions ( Fig. 3B ) during the training phase. In all the contexts (See Supplementary Materials Fig. 10 ), before and just after stimulus presentation correlation fluctuated around the chance level. Neural activity patterns recorded from the SameSt group did not significantly correlate with predicted distances in the task-relevant models, suggesting that such compressed representation was not reflected in this group’s brain activity. This applied to both the within- and the cross-domain analyses. Conversely, looking at the within-domain analyses, in the DiffSt group, within the spatial domain/task, we found two positive significant clusters (170-540 ms and 250-600 ms) (See Supplementary Materials Fig. 
10 ), indicating that neural activity was more similar for compressed spatial configurations that were close in the task-relevant space. Similarly, within the Conceptual domain/task, we found two positive significant clusters showing that the neural activity was more similar for configurations that were close in the task-relevant space (570-780 ms and 870-1150 ms) (See Supplementary Materials Fig. 10 ). In the cross-domain analyses, in line with our results in the Stimulus-bound analysis (previous paragraph), we found a negative significant cluster (330-460 ms and 480-700 ms) (See Fig. 4 Panel B) in the cross-domain analysis, indicating neural activity was more dissimilar when compressed configurations belonging to the two tasks were close in the cross-domain space in the DiffSt group. In summary, we showed the emergence of task-relevant representation in the DiffSt group, suggesting the emergence of a low-dimensional representation. This pattern in neural activity is a potential candidate source of the difference observed in the behavior of the two groups: as participants had, given our design, to differentiate the two domains and focus their attention only on the information “cued” by the seed stimulus, the DiffSt group might have found this operation easier as the task structures facilitated their decorrelation, while for the SameSt group, the shared task structure might have been more detrimental. If this interpretation is correct, then a possible prediction follows: during the test phase, when the behavioral differences between the two groups are attenuated, we should observe these effects to reverse for the SameSt group. 2.2.3 Testing Phase Stimulus-bound representation While previous analyses focused on the training phase, we repeated them, now focusing on the testing phase, after learning had happened. In all the contexts (See supplementary materials Fig. 11 ), before and just after stimulus presentation correlation fluctuated around the chance level. 
In the within-domain analyses, in both the spatial and the conceptual tasks, neural activity was more similar for configurations that were close in the stimulus-bound space. This was significant early after stimulus onset for both the SameSt group (Spatial: 100-1210 ms; Conceptual: 70-1370 ms, 90-460 ms and 520-1000 ms) and the DiffSt group (Spatial: 90-1300 ms; Conceptual: 50-1500 ms and 85-1500 ms). Crucially, in the cross-domain analysis, we found a significant negative cluster in both the DiffSt group (240-940 ms, 340-590 ms and 370-760 ms) ( Fig. 5 Panel B) and the SameSt group (300-590 ms) ( Fig. 5 Panel A), indicating that now in both groups the neural activity was more dissimilar when configurations belonging to the two tasks were close in the cross-domain space. Consistent results were found when we more directly compared training and test sessions in both DiffSt and SameSt groups (Supplementary Fig. 14). Download figure Open in new tab Figure 5: Panel A-B shows the results of the cluster-permutation correction of stimulus-bound representations for cross-domain space during testing sessions for participants in the SameSt and DiffSt groups. Clusters are color-coded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with one colour. Results are divided into spatial, conceptual and cross-domain spaces for both the SameSt and DiffSt groups. Panel C-D shows the results of the cluster-permutation correction of task-relevant representations for cross-domain space during training for participants in the SameSt and DiffSt groups. Clusters are color-coded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with one colour. This finding was interesting as the two domains in the SameSt group that share a similar structure might have been detrimental to learning. 
While during the training the representations of the two tasks were positively correlated, the negative correlation observed now could be a result of the learning process. 2.2.4 Testing Phase Task-Relevant representation This pattern was replicated also when we analysed Task-relevant representations ( Fig. 3A bottom panel). In all the contexts (See supplementary materials Fig. 12 ), before and just after stimulus presentation correlation fluctuated around the chance level. Early after stimulus onset, neural activity in the Spatial task was more similar for configurations that were close in the task-relevant space. This was significant for both the SameSt group (180-1080 ms, 200-1060 ms, 380-710 ms and 480-1020 ms) and the DiffSt group (800-1360 ms and 150-780 ms). In the Conceptual task, neural activity was more similar for configurations that were close in the task-relevant space. This was significant early after stimulus onset for both the SameSt group (70-1110 ms, 590-1220 ms and 1090-1360 ms) and the DiffSt group (60-1500 ms and 440-1500 ms). In the cross-domain analysis, we found two significant negative clusters in the DiffSt group (340-740 ms and 930-1500 ms) ( Fig. 5 Panel D) and a negative trending towards significance cluster in the SameSt group (370-710 ms; p = 0.085) ( Fig. 5 Panel C). The negative correlation means that neural activity was more dissimilar when configurations belonging to the two tasks were close in the cross-domain space. In summary, we showed the emergence of task-relevant representation in both the DiffSt and SameSt groups, suggesting the emergence of a low-dimensional representation. This pattern in neural activity shows how, during testing, both groups were able to differentiate the two domains, possibly facilitated by their decorrelation of their representations. 
2.3 Artificial neural networks offer insight into the difference in learning performance To understand the difference in learning speed between participants who learned either the same or different structure conditions, we trained neural networks to perform tasks that are analogous to the experimental tasks ( fig. 6A, B ). The networks were given input variables relevant to each task as well as input variables that signaled the appropriate context for a given trial (similar to the seed identity in the experimental task). Then, some networks were trained to perform tasks with the same structure across the conceptual and spatial domains ( fig. 6B , top) and other networks were trained to perform tasks that had different structures across the two domains ( fig. 6B , bottom). More details about the modeling setup are given in Artificial neural network modeling in Methods . Download figure Open in new tab Figure 6: Panel A Schematic of the neural network. There are two latent variables (LVs) relevant to the first context (cLV1 and cLV2), two relevant to the second (sLV1, sLV2), two irrelevant variables, and two variables that signal the context (i.e., the seed identity in the experiment). There is a single hidden layer and one output unit. Panel B The tasks the network is trained to perform in two contexts (conceptual and spatial, left and right). The tasks are a coarser version of the experimental task and also have the same structure and different structure conditions. Panel C Schematic of the three input conditions for the network: (left) the features are all orthogonal to each other; (middle) pairs of features across the two contexts are encoded along semi-orthogonal directions; (right) there are only two, rather than four, total features. Panel D The learning trajectory for each input structure, averaged over n = 50 repetitions for each condition. 
Panel E The average total difference in learning trajectory across the networks above for orthogonal inputs (left), a range of semi-orthogonal inputs (middle), and identical inputs (right). We began by providing the input variables to the network in perfectly orthogonal dimensions (e.g., each variable was a different “input” to the network; fig. 6C , left). This leads to identical learning speed across the networks trained with the same- and different-structure tasks ( fig. 6D -E, left). This identical learning speed emerges because the same- and different-structure tasks are not distinguishable from the perspective of the network. Since the variables are all in orthogonal subspaces, there is no prior relationship between the variables in the first and second contexts. How can we induce an appropriate bias in the network? Instead of providing the task features along orthogonal dimensions, we provide them along semi-orthogonal – or, similar – dimensions ( fig. 6C , middle), where we index similarity by the cosine similarity between the vectors defining each dimension. This means, for example, that the amount of water in context 1 will be related to the x-position of the seeds in context 2. This manipulation will bias the network toward treating the different features similarly, and produce interference between the two features that must be overcome for reliable task performance. This manipulation causes faster learning for tasks with different structures relative to tasks with the same structure ( fig. 6D -E, middle) and qualitatively replicates the behavioral effect observed in our participants. Finally, we explore the case where the network is only given two task-relevant features ( fig. 6C , right). This is an extreme version of the similar feature case above, where the features are identical (i.e., perfectly aligned, or with zero angle). 
In this case, the same structure task is no longer necessarily contextual, since the network (or subject) can ignore the contextual variable (or seed) and still perform the task perfectly. This is not the case for the different structure task, where context remains relevant. The networks trained with the same structure learn far faster than those trained with different structure tasks ( fig. 6D -E, right). Overall, the neural networks reveal the conditions in which interference between features across the same and different contexts appears. When the relevant features are represented in initially non-orthogonal subspaces, then both the same- and different-structure tasks require them to be orthogonalized for reliable task performance, but the same-structure task orthogonalizes more slowly. The neural network makes predictions that can be tested in further work, where feature similarity is manipulated – as well as predictions that can be tested by more granular neuro-imaging methods, where we could test the central prediction of the model: that at the start of training, the features will be represented in positively correlated subspaces. The slower orthogonalization predicted by the model is also borne out by the data, where the MEG signal from the training period is positively correlated in same-structure participants, but negatively correlated in the different-structure participants. 3 Discussion We studied the temporal dynamics of when and how the brain acquires and shares representations of and across tasks and how sharing these structures affects behaviour. Participants learned two tasks where a low-dimensional manifold defined the associations between stimuli and outcome (representation) which were either similar between the two tasks or not (see Fig. 1 ). The brain can benefit from the similarities between tasks to facilitate efficient learning. We expected shared representations to improve performance during learning. 
However, our behavioural analyses showed a surprising finding: during training, tasks sharing a common low-dimensional structure exhibited interference, leading to worse learning performance (see Fig.2A ). Nonetheless, this interference decreased with practice, with performance differences disappearing during the testing sessions. Notably, shared representations did not impact generalisation (see Fig.2B ). Through multivariate analyses of MEG data, we explored the emergence and impact of shared structures across different dimensionality and learning stages. We have identified a two-dimensional representation of task stimuli within-domain emerging shortly after stimulus onset (see Supplementary Figs 9 and 11 ). Notably, during the training phase, in the DiffSt group, we found evidence of a compressed representation supporting the hypothesis of the emergence of a task-relevant representation (see Fig. 10 ). However, such representation was not observed in the SameSt group, possibly because of differences in performance (See also [ 25 ]). Crucially, participants in the SameSt group represented stimuli from both tasks more similarly in the cross-domain space during training ( Fig. 4A ), hinting at a shared representation that potentially hindered performance. Conversely, the DiffSt group demonstrated dissimilar representations, indicating separate neural substrates for encoding tasks ( Fig. 4B ). During the testing phase, representation similarity shifted for the SameSt group ( Fig. 5A-C ), suggesting independent encoding of tasks in neural subspaces, coinciding with improved behavioural performance. The neural network model trained to perform a similar task provided us with further insight into the nature of the shared representations (See Fig.6D ). When the two tasks had orthogonal dimensions, the structure of the tasks did not affect the network’s performance, as the same and different conditions, from the network’s perspective, were effectively identical. 
However, when the network was trained on tasks with identical dimensions, it learned faster when the structures were similar, effectively treating it as a single task. Interestingly, when the dimensions of the two tasks were similar but not identical, the neural network, much like the human brain, experienced interference when attempting to use overlapping representations for these tasks. These results suggest that the brain might compute the dimensions related to different tasks in distinct ways. However, the low- dimensional similarity between the task structures affects performance, likely due to the cognitive interference that arises from attempting to apply overlapping representations. This implies that while the brain can efficiently share representations across highly similar tasks, it must carefully manage partially similar tasks to avoid detrimental interference. Our results speak to the cognitive control theories which suggest that while on one side, shared representations should benefit generalization and the acquisition of novel tasks, on the other side, they introduce the possibility of interference and related costs [ 31 , 32 ]. However, practice has been shown to resolve this interference by effectively separating different tasks, through pattern separation, and encoding them in distinct neural subspaces, allowing the brain to handle multiple tasks [ 33 – 40 ]. Notably, training participants with an interleaved regime might have contributed to the observed interference. A blocked training regime, where participants focus on one task at a time, might mitigate interference and boost learning and transfer by facilitating neural task separation. Future studies comparing different training regimes on shared representation dynamics might shed light on critical stages of transfer learning. 
It may be necessary for participants to learn one task before introducing a second one to avoid interference and facilitate the acquisition of novel tasks by capitalizing on shared representations [ 23 , 36 , 41 – 45 ]. Finally, recent studies have pointed to the possibility that task representations resemble cognitive maps [ 46 – 48 ]. Representations are structured within relational maps, with relevant features determining tasks’ relative positions [ 49 – 52 ]. Distances in these maps code for similarity, placing similar tasks closer than dissimilar ones [ 53 – 56 ]. Our findings show the brain’s ability to organize and manipulate task-related information similar to a cognitive map, with shared representations potentially acting as points of convergence or overlap in this map. 4 Methods 4.1 Participants A total of 60 volunteers participated in the experiment (mean age = 27.91, SD = 4.21, 27 males, 33 females). All the participants were naive to the purpose of the experiment. Data from six participants were discarded because their performance was below the chance level (56%) in both the spatial and the conceptual tasks. We performed analyses on the remaining 54 participants. All participants gave informed written consent, and the study procedure was approved by the local institutional review board of the University of Leipzig (Ethics Reference Number: 045/22-ek). At the end of the experiment, participants received reimbursement for their participation. 4.2 Apparatus and Stimuli Participants completed the MEG experiments inside a sound-attenuated, dimly lit, and magnetically shielded room. Stimuli were displayed on a rear-projection screen with a spatial resolution of 800x600 pixels and a refresh rate of 60 Hz using the Psychophysics Toolbox ( http://psychtoolbox.org/ ) [ 57 ] for Matlab (Mathworks). It’s worth noting that the projector was changed after subject 28, although the proportions remained consistent, and the refresh rate was increased to 120 Hz. 
Two images of seeds were used as cues to which kind of task (spatial or conceptual tasks) participants had to do (See the left panel of Fig. 1 ). In the conceptual task, the opacity of a sun and a drop of water images governed the simulated amount of water and light received by the seed. Participants were instructed that greater opacity indicated higher levels of water or sunlight. Literature suggests that lower magnitudes are represented to the left and downward, while higher magnitudes are represented to the right and upward [ 58 – 60 ]. The spatial task involved a black dot positioned on a white square, with the vertical and horizontal placements of the dot dictating the virtual planting location for the seed. We sampled a subset of feature combinations from these two-dimensional spaces to create the stimuli; see the supplementary materials for a breakdown of all the feature combinations that were used to create the stimuli. 4.2.1 Task Structures We used two different feature-outcome maps, which we refer to as structure (see Fig. 1 ). These structures can be approximated by two different “diagonal” rules, allowing participants to learn the value of different spatial or conceptual combinations without sampling them first. Both task structures were defined using a deterministic mapping where the rounded value of the log-odds of the outcome was a quadratic function of stimulus characteristics, either the opacity of the two images or the position in space. u t is a matrix consisting of two rows with parameters normalised going from 1 to 100. These parameters are the feature values Water and light opacity for the conceptual task, and x and y positions for the spatial task. Flipping the sign of the wd parameter in this mapping produced the two structures depicted in Fig. 1 . The parameters 2.4 and 0.71 have been arbitrarily chosen to create the maps. 4.3 Experimental Design Each participant was trained to do two tasks in an interleaved fashion. 
In the first one, the conceptual task, they had to learn the association between the opacity of a sun and a drop of water images and an outcome (grow or die); in the second one, they had to learn the association between a position in space and an outcome (grow or die). Importantly, spatial positions did not affect the conceptual task and conceptual features did not affect the spatial one. Participants were randomly assigned to either a ”Same structure” (SameSt) or a ”Different structure” group (DiffSt), with 30 participants in each group (27 per group after discarding bad performers). The features-outcome mappings for the SameSt group were generated using wd = 0.71 for both spatial and conceptual tasks (See previous paragraph and equation 1 ). The features-outcome mappings for the DiffSt group were generated using wd = 0.71 for the conceptual task and wd = − 0.71 for the spatial task. 4.4 Procedure The experiment comprises four distinct phases, as illustrated in the right panel of Fig. 1 and lasted about 2 hours. Throughout these phases, participants engage in a computerized task where they assume the roles of scientists within a biology laboratory. In this simulated scenario, the laboratory has successfully cultivated two novel plant species, and participants are tasked with acquiring knowledge about the specific environmental conditions required for the successful germination of these plant seeds. 4.4.1 Training As illustrated on the right part of Fig. 1 , during the first phase, the training task, in each trial participants will first be prompted with one of two seeds cueing which task to focus on and then the stimuli appeared and stayed on screen for 2500 ms maximum or until a response was made. Responses were made on a standard button box, one button indicating a prediction of ’grow’ and another predicting ”die”. Responses not given within the required time constitute ”missed trials”. 
Right after the button press feedback was provided, saying ”correct” if the prediction was correct, ”incorrect” if it was not and ”too slow” if they missed the trial (no response within 2500 ms). By making predictions and receiving feedback, participants learned the association between relevant features and the outcome “grow” or “die” as in a classic associative learning task. It’s important to note that some stimuli (i.e., coordinates in the two-dimensional feature spaces) were common between the two contexts, while others were unique to each context, providing a multifaceted learning experience for the participants. 4.4.2 Test During the second phase, the test tasks, we tested participants’ knowledge about the structures learned during the training. We presented old and new stimuli to test for differences in memorization, transfer and generalization. Some of the new stimuli were new in both contexts and some were previously encountered (old) in one context but entirely new in the other context. This phase is divided into “first testing session” and “second testing session” separated by a navigation task that will be analysed separately. 4.4.3 Navigation The navigation task addresses a distinct question that will be analyzed separately. Here we provide a short description of the task. Participants were presented with one of the two seeds and an expected outcome: “grow” or “die”. Participants were then asked to navigate the appropriate (spatial or conceptual) space, by varying the two features (x and y for the spatial seed, amount of sun and rain for the conceptual seed), locate a position on the map and “plant” the seed according to the outcome requested. Even though data from the navigation task have not been analysed as they pertain to a different question, we show how participants’ general performance does not change between testing sessions. 
4.5 MEG Acquisition and Preprocessing Neuromagnetic data were acquired using a 306-sensor MEG system (204 planar gradiometers, 102 magnetometers, MEGIN Vectorview system) at the Max Planck Institute for Human Cognitive and Brain Sciences, Leipzig, Germany. MEG data were recorded at 1000 Hz sampling frequency. The MEG data were preprocessed offline using MNE Python software [ 30 ]. Specifically, we high-pass and low-pass filtered the data at 0.5 and 100 Hz, respectively. Notch filtering at 50 Hz was performed to remove the power line artefact. Data across different runs were aligned to an individual common head position. A Maxwell filter was then applied to the data. We epoched the data from 500 ms before the onset of the stimulus to 1.5 seconds following it. We visually inspected the epochs to identify and exclude epochs with artefacts. On average, over the different phases, 4% of the trials were excluded. We never discarded more than 10% of the trials. Independent component analysis was then performed in each subject to remove eye movement and artefact components, and the remaining components were then back-projected to channel space. No trials were discarded during this procedure. A maximum of 2 non-neighbouring sensors were interpolated per participant. We interpolated 2 sensors for 4 participants, 1 sensor for 3 participants and none for the remaining 47. Before computing ERFs we applied baseline correction based on the activity pre-stimulus onset, applied a low-pass filter at 30 Hz and resampled the data at 250 Hz. 4.6 MEG Data Analysis We performed univariate and multivariate analyses of the data to get a deeper insight into the relationship between stimulus-bound as well as task-relevant representations and participants’ performance. First, we performed representational similarity analysis using a searchlight at the sensor level to investigate the neural chronometry of the task representations created and how it was related to performance. 
Second, we computed Event-Related Fields (ERFs) to investigate differences between the groups in generalization. 4.6.1 Representational Similarity Analysis Our experiment used different stimulus configurations, each being a unique combination of either conceptual or spatial features. Here we used RSA to identify the relationship between these configurations and the multivariate MEG (gradiometers) signals as they evolve over time [ 28 ]. We selected the peri-stimulus signal from -500 ms to 1500 ms with respect to stimulus onset. Model Matrices We generated six distinct model matrices utilizing Euclidean distances. These models fall into two main categories. First, the Stimulus-bound Model reflects distances within each of the two-dimensional spaces defined by screen features like the opacity of sun and water or position (x and y coordinates). Second, the Task-relevant Representation involves distances along a single dimension defined by task relevance, offering insights into the core aspects of the task structure. These models were constructed for both spatial and conceptual domains, as well as by calculating distances between the conceptual and spatial task configurations in a cross-domain space. Neural Dissimilarity Matrices To construct the neural dissimilarity matrix, we adopted a searchlight approach at the sensor level. This method involved calculating the averaged neural response for each configuration. For each sensor, we selected neighbouring channels within a maximum distance of 40 mm and computed the Spearman correlation distance between all the configurations. Across all time points and sensors, we conducted a correlation analysis between the neural dissimilarity matrix and the model matrices. 
Statistical significance was assessed non-parametrically at the group level using a cluster-based permutation approach with a cluster-forming threshold of p < 0.05 (two-tailed), and a corrected significance level of p < 0.05 (two-tailed) [ 29 ]. Condition labels were randomly permuted 1000 times, following the default method implemented in MNE. This provides an automatic method for finding significant clusters, corrected for multiple comparisons, that does not depend on a priori selection of time window and channels. 4.7 Artificial neural network modeling 4.7.1 Model architecture We trained feedforward neural network models with a single hidden layer to perform an analogous task to the one used in the behavioral experiments. The neural network had one hundred units in the hidden layer, each with a ReLU activation function. The network had a single output unit, used to signal the category of the stimulus (e.g., whether the stimulus would grow or die in the particular position and conditions shown, as in the experiment). The input to the network was composed of six latent variables, corresponding to the spatial features, the conceptual features, and the context (e.g., the seed identity). The spatial and conceptual feature latent variables could each take on three values. The two contextual variables were constrained to be one-hot. The network received a linearly transformed version of these input variables, $x = Mz$, where $z$ are the latent variables, $M$ is a $6 \times 6$ matrix, and $x$ are the inputs to the network. The columns of $M$ were selected with correlation structure between the corresponding spatial and conceptual latent variables, such that $\frac{M_i^{\top} M_j}{\lVert M_i \rVert_2 \, \lVert M_j \rVert_2} = c$ for each pair $(i, j)$ of corresponding spatial and conceptual latent variables, where $M_i$ is the $i$-th column of $M$, $\lVert M_i \rVert_2$ is the length of the $i$-th column of $M$, and $c$ is the desired level of feature similarity. 4.7.2 Optimization The network was trained for 50 epochs, with a batch size of 100 training examples drawn from a set of 2000 total examples. 
The parameters were optimized with the Adam optimizer with a learning rate of $10^{-3}$. 4.7.3 Code The code was written in Python, using TensorFlow [ 61 ] and NumPy [ 62 ]. The code is freely available on GitHub. Acknowledgments We thank Yvonne Wolf-Rosier for her invaluable assistance with participant recruitment and data collection, Lola Kuhn for her support with recruitment, and Kerstin Schumer for her exceptional management of the project. M.H. is supported by the Max Planck School of Cognition. C.F.D.’s research is supported by the Max Planck Society, the European Research Council (ERC-CoG GEOCOG 724836), the Kavli Foundation, the Jebsen Foundation, the Centre of Excellence scheme of the Research Council of Norway—Centre for Neural Computation (223262/F50), The Egil and Pauline Braathen and Fred Kavli Centre for Cortical Microcircuits, and the National Infrastructure scheme of the Research Council of Norway—NORBRAIN (197467/F50). Appendix A Supplementary Materials A.1 Stimulus Mappings Download figure Open in new tab Figure 7: Stimulus Mapping. The figure depicts the different configurations of conceptual or spatial features presented during each phase per task and group. Every configuration was repeated 8 times in both the training and the testing sessions for a total of 288 trials in the training and 448 per testing session. During the navigation phase, each starting configuration was repeated 10 times for a total of 180 trials. A.2 Behavioral Results A.2.1 Learning Performance Testing Sessions Accuracy was computed as the proportion of correct responses over all trials in each testing session and context (Spatial and Conceptual). We performed two between-subject t-tests to see if general performance in the two groups was different. We found that participants’ performance was not statistically different during the testing phases (First Testing Session: t(52) = 1.489, p = 0.142; Second Testing Session: t(52) = 1.367, p = 0.177). 
Download figure Open in new tab Figure 8: General performance and Generalization. Panel A shows participants’ accuracies during the training, pre-test and post-test phases. Participants are divided into SameSt and DiffSt group conditions. Panel B shows participants’ performance for old and new stimuli during the testing phases. Participants are divided into SameSt and DiffSt following the color scheme of Panel A. A.2.2 Generalization Performance Testing Sessions We then quantified generalization during the test session by computing the proportion of correct responses for novel configurations that, contrary to old ones, were not presented during training. We performed two 2x2 mixed-design ANOVAs, with group as the between-subject factor and the novelty of the stimuli (new or old) as the within-subject factor, on the first and second testing sessions. During the first testing session, we found a significant main effect of novelty (F(1,52) = 44.655, p < 0.001) and a significant group-by-novelty interaction (F(1,52) = 5.025, p = 0.029), indicating that participants performed better with old stimuli compared to new ones, but no group main effect (F(1,52) = 2.663, p = 0.108) (see Fig. 8B , Generalization panel). During the post-test, we found no significant difference (Group: F(1,52) = 1.837, p = 0.181; Novelty: F(1,52) = 1.449, p = 0.234; interaction: F(1,52) = 0.059, p = 0.808). These results indicate that participants’ performance for novel stimuli, and thus their generalization, does not differ between the two group conditions (SameSt vs DiffSt). Although we do not find differences in generalization, participants’ performance for the old stimuli differs only in the testing session after the training, but not in the last one. A.3 MEG Results Fig. 9 shows the correlation between the neural dissimilarity matrices of the sensors belonging to the significant clusters and the Stimulus-bound model averaged across all stimulus configurations during the training phase per context. 
Download figure Open in new tab Figure 9: Stimulus-bound Representation during training. Each panel shows the results of the cluster-permutation correction of the correlation between stimulus-bound models and the neural dissimilarity matrix. Clusters are color-coded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with one colour. Results are divided into spatial, conceptual and cross-domain spaces for both the SameSt and DiffSt groups. Fig. 10 shows the correlation between the neural dissimilarity matrices of the sensors of the significant clusters and the task-relevant model averaged across all stimulus configurations during the training phase per context. Download figure Open in new tab Figure 10: Task-Relevant Representation during training. Each panel shows the results of the cluster-permutation correction of the correlation between task-relevant models and the neural dissimilarity matrix. X’s indicate no significant clusters were found. Clusters are color-coded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with one colour. Results are divided into spatial, conceptual and cross-domain spaces for both the SameSt and DiffSt groups. Fig. 11 shows the correlation between the neural dissimilarity matrices of the sensors of the significant clusters and the Stimulus-bound model averaged across all stimulus configurations per context during both testing phases. Fig. 12 shows the correlation between the neural dissimilarity matrices of the sensors of the significant clusters and the task-relevant model averaged across all stimulus configurations per context during both testing phases. Download figure Open in new tab Figure 11: Stimulus-bound Representation during test. Each panel shows the results of the cluster-permutation correction of the correlation between stimulus-bound models and the neural dissimilarity matrix. 
Clusters are colorcoded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with one colour. Results are divided into spatial, conceptual and cross-domain spaces for both the SameSt and DiffSt groups. Download figure Open in new tab Figure 12: T a sk -Relevant Representation during test Each panel shows the results of the cluster-permutation correction of the correlation between stimulus-bound models and the neural dissimilarity matrix. Clusters are colorcoded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with one colour. Results are divided into spatial, conceptual and cross-domain spaces for both the SameSt and DiffSt groups. Download figure Open in new tab Figure 13: We divided participants into good and bad performers according to median split. We then computed neural similarity between stimulus-bound representations of the stimuli during the training phase and the crossdomains model. We found a positive significant cluster in the bad performers in the SameSt group and a negative cluster in the good performers in the DiffSt group. Panel A shows the results of the cluster-permutation correction of stimulus-bound representations for cross-domain space during training for participants in the SameSt group. Clusters are colour-coded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with each colour. Results are divided into spatial, conceptual and cross-domain spaces for both the SameSt and DiffSt groups. Panel B shows the results of the cluster-permutation correction of the stimulus-bound representations for cross-domain space during training for participants in the DiffSt group. Clusters are colour-coded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with one colour . 
Download figure Open in new tab Figure 14: We tested the neural similarity between stimulus-bound representations of the stimuli during the training phase compared to the test phase. We found only a positive significant cluster in the bad performers in the SameSt group. The figure shows the results of the cluster-permutation correction of stimulus-bound representations for cross-domain space comparing the training phase with the testing phase in the SameSt group. Clusters are colour-coded so that significant sensors, their significant time course and correlation are shown. Each cluster is associated with one colour. Results are divided into spatial, conceptual and cross-domain spaces for both the SameSt and DiffSt groups. References 1. ↵ Niv , Y . Learning task-state representations . Nature Neuroscience 22 , 1544 – 1553 ( 2019 ). OpenUrl CrossRef PubMed 2. ↵ Gurnani , H. & Gajic , N. A. C . Signatures of task learning in neural representations . Current opinion in neurobiology 83 , 102759 ( 2023 ). 3. ↵ Badre , D. , Bhandari , A. , Keglovits , H. & Kikumoto , A . The dimensionality of neural representations for control . Current Opinion in Behavioral Sciences 38 , 20 – 28 ( 2021 ). OpenUrl CrossRef PubMed 4. Vaidya , A. R. , Jones , H. M. , Castillo , J. & Badre , D. Neural representation of abstract task structure during generalization . ELife 10 , e63226 ( 2021 ). 5. Bernardi , S. et al. The geometry of abstraction in the hippocampus and prefrontal cortex . Cell 183 , 954 – 967 ( 2020 ). OpenUrl CrossRef PubMed 6. ↵ Tervo , D. G. R. , Tenenbaum , J. B. & Gershman , S. J . Toward the neural implementation of structure learning . Current opinion in neurobiology 37 , 99 – 105 ( 2016 ). OpenUrl CrossRef PubMed 7. ↵ Braun , D. A. , Mehring , C. & Wolpert , D. M . Structure learning in action . Behavioural brain research 206 , 157 – 165 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 8. ↵ Radulescu , A. , Shin , Y. S. & Niv, Y. Human representation learning . 
Annual Review of Neuroscience 44 , 253 – 273 ( 2021 ). OpenUrl CrossRef PubMed 9. ↵ Vaidya , A. R. & Badre , D . Abstract task representations for inference and control . Trends in Cognitive Sciences 26 , 484 – 498 ( 2022 ). OpenUrl CrossRef PubMed 10. ↵ Notebaert , W. , Gevers , W. , Verguts , T. & Fias , W . Shared spatial representations for numbers and space: the reversal of the SNARC and the Simon effects . Journal of Experimental Psychology: Human Perception and Performance 32 , 1197 ( 2006 ). OpenUrl CrossRef PubMed Web of Science 11. ↵ Albers , A. M. , Kok , P. , Toni , I. , Dijkerman , H. C. & De Lange , F. P . Shared representations for working memory and mental imagery in early visual cortex . Current Biology 23 , 1427 – 1431 ( 2013 ). OpenUrl CrossRef PubMed 12. ↵ Vinken , K. , Prince , J. S. , Konkle , T. & Livingstone , M. S . The neural code for “face cells” is not face-specific . Science Advances 9 , eadg1736 ( 2023 ). 13. ↵ Huang , Q. & Luo , H . Shared structure facilitates working memory of multiple sequences . Elife 12 , RP93158 ( 2024 ). 14. ↵ Menghi , N. , Kacar , K. & Penny , W . Multitask Learning over Shared Subspaces . PLoS Computational Biology ( 2021 ). 15. ↵ Garner , K. , Lynch , C. R. & Dux , P. E . Transfer of training benefits requires rules we cannot see (or hear) . Journal of Experimental Psychology: Human Perception and Performance 42 , 1148 ( 2016 ). OpenUrl CrossRef PubMed 16. ↵ Decety , J. & Sommerville , J. A . Shared representations between self and other: a social cognitive neuroscience view . Trends in cognitive sciences 7 , 527 – 533 ( 2003 ). OpenUrl CrossRef PubMed Web of Science 17. ↵ Yang , G. R. , Joglekar , M. R. , Song , H. F. , Newsome , W. T. & Wang , X.-J . Task representations in neural networks trained to perform many cognitive tasks . Nature neuroscience 22 , 297 – 306 ( 2019 ). OpenUrl CrossRef PubMed 18. Tomov , M. S. , Schulz , E. & Gershman , S. J . Multi-task reinforcement learning in humans . 
Nature Human Behaviour 5 , 764 – 773 ( 2021 ). OpenUrl CrossRef PubMed 19. ↵ Johnston , W. J. & Fusi , S . Abstract representations emerge naturally in neural networks trained to perform multiple tasks . Nature Communications 14 , 1040 ( 2023 ). OpenUrl CrossRef PubMed 20. Goudar , V. , Peysakhovich , B. , Freedman , D. J. , Buffalo , E. A. & Wang , X.-J . Schema formation in a neural population subspace underlies learning-to-learn in flexible sensorimotor problem-solving . Nature Neuroscience 26 , 879 – 890 ( 2023 ). OpenUrl CrossRef PubMed 21. ↵ Johnston , W. J. & Fusi , S . Modular representations emerge in neural networks trained to perform context-dependent tasks . bioRxiv , 2024 – 09 ( 2024 ). 22. Wu , C. M. , Schulz , E. , Garvert , M. M. , Meder , B. & Schuck , N. W . Similarities and differences in spatial and non-spatial cognitive maps . PLoS computational biology 16 , e1008149 ( 2020 ). OpenUrl CrossRef 23. ↵ Flesch , T. , Balaguer , J. , Dekker , R. , Nili , H. & Summerfield , C . Comparing continual task learning in minds and machines . Proceedings of the National Academy of Sciences 115 , E10313 – E10322 ( 2018 ). OpenUrl Abstract / FREE Full Text 24. ↵ Lee , W. , Hazeltine , E. & Jiang , J . Decoding task representations that support generalization in hierarchical task . bioRxiv , 2024 – 12 ( 2024 ). 25. ↵ Menghi , N. , Silvestrin , F. , Pascolini , L. & Penny , W . The emergence of task-relevant representations in a nonlinear decision-making task . Neurobiology of Learning and Memory 206 , 107860 ( 2023 ). 26. ↵ Luyckx , F. , Nili , H. , Spitzer , B. & Summerfield , C . Neural structure mapping in human probabilistic reward learning . Elife 8 , e42816 ( 2019 ). OpenUrl CrossRef PubMed 27. ↵ Nau , M. , Schmid , A. C. , Kaplan , S. M. , Baker , C. I. & Kravitz , D. J . Centering cognitive neuroscience on task demands and generalization . Nature Neuroscience , 1 – 12 ( 2024 ). 28. ↵ Kriegeskorte , N. , Mur , M. & Bandettini , P. A . 
Representational similarity analysis-connecting the branches of systems neuroscience . Frontiers in systems neuroscience , 4 ( 2008 ). 29. ↵ Maris , E. & Oostenveld , R . Nonparametric statistical testing of EEG-and MEG-data . Journal of neuroscience methods 164 , 177 – 190 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 30. ↵ Gramfort , A. et al. MEG and EEG Data Analysis with MNE-Python . Frontiers in Neuroscience 7 , 1 – 13 ( 2013 ). OpenUrl PubMed 31. ↵ Musslick , S. & Cohen , J. D . Rationalizing constraints on the capacity for cognitive control . Trends in Cognitive Sciences 25 , 757 – 775 ( 2021 ). OpenUrl CrossRef PubMed 32. ↵ Garner , K. G. & Dux , P. E . Knowledge generalization and the costs of multitasking . Nature Reviews Neuroscience 24 , 98 – 112 ( 2023 ). OpenUrl CrossRef PubMed 33. ↵ Mill , R. D. & Cole , M. W . Neural representation dynamics reveal computational principles of cognitive task learning . bioRxiv , 2023 – 06 ( 2023 ). 34. Libby , A. & Buschman , T. J . Rotational dynamics reduce interference between sensory and memory representations . Nature neuroscience 24 , 715 – 726 ( 2021 ). OpenUrl CrossRef PubMed 35. Losey , D. M. et al. Learning leaves a memory trace in motor cortex . Current Biology ( 2024 ). 36. ↵ Flesch , T. , Saxe , A. & Summerfield , C . Continual task learning in natural and artificial agents . Trends in Neurosciences 46 , 199 – 210 ( 2023 ). OpenUrl CrossRef PubMed 37. Bhandari , A. , Keglovits , H. , Chicklis , E. & Badre , D . Task structure tailors the geometry of neural representations in human lateral prefrontal cortex . bioRxiv , 2024 – 03 ( 2024 ). 38. Guise , K. G. & Shapiro , M. L . Medial prefrontal cortex reduces memory interference by modifying hippocampal encoding . Neuron 94 , 183 – 192 ( 2017 ). OpenUrl CrossRef PubMed 39. Weber , J. et al. Subspace partitioning in the human prefrontal cortex resolves cognitive interference . Proceedings of the National Academy of Sciences 120 , e2220523120 ( 2023 ). 
OpenUrl CrossRef PubMed 40. ↵ Kikumoto , A. , Shibata , K. , Nishio , T. & Badre , D. Practice Reshapes the Geometry and Dynamics of Task-tailored Representations . bioRxiv ( 2024 ). 41. ↵ Mosha , N. & Robertson , E. M . Unstable memories create a high-level representation that enables learning transfer . Current Biology 26 , 100 – 105 ( 2016 ). OpenUrl CrossRef PubMed 42. Barron , H. C . Neural inhibition for continual learning and memory . Current opinion in neurobiology 67 , 85 – 94 ( 2021 ). OpenUrl CrossRef PubMed 43. Dekker , R. B. , Otto , F. & Summerfield , C . Curriculum learning for human compositional generalization . Proceedings of the National Academy of Sciences 119 , e2205582119 ( 2022 ). OpenUrl CrossRef PubMed 44. Beukers , A. O. et al. Blocked training facilitates learning of multiple schemas . Communications Psychology 2 , 28 ( 2024 ). 45. ↵ Kerrén, C., Bramao , I. , Hellerstedt , R. & Johansson , M. Strategic retrieval prevents memory interference: The temporal dynamics of retrieval orientation . Neuropsychologia 154 , 107776 ( 2021 ). 46. ↵ Bustos , B. , Mordkoff , J. T. , Hazeltine , E. & Jiang , J . Task switch costs scale with dissimilarity between task rules . Journal of Experimental Psychology: General ( 2024 ). 47. Yang , G. et al. Dorsolateral prefrontal activity supports a cognitive space organization of cognitive control . Elife 12 , RP87126 ( 2024 ). 48. ↵ Webb , T. W. , et al. The relational bottleneck as an inductive bias for efficient abstraction . Trends in Cognitive Sciences ( 2024 ). 49. ↵ Gardenfors , P. Conceptual spaces: The geometry of thought ( MIT press , 2004 ). 50. Behrens , T. E. , et al. What is a cognitive map? Organizing knowledge for flexible behavior . Neuron 100 , 490 – 509 ( 2018 ). OpenUrl CrossRef PubMed 51. Bellmund , J. L. , Gärdenfors, P., Moser, E. I. & Doeller, C. F. Navigating cognition: Spatial codes for human thinking . Science 362 ( 2018 ). 52. ↵ Kaplan , R. , Schuck , N. W. & Doeller , C. F . 
The role of mental maps in decision-making . Trends in Neurosciences 40 , 256 – 259 ( 2017 ). OpenUrl CrossRef PubMed 53. ↵ Viganò, S., Rubino, V., Di Soccio, A., Buiatti, M. & Piazza, M . Grid-like and distance codes for representing word meaning in the human brain . NeuroImage 232 , 117876 ( 2021 ). 54. Theves , S. , Fernandez , G. & Doeller , C. F . The hippocampus encodes distances in multidimensional feature space . Current Biology 29 , 1226 – 1231 ( 2019 ). OpenUrl CrossRef PubMed 55. Nitsch , A. , Garvert , M. M. , Bellmund , J. L. , Schuck , N. W. & Doeller , C. F . Grid-like entorhinal representation of an abstract value space during prospective decision making . Nature Communications 15 , 1198 ( 2024 ). OpenUrl CrossRef PubMed 56. ↵ Barnaveli , I. , Vigano , S. , Reznik , D. , Haggard , P. & Doeller , C. F . Hippocampal-entorhinal cognitive maps and cortical motor system represent action plans and their outcomes . bioRxiv , 2024 – 07 ( 2024 ). 57. ↵ Brainard , D. H. The psychophysics toolbox . Spatial Vision 10 , 433 – 436 ( 1997 ). OpenUrl CrossRef PubMed Web of Science 58. ↵ Viganò, S., Bayramova, R., Doeller, C. F. & Bottini, R . Spontaneous eye movements reflect the representational geometries of conceptual spaces . Proceedings of the National Academy of Sciences 121 , e2403858121 ( 2024 ). OpenUrl CrossRef PubMed 59. Loetscher , T. , Bockisch , C. J. , Nicholls , M. E. & Brugger , P . Eye position predicts what number you have in mind . Current Biology 20 , R264 – R265 ( 2010 ). OpenUrl CrossRef PubMed 60. ↵ Hesse , P. N. & Bremmer , F . The SNARC effect in two dimensions: Evidence for a frontoparallel mental number plane . Vision research 130 , 85 – 96 ( 2017 ). OpenUrl CrossRef PubMed 61. ↵ Martín Abadi et al. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems . Software available from tensorflow.org. 2015. https://www.tensorflow.org/ . 62. ↵ Harris , C. R. et al. Array programming with NumPy . Nature 585 , 357 – 362 . 
doi: 10.1038/s41586-020-2649-2 (Sept. 2020 ). OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted January 20, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following The effects of task similarity during representation learning in brains and neural networks Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share The effects of task similarity during representation learning in brains and neural networks N. Menghi , W. J. Johnston , S. Vigano’ , M. A. B. Hinrichs , B. Maess , S. Fusi , C. F. Doeller bioRxiv 2025.01.20.633896; doi: https://doi.org/10.1101/2025.01.20.633896 Share This Article: Copy Citation Tools The effects of task similarity during representation learning in brains and neural networks N. Menghi , W. J. Johnston , S. Vigano’ , M. A. B. Hinrichs , B. Maess , S. Fusi , C. F. 
Doeller bioRxiv 2025.01.20.633896; doi: https://doi.org/10.1101/2025.01.20.633896 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Neuroscience Subject Areas All Articles Animal Behavior and Cognition (7622) Biochemistry (17650) Bioengineering (13871) Bioinformatics (41880) Biophysics (21424) Cancer Biology (18566) Cell Biology (25461) Clinical Trials (138) Developmental Biology (13365) Ecology (19866) Epidemiology (2067) Evolutionary Biology (24290) Genetics (15590) Genomics (22475) Immunology (17713) Microbiology (40328) Molecular Biology (17148) Neuroscience (88473) Paleontology (666) Pathology (2827) Pharmacology and Toxicology (4816) Physiology (7635) Plant Biology (15114) Scientific Communication and Education (2044) Synthetic Biology (4286) Systems Biology (9815) Zoology (2268)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.