Gene expression changes in long-term memory unlikely to replicate in the long term

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 17,681 characters · extracted from preprint-html · click to expand
Gene expression changes in long-term memory unlikely to replicate in the long term | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Contradictory Results Gene expression changes in long-term memory unlikely to replicate in the long term View ORCID Profile Eran A. Mukamel , Zhaoxia Yu doi: https://doi.org/10.1101/2024.07.22.604349 Eran A. Mukamel 1 Department of Cognitive Science, University of California , San Diego Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Eran A. 
Mukamel For correspondence: emukamel{at}ucsd.edu Zhaoxia Yu 2 Department of Statistics, University of California , Irvine Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Data/Code Preview PDF SUMMARY Identifying the cellular effects of memory-forming experiences on neurons which enable subsequent memory recall is a fundamental aim of neuroscience. The search for the engram could benefit from single cell RNA sequencing, which can estimate the mRNA expression of all genes in large samples of individual brain cells from animals exposed to specific experiences. A recent study used spatial transcriptomics and single cell RNA-sequencing to identify “transcriptional signatures in subpopulations of neurons and astrocytes that were memory-specific and persisted for weeks” 1 . However, because the authors did not account for multiple statistical comparisons 2 and instead used an “unadjusted” threshold for statistical significance, the reported findings are likely dominated by false positives 3 . Moreover, the statistical analysis treated individual cells as independent samples without accounting for correlations across cells derived from the same biological tissue sample. Reanalysis of the study’s data using appropriate, widely accepted statistical procedures identifies no significant differentially expressed genes. This suggests the data do not support the authors’ claim to have identified cell type-specific transcriptional signatures of memory in the mouse basolateral amygdala. MAIN TEXT The risk of false positives due to multiple comparisons in large-scale genomic studies has been recognized for many years 2 . When testing the effect of a treatment on thousands of genes, around 5% of the tested genes are expected to pass an unadjusted significance threshold (p<0.05) even in the absence of any true effect.
In the current study, the authors tested 3,350 genes for a significant difference of expression due to fear and recall experiences within a specific group of activated “engram” neurons (TRAPed BlaIn.Gpr88 neurons; Fig. 2e of Sun et al.). They applied a nominal (unadjusted) p-value threshold (p<0.05), together with criteria for fold-change (FC>1.75) and specificity for the fear/recall experience. They reported 32 genes associated with remote memory in the Gpr88 expressing inhibitory neurons, and 107 total genes passing “stringent criteria” across 6 types of neurons. Notably, the unadjusted threshold they applied would be expected to produce ∼160 false positives (5% of 3,350) in a single cell type, raising the question of whether the reported effects of memory are statistically valid. We focus on the main finding reported in the paper, which is that a specific population of GABAergic “engram” neurons (TRAPed, defined by expression of tdT) exhibited a pattern of up- and down-regulation of several hundred genes that was specifically induced by fear and recall (FR) experiences (see Methods). The authors identified these genes by comparing the expression of genes in FR mice with control mice that did not experience fear conditioning (no-fear, NF). Here we focused on the main analysis of a population of inhibitory neurons in the BLA expressing Gpr88 (BlaIn.Gpr88), but our findings apply equally to the other cell types which were analyzed in the same way. We first verified that, consistent with the original paper, analyzing the data with no adjustment for multiple comparisons yields 253 nominally significant genes (p<0.05, FC>1.75) ( Fig. 1a ). The 107 strictly filtered genes described by the authors are presumably a subset of these genes which result from applying further criteria, such as excluding genes which were also differentially expressed in non-TRAPed (tdT–) neurons. This confirms that we correctly interpreted the dataset and successfully replicated the reported analysis.
Download figure Open in new tab Figure 1: Failing to adjust for multiple comparisons leads to spurious detection of differential expression. a-d , Volcano plots showing the difference in mRNA expression in TRAPed BLA.Int.Gpr88 cells between mice exposed to fear and recall (FR) vs. no-fear control (NF), as in Fig. 2e of the original paper 1 . The x-axis shows the fold-change, and the y-axis shows the statistical significance assessed by unadjusted p-value (a,c) or using the false discovery rate (FDR) adjusted p-value (b,d) (two-sided Mann-Whitney U rank-based test). Grey dots show all tested genes, and red dots show the significant genes identified in Fig. 2e of the original paper. a , b Analysis treating individual cells as independent samples (n=70 cells). c , d Analysis using pseudobulk expression profiles from each animal as an independent sample (n=11 animals; 4 FR, 7 NF). e , Number of significant genes using the original data (red) or after shuffling the treatment labels (FR, NF) across cells (blue bars, 1000 shuffles; error bar shows 25th-75th percentile range). The standard practice for controlling false positives in large-scale genomic studies is the false discovery rate (FDR), which aims to control the expected proportion of false positives among the detected significant genes. In practice, FDR-adjusted p-values are often computed to account for multiple comparisons 2 , 4 . Indeed, a closely related paper from the same laboratories used FDR correction to identify memory-related gene expression changes in the medial prefrontal cortex 5 . Using adjusted p-values at a specified FDR of 0.05 ensures that no more than ∼5% of the reported significant results will be false discoveries. When we applied the Benjamini-Hochberg FDR procedure, we found no genes with FDR<0.05 or <0.10; the smallest FDR was 0.18 ( Fig. 1b ).
A second major problem with the statistical analysis is the treatment of individual cells as independent samples, without accounting for the correlation between cells derived from the same animal. Differences among individuals are a critical source of variability in biological data 6 , 7 . By treating single cells as independent samples, the statistical analysis ignores the within-sample dependence and leads to overconfident results. As a consequence, the findings will have poor generalizability to other mice treated in the same way, failing to demonstrate bona fide effects of remote memory on engram neurons. The most reliable approach for analyzing differential expression with single cell RNA-seq data handles inter-individual variability by using pseudobulk samples that combine data from all cells observed from each biological sample 8 . Alternatively, group differences in single cell RNA-seq data can be analyzed using mixed models that include random effects to account for individual variability 7 , 9 . We reanalyzed the data using pseudobulk expression profiles (n=11 mice) rather than individual cells (n=70) as the unit of analysis ( Fig. 1c,d ). This analysis identified only 21 genes with a nominally significant (unadjusted) p value (p<0.05). The FDR adjusted p-values for all the genes were 1. The likelihood that most of the observed expression changes are spurious false positives can be directly demonstrated by analyzing shuffled data. We randomly permuted the treatment labels (NF, FR) of the 70 cells 1,000 times, and applied each method of differential expression analysis to each permutation of the data. The number of genes passing the authors’ threshold (p<0.05) for the shuffled data (170±75 mean ± s.d.) was close to the expected number (5% of 3,574 genes = 178) ( Fig. 1e ). Notably, 12.3% of shuffles resulted in a larger number of nominally significant genes than the original data (253).
By contrast, the FDR adjusted tests led to no false positives across the shuffles, as expected. Progress in understanding the relationship between cognitive memory and experience-dependent changes in brain gene expression requires rigor and care in the analysis of complex single-cell transcriptomic data. By neglecting best practices such as adjustment for multiple comparisons and accounting for dependence due to multiple cells from the same animal, research based on RNA sequencing risks an accumulation of irreproducible findings 3 , 7 . Methods Processed gene expression data from the original study were downloaded from NCBI/GEO at accession GSE246147. The identity of the 70 amygdala cells which were annotated as “TRAPed BLA.Int.Gpr88” (i.e. expressing tdT and Gpr88) was kindly provided by Wenfei Sun. The identity of the animal from which each cell derived was extracted from the first three characters of the cell id. The cells were unevenly distributed across animals, with 39/70 cells coming from two FR animals, while 7 of the 11 animals contributed ≤3 cells each. We tested differences in expression using a Mann-Whitney U rank test, implemented in the Python package scipy.stats.mannwhitneyu. We included 3,574 genes which had normalized expression (counts per million, CPM) >600, in order to approximately match the analysis in Fig. 2e of the original study. Note that Fig. 2e of Sun et al. states “Total = 3,350 DEGs,” but we interpret this as referring to the number of genes tested rather than the number of significant differentially expressed genes. We calculated false discovery rate (FDR)-adjusted p-values using the Benjamini-Hochberg method 4 implemented in Python statsmodels.stats.multitest.fdrcorrection. To assess the rate of false positives, we randomly permuted the treatment label (NF and FR) of the 70 cells and repeated the Mann-Whitney U-test and FDR correction.
We repeated the random shuffling 1,000 times to assess the mean and variability of the number of nominally significant genes under the null hypothesis. A notebook reproducing the analyses presented here is available at https://github.com/mukamel-lab/SunQuake_Nature2024_Reanalysis and will be deposited at Zenodo upon publication. Footnotes https://github.com/mukamel-lab/SunQuake_Nature2024_Reanalysis References 1. ↵ Sun , W. et al. Spatial transcriptomics reveal neuron-astrocyte synergy in long-term memory . Nature ( 2024 ) doi: 10.1038/s41586-023-07011-6 . OpenUrl CrossRef 2. ↵ Storey , J. D. & Tibshirani , R. Statistical significance for genomewide studies . Proc. Natl. Acad. Sci. U. S. A . 100 , 9440 – 9445 ( 2003 ). OpenUrl Abstract / FREE Full Text 3. ↵ Mukamel , E. A. Multiple Comparisons and Inappropriate Statistical Testing Lead to Spurious Sex Differences in Gene Expression . Biological psychiatry vol. 91 e1 – e2 ( 2022 ). OpenUrl 4. ↵ Benjamini , Y. & Hochberg , Y. Controlling the false discovery rate: a practical and powerful approach to multiple testing . J. R. Stat. Soc. Series B Stat. Methodol . 289 – 300 ( 1995 ). 5. ↵ Chen , M. B. , Jiang , X. , Quake , S. R. & Südhof , T. C. Persistent transcriptional programmes are associated with remote memory . Nature 587 , 437 – 442 ( 2020 ). OpenUrl CrossRef PubMed 6. ↵ Aarts , E. , Verhage , M. , Veenvliet , J. V. , Dolan , C. V. & van der Sluis , S. A solution to dependency: using multilevel analysis to accommodate nested data . Nat. Neurosci . 17 , 491 – 496 ( 2014 ). OpenUrl CrossRef PubMed 7. ↵ Yu , Z. et al. Beyond t test and ANOVA: applications of mixed-effects models for more rigorous statistical analysis in neuroscience research . Neuron 110 , 21 – 35 ( 2022 ). OpenUrl 8. ↵ Squair , J. W. et al. Confronting false discoveries in single-cell differential expression . Nat. Commun . 12 , 5692 ( 2021 ). OpenUrl CrossRef PubMed 9. ↵ Finak , G. et al. 
MAST: a flexible statistical framework for assessing transcriptional changes and characterizing heterogeneity in single-cell RNA sequencing data . Genome Biol . 16 , 278 ( 2015 ). OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted July 24, 2024. Download PDF Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Gene expression changes in long-term memory unlikely to replicate in the long term Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Gene expression changes in long-term memory unlikely to replicate in the long term Eran A. Mukamel , Zhaoxia Yu bioRxiv 2024.07.22.604349; doi: https://doi.org/10.1101/2024.07.22.604349 Share This Article: Copy Citation Tools Gene expression changes in long-term memory unlikely to replicate in the long term Eran A. 
Mukamel , Zhaoxia Yu bioRxiv 2024.07.22.604349; doi: https://doi.org/10.1101/2024.07.22.604349 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Neuroscience Subject Areas All Articles Animal Behavior and Cognition (7653) Biochemistry (17763) Bioengineering (13944) Bioinformatics (42101) Biophysics (21509) Cancer Biology (18667) Cell Biology (25592) Clinical Trials (138) Developmental Biology (13413) Ecology (19969) Epidemiology (2067) Evolutionary Biology (24393) Genetics (15647) Genomics (22582) Immunology (17791) Microbiology (40524) Molecular Biology (17222) Neuroscience (88860) Paleontology (667) Pathology (2848) Pharmacology and Toxicology (4841) Physiology (7670) Plant Biology (15182) Scientific Communication and Education (2048) Synthetic Biology (4312) Systems Biology (9843) Zoology (2274)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2024) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00