Multi-season analysis reveals hundreds of drought-responsive genes in sorghum

preprint OA: closed CC-BY-NC-4.0
📄 Open PDF Full text JSON View at publisher
Full text 78,319 characters · extracted from preprint-html · click to expand
Multi-season analysis reveals hundreds of drought-responsive genes in sorghum | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Multi-season analysis reveals hundreds of drought-responsive genes in sorghum Benjamin Cole , Wenxin Zhang , Junming Shi , Hao Wang , Christopher Baker , Nelle Varoquaux , Joy Hollingsworth , Robert Hutmacher , Jeffery Dahlberg , Grady Pierroz , Kerrie W. Barry , Vasanth Singan , Yuko Yoshinaga , Christopher Daum , Matthew Zane , Matthew Blow , Ronan O’Malley , Shengqiang Shu , Jerry W. Jenkins , John T. Lovell , Jeremy Schmutz , John W. Taylor , Devin Coleman-Derr , Axel Visel , Peggy G. Lemaux , Elizabeth Purdom , John P. Vogel doi: https://doi.org/10.1101/2025.06.27.662006 Benjamin Cole 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Wenxin Zhang 2 Division of Biostatistics, School of Public Health, University of California , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Junming Shi 2 Division of Biostatistics, School of Public Health, University of California , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hao Wang 2 Division of Biostatistics, School of Public Health, University of California , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Christopher Baker 3 Department of Plant and Microbial Biology, University of California , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Nelle Varoquaux 2 Division of Biostatistics, School of Public Health, University of California , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Joy Hollingsworth 4 Kearney Agricultural Research and Extension, University of California , Parlier, CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Robert Hutmacher 5 Department of Plant Sciences, University of California , Davis, CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jeffery Dahlberg 4 Kearney Agricultural Research and Extension, University of California , Parlier, CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Grady Pierroz 3 Department of Plant and Microbial Biology, University of California , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kerrie W. Barry Find this author on Google Scholar Find this author on PubMed Search for this author on this site Vasanth Singan 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yuko Yoshinaga 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Christopher Daum 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Matthew Zane 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Matthew Blow 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ronan O’Malley 6 University of Chicago, Department of Human Genetics , Chicago, IL Find this author on Google Scholar Find this author on PubMed Search for this author on this site Shengqiang Shu 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jerry W. Jenkins 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site John T. Lovell 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA 7 HudsonAlpha Institute for Biotechnology , Huntsville, AL Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jeremy Schmutz 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA 7 HudsonAlpha Institute for Biotechnology , Huntsville, AL Find this author on Google Scholar Find this author on PubMed Search for this author on this site John W. Taylor 3 Department of Plant and Microbial Biology, University of California , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Devin Coleman-Derr 3 Department of Plant and Microbial Biology, University of California , Berkeley CA 8 Plant Gene Expression Center, USDA-ARS , Albany CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Axel Visel 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Peggy G. Lemaux 3 Department of Plant and Microbial Biology, University of California , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: jpvogel{at}lbl.gov Elizabeth Purdom 9 Department of Statistics, University of California , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: jpvogel{at}lbl.gov John P. Vogel 1 DOE-Joint Genome Institute, Lawrence Berkeley National Laboratory , Berkeley CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: jpvogel{at}lbl.gov Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Persistent drought affects global crop production and is becoming more severe in many parts of the world in recent decades. Deciphering how plants respond to drought will facilitate the development of flexible mitigation strategies. Sorghum bicolor L. Moench (sorghum), a major cereal crop and an emerging bioenergy crop, exhibits remarkable resilience to drought. To better understand the molecular traits that underlie sorghum’s remarkable drought tolerance, we undertook a large-scale sorghum gene expression profiling effort, totaling nearly 1,500 transcriptome profiles, across a 3-year field study with replicated plots in California’s Central Valley. This study included time-resolved gene expression data from roots and leaves of two sorghum genotypes, BTx642 and RTx430, with different pre-flowering and post-flowering drought-tolerance adaptations under control and drought conditions. Quantification of genotype-specific drought tolerance effects was enabled by de novo sequencing, assembly, and annotation of both BTx642 and RTx430 genomes. These reference-quality genomes were used to construct a pan-gene set for characterizing conserved and genotype-specific expression. By integrating time-resolved transcriptomic responses to drought in the field across three consecutive years, we identified a set of drought-responsive genes that responded similarly in all three years of our field study. This expansive dataset represents a unique resource for sorghum and drought research communities and provides a methodological framework for the integration of multi-faceted time-resolved transcriptomic datasets. Introduction To meet the ever-growing global demand for resources (including food, feed and energy), there is an urgent need to explore alternative and renewable feedstocks while limiting the negative impacts of their production [ 1 ]. Bioenergy is an attractive source of renewable fuels [ 2 ], with bioethanol production already ubiquitous in the energy market. However, biomass production costs, yields, and conversion efficiencies still need to be substantially improved for cellulosic-derived bioenergy to be a viable alternative to current, dominant energy sources. This improvement would ideally incorporate crop resilience under extreme growing conditions. Drought, for example, poses especially large challenges to biomass crop production, resulting in up to 24-27% losses in yield [ 3 ]. Sorghum [ Sorghum bicolor (L.) Moench] is an important cereal crop for much of the developing world, a large component of animal feed for developed nations, and a promising bioenergy resource [ 4 ]. This is in part due to its high resilience to poor growing conditions, including drought and marginal soils, as well as its ease of cultivation [ 5 ]. Its relatively high yield under drought conditions is of particular interest, as droughts have become more frequent and more intense recently in many parts of the world [ 6 ]. Despite the ability of sorghum to persist through drought imposed at various life stages ( e.g., pre-flowering and post-flowering), severe drought in most cases reduces grain production and overall biomass growth [ 7 ]. Taken together, these features, combined with the relatively small, diploid sorghum genome, large germplasm collections, and efficient transformation and editing methods [ 8 , 9 ], make sorghum an excellent model for studying drought tolerance in agricultural systems. Recently, a large consortium set out to profile genome-wide responses to sorghum plants experiencing drought using advanced ‘omics technologies [ 10 – 22 ]. For this project sorghum plants were grown in replicated agricultural plots located at the Kearney Agricultural Research and Extension Center of the University of California’s Division of Agriculture and Natural Resources (UC-ANR) in Parlier, CA. This location was chosen due to its well-drained sandy-loam soil, high summer temperatures that reflect future climatic conditions and, most importantly, the near-total lack of precipitation during the summer months that make it possible to tightly control the amount of water applied. Transcriptomic data from a single year [ 11 ] of this experiment revealed that close to 30% of the whole transcriptome was altered by drought in two sorghum cultivars: BTx642, a stay-green [ 23 ] variety that retains photosynthetic potential during severe post-flowering drought stress, and RTx430 that lacks this ability. One of the notable advantages of this study included conducting the field study under “real” drought conditions, which greenhouse or in vitro settings simply cannot reproduce [ 24 ]. In addition to leaves, this study also concurrently sampled the biotic environment of sorghum roots (the rhizosphere), the microorganisms that live in the soil near plant roots and interact with the plant and react to drought conditions [ 13 , 16 ]. While highly informative, this study did not consider year-to-year variability in weather and soil conditions, which can be large and undoubtedly impact drought severity, microbial community composition, as well as confounding environmental stresses (e.g., heat). Sampling across multiple growing seasons and/or in different locations can provide a more robust evaluation of plant performance during drought. Moreover, in the first report transcriptomic data was only mapped to the BTx623 reference genome [ 25 , 26 ]. Using a single reference genome for transcriptional analysis of different genotypes introduces several potential artifacts. First, pan-genomic analysis of several plants has revealed extensive intraspecific presence-absence variation [ 27 ]. Thus, using a reference genome from a different genotype will completely miss genes that are not found in both genotypes. Second, genes in their degree of DNA similarity, which will affect measured expression levels due to the accuracy of read mapping. Finally, copy number variation will introduce errors into expression measurements. While the overall errors due to these factors may be relatively small on a genome-wide basis, they will be exaggerated for divergent genes which in some cases may be the most important genes determining genotype-specific phenotypes. Thus, a pan-genomic approach is necessary to fully understand the molecular shifts that occur during drought across multiple genotypes, even if they are closely related [ 28 ]. Here, we incorporate two additional years of data into the transcriptome analysis of field-droughted sorghum [ 11 ] and use a comparative genomics approach to more accurately characterize gene expression in each of the genotypes used. We also introduce additional time points around the water transitions (i.e., turning water back on after pre-flowering drought, and turning water off during post-flowering drought) to capture rapid responses following the end or onset of drought. Using this data, we defined a robust and comprehensive transcriptomic resource that includes highlighting loci that are highly sensitive to drought. Results Drought treatments and sampling As results from field studies strongly depend on variable soil and weather conditions, we decided to add two additional seasons of sampling on top of the initial season previously reported[ 11 ] to complete a 3-year drought trial. The present study specifically included sampling of two tissue types (root and leaf), two genotypes (RTx430 and BTx642), and three watering regimes (well-watered, pre-flowering drought and post-flowering drought), with weekly sampling across 17 weeks in a matrixed experimental design conducted on at UC-ANR Kearney Agricultural Research and Extension Center plots in the California Central Valley (Parlier, CA) ( Fig. 1 ). For both types of drought, the transition in watering status occurred 8 weeks after planting (Day 57), with pre-flowering drought plots receiving a full water treatment for the first time 7 weeks after planting, while regular watering ceased for post-flowering droughted plots at that time. Compared to the first year’s sampling procedure, we increased sampling density in Years 2 and 3, above the previous once-per-week regime, around the drought transition (8 hours, 1 and 3 days after watering resumption for pre-flowering plots, 2, 5 days after stopping watering for post-flowering plots). This allowed us to gain finer-scale resolution of transcriptomic changes after the resumption of watering after pre-flowering drought (Pre-flowering Drought Recovery) or after the onset of post-flowering drought. In total, we employed RNA-seq to characterize the transcriptomes from 1,476 samples. Download figure Open in new tab Figure 1: Experimental Overview. Samples were taken weekly each year, starting 7 days after germination. Blue regions in this diagram indicate periods of watering for each condition, while brown regions indicate periods of drought (no watering). Denser sampling occurred around the watering (and flowering) transition. Dots are colored according to the tissue and genotype sampled for a given week. A comparative genomic approach for transcriptome analysis While using a single genomic reference for a multi-genotype study simplifies the analysis of gene expression variation across multiple genotypes by ignoring gene presence/absence and copy number variation, genotypic differences important for the array of phenotypes observed, especially during drought ( e.g., stay-green), that are encoded by genotype-specific alleles, or copy-number variation would be missed. To address these shortcomings, we generated de novo genome assemblies and annotations for both BTx642 and RTx430 genotypes using a combination of shotgun DNA sequence data, HiC data, RNA-seq data from the 3-year EPICON time course survey (current study), as well as Iso-seq data derived from total RNA extracted from greenhouse-grown leaf, stem, root, callus, and derived callus tissue. This resulted in chromosome-scale genome assemblies consisting of 10 main chromosomes as well as 126 and 165 small unplaced contigs (for BTx642 and RTx430 assemblies, respectively; Table S1 ). Both genomes are available through the Phytozome database ( https://phytozome-next.jgi.doe.gov/ )[ 29 ]. We then generated a synteny-guided pan-gene set consisting of the two EPICON strains in addition to the sorghum var. BTx623 reference genome ( Table S2 ). This pan-gene set contained 41,607 pangenes, 25,964 of them (62.4%) shared between all three genotypes ( Fig. 2 ). When comparing two genotypes, pangenes can be described as either 1:1 (one syntenic copy per genotype), 1:M (one copy in one genotype, multiple copies in the other), M:M (multiple syntenic copies in both genotypes), or 1-0/M-0 (gene uniquely present in only one genotype). While the genomes of BTx642 and RTx430 are highly similar and syntenic ( Fig. 2B,C ) , our approach identified several thousand genes that were not present in one or the other genotype, and others with copy number variation. While this could be due to inaccurate or incomplete annotations for either genome, it may represent biological differences that could underlie the phenotypic differences of these genotypes to drought ( Fig. 2A ). Download figure Open in new tab Figure 2: Genome similarity between three sorghum genotypes. A) UpSet plot showing numbers of genes having 1:1 relationships between the three genotypes. Dot indicates the overlap category among genes for genotypes shown on bottom left. C) Riparian plot showing high degree of synteny in gene content between three sorghum cultivars, RTx430 (top), BTx642 (middle), and BTx623 (bottom; reference). Using this pan-gene set, we quantified transcript abundance for each sample by first mapping reads from all libraries to their corresponding reference genome. We then summarized gene expression for all pangenes (see Methods) as the sum of aligned counts for all composite genes within a pangene for each genotype. We reasoned that pangenes with multiple genes from a single genotype likely represent tandem duplication events with little functional difference. Thus, expression from any gene copy represents increased abundance in one functional gene product, making the sum of expression an appropriate summary metric. In addition to pangene expression profiles, we also compiled “genotype-centered” expression profiles for all genes in each individual genotype. Our complete dataset consisted of 1,476 gene expression profiles of 51,865 expressed, protein-coding genes (annotated genes from either genotype, including homologs), consolidated into 23,545 expressed pangenes with representatives from both genotypes. Of these, we identified 16,358 pangenes (69.5%) that represented 1:1 orthologs. We used principal component analysis (PCA) of these 1:1 orthologs across all samples to assess the major sources of variation. We found 6 principal components representing over 85% of the variance among expressed and variable genes. These principal components apparently separated expression profiles largely by tissue, genotype, and plant age ( Fig. S1 ). Download figure Open in new tab Figure S1: Variation between datasets. PCA was conducted with all transcriptomes, with the first six principal components plotted. Identification of a core set of genes impacted by drought We set out to identify genes that are differentially expressed upon drought stress consistently across years, conditions, and between genotypes, thus representing a robust drought-responsive transcriptome. To accomplish this, we used as a starting point the approach of Varoquaux et al. (2019), which modeled normalized expression as a set of smooth splines through sampling time (the “split-spline” approach, see Methods). We then contrasted expression models from droughted plants with those from well-watered plants. Here, we defined a gene as differentially expressed (DE) if it exhibited a FDR-controlled q-value less than 0.05 (drought vs. control across the entire timecourse in Year 2 and Year 3) and an estimated log 2 -fold change greater than 1 (drought vs. control across the time interval corresponding to pre- or post-flowering drought treatment). We repeated this analysis for each sampling year independently (“Year1”, “Year2”, “Year3”) as well as for all sampling years combined (“Year123”), aligning the time series each year by the date of anthesis (and watering transition). Through this analysis, we identified 8,503 pangenes as differentially expressed in at least one year (or all three years combined), in either genotype or drought condition, representing over 35% of the expressed pangenome. Overall, expression differences were consistent in pairwise comparisons of each year ( Fig. S2 ). Download figure Open in new tab Figure S2: Pairwise comparison of log-fold change in gene expression by year. Shown are log2-fold changes of each gene in pairwise comparisons across years. Left, Year1 vs. Year2; middle, Year1 vs. Year3; Right, Year2 vs. Year3. Color scale represents point density (2-dimensional binning). Our underlying hypothesis was that, despite the meticulous care taken to execute a reproducible drought stress year-to-year and the use of plot replication in our field study, only a subset of drought DE genes would be reproducibly and differentially expressed across multiple years due to variation in growing conditions (e.g., soil effects, temperature, drought severity, etc.). Confirming our hypothesis, we observed a wide variation in the number of DE genes identified, depending on the sampling year ( Fig. 3 ; Fig. S3 ), with only half (5,590) of the drought DE gene set being identified as DE when all three datasets are combined and analyzed together. Particularly striking was the number of genes identified as DE only in Year1. While a large number of factors may account for this phenomenon, one potential explanation is that the plot location used for Years 2-3 was different from that used in Year 1. This might result in differences in soil composition. Weather or other factors might also influence DE between years, however we did not identify a likely covariate that was consistent with our observations. For further analyses, we used DE and log-fold change estimates from a combined (Year123) analysis. Download figure Open in new tab Figure S3: DE genes recovered from each condition per year. Shown are UpSet plots describing set intersections of DE genes identified for each condition/year combination. Individual dots represent DE genes unique to a particular year. Connected dots represent DE genes that are common to specified years. This is an expanded set of comparisons, similar to Fig. 3 . Download figure Open in new tab Figure 3: DE genes recovered from each condition per year (calculated for each year independently). Shown are selected UpSet plots describing set intersections of DE genes identified for each condition/year combination (full set of plots shown in Fig. S3 ). Individual dots represent DE genes unique to a particular year. Connected dots represent DE genes that are common to specified years. We hypothesized that a set of genes exhibits more robust/reproducible expression changes during drought across both genotypes when considering data from all three years combined. This more heavily filtered set might enrich for genes that are less prone to variable factors, but are consistently responsive to drought. To do this, we exclusively classified genes based on whether they were differentially expressed in the same direction in both genotypes (“Core”) under Preflowering (1203 pangenes), Postflowering (703 pangenes), or during Preflowering drought recovery (“Recovery”; 935 pangenes), considering all three years of data together (though independently analyzing root and leaf data, as principal component analysis indicated this was a large source of variation in our data, Fig. S1 ). Genes assigned to the “Recovery” category did not meet the DE thresholds to classify them as drought responsive ( Fig. 4 ), though we reason they are likely important for sorghum’s overall drought response. Interestingly, a comparatively small set of genes (Core “Drought” Genes, 726 pangenes, Table S3 , Table S4 ) were found to be DE in the same direction (i.e., up- or down-regulated) in both genotypes and under both pre- and postflowering drought conditions, representing a set of universal drought-responsive genes. Download figure Open in new tab Figure 4: “Core” gene expression patterns. A) Genes arranged according to their categorization as Core, Other, or Genotype-specific. Core genes are either DE (in the same direction for a given tissue) in Pre-flowering, Post-flowering, Pre- and Post-flowering, or Pre-flowering Recovery in both genotypes. Other drought genes are DE but not for both genotypes (in the same direction). Genotype-specific genes are those that are expressed or present in only one genotype. These categories are assigned independently for root (a) and leaf (b) tissues. (C-D) expression profiles over the time series for Core drought genes. We performed GO enrichment analysis on all Core genes for each drought/recovery category ( Fig. 5 ). We observed significant (FDR < 0.05, BH-adjusted Fisher’s exact test) enrichment for terms associated with abiotic stress, specifically for genes up-regulated in both pre- and post-flowering drought stress. Terms related to heat, light, and ROS were represented, yet water deficiency and drought terms were notably absent among this set. Terms associated with cell wall-related processes were enriched among genes up-regulated in pre-flowering drought recovery, and commonly down-regulated in root for both pre- and post-flowering drought stress. This was also noted in a previous study [ 10 ], however this same study also measured cell wall composition and concluded that transcriptome changes among cell wall-related genes does not predict changes in actual cell wall content. We also observed enrichment of terms related to biotic stress among genes down-regulated during pre-flowering stress, consistent with our previous observation that genes associated with AMF (arbuscular mycorrhizal fungi) colonization are sharply down-regulated by drought [ 11 ]. Looking more closely at these genes, this phenomenon is recapitulated across all 3 years of this study, suggesting this is a robust feature of drought stress in sorghum roots ( Fig. S4 ). Download figure Open in new tab Figure S4: Root expression of AMF-related genes decreases during drought. Genes known to be induced by AMF are sharply down-regulated during both pre- and post-flowering drought stress in sorghum roots. Shown is a heat map of log-fold change expression across a time course for all three sampling years. Download figure Open in new tab Figure 5: GO term enrichment for Core drought gene sets. This is broken out for genes that are up- or down-regulated in a given condition, and organized according to broad categories of GO terms. Color indicates the extent of GO term enrichment (number found significant over total number of genes with that GO term among expressed genes). Dots represent whether that enrichment is significant (Fisher’s exact test, FDR < 0.05). To further build support for our Core gene set representing a robust set of drought-responsive genes, we turned to a recent meta-analysis [ 30 ] that also aimed to identify a set of loci in sorghum that was responsive to drought stress across multiple conditions and genotypes (including both BTx642 and RTx430). They compared drought expression data reported by 6 individual studies from sorghum grown under field or controlled conditions in different tissues at various developmental stages. They then built a model that predicts whether a gene expression profile corresponds to a drought condition and identified a set of 285 genes that are differentially expressed in sorghum, strongly contribute to that model’s accuracy, and are also conserved, differentially expressed and similarly predictive of drought in maize. We observed that a sizeable fraction (∼40%) of these genes are also differentially expressed in our study ( Fig. S7 ). The Pardo predictive genes not within our set (genes strongly DE in both genotypes for a given condition) have a less extreme log-fold change distribution than those genes exclusively observed in our set under most genotype/tissue/condition combinations (adjusted pairwise Wilcoxon rank sum p-value < 0.05), but themselves are more extreme than the rest of the expressed set of sorghum genes ( Fig. S7 ). That such a large number of genes appear to change expression in the same way across multiple studies lends support to these genes acting as robust markers of drought in sorghum. Lastly, genes that did not exhibit consistent differential expression profiles across both genotypes (i.e., not “Core”) were also observed and made up a comparatively larger share of the overall number of DE genes ( Fig. 4A,B ). Other genes were identified that we designated as “genotype-specific” either by virtue of presence or absence of variation among genomes, or due to expression in one genome but not the other. These represented a small minority of the overall DE set, yet may represent interesting facets of our field study, as discussed below. A refined list of putative stay-green loci One of the striking distinctions between the two genotypes in this study (and a major reason they were chosen for the EPICON project) is the ability of BTx642 to retain photosynthetic activity late into post-flowering drought stress [ 23 , 31 – 33 ]. This phenomenon has been studied for several decades, yet a comprehensive and causative list of genes underlying this phenotype, termed stay-green, has yet to be determined. Given the de-novo assembled genome sequence of BTx642, as well as the extensive transcriptome data generated in this study, we attempted to more specifically identify loci responsible for this agriculturally important trait. To do this, we relied on QTL mapping data generated from three studies [ 32 – 34 ] that are archived in the Sorghum QTL Atlas resource [ 35 ] to define rough coordinates in the BTx623 reference genome ( Fig. S6 ). We then used whole genome alignment to translate these intervals to genome coordinates in our BTx642 assembly (+ a 2.5 Mbp buffer), and then identified all transcripts that fell within this region. We identified over 4,000 genes within the putative stay-green loci ( Fig. S6 ). Of those genes, we found 603 that we labeled as “candidate stay-green genes”: 335 that had presence/absence variation (gene was uniquely present in BTx642 compared to RTx430), 89 that were present in both genotypes but not expressed in RTx430, and 183 that were differentially expressed between RTx430 and BTx642 ( Fig. S6 ). GO term enrichment analysis of the candidate stay-green genes revealed terms related to lipid metabolism as significantly enriched, however further characterization is needed to determine whether this enrichment factors into the mechanism of the stay-green phenotype. Download figure Open in new tab Figure S5: Expression of 7 pangenes corresponding to glyoxylate pathway loci in sorghum. Most genes are up-regulated in post-flowering drought stress, though several are also up-regulated during pre-flowering drought. Download figure Open in new tab Figure S6: Identification of stay-green candidates in EPICON data. Potential stay-green loci were first identified by comparing the BTx623 genome to BTx642, then using these alignments to map coordinates of 4 major QTL onto the BTx642 genome (A). Genes existing within these coordinates were then further characterized as having presence-absence variation (PAV) between the RTx430 and BTx642 genomes, being exclusively expressed in the BTx642 genome, being DE between genomes, and being DE in drought in either root or leaf in BTx642. The “Total stay-green candidates” were the union set of those Stay-Green associated genes that had PAV or specific expression, or that were significantly DE between genotypes (B). Expression profiles for these genes are plotted in (C) for those DE in Root or Leaf. GO term enrichment was also performed for these genes (D). Download figure Open in new tab Figure S7: Comparison to meta-analysis of sorghum drought transcriptomes. Core drought genes were compared to genes identified as important for drought (Pardo, et al., 2023). The left panel shows a heatmap of expression of genes identified by Pardo, et al. that were either identified or not as Core drought genes in the present study. The right panel shows a violin plot of expression for the Core drought and Pardo predictive gene sets, compared to the set of all expressed genes. Asterisks indicate significance levels for pairwise Wilcoxon Rank Sum tests (p < 0.05*; p < Drought DE genes involved in starch and glyoxylate metabolism We hypothesized that orthologs of Core drought genes would have evidence of drought responsiveness in model plant species. This was indeed the case, with examples including pg.02738 (Sobic.001G226600 / SbiRTX430.01G234000 / SbiBTX642.01G234500), the ortholog of a beta-amylase that accelerates starch degradation in drought stress in Arabidopsis [ 36 ]. Another instance is pg.01034 (Sobic.001G089000 / SbiRTX430.01G091100 / SbiBTX642.01G090000), the ortholog of an endoplasmic reticulum-targeted glucosidase gene critical to abscisic acid (ABA) signaling, a central drought stress pathway [ 37 ]. In roots, there was the induction of the rate-limiting step of proline biosynthesis, delta1-pyrroline-5-carboxylate synthase 1 (P5CS1, pg.16583 / Sobic.003G356000 / SbiRTX430.03G383900 / SbiBTX642.03G381100), a key osmoregulator in droughted tissue [ 38 ]. It is important to note that these examples are not exhaustive as the DE Core drought genes also include an ortholog of a gene involved in thickening of the leaf wax cuticle (pg.02691 / Sobic.001G222700 / SbiRTX430.01G230000 / SbiBTX642.01G230800), multiple heat shock proteins, and multiple dehydrins. Some genes with well-established functions that have not previously been linked to drought responses are also present among the DE Core drought genes. One example is the induction of the glyoxylate cycle-specific enzymes, malate synthase (MLS1, pg.26913 / Sobic.006G127100 / SbiRTX430.06G133600 / SbiBTX642.06G138800) and isocitrate lyase (ICL1, pg.10765 / Sobic.002G324000 / SbiRTX430.02G328400 / SbiBTX642.02G333900) in post-flowering drought conditions ( Fig. S5 ). Interestingly, the induction of ICL1 was observed in both post-flowering droughted root and leaf, whereas the induction of MLS1 was specific to post-flowering droughted leaf tissue. The function of the glyoxylate cycle is to convert acetyl-CoA, often produced via the catabolism of lipids or ketogenic amino acids, into the organic acid succinate. In fact, a key gene involved in lipid catabolism (ACX2, pg.24600 / Sobic.005G181000 / SbiRTX430.05G191100 / SbiBTX642.05G195600) shared the same DE drought pattern as the glyoxylate cycle-specific enzymes ( Fig. S5 ). This observation strongly suggests that the glyoxylate cycle may have a previously unknown role in post-flowering drought tolerance. Discussion Pangenome approach A valuable tool for plant researchers and breeders is the impressive germplasm collections for many species that contain extensive phenotypic variation resulting from local adaptations. However, recent pangenome studies have revealed extensive intraspecific presence-absence and copy number variation which complicates comparisons between even closely related lines [ 28 , 39 , 40 ]. Thus, to directly compare genomic (especially genome-wide expression) results across even closely related lines from a single species, a common gene space is needed. Traditionally, expression data from multiple genotypes have been compared to a single reference genome. While this produces acceptable results for the majority of genes for which 1 to 1 relationships exist, it under- or over-estimates expression of genes with copy number variation with respect to the reference genome and completely misses genes not found in the reference genome. Indeed, we observed that over 6,000 genes found in the two closely related sorghum genotypes (RTx430 and BTx642) used in this study do not have syntenic homologs in the sorghum reference genome produced from BTx623. To more accurately quantify gene expression in RTx430 and BTx642, we sequenced, de-novo assembled, and annotated genomes for both of these lines. The resulting genomes were nearly complete with the only significant ambiguities contained in some centromeric regions. Indeed, most of the genome assemblies had > 50x coverage from PacBio long-read sequence data, with BUSCO scores approaching 99-100% and a relatively small number of unassembled scaffolds (< 200 for both lines). We then used a pangenome-aware approach to quantify gene expression by creating a synteny-guided pan-gene set that accounted for complex orthology relationships and presence-absence variation. Gene counts were summed across genes within each pangene/line to derive expression values. By summing read counts (instead of computing an average), we sought to account for both differential gene abundance within the genome as well as sample-to-sample variation in gene expression. This does present some caveats that we did not explore, namely that genes that are duplicated and near-identical within a genome may have suppressed read counts due to the discarding of non-uniquely mapped reads (in a 1-2 homology scenario) preferentially in one genome. We considered this a rare case, as most genes are 1:1 in these closely related genomes, and there is a greater likelihood for sequence variation among homologs in more distantly related genomes. However, this should be considered when applying a pangenome strategy to other systems. Significantly, using our pangenome approach we detected 456 genotype-specific DE genes, which underscores the importance of this approach. Core gene sets, major sources of variation Using the new genomic resources, additional years of transcriptome data, and a pangenome-based strategy, we identified a robust set of genes differentially expressed in sorghum under two types of drought stress (“DE Core Genes”). This list includes a much smaller number of DE genes than many other surveys of drought in plants, primarily due to several factors. First, a stricter thresholding strategy that required genes to be DE specifically during the drought period (q 1) in both genotypes studied, and in the same direction. We consider this list to define a robust set of specific drought-responsive genes, rather than a comprehensive list of all genes that are potentially involved in drought response. We believe applying stringent thresholds and more narrowly defining DE categories is important, as at least for this study, a less stringent threshold for differential expression (DE between drought vs. control modeled across the entire time series with no effect size limitation) resulted in nearly all genes having some degree of differential expression, likely due to the large number of time points taken. As with many other plant species, some caution should be exercised when interpreting GO enrichment results in sorghum as a way to understand what biological processes underlie many of the DE gene sets. About 40% of the pangenes overall had some kind of GO annotation associated with them, leaving many genes (including putative stay-green loci) without putative function. This underscores a larger need for the sorghum community to experimentally ascribe functions to genes. Genes of interest Using the additional two years of transcriptome data, we were able to recapitulate our previous finding with one year of data that genes associated with colonization by AMF are strongly down-regulated in pre-flowering drought. We were also able to confirm findings from our previous report [ 11 ] that major genes of the glyoxylate cycle are strongly up-regulated in post-flowering drought stress, a phenomenon that, to the best of our knowledge, has not been reported previously for any plant species. Further study will be required to determine a precise role for the glyoxylate cycle in the post-flowering drought. However, we can conjecture that mobilization of stored fatty acids and amino acids stored in leaf and root tissues via induction of the glyoxylate cycle may provide an additional source of carbon and/or nitrogen to support grain filling under conditions of drought-induced photosynthetic repression. This finding may have ramifications for understanding sorghum’s exceptional post-flowering drought tolerance, as well as potentially revealing a general post-flowering drought tolerance strategy shared by other cereal crops but overlooked in prior studies. Lastly, we used our drought transcriptome data to further refine a list of candidate genes that may play a role in the stay-green phenotype. We did not identify a gene (or set of genes) that we can directly confirm are responsible for the phenotype. However, we did identify approximately 603 genes either differentially expressed between lines RTx430 and BTx642, or are only present in BTx642, and are also located within four broad loci that map to stay-green QTL [ 23 , 32 – 35 ] associated with the trait. These genes represent promising candidates to further characterize for their involvement in the stay-green phenotype. Multi-year transcriptomic datasets improve field study accuracy A major goal of plant genetic research is to understand and improve plant traits. Yet, this often necessitates a choice between sampling plant specimens grown in tightly controlled artificial conditions in the laboratory or greenhouse or growing large numbers of plants in agricultural fields under intrinsically unpredictable conditions. Discrepancies between successful interventions under controlled conditions versus viable agricultural interventions are well understood by the community and are recognized as an important caveat to doing research in the lab [ 41 ]. Conversely, field trials often have limited predictive value due to unpredictable variability or limited sample quality or quantity. Here, we have executed a carefully controlled field study using environmental conditions that lend themselves to tight control ( e.g., near complete absence of summer precipitation, use of soil types well-suited for drought studies) as well as replicated field plots for two genotypes and defined drought conditions to reduce biological noise (see methods & materials). However, in spite of the planning and controls used in our study, we show that, based on the three years of transcriptomic data, conducting a field trial for a single year does not account for the substantial variation in growing conditions, including soil, weather, or other factors (e.g., plants’ biotic environment). Our observation that a basic metric (the set of genes found to be differentially regulated between drought and control conditions) is wildly different between three successive years at the same field station serves as a warning that field research, especially studies that have expression profiling as an output, should be replicated across multiple growing seasons to provide robust results. In conclusion, we have generated one of the most comprehensive transcriptomic studies from a field-based drought trial for any plant species and by far the most comprehensive drought dataset for the drought-tolerant crop, Sorghum. By expanding our field trial through collection of samples across three growing seasons, we provide the community with a robust set of drought-responsive genes relative to the unpredictable nature of a single growing season. We also demonstrated the importance of using a pan-genome approach, especially for understanding genotype-specific traits. The dataset and our analyses are a valuable resource to the broader community that can be mined to form mechanistic hypotheses for future gene function testing and crop improvement. Methods RTx430 S. bicolor var. RTx430 genome sequencing, assembly, and annotation. 2500 ng of genomic DNA was sheared to >10kb using Covaris g-Tubes. The sheared DNA was treated with exonuclease to remove single-stranded ends and DNA damage repair mix followed by end repair and ligation of blunt adapters using SMRTbell Template Prep Kit 1.0 (Pacific Biosciences). The library was purified with AMPure PB beads and size selected with BluePippin (Sage Science) at >10 kb cutoff size. PacBio Sequencing primer was then annealed to the SMRTbell template library and sequencing polymerase was bound to them using Sequel Binding kit 2.0. The prepared SMRTbell template libraries were then sequenced on a Pacific Biosystems’ Sequel sequencer using v3 sequencing primer, 1M v2 SMRT cells, and Version 2.1 sequencing chemistry with 1×600 sequencing movie run times. 100 ng of DNA was sheared to 864 bp using the Covaris LE220 (Covaris) and size selected using SPRI beads (Beckman Coulter). The fragments were treated with end-repair, A-tailing, and ligation of Illumina compatible adapters (IDT, Inc) using the KAPA-Illumina library creation kit (KAPA biosystems). qPCR was used to determine the concentration of the libraries and were sequenced on the Illumina HiSeq. One Illumina 2×150 HiC library was also sequenced. The main assembly contained 80.4x of PACBIO CCS coverage (8,091 bp average read length) that was assembled using MECAT[ 42 ] and the resulting sequence was polished using ARROW. The V4 release of S. bicolor var. BTx623 was broken into 58,700 nonredundant, non-overlapping 1,500 bp syntenic markers. These markers were used to identify misjoins in the RTx430 assembly. Misjoins were characterized by an abrupt change in the BTx623 linkage group. A total of 32 breaks were made to the assembly. Scaffolds were then oriented, ordered, and joined together using HIC data combined with the JUICER/JUICEBOX[ 43 ] pipeline to order and orient contigs into chromosomes. Significant telomeric sequence was properly oriented in the assembly. A total of 553 joins were applied to the broken assembly to form the final assembly consisting of 10 chromosomes. 97.9% of the assembled sequence is contained in the chromosomes. Additionally, homozygous SNPs and INDELs were corrected in the release sequence using ∼60x of Illumina reads (2×150, 500bp insert). Transcript assemblies were made from ∼5.4B pairs of 2×150 stranded paired-end Illumina RNA-seq reads using PERTRAN (Shu, unpublished). About 1.5M PacBio Iso-Seq CCSs were corrected and collapsed by genome guided correction pipleine (Shu, unpublished) to obtain ∼292K putative full-length transcripts. 192,495 transcript assemblies were constructed using PASA[ 44 ] from RNA-seq transcript assemblies, CCSs above and ESTs from NCBI. Loci were determined by transcript assembly alignments and/or EXONERATE[ 45 ] alignments of proteins from Arabidopsis thaliana , soybean, rice, Setaria viridis , grape, high confidence and transcriptome supported BTx623 gene models and Swiss-Prot proteomes to repeat-soft-masked RTx430 genome using RepeatMasker[ 46 ] with up to 2K BP extension on both ends unless extending into another locus on the same strand. Gene models were predicted by homology-based predictors, FGENESH+[ 47 ], FGENESH_EST (similar to FGENESH+, but using EST to compute splice site and intron input instead of protein/translated ORF), and EXONERATE[ 45 ], PASA assembly ORFs (in-house homology constrained ORF finder) and from AUGUSTUS via BRAKER1[ 48 ]. The best scored predictions for each locus are selected using multiple positive factors including EST and protein support, and one negative factor: overlap with repeats. The selected gene predictions were improved by PASA[ 44 ]. Improvement includes adding UTRs, splicing correction, and adding alternative transcripts. PASA-improved gene model proteins were subject to protein homology analysis to above mentioned proteomes to obtain Cscore and protein coverage. Cscore is a protein BLASTP score ratio to MBH (mutual best hit) BLASTP score and protein coverage is highest percentage of protein aligned to the best of homologs. PASA-improved transcripts were selected based on Cscore, protein coverage, EST coverage, and its CDS overlapping with repeats. The transcripts were selected if its Cscore is larger than or equal to 0.5 and protein coverage larger than or equal to 0.5, or it has EST coverage, but its CDS overlapping with repeats is less than 20%. For gene models whose CDS overlaps with repeats for more than 20%, their Cscore must be at least 0.9 and homology coverage at least 70% to be selected. The selected gene models were subject to Pfam analysis and gene models whose protein is more than 30% in Pfam TE domains were removed and weak gene models. Incomplete gene models, low homology supported without fully transcriptome supported gene models and short single exon (< 300 BP CDS) without protein domain nor good expression gene models were manually filtered out. BTx642 S. bicolor var. BTx642 genome sequencing, assembly, and annotation. 2500 ng of genomic DNA was sequenced as described for RTx430. The main assembly consisted of 52.36x of PACBIO coverage (7,024 bp average read size), and was assembled using MECAT[ 42 ] and the resulting sequence was polished using ARROW. A total of 61,546 unique, non-repetitive, non-overlapping 1 KB syntenic markers were generated using the version 3.0 S. bicolor genome release and aligned to the polished BTx642 assembly. Contig breaks were identified as an abrupt change in linkage group. A total of 16 breaks were made. The broken contigs were then ordered, oriented, and assembled into 10 chromosomes using the version 3.0 S. bicolor syntenic markers. A total of 155 joins were made during this process. The HiC library was aligned to the integrated chromosomes, and several minor rearrangements were made. Adjacent alternative haplotypes were identified on the joined contig set. Althap regions were collapsed using the longest common substring between the two haplotypes. A total of 36 adjacent altHaps were collapsed. Care was taken to ensure that telomere was properly oriented in the chromosomes, and the resulting sequence was screened for retained vector and/or contaminants. Finally, Homozygous SNPs and INDELs were corrected in the release sequence using ∼55.4x of Illumina reads (2×150, 400bp insert). Transcript assemblies were made from ∼349M pairs of 2×150 stranded paired-end Illumina RNA-seq reads and ∼2B pairs of 2×150 stranded paired-end Illumina RNA-seq reads (expression profile experiments) using PERTRAN (Shu, unpublished). About 1.5M PacBio Iso-Seq CCSs were corrected and collapsed by genome guided correction pipleine (Shu, unpublished) to obtain ∼1M putative full-length transcripts. 215,476 transcript assemblies were constructed using PASA[ 44 ] from RNA-seq transcript assemblies above. Loci were determined by transcript assembly alignments and/or EXONERATE[ 45 ] alignments of proteins from Arabidopsis thaliana , soybean, rice, Setaria viridis, aquillegia, grape and Swiss-Prot proteomes to repeat-soft-masked BTx642 genome using RepeatMasker[ 46 ] with up to 2K BP extension on both ends unless extending into another locus on the same strand. Gene models were predicted by homology-based predictors, FGENESH+[ 47 ], FGENESH_EST (similar to FGENESH+, but using EST to compute splice site and intron input instead of protein/translated ORF), and EXONERATE[ 45 ], PASA assembly ORFs (in-house homology constrained ORF finder) and from AUGUSTUS via BRAKER1[ 48 ]. The best scored predictions for each locus are selected using multiple positive factors including EST and protein support, and one negative factor: overlap with repeats. The selected gene predictions were improved by PASA. Improvement includes adding UTRs, splicing correction, and adding alternative transcripts. PASA-improved gene model proteins were subject to protein homology analysis to above mentioned proteomes to obtain Cscore and protein coverage. Cscore is a protein BLASTP score ratio to MBH (mutual best hit) BLASTP score and protein coverage is highest percentage of protein aligned to the best of homologs. PASA-improved transcripts were selected based on Cscore, protein coverage, EST coverage, and its CDS overlapping with repeats. The transcripts were selected if its Cscore is larger than or equal to 0.5 and protein coverage larger than or equal to 0.5, or it has EST coverage, but its CDS overlapping with repeats is less than 20%. For gene models whose CDS overlaps with repeats for more than 20%, their Cscore must be at least 0.9 and homology coverage at least 70% to be selected. The selected gene models were subject to Pfam analysis and gene models whose protein is more than 30% in Pfam TE domains were removed and weak gene models. Incomplete gene models, low homology supported without fully transcriptome supported gene models and short single exon (< 300 BP CDS) without protein domain nor good expression gene models were manually filtered out. DNA and Iso-seq RNA for Iso-seq was prepared from mature root, leaf, panicle, stem and callus tissues from RTx430 and BTx642 plants grown in greenhouse conditions. Briefly, tissue was flash-frozen in liquid nitrogen and kept at -80 deg. C or on dry ice until RNA extraction. Prior to extraction, 1-2 g of tissue was plunged in liquid nitrogen, then ground in a Retsch bead mill (Tissuelyser) using 35 mL grinding jar with 2×12mm stainless steel grinding balls for 5 minutes at 30 Hz. Jars were re-frozen in liquid nitrogen, rotated, then ground again for an additional 5 minutes at 30 Hz. Tissue was scraped out of the jars, and 500 mg was measured into 2 mL tubes. RNA was extracted from each tissue aliquot using the Qiagen RNeasy kit (Cat. #AM217004) according to the manufacturer’s specifications. RNA was submitted to the Joint Genome Institute for Iso-seq library creation and sequencing. High molecular weight DNA was extracted at Arizona Genomics Institute. Growth conditions and drought treatment Sorghum plants were planted, grown, and treated as described by Varoquaux et al (2019) [ 11 ] with some differences in years 2 and 3. These included, for year 2, plants were sown in pre-wetted plots at Kearny Agricultural Research and Extension Center (Parlier, CA) on June 6th, 2017, and Week 1 samples were collected on June 28th, 2017, 16 days after emergence). Additional samples were acquired 8 hours, 1 and 3 days after watering resumed (August 11th, 2027) from control and pre-flowering drought-treated plots, as well as 2 and 5 days after water cessation (August 11th, 2017) from control and post-flowering plots. Otherwise, sampling dates occurred every 7 days until the conclusion of the experiment on October 11th, 2017. Year 3 plants were grown similar to year 2, with seeds sown on June 8th, 2018, Week 1 samples collected on June 26th, and the water transition occurring on August 15th, 2018. Sample collection, RNA isolation, library sequencing, and quantification All samples were collected and processed as described [ 11 ]. RNA was extracted as described [ 11 ]. Sequencing was performed at the Joint Genome Institute (Award DOI: 10.46936/10.25585/60001015), targeting 80M reads per library. Raw FASTQ file reads were filtered and trimmed using the JGI QC pipeline resulting in the filtered FASTQ file. Using BBDuk[ 49 ], raw reads were evaluated for artifact sequences by kmer matching (kmer=25), allowing 1 mismatch. Detected artifacts were trimmed from the 3’ end of the reads. RNA spike-in reads, PhiX reads and reads containing any Ns were removed. Quality trimming was performed using the phred trimming method set at Q6. Following trimming, reads under the length threshold were removed (1/3 of the original read length). Filtered reads from each library were aligned to the RTx430 or BTx642 reference genome using HISAT2 version 2.1.0[ 50 ]. featureCounts[ 51 ] was used to generate the raw gene counts file using gff3 annotations. Only primary hits assigned to the reverse strand were included in the raw gene counts (-s 2 -p -- primary options). Pangenome construction and summary The GENESPACE R package[ 52 ] (version 0.8.5) was used to generate a pangenome from three sorghum genomes: S. bicolor v3.1.1[ 26 ], RTx430 v. 2.1 and BTx642 v.1.1 using default settings. Gene counts from genes within pangenes were summed to generate pangene expression profiles. Pangenes that lacked representation from both RTx430 and BTx642 were excluded from additional analyses. Gene and pangene summary statistics and differential expression testing The Moanin R package ([ 53 ]) was used to identify differentially expressed genes using several different contrasts. First, DE was tested for drought vs. control in either genotype or tissue using all time points (overall DE). Second, DE was tested using a subset of time points corresponding to pre-flowering drought (prior to 57 DAG) or post-flowering drought (after 63 DAG). Summary statistics, including an estimated log2-fold change and a corresponding adjusted (BH) p-value were computed for each test. DE genes for a particular contrast were identified as being significantly different between drought vs. control (overall DE p < 0.01) and had a log2-fold change (absolute value) greater than 1. This was computed for pangenes (using expression for syntenic, homologous genes in both genotypes) and using genotype-specific expression data. GO enrichment was performed using the topGO R package [ 54 ] using all expressed genes (at least 5 libraries with at least 1 count) or pangenes (more than 1% of libraries with at least 1 count) as background. Genome alignment and STG locus identification The BTx642, BTx623 and RTx430 genomes were aligned using the minimap2[ 55 ] tool using default parameters associated with the “asm5” preset. Stay-green coordinates were first identified for BTx623 using the Sorghum QTL Atlas[ 35 ] for the top 4 loci on chromosomes 2, 3 and 5. These coordinates were then mapped to the BTx642 genome using the whole-genome alignment, filtering for hits that had a minimum alignment quality equal to 60 on the positive strand, adding a 2.5 Mbp extension on either side. Genes found within these intervals on the BTx642 genome were identified as potential stay-green candidates for further analysis. Data availability Raw sequencing data is available at the National Institute of Biotechnology Information Sequence Read Archive (NCBI SRA). SRA Accessions for each sample are available in Supplementary Table S5. Processed data matrices as well as analysis code is available at https://code.jgi.doe.gov/BJCole/epicon-3-year-transcriptome . Acknowledgements This project was supported in part by DOE Grant DE-SC0014081 awarded to P.G.L., D.C.D., J.D., R.H., E.P., A.V., and J.V.. This project was also supported by an Early Career Research Program award to B.C. The work (proposal: 10.46936/10.25585/60001015) conducted by the U.S. Department of Energy Joint Genome Institute ( https://ror.org/04xm1d337 ), a DOE Office of Science User Facility, is supported by the Office of Science of the U.S. Department of Energy operated under Contract No. DE-AC02-05CH11231 Funder Information Declared United States Department of Energy, https://ror.org/01bj3aw27 , DE-SC0014081 References 1. ↵ van Ruijven BJ , De Cian E , Sue Wing I . Amplification of future energy demand growth due to climate change . Nat Commun . 2019 ; 10 : 2762 . OpenUrl CrossRef 2. ↵ Hanssen SV , Daioglou V , Steinmann ZJN , Doelman JC , Van Vuuren DP , Huijbregts MAJ . The climate change mitigation potential of bioenergy with carbon capture and storage . Nat Clim Chang . 2020 ; 10 : 1023 – 9 . OpenUrl CrossRef 3. ↵ Rippey BR . The U.s. drought of 2012 . Weather Clim Extrem . 2015 ; 10 : 57 – 64 . OpenUrl CrossRef 4. ↵ Calviño M , Messing J . Sweet sorghum as a model system for bioenergy crops . Curr Opin Biotechnol . 2012 ; 23 : 323 – 9 . OpenUrl CrossRef PubMed Web of Science 5. ↵ Silva TN , Thomas JB , Dahlberg J , Rhee SY , Mortimer JC . Progress and challenges in sorghum biotechnology, a multipurpose feedstock for the bioeconomy . J Exp Bot . 2022 ; 73 : 646 – 64 . OpenUrl CrossRef 6. ↵ Satoh Y , Yoshimura K , Pokhrel Y , Kim H , Shiogama H , Yokohata T , et al. The timing of unprecedented hydrological drought under climate change . Nat Commun . 2022 ; 13 : 3287 . OpenUrl CrossRef 7. ↵ Abreha KB , Enyew M , Carlsson AS , Vetukuri RR , Feyissa T , Motlhaodi T , et al. Sorghum in dryland: morphological, physiological, and molecular responses of sorghum under drought stress . Planta . 2021 ; 255 : 20 . OpenUrl PubMed 8. ↵ Aregawi K , Shen J , Pierroz G , Sharma MK , Dahlberg J , Owiti J , et al. Morphogene-assisted transformation of Sorghum bicolor allows more efficient genome editing . Plant Biotechnol J . 2022 ; 20 : 748 – 60 . OpenUrl CrossRef 9. ↵ Shen J , Aregawi K , Anwar S , Miller T , Lemaux PG . High-frequency Wus2 -integrated transformation toolkit enhances Cas9 editing efficiency and expands capability with SpRY in sorghum [Internet] . bioRxiv . 2025 . Available from : doi: 10.1101/2025.01.21.634149 OpenUrl Abstract / FREE Full Text 10. ↵ Scavuzzo-Duggan T , Varoquaux N , Madera M , Vogel JP , Dahlberg J , Hutmacher R , et al. Cell Wall Compositions of Sorghum bicolor Leaves and Roots Remain Relatively Constant Under Drought Conditions . Front Plant Sci . 2021 ; 12 : 747225 . OpenUrl CrossRef 11. ↵ Varoquaux N , Cole B , Gao C , Pierroz G , Baker CR , Patel D , et al. Transcriptomic analysis of field-droughted sorghum from seedling to maturity reveals biotic and metabolic responses . Proc Natl Acad Sci U S A [Internet ]. 2019 ; Available from : doi: 10.1073/pnas.1907500116 OpenUrl Abstract / FREE Full Text 12. Wipf HM-L , Xu L , Gao C , Spinner HB , Taylor J , Lemaux P , et al. Agricultural Soil Management Practices Differentially Shape the Bacterial and Fungal Microbiome of Sorghum bicolor . Appl Environ Microbiol [Internet ]. 2021 ; 87 . Available from : doi: 10.1128/AEM.02345-20 OpenUrl Abstract / FREE Full Text 13. ↵ Xu L , Naylor D , Dong Z , Simmons T , Pierroz G , Hixson KK , et al. Drought delays development of the sorghum root microbiome and enriches for monoderm bacteria . Proc Natl Acad Sci U S A . 2018 ; 115 : E4284 – 93 . OpenUrl Abstract / FREE Full Text 14. Zhou M , Malhan N , Ahkami AH , Engbrecht K , Myers G , Dahlberg J , et al. Top-down mass spectrometry of histone modifications in sorghum reveals potential epigenetic markers for drought acclimation . Methods . 2020 ; 184 : 29 – 39 . OpenUrl CrossRef 15. Gao C , Xu L , Montoya L , Madera M , Hollingsworth J , Chen L , et al. Co-occurrence networks reveal more complexity than community composition in resistance and resilience of microbial communities . Nat Commun . 2022 ; 13 : 3867 . OpenUrl CrossRef PubMed 16. ↵ Gao C , Montoya L , Xu L , Madera M , Hollingsworth J , Purdom E , et al. Fungal community assembly in drought-stressed sorghum shows stochasticity, selection, and universal ecological dynamics . Nat Commun . 2020 ; 11 : 34 . OpenUrl CrossRef PubMed 17. Xu L , Dong Z , Chiniquy D , Pierroz G , Deng S , Gao C , et al. Genome-resolved metagenomics reveals role of iron metabolism in drought-induced rhizosphere microbiome dynamics . Nat Commun . 2021 ; 12 : 3209 . OpenUrl CrossRef PubMed 18. Baker CR , Patel-Tupper D , Cole BJ , Ching LG , Dautermann O , Kelikian AC , et al. Metabolomic, photoprotective, and photosynthetic acclimatory responses to post-flowering drought in sorghum . Plant Direct . 2023 ; 7 : e545 . OpenUrl CrossRef 19. Gao C , Montoya L , Xu L , Madera M , Hollingsworth J , Purdom E , et al. Strong succession in arbuscular mycorrhizal fungal communities . ISME J . 2019 ; 13 : 214 – 26 . OpenUrl CrossRef PubMed 20. Xu L , Pierroz G , Wipf HM-L , Gao C , Taylor JW , Lemaux PG , et al. Holo-omics for deciphering plant-microbiome interactions . Microbiome . 2021 ; 9 : 69 . OpenUrl CrossRef 21. Zhou M , Abdali SH , Dilworth D , Liu L , Cole B , Malhan N , et al. Isolation of Histone from Sorghum Leaf Tissue for Top Down Mass Spectrometry Profiling of Potential Epigenetic Markers . J Vis Exp [Internet ]. 2021 ; Available from : doi: 10.3791/61707 OpenUrl CrossRef 22. ↵ Gao C , Courty P-E , Varoquaux N , Cole B , Montoya L , Xu L , et al. Successional adaptive strategies revealed by correlating arbuscular mycorrhizal fungal abundance with host plant gene expression . Mol Ecol . 2023 ; 32 : 2674 – 87 . OpenUrl CrossRef 23. ↵ Borrell AK , van Oosterom EJ , Mullet JE , George-Jaeggli B , Jordan DR , Klein PE , et al. Stay-green alleles individually enhance grain yield in sorghum under drought by modifying canopy development and water uptake patterns . New Phytol . 2014 ; 203 : 817 – 30 . OpenUrl CrossRef PubMed 24. ↵ Lovell JT , Shakirov EV , Schwartz S , Lowry DB , Aspinwall MJ , Taylor SH , et al. Promises and Challenges of Eco-Physiological Genomics in the Field: Tests of Drought Responses in Switchgrass . Plant Physiol . 2016 ; 172 : 734 – 48 . OpenUrl Abstract / FREE Full Text 25. ↵ Paterson AH , Bowers JE , Bruggmann R , Dubchak I , Grimwood J , Gundlach H , et al. The Sorghum bicolor genome and the diversification of grasses . Nature . 2009 ; 457 : 551 – 6 . OpenUrl CrossRef PubMed Web of Science 26. ↵ McCormick RF , Truong SK , Sreedasyam A , Jenkins J , Shu S , Sims D , et al. The Sorghum bicolor reference genome: improved assembly, gene annotations, a transcriptome atlas, and signatures of genome organization . Plant J . 2018 ; 93 : 338 – 54 . OpenUrl CrossRef PubMed 27. ↵ Lei L , Goltsman E , Goodstein D , Wu GA , Rokhsar DS , Vogel JP . Plant pan-genomics comes of age . Annu Rev Plant Biol . 2021 ; 72 : 411 – 35 . OpenUrl CrossRef PubMed 28. ↵ Tao Y , Luo H , Xu J , Cruickshank A , Zhao X , Teng F , et al. Extensive variation within the pan-genome of cultivated and wild sorghum . Nat Plants . 2021 ; 7 : 766 – 73 . OpenUrl CrossRef 29. ↵ Goodstein DM , Shu S , Howson R , Neupane R , Hayes RD , Fazo J , et al. Phytozome: a comparative platform for green plant genomics . Nucleic Acids Res . 2012 ; 40 : D1178 – 86 . OpenUrl CrossRef PubMed Web of Science 30. ↵ Pardo J , Wai CM , Harman M , Nguyen A , Kremling KA , Romay MC , et al. Cross-species predictive modeling reveals conserved drought responses between maize and sorghum . Proc Natl Acad Sci U S A . 2023 ; 120 : e2216894120 . OpenUrl CrossRef PubMed 31. ↵ Rosenow DT . Breeding for resistance to root and stalk rots in Texas . In: Mughogho LK , editor. Consultative Group Discussion on Research Needs and Strategies for Control of Sorghum Root and Stalk Rot Diseases . ICRISAT ; 1984 . p. 209 – 18 . 32. ↵ Xu W , Subudhi PK , Crasta OR , Rosenow DT , Mullet JE , Nguyen HT . Molecular mapping of QTLs conferring stay-green in grain sorghum (Sorghum bicolor L . Moench). Genome . 2000 ; 43 : 461 – 9 . OpenUrl 33. ↵ Subudhi PK , Rosenow DT , Nguyen HT . Quantitative trait loci for the stay green trait in sorghum (Sorghum bicolor L . Moench): consistency across genetic backgrounds and environments. Theor Appl Genet . 2000 ; 101 : 733 – 41 . OpenUrl 34. ↵ Tao YZ , Henzell RG , Jordan DR , Butler DG , Kelly AM , McIntyre CL . Identification of genomic regions associated with stay green in sorghum by testing RILs in multiple environments . Theor Appl Genet . 2000 ; 100 : 1225 – 32 . OpenUrl CrossRef Web of Science 35. ↵ Mace E , Innes D , Hunt C , Wang X , Tao Y , Baxter J , et al. The Sorghum QTL Atlas: a powerful tool for trait dissection, comparative genomics and crop improvement . Theor Appl Genet . 2019 ; 132 : 751 – 66 . OpenUrl CrossRef 36. ↵ Zanella M , Borghi GL , Pirone C , Thalmann M , Pazmino D , Costa A , et al. β-amylase 1 (BAM1) degrades transitory starch to sustain proline biosynthesis during drought stress . J Exp Bot . 2016 ; 67 : 1819 – 26 . OpenUrl CrossRef PubMed 37. ↵ Han Y , Watanabe S , Shimada H , Sakamoto A . Dynamics of the leaf endoplasmic reticulum modulate β-glucosidase-mediated stress-activated ABA production from its glucosyl ester . J Exp Bot . 2020 ; 71 : 2058 – 71 . OpenUrl CrossRef PubMed 38. ↵ Muzammil S , Shrestha A , Dadshani S , Pillen K , Siddique S , Léon J , et al. An Ancestral Allele of Pyrroline-5-carboxylate synthase1 Promotes Proline Accumulation and Drought Adaptation in Cultivated Barley . Plant Physiol . 2018 ; 178 : 771 – 82 . OpenUrl Abstract / FREE Full Text 39. ↵ Gordon SP , Contreras-Moreira B , Woods DP , Des Marais DL , Burgess D , Shu S , et al. Extensive gene content variation in the Brachypodium distachyon pan-genome correlates with population structure . Nat Commun . 2017 ; 8 : 2184 . OpenUrl CrossRef PubMed 40. ↵ He Q , Tang S , Zhi H , Chen J , Zhang J , Liang H , et al. A graph-based genome and pan-genome variation of the model plant Setaria . Nat Genet . 2023 ; 55 : 1232 – 42 . OpenUrl CrossRef PubMed 41. ↵ Poorter H , Fiorani F , Pieruschka R , Wojciechowski T , van der Putten WH , Kleyer M , et al. Pampered inside, pestered outside? Differences and similarities between plants growing in controlled conditions and in the field . New Phytol . 2016 ; 212 : 838 – 55 . OpenUrl CrossRef PubMed 42. ↵ Xiao C-L , Chen Y , Xie S-Q , Chen K-N , Wang Y , Han Y , et al. MECAT: fast mapping, error correction, and de novo assembly for single-molecule sequencing reads . Nat Methods . 2017 ; 14 : 1072 – 4 . OpenUrl CrossRef PubMed 43. ↵ Juicer and Juicebox for chromatin conformation analysis . Nat Methods . 2016 ; 13 : 816 – 816 . OpenUrl 44. ↵ Haas BJ , Delcher AL , Mount SM , Wortman JR , Smith RK Jr . , Hannick LI , et al. Improving the Arabidopsis genome annotation using maximal transcript alignment assemblies . Nucleic Acids Res . 2003 ; 31 : 5654 – 66 . OpenUrl CrossRef PubMed Web of Science 45. ↵ Slater GSC , Birney E . Automated generation of heuristics for biological sequence comparison . BMC Bioinformatics . 2005 ; 6 : 31 . OpenUrl CrossRef PubMed 46. ↵ Smit , AFA , Hubley , R & Green , P. RepeatMasker [Internet] . 2013 -2015. Available from : http://www.repeatmasker.org 47. ↵ Salamov AA , Solovyev VV . Ab initio gene finding in Drosophila genomic DNA . Genome Res . 2000 ; 10 : 516 – 22 . OpenUrl Abstract / FREE Full Text 48. ↵ Hoff KJ , Lomsadze A , Borodovsky M , Stanke M . Whole-genome annotation with BRAKER . Methods Mol Biol . 2019 ; 1962 : 65 – 95 . OpenUrl CrossRef PubMed 49. ↵ BBMap [Internet] . SourceForge . 2022 [cited 2024 May 6]. Available from: https://sourceforge.net/projects/bbmap/ 50. ↵ Kim D , Paggi JM , Park C , Bennett C , Salzberg SL . Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype . Nat Biotechnol . 2019 ; 37 : 907 – 15 . OpenUrl CrossRef PubMed 51. ↵ Liao Y , Smyth GK , Shi W . featureCounts: an efficient general purpose program for assigning sequence reads to genomic features . Bioinformatics . 2014 ; 30 : 923 – 30 . OpenUrl CrossRef PubMed Web of Science 52. ↵ Lovell JT , Sreedasyam A , Schranz ME , Wilson M , Carlson JW , Harkess A , et al. GENESPACE tracks regions of interest and gene copy number variation across multiple genomes . Elife [Internet ]. 2022 ; 11 . Available from : doi: 10.7554/eLife.78526 OpenUrl CrossRef PubMed 53. ↵ Purdom NVA . moanin: An R Package for Time Course RNASeq Data Analysis [Internet] . 2024 . Available from: https://bioconductor.org/packages/release/bioc/html/moanin.html 54. ↵ Rahnenfuhrer AAA . topGO: Enrichment Analysis for Gene Ontology [Internet] . 2024 . Available from : doi: 10.18129/B9.bioc.topGO OpenUrl CrossRef 55. ↵ Li H . Minimap2: pairwise alignment for nucleotide sequences . Bioinformatics . 2018 ; 34 : 3094 – 100 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted July 02, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Multi-season analysis reveals hundreds of drought-responsive genes in sorghum Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Multi-season analysis reveals hundreds of drought-responsive genes in sorghum Benjamin Cole , Wenxin Zhang , Junming Shi , Hao Wang , Christopher Baker , Nelle Varoquaux , Joy Hollingsworth , Robert Hutmacher , Jeffery Dahlberg , Grady Pierroz , Kerrie W. Barry , Vasanth Singan , Yuko Yoshinaga , Christopher Daum , Matthew Zane , Matthew Blow , Ronan O’Malley , Shengqiang Shu , Jerry W. Jenkins , John T. Lovell , Jeremy Schmutz , John W. Taylor , Devin Coleman-Derr , Axel Visel , Peggy G. Lemaux , Elizabeth Purdom , John P. Vogel bioRxiv 2025.06.27.662006; doi: https://doi.org/10.1101/2025.06.27.662006 Share This Article: Copy Citation Tools Multi-season analysis reveals hundreds of drought-responsive genes in sorghum Benjamin Cole , Wenxin Zhang , Junming Shi , Hao Wang , Christopher Baker , Nelle Varoquaux , Joy Hollingsworth , Robert Hutmacher , Jeffery Dahlberg , Grady Pierroz , Kerrie W. Barry , Vasanth Singan , Yuko Yoshinaga , Christopher Daum , Matthew Zane , Matthew Blow , Ronan O’Malley , Shengqiang Shu , Jerry W. Jenkins , John T. Lovell , Jeremy Schmutz , John W. Taylor , Devin Coleman-Derr , Axel Visel , Peggy G. Lemaux , Elizabeth Purdom , John P. Vogel bioRxiv 2025.06.27.662006; doi: https://doi.org/10.1101/2025.06.27.662006 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genomics Subject Areas All Articles Animal Behavior and Cognition (7633) Biochemistry (17686) Bioengineering (13891) Bioinformatics (41932) Biophysics (21450) Cancer Biology (18587) Cell Biology (25498) Clinical Trials (138) Developmental Biology (13375) Ecology (19898) Epidemiology (2067) Evolutionary Biology (24311) Genetics (15608) Genomics (22502) Immunology (17736) Microbiology (40391) Molecular Biology (17177) Neuroscience (88589) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4823) Physiology (7641) Plant Biology (15150) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9823) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-05-23T02:00:01.238055+00:00
License: CC-BY-NC-4.0