Ecological relationships between human gut bacteria predicted from analysis of dense microbiome time series data from US travelers in Bangladesh

preprint OA: closed CC-BY-4.0
📄 Open PDF Full text JSON View at publisher
Full text 78,677 characters · extracted from preprint-html · click to expand
Ecological relationships between human gut bacteria predicted from analysis of dense microbiome time series data from US travelers in Bangladesh | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Ecological relationships between human gut bacteria predicted from analysis of dense microbiome time series data from US travelers in Bangladesh View ORCID Profile Casey G. Martin , Laurie M. Lyon , Antonio Gonzalez , Rob Knight , Catherine Lozupone doi: https://doi.org/10.1101/2025.02.27.639550 Casey G. 
Martin 1 Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Casey G. Martin Laurie M. Lyon 1 Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Antonio Gonzalez 2 Department of Pediatrics, University of California San Diego , La Jolla, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Rob Knight 2 Department of Pediatrics, University of California San Diego , La Jolla, California, USA 3 Department of Computer Science and Engineering, University of California , San Diego, La Jolla, California, USA 4 Department of Bioengineering, University of California , San Diego, La Jolla, California, USA 5 Center for Microbiome Innovation, University of California , San Diego, La Jolla, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Catherine Lozupone 1 Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: Catherine.Lozupone{at}cuanschutz.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF ABSTRACT Gut microbiomes provide critical host homeostatic functions, resulting from a complex web of ecological interactions among community members. We studied these interactions using a time-lagged correlational strategy of dense longitudinal sequence data from Western individuals traveling abroad to Bangladesh who experienced diarrhea. We identified both negative (140) and positive (78) relationships between bacterial pairs. 
Positive relationships occurred in pairs that were significantly more phylogenetically distant, such as inter-order associations between Clostridiales and Bacteroidales, while negative relationships were more common between more phylogenetically related pairs. Further analysis of computationally predicted genome content and metabolic pathways revealed that cooperative bacterial pairs overlapped less in function and offered each other metabolic support, while competitive pairs were more likely to compete for the same resources. Predicted levels of B vitamins (B5 and B3), enoyl acyl-carrier protein (acp) reductase II (FabK) and its metabolites, and nucleotide/nucleoside derivatives were able to differentiate negatively and positively associated microbe pairs. Ultimately, our findings show that combining time-series analysis with metabolic/genomic network analysis can identify relationships between bacteria with plausible causal mechanisms that are consistent with existing ecological and biochemical observations. IMPORTANCE Understanding how microbes in the gut interact with each other is important for devising strategies to target the human gut microbiome therapeutically. For instance, understanding competitive relationships, where a shared need of similar limited resources limits the degree to which two microbes can co-exist, can inform strategies for limiting colonization of undesirable microbes. Understanding cooperative relationships, where one microbe provides the other with substrates needed for growth, can inform strategies to promote desirable microbes. By evaluating dense time-series gut microbiome data from individuals who experienced diarrhea while traveling, we were able to predict both cooperative and competitive relationships among human gut microbes as those whose abundances were significantly related within an individual over time. 
Strikingly, in subsequent analyses performed using inferred genomic information, pairs with negative associations from the time series analysis were predicted to compete over more metabolic substrates, and pairs with positive associations had significantly more metabolic complementarity. These predictions regarding the underlying molecular bases of interactions could inform how nutritional environment will impact interactions between gut microbiome community members. INTRODUCTION Microbial communities play central ecological roles, ranging from environmental biogeochemical cycling to supplying integral homeostatic functions for their associated hosts across the animal, fungal, and plant kingdoms ( 1 – 3 ). In the human gut, ecosystem services provided by the microbiome include energy harvest, immune education, and pathogen exclusion, and are the result of a complex web of facilitative and antagonistic interactions between community members ( 4 , 5 ). Even though the composition of the human gut microbiome has been extensively characterized across health and disease states, the fundamental ecological relationships between most consortium members have yet to be described. Resolving ecological linkages between microorganisms is important for the construction of basic microecological frameworks of community assembly and stability. Key questions include: How prevalent are cooperative and competitive relationships between bacteria in the gut and how do these interactions shape community composition? Are there phylogenetic patterns in these relationships? What attributes influence two microbes’ propensity to cooperate or compete? 
Questions regarding how ecological interactions like competition are influenced by evolutionary history and relatedness can be traced back to Darwin, who outlined his Congeneric Competition Hypothesis, a model that predicts that closely related organisms are more likely to share traits and occupy the same ecological niche, thereby leading to competition ( 6 ). Since the human gut has high functional redundancy where many microbes have highly overlapping metabolic pathways, competition due to niche overlap between functionally similar microbes may be an important force in shaping the composition and resilience of gut communities ( 7 ). On the other hand, metabolic cooperation has been documented in the gut, such as for utilization of host-derived glycoproteins or B-vitamin salvage ( 8 , 9 ). Determining the importance and specific mechanisms of both competitive and cooperative relationships in the gut microbiome will be essential for devising strategies to promote healthy compositions. While canonical experimental approaches are still the gold standard for in-depth investigation of microbe-microbe interactions, high-throughput sequencing has enabled the advancement of statistical methods for inferring ecological interactions that can then be tested in the lab. For the past half-century, co-occurrence has been a popular tool in attempts to predict macroecological interactions, and these techniques have been widely adopted in high-throughput surveys of microbial relative abundance ( 10 – 12 ). The popularity of co-occurrence analysis is understandable, as it is amenable to a variety of experimental designs and cross-sectional data, but co-occurrence is not a reliable proxy of a direct interaction because it can occur between microbes with similar environmental preferences ( 13 ). Co-occurrence analysis also fails to account for directional interactions and asymmetry like amensalism and commensalism. 
Some limitations of co-occurrence-based strategies for cross-sectional datasets can be improved by conducting time-lagged correlation of longitudinal observations. In time-lagged correlation, a positive relationship is denoted when the relative abundance of one species at time t correlates with an increase in relative abundance of a second species at time t + 1, and a negative (competitive) relationship is inferred from a correlation with the decrease in the relative abundance of the second species at time t + 1 ( 14 ). Time-lagged correlations account for directionality and asymmetry in putative interactions, which allows for the modeling of amensal (0/-), commensal (0/+), and exploitative (+/-) relationships between bacteria. Although time-lagged correlation methods are promising, these methods have not been broadly applied to understand microbial relationships in different contexts, including in the context of microbiome disturbance, and have not been coupled with functional information available from databases of annotated genomes and microbial metabolic networks to make molecularly-informed hypotheses about the mechanistic basis of these relationships. To address this gap, we further developed time-lagged correlation methods and applied them to a dense longitudinal study of four Westerners traveling abroad in Bangladesh who experienced diarrhea. This observational study’s high temporal resolution and multi-week time span enabled this analytic strategy. We combine information about these associations with their imputed genomes, metabolic network analysis, and extreme gradient-boosted random forest classifiers to predict the basis of potential cooperative and competitive relationships among gut microbes. RESULTS Community-level trends and dynamics Our longitudinal dataset consists of four adult Westerners, three males and one female, who were traveling abroad in Southeast Asia for 2-4 weeks. 
All 4 individuals lived in Colorado, USA and self-sampled every stool that they produced for 2 weeks prior to and then during and after travel to Dhaka, Bangladesh. Subject F01 was in Dhaka for 1 week and then in Cambodia for an additional week before returning to Colorado, where she continued to collect samples for an additional 2 weeks. M01 and M02 were in Dhaka for 2 weeks before returning to Colorado; M03 was in Dhaka for 1 week before traveling to Israel for a day, Canada for 3 days, and then returning to Colorado. Each individual collected fecal samples at the time of each bowel movement by swabbing used toilet paper, and the samples were subjected to 16S rDNA targeted sequencing. While in Dhaka, Bangladesh all 4 individuals experienced diarrhea and/or vomiting from a likely food-borne pathogen. The timing of the illness and shared food sources suggested that F01, M02, and M03 all had a shared exposure. The onset of illness for M01 was delayed, so it may have been from a different exposure. The symptoms and severity of illness varied across the 4 individuals, for instance with F01 experiencing vomiting and relatively mild diarrhea and M01 having severe diarrhea, having collected 13 unique stool samples on the day of illness onset. Examination of the 16S rRNA amplicons did not reveal any compositional blooms of likely bacterial culprits, such as Enterobacteriaceae (which would include Escherichia coli and Salmonella spp.), or Campylobacter. During this diarrheal episode, designated by the vertical red bars in Figure 1, subjects M01, M02 and M03 experienced an abrupt decline in the richness of bacteria in the gut as assessed with the phylogenetic diversity (PD) ( 15 ) alpha diversity measure ( Figure 1A ). Despite this drop in PD there were no obvious signs of canonical low-diversity dysbiosis defined as low alpha diversity coupled with increased colonization of facultative anaerobic bacterial families such as Enterobacteriaceae or Lactobacillaceae ( 16 ). 
Subject F01 had no drop in alpha diversity. Despite a suspected shared exposure in 3 of the 4 individuals, there was no compositional convergence among individuals during illness or recovery, consistent with interpersonal variation being strong in human studies ( 17 ). However, the drop in PD did correspond with an increase in turnover within individuals, as measured by Weighted UniFrac distances to samples at the subsequent timepoints ( Figure 1D ). For the three males, alpha diversity and diarrheal severity, as estimated by the number of stool samples collected per day ( Figure 1B ), were predictive of community stability with decreased phylogenetic diversity being indicative of increased turnover ($p = 1 \times 10^{-31}$, $R^2 = 0.13$) (Figure S1). All four subjects had Bacteroides-dominant microbiomes typical of industrial societies at the outset of their travels, and this enteric disturbance resulted in an enterotype switch for subject M03 wherein Bacteroides genera were largely displaced by Prevotella ( Figure 1C ), which is an enterotype canonically associated with agrarian societies or high fiber diets in Western individuals ( 18 ). As subject M03 underwent dramatic compositional shifts in his core microbiome which were incompatible with our filtering strategies, we excluded him from downstream analysis involving the identification of putative ecological interactions, which we herein refer to as associations. Download figure Open in new tab Figure 1: Fecal microbiome composition over time in 4 travelers who experienced diarrhea. Data is from one female (F01) and three males (M01, M02, M03) who experienced diarrhea while traveling in Bangladesh. The red line indicates the timing of the diarrheal episode. A) Phylogenetic Diversity (PD) over time in all 4 individuals. B) The number of stool samples collected per day, showing a peak in M01 and M03 with diarrhea. C) Taxonomic composition (family level) over time. 
D) Turnover expressed as the Weighted UniFrac distance between each timepoint and the subsequent time point. Identification of putative ecological interactions (associations) To identify associations between microbial relative abundances, we adapted the time-lagged correlation strategy described by Trosvik et al. ( 14 ), and applied it to 97% identity (ID) Operational Taxonomic Units (OTUs). We chose to bin highly related Amplicon Sequence Variants (ASVs) selected with DADA2 ( 19 ) into 97% ID OTUs, because this threshold has typically been used in microbiome data analyses to bin sequences derived from closely related organisms and to approximate the species level ( 20 ). Binning to 97% ID OTUs reduced the sparsity of the data matrix and increased the number of features that were observed commonly across a given individual without impacting resolution since 16S rRNA cannot typically define organisms or impute functional information at the strain level. Within each individual, and for all pairs of 97% ID OTUs (i, j) that were observed in at least 90% of the samples for that individual, we determined OTU pairs for which the centered log-ratio (CLR)-transformed value of OTU j at time t correlated with the change in the CLR-transformed value of OTU i between time t and time t + 1 (see methods). Missing data was imputed, and samples were merged such that there was exactly one sample value per day (detailed in methods). A CLR-transformation was applied since compositionality has been shown to impact time-lagged analyses ( 21 ) and the CLR-transformation has been shown to be effective at correcting for compositionality-driven artifacts in simulations of non-time-lagged correlation detection between OTUs ( 12 ). A positive Spearman coefficient $\beta_{ij}$ would indicate that higher levels of OTU j (relative to the mean) meant greater increases in OTU i in the next time point, indicating that OTU j may facilitate success of OTU i. 
A negative β would indicate an antagonistic or competitive effect of OTU j on i. To determine whether β was more positive or negative than expected by chance, we permuted the order of the timeseries by shuffling the time (columns) in the data matrix. This created null distributions where the time value was no longer meaningful while maintaining the relative abundance distributions across OTUs at each timepoint. We assessed statistical significance as βij values that had extreme absolute magnitudes as compared to their corresponding null distribution of βij values. Surprisingly, we observed a positive correlation between permuted and empirical Spearman coefficients for all βij (Figure S2). One indicator of the potential driver of this trend is that null Spearman coefficients for i = j interactions (i.e. comparing an OTU to itself) were all among the most negative. This is demonstrated in Figure S3, where we show this phenomenon for a single OTU and across all OTUs in all individuals. A negative time-lagged correlation for randomized i = j interactions suggests that this is a statistical artifact, where OTUs that are highly positively correlated with each other at the same time points are negatively correlated in time-lagged analysis. We believe that this is driven by “regression towards the mean”, where if one sample of a random variable is extreme, the next sampling of the same random variable is likely to be closer to its mean ( 22 ). These trends highlight the importance of using our permutation strategy for assessing statistical significance rather than a traditional p-value. Due to the extreme null attributes of these i = j outliers, we omitted them from our analysis. This methodology identified 78 positive and 140 negative associations out of ∼8,500 considered i←j pairs ( Figure 2C ). 
Seventy-five of the 78 positive associations were found in Male 01 who also had the longest and most densely sampled time series; the remaining three positive associations were found in Female 01. A complete list of associations with their taxonomic assignments and direction of change is provided in Supplemental Table 1. Download figure Open in new tab Figure 2: Phylogenetic and functional patterns across association types. A) Estimated rates of auxophore competition and metabolic complementarity by association type as calculated by NetCooperate. * p < 0.05, ** p < 0.01, *** p < 0.001, **** p < 0.0001. B) Estimated rates of auxophore competition and metabolic complementarity as a function of phylogenetic distance and association type. The curves for the positive associations are different from those of the negative and null associations (p < 0.05); there is no difference between the negative and null association curves. C) Relative fraction of positive (78) to negative (140) associations. We found that the positive associations were between bacteria that were significantly more phylogenetically distant than the negative and null i←j pairs and that the negative associations were between more closely related bacteria ( Figure 2A ). As bacterial metabolic niche space is roughly correlated with phylogeny ( 23 ), we hypothesized that these patterns in relatedness across association types were a reflection of metabolic competition and cooperation. To test this hypothesis, we used PICRUSt ( 24 ) to impute bacterial genomes and then mapped the available enzymatic reactions to KEGG ( 25 ) in order to generate a putative metabolic network for each 16S rDNA amplicon. We then used these metabolic networks as inputs for NetCooperate ( 26 ), which returns information about the relative metabolic potential between two bacteria’s respective metabolic networks. 
NetCooperate estimates metabolic complementarity as the fraction of auxophores ( 27 ) (essential substrates which cannot be prototrophically synthesized) required by OTU i which can be synthesized by OTU j. We expanded NetCooperate to measure metabolic niche overlap, which is defined as the fraction of OTU i’s auxophore pool that is also required by OTU j. We identified higher potential for metabolic cooperation amongst the positive associations compared to both the negative and null associations (Kruskal Wallis test with Dunn’s post-hoc test; p < 0.01), but there were no differences between the negative and null groups ( Figure 2A ). We found substantial differences in auxophore competition between all association types with the negative associations having the highest metabolic niche overlap followed by the null, and positive associations ( Figure 2A ). Together, these observations indicate that negative associations are enriched for microbe pairs that are more phylogenetically related, have lower potential for cooperation and a high degree of niche overlap, and the inverse is true for positive associations. To test if these metabolic relationships were merely a function of phylogeny and niche space co-correlation, we modeled these measures as a function of phylogenetic distance with a statistical interaction with the corresponding association type (negative, null, and positive). We observed that when adjusting for phylogenetic distance, positive associations still had higher metabolic cooperative potential and lower niche overlap compared to the negative and null associations; however, there were no differences between the negative and null associations’ curves ( Figure 2B ). To find specific metabolic signatures that could provide mechanistic clues for the basis of these potential ecological interactions, we searched through the compounds that NetCooperate deemed to be metabolically complementary or competitive for each pair of associated OTUs. 
We identified substrates that were required by both organisms and which were specifically enriched in negative associations but not in positive ones using a Z-score threshold of two. Using this approach, we found three different compounds: 1) glyoxylate, a two-carbon carboxylic acid, 2) myo-inositol, a carbocyclic acid, and 3) nicotinate, vitamin B3 ( Figure 3A ). We used a similar approach to find metabolically complementary compounds that were enriched in positive associations yielding seventeen different compounds. Broadly speaking, these 17 metabolites can be categorized as fatty-acid carriers, nucleoside/nucleotide derivatives, and carbohydrates ( Figure 3B ). Download figure Open in new tab Figure 3: Compounds identified using NetCooperate that were significantly enriched in Negative or Positive associations. A) Metabolically competitive auxophores that were enriched in negative but not positive interactions compared to pairs with no detected relationship using a Z-score threshold of 2. B) Metabolically complementary compounds enriched in positive but not negative interactions using a Z-score threshold of 2. Plots are colored blue if the compound is a substrate of KEGG Orthologies (KOs) determined to be important by a trained extreme gradient boosted random forest classifier (XGBoost) - see Figure 4. Though we were able to detect significant differences between the positive and negative association types in phylogenetic distance, metabolic complementarity, and auxophore competition, logistic classifiers that used these univariate summary measures were incapable of distinguishing unlabeled associations ( Figure 4A ). We also tested trait similarity, by forming vectors of both enzymatic and non-enzymatic orthologs using assignments to the KEGG Orthology (KO) database that were made from each OTU’s genome predictions by PICRUSt. We then calculated the Jaccard distance of the two trait vectors (a list of given KOs) for each OTU pair. 
We found that this raw trait similarity measure was not predictive of the association type when using a simple logistic regression $\beta_{ij} \sim J(\vec{i}, \vec{j})$ ( Figure 4A ). We hypothesized that since the NetCooperate analysis identified metabolites that differed between positive and negative association types, a high dimensional feature set that explicitly included trait data from both OTU i and j instead of a summary difference/similarity metric would provide more discriminatory power. To this end, we trained an extreme gradient-boosted random forest classifier ( 28 ) (XGBoost) to predict the association type using the concatenated trait vectors of OTUs i and j. Download figure Open in new tab Figure 4: Extreme Gradient-Boosted Random Forests (XGBoost) trained on concatenated KO presence/absence trait vectors can discriminate between positive and negative associations from time-lagged correlation. A) Receiver Operating Characteristic (ROC) curves demonstrating that Random Forest classifiers trained on high-dimensional trait vectors outperform logistic classifiers that use univariate summary measures. B) Informative traits as determined by permutation testing using BorutaShap. Features are colored by which OTU’s genome ( i or j ) the trait originated from. C) Trait concordance across informative traits found by XGBoost + BorutaShap. ($i_1 : j_1$ and $i_0 : j_0$) - trait is present or absent in both organisms. ($i_1 : j_0$) - Trait is present in i and absent in j. ($i_0 : j_1$) - Trait is absent in i and present in j. In this case, the trait vector is a bit vector of 1’s and 0’s indicating presence or absence of PICRUSt-inferred KOs. For instance, for each inferred KO and each i←j pair of OTUs, we encoded (0,0) if absent in both, (1, 1) if present in both, and (0,1) or (1,0) if present in one and not the other. 
We found that an XGBoost classifier achieved ∼72% classification accuracy (Wilcoxon test; p < 0.001) as determined by 5-fold cross-validation, and the receiver operating characteristic (ROC) curves substantially outperformed the univariate measures ( Figure 4A ). We trained the XGBoost classifier using the estimated KO copy number as predicted by PICRUSt as well as simple presence/absence information for each KO, and we detected no differences in performance between the models. As a result, we used the KO presence/absence trait vectors as they were simpler to interpret. We afterwards used BorutaShap ( 29 ), which uses permutation to determine whether features have a higher importance score than chance expectation, at a threshold of 0.9 on the trained XGBoost model, to extract important KOs for classifying association type, and identified 26 PICRUSt-inferred KOs with discriminative power ( Figure 4B ). Sixteen of these informative KOs were from OTU i’s genome while the other ten originated from OTU j ( Figure 4B ). Notably, 65% (11 of 17) of the compounds that we identified as enriched in positive associations in the metabolic complementarity measures ( Figure 3B ) are also substrates of three enzymatic KOs that BorutaShap deemed to be important in the XGBoost classifier. The various enoyl-acyl carriers ( Figure 3B ) are potential substrates of enoyl-[acyl-carrier-protein] reductase II (K02371) while the nucleoside/nucleotide derivatives are substrates of the 5’-nucleotidase (K01081) or nucleoside-diphosphate kinase (K00940). There are four different states for every trait in a given $\beta_{ij}$ pair: the trait is present or absent in both organisms ($i_1 : j_1$ and $i_0 : j_0$), or the trait is present in i but not j ($i_1 : j_0$) or vice versa ($i_0 : j_1$). We compared the relative frequencies of these relationships and found that trait discordance ($i_1 : j_0$ and $i_0 : j_1$) was more common in positive associations than in negative ones (p < 0.05). 
We illustrate the general patterns of informative trait discordance, given in shades of green, and concordance, given in shades of brown, in Figure 4C . We next visualized the state space of the 26 informative KOs identified using XGBoost + BorutaShap by embedding each OTU involved in an association into a principal component analysis (PCA) space based on the Jaccard distances between their presence/absence data for the 26 KOs ( Figure 5A ). The first two principal components accounted for ∼62% of the variance; PC1 linearly separates two primary clusters: 1) a dispersed grouping of Bacteroidales ( Bacteroides , Parabacteroides, Prevotella, and Paraprevotella ) with Desulfovibrionales ( Bilophila ) and Burkholderiales ( Sutterella ) on the left and 2) various Clostridiales species, Coriobacteriales , and an assigned member of Bacteroidaceae on the right. We observed that the positive associations occurred predominantly between these two clusters, and negative associations were more common within clusters. We also illustrate the putative presence/absence of select traits (5’ nucleotidase, enoyl-acp-reductase II and a sodium/pantothenate transporter) that independently appeared in both the metabolic network analysis and the XGBoost + BorutaShap feature selection. Download figure Open in new tab Figure 5: OTU Principal Component Analysis (PCoA) space using KOs identified by XGBoost + BorutaShap. A) OTUs, given as nodes colored by taxonomic order, that are closer in PCoA space are more functionally similar. Negative (top row) and positive associations (bottom row) are depicted as arrows between nodes ( j → i ). Columns designate the observed associations for the three individuals. B) Gene family presence absence data for 5’-nucleotidase (K01081), enoyl acyl-carrier protein reductase II (K02371), and sodium/pantothenate symporter (K14392). Red indicates an absence of the gene family for a given OTU and blue is presence. 
DISCUSSION In this study, we applied time-lagged correlation to dense time series data collected from travelers who had diarrhea to predict taxa pairs that had positive or negative associations indicating potential cooperative/facilitative or competitive relationships, respectively. We then used PICRUSt predictions of genomic content, metabolic network modeling and random forest to identify inferred metabolites and genes that predicted these relationships, generating novel hypotheses regarding the underlying driving factors of microbe-microbe interactions in the human gut. This work has thus produced a rich collection of hypotheses regarding interactions between microbes that could be tested in the lab. Understanding specific facilitative and antagonistic relationships that influence the success of microbes in the gut will facilitate efforts to modify microbiome composition to promote health. In this time series, the four subjects were suspected to have had a foodborne exposure to an enteric pathogen. Our search through the 16S rRNA amplicons did not identify a plausible bacterial source, suggesting an alternate etiology, such as a viral pathogen. We observed a sharp drop in phylogenetic diversity at the time of enteric disturbance in Males 01, 02, and 03 ( Figure 1A ) but did not detect convergence to a common microbiome composition. This illness was accompanied by decreased community stability, and in the case of subject M03, a drastic enterotype conversion to a Prevotella -rich microbiome composition. Changes in diet and water source can have a profound impact on microbiome structure and function, and immigrants from Southeast Asia to the United States experience distinctive shifts from a Prevotella- dominant to Bacteroides -dominant composition ( 18 ). 
We hypothesize that Male 03’s diarrheal event depleted community richness and opened new gastrointestinal niches, and the change in diet and environmental exposures due to travel in Southeast Asia led to a concordant enterotype conversion. We also found that the community dynamics (Figure S1) were consistent with the theory that high microbiome diversity begets community stability (lower turnover); however, the strength of this relationship was specific to the individual and only accounted for 13% of the variance. Applying our time-lagged correlation analysis to microbiomes that underwent disturbance and remodeling should in principle have increased the number and type of interactions observed since repeated measurements of a more stable microbiome would not be expected to contribute as many highly informative data points ( 30 ). Since microbiomes do typically vary somewhat over time due to factors such as normal dietary variation or exposures, further studies with time series data collected with and without disturbance would be needed to determine the importance of disturbance in identifying relationships. Co-occurrence has often been used to infer ecological interactions, and its use and interpretation have been a matter of debate for over a century ( 13 ). Analysis of co-occurrence and microbial recruitment patterns in human microbiomes has indicated that co-occurring bacteria tend to be more closely related than chance expectation, and that this phenomenon is likely a signature of environmental filtering rather than direct interactions ( 10 , 31 ). The time-lagged correlation strategy used here was applied previously to time series data collected from 2 individuals ( 14 ). One notable difference in our study was that we used a permutation-based method to determine statistical significance that corrected for underlying bias in each βij microbe pair. 
Both we and Trosvik et al found that empirical βij, where i = j, had a large negative Spearman coefficient, and Trosvik et al suggested that this was due to abundances of taxa over time being influenced by the carrying capacity of an environment for that taxon. However, our finding that the null βij distribution was more negative than empirical βij where i = j suggests an underlying statistical artifact rather than a biological explanation. We instead hypothesize that this behavior is an example of “regression to the mean,” a phenomenon where a randomly sampled observation with an extreme value is more likely to be followed by a random sample with a value closer to the population mean. Our finding that | null βij | > | empirical βij | indicates that there is actually less autocorrelation of taxa with themselves than would be expected by chance, perhaps suggesting factors against species turnover within communities. The overall positive correlation of empirical βij with randomized βij values would derive from pairs of i and j showing regression to the mean effects at a strength related to the degree of correlation. Future studies employing time-lagged correlations should thus use our permutational strategy for assessing statistical significance to avoid false positives. Note that other published microbiome association methods such as Local Similarity Analysis (LSA) ( 32 ) have used permutation strategies that have been suggested to sometimes produce false positives ( 33 ). However, these prior analyses have permuted i and j independently of each other rather than preserving the OTU relative abundance distributions at each time as we do here (i.e. shuffling the order of each row in a feature-by-time matrix rather than the order of the columns). 
However, other methods that compute a null distribution for correlation without destroying the autocorrelation structure in the data have been developed and would be interesting to also apply, including the IAAFT (iterative amplitude-adjusted Fourier transform) method and the Twin method ( 34 ) and other parametric approaches ( 35 , 36 ). We used a correlation strategy as opposed to a regression or dynamic systems model, which can have certain advantages over correlation, including the ability to control for confounders, account for perturbation frameworks, use data from multiple individuals together while controlling for dependency (rather than stratifying by individual), and forecast future system behaviors ( 30 ). Regression-based strategies may also be influenced by the regression to the mean artifact described above and this should be investigated further. Although sophisticated generalized Lotka-Volterra (gLV) based models have been developed and shown to be effective, such as MDSINE ( 30 ), TPG-CODA ( 38 ), and compositional Lotka-Volterra (cLV) ( 21 ), these have only been used to model a small number of taxa and have analytical challenges to scale to whole human microbiomes ( 30 ). The linear mixed model based approach of MTV-LMM has been run at scale but was developed to identify taxa whose temporal dynamics depend on whole community composition rather than the pairwise interactions between individual taxa ( 37 ). Other methods such as the sparse vector autoregression (sVAR) model ( 33 ), have been applied to large datasets and used to differentiate autoregressive OTUs whose abundance dynamics depend on community composition at previous timepoints, as well as non-autoregressive OTUs whose abundance was more related to changes in external factors such as diet. 
With the sVAR approach, time-lagged interactions between OTUs within the model could be estimated using autoregressive model coefficients equivalent to partial Granger coefficients ( 39 ), which could be a promising alternate approach to apply here. Interestingly, prior work that applied regression models to time series data have found that a considerable portion of the human gut microbiome have time-dependent relative abundance patterns that can be predicted by microbiome composition ( 30 , 37 ) and have recovered experimentally-supported relationships between microbes in smaller synthetic communities evaluated over time in gnotobiotic mice, such as an inhibitory effect of Clostridium scindens on Clostridioides difficile ( 30 , 40 ). We found a higher prevalence of negative associations between phylogenetically similar OTUs, suggesting an importance of competitive interactions in shaping gut microbiome composition over time, but we also observed a high number of positive associations indicating potential cooperative relationships. The ratio between cooperative and competitive relationships in ecological networks and the resulting impact on microbiome function, stability, and resilience is currently uncertain; mathematical frameworks have produced seemingly conflicting conclusions that both higher and lower ratios of competition:facilitation can reduce community resistance to ecological perturbation ( 4 , 41 ). Moreover, interaction strength and the interaction network topology are also conjectured to influence community-level attributes ( 41 , 42 ). In the mammalian gut, both competitive and cooperative relationships have been demonstrated to augment community resilience and stability, and the successful explanatory models will almost certainly need to incorporate contextual details like network topology, spatial architecture, and temporal variability ( 43 ). 
The significance for competition was further supported in our analyses by the lower levels of shared auxophores based on the predicted metabolic networks of positively associated pairs compared to the null and negative pairs. This result is consistent with experimental and mathematical models of the mammalian gut microbiome that have depicted an ecosystem with prevalent competition for limited resources ( 7 , 42 , 44 – 46 ). Our analyses also resulted in the detection of many positive associations that had higher predicted metabolic complementarity, supporting that these cooperative relationships also play an important role in community dynamics over time. This is consistent with prior studies that have linked high alpha diversity and CR with complex cooperative cross-feeding in the microbiome ( 5 , 47 , 48 ). Metabolic cooperation has been previously documented for substantial carbon sources in the gut, especially at the epithelial mucosa where microbes collectively hydrolyze large, branched, host-derived glycoproteins into extracellular “public goods”( 8 , 9 ). Sharma et al also observed a high prevalence of division of labor and cooperative micronutrient salvage within the gut ( 8 ). They noted that auxotrophy for major B-vitamins was common throughout the bacterial community and showed that there are essential, committed B-vitamin salvage pathways between commensal bacteria. Moreover, they demonstrated that loss of the cooperative relationships between various B-vitamin prototrophs and auxotrophs destabilized the overall microbiome composition, indicating that there are likely numerous other interaction- mediated influences on community structure and function. 
Our NetCooperate metabolic network analysis and XGBoost + BorutaShap model independently found three distinct classes of molecules whose predicted levels varied between positive and negative associations: 1) B vitamins pantothenate and nicotinate, 2) enoyl acyl-carrier protein (acp) reductase II (FabK) and 3) nucleotide/nucleoside derivatives. NetCooperate highlighted mutual nicotinate auxotrophy in negative associations while XGBoost + BorutaShap identified dissimilarity ( i 1 :j 0 and i 0 :j 1 ) in 5’-nucleotidase (K01081), which is involved in the metabolism of nicotinate-nucleosides, as being enriched in positive associations. These findings suggest competition and metabolic niche partitioning regarding nicotinate (B3) synthesis. Magnúsdóttir et al conducted genomic queries for various B vitamin auxotrophies across gut commensals, and they observed that environmental nicotinate salvage was unique to members of Actinobacteria , Firmicutes , and a single Proteobacteria ( 49 ). We detected numerous negative associations among members of these phyla ( Figure 5B ), and we hypothesize that these relationships may be influenced by competition over extracellular B3 pools. Our feature selection approach also identified the sodium/pantothenate (B5) symporter as an informative feature: specifically, when OTU i, the microbe being influenced by j , was predicted to have the pantothenate symporter, j was not ( i 1 :j 0 ). Here, we hypothesize that j is likely a B5 prototroph as it does not have a transporter, and j donates B5 to i, which takes up the “public good” via the sodium/pantothenate symporter. In vitro B vitamin cross-feeding assays using Escherichia coli demonstrate that B5 prototrophs are indeed able to donate vitamins to co-cultured auxotrophs and strongly rescue growth in a B5-deficient medium; however, this group also noted high variance between differing donor strains which suggests additional mediating factors in cross-feeding efficiency ( 49 ). 
As both B5 and B3 are severely depleted in ulcerative colitis, ecological relationships governed by B vitamin dynamics may represent a therapeutic target for future clinical interventions ( 50 ). Enoyl acp reductase II (FabK) catalyzes the terminal step of fatty acid elongation and is widespread in Clostridiales members ( 51 ). We were unable to find any literature regarding enoyl acp reductase involvement in bacterial interactions; however, in vitro experiments on Δ fabK Δ fabI Enterococcus faecalis strains revealed that fatty-acid deficient bacteria can be cultured in the presence of exogenous fatty acids ( 52 ). Although our study produced very intriguing and interpretable results, we acknowledge weaknesses. One is that predictions of functional genes and metabolites were made based on 2 levels of inference: 1) PICRUSt was used to infer genes present from 16S rRNA and 2) NetCooperate was used to infer metabolite auxophores based on these genes. PICRUSt does not predict genes that are subject to frequent horizontal gene transfer, and we may have missed interactions that are based on genes/metabolites that are subject to high strain level variation or that were among OTUs not well represented in genome databases. Further study with dense shotgun metagenomic sequence data would be beneficial, but, given the large number of samples needed for this type of analysis, such studies are often prohibitively expensive. Follow-up studies to verify gene presence/absence calls and targeted metabolomics to survey levels of metabolites predicted to be important could help further validate and refine predictions. 
Correlation-based inferences of interactions have been shown to perform poorly in certain contexts such as in predicting simulated interactions between viruses and bacteria ( 54 ), but as applied here, these inferences were able to identify positively and negatively interacting pairs with independently determined metabolic signatures consistent with cooperative and competitive interactions, respectively. In summation, we find that our methodology corrects for spurious associations by using permutation-based testing. The downstream metabolic network analysis coupled with interpretable machine-learning approaches identified distinct functional signals across positive and negative relationships and fostered the generation of targeted hypotheses about the mechanistic basis of the relationship. Our findings support that both competitive and cooperative relationships shape gut microbiome compositional dynamics over time. MATERIALS AND METHODS Sample Collection Fecal samples were collected at the time of each bowel movement by swabbing used toilet paper ( 55 ). Swabs were stored at room temperature during travel and at −80 °C upon return and prior to 16S rRNA targeted sequencing. The human volunteers gave consent under the University of Colorado, Boulder, IRB protocol 0409.13. Sample processing DNA was extracted using the MP Biomedicals Powersoil kit following the Earth Microbiome Project (EMP) protocol ( http://www.earthmicrobiome.org ) ( 56 ). Barcoded primers targeting the V4 region of 16S rRNA were used to PCR amplify the extracted bacterial DNA also using the EMP standard protocols. Quantification of PCR products was completed using PicoGreen (Invitrogen, Carlsbad, CA). The UltraClean PCR Clean-Up Kit (MoBio, Carlsbad, CA) was used to clean and pool equal amounts of DNA from each sample. Sequencing was conducted using a MiSeq personal sequencer (Illumina, San Diego, CA). 
Sequence Processing Samples were demultiplexed using QIIME 1 ( 57 ), trimmed to a sequence length of 94 base-pairs, and denoised via Dada2 with the recommended default settings for maximum number of N’s in the amplicon sequence (maxN = 0), truncation quality (truncQ = 2), phiX removal (rm.phix = TRUE), and the maximum number of expected errors in a read (maxEE = c(2,2)). The resulting amplicon sequence variants (ASVs) were then binned into 97% OTUs using UCLUST’s ( 58 ) closed-reference schema. Sequences were aligned using pyNAST ( 59 ) and the core_set_aligned.fasta.imputed reference alignment downloaded from greengenes.lbl.gov. Fasttree ( 60 ) was used to create a phylogenetic tree using the lanemask_in_1s_and_0s file from greengenes.lbl.gov to mask highly variable positions in the alignment. Samples were rarefied to 7500 sequences per sample. Taxonomic assignments were made using greengenes 13_8. Alpha diversity (PD) and beta diversity (Weighted UniFrac) were calculated using QIIME 1. Lag-Correlation We performed CLR transformations on relative abundance data for each of our samples to account for the compositional nature of microbiome data, which has been shown to have the potential to impact time-series analyses, particularly in cases such as this when absolute abundances of microbes may change over time due to a perturbation ( 21 , 30 , 61 ). We then removed OTUs which were not present in at least 90% of the timepoints for a given individual in order to focus our analysis on the most prevalent taxa and reduce loss of power while correcting for multiple comparisons. Because M03’s microbiome composition was highly variable and many of its OTUs were therefore present in < 90% of timepoints, M03 was excluded from downstream analyses. Having equal spacing between timepoints is important in time-series data because the degree of change can be influenced by the length of time between samples ( 61 , 62 ). 
To produce a dataset with only one sample per day, we first created a representative sample for days with multiple samples by calculating the median CLR abundance for each OTU. To infer abundances for days where no bowel movements occurred, we used scipy’s Piecewise Cubic Hermite Interpolating Polynomial (PCHIP) ( 63 ). We chose this method because it maintains abundances above zero, preserves monotonicity, and avoids overshooting in cases of non-smooth data, and is thus well-suited for microbiome data analysis ( 64 ), and it has been applied in time-series analyses of microbiome data previously ( 33 ). We performed lag-correlation analysis separately for each individual. For each pair of OTUs (i,j), we determined the Spearman rank-correlation coefficient between the CLR-transformed relative abundance of OTU j at time t and the change in the CLR-transformed relative abundance of OTU i between time t and time t + 1. The procedure for generating these coefficients is given as follows: Let M be a 2 × n matrix representing a time series of relative abundances of two taxa, i and j . The rows of M correspond to taxa, and the columns represent the chronologically ordered time points t ∈ {1, 2, 3, …, n }. The empirically observed Spearman coefficient is calculated as $$\beta_{ij} = \rho_s\left(\Delta i,\; j\right),$$ where $\rho_s$ denotes the Spearman rank correlation, $\Delta i = (\Delta i_1, \ldots, \Delta i_{n-1})$ with $\Delta i_t = i_{t+1} - i_t$, and $j = (j_1, \ldots, j_{n-1})$. A permutation function, σ, is a bijective function $\sigma : \{1, 2, \ldots, n\} \to \{1, 2, \ldots, n\}$ which reorders the column indices of M by uniquely mapping each input index to a given output index. For example, consider a permutation σ ( t ) for t ∈ {1, 2, 3, 4}: $$\sigma = \begin{pmatrix} 1 & 2 & 3 & 4 \\ 3 & 1 & 4 & 2 \end{pmatrix}.$$ This means: $\sigma(1) = 3$, $\sigma(2) = 1$, $\sigma(3) = 4$, $\sigma(4) = 2$. Applying this function to i and j results in: $$i^{\sigma} = (i_3, i_1, i_4, i_2), \qquad j^{\sigma} = (j_3, j_1, j_4, j_2).$$ Thus, the permuted Spearman coefficient is then defined as: $$\beta^{\sigma}_{ij} = \rho_s\left(\Delta i^{\sigma},\; j^{\sigma}\right),$$ where $\Delta i^{\sigma}_t = i_{\sigma(t+1)} - i_{\sigma(t)}$ and $j^{\sigma}_t = j_{\sigma(t)}$ for $t \in \{1, \ldots, n-1\}$. To determine whether calculated Spearman coefficients were more extreme than chance expectation, we compared them to a null distribution which was generated by permuting the time series and recalculating the corresponding Spearman coefficient $2 \times 10^{6}$ times. 
This method was preferred over traditional p-value calculation for Spearman correlation as applied in Trosvik et al ( 14 ) because of a strong positive correlation between true Spearman correlations and those calculated based on a null distribution (Figure S2). We used numpy v1.26.0 and scipy v1.11.2. We used the Benjamini-Hochberg FDR correction and filtered for associations with a corrected p-value < 0.1. All statistical calculations downstream of the permutation analysis were made using the R programming language v4.1.2 “Bird Hippie.” Declarative specifications for the analysis environment can be found at https://github.com/casey-martin/bangladesh_time_series . Genome Imputation (PICRUSt): We used PICRUSt 1’s pipeline as described in their tutorial ( https://picrust.github.io/picrust/tutorials/genome_prediction.html#genome-prediction-tutorial ) to normalize 16S rRNA copy numbers and impute the corresponding genome contributions for the 97% OTUs and selected KEGG Orthologs as the output trait ( 24 ). PICRUSt’s precalculated tables for genome estimation were accessed using the following links: 16S Copy Number Normalization: http://kronos.pharmacology.dal.ca/public_files/picrust/picrust_precalculated_v1.1.4/13_5/16S_13_5_precalculated.tab.gz GreenGenes 13.5 KEGG Ortholog Table: http://kronos.pharmacology.dal.ca/public_files/picrust/picrust_precalculated_v1.1.4/13_5/ko_13_5_precalculated.tab.gz Metabolic Network Analysis (NetCooperate) The enzymes from the PICRUSt genome predictions were mapped to the KEGG database ( 25 ) and approximate metabolic networks were constructed using the archived 2012 KEGG compound database. Each KO was mapped to its associated list of reactions, and then each reaction was split into a directed graph of reactants → products and reactants ← products if the reaction was labeled as bidirectional. In the case where there were multiple reactants or products, all compounds were joined with a directed edge. 
For example, the formula A + B → C would yield an edge list of (A, C), (B, C). All compounds were then unified into a graph, and subgraphs of size < 10 were removed, leaving only the dominant, contiguous metabolic network for consideration of metabolic overlap and complementarity. Measures of metabolic complementarity provided by NetCooperate (fraction of auxophores required by OTU i which can be synthesized by OTU j ) were used in downstream analyses. We also estimated metabolic niche overlap (defined as the fraction of OTU i ’s auxophore pool that is also required by OTU j ), using auxophore lists output by NetCooperate and custom python and R scripts. XGBoost + BorutaShap and PCA visualization The full genome predictions produced by PICRUSt (both enzymatic and non-enzymatic KOs) were used to train the XGBoost Random Forest Classifiers ( 28 ) which consisted of 500 estimators with a max depth of 5. Model performance was not sensitive to these parameters as determined by coarse parameter space search. We tested a range of estimators [500, 1000, 10,000] and max depth [5, 7, 10]. Statistically significant features were identified using BorutaShap’s ( 29 ) percentile cutoff of 0.9 over the course of 100 trials. Model performance was estimated using a k-fold cross validation scheme using k = 5. We visualized the state space of 26 informative KOs identified using XGBoost + BorutaShap by embedding each OTU involved in an association into a PCA space. For each OTU, the 26 KOs were encoded as a binary vector, with a one signifying trait presence and a zero signifying trait absence. DATA AVAILABILITY The datasets generated during the current study are available in the ENA repository, https://www.ebi.ac.uk/ena/browser/view/PRJEB69530 . Data processing and analysis scripts may be found at: https://github.com/casey-martin/bangladesh_time_series . 
CONFLICT OF INTEREST Rob Knight is a scientific advisory board member, and consultant for BiomeSense, Inc., has equity and receives income. He is a scientific advisory board member and has equity in GenCirq. He is a consultant for DayTwo, and receives income. He has equity in and acts as a consultant for Cybele. He is a co-founder of Biota, Inc., and has equity. He is a cofounder of Micronoma, and has equity and is a scientific advisory board member. The terms of these arrangements have been reviewed and approved by the University of California, San Diego in accordance with its conflict of interest policies. Download figure Open in new tab Figure S1: Microbiome stability, measured here as median turnover with the subsequent time point, modeled as a linear mixed effects model of PD + # of Bowel Movements + Individual Effects (p < 0.0001). Download figure Open in new tab Figure S2: Plot of empirical Spearman correlation R coefficients versus the mean permuted R coefficient, which is calculated based on a null distribution generated by independently permuting the order of the time series and recalculating the Spearman R coefficient $2 \times 10^{6}$ times. Each point represents values for a single OTU i, OTU j pair and all possible pairs are shown. The plot is faceted by whether the OTU pair was observed in F01, M01, or M02. Significant pairs, which are colored by whether the relationship was negative or positive, had an FDR corrected p-value < 0.1. Uncorrected p-values were calculated as the fraction of times that the observed R Coefficient was more extreme than the permuted R coefficients. Download figure Open in new tab Figure S3: Time-lagged correlations of OTUs with themselves are negative, even in randomized data, suggesting a systemic bias. (A): the observed (top) and randomized (bottom) relative abundance of a single OTU in F01 over time. (B) The 1 st derivative ((Δ it = it+1 – it ) where i is the OTU) as calculated from the observed (top) and randomized (bottom) data. 
(C) Plot of the 1 st Derivative of this OTU versus its relative abundance in both the observed (top) and randomized (bottom) data. (D) Plot of the data while comparing all OTUs to themselves in each of the 4 individuals (labeled F01, M01, M02, and M03) for the observed (top) and randomized data. Blue lines represent the trend lines for each individual OTU, showing an overall negative bias in both observed and randomized data. F01 has the OTU shown in Panels A, B and C in larger red symbols. Supplemental Table 1: List of interacting OTUs. A list of all significant interactions detected with time-lagged correlation including the greengenes OTU IDs of each interacting pair where OTU j (column A) impacts OTU i (column B), the p-value of the interaction (column C), whether the interaction was positive (facilitative) or negative (competitive) (column D), the taxonomic assignments of OTU i (column E) and OTU j (column F), the phylogenetic branch-length distance between OTU i and OTU j (column G), and the subject ID of the study participant in which the interaction was detected (column H). ACKNOWLEDGEMENTS We would like to thank Luke Ursell for his help in conceptualizing the study and editing, Manuel Lladser for help with mathematical notations and Abigail Armstrong, Laurie Lyon, John Sterrett, and Jack Darcy for the many helpful conversations which ultimately guided the direction of this work. Footnotes Competing Interest: Rob Knight is a scientific advisory board member, and consultant for BiomeSense, Inc., has equity and receives income. He is a scientific advisory board member and has equity in GenCirq. He is a consultant for DayTwo, and receives income. He has equity in and acts as a consultant for Cybele. He is a co-founder of Biota, Inc., and has equity. He is a cofounder of Micronoma, and has equity and is a scientific advisory board member. 
The terms of these arrangements have been reviewed and approved by the University of California, San Diego in accordance with its conflict of interest policies. REFERENCES 1. ↵ Mohajeri MH , Brummer RJM , Rastall RA , Weersma RK , Harmsen HJM , Faas M , Eggersdorfer M . 2018 . The role of the microbiome for human health: from basic science to clinical applications . Suppl 1 . Eur J Nutr 57:1–14. 2. Valdes AM , Walter J , Segal E , Spector TD . 2018 . Role of the gut microbiota in nutrition and health . BMJ 361 : k2179 . OpenUrl FREE Full Text 3. ↵ Trivedi P , Leach JE , Tringe SG , Sa T , Singh BK . 2020 . Plant–microbiome interactions: from community assembly to plant health. 11 . Nat Rev Microbiol 18 :607–621. 4. ↵ Coyte KZ , Schluter J , Foster KR . 2015 . The ecology of the microbiome: Networks, competition, and stability. 6261 . Science 350 :663–666. 5. ↵ Oña L , Kost C . 2022 . Cooperation increases robustness to ecological disturbance in microbial cross-feeding networks. 6 . Ecology Letters 25 :1410–1420. 6. ↵ Darwin C . 2003 . The origin of species: by means of natural selection, or the preservation of favoured races in the struggle for life. 150th anniversary edition. Signet classics , New York . 7. ↵ Fujita H , Ushio M , Suzuki K , Abe MS , Yamamichi M , Okazaki Y , Canarini A , Hayashi I , Fukushima K , Fukuda S , Kiers ET , Toju H . 2023 . Metagenomic analysis of ecological niche overlap and community collapse in microbiome dynamics . Front Microbiol 14 : 1261137 . OpenUrl CrossRef PubMed 8. ↵ Sharma V , Rodionov DA , Leyn SA , Tran D , Iablokov SN , Ding H , Peterson DA , Osterman AL , Peterson SN . 2019 . B-Vitamin Sharing Promotes Stability of Gut Microbial Communities . Frontiers in Microbiology 10 . 9. ↵ Smith P , Schuster M . 2019 . Public goods and cheating in microbes. 11 . Current Biology 29 :R442–R447. 10. ↵ Faust K , Sathirapongsasuti JF , Izard J , Segata N , Gevers D , Raes J , Huttenhower C . 2012 . 
Microbial Co-occurrence Relationships in the Human Microbiome. 7 . PLOS Computational Biology 8:e1002606. 11. ↵ Lozupone CA , Stombaugh J , Gonzalez A , Ackermann G , Wendel D , Vázquez-Baeza Y , Jansson JK , Gordon JI , Knight R . 2013 . Meta-analyses of studies of the human microbiota . Genome Res 23 : 1704 – 1714 . OpenUrl Abstract / FREE Full Text 12. ↵ Friedman J , Alm EJ . 2012 . Inferring Correlation Networks from Genomic Survey Data . PLOS Computational Biology 8 : e1002687 . OpenUrl CrossRef 13. ↵ Blanchet FG , Cazelles K , Gravel D . 2020 . Co-occurrence is not evidence of ecological interactions. 7 . Ecology Letters 23 :1050–1063. 14. ↵ Trosvik P , de Muinck EJ. 2015 . Ecology of bacteria in the human gastrointestinal tract—identification of keystone and foundation taxa . 1 . Microbiome 3:44. 15. ↵ Faith DP . 1992 . Conservation evaluation and phylogenetic diversity. 1 . Biological Conservation 61 :1–10. 16. ↵ Kriss M , Hazleton KZ , Nusbacher NM , Martin CG , Lozupone CA . 2018 . Low diversity gut microbiota dysbiosis: drivers, functional implications and recovery . Curr Opin Microbiol 44 : 34 – 40 . OpenUrl CrossRef PubMed 17. ↵ Ursell LK , Clemente JC , Rideout JR , Gevers D , Caporaso JG , Knight R . 2012 . The interpersonal and intrapersonal diversity of human-associated microbiota in key body sites. 5 . J Allergy Clin Immunol 129 :1204–1208. 18. ↵ Vangay P , Johnson AJ , Ward TL , Al-Ghalith GA , Shields-Cutler RR , Hillmann BM , Lucas SK , Beura LK , Thompson EA , Till LM , Batres R , Paw B , Pergament SL , Saenyakul P , Xiong M , Kim AD , Kim G , Masopust D , Martens EC , Angkurawaranon C , McGready R , Kashyap PC , Culhane-Pera KA , Knights D . 2018 . U.S. immigration westernizes the human gut microbiome. 4 . Cell 175 :962-972.e10. 19. ↵ Callahan BJ , McMurdie PJ , Rosen MJ , Han AW , Johnson AJA , Holmes SP . 2016 . DADA2: High- resolution sample inference from Illumina amplicon data . Nat Methods 13 : 581 – 583 . OpenUrl CrossRef PubMed 20. 
↵ Stackebrandt E , Goebel BM . 1994 . Taxonomic Note: A Place for DNA-DNA Reassociation and 16S rRNA Sequence Analysis in the Present Species Definition in Bacteriology . International Journal of Systematic and Evolutionary Microbiology 44 : 846 – 849 . OpenUrl CrossRef 21. ↵ Joseph TA , Shenhav L , Xavier JB , Halperin E , Pe’er I . 2020 . Compositional Lotka-Volterra describes microbial dynamics in the simplex . PLoS Comput Biol 16 : e1007917 . OpenUrl CrossRef PubMed 22. ↵ Stigler SM . 1997 . Regression towards the mean, historically considered . Stat Methods Med Res 6 : 103 – 114 . OpenUrl CrossRef PubMed 23. ↵ Fahimipour AK , Gross T . 2020 . Mapping the bacterial metabolic niche space. 1 . Nat Commun 11 :4887. 24. ↵ Langille MGI , Zaneveld J , Caporaso JG , McDonald D , Knights D , Reyes JA , Clemente JC , Burkepile DE , Vega Thurber RL , Knight R , Beiko RG , Huttenhower C . 2013 . Predictive functional profiling of microbial communities using 16S rRNA marker gene sequences. 9 . Nat Biotechnol 31 :814–821. 25. ↵ Kanehisa M , Goto S . 2000 . KEGG: kyoto encyclopedia of genes and genomes. 1 . Nucleic Acids Res 28 :27–30. 26. ↵ Levy R , Carr R , Kreimer A , Freilich S , Borenstein E . 2015 . NetCooperate: a network-based tool for inferring host-microbe and microbe-microbe cooperation. 1 . BMC Bioinformatics 16 :164. 27. ↵ Johnson WM , Alexander H , Bier RL , Miller DR , Muscarella ME , Pitz KJ , Smith H . 2020 . Auxotrophic interactions: a stabilizing attribute of aquatic microbial communities? 11 . FEMS Microbiology Ecology 96 :fiaa115. 28. ↵ Chen T , Guestrin C . 2016 . XGBoost: A Scalable Tree Boosting System , p. 785 – 794 . In Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. ACM, San Francisco California USA . 29. ↵ Keany E. 2020 . BorutaShap : A wrapper feature selection method which combines the Boruta feature selection algorithm with Shapley values. (1.1). Zenodo . 30. 
↵ Bucci V , Tzen B , Li N , Simmons M , Tanoue T , Bogart E , Deng L , Yeliseyev V , Delaney ML , Liu Q , Olle B , Stein RR , Honda K , Bry L , Gerber GK . 2016 . MDSINE: Microbial Dynamical Systems INference Engine for microbiome time-series analyses . Genome Biol 17 : 121 . OpenUrl CrossRef PubMed 31. ↵ Darcy JL , Washburne AD , Robeson MS , Prest T , Schmidt SK , Lozupone CA . 2020 . A phylogenetic model for the recruitment of species into microbial communities and application to studies of the human microbiome. 6 . ISME J 14 :1359–1368. 32. ↵ Ruan Q , Dutta D , Schwalbach MS , Steele JA , Fuhrman JA , Sun F . 2006 . Local similarity analysis reveals unique associations among marine bacterioplankton species and environmental factors . Bioinformatics 22 : 2532 – 2538 . OpenUrl CrossRef PubMed Web of Science 33. ↵ Yuan AE , Shou W . 2022 . Data-driven causal analysis of observational biological time series . Elife 11 : e72518 . OpenUrl CrossRef PubMed 34. ↵ Lancaster G , Iatsenko D , Pidde A , Ticcinelli V , Stefanovska A . 2018 . Surrogate data for hypothesis testing of physical systems . Physics Reports 748 : 1 – 60 . OpenUrl CrossRef 35. ↵ Clifford P , Richardson S , Hemon D . 1989 . Assessing the Significance of the Correlation between Two Spatial Processes . Biometrics 45 : 123 . OpenUrl CrossRef PubMed Web of Science 36. ↵ Dutilleul P , Clifford P , Richardson S , Hemon D . 1993 . Modifying the t Test for Assessing the Correlation Between Two Spatial Processes . Biometrics 49 : 305 . OpenUrl CrossRef PubMed Web of Science 37. ↵ Shenhav L , Furman O , Briscoe L , Thompson M , Silverman JD , Mizrahi I , Halperin E . 2019 . Modeling the temporal dynamics of the gut microbial community in adults and infants . PLoS Comput Biol 15 : e1006960 . OpenUrl CrossRef PubMed 38. ↵ Äijö T , Müller CL , Bonneau R . 2018 . Temporal probabilistic modeling of bacterial compositions derived from 16S rRNA sequencing . Bioinformatics 34 : 372 – 380 . OpenUrl CrossRef PubMed 39. 
↵ Gibbons SM , Kearney SM , Smillie CS , Alm EJ . 2017 . Two dynamic regimes in the human gut microbiome . PLoS Comput Biol 13 : e1005364 . OpenUrl CrossRef PubMed 40. ↵ Buffie CG , Bucci V , Stein RR , McKenney PT , Ling L , Gobourne A , No D , Liu H , Kinnebrew M , Viale A , Littmann E , van den Brink MRM, Jenq RR, Taur Y, Sander C, Cross JR, Toussaint NC, Xavier JB, Pamer EG. 2015 . Precision microbiome reconstitution restores bile acid mediated resistance to Clostridium difficile . Nature 517 : 205 – 208 . OpenUrl CrossRef PubMed Web of Science 41. ↵ Ratzke C , Barrere J , Gore J . 2020 . Strength of species interactions determines biodiversity and stability in microbial communities. 3 . Nat Ecol Evol 4 :376–383. 42. ↵ Weiss AS , Burrichter AG , Durai Raj AC , von Strempel A , Meng C , Kleigrewe K , Münch PC , Rössler L , Huber C , Eisenreich W , Jochum LM , Göing S , Jung K , Lincetto C , Hübner J , Marinos G , Zimmermann J , Kaleta C , Sanchez A , Stecher B . 2022 . In vitro interaction network of a synthetic gut bacterial community. 4 . ISME J 16 :1095–1109. 43. ↵ Earle KA , Billings G , Sigal M , Lichtman JS , Hansson GC , Elias JE , Amieva MR , Huang KC , Sonnenburg JL . 2015 . Quantitative Imaging of Gut Microbiota Spatial Organization. 4 . Cell Host & Microbe 18 :478–488. 44. ↵ Shepherd ES , DeLoache WC , Pruss KM , Whitaker WR , Sonnenburg JL . 2018 . An exclusive metabolic niche enables strain engraftment in the gut microbiota. 7705 . Nature 557 :434–438. 45. Kearney SM , Gibbons SM , Erdman SE , Alm EJ . 2018 . Orthogonal Dietary Niche Enables Reversible Engraftment of a Gut Bacterial Commensal . 7. Cell Rep 24 :1842–1851. 46. ↵ Palmer JD , Foster KR . 2022 . Bacterial species rarely work together. 6593 . Science 376 :581–582. 47. ↵ Ze X , Duncan SH , Louis P , Flint HJ . 2012 . Ruminococcus bromii is a keystone species for the degradation of resistant starch in the human colon. 8 . ISME J 6 :1535–1543. 48. ↵ McNally CP , Borenstein E . 2018 . 
Metabolic model-based analysis of the emergence of bacterial cross-feeding via extensive gene loss. 1 . BMC Systems Biology 12 :69. 49. ↵ Magnúsdóttir S , Ravcheev D , De Crécy-Lagard V , Thiele I. 2015 . Systematic genome assessment of B-vitamin biosynthesis suggests co-operation among gut microbes . Front Genet 6 . 50. ↵ Roediger WEW . 2019 . Causation of human ulcerative colitis: A lead from an animal model that mirrors human disease. 4 . JGH Open 3 :277–280. 51. ↵ Radka CD , Frank MW , Rock CO , Yao J . 2020 . Fatty acid activation and utilization by Alistipes finegoldii, a representative Bacteroidetes resident of the human gut microbiome. 4 . Mol Microbiol 113 :807–825. 52. ↵ Zhu L , Zou Q , Cao X , Cronan JE . 2019 . Enterococcus faecalis Encodes an Atypical Auxiliary Acyl Carrier Protein Required for Efficient Regulation of Fatty Acid Synthesis by Exogenous Fatty Acids. 3 . mBio 10 :e00577-19. 53. Quinn TP , Erb I , Gloor G , Notredame C , Richardson MF , Crowley TM . 2019 . A field guide for the compositional analysis of any-omics data . Gigascience 8 :giz107. 54. ↵ Coenen AR , Weitz JS . 2018 . Limitations of Correlation-Based Inference in Complex Virus-Microbe Communities . mSystems 3 : e00084 – 18 . OpenUrl CrossRef PubMed 55. 
↵ McDonald D , Hyde E , Debelius JW , Morton JT , Gonzalez A , Ackermann G , Aksenov AA , Behsaz B , Brennan C , Chen Y , DeRight Goldasich L , Dorrestein PC , Dunn RR , Fahimipour AK , Gaffney J , Gilbert JA , Gogul G , Green JL , Hugenholtz P , Humphrey G , Huttenhower C , Jackson MA , Janssen S , Jeste DV , Jiang L , Kelley ST , Knights D , Kosciolek T , Ladau J , Leach J , Marotz C , Meleshko D , Melnik AV , Metcalf JL , Mohimani H , Montassier E , Navas-Molina J , Nguyen TT , Peddada S , Pevzner P , Pollard KS , Rahnavard G , Robbins-Pianka A , Sangwan N , Shorenstein J , Smarr L , Song SJ , Spector T , Swafford AD , Thackray VG , Thompson LR , Tripathi A , Vázquez-Baeza Y , Vrbanac A , Wischmeyer P , Wolfe E , Zhu Q , The American Gut Consortium , Knight R , Mann AE , Amir A , Frazier A , Martino C , Lebrilla C , Lozupone C , Lewis CM , Raison C , Zhang C , Lauber CL , Warinner C , Lowry CA , Callewaert C , Bloss C , Willner D , Galzerani DD , Gonzalez DJ , Mills DA , Chopra D , Gevers D , Berg-Lyons D , Sears DD , Wendel D , Lovelace E , Pierce E , TerAvest E , Bolyen E , Bushman FD , Wu GD , Church GM , Saxe G , Holscher HD , Ugrina I , German JB , Caporaso JG , Wozniak JM , Kerr J , Ravel J , Lewis JD , Suchodolski JS , Jansson JK , Hampton-Marcell JT , Bobe J , Raes J , Chase JH , Eisen JA , Monk J , Clemente JC , Petrosino J , Goodrich J , Gauglitz J , Jacobs J , Zengler K , Swanson KS , Lewis K , Mayer K , Bittinger K , Dillon L , Zaramela LS , Schriml LM , Dominguez-Bello MG , Jankowska MM , Blaser M , Pirrung M , Minson M , Kurisu M , Ajami N , Gottel NR , Chia N , Fierer N , White O , Cani PD , Gajer P , Strandwitz P , Kashyap P , Dutton R , Park RS , Xavier RJ , Mills RH , Krajmalnik-Brown R , Ley R , Owens SM , Klemmer S , Matamoros S , Mirarab S , Moorman S , Holmes S , Schwartz T , Eshoo-Anton TW , Vigers T , Pandey V , Treuren WV , Fang X , Zech Xu Z , Jarmusch A , Geier J , Reeve N , Silva R , Kopylova E , Nguyen D , Sanders K , Salido Benitez 
RA , Heale AC , Abramson M , Waldispühl J , Butyaev A , Drogaris C , Nazarova E , Ball M , Gunderson B. 2018 . American Gut: an Open Platform for Citizen Science Microbiome Research. 3 . mSystems 3 :e00031-18. 56. ↵ Thompson LR , Sanders JG , McDonald D , Amir A , Ladau J , Locey KJ , Prill RJ , Tripathi A , Gibbons SM , Ackermann G , Navas-Molina JA , Janssen S , Kopylova E , Vázquez-Baeza Y , González A , Morton JT , Mirarab S , Zech Xu Z , Jiang L , Haroon MF , Kanbar J , Zhu Q , Jin Song S , Kosciolek T , Bokulich NA , Lefler J , Brislawn CJ , Humphrey G , Owens SM , Hampton-Marcell J , Berg-Lyons D , McKenzie V , Fierer N , Fuhrman JA , Clauset A , Stevens RL , Shade A , Pollard KS , Goodwin KD , Jansson JK , Gilbert JA , Knight R . 2017 . A communal catalogue reveals Earth’s multiscale microbial diversity. 7681 . Nature 551 :457–463. 57. ↵ Caporaso JG , Kuczynski J , Stombaugh J , Bittinger K , Bushman FD , Costello EK , Fierer N , Peña AG , Goodrich JK , Gordon JI , Huttley GA , Kelley ST , Knights D , Koenig JE , Ley RE , Lozupone CA , McDonald D , Muegge BD , Pirrung M , Reeder J , Sevinsky JR , Turnbaugh PJ , Walters WA , Widmann J , Yatsunenko T , Zaneveld J , Knight R . 2010 . QIIME allows analysis of high-throughput community sequencing data. 5 . Nat Methods 7 :335–336. 58. ↵ Edgar RC . 2010 . Search and clustering orders of magnitude faster than BLAST. 19 . Bioinformatics 26 :2460–2461. 59. ↵ Caporaso JG , Bittinger K , Bushman FD , DeSantis TZ , Andersen GL , Knight R . 2010 . PyNAST: a flexible tool for aligning sequences to a template alignment. 2 . Bioinformatics 26 :266–267. 60. ↵ Price MN , Dehal PS , Arkin AP . 2009 . FastTree: computing large minimum evolution trees with profiles instead of a distance matrix. 7 . Mol Biol Evol 26 :1641–1650. 61. ↵ Silverman JD , Shenhav L , Halperin E , Mukherjee S , David LA . 2018 . Statistical Considerations in the Design and Analysis of Longitudinal Microbiome Studies doi: 10.1101/448332 . 
OpenUrl Abstract / FREE Full Text 62. ↵ Diggle PJ , Heagerty P , Liang K , Zeger S . 2002 . Analysis of Longitudinal Data . OUP Oxford . 63. ↵ Virtanen P , Gommers R , Oliphant TE , Haberland M , Reddy T , Cournapeau D , Burovski E , Peterson P , Weckesser W , Bright J , van der Walt SJ , Brett M , Wilson J , Millman KJ , Mayorov N , Nelson ARJ , Jones E , Kern R , Larson E , Carey CJ , Polat İ, Feng Y, Moore EW, VanderPlas J, Laxalde D, Perktold J, Cimrman R, Henriksen I, Quintero EA, Harris CR, Archibald AM, Ribeiro AH, Pedregosa F, van Mulbregt P. 2020 . SciPy 1.0: fundamental algorithms for scientific computing in Python. 3 . Nat Methods 17 :261–272. 64. ↵ Karwowska Z , Szczerbiak P , Kosciolek T . 2023 . Microbiome time series data reveal predictable patterns of change doi: 10.1101/2023.06.08.544023 . OpenUrl Abstract / FREE Full Text View the discussion thread. Back to top Previous Next Posted February 27, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Ecological relationships between human gut bacteria predicted from analysis of dense microbiome time series data from US travelers in Bangladesh Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Ecological relationships between human gut bacteria predicted from analysis of dense microbiome time series data from US travelers in Bangladesh Casey G. Martin , Laurie M. 
Lyon , Antonio Gonzalez , Rob Knight , Catherine Lozupone bioRxiv 2025.02.27.639550; doi: https://doi.org/10.1101/2025.02.27.639550 Share This Article: Copy Citation Tools Ecological relationships between human gut bacteria predicted from analysis of dense microbiome time series data from US travelers in Bangladesh Casey G. Martin , Laurie M. Lyon , Antonio Gonzalez , Rob Knight , Catherine Lozupone bioRxiv 2025.02.27.639550; doi: https://doi.org/10.1101/2025.02.27.639550 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Microbiology Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17691) Bioengineering (13892) Bioinformatics (41937) Biophysics (21452) Cancer Biology (18588) Cell Biology (25504) Clinical Trials (138) Developmental Biology (13378) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24320) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88605) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15156) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-05-24T02:00:01.246996+00:00
License: CC-BY-4.0