Phylogenetic and functional diversity among Drosophila-associated metagenome-assembled genomes

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 76,785 characters · extracted from preprint-html · click to expand
Phylogenetic and functional diversity among Drosophila-associated metagenome-assembled genomes | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Phylogenetic and functional diversity among Drosophila -associated metagenome-assembled genomes View ORCID Profile Aaron A. Comeault , Alberto H. Orta , David Fidler , Tobias Nunn , Amy R. Ellison , Tayte A. Anspach , View ORCID Profile Daniel R. Matute doi: https://doi.org/10.1101/2024.12.19.629488 Aaron A. 
Comeault 1 School of Environmental and Natural Sciences, Molecular Ecology & Evolution Group, Prifysgol Bangor University , Bangor, LL57 2DG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Aaron A. Comeault For correspondence: a.comeault{at}bangor.ac.uk Alberto H. Orta 1 School of Environmental and Natural Sciences, Molecular Ecology & Evolution Group, Prifysgol Bangor University , Bangor, LL57 2DG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site David Fidler 1 School of Environmental and Natural Sciences, Molecular Ecology & Evolution Group, Prifysgol Bangor University , Bangor, LL57 2DG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tobias Nunn 1 School of Environmental and Natural Sciences, Molecular Ecology & Evolution Group, Prifysgol Bangor University , Bangor, LL57 2DG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Amy R. Ellison 1 School of Environmental and Natural Sciences, Molecular Ecology & Evolution Group, Prifysgol Bangor University , Bangor, LL57 2DG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tayte A. Anspach 2 Department of Biology, University of North Carolina , Chapel Hill, North Carolina, 27599, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Daniel R. Matute 2 Department of Biology, University of North Carolina , Chapel Hill, North Carolina, 27599, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Daniel R. Matute Abstract Full Text Info/History Metrics Preview PDF Abstract Host-associated microbial communities can mediate interactions between their hosts and biotic and abiotic environments. 
While much work has been done to document how microbiomes vary across species and environments, much less is known about the functional consequences of this variation. Here, we test for functional variation among drosophilid-associated bacteria by conducting Oxford Nanopore long-read sequencing and generating metagenome-assembled genomes (MAGs) from six species of drosophilid fly collected in association with ‘anthropogenic’ environments in North America, Europe, and Africa. Using phylogenetic analyses, we find that drosophilid flies harbor a diverse microbiome that includes core members closely related to the genera Gilliamella , Orbus , Entomomonas , Dysgonomonas , and others. Comparisons with publicly available bacterial genomes show that many of these genera are associated with phylogenetically diverse insect gut microbiomes. Using functional annotations and predicted secondary metabolite biosynthetic gene clusters, we show that MAGs belonging to different bacterial orders and genera vary in gene content and predicted functions including metabolic capacity and how they respond to environmental stressors. Our results provide evidence that wild drosophilid flies harbor phylogenetically and functionally diverse microbial communities. These findings highlight a need to quantify the abundance and function of insect-associated bacteria from the genera Gilliamella , Orbus , Entomomonas , and others on the performance of their insect hosts across diverse environments. Introduction Host-associated microbial communities (microbiomes) affect the biology of their hosts and how those hosts interact with the environment in diverse ways. Despite the clear effects that microorganisms can have on performance and fitness of their hosts, many studies of wild populations of non-model microbiomes have characterized variation in the microbiome using 16S rRNA gene community profiling ( 1 ). 
While 16S rRNA gene community profiling has provided important insight into how biotic and abiotic variation can affect microbiome composition ( 2 ), we know less about how specific microorganisms—across diverse hosts and environments—affect functional or performance traits of the microbiome and host species. Estimating functional traits of specific microorganisms is one way that we can begin to generate a mechanistic understanding of how members of microbial communities affect host performance. Insects—the most diverse group of animals—provide valuable ecosystem services (e.g. pollination and nutrient cycling) ( 3 ), are important agricultural pests, and vector a range of diseases, some with economic impacts of billions of US dollars per year ( 4 ). Insect-associated microorganisms can affect these processes by detoxifying environmental and dietary toxins ( 5 ), moderating vectorial capacity ( 6 ), conferring resistance to infection of their hosts ( 7 , 8 ), and even contributing to successful biological invasions ( 9 ). Among insects, fruit flies from the genus Drosophila (particularly Drosophila melanogaster ) have emerged as a model system used to study host-microbe interactions ( 10 – 12 ). Research on host-microbiome interactions in D. melanogaster has largely focused on the effects of interactions with bacteria from the genera Acetobacter and Lactobacillus . For example, Acetobacter have been shown to promote larval development when larvae are raised on deficient diets, suggesting links between bacteria and host nutrition ( 13 ); mate choice in adult D. melanogaster can be influenced by Lactobacillus acquired through diet ( 14 ); and experimental manipulation of Acetobacter and Lactobacillus together can lead to rapid evolution in D. melanogaster , indicating that these bacteria are a source of selection that can contribute to local adaptation ( 15 , 16 ). 
However, the microbial communities of wild versus lab-reared Drosophila differ significantly, and Acetobacter and Lactobacillus can be rare in wild Drosophila ( 10 , 17 ). Abundances of these genera have also been shown to vary across laboratory strains of 11 Drosophila species ( 18 ). The few studies that estimate microbial diversity in wild Drosophila have shown that they harbor a more diverse microbiome than laboratory populations ( 10 , 19 ). Common members of wild Drosophila microbiomes include bacteria from the orders Bacteroidales and Pseudomonadales, and families Orbaceae, Enterobacteriaceae, and Enterococcaceae ( 10 , 19 , 20 ). Therefore, while laboratory experiments in D. melanogaster provide evidence that symbiotic or commensal bacteria can affect diverse aspects of their host’s biology, differences between the bacteria that are the focus of laboratory studies and those reported in wild Drosophila highlight a need to better understand assembly rules and functional roles of members of wild Drosophila microbiomes. Despite significant variation among the microbiomes of Drosophila species and individuals, the composition of wild Drosophila microbiomes suggests that these communities are not randomly assembled from a common pool of microorganisms, and patterns of co-occurrence suggest negative interactions between certain bacterial taxa ( 19 ). Recent metagenomic analyses also provide correlative evidence for functional variation among the gut bacteria associated with three mushroom-feeding Drosophila species ( 20 ). However, we still have a poor understanding of the functional traits possessed by specific Drosophila -associated bacteria. Quantifying functional variation among Drosophila -associated bacteria would facilitate clearer hypotheses and predictions of the impacts of host-associated bacteria on host performance and fitness. 
It would also allow researchers to leverage the ecological and evolutionary diversity found among species of Drosophila ( 21 – 24 ) to better understand the processes and mechanisms that shape host-microbiome interactions in the wild. The gut microbiome of honeybees (genus Apis ) is arguably the best functionally characterized insect microbiome ( 25 , 26 ). For example, functional work on Gilliamella and Snodgrassella —core members of the bee microbiome—has shown how these bacteria possess complementary metabolic pathways and genes that affect colonization of the gut ( 27 ). Gilliamella apicola is also capable of metabolizing toxic sugars ( 28 ), and screens for biosynthetic gene clusters possessed by G. apis have identified unique ribosomally synthesized, post-translationally modified peptides (RiPPs) that protect Apis mellifera from infection by the pathogen Melissococcus plutonius ( 7 ). These examples illustrate how identifying core microbiome taxa, along with using ‘bottom-up’ genome annotation approaches to predict their functional capacities, can be important first steps towards identifying genes that mediate microbe-microbe interactions and affect host fitness. In this study we mine whole-organism long-read Oxford Nanopore Technologies (ONT) sequences to study phylogenetic and functional diversity among bacteria associated with six species of drosophilid fly. Four of the six host species we study— D. immigrans , D. hydei , D. repleta , and Zaprionus indianus —are geographically widespread and found in association with “anthropogenic” environments (sometimes referred to as “human commensals”), such as agricultural fields and compost heaps. The other two species— Z. taronus and Z. tsacasi —are forest dwellers found in sub-Saharan Africa. (We note that the genus Drosophila is paraphyletic, with Zaprionus nested within Drosophila ( 24 ), and as such, for brevity, we include the genus Zaprionus when referring to “ Drosophila ” samples and bacteria throughout.) 
For each host sample, we identified and classified bacterial sequences from ONT sequences generated from individual wild-caught flies to estimate microbiome diversity and core microbial taxa. We then generated metagenome-assembled genomes (MAGs) for each sample and classified MAGs against the Genome Taxonomy Database. Finally, we annotated high-quality MAGs and compared functional predictions among taxonomic groups. We were able to recover a diverse set of microbial reads from whole-organism sequencing and found that our Drosophila samples are host to a diverse microbiome, with some ‘core’ taxa being shared among species and sample locations. Phylogenomic analyses of MAGs assembled for these core taxa show that they are phylogenetically related to bacteria from the orders Enterobacterales, Pseudomonadales, and Bacteroidales that have been sequenced from other insects, including Gilliamella and Frischella bacteria that are core members of the Apis (honeybees) and Bombus (bumble bees) gut microbiome. Finally, we show that Drosophila -associated MAGs from different genera vary in gene content, predicted functional enrichments, and predicted ability to produce secondary metabolic products. Materials and methods Specimen collection We sampled populations of drosophilid flies from the United Kingdom, United States of America, and the island of São Tomé (São Tomé and Príncipe) (table S1). Individual flies were attracted to banana traps and then collected within 12 hours via aspiration or sweep netting. Flies were then briefly anesthetized with FlyNap (Carolina Biological, USA), identified under a microscope, and preserved in 100% ethanol. We focused our sequencing effort on the ‘human commensal’ species D. hydei (N=8), D. repleta (N=4), D. immigrans (N=13), and Zaprionus indianus (N=4) collected from sites in the USA, UK, and São Tomé and Príncipe (table S1). We also sampled one individual from each of the forest specialists Z. taronus and Z. 
tsacasi from São Tomé and Príncipe. DNA isolation and sequencing DNA was isolated from individual flies using a phenol chloroform protocol developed for obtaining high molecular weight DNA from drosophilid flies for sequencing on ONT sequencers ( 21 ). Prior to DNA isolation, tissues were hydrated in hydration buffer and homogenized with sterilized pestles in tissue lysis buffer (see dx.doi.org/10.17504/protocols.io.dm6gpbdn8lzp/v2). Sequence libraries were prepared from individual HMW extractions following a modified Oxford Nanopore Ligation Sequencing Kit protocol (dx.doi.org/10.17504/protocols.io.dm6gpbdn8lzp/v2) and LSK-110 kits. We sequenced each library on individual R9.4.1 flow cells run on Oxford Nanopore MinION MK1C machines and base called raw reads (in fast5 format) with guppy (v6.3.8), specifying the “super high accuracy” model with the option “--config dna_r9.4.1_450bps_sup.cfg”. Bacterial diversity across samples To summarize bacterial diversity, we classified sequences with Kraken 2 (v2.1.2; ( 29 )) against Kraken’s standard database containing RefSeq archaea, bacteria, viral, plasmid, human, and UniVec_Core sequences. We then generated BIOM-format taxonomic summaries for each sample using the kraken-biom tool ( 30 ). Combined BIOM tables were imported and converted into a phyloseq object in R using import_biom and phyloseq functions from the phyloseq library ( 31 ). We removed non-bacterial taxa and taxa with low variance across samples (threshold = $1 \times 10^{-7}$). To identify the most abundant bacteria within each sample we first visualized variation in the proportion of reads assigned to bacterial classes. To summarize differences in the relative abundances of bacteria across samples we converted the phyloseq object to a DGElist using the ‘ phyloseq_to_edgeR ’ function from the PathoStat library ( 32 ) and conducted a multidimensional scaling analysis with the plotMDS function from the limma R package using the “pairwise” gene comparison method. 
Because sequencing resulted in uneven sequencing depths across samples, we conducted a redundancy analysis (RDA) using the rda function from the vegan R library. We specifically modeled taxonomic abundance (read counts per bacterial taxon) as a function of the interaction between total sequencing depth and host species. Metagenome assembly and classification Using read-level classifications generated by Kraken2 , we identified and isolated bacterial reads from whole-organism fastqs by selecting reads that were not unclassified or classified as "Homo" using seqtk ’s ‘ subseq ’ command (v1.3-r106; https://github.com/lh3/seqtk ). We then assembled metagenomic contigs from the pool of putatively bacterial reads with metaFlye (v2.9; ( 33 )), and polished assembled contigs with medaka (v1.7.2; ( 34 )). Polished contigs were binned into MAGs using metaBAT2 ( 35 ) and levels of completeness and contamination were assessed using CheckM (‘lineage_wf’; v1.1.3; ( 36 )). We used GTDB-TK ’s ‘ classify_wf ’ pipeline ( 37 ) to assign taxonomy and determine phylogenetic relationships among MAGs with completeness greater than 45% and contamination less than 10% (as determined by CheckM ). GTDB-TK leverages the Genome Taxonomy Database ( 38 ) and tools that allow for taxonomic assignment through sequence clustering, alignment, and large-scale phylogenetic reconstruction ( 39 – 44 ). To explore relationships among drosophilid MAGs and publicly available bacterial genomes, we also carried out focused analyses on MAGs from bacterial genera that were abundant across our set of assembled MAGs (see results) using GTDB-TK ’s ‘ de_novo_wf ’ pipeline. Similar to classify_wf , the de_novo_wf pipeline uses Prodigal ( 40 ) and HMMER ( 39 ) to identify marker genes in each MAG. Genes are then concatenated and phylogenetic relationships are inferred using FastTree ( 43 ) run with the WAG+GAMMA model. 
These analyses applied taxonomic filters at the order level and required a minimum of 50% of amino acids for a given MAG to be included in the alignment for that MAG to be retained in the analysis. We subsequently implemented de_novo_wf analyses separately for the orders Enterobacterales, Bacteroidales, and Pseudomonadales (outgroups: Pseudomonadales, Sphingobacteriales, and Enterobacterales, respectively). Finally, we manually pruned phylogenetic trees produced by this analysis and compared the host organisms of publicly available bacterial genomes that were within the same genera as focal drosophilid MAGs. Annotation and functional characterization of MAGs We annotated MAGs with CheckM completeness greater than 45% and contamination less than 10% using eggNOG-mapper v2 ( 45 , 46 ) run with eggNOG v5.0 ( 47 ) and protein predictions made by Prodigal ( 40 ). We focused functional comparisons on lineages of bacteria for which we had multiple MAGs assembled from either different drosophilid species or different locations within the same species. Specifically, we focused on MAGs assembled for bacterial taxa within the orders Enterobacterales (N=27 MAGs), Pseudomonadales (N=21), and Bacteroidales (N=14), as species within these orders are likely to be core (or symbiotic) members of the microbiomes of the Drosophila species we sampled (see Results). For gene sets annotated for each MAG within these three orders, we identified COG categories and KEGG Orthology (KO) pathway maps that were enriched at the genome level using COG and KEGG annotations generated by eggNOG-mapper and the ‘ enrichCOG ’ and ‘ enrichKO ’ functions in the MicrobiomeProfiler R library (v1.6.1; ( 48 )). We compared functional diversity among groups of Drosophila -associated bacteria by identifying COG categories and KEGG pathway maps that were enriched in at least 80% of the focal group being assessed and in less than 50% of all other MAGs in our dataset. 
We tested for functional enrichment among MAGs from each of the genera Gilliamella , Orbus , Acinetobacter , Entomomonas , Pseudomonas , and Dysgonomonas . We also compared enrichment in Gilliamella and Orbus MAGs we assembled as part of this study with enrichment in 12 publicly available Gilliamella genomes derived from Apis bees and 13 derived from Bombus bees (table S3; Gilliamella clade in fig 4A ). Finally, we predicted biosynthetic gene products produced by MAGs classified in the orders Enterobacterales, Pseudomonadales, or Bacteroidales using the ‘antibiotics and secondary metabolite analysis shell’ ( antiSMASH v7; ( 49 )). We first annotated genes in each MAG belonging to these three orders using Prokka (v1.14.5) run with default parameters. Prokka annotations were then used as input to antiSMASH. We ran the online version of antiSMASH ( https://antismash.secondarymetabolites.org/ ; ( 49 )) with relaxed detection strictness and all options on. For visualization and analyses we labeled singleton biosynthetic classes as “Others” and nonribosomal peptides’ hybrid classes as “Other NRPS”. We performed Kruskal–Wallis (KW) and post-hoc Wilcoxon rank-sum tests to determine differences in the frequency of BGCs per MAG between bacterial orders. We fit a linear model to test for effects of genome size and bacterial order on the ($\log_{10}$) number of BGCs per MAG. In this analysis we first tested for an interaction between genome size and bacterial order, but this was not significant; we therefore included genome size and bacterial order as predictors in the final model, but not the interaction between the two. Finally, to test whether the presence/abundance of BGC classes varied across bacterial orders we conducted a permutational analysis of variance (PERMANOVA) on Euclidean distances between MAGs as implemented with the adonis function in the vegan package in R (Oksanen et al. 2020). 
Results Bacterial diversity across sampled reads By mining whole-organism sequence reads with Kraken2 we recovered a median of 198,036 bacterial reads (range: 30,630-5,574,666) spanning a median of 765.21 Mbps of sequence (range 197.91-8,829.46 Mbps) per individual drosophilid fly (table S1; fig. S1). Across all samples, sequence reads were uniquely assigned to 12 phyla, 23 classes, 62 orders, 114 families, and 254 genera of bacteria. Gammaproteobacteria, Alphaproteobacteria, Bacilli, and Mollicutes were each the most abundant class of bacteria in sequences obtained from 22, 4, 3, and 2 individuals, respectively; and Gammaproteobacteria, Alphaproteobacteria, and Bacilli were the second most abundant classes of bacteria in individuals where they were not the most abundant, except for one individual with Epsilonproteobacteria being the second most abundant class (13.4% of reads) and three individuals with Flavobacteria as the second most abundant class (13.1% to 26.0% of reads) ( fig. 1A ; fig. S2). The percentage of reads assigned to the most abundant class of bacteria per individual ranged from 25.0% to 90.2%, while the percentage of reads assigned to the second most abundant class of bacteria per individual ranged from 3.9% to 43.1% ( fig. 1A ). Reads assigned to Acetobacter and Lactobacillus —the two genera of bacteria that are the focus of laboratory research in Drosophila melanogaster —were relatively rare across our samples, with median proportions of reads per sample of 0.18% (90% empirical quantile: 0.03% to 8.25%) and 0.22% (0.04% to 2.51%), respectively. Inspecting rarefaction curves showed that taxonomic classification across reads was saturated for ∼50% of sampled Drosophila ; however, taxon sampling was not saturated in individuals where we recovered fewer than ∼100,000 bacterial reads (fig. S3; table S1). Download figure Open in new tab Figure 1. 
Bacterial sequence reads derived from whole-organism extraction and Nanopore MinION sequencing represent diverse taxa belonging to 23 classes ( A ). Differences in the relative abundances of microbial taxa among samples were affected by the number of sequences classified as bacterial ( B ; dim 1) and by species-level differences in the microbial community ( B ; dim 2). In panel A, individual names include details of the species (dhyd = D. hydei ; drep = D. repleta ; dimm = D. immigrans , zind = Z. indianus , ztaro = Z. taronus , ztsac = Z. tsacasi ), location (USA, UK, or STP [São Tomé and Príncipe]), and sex (fem = female, mal = male). Two individuals in the data set were F1 offspring from wild-caught females and are indicated with * in panel A. We summarized differences in microbial taxa abundances among samples by first conducting a multidimensional scaling analysis. The first principal coordinate axis (PCoA) explained 29% of variation in leading log2-fold changes in bacterial read abundances among samples ( fig. 1B ); however, PCoA dimension 1 scores are correlated with the number of bacterial sequences recovered from a sample (Pearson’s rho = 0.63; P < 0.00015). The second PCoA accounted for 16% of variation among samples and differentiated samples based on host species rather than the location the hosts were collected from ( fig. 1B ). Redundancy analysis on the taxonomic matrix revealed that the interaction between sequencing depth and host species significantly affected bacterial diversity and abundance (model $R^2$ = 81.35%; permutation test: $F_{9,21}$ = 15.54; P = 0.003; fig. S4). Classification and analysis of bacterial sequences therefore allowed us to quickly identify diverse bacterial communities that varied among host species. 
However, we do not explore community diversity further because the ONT sequencing we conducted is PCR-free whole-genome shotgun sequencing; therefore, variation in read abundance could be affected by factors that include differences in taxonomic abundance, genome size, and sequence classification accuracy among bacterial taxa and across their genomes. Bacterial diversity across MAGs Across all samples, we assembled 143,751 contigs that were then binned into 366 MAGs (table S1). We retained 103 ‘focal’ MAGs after filtering for CheckM completeness scores greater than 45% and contamination less than 10%. We were unable to recover MAGs from six of our 31 sampled host individuals, and both the number of contigs and MAGs assembled from a sample were correlated with the amount of bacterial sequence data recovered from the whole-organism sequences (Kendall’s tau = 0.65 and 0.58, respectively; both P < $1 \times 10^{-5}$; fig. S5). Largely consistent with read-level classifications reported by Kraken2 , the most abundant classes of bacteria across the 103 focal MAGs were Gammaproteobacteria (55 MAGs), Bacteroidia (21 MAGs), Bacilli (11 MAGs), Alphaproteobacteria (10 MAGs), Clostridia (4 MAGs), and Campylobacteria (2 MAGs). Using GTDB-TK taxonomy, we identified seven orders that were represented by at least 5 MAGs: Enterobacterales (N=27), Pseudomonadales (N=21), Bacteroidales (N=14), Lactobacillales (N=9), Acetobacterales (N=7), Flavobacteriales (N=5), and Burkholderiales (N=5) ( fig. 2A ). Three of these orders—Enterobacterales, Pseudomonadales, and Bacteroidales—were represented by multiple MAGs assembled from at least two host species, and within at least one of those hosts they were assembled from samples collected at multiple locations ( fig. 2B-D ). Within these three orders, seven genera were represented by MAGs assembled from D. hydei , D. repleta , and D. immigrans (our most widely sampled species) and/or from multiple geographic locations ( fig. 3 ). 
We focused functional analyses on the 62 MAGs from bacteria belonging to these three orders and seven genera because they are either ecologically associated with a common environment used by the species we sampled, or they are evolutionarily associated as core members of the human-commensal drosophilid microbiome. Download figure Open in new tab Figure 2. Phylogenetic classifications of drosophilid-derived MAGs. A) Phylogenetic relationships among Drosophila MAGs from maximum-likelihood placement by pplacer as implemented in GTDB-TK’s ‘classify_wf’ pipeline against the GTDB-Tk reference tree. Phylogenetic relationships among Drosophila MAGs from the three most abundant orders—Enterobacterales (B), Pseudomonadales (C), and Bacteroidales (D)—generated using GTDB-Tk’s ‘de_novo_wf’ pipeline and highlighting genera represented by multiple MAGs within each order. In panels B through D, details on the host species, individual sample identifier, and location, are reported in tip names (details as in Figure 1 ). Download figure Open in new tab Figure 3. Core bacterial genera are shared among Drosophila species ( A ) and across geographic regions we sampled ( B ). Bacterial genera (number inside circles) and the number of MAGs assembled for each genus (beside generic names) are given for each scenario of overlap. Bacterial genera highlighted in orange ( Entomomonas , Gilliamella , and Pseudomonas_E ) were only assembled from D. immigrans hosts. Note that numbers of MAGs belonging to Dysgonomonas and Orbus include MAGs assembled from Z. indianus (see Figure 2 ), and D. repleta were grouped with D. hydei for this analysis. Endosymbiotic bacteria sequences were also abundant in four of our 31 samples: sequences classified as Wolbachia were abundant in Z. taronus and Z. 
tsacasi collected on São Tomé (39,265 and 2,675 reads, respectively), and high-quality MAGs classified in the diverse group of Wolbachia pipientis were assembled from both samples (CheckM completeness > 99% and contamination = 0.00%; table S2). Sequences classified as Spiroplasma were abundant in two D. hydei collected from the UK (86,369 and 76,409 reads), and MAGs classified as Spiroplasma poulsonii were assembled from both samples (CheckM completeness = 98.5% and 72.18% and contamination = 3.01% and 0.00%, respectively; table S2). To explore the hypotheses that ‘focal’ bacterial genera are either found in a common environment shared among human commensal drosophilids or are evolutionarily associated members of the core drosophilid microbiome, we compared MAGs to publicly available bacterial genomes using GTDB-TK ’s ‘ de_novo_wf ’ pipeline. Drosophilid-derived MAGs classified as Gilliamella and Orbus were found to be closely related to bacteria belonging to the genera Gilliamella , Orbus , and Frischella that have been assembled from diverse honey bee ( Apis ) and bumble bee ( Bombus ) hosts ( fig 4A ) ( 27 , 50 ). Gilliamella and Orbus MAGs derived from the drosophilid hosts we sampled tended to form monophyletic clades relative to bacteria from honey or bumble bee hosts, suggesting host-specific divergence ( fig 4A ). Orbales have also been reported in metagenomic studies of wild mushroom-feeding Drosophila ( 19 , 20 ), suggesting broad associations between these bacteria and drosophilid hosts. 
Drosophilid-derived MAGs classified in the genus Dysgonomonas were found to be closely related to Dysgonomonas genomes derived from diverse sources, including environmental samples, insects, and mammals ( fig 4B ; table S3); and drosophilid-derived MAGs classified in the genus Entomomonas were related to two Entomomonas species in the GTDB-Tk database—one was derived from an eastern honey bee ( Apis cerana ) host and the other from a house cricket ( Acheta domesticus ) ( fig 4C ). Phylogenetic comparisons to publicly available genomes therefore suggest that many of the MAGs from the genera Gilliamella , Orbus , and Entomomonas are associated as core members of the insect (gut) microbiome. Download figure Open in new tab Figure 4. Phylogenetic relationships among focal MAGs assembled from drosophilid flies (bold type) and publicly available genomes included in GTDB-Tk’s ‘de_novo_wf’ pipeline. A ) Enterobacterales MAGs from drosophilid hosts are closely related to genomes from the genera Gilliamella , Orbus , and Frischella that have been assembled from diverse honey bee ( Apis ) and bumble bee ( Bombus ) hosts. B) Drosophilid MAGs from the genus Dysgonomonas are closely related to diverse Dysgonomonas genomes assembled from insect, mammal, and environmental sources (sources not shown in panel B). C) Drosophilid MAGs from the genus Entomomonas are closely related to genomes from Entomomonas sequenced from the eastern honey bee ( Apis cerana ) and the house cricket ( Acheta domesticus ). Functional variation among drosophilid-associated bacteria We quantified enrichment in COG categories and KEGG pathways among MAGs belonging to the seven focal bacterial genera described above to test for evidence of functional differences among them. We found evidence for variation in enrichment of 12 COG categories and 27 KEGG pathways across genes annotated in each MAG ( fig. 5 ). 
Comparing enrichment of KEGG pathways among bacterial genera, we found that greater than 80% of drosophilid-derived Gilliamella or Orbus genomes are enriched for genes belonging to 10 KEGG pathways that are under-enriched (i.e. fewer than 50% of MAGs with enrichment) in the 35 Pseudomonadales and Bacteroidales MAGs in our dataset ( fig 5B ). Acinetobacter, Entomomonas, or Pseudomonas genomes are enriched for genes belonging to 14 KEGG pathways that are under-enriched in the 43 Enterobacterales and Bacteroidales MAGs in our dataset ( fig 5C ), while Dysgonomonas genomes are enriched for genes belonging to 4 KEGG pathways that are under-enriched in the 48 Pseudomonadales and Enterobacterales MAGs in our dataset ( fig 5D ). Download figure Open in new tab Figure 5. MAGs belonging to different bacterial genera differ in their functional gene content. A) COG categories that show variation in enrichment among MAGs. B) KEGG pathways that are enriched in greater than 80% of the MAGs assembled from Gilliamella or Orbus (order: Enterobacterales) but were enriched in fewer than 50% of other MAGs in our dataset. C) As in B), but with overrepresented enrichment within genera Acinetobacter , Entomomonas , or Pseudomonas (order: Pseudomonadales). D) As in B), but with overrepresented enrichment within the genus Dysgonomonas (order: Bacteroidales). KEGG pathways that are enriched in Gilliamella or Orbus MAGs tend to be enriched across both genera: an average of 87.5% of MAGs in these genera show enrichment in the same 10 KEGG pathways (range = 58.3 to 100%; fig 5B ). Seven of the 10 KEGG categories that are enriched in Gilliamella and Orbus MAGs are also enriched in more than 50% of other Enterobacterales MAGs (map00051, map00052, map00480, map00500, map00564, map01503, and map02060), suggesting functions associated with these pathways are shared across the Enterobacterales species associated with drosophilid hosts. 
However, three KEGG pathways (map00040, map00053, and map01501) are enriched in fewer than 50% of other Enterobacterales MAGs, suggesting possible unique or enhanced functions within Gilliamella and/or Orbus . Two of these three pathways contain genes involved in carbohydrate metabolism—specifically in pentose and glucuronate interconversions (map00040) and ascorbate and aldarate metabolism (map00053)—while the third (map01501) is involved in resistance to beta-lactam antibiotics. Interestingly, map00053 is only enriched in one of the three Gilliamella MAGs while map01501 is enriched in all three Gilliamella MAGs and 6 of the 12 (50%) Orbus MAGs, suggesting potential functional differences between strains of these closely related genera. In contrast to Enterobacterales, there is more variation in enrichment among Pseudomonadales from the genera Acinetobacter, Entomomonas, and Pseudomonas : an average of 62.7% of MAGs from these genera show enrichment in the same KEGG pathway (range = 33.3% to 93.3%; fig 5C ). Within Bacteroidales, the four KEGG pathways that are enriched across the 10 drosophilid-derived Dysgonomonas MAGs tend to be enriched in the other Bacteroidales MAGs (50% - 100%); however, we note that there are only 4 MAGs assigned to the order Bacteroidales that were not within the genus Dysgonomonas in our dataset. Nonetheless, all four KEGG pathways enriched in Dysgonomonas (map00051, map00541, map00680, and map03420) were rarely enriched in non-Bacteroidales (44.2%, 16.9%, 20.1%, and 2.3% of MAGs with enrichment, respectively; fig 5D ). Map00051 was enriched in Dysgonomonas, Gilliamella, and Orbus genomes ( fig 5D , first column) and is involved in the metabolism of fructose and mannose sugars. The other three pathways contain genes involved in glycan sugar biosynthesis and metabolism, methane metabolism, and nucleotide excision repair. 
Taken together, variation in KEGG pathway enrichment across Drosophila-associated MAGs indicates that bacterial members of the Drosophila microbiome are functionally different; for example, via differences in metabolic capacity for various substrates (e.g. carbohydrates versus methane) or the ability to resist environmental stressors (e.g. antimicrobials or general DNA damage). We also found that Drosophila-associated Gilliamella and Orbus MAGs differed in functional enrichment when compared to publicly available Gilliamella genomes isolated from Apis and Bombus hosts. COG categories N (cell motility) and U (intracellular trafficking, secretion, and vesicular transport) are enriched in Apis- and/or Bombus-associated genomes, but are not enriched across the majority of Drosophila-associated Gilliamella or Orbus MAGs (however, 4/11 [36.36%] of the Orbus MAGs isolated from Drosophila were enriched for COG category U). In addition to COG categories, 11 KEGG map pathways varied in enrichment among Gilliamella ( Drosophila ), Orbus ( Drosophila ), Gilliamella ( Apis ), and Gilliamella ( Bombus ) MAGs or genomes ( table 1 ). Four pathways (map00053, map00561, map00561, and map00630) are enriched in Orbus genomes derived from Drosophila hosts relative to genomes from the other three groups. These pathways contain genes involved in the metabolism of various carbohydrates and lipids ( table 1 ). One of these pathways (map00630) is also enriched in Gilliamella genomes derived from hosts in the genus Apis . Three pathways (map00261, map00450, and map01501) are enriched in Gilliamella derived from Drosophila hosts: maps 00261 and 01501 are both involved in antimicrobial production, and map01501 was also less likely to be enriched in Drosophila-derived MAGs that were not in the genus Gilliamella ( fig. 5B ). 
Four pathways involved in biofilm formation or movement (map02025, map02026, map02030, and map02040) are enriched in Gilliamella derived from both Bombus and Apis hosts. The two pathways involved in biofilm formation—map02025 and map02026—are also enriched in 5 (45%) and 3 (27%) of the Orbus MAGs, respectively. While the KEGG pathways above highlight potential functional differences among Orbaceae from different hosts, we also found 51 KEGG pathways that are enriched across all Orbaceae genomes (table S4). These pathways highlight that Orbaceae also share diverse metabolic and biosynthetic pathways. View this table: View inline View popup Table 1. KEGG pathways that showed variation in enrichment across genomes from Orbus from Drosophila hosts, Gilliamella from Drosophila hosts, Gilliamella from Apis hosts, or Gilliamella from Bombus hosts (see fig. 4A for relationships among these groups). The proportion of genomes of each group that showed enrichment in each pathway is given under columns “ Orbus ”, “ Gill. ( Dros .)”, “ Gill . ( Apis )”, and “ Gill . ( Bombus )”, respectively. Values above 0.8 are highlighted in bold italic font. Biosynthetic potential of drosophilid-associated bacteria We used antiSMASH to annotate secondary metabolite biosynthetic gene clusters (BGCs) in 56 of the 62 MAGs classified as Enterobacterales, Pseudomonadales, or Bacteroidales ( fig. 6A ). Across MAGs we identified 177 biosynthetic gene clusters from 31 small molecule classes and their hybrids (table S5). Across the 56 MAGs that carried at least 1 BGC, we annotated an average of 3 BGCs per MAG, found more BGCs per MAG for MAGs from the order Pseudomonadales (maximum of 14 BGCs per MAG) (KW $\chi^2 = 14.683$, p < 0.001) (fig. S6A), and found a significant positive relationship between the genome size and the number of BGCs per MAG (ANOVA on genome size; F = 48.668, $P = 3.8 \times 10^{-9}$; fig. S6B). 
The most abundant class of BGC was post-translationally modified peptides (RiPP-like) (n=40); followed by aryl polyene (n=28), nonribosomal peptides’ hybrids (Other NRPS) (n=22), nonribosomal peptides (NRPS) (n=19) and beta-lactone (n=15) ( fig. 6B ). In total, MAGs classified as Pseudomonadales possessed 91 biosynthetic gene clusters from 24 BGC classes, Enterobacterales possessed 68 biosynthetic gene clusters from 16 BGC classes, and Bacteroidales possessed 18 biosynthetic gene clusters from 9 BGC classes. The relative abundance of BGC classes differed among our three focal bacterial orders (Permutational MANOVA: F = 3.722; P = 0.001; fig 6 ): for example, 32% of BGCs in Enterobacterales were in the class RiPP-like, while 20% were RiPP-like in Pseudomonadales, and RiPP-like BGCs were absent in Bacteroidales. Aryl polyene class made up 20%, 12%, and 11% of BGCs annotated in Pseudomonadales, Enterobacterales, and Bacteroidales MAGs, respectively; NRPS hybrids (Other NRPS) made up 16%, 15%, and 8% of BGCs annotated in Bacteroidales, Enterobacterales, and Pseudomonadales MAGs; and NRPS made up 22%, 9%, and 8% of BGCs annotated in Bacteroidales, Enterobacterales, and Pseudomonadales MAGs. Beta-lactone BGCs were found in Pseudomonadales (20%) and Enterobacterales (12%) but were absent in Bacteroidales. The only BGC class that was unique to Bacteroidales was hybrid T1PKS+hglE-KS (16%) ( fig. 6C ). MAGs from the genera Gilliamella , Orbus , and Entomomonas possessed 23 biosynthetic gene clusters from 4 BGC classes: aryl polyene (n=2), NRPS (n=9), NRPS-like (n=2), NRPS+NRPS-like hybrid (n=2), and RiPP-like (n=8). The most abundant classes of BGC in Gilliamella , Orbus , and Entomomonas MAGs were aryl polyene (n=2), RiPP-like (n=7), and NRPS (n=5), respectively ( Fig. 7 ). 
Taken together, variation in the predicted BGCs among drosophilid-associated bacteria suggests that these bacteria can produce a diverse range of secondary biosynthetic molecules, and bacteria in different orders vary in the secondary metabolites they produce. Download figure Open in new tab Figure 6. Drosophila MAGs differ in the secondary metabolite biosynthetic gene clusters (BGCs) they possess. A ) Summary of biosynthetic products for MAGs from the Bacteroidales, Pseudomonadales, and Enterobacterales, including details of the host species and location the flies were collected from. Across MAGs, we identified 177 biosynthetic gene products belonging to 18 types of BGC ( B ). The relative abundance of BGC types was significantly different among the three focal orders of bacteria included in this analysis ( C ). Download figure Open in new tab Figure 7. Abundance of biosynthetic gene clusters (BGCs) found in the classes (“Type of BGC”) that were the most common within the focal genera Orbus , Gilliamella , and Entomomonas . Discussion Insect-associated bacteria can have diverse effects on their host’s biology—for example, they can detoxify dietary toxins ( 51 ) and provide protection from infection ( 7 ). However, there is also evidence that experimental manipulation of insect microbiomes (via antibiotic treatment) does not affect growth and development ( 52 ). These conflicting results highlight a need to identify the specific members of host-associated microbial communities and their functions. Consistent with previous studies of the microbiomes of wild drosophilids ( 10 , 19 , 20 ), we recovered a diverse set of host-associated bacterial reads from whole-organism sequencing ( fig. 1 ). By assembling and analyzing MAGs from these reads, we found that the species of Drosophila (including Zaprionus ) we study here are host to diverse lineages of bacteria (figs. 2-4) that differ in their functional gene content (figs. 5-7). 
Bacteria in the family Orbaceae were among the most abundant taxa across samples, and we assembled MAGs from multiple host species and locations (figs 2-4). Orbaceae have previously been reported in metabarcoding studies of wild Drosophila as dominant members of their microbiome ( 19 , 20 , 53 ), and have also been reported from Apis , Bombus , and Xylocopa bees ( 25 – 27 , 50 , 54 ), Eristalis flies ( 55 ), and Heliconius and Sasakia butterflies ( 56 , 57 ). These diverse hosts suggest that Orbaceae may be ‘core’ functional members of insect microbiomes. Gilliamella apicola is a species from the family Orbaceae found in bee hosts that has been shown to have functional metabolic capacities that complement other members of the honey bee gut microbiome ( 27 ), can detoxify toxic sugars found in the diets of bees ( 28 ), and can produce biosynthetic molecules that protect bees from infection by the bacterial pathogen Melissococcus plutonius ( 7 ). Using functional annotations, we found that Drosophila-associated Orbaceae MAGs are enriched for genes involved in pentose and glucuronate interconversions (map00040), ascorbate and aldarate metabolism (map00053), and resistance to beta-lactam antibiotics (map01501) ( fig. 5 ). Drosophilid-associated Orbaceae MAGs also harbored secondary metabolite biosynthetic gene clusters (BGCs) in the aryl polyene and RiPP-like classes. Ribosomally synthesized and post-translationally modified peptides (RiPP) have diverse roles, including playing a role in microbial interactions and antimicrobial activity ( 58 , 59 ), and effects of RiPP-like BGCs on pathogens have been described from Gilliamella strains isolated from bees ( 7 ). The diverse gene products produced by these bacteria highlight how insects and their microbiomes may be an important source of novel bioactive molecule discovery ( 60 ). 
While functional validation is needed—along with quantifying the impacts that drosophilid-associated Orbaceae have on host performance and fitness—pathways and BGCs that are enriched in the drosophilid-associated MAGs we analyze here are candidates that could inform future functional studies. Indeed, a recently developed Pathfinder plasmid system has been verified in Orbaceae bacteria ( 61 ) and could be used to facilitate tests of candidate genes and pathways in Orbaceae isolated from diverse hosts. In addition to bacteria from the family Orbaceae , we assembled four MAGs from the genus Entomomonas from D. immigrans collected in the USA and São Tomé and Príncipe ( fig. 4C ), and MAGs from the genus Dysgonomonas from D. immigrans , D. repleta , and Z. indianus collected from the USA and São Tomé and Príncipe ( fig. 4B ). Phylogenomic comparisons of drosophilid-associated Entomomonas MAGs to publicly available bacterial genomes showed that they are related to Entomomonas strains isolated from different insect orders ( fig. 4C ). By contrast, publicly available genomes from the genus Dysgonomonas are derived from both host-associated and environmental sources ( fig. 4 ); however, Dysgonomonas has been reported as one of the core members of the microbiome of wild cactophilid Drosophila ( 53 ). By comparing relationships among drosophilid-associated MAGs and publicly available microbial genomes, our analyses suggest that Orbaceae , Entomomonas , and (to a lesser extent) Dysgonomonas are bacterial genera that may have evolved to utilize insects as hosts. As insect-associated microbial resources increase, identifying these ‘core’ members of the insect microbiome will facilitate tests of assembly and functional rules governing bacteria–insect interactions—for example, testing whether they represent generalist interactions or tightly coevolved symbioses ( 62 ). 
We annotated an average of three BGCs for each drosophilid-associated MAG belonging to the Enterobacterales, Pseudomonadales, and Bacteroidales, indicating that members of the drosophilid microbiome have the capacity to produce potentially important secondary metabolites, despite generally not having large genomes (fig. S6). Secondary metabolites can play important roles in host-microbiome interactions and have previously been described in members of the gut microbiome of honey and bumble bees ( 7 ), herbivorous turtle ants ( 63 ) and mosquitoes ( 64 ). Secondary metabolites produced by BGCs can be involved in the regulation of symbiosis in fungus-farming termites ( 65 ), pathogenicity of malaria mosquitoes ( 64 ), and the detoxification of β-Methylamino-L-alanine in cycad-feeding insects ( 66 ). BGCs belonging to the RiPP class were the most abundant in our drosophilid-associated MAGs, and may play a particularly important role in the microbiome since it has been observed that they can function to inhibit the growth of pathogens in bees ( 7 ) and can be involved in microbiome-host communication ( 67 ). Likewise, aryl polyenes (the second most abundant class in our dataset) have been observed to function as antioxidants, preventing stress caused by reactive oxygen species produced by the host in the case of bees ( 7 , 68 ). Whether these molecules perform similar functions in drosophilid hosts remains to be confirmed; however, our results contribute to a growing body of work suggesting that BGCs possessed by the gut microbiome bacteria of insects contribute to the biology of their hosts and are a rich source of diverse secondary metabolites ( 7 , 69 ). Wolbachia and Spiroplasma are well known endosymbionts in insects, including in Drosophila ( 70 – 72 ), and we recovered sequences from these genera in four of our 31 samples. From these sequences we assembled MAGs identified as Wolbachia from both Z. tsacasi and Z. taronus hosts in our dataset. Both Z. 
tsacasi and Z. taronus are forest-dwelling species that we collected on the island of São Tomé. Wolbachia infection frequencies have been shown to be highly variable among populations, species, and geographic regions ( 70 , 73 – 76 ) and our results suggest that they are relatively rare across human-commensal drosophilids. Similarly, a screen of Spiroplasma in 35 species of Drosophila found that only three species—all from the “ repleta ” species group—were host to Spiroplasma infections ( 75 ). Spiroplasma infection rates in D. hydei (a member of the repleta species group) from the UK have been shown to vary from 15 to 29% across a 9-year period ( 77 ). The fact that we only found Spiroplasma in two samples of D. hydei from the UK is consistent with these past studies and may be indicative of a phylogenetic (and/or geographic) signal of Spiroplasma infection in Drosophila hosts. However, because we did not sample extensively in any one location, we are unable to estimate infection frequencies. Our data could prove useful for future phylogenetic or comparative genomic studies, and they provide novel MAGs from both Spiroplasma and Wolbachia . Our results show that whole-organism reads generated using long-read ONT sequencers can be mined for metagenomic reads, and these can be used to estimate diversity and differences among host-associated microbial communities ( fig. 1 ). ‘Mined’ bacterial reads can also be assembled into high-quality MAGs when sufficient bacterial sequence is extracted from the total sequence pool (fig. S2; table S2). Mining whole-organism reads to characterize host-associated microbes is likely to be a particularly useful approach when studying organisms where it is challenging to separate microbes from the host, or when microbial communities change when the organism is raised under artificial conditions. However, this approach is limited in not knowing where on the host the microorganisms are located. 
In many cases the location or life-history of the microorganisms can be reasonably inferred from knowledge of closely related taxa (e.g. endosymbionts and gut commensals); however, this information should be confirmed with additional species-specific data. Mining microbial reads from large datasets could be used to extract information from sequencing projects where characterizing the microbial community is not a primary goal. For example, recent work has used whole-organism sequencing of individual Drosophila spp. to generate genomic and phylogenetic resources for the group ( 78 ). This dataset includes wild-caught individuals whose data could be mined to quantify host-associated microbial diversity. Because laboratory and wild Drosophila show significant differences in the microbial communities they are host to ( 10 , 19 ), accurate and transparent metadata need to be published alongside whole-organism sequencing to facilitate meaningful comparisons among host individuals. Moreover, our work highlights a need for functional studies of diverse insect microbiomes to gain a holistic view of the diversity, evolutionary history, and functional roles that insect-associated microorganisms play in their diverse hosts, and the ecosystems they inhabit. Data Availability Raw sequence reads are available on the NCBI SRA under BioProject PRJNA1188364: https://www.ncbi.nlm.nih.gov/bioproject/PRJNA1188364 . Supporting figures, tables, scripts, and MAGs are available at Zenodo: doi:10.5281/zenodo.14173040: https://doi.org/10.5281/zenodo.14173040 . Acknowledgements We thank Darren Obbard for sharing flies collected in the UK and Brandon Cooper for help collecting flies in São Tomé. This work was supported by a Royal Society Research Grant, The Royal Society of London (RGS\R1\221323) to AAC and the NERC Envision Doctoral Training Program (NE/L002604/1) awarded to AHO. References 1. ↵ Muñoz-Benavent M , Pérez-Cobas AE , García-Ferris C , Moya A , Latorre A . 2021 . 
Insects’ potential: Understanding the functional role of their gut microbiome . Journal of Pharmaceutical and Biomedical Analysis 194 : 113787 . OpenUrl CrossRef PubMed 2. ↵ Ferguson LV , Dhakal P , Lebenzon JE , Heinrichs DE , Bucking C , Sinclair BJ . 2018 . Seasonal shifts in the insect gut microbiome are concurrent with changes in cold tolerance and immunity . Functional Ecology 32 : 2357 – 2368 . OpenUrl CrossRef 3. ↵ Maran AM , Weintraub MN , Pelini SL . 2020 . Does stimulating ground arthropods enhance nutrient cycling in conventionally managed corn fields? Agriculture, Ecosystems & Environment 297 : 106934 . OpenUrl CrossRef 4. ↵ Roiz D , Pontifes PA , Jourdain F , Diagne C , Leroy B , Vaissière A-C , Tolsá-García MJ , Salles J-M , Simard F , Courchamp F . 2024 . The rising global economic costs of invasive Aedes mosquitoes and Aedes -borne diseases . Science of The Total Environment 933 : 173054 . OpenUrl CrossRef PubMed 5. ↵ Berasategui A , Salem H , Paetz C , Santoro M , Gershenzon J , Kaltenpoth M , Schmidt A . 2017 . Gut microbiota of the pine weevil degrades conifer diterpenes and increases insect fitness . Molecular Ecology 26 : 4099 – 4110 . OpenUrl CrossRef 6. ↵ Weiss B , Aksoy S . 2011 . Microbiome influences on insect host vector competence . Trends in Parasitology 27 : 514 – 522 . OpenUrl CrossRef PubMed Web of Science 7. ↵ Lang H , Liu Y , Duan H , Zhang W , Hu X , Zheng H . 2023 . Identification of peptides from honeybee gut symbionts as potential antimicrobial agents against Melissococcus plutonius. 1 . Nat Commun 14:7650. 8. ↵ Lesperance DN , Broderick NA . 2020 . Microbiomes as modulators of Drosophila melanogaster homeostasis and disease . Current Opinion in Insect Science 39 : 84 – 90 . OpenUrl CrossRef PubMed 9. ↵ Himler AG , Adachi-Hagimori T , Bergen JE , Kozuch A , Kelly SE , Tabashnik BE , Chiel E , Duckworth VE , Dennehy TJ , Zchori-Fein E , Hunter MS . 2011 . 
Rapid Spread of a Bacterial Symbiont in an Invasive Whitefly Is Driven by Fitness Benefits and Female Bias . Science 332 : 254 – 256 . OpenUrl Abstract / FREE Full Text 10. ↵ Chandler JA , Lang JM , Bhatnagar S , Eisen JA , Kopp A . 2011 . Bacterial Communities of Diverse Drosophila Species: Ecological Context of a Host–Microbe Model System . PLOS Genetics 7 : e1002272 . OpenUrl CrossRef 11. Douglas AE . 2018 . The Drosophila model for microbiome research . Lab Anim 47 : 157 – 164 . OpenUrl CrossRef 12. ↵ Ludington WB , Ja WW . 2020 . Drosophila as a model for the gut microbiome . PLOS Pathogens 16 : e1008398 . OpenUrl CrossRef PubMed 13. ↵ Sannino DR , Dobson AJ , Edwards K , Angert ER , Buchon N . 2018 . The Drosophila melanogaster Gut Microbiota Provisions Thiamine to Its Host . mBio 9 : 10 .1128/mbio.00155-18. OpenUrl CrossRef 14. ↵ Sharon G , Segal D , Ringo JM , Hefetz A , Zilber-Rosenberg I , Rosenberg E . 2010 . Commensal bacteria play a role in mating preference of Drosophila melanogaster . Proceedings of the National Academy of Sciences 107 : 20051 – 20056 . OpenUrl Abstract / FREE Full Text 15. ↵ Rudman SM , Greenblum S , Hughes RC , Rajpurohit S , Kiratli O , Lowder DB , Lemmon SG , Petrov DA , Chaston JM , Schmidt P . 2019 . Microbiome composition shapes rapid genomic adaptation of Drosophila melanogaster . Proceedings of the National Academy of Sciences 116 : 20025 – 20032 . OpenUrl Abstract / FREE Full Text 16. ↵ Walters AW , Hughes RC , Call TB , Walker CJ , Wilcox H , Petersen SC , Rudman SM , Newell PD , Douglas AE , Schmidt PS , Chaston JM . 2020 . The microbiota influences the Drosophila melanogaster life history strategy . Molecular Ecology 29 : 639 – 653 . OpenUrl CrossRef 17. ↵ Brown JJ , Jandová A , Jeffs CT , Higgie M , Nováková E , Lewis OT , Hrček J . 2023 . Microbiome Structure of a Wild Drosophila Community along Tropical Elevational Gradients and Comparison to Laboratory Lines . Applied and Environmental Microbiology 89 : e00099 – 23 . 
OpenUrl CrossRef PubMed 18. ↵ Wong AC-N , Chaston JM , Douglas AE . 2013 . The inconstant gut microbiota of Drosophila species revealed by 16S rRNA gene analysis . The ISME Journal 7 : 1922 – 1932 . OpenUrl CrossRef PubMed 19. ↵ Martinson VG , Douglas AE , Jaenike J . 2017 . Community structure of the gut microbiota in sympatric species of wild Drosophila . Ecology Letters 20 : 629 – 639 . OpenUrl CrossRef PubMed 20. ↵ Bost A , Martinson VG , Franzenburg S , Adair KL , Albasi A , Wells MT , Douglas AE . 2018 . Functional variation in the gut microbiome of wild Drosophila populations . Molecular Ecology 27 : 2834 – 2845 . OpenUrl CrossRef 21. ↵ Kim BY , Wang JR , Miller DE , Barmina O , Delaney E , Thompson A , Comeault AA , Peede D , D’Agostino ER , Pelaez J , Aguilar JM , Haji D , Matsunaga T , Armstrong EE , Zych M , Ogawa Y , Stamenković-Radak M , Jelić M , Veselinović MS , Tanasković M , Erić P , Gao J-J , Katoh TK , Toda MJ , Watabe H , Watada M , Davis JS , Moyle LC , Manoli G , Bertolini E , Košťál V , Hawley RS , Takahashi A , Jones CD , Price DK , Whiteman N , Kopp A , Matute DR , Petrov DA . 2021 . Highly contiguous assemblies of 101 drosophilid genomes . eLife 10 : e66405 . OpenUrl CrossRef PubMed 22. Markow TA , O’Grady P . 2008 . Reproductive Ecology of Drosophila . Functional Ecology 22 : 747 – 759 . OpenUrl CrossRef 23. O’Grady PM , DeSalle R . 2018 . Phylogeny of the Genus Drosophila . Genetics 209 : 1 – 25 . OpenUrl Abstract / FREE Full Text 24. ↵ Suvorov A , Kim BY , Wang J , Armstrong EE , Peede D , D’Agostino ERR , Price DK , Waddell PJ , Lang M , Courtier-Orgogozo V , David JR , Petrov D , Matute DR , Schrider DR , Comeault AA . 2022 . Widespread introgression across a phylogeny of 155 Drosophila genomes . Current Biology 32 : 111 – 123 .e5. OpenUrl CrossRef PubMed 25. ↵ Bonilla-Rosso G , Engel P . 2018 . Functional roles and metabolic niches in the honey bee gut microbiota . Current Opinion in Microbiology 43 : 69 – 76 . 
OpenUrl CrossRef PubMed 26. ↵ Zheng H , Steele MI , Leonard SP , Motta EVS , Moran NA . 2018 . Honey bees as models for gut microbiota research . Lab Anim 47 : 317 – 325 . OpenUrl CrossRef 27. ↵ Kwong WK , Engel P , Koch H , Moran NA . 2014 . Genomics and host specialization of honey bee and bumble bee gut symbionts . Proceedings of the National Academy of Sciences 111 : 11509 – 11514 . OpenUrl Abstract / FREE Full Text 28. ↵ Zheng H , Nishida A , Kwong WK , Koch H , Engel P , Steele MI , Moran NA . 2016 . Metabolism of Toxic Sugars by Strains of the Bee Gut Symbiont Gilliamella apicola . mBio 7 : 10 .1128/mbio.01326-16. OpenUrl CrossRef PubMed 29. ↵ Wood DE , Lu J , Langmead B . 2019 . Improved metagenomic analysis with Kraken 2 . Genome Biology 20 : 257 . OpenUrl CrossRef PubMed 30. ↵ Dabdoub S. 2016 . kraken-biom: Enabling interoperative format conversion for Kraken results (Version 1.2) . 31. ↵ McMurdie PJ , Holmes S . 2013 . phyloseq: An R package for reproducible interactive analysis and graphics of microbiome census data . PLoS ONE 8 : e61217 . OpenUrl CrossRef PubMed 32. ↵ Manimaran S , Bendall M , Diaz SV , Castro E , Faits T , Zhao Y , Federico AN , Johnson WE . 2023 . PathoStat: PathoStat Statistical Microbiome Analysis Package . 33. ↵ Kolmogorov M , Bickhart DM , Behsaz B , Gurevich A , Rayko M , Shin SB , Kuhn K , Yuan J , Polevikov E , Smith TPL , Pevzner PA . 2020 . metaFlye: scalable long-read metagenome assembly using repeat graphs. 11 . Nat Methods 17:1103–1110. 34. ↵ Lee JY , Kong M , Oh J , Lim J , Chung SH , Kim J-M , Kim J-S , Kim K-H , Yoo J-C , Kwak W . 2021 . Comparative evaluation of Nanopore polishing tools for microbial genome assembly and polishing strategies for downstream analysis . Sci Rep 11 : 20740 . OpenUrl CrossRef PubMed 35. ↵ Kang DD , Li F , Kirton E , Thomas A , Egan R , An H , Wang Z . 2019 . MetaBAT 2: an adaptive binning algorithm for robust and efficient genome reconstruction from metagenome assemblies . PeerJ 7 : e7359 . 
OpenUrl CrossRef PubMed 36. ↵ Parks DH , Imelfort M , Skennerton CT , Hugenholtz P , Tyson GW . 2015 . CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes . Genome Res 25 : 1043 – 1055 . OpenUrl Abstract / FREE Full Text 37. ↵ Chaumeil P-A , Mussig AJ , Hugenholtz P , Parks DH . 2022 . GTDB-Tk v2: memory friendly classification with the genome taxonomy database . Bioinformatics 38 : 5315 – 5316 . OpenUrl CrossRef PubMed 38. ↵ Parks DH , Chuvochina M , Rinke C , Mussig AJ , Chaumeil P-A , Hugenholtz P . 2022 . GTDB: an ongoing census of bacterial and archaeal diversity through a phylogenetically consistent, rank normalized and complete genome-based taxonomy . Nucleic Acids Research 50 : D785 – D794 . OpenUrl CrossRef PubMed 39. ↵ Eddy SR . 2011 . Accelerated Profile HMM Searches . PLOS Computational Biology 7 : e1002195 . OpenUrl CrossRef 40. ↵ Hyatt D , Chen G-L , LoCascio PF , Land ML , Larimer FW , Hauser LJ . 2010 . Prodigal: prokaryotic gene recognition and translation initiation site identification . BMC Bioinformatics 11 : 119 . OpenUrl CrossRef PubMed 41. Matsen FA , Kodner RB , Armbrust EV . 2010 . pplacer: linear time maximum- likelihood and Bayesian phylogenetic placement of sequences onto a fixed reference tree . BMC Bioinformatics 11 : 538 . OpenUrl CrossRef PubMed 42. Ondov BD , Treangen TJ , Melsted P , Mallonee AB , Bergman NH , Koren S , Phillippy AM . 2016 . Mash: fast genome and metagenome distance estimation using MinHash . Genome Biology 17 : 132 . OpenUrl CrossRef PubMed 43. ↵ Price MN , Dehal PS , Arkin AP . 2010 . FastTree 2 – Approximately Maximum- Likelihood Trees for Large Alignments . PLoS One 5 : e9490 . OpenUrl CrossRef PubMed 44. ↵ Shaw J , Yu YW . 2023 . Fast and robust metagenomic sequence comparison through sparse chaining with skani . Nat Methods 20 : 1661 – 1665 . OpenUrl CrossRef PubMed 45. ↵ Cantalapiedra CP , Hernández-Plaza A , Letunic I , Bork P , Huerta-Cepas J . 2021 . 
eggNOG-mapper v2: Functional Annotation, Orthology Assignments, and Domain Prediction at the Metagenomic Scale . Molecular Biology and Evolution 38 : 5825 – 5829 . OpenUrl CrossRef PubMed 46. ↵ Huerta-Cepas J , Forslund K , Coelho LP , Szklarczyk D , Jensen LJ , von Mering C , Bork P. 2017 . Fast Genome-Wide Functional Annotation through Orthology Assignment by eggNOG-Mapper . Molecular Biology and Evolution 34 : 2115 – 2122 . OpenUrl CrossRef PubMed 47. ↵ Huerta-Cepas J , Szklarczyk D , Heller D , Hernández-Plaza A , Forslund SK , Cook H , Mende DR , Letunic I , Rattei T , Jensen LJ , von Mering C , Bork P. 2019 . eggNOG 5.0: a hierarchical, functionally and phylogenetically annotated orthology resource based on 5090 organisms and 2502 viruses . Nucleic Acids Research 47 : D309 – D314 . OpenUrl CrossRef PubMed 48. ↵ Yu G , Chen M. 2023 . MicrobiomeProfiler: An R/shiny package for microbiome functional enrichment analysis . https://bioconductor.org/packages/MicrobiomeProfiler . 49. ↵ Blin K , Shaw S , Augustijn HE , Reitz ZL , Biermann F , Alanjary M , Fetter A , Terlouw BR , Metcalf WW , Helfrich EJN , van Wezel GP , Medema MH , Weber T. 2023 . antiSMASH 7.0: new and improved predictions for detection, regulation, chemical structures and visualisation . Nucleic Acids Research 51 : W46 – W50 . OpenUrl CrossRef PubMed 50. ↵ Kwong WK , Moran NA . 2016 . Gut microbial communities of social bees . Nat Rev Microbiol 14 : 374 – 384 . OpenUrl CrossRef PubMed 51. ↵ Ceja-Navarro JA , Vega FE , Karaoz U , Hao Z , Jenkins S , Lim HC , Kosina P , Infante F , Northen TR , Brodie EL . 2015 . Gut microbiota mediate caffeine detoxification in the primary insect pest of coffee . Nat Commun 6 : 7618 . OpenUrl CrossRef PubMed 52. ↵ Phalnikar K , Kunte K , Agashe D . 2019 . Disrupting butterfly caterpillar microbiomes does not impact their survival and development . Proceedings of the Royal Society B: Biological Sciences 286 : 20192438 . OpenUrl CrossRef PubMed 53. 
↵ Martinson VG , Carpinteyro-Ponce J , Moran NA , Markow TA . 2017 . A Distinctive and Host-Restricted Gut Microbiota in Populations of a Cactophilic Drosophila Species . Applied and Environmental Microbiology 83 : e01551-17 . OpenUrl PubMed 54. ↵ Holley JC , Jackson MN , Pham AT , Hatcher SC , Moran NA . 2022 . Carpenter Bees (Xylocopa) Harbor a Distinctive Gut Microbiome Related to That of Honey Bees and Bumble Bees . Applied and Environmental Microbiology 88 : e00203-22 . OpenUrl PubMed 55. ↵ Li J , Sauers L , Zhuang D , Ren H , Guo J , Wang L , Zhuang M , Guo Y , Zhang Z , Wu J , Yao J , Yang H , Huang J , Wang C , Lin Q , Zhang Z , Sadd BM . 2023 . Divergence and convergence of gut microbiomes of wild insect pollinators . mBio 14 : e01270-23 . OpenUrl PubMed 56. ↵ Hammer TJ , McMillan WO , Fierer N . 2014 . Metamorphosis of a Butterfly-Associated Bacterial Community . PLoS One 9 : e86995 . OpenUrl CrossRef PubMed 57. ↵ Kim JY , Lee J , Shin N-R , Yun J-H , Whon TW , Kim M-S , Jung M-J , Roh SW , Hyun D-W , Bae J-W . 2013 . Orbus sasakiae sp. nov., a bacterium isolated from the gut of the butterfly Sasakia charonda, and emended description of the genus Orbus . International Journal of Systematic and Evolutionary Microbiology 63 : 1766 – 1770 . OpenUrl CrossRef PubMed 58. ↵ Arnison PG , J. Bibb M , Bierbaum G , A. Bowers A , S. Bugni T , Bulaj G , A. Camarero J , J. Campopiano D , L. Challis G , Clardy J , D. Cotter P , J. Craik D , Dawson M , Dittmann E , Donadio S , C. Dorrestein P , Entian K-D , A. Fischbach M , S. Garavelli J , Göransson U , W. Gruber C , H. Haft D , K. Hemscheidt T , Hertweck C , Hill C , R. Horswill A , Jaspars M , L. Kelly W , P. Klinman J , P. Kuipers O , James Link A , Liu W , A. Marahiel M , A. Mitchell D , N. Moll G , S. Moore B , Müller R , K. Nair S , F. Nes I , E. Norris G , M. Olivera B , Onaka H , L. Patchett M , Piel J , T. Reaney MJ , Rebuffat S , Paul Ross R , Sahl H-G , W. Schmidt E , E. 
Selsted M , Severinov K , Shen B , Sivonen K , Smith L , Stein T , D. Süssmuth R , R. Tagg J , Tang G-L , W. Truman A , C. Vederas J , T. Walsh C , D. Walton J , C. Wenzel S , M. Willey J , van der Donk WA . 2013 . Ribosomally synthesized and post-translationally modified peptide natural products: overview and recommendations for a universal nomenclature . Natural Product Reports 30 : 108 – 160 . OpenUrl CrossRef PubMed 59. ↵ Papagianni M . 2003 . Ribosomally synthesized peptides with antimicrobial properties: biosynthesis, structure, function, and applications . Biotechnology Advances 21 : 465 – 499 . OpenUrl CrossRef PubMed Web of Science 60. ↵ Grundmann CO , Guzman J , Vilcinskas A , Pupo MT . 2024 . The insect microbiome is a vast source of bioactive small molecules . Nat Prod Rep 41 : 935 – 967 . OpenUrl CrossRef PubMed 61. ↵ Elston KM , Phillips LE , Leonard SP , Young E , Holley JC , Ahsanullah T , McReynolds B , Moran NA , Barrick JE . 2023 . The Pathfinder plasmid toolkit for genetically engineering newly isolated bacteria enables the study of Drosophila-colonizing Orbaceae . ISME Communications 3 : 49 . OpenUrl CrossRef PubMed 62. ↵ O’Brien PA , Webster NS , Miller DJ , Bourne DG . 2019 . Host-Microbe Coevolution: Applying Evidence from Model Systems to Complex Marine Invertebrate Holobionts . mBio 10 : 10.1128/mbio.02241-18. OpenUrl CrossRef 63. ↵ Chanson A , Moreau CS , Duplais C . 2021 . Assessing Biosynthetic Gene Cluster Diversity of Specialized Metabolites in the Conserved Gut Symbionts of Herbivorous Turtle Ants . Front Microbiol 12 . 64. ↵ Ganley JG , Pandey A , Sylvester K , Lu K-Y , Toro-Moreno M , Rütschlin S , Bradford JM , Champion CJ , Böttcher T , Xu J , Derbyshire ER . 2020 . A Systematic Analysis of Mosquito-Microbiome Biosynthetic Gene Clusters Reveals Antimalarial Siderophores that Reduce Mosquito Reproduction Capacity . Cell Chem Biol 27 : 817 – 826.e5 . OpenUrl CrossRef PubMed 65. 
↵ Murphy R , Strube ML , Schmidt S , Silué KS , Koné NA , Rosendahl S , Poulsen M . 2024 . Non-ribosomal peptide synthase profiles remain structurally similar despite minimally shared features across fungus-farming termite microbiomes . ISME Communications 4 : ycae094 . 66. ↵ Gutiérrez-García K , Whitaker MRL , Bustos-Díaz ED , Salzman S , Ramos-Aboites HE , Reitz ZL , Pierce NE , Cibrián-Jaramillo A , Barona-Gómez F . 2023 . Gut microbiomes of cycad-feeding insects tolerant to β-methylamino-L-alanine (BMAA) are rich in siderophore biosynthesis . ISME Communications 3 : 122 . OpenUrl CrossRef PubMed 67. ↵ Dai H , Han J , Wang T , Yin W-B , Chen Y , Liu H . 2023 . Recent advances in gut microbiota-associated natural products: structures, bioactivities, and mechanisms . Nat Prod Rep 40 : 1078 – 1093 . OpenUrl CrossRef PubMed 68. ↵ Schmidt K , Santos-Matos G , Leopold-Messer S , El Chazli Y , Emery O , Steiner T , Piel J , Engel P . 2023 . Integration host factor regulates colonization factors in the bee gut symbiont Frischella perrara . eLife 12 : e76182 . OpenUrl CrossRef PubMed 69. ↵ Murphy R , Benndorf R , de Beer ZW , Vollmers J , Kaster A-K , Beemelmanns C , Poulsen M. 2021 . Comparative Genomics Reveals Prophylactic and Catabolic Capabilities of Actinobacteria within the Fungus-Farming Termite Symbiosis . mSphere 6 : 10.1128/msphere.01233-20. OpenUrl CrossRef 70. ↵ Cattel J , Kaur R , Gibert P , Martinez J , Fraimout A , Jiggins F , Andrieux T , Siozios S , Anfora G , Miller W , Rota-Stabelli O , Mouton L . 2016 . Wolbachia in European Populations of the Invasive Pest Drosophila suzukii: Regional Variation in Infection Frequencies . PLOS ONE 11 : e0147766 . OpenUrl CrossRef PubMed 71. Moran NA , McCutcheon JP , Nakabachi A . 2008 . Genomics and evolution of heritable bacterial symbionts . Annu Rev Genet 42 : 165 – 190 . OpenUrl CrossRef PubMed Web of Science 72. ↵ Stouthamer R , Breeuwer JA , Hurst GD . 1999 . 
Wolbachia pipientis: microbial manipulator of arthropod reproduction . Annu Rev Microbiol 53 : 71 – 102 . OpenUrl CrossRef PubMed Web of Science 73. ↵ Ahmed MZ , Araujo-Jnr EV , Welch JJ , Kawahara AY . 2015 . Wolbachia in butterflies and moths: geographic structure in infection frequency . Front Zool 12 : 16 . OpenUrl CrossRef PubMed 74. Hague MTJ , Mavengere H , Matute DR , Cooper BS . 2020 . Environmental and Genetic Contributions to Imperfect wMel-Like Wolbachia Transmission and Frequency Variation . Genetics 215 : 1117 – 1132 . OpenUrl Abstract / FREE Full Text 75. ↵ Mateos M , Castrezana SJ , Nankivell BJ , Estes AM , Markow TA , Moran NA . 2006 . Heritable Endosymbionts of Drosophila . Genetics 174 : 363 – 376 . OpenUrl Abstract / FREE Full Text 76. ↵ Zug R , Hammerstein P . 2012 . Still a Host of Hosts for Wolbachia: Analysis of Recent Data Suggests That 40% of Terrestrial Arthropod Species Are Infected . PLOS ONE 7 : e38544 . OpenUrl CrossRef PubMed 77. ↵ Jones JE , Court R , Kageyama D , Obbard DJ , Hurst GDD . 2024 . Variable prevalence of protective Spiroplasma infection over time in two natural populations of Drosophila hydei . bioRxiv doi: 10.1101/2024.07.31.606006 . OpenUrl Abstract / FREE Full Text 78. ↵ Kim BY , Gellert HR , Church SH , Suvorov A , Anderson SS , Barmina O , Beskid SG , Comeault AA , Crown KN , Diamond SE , Dorus S , Fujichika T , Hemker JA , Hrcek J , Kankare M , Katoh T , Magnacca KN , Martin RA , Matsunaga T , Medeiros MJ , Miller DE , Pitnick S , Schiffer M , Simoni S , Steenwinkel TE , Syed ZA , Takahashi A , Wei KH-C , Yokoyama T , Eisen MB , Kopp A , Matute D , Obbard DJ , O’Grady PM , Price DK , Toda MJ , Werner T , Petrov DA . 2024 . Single-fly genome assemblies fill major phylogenomic gaps across the Drosophilidae Tree of Life . PLOS Biology 22 : e3002697 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted December 22, 2024. 
Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Phylogenetic and functional diversity among Drosophila-associated metagenome-assembled genomes Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Phylogenetic and functional diversity among Drosophila -associated metagenome-assembled genomes Aaron A. Comeault , Alberto H. Orta , David Fidler , Tobias Nunn , Amy R. Ellison , Tayte A. Anspach , Daniel R. Matute bioRxiv 2024.12.19.629488; doi: https://doi.org/10.1101/2024.12.19.629488 Share This Article: Copy Citation Tools Phylogenetic and functional diversity among Drosophila -associated metagenome-assembled genomes Aaron A. Comeault , Alberto H. Orta , David Fidler , Tobias Nunn , Amy R. Ellison , Tayte A. Anspach , Daniel R. 
Matute bioRxiv 2024.12.19.629488; doi: https://doi.org/10.1101/2024.12.19.629488 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genomics Subject Areas All Articles Animal Behavior and Cognition (7644) Biochemistry (17728) Bioengineering (13917) Bioinformatics (42038) Biophysics (21489) Cancer Biology (18637) Cell Biology (25553) Clinical Trials (138) Developmental Biology (13401) Ecology (19941) Epidemiology (2067) Evolutionary Biology (24367) Genetics (15622) Genomics (22547) Immunology (17764) Microbiology (40475) Molecular Biology (17208) Neuroscience (88749) Paleontology (667) Pathology (2842) Pharmacology and Toxicology (4834) Physiology (7659) Plant Biology (15175) Scientific Communication and Education (2047) Synthetic Biology (4304) Systems Biology (9835) Zoology (2272)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2024) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00