Full text
63,735 characters
· extracted from
preprint-html
· click to expand
Untrimmed ITS2 metabarcode sequences cause artificially reduced abundances of specific fungal taxa | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Untrimmed ITS2 metabarcode sequences cause artificially reduced abundances of specific fungal taxa View ORCID Profile Kathleen E. Kyle , View ORCID Profile Jonathan L. Klassen doi: https://doi.org/10.1101/2024.08.02.606430 Kathleen E. Kyle a Department of Molecular and Cell Biology, University of Connecticut , Storrs, CT, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kathleen E. Kyle Jonathan L. 
Klassen a Department of Molecular and Cell Biology, University of Connecticut , Storrs, CT, USA b Institute for Systems Genomics, University of Connecticut , Storrs, CT, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jonathan L. Klassen For correspondence: jonathan.klassen{at}uconn.edu Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Advances in DNA metabarcoding have greatly expanded our knowledge of microbial communities in recent years. Pipelines and parameters have been tested extensively for bacterial metabarcoding using the 16S rRNA gene and best practices are largely established. For fungal metabarcoding using the ITS gene, however, only a few studies have considered how such pipelines and parameters can affect community prediction. Here we report a novel bias uncovered during ITS2 sequencing of Trichoderma -infected ant fungus gardens and confirmed using mock communities. Abnormally low forward read quality caused Trichoderma ITS2 reads to be computationally filtered before and during read pair merging, thus almost entirely eliminating Trichoderma ASVs from the resulting fungal community profiles. Sliding window quality trimming before filtering allowed most of these reads to pass filtering and merge successfully, producing community profiles that now correlated with visual signs of Trichoderma infection and matched the composition of the mock communities. Applying such sliding window trimming to a previously generated environmental ITS2 dataset increased the detected fungal diversity and again overcame read quality biases against Trichoderma to instead detect it in nearly every sample and often at high relative abundances. This analysis additionally identified a similar, but distinct, bias against a second fungal genus Meyerozyma . The prevalence of such quality biases against other fungal ITS sequences is unknown but may be widespread. 
We therefore advocate for routine use of sliding window quality trimming as a best practice in ITS2 metabarcoding analysis. Importance Metabarcode sequencing produces DNA abundance profiles that are presumed to reflect the actual microbial composition of the samples that they analyze. However, this assumption is not always tested, and taxon-specific biases are often not apparent, especially for low-abundance taxa in complex communities. Here we identified ITS2 read quality aberrations that caused dramatic reductions in the relative abundances of specific taxa in multiple datasets characterizing ant fungus gardens. Such taxon-specific biases in read quality may be widespread in other environments and for other fungal taxa, thereby causing incorrect descriptions of these mycobiomes. Introduction Fungal classification is notoriously difficult ( 1 , 2 ), which may partly explain why fungi remain understudied compared to bacteria ( 3 – 6 ) despite their global importance in terrestrial and plant-associated ecosystems, including agriculture ( 7 – 9 ). Community amplicon sequencing, or “metabarcoding”, has been widely used to characterize bacterial communities using the 16S rRNA gene as a common bacterial barcode ( 10 – 12 ). Metabarcoding was later adapted to fungal communities, especially using the internal transcribed spacer (ITS) region of the eukaryotic rRNA gene cluster ( 13 ). Despite its later adoption, ITS metabarcoding is now one of the most widely used techniques for characterizing microfungal communities ( 14 – 16 ). There are many biases associated with DNA metabarcoding ( 17 – 19 ). From the method of DNA extraction to the many challenges of PCR, biases in library generation and sequencing are well-documented ( 14 , 17 , 20 – 24 ). 
Historically, most studies of computational biases focused on the algorithms used to bin sequences into operational taxonomic units (OTUs) or exact/amplicon sequence variants (E/ASVs) ( 25 – 27 ), and those used for taxonomic classification ( 28 , 29 ). Some recent studies have also considered biases associated with the upstream data manipulation steps ( 30 – 34 ). Performed before the more computationally intensive binning and classification steps, such “preprocessing” checks the raw sequencing data for quality concerns and, if overlapping paired-end reads are available, merges reads into a consensus sequence. Preprocessing standards have largely been developed for and adopted from bacterial metabarcoding ( 35 – 39 ). However, fungal ITS sequences pose extra challenges, particularly due to their length heterogeneity. For example, the widely used v4 region of the bacterial 16S rRNA gene (hereafter “16S v4”) is consistently ∼250 bp long, but the comparable fungal rRNA internal transcribed spacer region 2 (hereafter “ITS2”) can range from ∼50-800 bp long ( 40 , 41 ). Thus, Illumina paired-end reads used to sequence 16S v4 metabarcodes overlap significantly with each other such that once merged, nearly every base is sequenced twice for improved basecall accuracy. When using a similar sequencing approach for ITS2 metabarcoding, sequences ∼250 bp long can be merged and therefore sequenced twice. However, if the ITS2 gene is < 250 bp the sequencing reads will contain non-biological bases when sequencing progresses into the 3’ adapter/primer region (“primer readthrough”; 42 ) and possibly beyond. In contrast, if the ITS2 sequences are too long the paired reads will share little or no overlap, reducing the proportion of bases sequenced twice and making merging difficult or impossible ( 43 ). This has led some researchers to question the value of using paired-end sequencing of ITS2 barcodes ( 44 – 46 ). Successful read merging also depends on read quality. 
During preprocessing, low-quality bases are typically “trimmed” from the beginning and end of each read and primer readthrough is sometimes “clipped”. Entire reads that still don’t meet specified quality thresholds are then “filtered” out from the dataset. Because the length of the 16S v4 metabarcode is homogeneous, many pipelines remove the same number of bases from the ends of all reads to remove bases at the 3’ read end that have the lowest quality scores, producing higher quality reads of a fixed length ( 36 , 37 , 47 – 51 ). In contrast, truncation of ITS2 reads to a fixed length is inappropriate because they have more variable lengths. Thus, ITS pipelines often omit this truncation step (although some do clip primer readthrough; 42 , 46 , 52 , 53 ). Using untrimmed metabarcoding reads has several downsides. Filtering that only considers the average quality of entire reads will often remove many untrimmed reads that typically have lower-quality bases at their 3’ ends, notwithstanding many high-quality bases at their 5’ ends. Untrimmed reads that pass filtering will be more challenging to merge properly, if at all, due to the inclusion of these low-quality bases and adapter and primer sequences if they were not clipped. Any merged sequences will inherit the lower quality of these untrimmed reads, potentially leading to inaccurate taxonomic binning and classification. Filtering ITS2 metabarcode data may be especially inaccurate if pipeline parameters are unchanged from defaults chosen for 16S v4 datasets. The presence of low-quality data may also increase compute times. More fundamentally, using uniform read lengths assumes that read quality varies similarly for all reads in a dataset. This assumption seems weak given the taxon-specific biases of most other metabarcoding steps ( 12 , 14 , 54 – 56 ). Here we report an underappreciated taxon-specific filtering bias during ITS2 metabarcoding (but see ref # 57 ). 
In our previous research ( 58 ), we infected ant fungus gardens with Trichoderma (Ascomycota: Sordariomycetes: Hypocreales: Hypocreaceae), leading to Trichoderma growth that was visually apparent, yet there were no Trichoderma reads after ITS2 metabarcoding. We determined that Trichoderma reads in these samples uniquely failed to pass the filtering and read merging steps in our analysis pipeline, removing nearly all Trichoderma sequences from the final output. Using sliding window quality trimming before filtering remedied this bias against Trichoderma in both defined mock communities and our experimental infection dataset. Furthermore, we detected the same bias against Trichoderma in an ITS2 metabarcoding dataset from environmental samples, as well as a similar bias against Meyerozyma (Ascomycota: Saccharomycetes: Saccharomycetales: Saccharomycetaceae) that was also remedied by sliding window quality trimming. This study demonstrates how a taxon-specific bias due to an unusual reduction in quality at the 3’ end of ITS2 metabarcoding reads was not accommodated by typical filtering parameters, which led to erroneous taxonomic profiles and thus erroneous biological conclusions. Results During our previous work infecting fungus gardens cultivated by Trachymyrmex septentrionalis ants with the fungal pathogen Trichoderma ( 58 ), we visually observed Trichoderma growth on infected fungus gardens and not on control fungus gardens inoculated only with buffer ( Fig. 1A ). We were therefore surprised when our ITS2 community profiles for the infected fungus gardens contained very few Trichoderma ASVs ( Fig. 1B , Suppl. Fig. S1A), despite their containing the expected ASVs from the ant’s cultivar fungus (the main constituent of ant fungus gardens). Compared to the mock-inoculated controls there were also many fewer reads in infected samples after filtering, with nearly all reads removed after paired read merging ( Fig. 1C , Suppl. Fig. S1B). 
The infected samples also had distinctively abnormal read quality profiles. At ∼125 bp, forward read quality suddenly became highly variable and the median decreased sharply from a Phred score of ∼35 to a score of ∼25 that then persisted to the ends of these 250 bp reads ( Fig. 1D , Suppl. Fig. S1C). The quality of the reverse reads was poorer than the quality of the forward reads in every sample, but the quality of reverse reads from infected samples was noticeably poorer than that of those from the uninfected samples (Suppl. Fig. S1C). These data suggested an unexpected and dramatic bias against ITS2 reads from Trichoderma in this analysis. Download figure Open in new tab Figure 1. Trichoderma ITS2 reads are completely removed by filtering or failure to merge during metabarcode sequencing analysis and have abnormal forward read quality. A) Representative images of a healthy fungus garden 4 days after inoculation with sterile PBS (“mock-inoculated”, top) and an infected fungus garden 4 days after inoculation with Trichoderma spores in PBS (“ Trichoderma -inoculated”, bottom). B) Relative abundances of fungal ASVs for the mock-inoculated (top) and Trichoderma -inoculated (bottom) fungus gardens pictured in A. C) Percent of reads remaining after each step of the metabarcoding analysis pipeline for the samples pictured in A. Forward and reverse reads are abbreviated as R1 and R2, respectively. D) Quality plots for forward ITS2 reads from the samples pictured in A. Reads from the mock-inoculated (top) and Trichoderma -inoculated (bottom) samples are plotted in orange and green, respectively. The solid line shows the median basecall quality score (Phred) at each base position and dotted gray lines show the basecall quality quartiles. See Suppl. Fig. S1 for results from the full dataset. 
To reproduce this Trichoderma -specific bias more quantitatively, we sequenced mock communities created using defined proportions of DNA from pure cultures of Trichoderma and the ant cultivar fungus. As expected, the reduction in quality midway through the forward reads and the number of reads discarded during filtering and merging both increased alongside the proportion of Trichoderma DNA in the mock communities ( Fig. 2A , Suppl. Fig. S2). No Trichoderma ASVs were detected in any of the mock communities, including that containing 100% Trichoderma DNA ( Fig. 2B ). Somewhat surprisingly, we detected cultivar fungus ASVs in the 100% Trichoderma mock community, but at very low levels likely originating from cross-contamination during community generation or sequencing. Download figure Open in new tab Figure 2. Mock communities replicate the bias against Trichoderma ITS2 metabarcodes and demonstrate that sliding window trimming can mitigate it. A) Percent of untrimmed reads remaining at each step of the metabarcoding analysis pipeline for mock communities constructed using different proportions of cultivar fungus and Trichoderma DNA. B ) ASV relative abundances in each mock community using untrimmed reads. C ) Percent of trimmed reads remaining at each step of the metabarcoding analysis pipeline for the same mock communities as in (A). D ) ASV relative abundances in each mock community using trimmed reads. Forward and reverse reads are abbreviated as R1 and R2, respectively. We hypothesized that the sudden quality drop in the middle of the forward Trichoderma reads caused most of them to fall below the default quality thresholds and thus be filtered out or fail to merge, causing low final Trichoderma read counts and the absence of Trichoderma ASVs. To test this, we first quality trimmed the 3’ end of all reads in the mock community samples using Trimmomatic’s sliding window trimmer ( 59 ) and then analyzed them as before. 
Sliding window quality trimming reduced the drop in read quality midway through the forward reads (Suppl. Fig. S3), and most trimmed reads successfully passed the read filtering and merging steps of the analysis pipeline ( Fig. 2C ). Trichoderma ASVs were now detected in all mock communities except for that containing 100% cultivar fungus DNA and their relative abundances correlated with the expected cultivar fungus: Trichoderma ratios of the input DNA ( Fig. 2D ), albeit with some overrepresentation of Trichoderma , perhaps due to different ITS2 copy numbers in Trichoderma and the cultivar fungus, not constructing the mock communities as molar proportions, or sequencing bias favoring Trichoderma . Overall, sliding window trimming generated metabarcoding community profiles that were much closer to the expected values compared to the profiles generated without such trimming ( Fig. 2 ). We next applied sliding window trimming to our initial Trichoderma -infection dataset. Now Trichoderma ASVs were detected at relative abundances consistent with the visual appearance of these samples (Suppl. Fig. S4A), and the number of trimmed reads that were retained after filtering and merging (Suppl. Fig. S4B) and the average quality of the trimmed forward reads (Suppl. Fig. S4C) both increased for all Trichoderma -infected samples. Thus, sliding window trimming successfully mitigated bias against Trichoderma in these experimental samples, as it did for the mock communities. Finally, we tested the effect of sliding window trimming on an environmental ITS2 dataset that we previously generated from 98 freshly excavated ant fungus garden samples ( 58 ). Without trimming, very little taxonomic diversity appeared in this environmental dataset ( Fig. 3 , left), with 86/90 samples having ≥ 98% cultivar reads, 3 samples containing < 30% cultivar reads, and 1 sample having 62% cultivar reads. 
In contrast, considerably greater taxonomic diversity was apparent after sliding window trimming, particularly for Trichoderma, which occurred in nearly all trimmed samples (81 out of 90), often at high relative abundances ( Fig. 3 , right). Similarly, trimming increased the relative abundance of the yeast genus Meyerozyma in four samples. One sample had 19% Meyerozyma before trimming that increased to 88% after trimming, and three samples went from 0% to 1.5%, 67%, or 93% Meyerozyma . Log2-transformed fold changes of trimmed versus untrimmed relative abundances of the genera in these environmental samples confirmed that Trichoderma and Meyerozyma relative abundances increased following sliding window trimming ( Fig. 4 ). In this analysis, the genera whose relative abundances decreased after read trimming (cultivar fungus, Penicillium , Cladosporium , and Oberwinklerozyma ) occurred in samples also containing Trichoderma , Meyerozyma , and/or “other” fungi due to the relative nature of the metabarcoding data (i.e., the trimmed samples with the largest taxon increases also had the largest taxon decreases). Download figure Open in new tab Figure 3. Using trimmed reads revealed greater diversity of non-cultivar fungi in environmental fungus gardens compared to using untrimmed reads. ASV relative abundances in environmental fungus gardens using untrimmed (left) and sliding window-trimmed (right) reads. Each row represents an individual fungus garden; rows are ordered by the relative abundance of Trichoderma after trimming. The samples with the three highest relative abundances of Trichoderma or Meyerozyma are labeled T 1- T 3 and M 1- M 3, respectively. The taxon labelled “Other” includes all ASVs that were < 1% abundant in all samples. See Suppl. Fig. S5 for the absolute abundances of fungal reads in this dataset. Download figure Open in new tab Figure 4. Trimming particularly increases the relative abundances of Trichoderma and Meyerozyma . 
The relative abundances of genera in the environmental samples ( Fig. 3 ) were compared using their Log2-transformed fold-change in trimmed versus untrimmed datasets, with each dot comparing the relative abundance of a genus in a single fungus garden sample. The center line and outer edges of the boxplots show the median and quartiles of the fold-change in relative abundances for each genus, respectively. Whiskers extend to the highest and lowest data points no further than 1.5x the interquartile range. Genera were excluded if they were < 1% abundant in both untrimmed and trimmed datasets. Taxon labels and colors match those in Fig. 3 . Closer inspection of the environmental samples with the highest abundances of either Trichoderma ( T 1- T 3) or Meyerozyma ( M 1- M 3) mirrored the changes we observed in our infection and mock community experiments following sliding window trimming ( Fig. 5 ). Left untrimmed, these environmental samples all had distinctive drops in the quality of the forward reads ( Fig. 5B, G ) and high numbers of reads discarded during filtering and merging ( Fig. 5C, H ), making both taxa underreported in the relative abundance plots ( Fig. 5E, J ). Notably, the quality plots for samples containing Meyerozyma were similar to, but distinct from, those containing Trichoderma, with the sudden quality drop of the forward reads occurring at ∼200 bp for Meyerozyma ( Fig. 5G ) compared to ∼125 bp for Trichoderma ( Fig. 5B , Fig. 1D , Suppl. Fig. S1C). Sliding window trimming mitigated both taxon-specific biases by improving the quality of the forward reads ( Fig. 5A, F ) and increasing read retention during read filtering and merging ( Fig. 5C, H ). The resulting relative abundances of both taxa were much higher ( Fig. 5D, I ) than in the untrimmed samples. 
Given our detection of such biases in samples that contained only limited taxonomic diversity, we speculate that similar read quality biases may be widespread in metabarcoding datasets that include many other fungal taxa. Download figure Open in new tab Figure 5. Trimming increases forward read quality and the number of reads that pass filtering and merge successfully for the environmental samples with the highest relative abundances of Trichoderma ( T 1- T 3) and Meyerozyma ( M 1- M 3). A/B) Forward read quality plots for trimmed (A) and untrimmed (B) samples T 1- T 3. C) Percent of reads remaining after each analysis pipeline step for trimmed and untrimmed samples T 1- T 3. D/E) Relative abundances of fungal ASVs in trimmed (D) and untrimmed (E) samples T 1- T 3. F/G) Forward read quality plots for trimmed (F) and untrimmed (G) samples M 1- M 3. H) Percent of reads remaining after each analysis pipeline step for trimmed and untrimmed samples M 1- M 3. I/J) Relative abundances of fungal ASVs in trimmed (I) and untrimmed (J) samples M 1- M 3. In panels A, B, C, F, G, and H, untrimmed data are colored gray and trimmed data are colored green (samples T 1- T 3) or blue (samples M 1- M 3). In panels A, B, F, and G, the median quality and the quality quartiles per base are plotted as solid and dotted lines, respectively. The light green or blue lines at the bottom of panels A and F, respectively, show the percentage of reads trimmed to that sequence length or longer. Forward and reverse reads are abbreviated as R1 and R2, respectively. Discussion This study demonstrates that data preprocessing, particularly sliding window quality trimming, can significantly affect fungal metabarcoding data quality and analytical output. The taxon-specific ITS2 read quality biases that we identified ( Figs. 1D , 5B, 5G , Suppl. Figs. 
S1C, S2) required these reads to be trimmed without applying a fixed truncation length to the entire dataset to avoid taxon-specific read loss during downstream filtering and merging ( Figs. 1C , 2A , 5C, 5H , Suppl. Fig. S1B) and ultimately the underrepresentation or absence of these taxa in the resulting ITS2 community profiles ( Figs. 1B , 2B , 3 , 5E, 5J , Suppl. Fig. S1A). In fact, sliding window quality trimming improved read retention after filtering and merging for all taxa in our datasets ( Fig. 2C , Suppl. Figs. S4B, S5) and improved classification of the cultivar fungus (Suppl. Fig. S6), demonstrating its general benefit compared to fixed length truncation. So long as a metabarcoding pipeline can tolerate reads with variable lengths, sliding window quality trimming should apply to metabarcoding using any barcode. We did not detect “ASV splitting” following trimming (Suppl. Fig. S6), which has been thought to arise from variable metabarcode read lengths ( 60 , 61 ). Concatenating paired reads, alongside or instead of merging, may also improve taxonomic classification ( 30 , 60 ), particularly for long amplicons that do not overlap enough for merging ( 34 ). That most reads in our study successfully merged after trimming ( Figs. 2C , 5C, 5H , Suppl. Fig. S4B) suggests that the abnormally low forward read quality we observed is instead likely due to sequencing progressing beyond the end of short ITS2 template DNA molecules. Our results additionally emphasize the importance of sequencing mock community controls alongside experimental samples ( Fig. 2 ). Mock community controls are a gold standard tool to identify taxon-specific biases and validate computational pipelines ( 62 ). Ideally, these communities should contain every taxon present in the experimental communities being analyzed. Paradoxically, this requires prior knowledge of experimental community composition that is often unavailable, especially for understudied communities. 
Alternatively, constructing mock communities that include all currently known taxa is as impractical as it is impossible. Custom mock communities offer some promise ( 30 , 63 , 64 ), but these still require a priori knowledge of community composition, are technically challenging to create, are not standardized across research groups, and are currently rare for fungi (ATCC MSA-1010 and MSA-2010 from American Type Culture Collection, Manassas, VA, USA; 40 , 65 ). Even if not comprehensive, mock community controls are useful to detect biases against the taxa that they do contain and thus should be used routinely. When representative mock community controls do not exist, mindful data analysis is imperative. Researchers analyzing metabarcoding data should perform sufficient quality control during all steps of a computational analysis and especially appreciate that the default parameters are often set using well-characterized bacterial communities that may not apply to studies of other communities, particularly those targeting fungi (see 57 for an example of parameter optimization). These quality checks should include, but are not limited to, evaluating each sample for unusual patterns of read quality, tracking how many reads are removed from samples at each pipeline step, and comparing final community composition metrics between datasets processed using different parameters (e.g., trimmed vs. untrimmed). Results also should be examined carefully with respect to prior expectations given the experimental design, for example during ITS2 metabarcoding studies attempting to detect Trichoderma in plant root communities following its application as a biocontrol agent (e.g., 66 ). Despite their limitations, metabarcoding studies of novel or under-characterized microbial communities are important and necessary. 
Most importantly, this study demonstrates how a seemingly small adjustment to data preprocessing can significantly impact the biological conclusions drawn from an analytical interpretation. Without sliding window trimming, environmental T . septentrionalis fungus gardens appeared to have perplexingly little microfungal diversity ( Fig. 3 left). In contrast, trimmed reads revealed that environmental fungus gardens host a more diverse and abundant fungal community, of which Trichoderma is exceptionally prevalent ( Fig. 3 right, Fig. 4 ). This latter result better agrees with the presence of metabolites commonly associated with Trichoderma in ant fungus gardens ( 58 ), other studies that have cultured many different microfungi (including Trichoderma ) from ant fungus gardens ( 67 – 73 ), and the known abundance of Trichoderma in diverse soil and plant-associated communities such as the rhizosphere ( 74 – 78 ) where it often acts as a mycoparasite ( 79 – 81 ). Unexpectedly, we further discovered a strikingly similar bias against ITS2 reads from an unrelated genus, Meyerozyma ( Fig. 5F-J ), that belongs to a different subphylum (Saccharomycotina) than Trichoderma (Pezizomycotina). In our other research projects, we found another similar quality bias against ITS2 reads from Clonostachys (Suppl. Fig. S7), a genus more closely related to Trichoderma , both in the order Hypocreales. Rolling et al. ( 57 ) reported a similar taxon-specific aberration in read quality for different taxa than those studied here and using ITS1 metabarcoding. Therefore, although the full distribution of such biases across all fungal taxonomy and barcodes is unknown, these data suggest they could be widespread. In conclusion, we recommend using sliding window quality trimming, appropriate quality controls, and mindful data analysis as part of best practices for metabarcoding, particularly for fungi. 
Methods Data Generation Except for the cultivar fungus: Trichoderma DNA mock communities, all data and methods have been described elsewhere ( 58 ). The mock communities were prepared by extracting genomic DNA from pure cultures of Trichoderma and the T . septentrionalis cultivar fungus isolated from laboratory colony JKH000219 which was collected from Florida in 2016 (Florida Department of Agriculture and Consumer Services unnumbered Letter of Authorization; 58 ). Trichoderma strain JKS001884 was grown on Potato Dextrose Agar (PDA, Difco) + antibiotics (ABX, 50 mg/L penicillin and 50 mg/L streptomycin; both Fisher Scientific) at 25 °C for 1 week. Cultivar fungus was isolated by collecting small tufts of hyphae from the JKH000219 fungus garden using sterile extra-fine forceps (being careful to only collect hyphae and not surrounding pieces of forage) and growing them on PDA+ABX plates at 25 °C. These were checked daily for pathogen (non-cultivar) growth, in which case pathogens were cut and removed from the agar plates using sterile blades or cultivar hyphae were transferred onto new PDA+ABX plates. This continued until the cultivar fungus comprised a pure culture (∼2-4 weeks). Hyphae from these pure cultures were collected into bead-beating tubes with 250 µL of cetyltrimethylammonium bromide (CTAB) buffer and 0.5 g each of 0.1 mm and 1 mm sterile silica/zirconium beads for DNA extraction, and extracted DNA was quantified using Qubit 3.0 with a dsDNA high sensitivity kit ( 58 ). Five mock communities were created using ratios of 100:0, 90:10, 50:50, 10:90, 0:100 cultivar fungus to Trichoderma genomic DNA by mass. 
These mock communities were submitted to the Microbial Analysis, Resources, and Services (MARS) facility at the University of Connecticut for ITS2 metabarcode sequencing using Illumina indexed primers fITS7 (aka ITS3, 5’-GTGARTCATCGAATCTTTG-3’, 63) and ITS4 (5’-TCCTCCGCTTATTGATATGC-3’, 82) that contained Illumina adapters and dual 8 base indices (49). Samples were amplified from 30 ng of extracted DNA in triplicate 15 µL reactions using Go-Taq DNA polymerase (Promega) with the addition of 3.3 μg bovine serum albumin (BSA, New England BioLabs). To overcome inhibition from host DNA, 0.1 pmol of each primer without the adapters or indices was added to the mastermix. The ITS2 PCR reaction was incubated at 95 °C for 2 minutes, then for 5 cycles of 30 s at 95.0 °C, 60 s at 48.0 °C and 60 s at 72.0 °C, then for 25 cycles of 30 s at 95.0 °C, 60 s at 55.0 °C and 60 s at 72.0 °C, followed by final extension at 72.0 °C for 10 minutes. PCR products were pooled for quantification and visualization using a QIAxcel with a DNA Fast Analysis cartridge (Qiagen). PCR products were normalized based on the concentration of DNA from 250-400 bp then pooled using the epMotion 3075 liquid handling robot. The pooled PCR products were cleaned using Omega Bio-Tek Mag-Bind Beads according to the manufacturer’s protocol using a ratio of 0.8x beads to PCR product. The cleaned pool was sequenced on the MiSeq using a v2 2×250 base pair kit (Illumina, Inc.). Data Analysis All ITS2 amplicon datasets were analyzed using R v3.6.3 or 4.1.0 (83). The untrimmed dataset was processed following the DADA2 “ITS Pipeline Workflow (1.8)” (37). 
The only changes were setting parameter “randomize” to “TRUE” for learning errors with function “learnErrors”, dereplicating the forward and reverse reads using function “derepFastq” before running the “dada” function, graphing the output of the “track” variable using R barplot to visualize read retention at each pipeline step, and using ITSx v1.1.3 ( 84 ) with default parameters to remove potential flanking 18S rRNA regions from the ASVs prior to taxonomic classification. ASVs were removed that did not pass ITSx filtering and duplicate ITSx-treated ASVs were merged. These ASVs were then classified as described in ( 37 ) using the UNITE database v8.2 general fasta format ( 85 ). The resulting ASV table, taxon table, and sample data table were collected into a phyloseq object for further processing using phyloseq v1.26.1 ( 37 , 86 ) and tidyr v1.3.1 ( 87 ). For the trimmed dataset, reads were processed exactly as above except FASTQ files were first trimmed at the 3’ end using Trimmomatic v0.39 ( 59 ) with parameters SLIDINGWINDOW:5:20. For each sample, ASVs that were < 1% abundant were manually classified as “other” and their abundances were combined. T. septentrionalis ants predominantly cultivate fungi from tribe Leucocoprineae ( Agaricaceae : Agaricales ), typically annotated as either genus Leucocoprinus or Leucoagaricus ( 88 – 90 ). However, the taxonomy of these fungi is complex ( 90 – 92 ) and thus ASVs were manually defined as “cultivar fungus” if they were classified as belonging to Leucocoprinus , Leucoagaricus , or “unclassified family Agaricaceae ”. These cultivar fungus ASVs were confirmed to be closely related to other fungus-growing ant cultivar fungi using NCBI blastn megablast against the nonredundant nucleotide database, nr/nt ( 93 , 94 ). All code used for analysis is available at https://github.com/kek12e/ms_ITS2trimming . 
Data availability Unprocessed fungal ITS2 community amplicon sequencing FASTQ files are publicly available in the National Center for Biotechnology Information (NCBI) Sequence Read Archive (SRA) under BioProject PRJNA763335 (environmental fungus gardens), PRJNA743045 ( Trichoderma -infected laboratory fungus gardens), and PRJNA1138067 (cultivar fungus: Trichoderma mock communities and Clonostachys ). Acknowledgements Funding for this work was received from NSF grant IOS-1656475 (J.L.K). We thank Dr. Kendra Maas from the Microbial Analysis, Resources, and Services (MARS) facility at the University of Connecticut for assistance with ITS2 library preparation and sequencing, and Madison Adams for generation of the environmental ITS2 dataset. Footnotes Competing Interest Statement : K.E.K. and J.L.K. declare no competing interests. https://github.com/kek12e/ms_ITS2trimming https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA763335 https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA743045 https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA1138067 References 1. ↵ Lücking R , Aime MC , Robbertse B , Miller AN , Ariyawansa HA , Aoki T , Cardinali G , Crous PW , Druzhinina IS , Geiser DM , Hawksworth DL , Hyde KD , Irinyi L , Jeewon R , Johnston PR , Kirk PM , Malosso E , May TW , Meyer W , Öpik M , Robert V , Stadler M , Thines M , Vu D , Yurkov AM , Zhang N , Schoch CL . 2020 . Unambiguous identification of fungi: where do we stand and how accurate and precise is fungal DNA barcoding? IMA Fungus 11 : 14 . OpenUrl 2. ↵ Hibbett DS , Taylor JW . 2013 . Fungal systematics: is a new age of enlightenment at hand? Nat Rev Microbiol 11 : 129 – 133 . OpenUrl CrossRef PubMed 3. 
↵ Hyde KD , Baldrian P , Chen Y , Thilini Chethana KW , De Hoog S , Doilom M , de Farias ARG , Gonçalves MFM , Gonkhom D , Gui H , Hilário S , Hu Y , Jayawardena RS , Khyaju S , Kirk PM , Kohout P , Luangharn T , Maharachchikumbura SSN , Manawasinghe IS , Mortimer PE , Niego AGT , Phonemany M , Sandargo B , Senanayake IC , Stadler M , Surup F , Thongklang N , Wanasinghe DN , Bahkali AH , Walker A. 2024 . Current trends, limitations and future research in the fungi? Fungal Divers 125 : 1 – 71 . OpenUrl 4. Hibbett D , Abarenkov K , Kõljalg U , Öpik M , Chai B , Cole J , Wang Q , Crous P , Robert V , Helgason T , Herr JR , Kirk P , Lueschow S , O’Donnell K , Nilsson RH , Oono R , Schoch C , Smyth C , Walker DM , Porras-Alfaro A , Taylor JW , Geiser DM . 2016 . Sequence-based classification and identification of fungi . Mycologia 108 : 1049 – 1068 . OpenUrl 5. Desprez-Loustau M-L , Robin C , Buée M , Courtecuisse R , Garbaye J , Suffert F , Sache I , Rizzo DM . 2007 . The fungal dimension of biological invasions . Trends Ecol Evol 22 : 472 – 480 . OpenUrl CrossRef PubMed Web of Science 6. ↵ Niskanen T , Lücking R , Dahlberg A , Gaya E , Suz LM , Mikryukov V , Liimatainen K , Druzhinina I , Westrip JRS , Mueller GM , Martins-Cunha K , Kirk P , Tedersoo L , Antonelli A . 2023 . Pushing the frontiers of biodiversity research: unveiling the global diversity, distribution, and conservation of fungi . Annu Rev Environ Resour 48 : 149 – 176 . OpenUrl 7. ↵ Berbee ML , James TY , Strullu-Derrien C . 2017 . Early diverging fungi: diversity and impact at the dawn of terrestrial life . Annu Rev Microbiol 71 : 41 – 60 . OpenUrl CrossRef 8. Hawksworth DL . 1991 . The fungal dimension of biodiversity: magnitude, significance, and conservation . Mycol Res 95 : 641 – 655 . OpenUrl CrossRef Web of Science 9. ↵ Hawksworth DL , Lücking R . 2017 . Fungal diversity revisited: 2.2 to 3.8 million species . Microbiol Spectr 5 : FUNK–0052 – 2016 . OpenUrl 10. 
↵ Caporaso JG , Lauber CL , Walters WA , Berg-Lyons D , Lozupone CA , Turnbaugh PJ , Fierer N , Knight R . 2011 . Global patterns of 16S rRNA diversity at a depth of millions of sequences per sample . Proc Natl Acad Sci U S A 108 Suppl 1 : 4516 – 4522 . OpenUrl Abstract / FREE Full Text 11. Caporaso JG , Lauber CL , Walters WA , Berg-Lyons D , Huntley J , Fierer N , Owens SM , Betley J , Fraser L , Bauer M , Gormley N , Gilbert JA , Smith G , Knight R . 2012 . Ultra-high-throughput microbial community analysis on the Illumina HiSeq and MiSeq platforms . ISME J 6 : 1621 – 1624 . OpenUrl CrossRef PubMed Web of Science 12. ↵ Knight R , Vrbanac A , Taylor BC , Aksenov A , Callewaert C , Debelius J , Gonzalez A , Kosciolek T , McCall L-I , McDonald D , Melnik AV , Morton JT , Navas J , Quinn RA , Sanders JG , Swafford AD , Thompson LR , Tripathi A , Xu ZZ , Zaneveld JR , Zhu Q , Caporaso JG , Dorrestein PC . 2018 . Best practices for analysing microbiomes . Nat Rev Microbiol 16 : 410 – 422 . OpenUrl PubMed 13. ↵ Schoch CL , Seifert KA , Huhndorf S , Robert V , Spouge JL , Levesque CA , Chen W , Fungal Barcoding Consortium . 2012 . Nuclear ribosomal internal transcribed spacer (ITS) region as a universal DNA barcode marker for Fungi . Proc Natl Acad Sci U S A 109 : 6241 – 6246 . OpenUrl Abstract / FREE Full Text 14. ↵ Tedersoo L , Bahram M , Zinger L , Nilsson RH , Kennedy PG , Yang T , Anslan S , Mikryukov V . 2022 . Best practices in metabarcoding of fungi: from experimental design to results . Mol Ecol 31 : 2769 – 2795 . OpenUrl CrossRef 15. Nilsson RH , Anslan S , Bahram M , Wurzbacher C , Baldrian P , Tedersoo L . 2019 . Mycobiome diversity: high-throughput sequencing and identification of fungi . Nat Rev Microbiol 17 : 95 – 109 . OpenUrl CrossRef PubMed 16. ↵ Lindahl BD , Nilsson RH , Tedersoo L , Abarenkov K , Carlsen T , Kjøller R , Kõljalg U , Pennanen T , Rosendahl S , Stenlid J , Kauserud H . 2013 . 
Fungal community analysis by high-throughput sequencing of amplified markers--a user’s guide . New Phytol 199 : 288 – 299 . OpenUrl CrossRef PubMed Web of Science 17. ↵ Tedersoo L , Lindahl B . 2016 . Fungal identification biases in microbiome projects . Environ Microbiol Rep 8 : 774 – 779 . OpenUrl CrossRef 18. Halwachs B , Madhusudhan N , Krause R , Nilsson RH , Moissl-Eichinger C , Högenauer C , Thallinger GG , Gorkiewicz G . 2017 . Critical issues in mycobiota analysis . Front Microbiol 8 : 180 . OpenUrl CrossRef 19. ↵ Bálint M , Bahram M , Eren AM , Faust K , Fuhrman JA , Lindahl B , O’Hara RB , Öpik M , Sogin ML , Unterseher M , Tedersoo L . 2016 . Millions of reads, thousands of taxa: microbial community structure and associations analyzed via marker genes . FEMS Microbiol Rev 40 : 686 – 700 . OpenUrl CrossRef PubMed 20. ↵ Bohmann K , Elbrecht V , Carøe C , Bista I , Leese F , Bunce M , Yu DW , Seymour M , Dumbrell AJ , Creer S . 2022 . Strategies for sample labelling and library preparation in DNA metabarcoding studies . Mol Ecol Resour 22 : 1231 – 1246 . OpenUrl 21. Sze MA , Schloss PD . 2019 . The impact of DNA polymerase and number of rounds of amplification in PCR on 16S rRNA gene sequence data . mSphere 4 : 00163 – 00119 . OpenUrl 22. Tedersoo L , Anslan S , Bahram M , Põlme S , Riit T , Liiv I , Kõljalg U , Kisand V , Nilsson H , Hildebrand F , Bork P , Abarenkov K . 2015 . Shotgun metagenomes and multiple primer pair-barcode combinations of amplicons reveal biases in metabarcoding analyses of fungi . MycoKeys 10 : 1 – 43 . OpenUrl 23. Schloss PD , Gevers D , Westcott SL . 2011 . Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies . PLoS One 6 : e27310 . OpenUrl CrossRef PubMed 24. ↵ Schirmer M , Ijaz UZ , D’Amore R , Hall N , Sloan WT , Quince C . 2015 . Insight into biases and sequencing errors for amplicon sequencing with the Illumina MiSeq platform . Nucleic Acids Res 43 : e37 . OpenUrl CrossRef PubMed 25. 
↵ Schloss PD , Westcott SL . 2011 . Assessing and improving methods used in operational taxonomic unit-based approaches for 16S rRNA gene sequence analysis . Appl Environ Microbiol 77 : 3219 – 3226 . OpenUrl Abstract / FREE Full Text 26. Edgar RC . 2016 . UNOISE2: improved error-correction for Illumina 16S and ITS amplicon sequencing . bioRxiv doi: 10.1101/081257 . OpenUrl Abstract / FREE Full Text 27. ↵ Callahan BJ , McMurdie PJ , Holmes SP . 2017 . Exact sequence variants should replace operational taxonomic units in marker-gene data analysis . ISME J 11 : 2639 – 2643 . OpenUrl PubMed 28. ↵ Edgar RC . 2018 . Accuracy of taxonomy prediction for 16S rRNA and fungal ITS sequences . PeerJ 6 : e4652 . OpenUrl CrossRef 29. ↵ Nilsson RH , Larsson K-H , Taylor AFS , Bengtsson-Palme J , Jeppesen TS , Schigel D , Kennedy P , Picard K , Glöckner FO , Tedersoo L , Saar I , Kõljalg U , Abarenkov K . 2019 . The UNITE database for molecular identification of fungi: handling dark taxa and parallel taxonomic classifications . Nucleic Acids Res 47 : D259 – D264 . OpenUrl CrossRef PubMed 30. ↵ Haider D , Hall MW , LaRoche J , Beiko RG . 2024 . Mock microbial community meta-analysis using different trimming of amplicon read lengths . Environ Microbiol 26 : e16566 . OpenUrl 31. Abellan-Schneyder I , Matchado MS , Reitmeier S , Sommer A , Sewald Z , Baumbach J , List M , Neuhaus K . 2021 . Primer, pipelines, parameters: issues in 16S rRNA gene sequencing . mSphere 6 : e01202 – 20 . OpenUrl CrossRef PubMed 32. Mohsen A , Park J , Chen Y-A , Kawashima H , Mizuguchi K . 2019 . Impact of quality trimming on the efficiency of reads joining and diversity analysis of Illumina paired-end reads in the context of QIIME1 and QIIME2 microbiome analysis frameworks . BMC Bioinformatics 20 : 581 . OpenUrl CrossRef PubMed 33. Bokulich NA , Subramanian S , Faith JJ , Gevers D , Gordon JI , Knight R , Mills DA , Caporaso JG . 2013 . 
Quality-filtering vastly improves diversity estimates from Illumina amplicon sequencing . Nat Methods 10 : 57 – 59 . OpenUrl CrossRef PubMed Web of Science 34. ↵ Dacey DP , Chain FJJ . 2021 . Concatenation of paired-end reads improves taxonomic classification of amplicons for profiling microbial communities . BMC Bioinformatics 22 : 493 . OpenUrl 35. ↵ Schloss PD . 2020 . Reintroducing mothur: 10 years later . Appl Environ Microbiol 86 : e02343 – 19 . OpenUrl CrossRef 36. ↵ Bolyen E , Rideout JR , Dillon MR , Bokulich NA , Abnet CC , Al-Ghalith GA , Alexander H , Alm EJ , Arumugam M , Asnicar F , Bai Y , Bisanz JE , Bittinger K , Brejnrod A , Brislawn CJ , Brown CT , Callahan BJ , Caraballo-Rodríguez AM , Chase J , Cope EK , Da Silva R , Diener C , Dorrestein PC , Douglas GM , Durall DM , Duvallet C , Edwardson CF , Ernst M , Estaki M , Fouquier J , Gauglitz JM , Gibbons SM , Gibson DL , Gonzalez A , Gorlick K , Guo J , Hillmann B , Holmes S , Holste H , Huttenhower C , Huttley GA , Janssen S , Jarmusch AK , Jiang L , Kaehler BD , Kang KB , Keefe CR , Keim P , Kelley ST , Knights D , Koester I , Kosciolek T , Kreps J , Langille MGI , Lee J , Ley R , Liu Y-X , Loftfield E , Lozupone C , Maher M , Marotz C , Martin BD , McDonald D , McIver LJ , Melnik AV , Metcalf JL , Morgan SC , Morton JT , Naimey AT , Navas-Molina JA , Nothias LF , Orchanian SB , Pearson T , Peoples SL , Petras D , Preuss ML , Pruesse E , Rasmussen LB , Rivers A , Robeson MS 2nd . , Rosenthal P , Segata N , Shaffer M , Shiffer A , Sinha R , Song SJ , Spear JR , Swafford AD , Thompson LR , Torres PJ , Trinh P , Tripathi A , Turnbaugh PJ , Ul-Hasan S , van der Hooft JJJ , Vargas F , Vázquez-Baeza Y , Vogtmann E , von Hippel M , Walters W , Wan Y , Wang M , Warren J , Weber KC , Williamson CHD , Willis AD , Xu ZZ , Zaneveld JR , Zhang Y , Zhu Q , Knight R , Caporaso JG. 2019 . Reproducible, interactive, scalable and extensible microbiome data science using QIIME 2 . Nat Biotechnol 37 : 852 – 857 . 
OpenUrl CrossRef PubMed 37. ↵ Callahan BJ , McMurdie PJ , Rosen MJ , Han AW , Johnson AJA , Holmes SP . 2016 . DADA2: high-resolution sample inference from Illumina amplicon data . Nat Methods 13 : 581 – 583 . OpenUrl CrossRef PubMed 38. Nilakanta H , Drews KL , Firrell S , Foulkes MA , Jablonski KA . 2014 . A review of software for analyzing molecular sequences . BMC Res Notes 7 : 830 . OpenUrl CrossRef 39. ↵ Turnbaugh PJ , Ley RE , Hamady M , Fraser-Liggett CM , Knight R , Gordon JI . 2007 . The human microbiome project . Nature 449 : 804 – 810 . OpenUrl CrossRef PubMed Web of Science 40. ↵ Palmer JM , Jusino MA , Banik MT , Lindner DL . 2018 . Non-biological synthetic spike-in controls and the AMPtk software pipeline improve mycobiome data . PeerJ 6 : e4925 . OpenUrl CrossRef 41. ↵ Yang R-H , Su J-H , Shang J-J , Wu Y-Y , Li Y , Bao D-P , Yao Y-J . 2018 . Evaluation of the ribosomal DNA internal transcribed spacer (ITS), specifically ITS1 and ITS2, for the analysis of fungal diversity by deep sequencing . PLoS One 13 : e0206428 . OpenUrl 42. ↵ Gweon HS , Oliver A , Taylor J , Booth T , Gibbs M , Read DS , Griffiths RI , Schonrogge K . 2015 . PIPITS: an automated pipeline for analyses of fungal internal transcribed spacer sequences from the Illumina sequencing platform . Methods Ecol Evol 6 : 973 – 980 . OpenUrl CrossRef PubMed 43. ↵ Ansorge R , Birolo G , James SA , Telatin A . 2021 . Dadaist2: a toolkit to automate and simplify statistical analysis and plotting of metabarcoding experiments . Int J Mol Sci 22 : 5309 . OpenUrl 44. ↵ Pauvert C , Buée M , Laval V , Edel-Hermann V , Fauchery L , Gautier A , Lesur I , Vallance J , Vacher C . 2019 . Bioinformatics matters: the accuracy of plant and soil fungal community data is highly dependent on the metabarcoding pipeline . Fungal Ecol 41 : 23 – 33 . OpenUrl 45. Hoggard M , Vesty A , Wong G , Montgomery JM , Fourie C , Douglas RG , Biswas K , Taylor MW . 2018 . 
Characterizing the human mycobiota: a comparison of small subunit rRNA, ITS1, ITS2, and large subunit rRNA genomic targets . Front Microbiol 9 : 2208 . OpenUrl CrossRef 46. ↵ Bokulich N , QIIME2 Development Team . 2018 . Fungal ITS analysis tutorial . https://forum.qiime2.org/t/fungal-its-analysis-tutorial/7351/1 . Retrieved 18 July 2024. 47. ↵ Schloss PD. 2022 . Miseq SOP . https://mothur.org/wiki/miseq_sop/ . Retrieved 18 July 2024. 48. Schloss PD , Westcott SL , Ryabin T , Hall JR , Hartmann M , Hollister EB , Lesniewski RA , Oakley BB , Parks DH , Robinson CJ , Sahl JW , Stres B , Thallinger GG , Van Horn DJ , Weber CF. 2009 . Introducing mothur: open-source, platform-independent, community-supported software for describing and comparing microbial communities . Appl Environ Microbiol 75 : 7537 – 7541 . OpenUrl Abstract / FREE Full Text 49. ↵ Kozich JJ , Westcott SL , Baxter NT , Highlander SK , Schloss PD . 2013 . Development of a dual-index sequencing strategy and curation pipeline for analyzing amplicon sequence data on the MiSeq Illumina sequencing platform . Appl Environ Microbiol 79 : 5112 – 5120 . OpenUrl Abstract / FREE Full Text 50. Callahan BJ. 2021 . DADA2 pipeline tutorial (1.16) . https://benjjneb.github.io/dada2/tutorial.html . Retrieved 18 July 2024. 51. ↵ QIIME 2 development team . 2024 . “Moving Pictures” tutorial . https://docs.qiime2.org/2024.5/tutorials/moving-pictures/ . Retrieved 18 July 2024. 52. ↵ Callahan B. 2018 . DADA2 ITS Pipeline Workflow (1.8) . https://benjjneb.github.io/dada2/ITS_workflow.html . Retrieved 18 July 2024. 53. ↵ Edgar RC . 2010 . Search and clustering orders of magnitude faster than BLAST . Bioinformatics 26 : 2460 – 2461 . OpenUrl CrossRef PubMed Web of Science 54. ↵ Pollock J , Glendinning L , Wisedchanwet T , Watson M . 2018 . The madness of microbiome: attempting to find consensus “best practice” for 16S microbiome studies . Appl Environ Microbiol 84 : e02627 – 17 . OpenUrl 55. 
Sinha R , Abnet CC , White O , Knight R , Huttenhower C . 2015 . The microbiome quality control project: baseline study design and future directions . Genome Biol 16 : 276 . OpenUrl CrossRef PubMed 56. ↵ McLaren MR , Willis AD , Callahan BJ . 2019 . Consistent and correctable bias in metagenomic sequencing experiments . eLife 8 : e46923 . OpenUrl 57. ↵ Rolling T , Zhai B , Frame J , Hohl TM , Taur Y. 2022 . Customization of a DADA2-based pipeline for fungal internal transcribed spacer 1 (ITS1) amplicon data sets . JCI Insight 7 : e151663 . OpenUrl 58. ↵ Kyle KE , Puckett SP , Caraballo-Rodríguez AM , Rivera-Chávez J , Samples RM , Earp CE , Raja HA , Pearce CJ , Ernst M , van der Hooft JJJ , Adams ME , Oberlies NH , Dorrestein PC , Klassen JL , Balunas MJ. 2023 . Trachymyrmex septentrionalis ants promote fungus garden hygiene using Trichoderma -derived metabolite cues . Proc Natl Acad Sci U S A 120 : e2219373120 . OpenUrl 59. ↵ Bolger AM , Lohse M , Usadel B . 2014 . Trimmomatic: a flexible trimmer for Illumina sequence data . Bioinformatics 30 : 2114 – 2120 . OpenUrl CrossRef PubMed Web of Science 60. ↵ Liu T , Chen C-Y , Chen-Deng A , Chen Y-L , Wang J-Y , Hou Y-I , Lin M-C . 2020 . Joining Illumina paired-end reads for classifying phylogenetic marker sequences . BMC Bioinformatics 21 : 105 . OpenUrl CrossRef 61. ↵ Edgar RC . 2013 . UPARSE: highly accurate OTU sequences from microbial amplicon reads . Nat Methods 10 : 996 – 998 . OpenUrl CrossRef PubMed Web of Science 62. ↵ Bokulich NA , Ziemski M , Robeson MS 2nd . , Kaehler BD. 2020 . Measuring the microbiome: best practices for developing and benchmarking microbiomics methods . Comput Struct Biotechnol J 18 : 4048 – 4062 . OpenUrl 63. ↵ Ihrmark K , Bödeker ITM , Cruz-Martinez K , Friberg H , Kubartova A , Schenck J , Strid Y , Stenlid J , Brandström-Durling M , Clemmensen KE , Lindahl BD . 2012 . New primers to amplify the fungal ITS2 region--evaluation by 454-sequencing of artificial and natural communities . 
FEMS Microbiol Ecol 82 : 666 – 677 . OpenUrl CrossRef PubMed Web of Science 64. ↵ Nguyen NH , Smith D , Peay K , Kennedy P . 2015 . Parsing ecological signal from noise in next generation amplicon sequencing . New Phytol 205 : 1389 – 1393 . OpenUrl CrossRef PubMed 65. ↵ Bakker MG . 2018 . A fungal mock community control for amplicon sequencing experiments . Mol Ecol Resour 18 : 541 – 556 . OpenUrl 66. ↵ Alukumbura AS , Bigi A , Sarrocco S , Fernando WGD , Vannacci G , Mazzoncini M , Bakker MG . 2022 . Minimal impacts on the wheat microbiome when Trichoderma gamsii T6085 is applied as a biocontrol agent to manage fusarium head blight disease . Front Microbiol 13 : 972016 . OpenUrl 67. ↵ Rodrigues A , Mueller UG , Ishak HD , Bacci M Jr . , Pagnocca FC . 2011 . Ecology of microfungal communities in gardens of fungus-growing ants (Hymenoptera: Formicidae): a year-long survey of three species of attine ants in Central Texas . FEMS Microbiol Ecol 78 : 244 – 255 . OpenUrl CrossRef PubMed 68. Currie CR , Mueller UG , Malloch D . 1999 . The agricultural pathology of ant fungus gardens . Proc Natl Acad Sci U S A 96 : 7998 – 8002 . OpenUrl Abstract / FREE Full Text 69. Pagnocca FC , Masiulionis VE , Rodrigues A . 2012 . Specialized fungal parasites and opportunistic fungi in gardens of attine ants . Psyche J Entom 2012 : 905109 . OpenUrl 70. Silva A , Rodrigues A , Bacci M Jr . , Pagnocca FC , Bueno OC . 2006 . Susceptibility of the ant-cultivated fungus Leucoagaricus gongylophorus (Agaricales: Basidiomycota) towards microfungi . Mycopathologia 162 : 115 – 119 . OpenUrl CrossRef PubMed Web of Science 71. Rodrigues A , Bacci M Jr , Mueller UG , Ortiz A , Pagnocca FC . 2008 . Microfungal “weeds” in the leafcutter ant symbiosis . Microb Ecol 56 : 604 – 614 . OpenUrl CrossRef PubMed Web of Science 72. Montoya QV , Meirelles LA , Chaverri P , Rodrigues A . 2016 . Unraveling Trichoderma species in the attine ant environment: description of three new taxa . 
Antonie Van Leeuwenhoek 109 : 633 – 651 . OpenUrl 73. ↵ Rocha SL , Evans HC , Jorge VL , Cardoso LAO , Pereira FST , Rocha FB , Barreto RW , Hart AG , Elliot SL . 2017 . Recognition of endophytic Trichoderma species by leaf-cutting ants and their potential in a Trojan-horse management strategy . R Soc Open Sci 4 : 160628 . OpenUrl CrossRef 74. ↵ Guzmán-Guzmán P , Porras-Troncoso MD , Olmedo-Monfil V , Herrera-Estrella A . 2019 . Trichoderma species: versatile plant symbionts . Phytopathology 109 : 6 – 16 . OpenUrl CrossRef 75. Druzhinina IS , Seidl-Seiboth V , Herrera-Estrella A , Horwitz BA , Kenerley CM , Monte E , Mukherjee PK , Zeilinger S , Grigoriev IV , Kubicek CP . 2011 . Trichoderma : the genomics of opportunistic success . Nat Rev Microbiol 9 : 749 – 759 . OpenUrl CrossRef PubMed 76. Kredics L , Chen L , Kedves O , Büchner R , Hatvani L , Allaga H , Nagy VD , Khaled JM , Alharbi NS , Vágvölgyi C . 2018 . Molecular tools for monitoring Trichoderma in agricultural environments . Front Microbiol 9 : 1599 . OpenUrl 77. Macías-Rodríguez L , Contreras-Cornejo HA , Adame-Garnica SG , Del-Val E , Larsen J . 2020 . The interactions of Trichoderma at multiple trophic levels: inter-kingdom communication . Microbiol Res 240 : 126552 . OpenUrl 78. ↵ Contreras-Cornejo HA , Macías-Rodríguez L , del-Val E , Larsen J. 2018 . Interactions of Trichoderma with plants, insects, and plant pathogen microorganisms: chemical and molecular bases , p. 263 – 269 . In Merillon , J-M , Ramawat , KG (eds.), Co-Evolution of Secondary Metabolites . Springer International Publishing , Cham . 79. 
↵ Kubicek CP , Herrera-Estrella A , Seidl-Seiboth V , Martinez DA , Druzhinina IS , Thon M , Zeilinger S , Casas-Flores S , Horwitz BA , Mukherjee PK , Mukherjee M , Kredics L , Alcaraz LD , Aerts A , Antal Z , Atanasova L , Cervantes-Badillo MG , Challacombe J , Chertkov O , McCluskey K , Coulpier F , Deshpande N , von Döhren H , Ebbole DJ , Esquivel-Naranjo EU , Fekete E , Flipphi M , Glaser F , Gómez-Rodríguez EY , Gruber S , Han C , Henrissat B , Hermosa R , Hernández-Oñate M , Karaffa L , Kosti I , Le Crom S , Lindquist E , Lucas S , Lübeck M , Lübeck PS , Margeot A , Metz B , Misra M , Nevalainen H , Omann M , Packer N , Perrone G , Uresti-Rivera EE , Salamov A , Schmoll M , Seiboth B , Shapiro H , Sukno S , Tamayo-Ramos JA , Tisch D , Wiest A , Wilkinson HH , Zhang M , Coutinho PM , Kenerley CM , Monte E , Baker SE , Grigoriev IV . 2011 . Comparative genome sequence analysis underscores mycoparasitism as the ancestral life style of Trichoderma . Genome Biol 12 : R40 . OpenUrl CrossRef PubMed 80. Harman GE , Howell CR , Viterbo A , Chet I , Lorito M . 2004 . Trichoderma species-- opportunistic, avirulent plant symbionts . Nat Rev Microbiol 2 : 43 – 56 . OpenUrl CrossRef PubMed Web of Science 81. ↵ Lorito M , Farkas V , Rebuffat S , Bodo B , Kubicek CP . 1996 . Cell wall synthesis is a major target of mycoparasitic antagonism by Trichoderma harzianum . J Bacteriol 178 : 6382 – 6385 . OpenUrl Abstract / FREE Full Text 82. ↵ White TJ , Bruns T , Lee S , Taylor J . 1990 . Amplification and direct sequencing of fungal ribosomal RNA genes for phylogenetics , p. 315 – 322 . In Innis , MA , Gelfand , DH , Sninsky , JJ , White , TJ (eds.), PCR protocols: a guide to methods and applications . Academic Press, Inc , San Diego . 83. ↵ R Core Team. 2021 . R: a language and environment for statistical computing . Vienna, Austria . https://www.R-project.org/ . 84. 
↵ Bengtsson-Palme J , Ryberg M , Hartmann M , Branco S , Wang Z , Godhe A , De Wit P , Sánchez-García M , Ebersberger I , de Sousa F , Amend A , Jumpponen A , Unterseher M , Kristiansson E , Abarenkov K , Bertrand YJK , Sanli K , Eriksson KM , Vik U , Veldre V , Nilsson RH. 2013 . Improved software detection and extraction of ITS1 and ITS2 from ribosomal ITS sequences of fungi and other eukaryotes for analysis of environmental sequencing data . Methods Ecol Evol 4 : 914 – 919 . OpenUrl CrossRef PubMed 85. ↵ Abarenkov K , Zirk A , Piirmann T , Pöhönen R , Ivanov F , Nilsson RH , Kõljalg U . 2020 . UNITE general FASTA release for Fungi (Version 04.02.2020) . UNITE Community . doi: 10.15156/BIO/786368 OpenUrl CrossRef 86. ↵ McMurdie PJ , Holmes S . 2013 . phyloseq: an R package for reproducible interactive analysis and graphics of microbiome census data . PLoS One 8 : e61217 . OpenUrl CrossRef PubMed 87. ↵ Wickham H , Vaughan D , Girlich , M. 2024 . Tidyr: Tidy Messy Data . https://tidyr.tidyverse.org . 88. ↵ Beigel K , Matthews AE , Kellner K , Pawlik CV , Greenwold M , Seal JN . 2021 . Cophylogenetic analyses of Trachymyrmex ant-fungal specificity: “One to one with some exceptions.” Mol Ecol 30 : 5605 – 5620 . OpenUrl CrossRef 89. Solomon SE , Rabeling C , Sosa-Calvo J , Lopes CT , Rodrigues A , Vasconcelos HL , Bacci M Jr . , Mueller UG , Schultz TR . 2019 . The molecular phylogenetics of Trachymyrmex Forel ants and their fungal cultivars provide insights into the origin and coevolutionary history of “higher-attine” ant agriculture . Syst Entomol 44 : 939 – 956 . OpenUrl 90. ↵ Mueller UG , Kardish MR , Ishak HD , Wright AM , Solomon SE , Bruschi SM , Carlson AL , Bacci M Jr . . 2018 . Phylogenetic patterns of ant-fungus associations indicate that farming strategies, not only a superior fungal cultivar, explain the ecological success of leafcutter ants . Mol Ecol 27 : 2414 – 2434 . OpenUrl CrossRef 91. 
Urrea-Valencia S , Júnior RB , Kooij PW , Montoya QV , Rodrigues A . 2023 . Unraveling fungal species cultivated by lower attine ants . Mycol Prog 22 : 66 . OpenUrl 92. ↵ Luiso J , Kellner K , Matthews AE , Mueller UG , Seal JN . 2020 . High diversity and multiple invasions to North America by fungi grown by the northern-most Trachymyrmex and Mycetomoellerius ant species . Fungal Ecol 44 : 100878 . OpenUrl 93. ↵ Morgulis A , Coulouris G , Raytselis Y , Madden TL , Agarwala R , Schäffer AA . 2008 . Database indexing for production MegaBLAST searches . Bioinformatics 24 : 1757 – 1764 . OpenUrl CrossRef PubMed Web of Science 94. ↵ Sayers EW , Bolton EE , Brister JR , Canese K , Chan J , Comeau DC , Connor R , Funk K , Kelly C , Kim S , Madej T , Marchler-Bauer A , Lanczycki C , Lathrop S , Lu Z , Thibaud-Nissen F , Murphy T , Phan L , Skripchenko Y , Tse T , Wang J , Williams R , Trawick BW , Pruitt KD , Sherry ST . 2022 . Database resources of the National Center for Biotechnology Information . Nucleic Acids Res 50 : D20 – D26 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted August 03, 2024. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Untrimmed ITS2 metabarcode sequences cause artificially reduced abundances of specific fungal taxa Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Untrimmed ITS2 metabarcode sequences cause artificially reduced abundances of specific fungal taxa Kathleen E. Kyle , Jonathan L. Klassen bioRxiv 2024.08.02.606430; doi: https://doi.org/10.1101/2024.08.02.606430 Share This Article: Copy Citation Tools Untrimmed ITS2 metabarcode sequences cause artificially reduced abundances of specific fungal taxa Kathleen E. Kyle , Jonathan L. Klassen bioRxiv 2024.08.02.606430; doi: https://doi.org/10.1101/2024.08.02.606430 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Microbiology Subject Areas All Articles Animal Behavior and Cognition (7652) Biochemistry (17749) Bioengineering (13935) Bioinformatics (42079) Biophysics (21500) Cancer Biology (18655) Cell Biology (25586) Clinical Trials (138) Developmental Biology (13410) Ecology (19949) Epidemiology (2067) Evolutionary Biology (24377) Genetics (15637) Genomics (22561) Immunology (17779) Microbiology (40505) Molecular Biology (17218) Neuroscience (88823) Paleontology (667) Pathology (2845) Pharmacology and Toxicology (4839) Physiology (7666) Plant Biology (15182) Scientific Communication and Education (2048) Synthetic Biology (4304) Systems Biology (9840) Zoology (2273)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.