Full text
74,427 characters
· extracted from
preprint-html
· click to expand
RESOLVING BIOLOGY’S DARK MATTER: SPECIES RICHNESS, SPATIOTEMPORAL DISTRIBUTION, AND COMMUNITY COMPOSITION OF A DARK TAXON | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results RESOLVING BIOLOGY’S DARK MATTER: SPECIES RICHNESS, SPATIOTEMPORAL DISTRIBUTION, AND COMMUNITY COMPOSITION OF A DARK TAXON View ORCID Profile Emily Hartop , Leshon Lee , Amrita Srivathsan , Mirkka Jones , Pablo Peña-Aguilera , Otso Ovaskainen , Tomas Roslin , View ORCID Profile Rudolf Meier doi: https://doi.org/10.1101/2024.05.07.592951 Emily Hartop 1 Department of Natural History, NTNU University Museum, Norwegian University of Science and Technology , Trondheim, NO-7491, Norway 2 Zoology Department, Stockholm University , SE-106 91, Stockholm, Sweden Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Emily Hartop For correspondence: emily.hartop{at}ntnu.no rudolf.meier{at}mfn.berlin Leshon Lee 3 Department of Biological Sciences, National University of Singapore, Science Drive 4 , Singapore 117558 4 National University of Singapore, Lee Kong Chian Natural History Museum , 2 Conservatory Dr, Singapore 117377 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Amrita Srivathsan 5 Center for Integrative Biodiversity Discovery, Leibniz Institute for Evolution and Biodiversity Science , Museum für Naturkunde, Invalidenstraße 43, 10115 Berlin, Germany 3 Department of Biological Sciences, National University of Singapore, Science Drive 4 , Singapore 117558 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mirkka Jones 6 Faculty of Biological and Environmental Sciences, University of Helsinki , P.O. Box 65, Helsinki FI-00014, Finland 7 Institute of Biotechnology, HILIFE Helsinki Institute of Life Science, University of Helsinki , P.O. Box 65, Helsinki FI-00014, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Pablo Peña-Aguilera 8 Swedish University of Agricultural Sciences (SLU), Department of Ecology , Ulls väg 18B, SE-75651 Uppsala, Sweden Find this author on Google Scholar Find this author on PubMed Search for this author on this site Otso Ovaskainen 9 Department of Biological and Environmental Science , P.O. Box 35, FI-40014, University of Jyväskylä , Jyväskylä, Finland 6 Faculty of Biological and Environmental Sciences, University of Helsinki , P.O. Box 65, Helsinki FI-00014, Finland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tomas Roslin 6 Faculty of Biological and Environmental Sciences, University of Helsinki , P.O. Box 65, Helsinki FI-00014, Finland 8 Swedish University of Agricultural Sciences (SLU), Department of Ecology , Ulls väg 18B, SE-75651 Uppsala, Sweden Find this author on Google Scholar Find this author on PubMed Search for this author on this site Rudolf Meier 5 Center for Integrative Biodiversity Discovery, Leibniz Institute for Evolution and Biodiversity Science , Museum für Naturkunde, Invalidenstraße 43, 10115 Berlin, Germany 10 Institute for Biology, Humboldt University of Berlin , Unter den Linden 6, 10117 Berlin, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rudolf Meier For correspondence: emily.hartop{at}ntnu.no rudolf.meier{at}mfn.berlin Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Background Zoology’s dark matter comprises hyperdiverse, poorly known taxa that are numerically dominant but largely unstudied, even in temperate regions where charismatic taxa are well understood. It is everywhere, but high diversity, abundance, and small size have historically stymied its study. We demonstrate how entomological dark matter can be elucidated using high-throughput DNA barcoding (“megabarcoding”). We reveal the high abundance and diversity of scuttle flies (Diptera: Phoridae) in Sweden using 31,800 specimens from 37 sites across four seasonal periods. We investigate the number of scuttle fly species in Sweden and the environmental factors driving community changes across time and space. Results Swedish scuttle fly diversity is much higher than previously known, with 549 mOTUs (putative species) detected, compared to 374 previously recorded species. Hierarchical Modelling of Species Communities reveals that scuttle fly communities are highly structured by latitude and strongly driven by climatic factors. Large dissimilarities between sites and seasons are driven by turnover rather than nestedness. Climate changes are predicted to significantly affect the 47% of species that show significant responses to mean annual temperature. Results were robust whether using haplotype diversity or species-proxies (mOTUs) as response variables. Additionally, species-level models of common taxa adequately predict overall species richness. Conclusions Understanding the bulk of the diversity around us is imperative during an era of biodiversity loss. We show that dark insect taxa can be efficiently characterized and surveyed with megabarcoding. Undersampling of rare taxa and choice of operational taxonomic units do not alter the main ecological inferences, making it an opportune time to tackle zoology’s dark matter. B ackground If we go on the way we have, the fault is our greed and if we are not willing to change, we will disappear from the face of the globe, to be replaced by the insect. – Jacques Yves Cousteau Biodiversity loss in the Anthropocene is driven by changes in climate, land use, but also species introductions ( Díaz et al. 2006 ; Cardinale et al. 2012 ). Such loss can result in the concomitant decline in ecosystem services vital to society ( Mace et al. 2018 ; Lu et al. 2020 ) and may ultimately disrupt global supply chains and food security (WEF, 2020). Accurate monitoring of biodiversity is therefore a global priority ( Naeem et al. 2016 ). A crucial first step to monitoring biodiversity is obtaining robust quantitative baseline data. Most biologists would consider this to be data on species diversity, abundance, and biomass for those taxa that contribute substantially to these quantitative metrics. However, most biodiversity studies cover only a few well-studied groups that are relatively easily identified and quantified (e.g., birds, mammals, amphibians, bees, and butterflies). Such charismatic taxa are then used as proxies for all taxa in a region (Ricklefs 2006; Wiens 2007 ; Svenning et al. 2008 ; Ollerton 2017; Garda et al. 2018 ; Bell et al. 2019 ; Bogoni et al. 2020 ; Orr et al. 2020 ) instead of basing our understanding of global biodiversity on a broad and unbiased representation of biodiversity covering a wide range of traits and responses to environmental change ( Bar-On et al. 2018 , Goodsell et al. 2024 ). A key component of global biodiversity are “dark taxa”, i.e., taxonomic groups for which less than 10% of the diversity is described and the species diversity is estimated to be upwards of 1000 ( Hartop et al. 2022 ). Such poorly known groups do not only inhabit inaccessible realms like the deep sea ( Rabone et al. 2023 ) but also the terrestrial habitats in which we live. A recent study ( Srivathsan et al. 2023 ) revealed that 20 insect families (of which 10 belong to Diptera) account for >50% of local species diversity. Alarmingly, the very same families suffered from extreme taxonomic neglect and are therefore poorly represented in biodiversity surveys. Identifying and tackling the diversity of these dark taxa with scalable techniques thus emerges as an urgent priority for biodiversity science. Large-scale studies of dark taxa have only become feasible in recent years due to advancements in sequencing technologies coupled with efficient single-specimen DNA barcoding workflows (Srivathsan et al. 2019a; Yeo et al. 2020 ; Srivathsan et al. 2021 ; Hartop et al. 2022 ; Meier et al. 2024 ; Chua et al., 2023 : “megabarcoding”). Such workflows allow large numbers of specimens to be processed and sorted to putative species (mOTUs), while providing exact specimen counts, and vouchers for subsequent morphological, taxonomic, and biological work. Scuttle flies in the family Phoridae (Diptera) have been considered the seventh most speciose and abundant insect family globally ( Srivathsan et al. 2023 ). However, to date only ca. 4000 species have been described, although their actual diversity may be two orders of magnitude larger (Srivathsan et al. 2019a). In addition to their extreme species richness, scuttle flies occupy a wide range of ecological niches, from herbivores and predators to scavengers, parasitoids, and parasites (reviewed by Disney 1994 ). Nonetheless, previous ecological studies focusing on scuttle flies ( Durska 1996 , 2001 , 2002 , 2003 , 2006 , 2013 , 2020 ; Durska et al. 2005 ) have been of limited scope due to time consuming morphological identification methods. In this study, we use sorting with megabarcoding to generate the data for answering fundamental questions about this dark taxon. We ask how many species of scuttle flies occur in Sweden, how are they distributed across time and space, and what environmental variables drive their distribution. To test whether the choice of species and species delimitation method will affect the results, we carry out the same analyses using both mOTUs as species-proxies and haplotype diversity, and test whether rare species influence the overall inferences. We show how that “Dark Taxon Zoology” can quickly yield the answers urgently needed in the Anthropocene. R esults Diversity We obtained a total of 31 739 COI barcodes belonging to 2 697 haplotypes from scuttle fly samples from 37 sites ( Fig. 1 , Supp. Fig. S1) and four seasonal time periods (Supp. Fig. S2) across Sweden (Supp. Table S1). At the species threshold (1.7%) we detected 549 mOTUs. Species accumulation curves revealed that scuttle fly species diversity was incompletely sampled overall and across sites, horticultural zones and time periods ( Fig. 2 , Supp. Figs. S3, S4). Between 38 and 145 species were observed per site, with Chao1 richness estimates per site ranging from 83 to 244 species ( Fig. 2b,c ). Regional richness estimates varied between nearly 400 species estimated in the southernmost coastal zone (Zone 1), to fewer than 200 estimated for the alpine zone (Supp. Fig. S3). Midsummer and late-summer time-periods were both characterised by richness estimates of around 450 species, while the late spring and offseason time-periods showed lower species richness estimates of around 300 species (Supp. Fig. S4). Total species richness in Sweden was estimated at between 652 species (for Chao1, Fig. 2a ) and 713 species (combined non-parametic estimator (CNE) in Ronquist et al. 2020 ), suggesting that 100-160 species of scuttle flies remain to be sampled. Download figure Open in new tab Figure 1. The location of 37 study sites colour-coded according to the plant hardiness zones (odlingszoner, 1-8 and Alpine) of the Swedish Horticultural Society (Riksförbundet Svensk Trädgård) (map used with permission) next to an NMDS plot of study samples colour-coded with the same zones. Download figure Open in new tab Figure 2. (a) Species accumulation and Chao1 estimate curves for the scuttle fly dataset across Sweden. Notably, current sampling is far from exhaustive, and numbers of singletons and doubletons in our dataset are high regardless of sample size, (b) Species accumulation curves by sampling sites, colour-coded by zone, and (c) Chao1 estimate curves by sampling sites, colour-coded by zone. For a map of the zones using the same colour codes, see Fig. 1 . Ordinations of community composition NMDS plots revealed a clear distinction between scuttle fly communities in the southern (zones 1-3) and northern (zones 4-alpine) plant hardiness zones ( Fig. 1 , Supp. Fig. S5). ANOSIM supported significant differences between southern and northern zones at this threshold (R=0.58, p=0.001), and SIMPER revealed that North-South similarity was just 13.2%, as compared to similarities of 24.6% and 23.2%, respectively, among samples within the northern and southern zones (Supp. Tables S2-S3). The separation between zones was consistent across clustering thresholds ranging from haplotypes-as-such to a threshold of 1.7% sequence similarity, with stress values around 0.21 (Supp. Fig. S5 top row). Above a threshold of 3%, however, the patterns were increasingly blurred and the stress values were higher (0.25-0.27) (Supp. Fig. S5 bottom row). These patterns were also evident in ANOSIM analyses, where the sample statistic decreased from 0.54 for haplotypes to 0.27 for 5% mOTUs, indicating a decreasing relationship between scuttle fly community composition and plant hardiness zones at higher clustering thresholds (Supp. Table S2). Similarly, in SIMPER analyses, average similarity between zones increased from 20.2% for haplotypes to 41.4% for 5% mOTUs (Supp. Table S3). For species, scuttle fly communities were found to be distinct across most zones (R=0.51, p=0.001), with higher (average=28.5%) similarity within zones than between (average=19.2%) zones (Supp. Tables S2, 3). Samples from the late spring, midsummer, and late-summer time-periods showed a clear progression along the ranked plant hardiness zones, while offseason samples appeared more randomly distributed (Supp. Fig. S6). Northern sites (IV-alpine) showed higher distinctness across seasons (R=0.59) than when considering all sites (R=0.27) (Supp. Table S4). The between-season similarity of all sites averaged 16.9% and within time-period similarity averaged 23.7% (Supp. Table S5, top). However, for northern sites only (IV-alpine), between time-period similarity was 21.3% and within time-period similarity was 37.2% (Supp. Table S5, bottom). Hierarchical Modelling of Species Communities (HMSC) Models of the four response matrices (species-occurrence, haplotypes-occurrence, species-abundance, haplotypes-abundance) showed relatively good MCMC convergence with potential scale reduction factors of the models’ beta and omega parameters close to one (Supp. Fig. S7). Explained variance averaged c. 30% for the occurrences and c. 60% for the abundances of both species and haplotypes, but there were large differences in model fit among taxa (Supp. Fig. S8), haplotype occurrence mean ± sd Tjur R 2 =0.30 ± 0.15 (range 0.05 – 0.82); haplotype abundance R 2 =0.61 ± 0.26 (range 0.00-1.00); species presence-absence Tjur R 2 =0.30 ± 0.14 (range 0.05-0.72); species abundance R 2 =0.59 ± 0.26 (range 0.02-1.00)). Sampling time-period explained the largest fraction of variance, on average, in all four HMSC models (mean 10-11% in the occurrence models; mean 15-16% in the abundance models; Fig. 3 , Supp. Fig. S8). Mean annual temperature explained almost as much variance as sampling time-period in the occurrence models (mean 7% and 8% in the species vs haplotype models), but clearly less than sampling season in the abundance models (mean 6% in the species models and 5% in the haplotype models) ( Fig. 3 , Supp. Fig. S8). All response variables showed strong climatic (including temporal) community structure and there was also strong spatial structure in species and haplotype occurrences linked with the latitudinal temperature gradient across Sweden. Tree cover explained more variance in the abundance models (mean 6% for both species and haplotypes) than in the occurrence models (mean 1% for both species and haplotypes). Trapping effort also explained more variance on average in the abundance models (mean 7% and 8%) than in the occurrence models (mean 1.5% for both), as did the effect of having (vs not having) sequenced the full trap sample (2% mean for occurrence, 6% mean for abundance). Download figure Open in new tab Figure 3. Summary of explained variance in the occurrences and abundances of scuttle fly haplotypes and species across samples (bar height corresponds to meanTjur’s R 2 across the modelled taxa in the occurrence models and to mean R 2 in the abundance models). All four models show strong climatic structure (in shades of brown) on scuttle fly communities, as captured by fixed variables describing sampling season and mean annual temperature and a random effect based on median sampling date. The spatial fraction of explained variance (lime green) reflects community structural differences among sites that were not captured by the fixed effect covariates. Differences in sampling effort, i.e. whether or not trapped flies were all sequenced or not and the number of field trapping days per sample, also affect the predictability of community structure (shades of orange). The abundance of species and haplotypes, and to a lesser extent their occurrence, was also strongly structured by habitat type as described by forest and woodland cover (blue). Finally, we included a categorical random effect representing sample identity (purple). While the occurrences of taxa and haplotypes showed statistically supported responses to all model covariates, their abundances showed responses less frequently, and responses with strong support were primarily with seasonal covariates ( Fig. 4 ). Download figure Open in new tab Figure 4. Predicted occurrence and abundance responses of 193 scuttle fly haplotypes and 162 mOTUs (“Species”) to HMSC model covariates. Positive (red) and negative (blue) estimated responses with a posterior probability of 0.95 are illustrated. The three rows below the intercept illustrate the estimated effects of three levels of a categorical variable representing sampling time-period (midsummer, late summer, offseason) relative to the baseline (late spring). The subsequent three rows represent responses to % forest or woodland cover and mean annual temperature (“bio1”) at sampling sites. The final two rows represent the effects of two sampling-related differences among samples: the number of trapping days and whether all specimens in the trap sample were sequenced, and hence available for HMSC analysis, or not. Most species and haplotypes with a statistically supported seasonal abundance trend peaked in the late spring. Compared to late spring, the midsummer fauna showed a reduction of 28% in species counts and 16% in haplotype counts, respectively, with a further reduction of 24% in both species counts and haplotype counts towards the late summer. Taxa and haplotypes were also usually more prevalent in late spring than in mid- or especially late summer. However, a minority of species and haplotypes (9% and 7%, respectively) showed the reverse pattern, being more prevalent in late summer than in the spring. Offseason captures in the late fall to early winter were consistently low, and the number of offseason samples included in the models was the smallest (n = 20 samples). Nonetheless, 33% of the species modelled and 29% of the haplotypes modelled were occasionally detected in offseason samples. Taxon occurrences showed a mixture of positive (29% vs 30% for species and haplotypes, respectively) and negative (18% vs 23%) responses to the annual temperature gradient (“bio1”) across Sweden ( Fig. 4 ), reflecting the broad-scale compositional changes from southern towards northern Sweden seen in the NMDS ordinations. Where detected, the occurrence responses of taxa to forest or woodland cover were more often positive than negative (10% positive vs 4% negative for species; 8% vs 2% for haplotypes; Fig. 4 ). Longer trapping periods did not consistently result in higher detection probabilities of taxa, presumably due to seasonal differences in trapping duration (4% positive vs 5% negative responses for species; 8% positive vs 4% negative for haplotypes; Fig. 4 ). As expected, most taxa (81-82% in both the species and haplotype models) showed a negative occurrence response to the binary variable indicating whether all specimens in a sample were sequenced or not (“FullSample”) ( Fig. 4 ). The mean annual temperature gradient across Sweden was predicted to affect the prevalence of 38% of species, but does not appear to be a main driver of species abundance ( Fig. 4 ). The predicted effect of the temperature gradient on species prevalence during late summer was positive in 22% of taxa and negative in 16% of taxa (Supp. Fig. S9). Beyond spatial patterns explained by these climatic predictors, there was evidence of localised spatial autocorrelation in species and haplotype site occupancies at scales of less than 40 km. Neither species nor haplotype abundances were spatially autocorrelated, nor did we detect statistical support for temporal autocorrelation in any model. Residual correlations in the distributions of taxa were detected among sites and samples in both the species and especially the haplotype occurrence models (Supp. Fig. S10). Residual associations of taxa over time were also evident, but less frequently (Supp. Fig. S10). Residual associations between taxon/haplotype occurrences likely indicate that our models either lack or imperfectly represent some of the variables that structure the occurrences of scuttle fly taxa and haplotypes in space and time. No residual associations among taxa were evident among sites or over time in the species or haplotype abundance models, and very few were detected among samples (Supp. Fig. S10). Hence, covariance in the abundances of taxa and haplotypes across occupied samples was well modelled by the environmental and other covariates in these models. The observed richness of the excluded rare vs modelled common species and haplotypes was strongly positively correlated (R = 0.59 for species and R = 0.80 for haplotypes). Compositional differences between samples in terms of the rare vs common species and haplotypes were also positively correlated (for taxon presence-absence Mantel R = 0.31 for species and Mantel R = 0.64 for haplotypes; for taxon abundance Mantel R = 0.30 for species and Mantel R = 0.60 for haplotypes). Community Dissimilarity Turnover accounted for the bulk of spatial, temporal and spatio-temporal variation in community dissimilarity ( Fig. 5 ). In the spatial analyses, the mean turnover of species and haplotypes between sample pairs was 0.75 and 0.87, respectively, while the corresponding mean values of nestedness were 0.07 and 0.03. In temporal analyses, similarly, the mean values of turnover were 0.45 and 0.74 for species and haplotypes respectively, and the corresponding means of nestedness were 0.13 vs. 0.07. Finally, mean turnover values for the spatio-temporal analyses were 0.75 and 0.84 vs a mean nestedness of 0.07 and 0.04 for species and haplotypes, respectively. Download figure Open in new tab Figure 5. Pairwise community dissimilarity among scuttle fly species and haplotype samples, Following Baselga & Orme (2012), we partitioned overall dissimilarity into its turnover and nestedness components and illustrate these as well as overall dissimilarity as a function of distance (standardized Euclidean distance calculated from sample site coordinates). Regardless of the time-period and operational taxonomic units used, we found a significant positive correlation between turnover and geographical distances, meaning that communities in closer proximity to each other are more similar in composition (Supp. Fig. S11). The mean spatial turnover of species communities was highest in the summer time periods (midsummer: 0.82, late summer: 0.84) and lower in late spring and offseason (0.78 and 0.72, respectively). However, we did not find any significant correlation between temporal distances (difference in mean week) and turnover (see Table S6). Patterns of nestedness showed no detectable correlation with any of the distances explored (except for the grouped temporal distance) (Supp. Table S6). Within each time-period, scuttle fly communities become more distinct from neighbouring communities from late spring to late summer (Supp. Table S6 and Supp. Fig. S11-13). D iscussion Our study marks the first country-wide examination of a dark taxon’s diversity and distribution. It revealed more than 500 species based on processing ca. 31 800 specimens. The ecological analyses suggest that climate change may have profound (and quickly apparent) effects on communities of scuttle flies, that may serve as early indicators of future shifts in the environment. We demonstrate that armed with recent advancements in sequencing technologies, bioinformatics pipelines and molecular barcoding workflows ( Meier et al. 2006 ; Wang et al. 2018 , 2018a; Srivathsan et al. 2019b, 2021 ; Yeo et al. 2020 ; Hartop et al. 2021), we are now able to resolve patterns in the alpha diversity, spatial and temporal turnover, and species communities of challenging dark taxa. We show that for the scuttle flies in Sweden, a major part of all species remains undiscovered, that local communities show major turnover in space and time, and that ecological patterns are largely robust to the finer details of species delimitation. Below, we will address each of these findings in further detail – and the importance of these patterns in dark taxon biology to biodiversity science. Diversity Our sample only scratched the surface of the scuttle fly fauna of Sweden ( Fig. 2 , Supp. Figs. S3, S4). While the true Swedish fauna of scuttle flies is still hard to estimate, the 549 putative species found (and 652-713 predicted based on current sampling) greatly exceed the 374 previously documented from the country. Previous estimates for the scuttle fly fauna have ranged from 1 100 to nearly 2 000 species, suggesting that the true numbers may be even higher ( Ronquist et al. 2020 ). This would not be surprising, given that our study included sampling from just 37 traps in a country of over 450 000 square kilometers; i.e., many habitats remained unsampled. However, the available data confirms that for dark taxa the basic pattern in ecology holds that most species are rare and that finding all species would require massive sampling ( Callaghan et al. 2023 ). Where the remaining species will be lurking is unknown. While Malaise traps are remarkably effective at capturing flies ( Karlsson et al. 2020 ; Skvarla et al. 2020 ), other trapping methods would certainly reveal additional species. An “All Diptera Biodiversity Inventory” conducted in Costa Rica utilised a wide variety of methods ( Borkent and Brown 2015 ), revealing that 59% of fly species were unique to a specific collection method ( Borkent et al. 2018 ). Intriguingly, species-rich taxa with particularly interesting and often impactful biologies (e.g. parasitoids) may be underrepresented in Malaise trap samples. For example, Brown (1996) reported that nearly half of the species of ant-parasitising scuttle flies were caught exclusively over army ant raids. Similar results were obtained by Disney et al. (1982) , who found some species of scuttle flies were uniquely or primarily collected in water traps rather than Malaise traps. Another possible frontier is the canopy. Ongoing research in upper levels of the canopy in the Amazon suggests that it contains many scuttle fly species not found in lower sampling elevations ( Gilliland 2021 ), contrasting with previous data showing scuttle flies as a taxon primarily collected at ground-level ( Kitching et al. 2005 ). In a limited sample (159 specimens) from canopy trap prototypes run concurrently with the Malaise trap sampling for this study we found a single unique mOTU not seen in the 31 794 scuttle flies from ground level, indicating that even in a temperate environment we may have more to find in the canopy. Thus, no single sampling method will suffice to reveal all taxa and additional trapping sites that target regions or habitats un- or under-represented in the current study will also be needed for an exhaustive inventory of scuttle flies. Some of this complementary sampling may have already been carried out by the Swedish Malaise Trap Project years ago ( Karlsson et al. 2020 ). An excellent follow-up to this study would be sequencing scuttle fly samples from that sampling campaign, or from another insect campaign that is ongoing in Sweden – the Insect Biome Atlas project ( www.insectbiomeatlas.com ). In addition to focusing future efforts on revealing more species, the analysis of other life stages may offer a more nuanced understanding of scuttle fly communities. Our study is based on a sample of adult flies. Patterns for the longer-living larval stages are at this point unknown, as they are not easily collected. Lessons from Ecological Analyses Our ordinations of community composition suggested strong structuring of scuttle fly communities by Swedish horticultural zones ( Fig. 1 ) and season (Supp. Fig. S6). Clear north-south structuring of the Swedish insect fauna has also been observed in damselflies, mosquitos, and caddisflies ( Gullefors 2008 , Wellenreuther et al. 2012 , Lundström et al. 2013 ), and strong phenological patterning is a hallmark of any high-latitude fauna ( Wolda, 1988 ). Our ordinations also indicated that spatial structuring was largely independent of mOTU clustering threshold at or below the species proxy level (Supp. Fig. S5). To address these findings more rigorously and without the constraints of pre-determined zones, we implemented HMSC. This confirmed that scuttle fly community variation is both highly seasonal and strongly tied to the latitudinal temperature gradient over Sweden, with all models strongly predicted by climatic covariates ( Fig. 3 , Supp. Fig. 8). The presence of significant spatial autocorrelation in the HMSC occurrence models reflects the gradation of scuttle fly distributions across space. Scuttle fly communities also showed clear compositional changes over time during the warm season, from late spring through mid and late summer, while offseason sampling was too inconsistent to reveal any patterns (Supp. Figs. S2, S6). Consistent with our findings that scuttle fly communities are driven by climatic covariates, we found more rapid spatial turnover in taxa at the species than at the haplotype level ( Fig. 5 ). Recent work has predicted that when dispersal limitation is the dominant driver of species distributions, the rate of spatial turnover of biological communities will be similar at both the haplotype and species levels ( Baselga et al. 2022 ). Conversely, when environmental conditions strongly constrain species ranges, community similarity is predicted to decay at different rates across genealogical scales. Our results have several implications in the face of climate change. Individual taxa show a mixture of positive and negative responses to mean annual temperature, and to a lesser extent to seasonality. This implies that we may see a substantial number of both climate change “winners and losers” in the future, as species ranges and phenology expand, contract, or shift ( Fig. 4 , Supp. Fig. S9). Adult scuttle flies are ephemeral – with short lifespans and high turnover and mobility – they may respond rapidly and serve as indicators for future shifts of other taxa and in the environment more broadly. Our observation of steeply declining abundance from late spring through the summer into the offseason may partially reflect the extreme temperatures and drought in Europe in summer 2018 ( Bakke et al. 2020 , Bastos et al. 2020 ). While this suggests that our seasonal results may be atypical, they are perhaps also a sign of summers to come, as climate change increases the frequency and severity of these events ( Hari et al. 2020 ). With our initial results confirmed by HMSC, the predictive value of the simple plant hardiness map for scuttle fly distributions offers excellent news. It suggests that even such a relatively coarse-grained tool can be used as a reliable indicator of the regional compositions of scuttle flies. That this is the case is only intuitive: Many scuttle flies exist close to ground level and therefore, like plants, their distributions may be closely tied to soil temperature, acidity, moisture, or composition (all of which would be interesting covariates to explore in future modelling). Additionally, some species are known to have direct interactions with plants, fungi, and ground-dwelling insects – factors which may again tie them to the microclimate at ground level. Future studies might focus on these microenvironmental variables, to address the unexplained variance in scuttle fly communities from this study. Dark Taxon Zoology Dark taxa have historically been largely ignored due to the complexities involved in studying groups of highly diverse and abundant organisms of small size. How, then, do we make the study of Dark Taxa efficient and start “dark taxon zoology” to respond to the need for quantitative data on biodiversity? To address this, we assessed whether the taxa analysed need to exactly match taxonomically validated species for ecological hypotheses to be addressed. If not, we may avoid endless discussions about species delimitation and can proceed with ecological analyses. Promisingly, our current results suggest that ecological patterns are largely robust to the exact clustering thresholds used. Overall biodiversity patterns, patterns of community turnover and drivers of distribution were virtually identical using thresholds from 0% (haplotype data) to 1.7% ( Figs. 3 - 5 , Supp. Figs. S5, 8, 11-13). Haplotype data are convenient because they do not require any taxonomic decisions, which may please molecular ecologists, with the important caveat that exceeding an appropriate species proxy threshold will obscure patterns because too many species will be lumped together (Supp. Fig. S5, bottom row). A second potential stumbling block in the study of dark taxa lies in the large numbers of rare species. If understanding basic ecological patterns is dependent on these rare species, dark taxon zoology will be difficult. Our results indicate that both richness and compositional differences between samples of rare versus common species and haplotypes were both positively correlated. This suggests that more common and rarer species respond similarly to the same drivers. Again, this is excellent news, since it suggests that ecological inferences regarding the drivers of species distribution and community composition can be based on the more common species – which are much easier to detect. C onclusions We here argue for a dark taxon zoology and illustrate that it is not a hopeless undertaking by targeting scuttle flies as a typical dark taxon in Sweden. We sample across the entire country, we estimate the species richness, resolve patterns of distribution across time and space, and pinpoint environmental features that drive these distributions. Our results suggest that such assessments will be insensitive to specific taxonomic cut-offs, and robust to undersampling of rare taxa. Overall, the study hopes to contribute to a more quantitative approach to biodiversity. In the future, advances in molecular workflows, bioinformatics, robotics and automation will make these groups increasingly efficient to study ( Meier et al. 2024 ). We hope that our case study will serve to propel forward Dark Taxon Zoology, bringing the main part of diversity into the realm of biodiversity science. M aterials and M ethods Target taxon: the scuttle flies of Sweden Sweden has one of the best-known animal faunas in the world due to efforts dating from Carl Linnaeus to the Swedish Taxonomy Initiative and Malaise Trap Project (Linnaeus 1735; Miller 2005 ; Karlsson et al. 2020 , but see Ronquist et al. 2020 ). While 374 species of scuttle flies have been documented in the country (SLU Artdatabanken 2021), this is an underestimate. For comparison, a single suburban garden in Cambridge, UK, yielded nearly 100 species of scuttle flies ( Brown and Hartop 2017 ), while backyards in Los Angeles, California, can support up to 82 species ( Brown and Hartop 2017 ). Previous estimates of scuttle fly diversity in Sweden have proposed that the true fauna may approach 2000 species (CNE estimate in Ronquist et al. 2020 ), but this may be an overestimate based on an error-prone process of morphological identification ( Ronquist et al. 2020 ). Sampling To start resolving the species richness, spatiotemporal distribution, and community composition of Swedish scuttle flies, we sampled communities of flying insects at 37 locations across Sweden ( Fig. 1 , Supp. Fig. S1, Supp. Table S1). These samples were collected in ∼80% ethanol using Townes-style Malaise traps ( Townes 1972 ). Scuttle flies were sorted from the trap samples and preserved in ∼80% ethanol at −20°C. Sampling started in May 2018 and continued into the following year. Although trapping was continuous, the sample periods varied across sites due to their extensive distribution across Sweden. Four time periods that matched the summer phenology were therefore established in sequence for each site: late spring, midsummer, late summer, and offseason (Supp. Fig. S2, Supp. Table S1). Some offseason samples could not be retrieved in late 2018 and were collected in 2019. To ensure uniformity in the number of weeks per sampling period and avoid introducing biases, we attributed the latest sampling date for the offseason traps in 2018 to those samples collected in 2019. Additionally, late spring samples were only obtained from 25 sites, as 12 sites were not installed until later in the season (see Supp. Table S1 for sampling details). To compare richness estimates across seasons, we plotted accumulation curves for each time-period excluding the 12 traps that were not yet installed in late spring. Sequencing and bioinformatics A total of 136 samples were selected for analysis. Most samples (94) contained thousands of scuttle fly specimens and subsampling was needed. Individuals were randomly selected with a minimum of two 96-well microplates of scuttle flies processed per sample. This resulted in a total of at least 190 specimens extracted per site and time-period for most samples. 31 samples containing fewer than 190 specimens and were thus processed completely, and two samples were found to contain no scuttle flies. Additional plates of specimens from nine samples that had been processed in an earlier study ( Hartop et al. 2022 ) were also included (Supp. Table S1). All sample information, including numbers of barcodes obtained per sample and whether a sample was fully processed, is found in Supp. Table S1. DNA were obtained from the specimens using 10 μL of HotSHOT lysis buffer ( Truett et al. 2000 ). The extraction was carried out in a thermocycler at 65 °C for 18 minutes, then for 2 minutes at 98 °C. Amplification was carried out on a 313-bp fragment of Cytochrome Oxidase 1 ( COI ) using primers m1COlintF: 5’-GGWACWGGWTGAACWGTWTAYCCYCC-3’ ( Leray et al. 2013 ) and modified jgHCO2198: 5’-TANACYTCNGGRTGNCCRAARAAYCA-3’ ( Geller et al. 2013 ). For a small subset of samples, COI was amplified using the primer pair jgHCO2198 and LCO1490 ( Folmer et al. 1994 ; Geller et al. 2013 ). Amplifications were conducted with tagged primers following Wang et al. (2018) for Illumina and Srivathsan et al. (2019b, 2021 ) for MinION. PCR reactions contained 4 μl Mastermix from CWBio, 1 μl of 1 mg/ml BSA, 1 μl of 10 μM of each primer, and 1 μl of DNA. PCR conditions were a 5 min initial denaturation at 94 °C followed by 35 cycles of denaturation/annealing/extension (94 °C (1 min)/47 °C (2 min)/72 °C (1 min)), and a final extension at 72 °C (5 min). A subset of wells (N=8-12 with negative control) from each PCR plate was run on an agarose gel to check for plate-wide failure, before products were pooled and then purified using AMPure XP beads (Beckman Coulter Life Sciences, Indiana, USA). DNA concentration was quantified with a Qubit™ dsDNA HS Assay Kit (Invitrogen, California, USA). Illumina and MinION sequencing were used to sequence the amplicons for this study. Illumina libraries were prepared using a TruSeq DNA PCR-free kit to obtain 250bp PE sequences using Illumina HiSeq 2500; the sequencing was outsourced. Nanopore sequencing using MinION was conducted in house following Srivathsan et al. (2019b). Library preparation was conducted using either SQK-LSK109 or SQK-LSK110 ligation sequencing kits (Oxford Nanopore Technologies, Oxford, UK) with 200-ng pooled and purified PCR products. The manufacturer’s instructions were followed except for use of 1× Ampure beads instead of 0.4× as suggested in the instructions because the minibarcode amplicons in our experiments were short (∼391 bp with primers and tags), and a modified protocol for end-repair as described in Srivathsan et al. (2021) . The sequencing was carried out using a MinION sequencer with either R9.4.1 or R10.3 flow cells for a maximum of 72 hours. Basecalling was conducted using Guppy versions 2.3.5+53a111f and 4.2.3+f90bd04. FastQ files were demultiplexed to obtain individual COI sequences identifiable back to individual specimens with unique specimen codes. This was done following Srivathsan et al. (2019a, 2021 ) for MinION and Wang et al. (2018) for Illumina. To exclude contaminants and potential mis-sorts, COI sequences were matched using BLAST to NCBI GenBank’s nucleotide database and sequences with >97% similarity to non-scuttle fly taxa were removed. Sequences were then aligned using MAFFT v7 ( Katoh and Standley 2013 ) and clustered into molecular Operational Taxonomic Units (mOTUs) using Objective Clustering (part of “TaxonDNA”, see Meier et al. 2006 ), a distance-based method that groups sequences based on a user-defined threshold for minimum interspecific uncorrected p-distance. A species-proxy clustering threshold of 1.7% was used for the primary analyses as this was the distance that maximized cluster congruence to species-level morphology for a subset of 18 000 scuttle flies from this dataset ( Hartop et al. 2022 ). Hereafter, references to “species” are to mOTUs at this threshold. To assess the impact of the selected threshold on distribution patterns, we examined other thresholds ranging from 0-5% and ran models using both species and haplotype data. Diversity To characterise local and national species richness, we used accumulation curves and Chao1 richness, as implemented in EstimateS ( Colwell 2013 ) and R (R Development Core Team) package iNEXT ( Chao et al. 2014 ) for total diversity, diversity per zone, and diversity per site. Following the approach of Ronquist et al. (2020) , a combined non-parametric estimator (CNE) was used to obtain an alternative estimate of total species richness. Characterisation of community variation To visualise scuttle fly community composition in the context of geographic and climatic variation, we used Non-metric Multi-Dimensional Scaling (NMDS) plotted with R packages ggplot2 (Wickham, 2016) and Vegan v2.5-4 (Oksanen et al., 2019) using both abundance-based (Bray-Curtis) and incidence-based (Jaccard) dissimilarity indices. Since both metrics yielded nearly identical results, the results in the paper are based on Bray-Curtis indices. Sites were classified according to the nine plant hardiness zones of the Swedish Horticultural Society that synthesize climatic variables of particular importance to horticultural plants into horticultural zones: strong and rapid temperature changes, very low temperatures sustained for long periods, evaporation occurring when the sun is shining but the ground is still frozen, and temporal considerations of climatic and environmental conditions ( Fig. 1 , Riksförbundet Svensk Trädgård 2018, used with permission). As these zones have proven useful in identifying the survival and growth of horticultural plants across Sweden, we hypothesized that they may offer a relevant description of the environment for any organism sensitive to similar climatic variables – including scuttle flies. They also offer a more regional approach than the broad biogeographic classification previously used for spatial analysis of scuttle flies in Sweden ( Ronquist et al. 2020 ). We therefore tested whether the zones could predict scuttle fly richness and distributions ( Disney 1994 ; Kitching et al. 2005 ; McGlynn et al. 2019 ). To quantify differences in scuttle fly community composition between plant hardiness zones and to assess the significance of those differences, we used Analysis of Similarities (ANOSIM) and Similarity Percentage analysis (SIMPER). These tests were run with PRIMER v7 ( Clarke and Gorley 2006 ). We excluded samples that contained fewer than 100 specimens from the ordination analyses, as small sample sizes can artificially generate large distances in the NMDS plots, thus obscuring structured variation in community composition arising from responses to the environment. Hierarchical Modelling of Species Communities To relate variation in community composition to continuous environmental drivers without the assumptions of zones used in our NMDS visualisations, we used Hierarchical Modelling of Species Communities (HMSC; ( Ovaskainen et al. 2017 ; Ovaskainen and Abrego 2020 ), a type of joint species distribution model ( Warton et al. 2015 )). Due to the zero-inflated nature of the data, we fitted hurdle models, i.e., one model for the occurrence of taxa across samples (probit regression), and a second model for their abundance conditional on presence (linear regression for log-transformed count data, with zeros masked as missing data), henceforth referred to simply as an “abundance model”. To evaluate the impact of the criterion used in species delimitation, we used four different response variables: species presence-absence, species abundance (where present), haplotype occurrence and haplotype abundance (where present), thus resulting in four HMSC models being fit. As these analyses are uninformative for taxa with very sparse data, we included only haplotypes or species with at least five occurrences (n = 391 haplotypes and 273 species, out of the 2 697 haplotypes and 549 species observed; see Results). To be able to relate each sample to specific climatic conditions, we also excluded 15 samples for which the trapping duration exceeded approximately a month (>34 days). We included five predictor variables, coded as fixed effects, in our HMSC models. To test for seasonal changes in scuttle fly communities among the sampling time-periods, we included the four-category variable “Time.Period”. To assess the effect of the spatial gradient in mean climatic conditions across Sweden on scuttle fly communities, we included the continuous variable mean annual temperature (“bio1” from the Worldclim database, Fick and Hijmans 2017 ). The importance of tree cover for scuttle fly distributions was assessed by including a continuous variable describing the percentage of forest or woodland cover within a 50m buffer (“ForestWood”) derived from the Swedish National Land Cover Database ( https://www.naturvardsverket.se/en/services-and-permits/maps-and-map-services/national-land-cover-database/ ). Site values for “bio1” and “ForestWood” were extracted using the R package raster ( Hijmans 2024 ). To account for the effect of sampling effort on taxon detection, we further included a continuous variable quantifying the total number of trapping days per sample (“TrapDays”) and a binary variable indicating whether all specimens in a sample were sequenced or not (“FullSample”). The latter, binary variable defines whether the sample was fully sequenced or not, and was included to account for the higher likelihood of encountering taxa in samples that had very high individual abundances (i.e., samples that could not be fully sequenced). We moreover included a spatially explicit random effect based on sample site coordinates (“Random: site”) and a temporally explicit random effect based on median sampling date (“Random: time”) to model any spatial or temporal autocorrelation in the scuttle fly dataset, and a categorical random effect representing sample identity (“Random: sample”). We fitted the model with the R-package HMSC ( Tikhonov et al. 2020 ) assuming the default prior distributions. We sampled the posterior distribution of four MCMC chains, each of which was run for 37 500 iterations, of which the first 12 500 were removed as burn-in. The iterations were thinned by 100 to yield 250 posterior samples per chain, and thus 1 000 posterior samples in total. To explore the rate of Markov chain Monte Carlo (MCMC) convergence, we also fitted otherwise identical models but with 375 iterations (burn-in 125, thin 1) and 3,750 iterations (burn-in 1 250, thin 10). Convergence was assessed by examining the potential scale reduction factor (PSRF) distribution over the fixed effect ( β ) and random effect ( Ω ) parameters, equivalent to the Gelman–Rubin statistic ( Gelman et al. 2014 ). We examined the explanatory power of the models through species-specific coefficients of discrimination (Tjur R 2 ) for the occurrence models, which measure how well the model discriminates those samples in which a taxon occurs from those in which it does not occur, and R 2 for the abundance models. Tjur R 2 is defined as the difference in average model-predicted probability of occurrence for samples in which the species is present vs absent ( Tjur 2009 ). To assess whether the distributions of rare taxa are likely to be driven by similar factors to the common ones, we calculated Pearson and Mantel correlations, respectively, between the observed species richness and community composition (Bray-Curtis dissimilarity) of the modelled more common taxa and the excluded rare taxa (those with <5 occurrences). Community dissimilarity To further partition variation in scuttle fly community composition in time (between seasons) and space (across Sweden), we dissected overall community dissimilarity (β-diversity) into its turnover (i.e. species replacement) and nestedness (differences of species richness between sites) components ( Baselga, 2010 ). Our analyses encompassed three aspects: 1) spatial differences; differences in community composition between each sampling site considering their geographical distance, 2) temporal differences; differences in community composition between each sampling period, accounting for their temporal distance in weeks, and 3) spatio-temporal differences; community differences between each sampling site and sampling period including their joint effect. We computed pairwise Jaccard community dissimilarity values using both the species and haplotypes datasets for comparison. Given the observed geographical distance between sampling sites, these distances were rescaled to a mean of zero and a standard deviation of one before analyses. To characterise the temporal difference between samples in each sample pair, we used the difference in the mean week of sampling (for sample-specific details, see Supp. Table S1). Values of total β-diversity, turnover, and nestedness were calculated for each pairwise comparisons of sampling sites, sampling period and sample pairs. We excluded self-pairs and included data for each pair only once. To test the correlation between each beta diversity component and the distances in space and time we used Mantel tests based on Pearson moment correlations. All calculations were implemented in R package ‘betapart’ (Baselga & Orme, 2012). D eclarations Availability of Data and Materials Data and scripts are available on the project GitHub page at https://github.com/leshonlee/DocumentingPhorids . General HMSC pipeline scripts are available at https://www2.helsinki.fi/en/researchgroups/statistical-ecology/hmsc . Competing Interests The authors declare that they have no competing interests. Funding EH was funded by Swedish Taxonomy Initiative grant 2016-203 4.3. TR and OO were funded by the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (ERC-synergy grant 856506—LIFEPLAN). OO was funded by Academy of Finland (grant no. 336212 and 345110). MJ was supported by the Academy of Finland’s ‘Thriving Nature’ research profiling action. Authors’ Contributions EH and RM designed the study. EH and LL generated the data. EH, LL, AM, MJ, and PPA analysed the data. EH, LL, and RM wrote the initial draft of the manuscript. All authors contributed to manuscript revision, read, and approved the submitted version. Acknowledgements We thank the Scuttle Fly Sorting Party crew at Station Linné – Carina Romero Ugarph, Harald Havnås, Johan Ennerfelt, Marianne “Mia” Blomqvist, Nino Pettersson, Robert Ennerfelt, and especially Dave Karlsson. We thank the members of the Evolutionary Biology Lab at the National University of Singapore for help with the many hours of wetlab work. We thank Darren Yeo for assisting us with visualisations. We thank Inger Ekrem at Riksförbundet Svensk Trädgård for helping with the plant hardiness zone map, Tomas Lagerström for further information on the map, and Eva Ronquist for first bringing our attention to the map. We thank the Swedish Taxonomy Initiative for the support to investigate the scuttle flies of Sweden, this study brings us one step closer to the ultimate goal of describing all of these species. Footnotes Author affiliations updated, minor revisions have been made to format and figure numbering. https://github.com/leshonlee/DocumentingPhorids . References ↵ Bakke SJ , Ionita M , Tallaksen LM. The 2018 northern European hydrological drought and its drivers in a historical perspective . Hydrology and Earth System Sciences . 2020 ; 24 ( 11 ): 5621 – 53 . doi: 10.5194/hess-24-5621-2020 OpenUrl CrossRef ↵ Bar-On YM , Phillips R , Milo R . The biomass distribution on Earth . Proceedings of the National Academy of Sciences . 2018 ; 115 ( 25 ): 6506 – 11 . doi: 10.1073/pnas.1711842115 OpenUrl Abstract / FREE Full Text ↵ Baselga A . Partitioning the turnover and nestedness components of beta diversity . Global Ecology and Biogeography . 2010 ; 19 ( 1 ): 134 – 43 . OpenUrl ↵ Baselga A , Gómez-Rodríguez C , Araújo MB , Castro-Insua A , Arenas M , Posada D , et al. Joint analysis of species and genetic variation to quantify the role of dispersal and environmental constraints in community turnover . Ecography . 2022 ; 2022 ( 5 ). doi: 10.1111/ecog.05808 OpenUrl CrossRef Baselga A , Orme CDL. betapart: an R package for the study of beta diversity . Methods in Ecology and Evolution . 2012 ; 3 ( 5 ): 808 – 12 . OpenUrl ↵ Bastos A , Ciais P , Friedlingstein P , Sitch S , Pongratz J , Fan L , et al. Direct and seasonal legacy effects of the 2018 heat wave and drought on European ecosystem productivity . Sci Adv . 2020 ; 6 ( 24 ). doi: 10.1126/sciadv.aba2724 OpenUrl FREE Full Text ↵ Bell JR , Botham MS , Henrys PA , Leech DI , Pearce-Higgins JW , Shortall CR , et al. Spatial and habitat variation in aphid, butterfly, moth and bird phenologies over the last half century . Glob Change Biol . 2019 ; 25 : 1982 – 94 . OpenUrl ↵ Bogoni JA , Peres CA , Ferraz KMPMB . Extent, intensity and drivers of mammal defaunation: a continental-scale analysis across the Neotropics . Sci Rep . 2020 ; 10 : 14750 . OpenUrl ↵ Borkent A , Brown BV . How to inventory tropical flies (Diptera)-One of the megadiverse orders of insects . Zootaxa . 2015 ; 3949 : 301 – 22 . OpenUrl ↵ Borkent ART , Brown BV , Adler PH , Amorim DDS , Barber K , Bickel D , et al. Remarkable fly (Diptera) diversity in a patch of Costa Rican cloud forest: Why inventory is a vital science . Zootaxa . 2018 ; 4402 . ↵ Brown BV , Hartop EA . Big data from tiny flies: patterns revealed from over 42,000 phorid flies (Insecta: Diptera: Phoridae) collected over one year in Los Angeles, California, USA . Urban Ecosyst . 2017 ; 20 : 521 – 34 . OpenUrl ↵ Brown BV . Phorid Newsletter 5 . Phorid Newsl . 1996 ; 5 . ↵ Callaghan CT , Borda-de-Água L , van Klink R , Rozzi R , Pereira HM . Unveiling global species abundance distributions . Nat Ecol Evol . 2023 ; 7 : 1600 – 9 . doi: 10.1038/s41559-023-02173-y OpenUrl CrossRef ↵ Cardinale BJ , Duffy JE , Gonzalez A , Hooper DU , Perrings C , Venail P , et al. Biodiversity loss and its impact on humanity . Nature . 2012 ; 486 : 59 – 67 . OpenUrl CrossRef PubMed Web of Science ↵ Chao A , Ellison AM , Colwell RK , Gotelli NJ , Sander EL , Hsieh TC , et al. Rarefaction and extrapolation with Hill numbers: a framework for sampling and estimation in species diversity studies . Ecol Monogr . 2014 ; 84 : 45 – 67 . OpenUrl CrossRef Web of Science ↵ Chua PYS , Bourlat SJ , Ferguson C , Korlevic P , Zhao L , Ekrem T , et al. Future of DNA-based insect monitoring . Trends Genet . 2023 Jul ; 39 ( 7 ): 531 – 44 . doi: 10.1016/j.tig.2023.02.012 . Epub 2023 Mar 10 pmid: . PMID: 36907721 . OpenUrl CrossRef PubMed ↵ Clarke K , Gorley RN . PRIMER v6: user manual/tutorial. PRIMER-E , Plymouth . 2006 ; 29 : 1060 – 65 . OpenUrl ↵ Colwell RK. EstimateS: Statistical estimation of species richness and shared species from samples. Version 9 and earlier . User’s Guide and application . 2013 . ↵ Díaz S , Fargione J , Chapin FS , Tilman D . Biodiversity loss threatens human well-being . PLoS Biol . 2006 ; 4 : 1300 – 5 . OpenUrl CrossRef Web of Science ↵ Disney RHL . Scuttle flies: the Phoridae . London : Chapman and Hall ; 1994 . Disney RHL , Durska E . Conservation evaluation and the choice of faunal taxa to sample . Biodivers Conserv . 2008 ; 17 : 449 – 451 . OpenUrl ↵ Disney RHL , Erzinclioglu YZ , de C Henshaw DJ , Unwin DM , Withers P , Woods A. Collecting methods and the adequacy of attempted fauna surveys, with reference to the Diptera . Field Stud . 1982 ; 5 : 607 – 621 . OpenUrl ↵ Durska E . The species composition and structure of scuttle fly communities (Diptera: Phoridae) in mature tree stands in pine forests at different stages of habitat degradation . Fragm Faun . 1996 ; 39 : 267 – 285 . OpenUrl ↵ Durska E . Secondary succession of scuttle fly communities (Diptera: Phoridae) in moist pine forest in Bialowieza Forest . Fragm Faun . 2001 ; 44 : 79 – 128 . OpenUrl ↵ Durska E. The phenology of dominant scuttle-fly (Diptera: Phoridae) species in the Bialowieza Forest . Entomol Fenn . 2002 ; 13 : 123 – 127 . OpenUrl ↵ Durska E . The phenology of Triphleba Rondani species (Diptera: Phoridae) in moist pine forests in the Bialowieza Forest . Entomol Fenn . 2003 ; 14 : 177 – 182 . OpenUrl ↵ Durska E . Diversity of scuttle fly (Diptera: Phoridae) communities in the plantations of moist pine forests of the Bialowieza Primeval Forest and the Tuchola Forest (Poland) . Biodivers Conserv . 2006 ; 15 : 385 – 393 . OpenUrl ↵ Durska E . Effects of disturbances on scuttle flies (Diptera: Phoridae) in Pine Forests . Biodivers Conserv . 2013 ; 22 : 1991 – 2021 . OpenUrl ↵ Durska E . Preliminary data of the scuttle flies (Diptera: Phoridae) in the linden-oak-hornbeam forest of the Wigry National Park , North East Poland. Fragm Faun . 2020 ; 63 : 89 – 98 . OpenUrl ↵ Durska E , Kaczorowska E , Disney RHL . Scuttle flies (Diptera: Phoridae) of saline habitats of the Gulf of Gdansk, Poland . Entomol Fenn . 2005 ; 16 : 159 – 164 . OpenUrl ↵ Fick SE , Hijmans RJ . Worldclim 2: New 1-km spatial resolution climate surfaces for global land areas . Int J Climatol . 2017 ; 37 : 4302 – 4315 . OpenUrl CrossRef PubMed ↵ Folmer O , Black M , Hoeh W , Lutz R , Vrijenhoek R . DNA primers for amplification of mitochondrial cytochrome c oxidase subunit I from diverse metazoan invertebrates . Mol Mar Biol Biotechnol . 1994 ; 3 : 294 – 299 . OpenUrl CrossRef PubMed ↵ Garda AA , Stein MG , Machado RB , Lion MB , Juncá FA , Napoli MF. Ecology, biogeography, and conservation of amphibians of the Caatinga. In: Caatinga: The Largest Tropical Dry Forest Region in South America . Springer International Publishing ; 2018 . p. 133 – 149 . ↵ Geller J , Meyer C , Parker M , Hawk H . Redesign of PCR primers for mitochondrial cytochrome c oxidase subunit I for marine invertebrates and application in all-taxa biotic surveys . Mol Ecol Resour . 2013 ; 13 : 851 – 861 . OpenUrl CrossRef PubMed ↵ Gelman A , Carlin JB , Stern HS , Dunson DB , Vehtari A , Rubin DB . Bayesian data analysis ( 3rd ed .). London : CRC Press ; 2014 . ↵ Gilliland HC. Hundreds of new and unusual insects discovered in the Amazon’s canopy . Available from: https://www.nationalgeographic.com/magazine/article/hundreds-of-new-and-unusual-insects-discovered-in-the-amazon-canopy-feature ; 2021 . ↵ Goodsell R , Tack A , Ronquist F , van Dijk L , Iwaszkiewicz-Eggebrecht E , Miraldo A , et al. The rarity of Invertebrates prevents reliable application of IUCN Red-List criteria . EcoEvoRxiv . 2024 . Available from : doi: 10.32942/X23G71 . OpenUrl CrossRef ↵ Gullefors B . Limes norrlandicus - a natural biogeographical border for caddisflies (Trichoptera) in Sweden . Ferrantia . 2008 ; 55 : 61 – 65 . OpenUrl ↵ Hari V , Rakovec O , Markonis Y , Hanel M , Kumar R . Increased future occurrences of the exceptional 2018 – 2019 Central European drought under global warming. Sci Rep . 2020 ; 10 : 12207 . doi: 10.1038/s41598-020-68872-9 . OpenUrl CrossRef ↵ Hartop E , Srivathsan A , Ronquist F , Meier R . Towards Large-Scale Integrative Taxonomy (LIT): Resolving the Data Conundrum for Dark Taxa . Syst Biol . 2022 . doi: 10.1093/sysbio/syac033 . OpenUrl CrossRef ↵ Hijmans R. raster: Geographic Data Analysis and Modeling . R package version 3.6-27 , Available from: https://rspatial.org/raster ; 2024 . ↵ Karlsson D , Hartop E , Forshage M , Jaschhof M , Ronquist F . The Swedish Malaise Trap Project: A 15 Year Retrospective on a Countrywide Insect Inventory . Biodivers Data J . 2020 ; 8 . ↵ Katoh K , Standley DM . MAFFT Multiple Sequence Alignment Software Version 7: Improvements in Performance and Usability . Mol Biol Evol . 2013 ; 30 : 772 – 780 . OpenUrl CrossRef PubMed Web of Science ↵ Kitching RL , Bickel DJ , Boulter S. Guild Analyses of Dipteran Assemblages: A Rationale and Investigation of Seasonality and Stratification in Selected Rainforest Faunas. In: The Evolutionary Biology of Flies . New York : Columbia University Press ; 2005 . p. 388 – 415 . ↵ Leray M , Yang JY , Meyer CP , Mills SC , Agudelo N , Ranwez V , et al. A new versatile primer set targeting a short fragment of the mitochondrial COI region for metabarcoding metazoan diversity: application for characterizing coral reef fish gut contents . Front Zool . 2013 ; 10 : 34 . Linnaeus C. Systema naturæ , sive regna tria naturæ systematice proposita per classes, ordines, genera, & species . Leiden : Lugduni Batavorum ; 1735 . ↵ Lu Y , Yang Y , Sun B , Yuan J , Yu M , Stenseth NC , et al. Spatial variation in biodiversity loss across China under multiple environmental stressors . Sci Adv . 2020 ; 6 . ↵ Lundström JO , Schäfer ML , Hesson JC , Blomgren E , Lindström A , Wahlqvist P , et al. The geographic distribution of mosquito species in Sweden . J Eur Mosq Control Assoc . 2013 ; 31 . ↵ Mace GM , Barrett M , Burgess ND , Cornell SE , Freeman R , Grooten M , et al. Aiming higher to bend the curve of biodiversity loss . Nat Sustain . 2018 ; 1 : 448 – 451 . OpenUrl ↵ McGlynn TP , Meineke EK , Bahlai CA , Li E , Hartop EA , Adams BJ , et al. Temperature accounts for the biodiversity of a hyperdiverse group of insects in urban Los Angeles . Proc R Soc B . 2019 ; 286 : 20191020 . ↵ Meier R , Hartop E , Pylatiuk C , Srivathsan A . Towards holistic insect monitoring: species discovery, description, identification, and traits for all insects . Phil Trans R Soc B . 2024 ; 379 : 20230120 . doi: 10.1098/rstb.2023-0120 . OpenUrl CrossRef ↵ Meier R , Shiyang K , Vaidya G , Ng PK . DNA barcoding and taxonomy in Diptera: a tale of high intraspecific variability and low identification success . Syst Biol . 2006 ; 55 : 715 – 28 . OpenUrl CrossRef PubMed Web of Science ↵ Miller G . Linnaeus’s Legacy Carries On . Science . 2005 ; 307 : 1038 – 1039 . OpenUrl ↵ Naeem S , Chazdon R , Duffy JE , Prager C , Worm B . Biodiversity and human well-being: an essential link for sustainable development . Proc R Soc B Biol Sci . 2016 ; 283 : 20162091 . Ollerton J. Pollinator Diversity: Distribution, Ecological Function , and Conservation . Annu Rev Ecol Evol Syst . 2017 ; 48 : 353 – 76 . OpenUrl CrossRef ↵ Orr MC , Hughes AC , Chesters D , Pickering J , Zhu CD , Ascher JS . Global Patterns and Drivers of Bee Distribution . Curr Biol . 2020 ; 30 : 843 – 848 . OpenUrl ↵ Ovaskainen O , Abrego N . Joint Species Distribution Modelling . Cambridge University Press ; 2020 . ↵ Ovaskainen O , Tikhonov G , Norberg A , Guillaume Blanchet F , Duan L , Dunson D , et al. How to make more out of community data? A conceptual framework and its implementation as models and software . Ecol Lett . 2017 ; 20 : 561 – 76 . OpenUrl CrossRef Page R. Dark taxa: GenBank in a post-taxonomic world [Internet] . 2011 [cited 2024 May 28]. Available from: https://iphylo.blogspot.com/2011/04/dark-taxa-genbank-in-post-taxonomic.html . Page RD . DNA barcoding and taxonomy: dark taxa and dark texts . Philos Trans R Soc Lond B Biol Sci . 2016 ; 371 . ↵ Rabone M , Wiethase JH , Simon-Lledó E , Emery AM , Jones DOB , Dahlgren TG , et al. How many metazoan species live in the world’s largest mineral exploration region? Current Biology . 2023 . Riksförbundet Svensk Trädgård. Zonkartan [Internet] . 2018 [cited 2024 May 28]. Available from: http://www.tradgard.org/svensk_tradgard/zonkartan.html . ↵ Ronquist F , Forshage M , Häggqvist S , Karlsson D , Hovmöller R , Bergsten J , et al. Completing Linnaeus’s inventory of the Swedish insect fauna: Only 5,000 species left? PLOS ONE . 2020 ; 15 . ↵ Skvarla MJ , Larson JL , Fisher JR , Dowling APG . A Review of Terrestrial and Canopy Malaise Traps . Ann Entomol Soc Am . 2020 ; 21 . SLU Artdatabanken. Famij: Phoridae - puckelflugor [Internet] . 2021 [cited 2024 May 28]. Available from: https://www.dyntaxa.se/Taxon/Info/2001326 . ↵ Srivathsan A , Ang Y , Heraty JM , Hwang WS , Jusoh WFA , Kutty SN , et al. Convergence of dominance and neglect in flying insect diversity . Nat Ecol Evol . 2023 Jul ; 7 ( 7 ): 1012 – 1021 . OpenUrl Srivathsan A , Hartop E , Puniamoorthy J , Lee WT , Kutty SN , Kurina O , et al. Rapid, large-scale species discovery in hyperdiverse taxa using 1D MinION sequencing . BMC Biol . 2019 ; 17 : 96 . ↵ Srivathsan A , Hartop EA , Puniamoorthy J , Lee WT , Kutty SN , Kurina O , et al. MinION barcodes: biodiversity discovery and identification by everyone, for everyone . BioRxiv Prepr . 2021 . ↵ Svenning JC , Borchsenius F , Bjorholm S , Balslev H . High tropical net diversification drives the New World latitudinal gradient in palm (Arecaceae) species richness . J Biogeogr . 2008 ; 35 : 394 – 406 . OpenUrl CrossRef Web of Science ↵ Tikhonov G , Opedal ØH , Abrego N , Lehikoinen A , Jonge MMJ , Oksanen J , et al. Joint species distribution modelling with the R-package HMSC . Methods Ecol Evol . 2020 ; 11 : 442 – 447 . OpenUrl CrossRef ↵ Tjur T . Coefficients of determination in logistic regression models - a new proposal: the coefficient of discrimination . American Statistician . 2009 ; 63 : 366 – 372 . OpenUrl CrossRef Web of Science ↵ Townes H . A light-weight Malaise trap . Entomol News . 1972 ; 83 : 239 – 247 . OpenUrl ↵ Truett GE , Heeger P , Mynatt RL , Truett AA , Walker JA , Warman ML . Preparation of PCR-Quality Mouse Genomic DNA with Hot Sodium Hydroxide and Tris (HotSHOT) . BioTechniques . 2000 ; 29 : 52 – 54 . OpenUrl CrossRef PubMed Web of Science ↵ Wang WY , Srivathsan A , Foo M , Yamane S , Meier R . Sorting specimen-rich invertebrate samples with cost-effective NGS barcodes: validating a reverse workflow for specimen processing . Mol Ecol Resour . 2018 . ↵ Warton DI , Blanchet FG , O’Hara RB , Ovaskainen O , Taskinen S , Walker SC , et al. So Many Variables: Joint Modeling in Community Ecology . Trends Ecol Evol . 2015 ; 30 : 766 – 779 . OpenUrl CrossRef PubMed ↵ Wellenreuther M , Larson KW , Svensson EI . Climatic niche divergence or conservatism? Environmental niches and range limits in ecologically similar damselflies . Ecology . 2012 ; 93 : 1353 – 1366 . OpenUrl CrossRef PubMed Web of Science ↵ Wiens JJ . Global patterns of diversification and species richness in amphibians . Am Nat . 2007 ; 170 : S86 – S106 . OpenUrl CrossRef PubMed Web of Science ↵ Wolda H . Insect seasonality: why? Annual Review of Ecology and Systematics . 1988 ; 19 ( 1 ): 1 – 18 . OpenUrl CrossRef Web of Science ↵ Yeo D , Srivathsan A , Meier R . Longer is Not Always Better: Optimizing Barcode Length for Large-Scale Species Discovery and Identification . Syst Biol . 2020 ; 0 : 1 – 16 . OpenUrl View the discussion thread. Back to top Previous Next Posted May 28, 2024. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following RESOLVING BIOLOGY’S DARK MATTER: SPECIES RICHNESS, SPATIOTEMPORAL DISTRIBUTION, AND COMMUNITY COMPOSITION OF A DARK TAXON Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share RESOLVING BIOLOGY’S DARK MATTER: SPECIES RICHNESS, SPATIOTEMPORAL DISTRIBUTION, AND COMMUNITY COMPOSITION OF A DARK TAXON Emily Hartop , Leshon Lee , Amrita Srivathsan , Mirkka Jones , Pablo Peña-Aguilera , Otso Ovaskainen , Tomas Roslin , Rudolf Meier bioRxiv 2024.05.07.592951; doi: https://doi.org/10.1101/2024.05.07.592951 Share This Article: Copy Citation Tools RESOLVING BIOLOGY’S DARK MATTER: SPECIES RICHNESS, SPATIOTEMPORAL DISTRIBUTION, AND COMMUNITY COMPOSITION OF A DARK TAXON Emily Hartop , Leshon Lee , Amrita Srivathsan , Mirkka Jones , Pablo Peña-Aguilera , Otso Ovaskainen , Tomas Roslin , Rudolf Meier bioRxiv 2024.05.07.592951; doi: https://doi.org/10.1101/2024.05.07.592951 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Ecology Subject Areas All Articles Animal Behavior and Cognition (7647) Biochemistry (17728) Bioengineering (13921) Bioinformatics (42047) Biophysics (21490) Cancer Biology (18637) Cell Biology (25555) Clinical Trials (138) Developmental Biology (13403) Ecology (19942) Epidemiology (2067) Evolutionary Biology (24368) Genetics (15625) Genomics (22549) Immunology (17764) Microbiology (40475) Molecular Biology (17208) Neuroscience (88761) Paleontology (667) Pathology (2842) Pharmacology and Toxicology (4834) Physiology (7659) Plant Biology (15175) Scientific Communication and Education (2047) Synthetic Biology (4304) Systems Biology (9835) Zoology (2272)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.