Full text
75,135 characters
· extracted from
preprint-html
· click to expand
Evolutionary dynamics of the tgr gene family in Dictyostelium allows escape from Crozier’s Paradox | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Evolutionary dynamics of the tgr gene family in Dictyostelium allows escape from Crozier’s Paradox View ORCID Profile Rafael A. Polo Prieto , View ORCID Profile Sean McFadyen , Fabiola Medina , View ORCID Profile Mische Holland , Cathleen M.E. Broersma , View ORCID Profile Jenna Masterson , View ORCID Profile Courtney Masterson , View ORCID Profile Hui Hui Phua , View ORCID Profile Jennie Kuzdzal-Fick , View ORCID Profile Murray P. Cox , View ORCID Profile Tera C. Levin , View ORCID Profile Elizabeth A. Ostrowski doi: https://doi.org/10.1101/2025.10.22.684067 Rafael A. Polo Prieto 1 Department of Biology, University of Houston , USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rafael A. Polo Prieto Sean McFadyen 2 College of Sciences, Massey University , New Zealand Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sean McFadyen Fabiola Medina 1 Department of Biology, University of Houston , USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mische Holland 3 Department of Biological Sciences, University of Pittsburgh , Pittsburgh, PA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Mische Holland Cathleen M.E. Broersma 2 College of Sciences, Massey University , New Zealand Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jenna Masterson 2 College of Sciences, Massey University , New Zealand Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jenna Masterson Courtney Masterson 2 College of Sciences, Massey University , New Zealand Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Courtney Masterson Hui Hui Phua 4 Liggins Institute, University of Auckland , New Zealand Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Hui Hui Phua Jennie Kuzdzal-Fick 1 Department of Biology, University of Houston , USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jennie Kuzdzal-Fick Murray P. Cox 2 College of Sciences, Massey University , New Zealand 5 Department of Statistics, University of Auckland , New Zealand Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Murray P. Cox Tera C. Levin 3 Department of Biological Sciences, University of Pittsburgh , Pittsburgh, PA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tera C. Levin Elizabeth A. Ostrowski 4 Liggins Institute, University of Auckland , New Zealand 6 Department of Biological Sciences, University of Washington Bothell , USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Elizabeth A. Ostrowski For correspondence: eostrow{at}uw.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Crozier’s Paradox states that genetic kin recognition will select for its own demise, and theoretical analyses over the past decades have supported this conclusion. Here we examine the molecular evolution of two kin recognition genes in the social amoeba Dictyostelium discoideum, tgrB1 and tgrC1 , which enable cells to recognize and reject unrelated cells during cooperative multicellular development. Our results reveal extraordinary polymorphism in these genes, placing them amongst the most rapidly evolving genes in the genome. Co-occurring amoebae, isolated from just a few grams of soil, show highly diverged recognition alleles, indicating plentiful sequence variation that can impact social decision-making on a micro-scale. Analyses of closely related gene duplicates show dynamic evolution of the gene family as a whole, suggesting a mechanism for replenishing genetic variation lost through Crozier’s Paradox. Our results provide evidence that kin recognition loci can retain sufficient genetic variation in real-world settings and suggest that large gene families may be crucial to retaining genetic variation necessary to evade Crozier’s paradox. Introduction Cooperative systems often involve costs that are shared unequally among the members. Such systems are prone to the evolutionary emergence of selfish individuals, who access the benefits of membership without paying its full costs ( 1 – 3 ). One way in which such cheaters can be defeated is if the cooperative benefits can be preferentially directed to kin, ensuring that one’s own alleles (especially those for the cooperative act itself) benefit from any sacrifices ( 4 – 10 ). The ability to distinguish kin from non-kin is thus an essential component of stable, complex cooperation. Despite the potential benefits of genetic kin recognition, the scientific consensus is that it does not work ( 11 – 15 ). The problem was first described nearly half a century ago by Crozier, who pointed out that rare alleles will provide the most accurate information about relatedness, but individuals with common alleles can engage in more cooperation and thus receive a disproportionate share of its benefits ( 15 ). According to Crozier’s Paradox, recognition alleles should be subject to positive frequency-dependence, which causes them to increase in frequency, but simultaneously erodes their effectiveness by depleting the genetic variation necessary for their function. Thus, whether and how recognition loci maintain genetic variation in natural settings, whether the variation is maintained through negative-frequency dependence, and the likelihood of encountering unrelated strains (necessitating the ability to distinguish related from unrelated individuals) is key to assessing the applicability of Crozier’s Paradox. To address these questions, we took advantage of prior knowledge of the genetic basis of kin recognition in the social amoeba Dictyostelium discoideum . Briefly, D. discoideum is a single-celled amoeba that lives in forest soil, primarily in the eastern US and east Asia (16– 18). During the unicellular stage of the life cycle, the amoebae prey upon other microbes and divide to produce genetically identical, haploid daughter cells. In response to starvation, however, they can facultatively switch to a multicellular form. During the transition to multicellularity, the cells aggregate in groups containing hundreds of thousands of cells. Initially, they transform into a motile multicellular slug, capable of migrating away from the site of aggregation. Later, the cells undergo terminal cell differentiation and build a multicellular fruiting body. Some cells vacuolize and die, forming the slender, upright stalk of the fruiting body. Others form resistant spores, which survive, sit atop the stalk, and disperse to new environments. Multicellular development is thus cooperative, and stalk formation is altruistic: stalk cells die to benefit the survival and dispersal of the spores ( 19 , 20 ). Dictyostelium ’s aggregative multicellularity poses a conundrum: because genetically different strains can co-aggregate, some strains may behave selfishly, forming spores but not stalk ( 19 ). However, prior work has shown that Dictyostelium can recognize and reject foreign cells in its aggregations ( 21 ). Cells of different genotypes initially co-aggregate but then separate out into different structures as development proceeds. Although this process is imperfect, at least in laboratory settings, it nevertheless increases the relatedness of strains that share the spoils of multicellular development, the stalk ( 21 ). Thus, kin recognition potentially stabilizes altruistic cooperation in this species ( 20 ). The genetic basis of this ability has also been established ( 22 ). Briefly, two cell-surface proteins, TgrB1 and TgrC1, bind each other on the surface of the cells during the early stages of aggregation. When this binding is strong, the cells can remain together during multicellular development. The genes are named for their protein domains: Transmembrane, IPT, I mmunoglobulin-like E-set Repeat domains, or tiger ( tgr) genes. The functions of the TgrB1 (ligand) and TgrC1 (receptor) proteins, in both allorecognition and multicellular development, have been well-studied ( 22 – 26 ). The proteins not only determine acceptance vs rejection of genetically different cells, but also adoption of the spore or stalk cell fate, and thus impact altruism versus selfishness ( 24 , 27 ). The tgrB1 and tgrC1 genes belong to a large gene family, consisting of an estimated 63 members, based on current annotations of the reference genome ( Fig. 1A ), making it one of the largest gene families in Dictyostelium . However, the functions of the other gene family members are almost completely unknown ( 28 ). Like TgrB1 and TgrC1, these genes primarily show the same sequence of domains, although some, like tgrB2 , lack a transmembrane domain ( Fig. 1B ). Download figure Open in new tab Figure 1. The tgr gene family in Dictyostelium discoideum . (A) Chromosomal locations of annotated tgr genes in the D. discoideum lab strain AX4 based on the GFF available from dictybase.org . Clusters, indicated by vertical red bars, consist of duplicated gene and pseudogene pairs in close proximity on the chromosome. (B) “Tiger” ( tgr) stands for combination of protein domains: Transmembrane (with N-terminal signal sequences), IPT/Ig-like E-set Repeat proteins. Shown are the domain structures for TgrB1, TgrB2, and TgrC1; additional proteins are shown in Fig. S1. Filled boxes indicate Uniprot IPT/TIG or Ig E-set domains (Interpro IDs IPR002909, IPR014756), whereas shaded blue boxes indicate the homologous superfamily Ig-like folds (IPR013783). (C) Many tgr genes are situated on the chromosomes in head-to-head pairs. In tgrB1 and tgrC1 , the genes share a promoter, resulting in their joint transcription around 9 hours of development, when cell-type differentiation has started and co-aggregated genotypes separate. TgrB1 and TgrC1 sit together in a head-to-head arrangement with a single, shared promoter; with peak expression at 9 hrs of development, when cells are sorting out ( 26 ). Similarly, many (but not all) tgr genes are paired (examples in Fig. 1C ). The genes within each pair often exhibit low sequence similarity. However, each gene pair is typically duplicated several times, forming a cluster of closely related gene pairs (red bars; Fig. 1A ), with intervening non- tgr genes. And while many clusters consist of head-to-head genes, one cluster ( tgrA ) consists of unpaired genes, transcribed from the same strand. Other tgr genes occur as isolated pairs without nearby duplicates (e.g., tgrD1 and tgrE1 ), and still others are singletons, at least with respect to their distance to other tgr genes on the chromosome (e.g., tgrR1 ). As a whole, the tgr gene family is an example of ‘birth-and-death’ evolution ( 29 ), as are many genes involved in recognition ( 29 – 31 ). In addition to forming clusters of closely related genes (‘birth’ through gene duplication), many tgr genes show evidence of pseudogenization (‘death’ through inactivating mutations), based on the reference sequence of the common laboratory strain AX4 (32; GFF from dictybase.org ). However, whether loci that are pseudogenes in the reference strain are also pseudogenes in other strains is unknown. Importantly, existing transcriptome data indicate that expression levels of all but a few of the gene family members are low (Fig. S2), at least during clonal multicellular development of the laboratory strain, making it challenging to assign functions to these genes, ascertain correct gene models, and determine which ones are pseudogenes. Here we examine the within-species evolutionary dynamics of the tgr gene family. First, we show that Illumina short-read sequencing cannot produce accurate sequences, a consequence of the high levels of polymorphism and numerous closely related paralogs, which leads to cross-mapping of the short-reads across paralogs and inaccurate base calls. Conversely, we show that Sanger and Nanopore technologies generate highly accurate sequences. Using Sanger sequencing, we examine the molecular evolution of tgrB1 and tgrC1 , as well as their related gene duplicates, tgrB2, tgrC2, tgrC3 , and tgrC5 . Using Nanopore long-reads, we generate highly complete genome assemblies, allowing us to comprehensively study the molecular evolution of the entire gene family. We show extraordinary sequence variation in tgrB1, tgrC1 , and other tgr genes, and that strains co-occurring over just a few centimeters harbor highly diverged recognition alleles. Indeed, much of the allelic divergence found in the species resides within a single soil sample. Finally, we show within-species rearrangements, demonstrating the rapid and dynamic evolution of the gene family. Along with Holland et al. ( 33 ), these findings suggest that the gene family may act as a reservoir of genetic diversity, supplying the genetic variation that underpins rapidly evolving kin recognition and explains how this organism escapes Crozier’s Paradox. Results Illumina sequencing is inaccurate for the tgr gene family We initially compared pre-existing Illumina sequences ( 34 ) to Sanger sequences ( 26 ) and found many sequence differences (strain list and sequencing methodology provided in Table S1). Owing to improvements in the Illumina sequencing technology since these strains were initially sequenced, including longer read lengths that should help with accurate mapping, we decided to sequence some strains with the newer Illumina technology (see Methods for details of sequencing, mapping, and SNP calls). Simultaneously, we sequenced many of these strains using gold-standard Sanger sequencing for five loci: tgrB1, tgrC1 , as well as tgrB2, tgrC2, tgrC3 , and tgrC5 . These results were largely consistent with the earlier Sanger sequences for these strains ( 26 ) but again did not match the Illumina sequences, despite updated filtering and SNP calling methods ( Fig. 2 ). Download figure Open in new tab Figure 2. Nucleotide mismatches between Sanger, Illumina, and Nanopore sequencing technologies. Each point represents a comparison of the same strain’s sequence, for the same gene, as determined by the two methods listed on the x-axis. Nanopore and Sanger sequencing produce nearly identical sequences, suggesting both technologies are highly accurate Owing to the numerous closely related gene paralogs in the tgr gene family, as well as the low complexity of the D. discoideum genome (77.6% AT, 11% and 16% of nucleotides are simple sequence repeats and homopolymeric repeats, respectively ( 32 )), we surmised that the sequence accuracy for these complex regions would be increased by long-read sequencing. In addition, we suspected structural variation among even these closely related strains. For example, we were unable to PCR-amplify tgrB2 in nearly half of the strains from Texas. Amplification using primers based on neighboring genes eventually revealed a 2-kb deletion in the region where tgrB2 should be located (Fig. S3). Given this suspected structural variation, we undertook Nanopore genome sequencing and assembly of 9 strains (see Table S2). Three of these strains potentially lack tgrB2 . Another two strains were suspected to be cryptic species, because they were highly genetically divergent from D. discoideum , co-occurred in the same soil sample with them, yet lacked discernable morphological differences (Fig. S4). We suspected these more distantly related D. discoideum strains might also harbor structural variation in the tgr gene family (see companion paper: Holland et al. ( 33 )). In contrast to our results comparing Illumina to Sanger sequencing, we found that our Nanopore sequences largely matched those of the Sanger sequencing ( Fig. 2 ). Longer reads, which permit de novo assembly, likely prevent errors caused by mapping short reads to closely related gene paralogs, in concert with high levels of polymorphism. tgrB1 and tgrC1 — but also tgrB2 — are extraordinarily polymorphic and rapidly evolving Following genome assembly and annotation, we aligned the sequences of each gene in the genome. We observed extremely low sequence variation on average, with a mean and median nucleotide diversity across all D. discoideum genes of 0.003 and 0.0014, slightly higher than previous estimates ( 34 ). Most likely, previous approaches that mapped short reads to the reference genome ( 34 ) could not call SNPs in the most polymorphic regions, leading to underestimates of diversity. We analyzed the presence and absence of different members of the gene family using Liftoff, which lifts over gene annotations among assemblies from the same or closely related species by aligning genes (specifically, exons) rather than entire genomes ( 36 ). These analyses successfully identified many, but not all, of the expected tgr gene family members, and absences were typically observed across multiple strains ( Fig. 3 ). Manual inspection of genes that were designated by Liftoff as orthologs to tgr pseudogenes in AX4 revealed a variety of patterns: many of these alleles appear to be pseudogenes, but there were also many exceptions where one or a few strains had uninterrupted reading frames over most or all of the gene. These results suggest that even closely related strains (differing on average only by 0.3% at the nucleotide level) vary in their functional tgr gene repertoire. Download figure Open in new tab Figure 3. Variation in tgr gene family content among strains. The x-axis shows each tgr gene, based on the reference genome annotation, with ‘ps’ indicating a pseudogene in the reference genome. Genes are ordered according to chromosome (one to six) and then by position. The Y-axis indicates each strain. Black boxes indicate gene presence, while yellow boxes indicate gene absence, based on Liftoff analysis. Our ability to locate most paralogs helps us to differentiate orthologs from paralogs, allowing us to investigate evidence of selection on individual tgr genes. For nucleotide diversity, we observe tgr genes in the right-tail of the distribution, with genes from the tgrBC cluster dominating these outliers ( Fig. 4A and Fig. S8). In addition, the designated tgr pseudogenes do not dominate the tail of the distribution, suggesting that functional tgr genes (such as tgrB1, tgrC1 , and tgrB2 ) are evolving more rapidly than their pseudogene counterparts. Download figure Open in new tab Figure 4. (A) Nanopore genome assemblies reveal the tgr gene family is highly polymorphic with tgrB1, tgrB2 , and tgrC1 showing the greatest levels of nucleotide variation. (B) Genome-wide ratio of the rates of nonsynonymous (dN) to synonymous (dS) substitution ( ω ), an indicator of positive or diversifying selection. Each value represents the mean of all pairwise comparisons between a D. discoideum strain and one of the distantly related (cryptic species) strains. The tgrB2 gene is ranked in the 1st percentile of the genome-wide distribution, whereas tgrC5 and tgrB1 both reside in the 8th percentile. Although mean ω is less than one on average, sliding windows show high heterogeneity in sequence evolution across the gene, with regions that are hypervariable within species also showing elevated rates of nonsynonymous substitution, i.e., dN/dS> 2 (Fig. S8), suggesting rapid adaptive evolution. Analyses of the ratio of the rates of nonsynonymous to synonymous substitution, an indicator of positive or diversifying selection, also placed tgrB1, tgrC1, tgrB2 , and some other tgr genes in the tail of the distribution ( Fig. 4B ), indicating that their hypervariability is likely driven by selection (see also Fig. S8). In addition, of four large gene families in Dictyostelium (tgrs, ABC transporters, polyketide synthases, and actin genes), only tgr genes and polyketide synthases showed elevations in nucleotide diversity (Fig. S5). This result means that extreme (or even elevated) sequence variation is not inherent to all large gene families and further indicates that selection may be driving their rapid evolution. Spatial analyses indicate extreme genetic variation at the centimeter-scale in nature To understand how selection operates on tgr genes in nature, we focused on the tgrBC cluster, which contains a mix of highly polymorphic genes ( tgrB1, tgrB2 , and tgrC1 ) and relatively conserved genes ( tgrC2, tgrC3 , and tgrC5 ). To enhance our ability to detect selection and to take advantage of approaches that incorporate allele frequencies, we sequenced these genes in 51 strains from four sites, which varied in pairwise distance from ~100 m to >1,000 km ( Fig. 5 ). At each site, we obtained 6 g of soil from the surface of a 10-cm-by-10-cm plot. Thus, the maximum distance among any pair of strains is 14 cm (= √(10 2 + 10 2 )). Our prior sequencing, including strains from these locations, indicates low genetic differentiation at distances ranging from tens to hundreds of meters and very high differentiation at spatial scales greater than one kilometer (mean F ST>0.4; Kuzdzal-Fick, in preparation). Thus, we expect some gene flow between SMFS1 and SMFS2, but little gene flow among the more distant pairs of sites. Download figure Open in new tab Figure 5. Spatial variation in tgrB1, tgrB2, tgrC1, tgrC2, tgrC3 , and tgrC5 . (A) Each site consisted of a 10-by-10-centimeter plot, from which surface soil was collected for isolation of Dictyostelium . (B) Sampling locations for 51 D. discoideum strains, GSNP: Great Smoky National Park; NHPA: New Hampshire, Proctor Academy; SMFS: Smith MacLeish Field Station. (C) Haplotype networks for tgrB and tgrC genes, illustrating the centimeter-scale diversity. For highly polymorphic genes tgrB1, tgrB2 , and tgrC1 , each 10 cm-by-10 cm site contains at least 3 to 4 genetically divergent haplotypes. Each circle represents a unique haplotype, and its area is proportional to the number of strains that have that haplotype. Edges and numbers indicate the inferred mutational steps separating the two haplotypes; for simplicity, numbers are only shown for edges with distances >4. For metrics of nucleotide diversity, our findings with this larger strain collection mirrored what we observed with the smaller number of Nanopore genomes: extreme polymorphism (in descending order) in tgrC1, tgrB2 , and tgrB1 — and substantially lower polymorphism in tgrC3, tgrC2 , and tgrC5 ( Fig. 6 ). We also observed similar patterns of polymorphism across the four sampling locations, suggesting that selection is operating similarly across geographically distinct locations. Especially interesting was the consistently extreme polymorphism in tgrB2 , given that this gene lacks a transmembrane domain and has not been studied, unlike tgrB1 and tgrC1 , which have established roles in kin recognition. Download figure Open in new tab Figure 6. Molecular evolution analyses of tgrB and tgrC genes. (A) Tajima’s D , a metric of selection that typically ranges from approximately +2 to −2, but can be more extreme. Values of zero are expected for genes that are evolving neutrally, i.e., in the absence of selection. Strongly positive values, particularly compared to other genes in the genome, can indicate balancing selection – i.e., an excess of high-frequency alleles. Strongly negative values, compared to other genes in the genome, can indicate positive or purifying selection – i.e., skewed allele frequencies, where a few variants are high-frequency, but most variants are rare. (B) Nucleotide diversity (ν). Each point is the mean of approximately 10-20 natural isolates per site that were sequenced using the Sanger technology. To further examine how selection is operating on these genes in nature, we calculated Tajima’s D for each locus ( Fig. 6 ). Large negative values of Tajima’s D indicate highly skewed allele frequencies, potentially generated by positive or purifying selection. Conversely, large positive values of Tajima’s D indicates multiple alleles at intermediate frequency, potentially generated by balancing selection – i.e., negative frequency-dependent selection ( 37 , 38 ). Importantly, Crozier’s Paradox predicts that recognition genes will show positive frequency-dependent selection, reflected in depleted genetic variation and a negative Tajima’s D . Conversely, effective balancing selection—and effective maintenance of genetic variation—should result in positive values of Tajima’s D . Despite the large differences in levels of polymorphism between tgrB2, tgrC2 , and tgrC3 , they all showed similarly positive values of Tajima’s D. However, tgrC1 has a Tajima’s D value close to zero, a surprising result given its extreme polymorphism that is thought to be driven by balancing or diversifying selection. Only tgrB1 showed a consistently negative Tajima’s D , potentially indicating variation-removing selection. Overall, Tajima’s D analysis suggests differences in how selection is operating on these genes in nature that was not evident from polymorphism analyses alone. To better visualize the spatial distribution of genetic variation in the tgrB and tgrC genes, we created haplotype networks ( Fig. 5C ). In each network, the area and colors represent the number of strains with a given haplotype and their sampling site. Edges indicate the number of mutations that distinguish the haplotypes (only indicated if >4). Consistent with the findings from the nucleotide diversity and Tajima’s D analyses, the networks for tgrB1, tgrB2 , and tgrC1 reveal large numbers of haplotypes and long branches (see also Fig. S6). Importantly, each 10-cm by 10-cm plot harbors at least 3 or 4 genetically diverged haplotypes that can differ by more than a hundred nucleotides. Haplotypes from the same site (same color) are often not directly connected to each other by an edge. Taken together, these results indicate that not only are tgrB and tgrC genes extraordinarily polymorphic within (and across) species, but a high fraction of the species-wide polymorphism can be found over just a few centimeters in the soil. So far, the molecular evolution metrics have been averaged across the entire gene, but different regions — particularly different protein domains — may have different degrees of constraint. In the case of Tajima’s D, averaging positive and negative values might also result in the incorrect conclusion of no selection. For that reason, we calculated nucleotide diversity (ρε), Tajima’s D , and F ST in sliding windows across the gene, using 100-bp windows and a 25-bp slide (Fig. S7), using our Sanger sequences from the four sites. These results highlight several additional aspects of the genetic variation: ( 1 ) tgrC1 ’s polymorphism peaks in its 5’ immunoglobulin domains, ( 2 ) although tgrB1 does have a consistently negative Tajima’s D across most of the gene, the region surrounding its transmembrane and cytoplasmic signaling domains shows positive values, indicating potential balancing selection in that specific region, ( 3 ) although polymorphism is very low in tgrC2 and tgrC3 , they have extremely high values of F ST (=1) over some portions of the gene, mostly overlapping the immunoglobulin domains, indicating strong geographic divergence and potentially local adaptation. These results indicate surprisingly diverse patterns of molecular evolution for genes that occur in close proximity on the chromosome. The molecular evolution analyses also revealed that most of tgrB2 ’s extreme nucleotide diversity is attributable to a short stretch of ~100 bp near the start of the gene ( Fig. 7A ). Further inspection of the sequence alignment reveals why nucleotide diversity is so high in this region: instead of high variability among all strains (as we observe for tgrB1 ; see haplotype network in Fig. 5C ), we observe that its sequence variation in this region of the gene largely segregates into just two highly divergent haplotypes, each at high frequency ( Fig. 7B ; see also Fig. S9). The region of extreme sequence divergence is also highly discrete, surrounded by regions of high homogeneity. The mosaic appearance of the tgrB2 gene suggests genetic variation that has been introduced suddenly rather than slowly through accumulated mutations. Sequence comparisons do not clearly identify a source, as the region was equally similar to tgrB1 and tgrB2 . However, phylogenetic analyses of the tgrB1, tgrB2 , and tgrBp s shows intermingled paralogs within clades, a signature of interlocus gene conversion or recombination (Fig. S10). Download figure Open in new tab Figure 7. Polymorphism in tgrB2 . (A) Peak of extreme nucleotide diversity occurs around base 300. (B) Sequence alignment reveals that the polymorphism is highly discrete and that variation segregates into two near equal-frequency haplotypes that are extremely divergent. Dynamic evolution and mosaicism in the tgrBC locus Dynamic evolution of the gene cluster is apparent among our strains, with the most noticeable differences between the D. discoideum and putative cryptic species strains (EO1176 and EO1089; Fig. 8 ). Nanopore assembly also confirmed the deletion of tgrB2 from its expected genomic location ( Fig. 8A and Fig. S3). Interestingly, Nanopore assemblies for QS18 and QS33 suggested that tgrB2 was present but in a new location: the locus where tgrB_ps resides in AX4 and other strains, making it paired with tgrC_ps1 . Examination of the sequence surrounding the translocated tgrB2 alleles revealed about 50 bases at the end of the gene that do not match the non-translocated tgrB2 alleles, suggesting a cut-and-paste event. However, beyond those bases, the tgrB2 bears no further resemblance to tgrB1 . These non-matching bases are nearly identical to a stretch of tgrB1 , suggesting that the tgrB2 alleles in these strains acquired portions of tgrB1 through the transposition event. Download figure Open in new tab Figure 8. Dynamic gene family evolution in the tgrBC locus. Each line shows the inferred location of the tgrBC genes and their chromosomal locations, based on Liftoff analysis; intervening non- tgrBC genes not shown. Genes are named according to their names in the reference strain AX4. Slashes indicate a region of the chromosome not shown. QS18 and QS33 show a translocation of tgrB2 , consistent with our earlier PCR results showing a deletion impacting this gene (see Fig. S3). (A) D. discoideum strains (and one cryptic species strain) compared to the canonical AX4 arrangement. (B) Section of the tgrBC locus, comparing the cryptic species to the canonical AX4 arrangement. Transcriptomic analysis of QS33, which harbors the translocated tgrB2 , at 9 hours of development reveals that: ( 1 ) the tgrB2 allele is strongly expressed, at similar levels to tgrB1 and tgrC1 (Table S3), ( 2 ) the translocated tgrB2 , like the non-translocated tgrB2 s in other strains, is a truncated tgrB protein lacking the transmembrane domain, and ( 3 ) despite potentially having a paired tgrC (which the canonical tgrB2 lacks; compare locations of tgrB2 in Fig. 8A ), the neighboring tgrC gene appears to be a pseudogene, just as it is in other strains. Specifically, although the tgrC mRNA is spliced, there are internal stop codons present and the expression level was low compared to tgrB1, tgrC1 , and tgrB2 (Table S3). At first glance, the cryptic species strains, EO1089 and EO1176 appear to lack a functional tgrC1 gene; their tgrB1 gene is paired with a gene that is orthologous to tgrC_ps1 , a pseudogene in AX4. Further inspection of this gene, however, reveals that while it is also truncated, similar to tgrC_ps1 in AX4, it is in frame over most of its length, unlike its AX4 counterpart, which has internal stop codons. The EO1089 and EO1176 alleles are very similar, differing at only 23 sites across nearly 1700 base pairs. The gene appears functional in EO1089, as it maintains an uninterrupted reading frame for ~1700 bases. In EO1176, however, we observe a 2-bp insertion, followed by a 1-bp insertion 163 bases away, which results in stop codons until the frame is restored. All other indels between these strains are in frame. The N-terminal region of the gene is most similar in AX4 to tgrC1 , but still quite divergent (only ~75% identical at the amino acid level), but the similarity to tgrC1 increases further in the gene (~85% identical). Taken together, these results suggest that the tgrC gene paired to tgrB1 in these strains might be functional, albeit missing the transmembrane domain that should anchor the protein in the cell membrane. Taken together, these results indicate structural rearrangements among strains of D. discoideum and their closely related cryptic species, generating evolutionary divergence at the tgrBC locus. Discussion Social amoebae show extraordinary levels of centimeter-scale sequence diversity in allorecognition loci—indicating both the means and opportunity for selection on kin recognition in nature Evolutionary biologists have long noted the potential for conflict in the Dictyostelium life cycle, given that its transition to multicellularity involves a stage where a fraction of the population dies altruistically to support the rest ( 19 , 20 , 39 ). The capability to recognize and evade cooperating with unrelated strains through interactions at the tgrB1 - C1 locus is thought to be a valuable protection against cheating ( 20 , 40 , 41 ). Nevertheless, despite high levels of species-wide polymorphism in these loci ( 26 ), it has remained unclear if sufficient variation is present over the small spatial scales over which the social decision-making to cooperate or reject takes place. Our results reveal enormous levels of sequence variation in allorecognition at the centimeter-scale, over distances traversable by the migratory slugs and within the known range of genetic migration ( 42 , 43 ). Simply put, massive polymorphism is useless unless strains actually encounter individuals of varying relatedness, making the ability to distinguish between them important. This crucial requirement has been demonstrated here, with extremely divergent (as well as identical) alleles co-occurring over distances of not more than 14 cm in the soil. tgrB1 and tgrC1 escape Crozier’s Paradox Crozier’s Paradox predicts that genetic kin recognition will be a victim of its own success, depleting its own genetic variation through positive frequency-dependent selection ( 11 , 15 ). Here, we show that neither tgrB1 nor tgrC1 is depleted of genetic variation. Indeed, both genes exhibit extreme levels of sequence variability. Not only do we show this pattern in one location, but similar findings were found across geographically distant sites, suggesting consistent selection in nature. Our results also do not show strong evidence of variation-depleting selection, in that Tajima’s D values tended to be positive for tgrB2, tgrC1 and the other tgrC genes, a pattern more consistent with balancing selection—a force that maintains genetic variation. However, Tajima’s D values were negative across most of tgrB1 (aside from the end of the gene with the cytoplasmic tail). It showed extreme values of dN/dS, on average, especially near the start of the gene, where the polymorphism is greatest (Fig. S8). Thus, it seems possible that tgrB1 experiences repeated selective sweeps. Overall, however, we do not see strong evidence of either the existence of the paradox, in the form of positive frequency-dependent selection, nor the outcome of the paradox, in the form of depleted genetic variation. Genes families can retain the genetic variation required to counteract Crozier’s Paradox Many solutions to Crozier’s Paradox have been raised over the years, and Crozier himself suggested that variation at kin recognition genes might be maintained by selection on their secondary functions ( 11 , 15 , 44 ). For example, variation in the vertebrate MHC, which functions in kin recognition, can be maintained by its secondary role in pathogen recognition. To date, however, secondary functions of tgr genes have not been identified. Although expression is low for the majority of genes aside from tgrB1 and tgrC1 during multicellular development, alternative roles for these genes, which could help to maintain genetic variation, have not yet been ruled out. A little mentioned feature that may help to maintain polymorphism is that recognition genes nearly always reside in large gene families, consisting of clusters of duplicated genes and pseudogenes. Gene duplicates are prone to rearrangements, leading to rapid birth-and-death dynamics, where functional genes can ‘die’ or be ‘resurrected’ through rearrangements with gene paralogs. Gene conversion and non-allelic recombination can lead to novel sequences, increasing haplotype number when there is strong diversifying selection ( 32 , 45 – 48 ). Although these processes are well-recognized for the evolution of immune genes, to our knowledge, large gene families have not previously been proposed as a solution to Crozier’s Paradox. Molecular evolution analyses suggest additional targets of selection, especially tgrB2 Although the established kin recognition genes tgrB1 and tgrC1 show extraordinary sequence variability, our analyses uncovered additional genes and members of the tgr gene family that are also highly polymorphic or show other signatures of selection. tgrB2 is especially perplexing: its sequence variation is similar in magnitude to that of tgrB1 and tgrC1 overall, and its peak of polymorphism near the N-terminus dwarfs that of the other two genes. The high frequency of this haplotype, especially across independent populations suggests it is unlikely to be selectively neutral. The gene also shows consistently positive Tajima’s D over nearly its entirety, indicative of balancing selection favoring different alleles. It is found in a different chromosomal position in some strains. In our four cm-scale populations, sequence variation has a mosaic pattern, indicative of variation that has been imported rather than evolved via stepwise mutations. Finally, despite having the highest rate of nonsynonymous to synonymous of all the tgr genes, it lacks a transmembrane domain and a functional tgrC partner in either genomic location where it is found—yet is highly expressed at the point in development where kin recognition occurs. Overall, these results suggest that this gene may be important, although its function remains obscure. More generally, these findings highlight that molecular evolution analyses can be useful to identify genes that are selectively important—particularly rapidly evolving genes involved in recognition, whether kin or immune recognition. Variable selection across the tgrBC locus The companion paper ( 33 ) shows strong evidence, using D. discoideum ’s closest outgroup species, that the tgrBC locus is hypermutable, with duplications, deletions, and rearrangements that rapidly generate novel alleles and loci—helping to explain how these genes generate their extreme sequence polymorphism. With even more closely related strains (with only ~0.3% average divergence at the nucleotide level), we also show evidence of rearrangements. However, our primarily within-species analysis (with several likely cryptic species) means that we can readily identify orthologous genes across the strains, enabling a variety of molecular evolution analyses that pinpoint the form and strength of selection acting on the gene family. Against this backdrop of an extraordinarily unstable genomic region, we find variation among tgrB and tgrC genes in their likely form of selection (positive vs balancing) and levels of genetic variation. These results strongly suggest that the hypermutability is nevertheless shaped by natural selection – and indicates that tgrB1, tgrC1 , and tgrB2 experience extremely strong selection that is greater than that of the rest of the genome or most of the other tgr genes. More important, the genomic instability, particularly the high rates of recombination within this locus (see ( 33 )), may help to explain how such tightly linked genes, especially tgrB1 and tgrC1 , can evolve independently. Thus, the high rate of recombination within this locus not only enables rapid molecular evolution generally but also allows that evolution to be restricted to particular genes. Birth-and-death evolution in gene families involved in recognition Together with Holland et al. ( 33 ), who reveal the extraordinary genomic flexibility of the tgrBC locus and the dynamics of birth-and-death in D. discoideum and its sister species D. citrinum , we suggest that the tgr gene family is a new exemplar of birth-and-death evolution, with long-read assemblies providing unprecedented clarity on these difficult-to-sequence genomic regions, previously refractive to evolutionary analysis. Our work shows that highly accurate genome assemblies can be obtained and demonstrates the extraordinary variation and selective importance that these recognition genes entail for the organism. Methods Strains and Sampling EO strains – EO strains were isolated from soil samples of approximately 10 g, obtained from the top centimeter of soil in 10-cm by 10-cm plots in the following locations: Proctor Academy, New Hampshire (NHPA); Smith-MacLeish Field Station (SMFS1, SMFS2), and Great Smoky National Park (GSNP). GPS coordinates are provided in Table S1. To isolate strains, we combined 6 g of soil with 30 ml of sterile water and then deposited approximately 200-400 ul across multiple hay infusion agar plates ( 49 ), along with 400 ul of a stationary phase culture of Klebsiella pneumoniae as a food source. We isolated a single fruiting body from each plate after approximately one week, which was stored in 30% glycerol at −80C. Subsequently, we cloned these isolates by plating the spores at low density on SM agar plates (Formedium Ltd) with 400 ul of K. pneumoniae and choosing fruiting bodies from the center of a single, well-spaced plaque. Spores were again frozen at −80C. Cryptic species strains – Two of these strains, EO1089 and EO1176, were identified using previous Illumina sequencing as likely cryptic species (Kuzdzal-Fick et al., in preparation). Specifically, they are highly genetically divergent from our D. discoideum strains, despite co-occurring with them in nature, but more closely related to D. discoideum than D. citrinum is, the current sister species. QS strains QS strains were collected previously by the Queller-Strassmann lab and sequenced initially by Ostrowski et al. ( 34 ) using Ilumina and Benabentos et al. ( 26 ) using Sanger sequencing. We sequenced some of these strains again using Sanger, Illumina, and/or Nanopore sequencing. Further details are provided in Table S1. Sanger sequencing Strains, culture conditions, and DNA extraction Spores were inoculated from frozen spore stocks onto SM agar plates in co-culture with the K. pneumoniae . Cells were allowed to develop into multicellular fruiting bodies. DNA was extracted by placing 5-10 sori in 250 μL of a 5% Chelex solution, followed by the addition of 10 μL of 10 mg/ml Proteinase K. The samples were then mixed and incubated at 40°C for 4 h, followed by 70 °C for 10 min to denature the proteinase K. The supernatant containing the genomic DNA was stored at −20 °C until use. Primer design and PCR protocol The genes ( tgrB1, tgrB2, tgrBps, tgrC1, tgrC2, tgrC3 , and tgrC5 ) were each amplified using a single pair of primers for each gene, from genomic DNA. Primers were designed in Primer3 based on the AX4 reference genome ( 32 ). The PCR protocol consisted of an initial denaturation step at 94 °C for 120 s, followed by 35 cycles of: a denaturation step at 94 °C for 20 s, two annealing steps for 15 s each (initial 5 °C lower than primer melting temperature (Tm) and secondary 8 °C lower than primer Tm), and an extension step at 62 °C for a variable time depending on the length of the gene (extension time = 1 min/Kb * 1.30). Gel electrophoresis was used to visualize and verify the presence of a single band of the expected size, using a 2% agarose gel, stained with 0.5 μg/mL of ethidium bromide. Additional sequencing primers were designed so that the entire gene would be covered by at least one read. PCR product purification and DNA sequencing PCR products were purified using a QIAGEN PCR Purification Kit (protocol according to manufacturer’s specifications and eluted in water) with QIAGEN purification columns. Some products were instead purified directly by GENEWIZ Inc., which used enzymatic PCR purification (ExoSAP-IT). Purified PCR products were sent to GENEWIZ for Sanger sequencing. Consensus sequence assembly and evolutionary genetics analysis Chromatograms were aligned using the MUSCLE algorithm ( 50 ) in Geneious 11.1.4 and then visually verified and edited to ensure there were no misalignments or base miscalls produced during the alignment process. Consensus sequences were generated for each gene of every strain. These consensus sequences were then aligned using MUSCLE, eliminating the introns using the AX4 coding sequence as reference, and exported as FASTA alignments for analysis with the R package PopGenome ( 51 ). FASTA alignments were read into R and transformed into overlapping sliding windows 100 basepairs wide, with 25 basepair steps. We used the “neutrality.stats” and “diversity.stats” commands to obtain the values for Tajima’s D, F ST, and nucleotide diversity for each step of the sliding window. Haplotype networks were made using the R package PEGAS ( 52 ). Manual adjustment of the edges was necessary to prevent overplotting of the nodes. Labels are provided on all edges that exceed four mutations in length. Illumina sequencing Beginning with a multisample vcf file restricted to biallelic alleles ( 27 ), we used bcftools (1.14) to calculate variant allele frequency with +fill-tags -t FORMAT/VAF. After filtering for – minDP 4 and –maxDP 510 with vcftools (0.1.16), we ran gatk (gatk4-4.2.5.0) VariantFiltration --genotype-filter-name “VAF_filter” \ --genotype-filter-expression “isHomVar == 1 && VAF < 0.8”. We used samtools (1.11) faidx to select each region of interest and bcftools consensus --sample -M ‘N’ to produce fasta files of each strain’s tgr sequences. TgrC1 Splicing In cases where SNPs occurred in or near the splice site consensus sequence, we verified the splicing. Briefly, strains were revived from frozen stocks on SM agar plates overlaid with K. pneumoniae culture. Spores from mature fruiting bodies were deposited onto new SM plates with a fresh K. pneumoniae lawn and incubated for 40 h at 22 °C in darkness. Cells were collected during mid-exponential growth phase in KK2 buffer, washed three times (450 g, 3 min, 4 °C), pelleted, and then counted. A total of ~1×107 to 6×107 cells was deposited onto mixed cellulose ester (MCE) filters. Filters were placed atop a cellulose pad (PALL) in a 6-cm Petri dish, moistened with 1 mL cold PDF. Plates were incubated at 22 °C in darkness in sealed, humidified containers. After hours, cells were scraped off the filter into the inner cap of a 1.5 mL Eppendorf tube using a sterile spatula. We added 1 mL of Trizol and vortexed to lyse the cells. After 5 min incubation, 200 µL chloroform was added, followed by vigorous shaking, a 3 min incubation at room temperature, and centrifugation (12,000 × g, 15 min, 4 °C). The aqueous phase of the mixture was transferred to a new tube, mixed with 500 µL isopropanol, incubated for min, and centrifuged again (12,000 × g, 15 min, 4 °C) to pellet RNA. After washing with 1 mL 75% ethanol, samples were stored at –80 °C until further processing. For tgrC1 splicing, the RNA was treated with DNAse using the TURBO DNAse kit (Invitrogen) and cDNA was generated using the iScript cDNA synthesis kit. We used a tgrC1 gene-specific reverse primer that binds at position 2204 of the gene (5’ TGGTCCTGAACGAACTCC-3’). The resulting cDNA was used as template to amplify the region surrounding the intron, using forward and reverse primers that bind at bases 55 and 882 of the gene, respectively (5’-TCAATGAATCCTCCAACACCA-3’, 5’-ACATTGGACCGGGATCAACTC-3’). The PCR product was sent for Sanger sequencing at Auckland Genomics. The sequences were mapped to the AX4 reference sequence using Geneious Prime to determine splicing. Nanopore Direct RNA Sequencing For direct RNA sequencing, we used RNA isolated from QS33 as described above, after 9 hours of development, corresponding to the expected timeframe for tgrB1 and tgrC1 peak expression (see Fig. S2). We used the Oxford Nanopore direct RNA sequencing kit and followed the manufacturer’s protocol (SQK-RNA004), using the polyadenylation mode. We obtained ~40 million reads. The reads were mapped to the QS33 genome assembly using the nanoseq pipeline ( https://nf-co.re/nanoseq/3.1.0/ ( 53 ), and BAMs were visualized in IGV v2.19.5 ( 54 ). We used Liftoff ( 36 ) to transfer annotations between the AX4 reference genome and the QS33 assembly. For genes of interest, we extracted the reads and assembled them in Geneious to check splicing and reading frame. Nanopore Sequencing Genomic DNA was isolated following the protocol of Ostrowski et al. 2015 ( 34 ) and stored in TE buffer (pH 8.0) at 4 °C until sequencing. Sequencing was performed on Oxford Nanopore Technologies (ONT) MinION Mk1B devices using FLO-MIN106 flow cells. Both new and reused cells were used, with reused cells washed between runs using the ONT Flow Cell Wash Kit (EXP-WSH004). Libraries were prepared from ~1 µg gDNA with the ONT Genomic DNA by Ligation protocol (SQK-LSK109, version 14Aug2019) with the following modifications. Prior to library preparation, DNA fragments shorter than 10 kb were removed with a Short Read Eliminator (SRX) kit (Circulomics). During AMPure bead clean-up steps, incubation on the magnetic rack was extended to 2–4 min until the liquid was clear. For the final elution, the library was diluted in buffer pre-heated to 50 °C and incubated for 10 min. Nanopore Assembly and Analysis Assembly Reads for all strains were basecalled using ONT Guppy basecalling software version 6.3.8 with a minimum quality score filter of 9. Draft assemblies were generated de novo with Flye v2.9 ( 55 ) without specifying a target genome size. The resulting assemblies were polished four times with Racon v1.5.0 ( 56 ), followed by a single polishing step with Medaka v1.7.2 ( https://github.com/nanoporetech/medaka ). For all genomes excluding the two cryptic strains, a circular Klebsiella pneumoniae contig (~5.2Mbp) was generated which was removed manually before genome statistics were calculated. Polished assemblies were initially annotated by lifting over annotations from the AX4 reference genome available from DictyBase ( http://dictybase.org/ ) using Liftoff v1.6.3 ( 36 ), with multicopy mode enabled. Annotated assemblies were scaffolded to the AX4 reference genome with RagTag v2.1.0 ( 57 ). QUAST v5.3.0 ( 58 ) was used to generate assembly statistics (N50, L50, GC%, etc.) on both scaffolded and unscaffolded assemblies. BUSCO v5.5.0 ( 59 ) was used to assess the genome completeness of all sequenced strains. All BUSCO assessments were run with the ‘--auto-lineage-euk’ flag to automatically select the best database for comparison, which selected the eukaryote_odb10 database in all cases. Nucleotide diversity and sequencing comparison Sequences obtained through Illumina and Nanopore sequencing were compared to a Sanger counterpart to assess accuracy over the tgr gene regions. Comparisons were made by aligning gene sequences with MAFFT v7.526 ( 60 ) and counting the mismatched base pairs. Alignments of orthologs in all D . discoideum strains were used to calculate nucleotide diversity (π) for all genes across the genome with Egglib v3.3.5 ( 61 ) and only sites with less than 30% missing data were retained. Non-synonymous to synonymous mutation ratio estimation When estimating non-synonymous to synonymous mutation ratios, an alternate gene annotation pipeline was employed to ensure accurate prediction of coding sequences. We used GeneMarkEP+ ( 62 ) to predict protein sequences and provided the reference D. discoideum AX4 protein sequences as the protein hint database. To identify rRNA genes, we used RNAmmer ( 63 ) with default parameters. We then used MAKER ( 64 ) to generate a custom repeat library, which was provided to RepeatMasker for softmasking each genome prior to annotation. tRNA genes were identified using tRNAscan-SE ( 65 ). Functional annotation labels were added to each predicted coding sequence with results from BLASTP of the UniProt database release 2024_01 ( 66 ) and InterProScan v5.57-90 ( 67 ) of the PFAM database ( 68 ). Finally, unique gene identifiers were appended using scripts from the MAKER pipeline. Orthologs were identified from the MAKER set of coding sequences using mmseq2 v17.b804f ( 69 ) ‘easy-rbh’ reciprocal best hits to the reference AX4 coding sequences. Codon-aware alignments between orthologs were produced using MAFFT and pal2nal v14.1 ( 70 ). Non-synonymous to synonymous mutation ratios were estimated in a pairwise fashion between an outgroup (EO1089/EO1176) and a D. discoideum strain for a single omega value (model 0) using PAML/codonml 4.10.7 ( 71 ). All possible pairs were considered. Sliding window analysis was performed pairwise in 100 codon windows, with a step of 1 codon and the average dN/dS value over all pairwise comparisons per window plotted. Windows which did not have enough variation to estimate dN/dS (dS < 0.01 or dN/dS = 99) were excluded from the calculation. Structural variation analysis Sniffles v2.6.0 ( 72 ) was used to identify structural variations between our assemblies and the AX4 reference after mapping base-called reads to the final assemblies with minimap2 v2.22 ( 73 ). Overlaps between structural variations and areas of interest were identified with bedtools v2.31.1 ( 74 ). Acknowledgements The authors would like to acknowledge the use of the Research Computing Data Core (RCDC) at the University of Houston and the New Zealand eScience Infrastructure (NeSI) high performance computing facilities for data analyses. New Zealand’s national facilities are provided by NeSI and funded jointly by NeSI’s collaborator institutions and through the Ministry of Business, Innovation & Employment’s Research Infrastructure program. EAO and MC were supported by a grant MFP-UOA2412 from the Marsden Fund Council, managed by the New Zealand Royal Society Te Aparangi. CB and SM were supported by Massey University Doctoral and Masters scholarships, respectively. The authors would like to thank Proctor Academy, Smith MacLeish Field Station, and Mountain Lake Biological Station for permission to collect soil samples. Soil was collected in Great Smoky National Park under permit GRSM-2017-SCI-2014. Funder Information Declared Marsden Fund, managed by the New Zealand Royal Society Te Aparangi. Literature Cited 1. ↵ J. L. Sachs , U. G. Mueller , T. P. Wilcox , J. J. Bull , The evolution of cooperation . Q. Rev. Biol . 79 , 135 – 160 ( 2004 ). OpenUrl CrossRef PubMed Web of Science 2. ↵ S. A. West , A. S. Griffin , A. Gardner , S. P. Diggle , Social evolution theory for microorganisms . Nat. Rev. Microbiol . 4 , 597 – 607 ( 2006 ). OpenUrl CrossRef PubMed Web of Science 3. ↵ M. Ghoul , A. S. Griffin , S. A. West , Toward an evolutionary definition of cheating . Evolution 68 , 318 – 331 ( 2014 ). OpenUrl CrossRef PubMed 4. ↵ W. D. Hamilton , Genetical Evolution of Social Behaviour 1 . J. Theor. Biol . 7 , 1 – 16 ( 1964 ). OpenUrl CrossRef PubMed Web of Science 5. J. J. Boomsma , P. d’Ettorre , Nice to kin and nasty to non-kin: revisiting Hamilton’s early insights on eusociality . Biol. Lett . 9 , 20130444 ( 2013 ). OpenUrl CrossRef PubMed 6. L. W. Buss , Somatic cell parasitism and the evolution of somatic tissue compatibility . Proc. Natl. Acad. Sci. U. S. A . 79 , 5337 – 5341 ( 1982 ). OpenUrl Abstract / FREE Full Text 7. W. D. Hamilton , The genetical evolution of social behaviour. II . J. Theor. Biol . 7 , 17 – 52 ( 1964 ). OpenUrl CrossRef PubMed Web of Science 8. A. F. Agrawal , Kin recognition and the evolution of altruism . Proc. Biol. Sci . 268 , 1099 – 1104 ( 2001 ). OpenUrl CrossRef PubMed 9. M. J. Sheehan , C. Miller , H. K. Reeve , Identity Signaling and Patterns of Cooperative Behavior . Integr. Comp. Biol . 57 , 580 – 588 ( 2017 ). OpenUrl PubMed 10. ↵ R. Gadagkar , Kin recognition in social insects and other animals—A review of recent findings and a consideration of their relevance for the theory of kin selection . Proceedings: Animal Sciences 94 , 587 – 621 ( 1985 ). OpenUrl 11. ↵ F. Rousset , D. Roze , Constraints on the origin and maintenance of genetic kin recognition . Evolution 61 , 2320 – 2330 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 12. A. Gardner , S. A. West , Social evolution: the decline and fall of genetic kin recognition . Curr. Biol . 17 , R810 – 2 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 13. A. Grafen , Do animals really recognize kin? Anim. Behav . 39 , 42 – 54 ( 1990 ). OpenUrl CrossRef Web of Science 14. A. R. Blaustein , M. Bekoff , J. A. Byers , T. J. Daniel , Kin recognition in vertebrates: what do we really know about adaptive value? Anim. Behav . 41 , 1079 – 1083 ( 1991 ). OpenUrl 15. ↵ R. H. Crozier , Genetic clonal recognition abilities in marine invertebrates must be maintained by selection for something else . Evolution 40 , 1100 – 1101 ( 1986 ). OpenUrl CrossRef PubMed Web of Science 16. A. R. Swanson , E. M. Vadell , J. C. Cavender , Global distribution of forest soil dictyostelids . J. Biogeogr . 26 , 133 – 148 ( 1999 ). OpenUrl CrossRef Web of Science 17. K. B. Raper , The Dictyostelids ( Princeton University Press , 1984 ). 18. R. H. Kessin , Dictyostelium: Evolution, Cell Biology, and the Development of Multicellularity ( Cambridge University Press , 2001 ). 19. ↵ J. E. Strassmann , Y. Zhu , D. C. Queller , Altruism and social cheating in the social amoeba Dictyostelium discoideum . Nature 408 , 965 – 967 ( 2000 ). OpenUrl CrossRef PubMed Web of Science 20. ↵ E. A. Ostrowski , Enforcing cooperation in the social amoebae . Curr. Biol . 29 , R474 – R484 ( 2019 ). OpenUrl PubMed 21. ↵ E. A. Ostrowski , M. Katoh , G. Shaulsky , D. C. Queller , J. E. Strassmann , Kin discrimination increases with genetic distance in a social amoeba . PLoS Biol . 6 , e287 ( 2008 ). OpenUrl CrossRef PubMed 22. ↵ S. Hirose , R. Benabentos , H.-I. Ho , A. Kuspa , G. Shaulsky , Self-recognition in social amoebae is mediated by allelic pairs of tiger genes . Science 333 , 467 – 470 ( 2011 ). OpenUrl Abstract / FREE Full Text 23. S. Hirose , G. Chen , A. Kuspa , G. Shaulsky , The polymorphic proteins TgrB1 and TgrC1 function as a ligand–receptor pair in Dictyostelium allorecognition . J. Cell Sci . 130 , 4002 – 4012 ( 2017 ). OpenUrl Abstract / FREE Full Text 24. ↵ M. Katoh-Kurasawa , P. Lehmann , G. Shaulsky , The greenbeard gene tgrB1 regulates altruism and cheating in Dictyostelium discoideum . Nat. Commun . 15 , 3984 ( 2024 ). OpenUrl CrossRef PubMed 25. S. Hirose , B. Santhanam , M. Katoh-Kurosawa , G. Shaulsky , A. Kuspa , Allorecognition, via TgrB1 and TgrC1, mediates the transition from unicellularity to multicellularity in the social amoeba Dictyostelium discoideum . Development 142 , 3561 – 3570 ( 2015 ). OpenUrl Abstract / FREE Full Text 26. ↵ R. Benabentos , et al. , Polymorphic members of the lag gene family mediate kin discrimination in Dictyostelium . Curr. Biol . 19 , 567 – 572 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 27. ↵ J. J. Kuzdzal-Fick , A. Moreno , C. M. E. Broersma , T. F. Cooper , E. A. Ostrowski , From individual behaviors to collective outcomes: fruiting body formation in Dictyostelium as a group-level phenotype . Evolution 77 , 731 – 745 ( 2023 ). OpenUrl CrossRef PubMed 28. ↵ K. Kibler , J. Svetz , T.-L. Nguyen , C. Shaw , G. Shaulsky , A cell-adhesion pathway regulates intercellular communication during Dictyostelium development . Dev. Biol . 264 , 506 – 521 ( 2003 ). OpenUrl CrossRef PubMed Web of Science 29. ↵ M. Nei , X. Gu , T. Sitnikova , Evolution by the birth-and-death process in multigene families of the vertebrate immune system . Proc. Natl. Acad. Sci. U. S. A . 94 , 7799 – 7806 ( 1997 ). OpenUrl Abstract / FREE Full Text 30. E. V. Koonin , Y. I. Wolf , G. P. Karev G. P. Karev , Y. I. Wolf , E. V. Koonin , “ Birth and Death Models of Genome Evolution ” in Power Laws, Scale-Free Networks and Genome Biology , E. V. Koonin , Y. I. Wolf , G. P. Karev , Eds. ( Springer US , 2006 ), pp. 65 – 85 . 31. ↵ H. Piontkivska , M. Nei , Birth-and-death evolution in primate MHC class I genes: divergence time estimates . Mol. Biol. Evol . 20 , 601 – 609 ( 2003 ). OpenUrl CrossRef PubMed Web of Science 32. ↵ L. Eichinger , et al. , The genome of the social amoeba Dictyostelium discoideum . Nature 435 , 43 – 57 ( 2005 ). OpenUrl CrossRef PubMed Web of Science 33. ↵ M. Holland , et al. , Hypermutable hotspot enables the rapid evolution of self/non-self recognition genes in Dictyostelium . bioRxivorg 2025.08.01.668227 ( 2025 ). 34. ↵ E. A. Ostrowski , et al. , Genomic signatures of cooperation and conflict in the social amoeba . Curr. Biol . 25 , 1661 – 1665 ( 2015 ). OpenUrl CrossRef PubMed 35. A. Shumate , S. L. Salzberg , Liftoff: accurate mapping of gene annotations . Bioinformatics 37 , 1639 – 1643 ( 2021 ). OpenUrl CrossRef PubMed 36. ↵ T. K. Oleksyk , M. W. Smith , S. J. O’Brien , Genome-wide scans for footprints of natural selection . Philos. Trans. R. Soc. Lond. B Biol. Sci . 365 , 185 – 205 ( 2010 ). OpenUrl CrossRef PubMed 37. ↵ J. J. Vitti , S. R. Grossman , P. C. Sabeti , Detecting natural selection in genomic data . Annu. Rev. Genet . 47 , 97 – 120 ( 2013 ). OpenUrl CrossRef PubMed Web of Science 38. ↵ J. E. Strassmann , D. C. Queller , How social evolution theory impacts our understanding of development in the social amoeba Dictyostelium . Dev. Growth Differ . 53 , 597 – 607 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 39. ↵ J.A. Ågren , N. G. Davies , K. R. Foster , Enforcement is central to the evolution of cooperation . Nat Ecol Evol 3 , 1018 – 1029 ( 2019 ). OpenUrl PubMed 40. ↵ H.-I. Ho , S. Hirose , A. Kuspa , G. Shaulsky , Kin recognition protects cooperators against cheaters . Curr. Biol . 23 , 1590 – 1595 ( 2013 ). OpenUrl CrossRef PubMed 41. ↵ C. N. Jack , B. Adu-Oppong , M. Powers , D. C. Queller , J. E. Strassmann , Cost of movement in the multicellular stage of the social amoebae Dictyostelium discoideum and D. purpureum . Ethol. Ecol. Evol . 23 , 358 – 367 ( 2011 ). OpenUrl 42. ↵ C. N. Jack , et al. , Migration in the social stage of Dictyostelium discoideum amoebae impacts competition . PeerJ 3 , e1352 ( 2015 ). OpenUrl CrossRef PubMed 43. ↵ L. Holman , J. S. van Zweden , T. A. Linksvayer , P. d’Ettorre , Crozier’s paradox revisited: maintenance of genetic recognition systems by disassortative mating . BMC Evol. Biol . 13 , 211 ( 2013 ). OpenUrl CrossRef PubMed 44. ↵ D. Graur , W.-H. Li , Fundamentals of molecular evolution , 2nd Ed. ( Oxford University Press , 1999 ). 45. ↵ S. Takuno , T. Nishio , Y. Satta , H. Innan , Preservation of a pseudogene by gene conversion and diversifying selection . Genetics 180 , 517 – 531 ( 2008 ). OpenUrl Abstract / FREE Full Text 46. J. A. Fawcett , H. Innan , The role of gene conversion in preserving rearrangement hotspots in the human genome . Trends Genet . 29 , 561 – 568 ( 2013 ). OpenUrl CrossRef PubMed Web of Science 47. J. T. Martinsohn , A. B. Sousa , L. A. Guethlein , J. C. Howard , The gene conversion hypothesis of MHC evolution: a review . Immunogenetics 50 , 168 – 200 ( 1999 ). OpenUrl CrossRef PubMed Web of Science 48. ↵ M. D. Daugherty , S. E. Zanders , Gene conversion generates evolutionary novelty that fuels genetic conflicts . Curr. Opin. Genet. Dev . 58–59 , 49 – 54 ( 2019 ). OpenUrl CrossRef PubMed 49. ↵ J. C. Cavender , K. B. Raper , The Acrasieae in Nature. I. Isolation . Am. J. Bot . 52 , 294 – 296 ( 1965 ). OpenUrl CrossRef PubMed 50. ↵ R. C. Edgar , MUSCLE: multiple sequence alignment with high accuracy and high throughput . Nucleic Acids Res . 32 , 1792 – 1797 ( 2004 ). OpenUrl CrossRef PubMed Web of Science 51. ↵ B. Pfeifer , U. Wittelsbürger , S. E. Ramos-Onsins , M. J. Lercher , PopGenome: an efficient Swiss army knife for population genomic analyses in R . Mol. Biol. Evol . 31 , 1929 – 1936 ( 2014 ). OpenUrl CrossRef PubMed Web of Science 52. ↵ E. Paradis , pegas: an R package for population genetics with an integrated-modular approach . Bioinformatics 26 , 419 – 420 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 53. ↵ P. A. Ewels , et al. , The nf-core framework for community-curated bioinformatics pipelines . Nat. Biotechnol . 38 , 276 – 278 ( 2020 ). OpenUrl CrossRef PubMed 54. ↵ J. T. Robinson , et al. , Integrative genomics viewer . Nat. Biotechnol . 29 , 24 – 26 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 55. ↵ M. Kolmogorov , J. Yuan , Y. Lin , P. A. Pevzner , Assembly of long, error-prone reads using repeat graphs . Nat. Biotechnol . 37 , 540 – 546 ( 2019 ). OpenUrl CrossRef PubMed 56. ↵ R. Vaser , I. Sović , N. Nagarajan , M. Šikić , Fast and accurate de novo genome assembly from long uncorrected reads . Genome Res . 27 , 737 – 746 ( 2017 ). OpenUrl Abstract / FREE Full Text 57. ↵ M. Alonge , et al. , Automated assembly scaffolding using RagTag elevates a new tomato system for high-throughput genome editing . Genome Biol . 23 , 258 ( 2022 ). OpenUrl CrossRef PubMed 58. ↵ A. Mikheenko , V. Saveliev , P. Hirsch , A. Gurevich , WebQUAST: online evaluation of genome assemblies . Nucleic Acids Res . 51 , W601 – W606 ( 2023 ). OpenUrl CrossRef PubMed 59. ↵ F. A. Simão , R. M. Waterhouse , P. Ioannidis , E. V. Kriventseva , E. M. Zdobnov , BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs . Bioinformatics 31 , 3210 – 3212 ( 2015 ). OpenUrl CrossRef PubMed 60. ↵ K. Katoh , K. Misawa , K. Kuma , T. Miyata , MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform . Nucleic Acids Res . 30 , 3059 – 3066 ( 2002 ). OpenUrl CrossRef PubMed Web of Science 61. ↵ M. Siol , T. Coudoux , S. Ravel , S. De Mita , EggLib 3: A python package for population genetics and genomics . Mol. Ecol. Resour . 22 , 3176 – 3187 ( 2022 ). OpenUrl PubMed 62. ↵ T. Brůna , A. Lomsadze , M. Borodovsky , GeneMark-EP+: eukaryotic gene prediction with self-training in the space of genes and proteins . NAR Genom. Bioinform . 2 , qaa026 ( 2020 ). 63. ↵ K. Lagesen , et al. , RNAmmer: consistent and rapid annotation of ribosomal RNA genes . Nucleic Acids Res . 35 , 3100 – 3108 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 64. ↵ B. L. Cantarel , et al. , MAKER: an easy-to-use annotation pipeline designed for emerging model organism genomes . Genome Res . 18 , 188 – 196 ( 2008 ). OpenUrl Abstract / FREE Full Text 65. ↵ P. P. Chan , B. Y. Lin , A. J. Mak , T. M. Lowe , tRNAscan-SE 2.0: improved detection and functional classification of transfer RNA genes . Nucleic Acids Res . 49 , 9077 – 9096 ( 2021 ). OpenUrl CrossRef PubMed 66. ↵ The UniProt Consortium , UniProt: the Universal Protein Knowledgebase in 2025 . Nucleic Acids Res . 53 , D609 – D617 ( 2024 ). OpenUrl 67. ↵ M. Blum , et al. , InterPro: the protein sequence classification resource in 2025 . Nucleic Acids Res . 53 , D444 – d456 ( 2025 ). OpenUrl CrossRef PubMed 68. ↵ J. Pei , et al. , Bridging the Gap between Sequence and Structure Classifications of Proteins with AlphaFold Models . J. Mol. Biol . 436 , 168764 ( 2024 ). OpenUrl PubMed 69. ↵ M. Steinegger , J. Söding , MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets . Nat. Biotechnol . 35 , 1026 – 1028 ( 2017 ). OpenUrl CrossRef PubMed 70. ↵ M. Suyama , D. Torrents , P. Bork , PAL2NAL: robust conversion of protein sequence alignments into the corresponding codon alignments . Nucleic Acids Res . 34 , W609 – 12 ( 2006 ). OpenUrl CrossRef PubMed Web of Science 71. ↵ Z. Yang , PAML 4: Phylogenetic Analysis by Maximum Likelihood . Mol. Biol. Evol . 24 , 1586 – 1591 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 72. ↵ M. Smolka , et al. , Detection of mosaic and population-level structural variants with Sniffles2 . Nat. Biotechnol . 42 , 1571 – 1580 ( 2024 ). OpenUrl CrossRef PubMed 73. ↵ H. Li , Minimap2: pairwise alignment for nucleotide sequences . Bioinformatics 34 , 3094 – 3100 ( 2018 ). OpenUrl CrossRef PubMed 74. ↵ A. R. Quinlan , I. M. Hall , BEDTools: a flexible suite of utilities for comparing genomic features . Bioinformatics 26 , 841 – 842 ( 2010 ). OpenUrl CrossRef PubMed Web of Science View the discussion thread. Back to top Previous Next Posted October 23, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Evolutionary dynamics of the tgr gene family in Dictyostelium allows escape from Crozier’s Paradox Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Evolutionary dynamics of the tgr gene family in Dictyostelium allows escape from Crozier’s Paradox Rafael A. Polo Prieto , Sean McFadyen , Fabiola Medina , Mische Holland , Cathleen M.E. Broersma , Jenna Masterson , Courtney Masterson , Hui Hui Phua , Jennie Kuzdzal-Fick , Murray P. Cox , Tera C. Levin , Elizabeth A. Ostrowski bioRxiv 2025.10.22.684067; doi: https://doi.org/10.1101/2025.10.22.684067 Share This Article: Copy Citation Tools Evolutionary dynamics of the tgr gene family in Dictyostelium allows escape from Crozier’s Paradox Rafael A. Polo Prieto , Sean McFadyen , Fabiola Medina , Mische Holland , Cathleen M.E. Broersma , Jenna Masterson , Courtney Masterson , Hui Hui Phua , Jennie Kuzdzal-Fick , Murray P. Cox , Tera C. Levin , Elizabeth A. Ostrowski bioRxiv 2025.10.22.684067; doi: https://doi.org/10.1101/2025.10.22.684067 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Evolutionary Biology Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17690) Bioengineering (13892) Bioinformatics (41935) Biophysics (21451) Cancer Biology (18587) Cell Biology (25499) Clinical Trials (138) Developmental Biology (13377) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24318) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88601) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15152) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.