Full text
59,437 characters
· extracted from
preprint-html
· click to expand
A general model for genomic traits evolution | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results A general model for genomic traits evolution José Ignacio Arroyo , Alejandro Maass , Pablo A. Marquet , Geoffrey West , Christopher P. 
Kempes doi: https://doi.org/10.1101/2025.10.22.684021 José Ignacio Arroyo 1 Santa Fe Institute , Santa Fe NM, USA 2 Center for Mathematical Modeling, University of Chile and IRL-CNRS 2807 , Santiago, Chile Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: jiarroyo{at}santafe.edu Alejandro Maass 2 Center for Mathematical Modeling, University of Chile and IRL-CNRS 2807 , Santiago, Chile 3 Millennium Institute Center for Genome Regulation , Santiago, Chile 4 Department of Mathematical Engineering, University of Chile , Santiago, Chile Find this author on Google Scholar Find this author on PubMed Search for this author on this site Pablo A. Marquet 1 Santa Fe Institute , Santa Fe NM, USA 2 Center for Mathematical Modeling, University of Chile and IRL-CNRS 2807 , Santiago, Chile 5 Facultad de Ciencias Biológicas, Pontificia Universidad Católica de Chile , Santiago, Chile Find this author on Google Scholar Find this author on PubMed Search for this author on this site Geoffrey West 1 Santa Fe Institute , Santa Fe NM, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Christopher P. Kempes 1 Santa Fe Institute , Santa Fe NM, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Preview PDF Abstract A large body of data indicates that genomic elements, including gene families and entire genomes, exhibit diverse types of evolutionary dynamics. Several models have been developed to describe the dynamics of, for example, genome size; however, a simple deterministic model with interpretable parameters that could be useful for fitting a variety of data remains an open problem. 
Here, we show that (a continuous form of) the Breeder’s equation for the evolution of quantitative traits/characters leads to a general model that explains the dynamics of genomic elements generated by indel mutations (duplications, insertions, and deletions) and selection. Our framework consists of a general exponential-linear model that predicts at least six different types of dynamical behaviors, including exponential growth and decay. The equations fit data, such as evolution experiments across generations, field observational data of genome sizes across time, and data from phylogenetic reconstructions of ancestral states of genome size across millions of years of a variety of taxa, including viruses, bacteria, fungi, unicellular eukaryotes, plants, and animals, at both intra- and interspecific levels. To test the universality of the dynamics, we derived a dimensionless equation that enables the (re)scaling and subsequent collapse of all data for growth or decay onto a single universal curve. Thus, our model provides a general basis not only for explaining the dynamics in the size of gene families or whole genomes of populations across taxonomic and environmental scales, but also other traits, providing a foundation for further theoretical developments. Introduction Understanding how genomes evolve is fundamental to explaining the origins of genome diversity and complexity, and ultimately the diversity of life [ 1 ]. Genome evolution is a two-step process that involves the generation of changes through mutation and their subsequent fixation in populations, which can occur through random drift or natural selection. The mutations that change genome size are called indel mutations. The mechanisms that generate indels include the duplication, insertion, or deletion of genomic parts, a process that occurs during DNA replication or recombination, for example, through unequal crossing over [ 2 ]. 
Despite mutations often being referred to as “random error”, their occurrence is not uniformly distributed across the genome [ 3 , 4 ], and their average probability across the genome depends on factors such as temperature [ 5 ] or genome size itself [ 6 ]. The process of duplication, insertion, or deletion creates a set of related genes called gene families, which are groups of genes that share a common ancestor. Classic examples of gene families include CRISPR-Cas, the immune gene family, and transcription-related genes, which can often be a constraint requiring multiple copies of genes [ 7 – 9 ]. In contrast, genome streamlining through deletions and selection is known to occur in highly stable environments, such as in facultative or obligate endosymbiont bacteria, often leading to slow growth [ 10 ]. This process can lead to large gene families with thousands of copies when adaptive selection occurs, for example, in the cases of olfactory receptors or immune genes. Genomic material is not only generated endogenously but also introduced by horizontal gene transfer (HGT). HGT can occur within and between any taxonomic domain, including virus-to-virus. All of these types of indel mutations, which increase or decrease genome size, can vary from a single nucleotide to the entire genome. After an indel mutation occurs, its fixation in the population depends on its fitness effects and the effective population size [ 1 , 11 ]. The effects of fitness can be neutral, meaning that having an additional copy of an identical or slightly modified gene could cause no change in fitness and be randomly maintained in the genome or removed by selection, given that having extra copies represents a cost. Alternatively, the change in the number of copies of a gene can have a selective advantage, as having an extra copy of a gene and hence extra proteins can be advantageous. 
However, over time, the initially identical copies can diverge, and one of the copies acquires a slightly different function, such as being expressed in different stages of ontogeny or in different tissues, which may also be beneficial. Examples of this include the globin gene family, for example, which, after different rounds of duplication, have diversified to transport oxygen in different developmental stages and tissues [ 12 ]. Gene loss can also be adaptive. For example, facultative or obligate microbial endosymbionts lose many genes that the host can supply, and ultimately evolve very small genomes [ 13 ]. Data on genome size dynamics in the literature are limited to a few model species; nevertheless, a wide variety of dynamical behaviors can be observed (e.g., [ 14 – 16 ]). One of the best examples of data on genome size dynamics comes from the Long Term Evolution Experiment (LTEE) [ 17 ]. The LTEE is an experimental evolution of 12 E. coli populations that reached 50,000 generations by 2016 [ 17 ]. Some of the experiment’s results include an increase in genome size and a significant relationship between fitness and genome size in one of the populations [ 14 ]. Other experiments have also been performed in yeast, showing a trend to evolve toward diploidy [ 18 ]. Examples in multicellular organisms are scarce, but they are present in maize [ 19 ], for example. Also, recent studies have focused not only on aggregated properties such as total genome size, but also on the size (number of copies or copy-number for simplicity) of gene families [ 20 ]. In E. coli , for example, experiments have shown changes in the number of genes in gene families related to antibiotic resistance, responding to different concentrations of antibiotic [ 20 ]. 
At the interspecific level, data show that the amount of genome information, measured as genome size, has increased through evolutionary time, with a correlation between genome size and organismal complexity, from bacteria to plants and animals [ 21 – 23 ]. Several models have been proposed to explain changes in genome composition and size through time [ 6 , 24 – 28 ]. These models differ in their mathematical approaches and predictions. Stochastic models include, for example, processes of birth-and-death to explain the distribution of gene family sizes [ 25 , 29 , 30 ], coalescence theory [ 31 ] to explain the distribution of frequency of the presence of genes in individuals of a population, or Markov processes that predict genome streamlining [ 6 ], whereas deterministic models include those that explain, for example, the relationship between indel rate and genome size [ 32 , 33 ]. However, there is relatively little testing of these models against data on genome size dynamics generated by evolution experiments [ 17 ], field observations [ 34 ], or ancestral reconstructions [ 35 ]. Despite all this work, a deterministic, simple, and general first-principles model that could be used to fit data on the diverse evolutionary dynamics of genomic structures created by gene duplication-deletion processes (e.g., gene families) has not been developed. Here, we provide a minimal, deterministic model for the temporal dynamics of a group of related genes that evolve through processes of indel mutations, such as those affecting gene families or entire genomes. To do so, we use one of the most fundamental equations from theoretical evolutionary biology for the evolution of quantitative traits, Breeder’s equation, which incorporates the major forces of genome expansion and contraction rates described above, coupled with selection. 
We can fit our model to genome dynamics data and estimate key evolutionary parameters, enabling us to interpret the mechanisms underlying any trajectory of genome size change. Our work provides a foundation for future theoretical developments. Our approach represents an efficient theory, that is, a theory built on the minimal set of first principles and assumptions that satisfactorily explain the data [ 36 ], rather than on detailed models aimed at each individual species. This approach allows us to find a universal curve for genome changes over time and to compare the effective parameters of different species. Results Model derivation We are interested in a general and coarse-grained approach to changes in groups of related genes that evolve via indel mutations, such as gene families or entire genomes, measured as the number of genes or genome size over time, drawing on classical results from population genetics. To do so, we will argue that genomic components, such as gene families or genome size, are quantitative traits that can be modeled using the same theory developed initially for macroscopic traits, such as body size. This argument is based on the fact that the number of copies of a gene family or whole genome, which can be measured in the number of genes or base pairs (bp) or Mb, depends on the genes themselves, particularly the genes that encode for proteins and enzymes involved in replication and recombination [ 37 ]. Notice that units such as the number of genes or base pairs (bp) are discrete, but these units can be converted to megabases (Mb), which is a continuous unit. In particular, we will use the Breeder’s equation, a fundamental theorem of evolution. 
The Breeder’s equation was originally empirically formulated (Lush 1937) to quantify the change in the average trait of a population in the context of selective breeding to achieve genetic improvements in animals and plants, after selection in generation t + Δ t compared to before selection in generation t . Later, Breeder’s equation was theoretically derived in different ways ([ 38 – 40 ]), and found to be comparable to other equations, such as Robertson-Price’s equation, re-expressed in alternative ways [ 41 ], and refined [ 40 ], to account, for example, for natural populations. The time elapsed from one generation to the next, meaning the time between a parent having an offspring and that offspring having an offspring, is the generation time; for simplicity, we will call it g . The Breeder’s equation [ 42 ] can be formulated as a discrete equation, $$\Delta \bar{x} = h^2 S, \qquad (1)$$ where $\Delta \bar{x}$ is the selection response, which is the change in the average trait in offspring ($\bar{x}_o$) with respect to the entire population ($\bar{x}$), i.e., $\Delta \bar{x} = \bar{x}_o - \bar{x}$. The term $h^2$ is the narrow sense heritability, which is a parameter that measures the proportion of phenotypic variation in a trait that is attributable to additive genetic variation, and $S$ is the selection differential, which is a measure used in quantitative genetics and breeding to quantify the difference between the average trait value of a selected group of parent individuals ($\bar{x}_s$) and the average trait value of the entire population ($\bar{x}$), i.e., $S = \bar{x}_s - \bar{x}$. See Table 1 for all terms of this and subsequent equations. Following these definitions, this equation can be rewritten as, $$\bar{x}_o - \bar{x} = h^2 \left( \bar{x}_s - \bar{x} \right). \qquad (2)$$ View this table: View inline View popup Download powerpoint Table 1. Relevant variables and parameters of the model. 
For example, if the average mass of an animal is $\bar{x} = 100$ kg, and a fraction with the highest weight is selected for reproduction, which has an average weight of $\bar{x}_s = 120$ kg, and the heritability $h^2$ is 0.5 (which as a practical approximation can be measured as the correlation between the weight of offspring and parents), then the predicted response is $\Delta \bar{x} = 0.5 \times (120 - 100) = 10$ kg, and the average weight of the offspring will be 110 kg. In equation (2) , $h^2$ is constant and does not depend on time, as is $\bar{x}_s$, the average trait value of the parents selected in generation t . The average trait value of the population in generation t , i.e., $\bar{x} = \bar{x}(t)$, and the average trait value of the population in generation t + g , i.e., $\bar{x}_o = \bar{x}(t+g)$, depend on time. Given that $\bar{x}(t+g)$ is the average trait in generation t + g , then $\Delta \bar{x}$ can be rewritten as $\Delta \bar{x} = \bar{x}(t+g) - \bar{x}(t)$, and we can rewrite equation (2) as, $$\bar{x}(t+g) - \bar{x}(t) = h^2 \left( \bar{x}_s - \bar{x}(t) \right). \qquad (3)$$ The term $h^2$ , the realized heritability (narrow sense), is the proportion of phenotypic variation due to additive genetic variation and can be expressed as $h^2 = \sigma_A^2 / \sigma_P^2$, where $\sigma_A^2$ is the additive genetic variance (the total effect on a trait originating from one or more gene loci), and $\sigma_P^2$ is the total phenotypic variance. The additive genetic variance is part of the decomposition of the genetic variance: $\sigma_G^2 = \sigma_A^2 + \sigma_D^2 + \sigma_I^2$, where $\sigma_D^2$ is the variance due to dominance effects and $\sigma_I^2$ is the variance due to interactive effects. If we assume that $\sigma_D^2 = 0$ and $\sigma_I^2 = 0$, we have $\sigma_G^2 = \sigma_A^2$, and then $h^2 = \sigma_G^2 / \sigma_P^2$. The phenotypic variance can also be decomposed into genetic ($\sigma_G^2$) and environmental ($\sigma_E^2$) components: $\sigma_P^2 = \sigma_G^2 + \sigma_E^2$; then $h^2 = \sigma_G^2 / (\sigma_G^2 + \sigma_E^2)$. This last expression can be simplified even further. The genetic variance depends on the mutation rate [ 43 – 46 ]. The mutation rate includes indels generated not only by replication or recombination but also by gains via horizontal gene transfer (HGT), so this equation would also apply above the species level, since HGT occurs between phylogenetically distant species. 
The relationship between genetic variance and mutation rate is $\sigma_G^2 = 2 n \mu v_s$ for haploid/asexual [ 46 ] and $\sigma_G^2 = 4 n \mu v_s$ for diploid/sexual [ 44 ] populations, where $n$ is the number of loci, $\mu$ is the mutation rate, and $v_s$ is a spread parameter of the exponential quadratic fitness function. As can be seen from the previous definitions, the two expressions differ by a factor of 2 in sexual populations; hence, in both cases, the term is a set of constants times the mutation rate. For simplicity, to define generically a term for genetic variance for both asexual and sexual populations, we will define the term $\lambda$ , then the general expression for genetic variance would be: $\sigma_G^2 = \lambda \mu$, where $\lambda = 2 n v_s$ for asexual and $\lambda = 4 n v_s$ for sexual populations. It is also known that the practical definition of evolutionary rate ($k$) is related to the mutation rate ( $\mu$ ) and generation time ( $g$ ): $k = \mu / g$, then $g k = \mu$ . It is worth mentioning that the expression for evolutionary rate should not be confounded with the similar but different expression for probability of fixation, such as Haldane’s (approximated) equation: p = 4 N e µs (where N e is the effective population size, s is the selection coefficient)[ 11 ]. Taking all these together, and using the general expression for genetic variance and mutation rate, $\sigma_G^2 = \lambda \mu = \lambda g k$, the term heritability can be reexpressed as $h^2 = \lambda g k / \sigma_P^2$. Replacing this expression for $h^2$ in our previous expression of equation (3) , we have, $$\bar{x}(t+g) - \bar{x}(t) = \frac{\lambda g k}{\sigma_P^2} \left( \bar{x}_s - \bar{x}(t) \right). \qquad (4)$$ Multiplying both sides of equation (4) by $1/g$, $$\frac{\bar{x}(t+g) - \bar{x}(t)}{g} = \frac{\lambda k}{\sigma_P^2} \left( \bar{x}_s - \bar{x}(t) \right), \qquad (5)$$ and taking the limit when $g$ is small (which is the common approach in population genetics theory to go from discrete to continuous time [ 48 , 49 ]), that is $g \to 0$, $$\frac{d\bar{x}}{dt} = \frac{\lambda k}{\sigma_P^2} \left( \bar{x}_s - \bar{x} \right). \qquad (6)$$ For simplicity, we will rewrite equation (6) as, $$\frac{d\bar{x}}{dt} = b - a \bar{x},$$ where we define $a = \lambda k / \sigma_P^2$, and $b = a \bar{x}_s$. The general solution of this equation is, $$\bar{x}(t) = \frac{b}{a} + \left( \bar{x}(0) - \frac{b}{a} \right) e^{-a t}. \qquad (7)$$ Equation (7) can describe dynamical behaviors of exponential growth or decay. We distinguish at least six different types of behaviors depending on whether the parameter values can be lower, equal, or greater than zero. 
These different general scenarios are summarized in Table 2 (and specific examples with fitted parameter values will be discussed and shown later in Figure 1 ). Let $\bar{x}^*$ denote the steady-state value of the trait (derived at the end of this paragraph). If $\bar{x}^* = 0$, which biologically means that a trait (such as the number of genes in a gene family) is 0 –implying de novo gene birth–, and i) $a < 0$, there is exponential growth, and ii) if $a > 0$, there is exponential decay with a horizontal asymptote at 0. If $\bar{x}^* \neq 0$, four behaviors are possible. If $a < 0$ and iii) $\bar{x}(0) > \bar{x}^*$, the dynamics starts at x(0) (i.e., the curve intercepts x at x(0)) followed by an exponential growth, and iv) if $\bar{x}(0) < \bar{x}^*$, the dynamics starts at x(0) followed by exponential decay intercepting t. If $a > 0$ and v) $\bar{x}(0) > \bar{x}^*$, there is an exponential decay with a steady state when t tends to infinity (i.e., a horizontal asymptote), and vi) if $\bar{x}(0) < \bar{x}^*$, there is an asymptotic growth with a steady state when t tends to infinity (i.e., a horizontal asymptote). From making the derivative equal to zero in equation (6) , we have that the steady state value of $\bar{x}$ is $\bar{x}^* = \bar{x}_s$, the average trait value of the selected parents. View this table: View inline View popup Download powerpoint Table 2. Possible dynamical behaviors of the models described by equation (7) . Download figure Open in new tab Figure 1: Examples of the dynamical behaviors of the number of genes in a gene family or genome size. A) genome size in yeast (Gerstein et al. 2006), B, F) average number of duplicated genes in the genome in C. elegans (Farslow et al. 2015), C) genome size from bacteria to mammals (Sharow et al. 2006), D) genome size in a plant RNA virus (Willemsen et al. 2019), E) number of antibiotic resistance duplicated genes in E. coli (Pereira et al. 2021). Notice that in equation (7) , time t is in generations, but by multiplying by a factor ϕ corresponding to the time in other units per generation (e.g., minutes/generation), time can be measured in any other unit, and there would be a new parameter multiplying t ; $a t = (a/\phi)\, t\phi$, where the term tϕ would be time measured in minutes, for example. 
Dimensionless form A classic method for exhibiting and testing the generality of an equation is to express it in terms of rescaled dimensionless variables, which predict that a plot of all of the data collapses onto a single “universal” curve [e.g., [ 50 ]]. To derive a dimensionless form of equation (7) , given that at both sides of the equation we have units of, for example, base pairs (bp), we simply divide each side of the equation by , we define dimensionless time as τ = at for time measured in generations or τ = ( a/ϕ ) tϕ for time measured in other units such as minutes, after dividing both sides of equation (7) by and after rearranging we can rewrite this equation as, , or simplified as, $$\hat{x}(\tau) = 1 - \psi e^{-\tau}, \qquad (8)$$ where $\hat{x}$ is the rescaled (dimensionless) trait value and ψ represents a value that can be 1 (if , a > 0) or −1 (if , a < 0). If ψ = 1, the curve describes asymptotic growth, and if ψ = −1, the curve describes asymptotic decay. For the particular case when , implying , the dimensionless equation would be, , where ψ represents a value that can be 1 (if ( h 2 /g ) > 0) or −1 (if ( h 2 /g ) < 0). If ψ = 1, the curve describes asymptotic growth, and if ψ = −1, the curve describes asymptotic decay. Equation (8) can be derived through different rearrangements besides dividing the original equation (7) by , but also by dividing by or simply by subtracting and rearranging. Fitting the model to data To test the fit of data to equation (7) , we collected examples from the literature from experimental and observational studies, for both number of base pairs and genes through evolutionary time, for diverse taxonomic groups, from viruses to mammals (see Methods). Among all the time series in our dataset of dynamics of gene copy number or genome size found in the literature, most of them fitted significantly to equation (7) . 
Some examples are shown in Figure 1 , and the remaining fits, together with the estimated parameters, and goodness-of-fit statistics are provided in the Supplementary Material ( https://www.dropbox.com/scl/fi/lb83mi3zoq2xig7p39lx4/SM_modgenomeevol_arroyo25.xlsx?rlkey=82fz19wqsuqpvq8psj7isk1l8&st=h6lp80do&dl=0 ). Each of the specific examples shown in Figure 1 corresponds to a case of all the possible regimes described in Table 2 , which include cases of growth and decay ( Figure 1 ). Among the examples, interesting cases include the LTEE experiment in E . coli [ 14 ], experimental evolution in yeast[ 18 ] where there is a tendency to evolve toward or remain diploid from either haploid, diploid, or tetraploid populations ( Figure 1A ), and cases of increase in copy number in E. coli , for example, in antibiotic resistance genes in response to different concentrations of antibiotics. There were cases that did not fit any of the general equations, meaning that the change in gene family or genome size was invariant, indicating neutral evolutionary dynamics. We do not show examples of this type here, as we focused on evolutionary dynamics driven by both (indel) mutation and selection. We scaled the data of curves representing genome size dynamics for examples in Figure 1 and others in our database, using both increasing and decreasing patterns, employing the dimensionless equation (8) for data collapse. This equation predicts that decreasing curves should converge to a single curve with an intercept of 2 on the y-axis, and decrease exponentially and asymptotically toward 1 ( Figure 2 , upper panel). On the other hand, increasing curves should collapse into a single curve with an intercept at zero and grow asymptotically toward 1 ( Figure 2 , lower panel). 
In both cases, curves with high goodness-of-fit collapsed into single universal curves ( Figure 2 ), demonstrating the universality of the model, despite the idiosyncrasies of the particular species or evolutionary paths that determined different parameter values. Download figure Open in new tab Figure 2: Examples of data collapse. Empty symbols correspond to the number of genes, and filled symbols to the number of base pairs, ×: viruses, □: prokaryotes, ◦:unicellular eukaryotes, △:animals, ⋄:plants. We scaled the data of all significant fits with a high goodness-of-fit (an R − sq. > 0.8) using the estimated parameters and the dimensionless forms of the equations, for all cases of linear and non-linear selection. When applying this method to different fitted data sets, it predicts a data collapse, i.e., all scaled data should follow the same single ‘universal’ curve predicted by the dimensionless equation. This classic methodology used in physics demonstrates that, despite the idiosyncrasies of the different dynamics, which are probably due to intrinsic (characteristics of the species) or extrinsic factors (environmental context), all of them are governed by the same general equation. Discussion Here we discuss the following: i) previous approaches to model genome size evolution, ii) why we used Breeder’s equation instead of other alternatives, iii) alternatives to Breeder’s equation, iv) limitations of our model, and v) extensions of the model. Most previous models have focused on formulating a model rather than deriving one, using stochastic models, and often on a single type of mechanism (horizontal or vertical) [ 6 , 24 – 27 ]. The approaches and predictions of these models vary widely, including birth- and-death, Markov chains, random walks, agent-based models, and network growth [ 6 , 27 , 30 , 51 , 52 ]. 
The predictions encompass the distribution of genes within a gene family, the scaling of rates and size, as well as the stochastic dynamics of genome decay, among others. The missing piece in this variety of approaches was a deterministic model derived from first principles to describe the dynamics of genome structure, including not only genome size but also the size of gene families or any other group of genes emerging from processes of duplication/insertion-deletion. Although we could have simply derived a phenomenological model for a deterministic dynamics of genomes, our goal was to derive a deterministic model from first principles. For example, one could use a model analogous to Newton’s law of heating or cooling, assuming that the change in genome size (x) depends on the indel mutation rate (m) and on size itself, but is constrained by a minimum or maximum viable size in a specific environment ( x v ) (given that there is a minimum set of genes necessary for fundamental biological functions, as synthetic cells have demonstrated [ 53 ], and, on the other hand, there are energetic costs of having too much genomic material). Following this logic, we could formulate the phenomenological model: dx/dt = m ( x − x v ). However, we opted for a first-principles approach here. Developing a first-principles-based theory explaining the origin and evolution of genomic complexity is fundamental, for example, to explain not just the origin of the complexity of genome structures but also transcriptomes and proteomes, as a basis for a theory to explain the emerging knowledge of the structure and dynamics of genomes, especially considering the vast amount of genomics data and upcoming big genome sequencing projects, such as the Earth Biogenome project [ 54 ], and to better understand cancer, as the number of mutations increases with age (i.e., time) [ 55 ]. To derive a model from first principles, we used the simplest first-principles approach, Breeder’s equation. 
We chose to use Breeder’s equation for at least three reasons: i) because it is the most fundamental equation for describing the evolution of a trait. Other equations, such as Lande’s equation, are derived from this equation. ii) Using other equations, such as Lande’s equation or the adaptive dynamics equation, requires more assumptions, such as assuming quadratic polynomials for the fitness functions, some of which do not make sense. For example, assuming stabilizing or disruptive selection (which implies assuming an exponential quadratic polynomial) where only the variance changes and not the mean, results in an equation similar to the one obtained in equation (7) , but with this equation. iii) On the other hand, assuming, for example, directional selection, which implies using a linear polynomial, results in a linear dynamics of two parameters, which is statistically indistinguishable from an exponential dynamics, also of two parameters, so there is no way to distinguish statistically between directional selection or another type. Also important to mention is that we used the original form of Breeder’s equation here, but it can be formulated in alternative ways [ 56 ], all of which are reducible to a similar mathematical form. Beyond Breeder’s equation, at least three other slightly more complicated approaches, in the sense that require more assumptions, might have converged to a similar model. These alternative approaches are Lande’s equation and Adaptive Dynamics. Lande’s equation, on one hand, describes the change of the average of a quantitative trait as a function of heritability (which in turn is a function of mutation rate) and the selection gradient, which is derivative of the logarithm of the mean fitness with respect to the trait [ 57 ], and is equivalent to Breeder’s equation [ 58 ]. 
The (canonical) equation of Adaptive Dynamics, on the other hand, describes the change of average trait as a function of mutation rate, variance of the trait (of a mutant), (steady-state) population size, and the selection gradient, which is the derivative of the fitness with respect to the trait. As can be seen from the description, both theoretical approaches, evolutionary quantitative genetics and adaptive dynamics, share the commonality that the evolution of a trait depends on the mutation rate and the fitness gradient (i.e., selection) [ 59 ]. From these basic expressions, the ultimate form of the equation depends on the selection of the fitness function. Three basic approaches have been described in the literature: directional selection, when one extreme of the traits is favored and can be approached by a linear function. Alternatively, a quadratic approach can be used under stabilizing selection, when an intermediate trait value is favored, or diversifying (also called disruptive) when both extremes are favored [ 60 ]. Here, a normal (Gaussian) fitness function ([ 61 ], see also [ 58 ]). In adaptive dynamics, to conveniently derive a linear differential equation, a quadratic approximation[ 60 ] can be used for the fitness function. These two approaches, however, are all phenomenological and have not been derived from first principles yet. The alternative, based on first principles as a function of fitness, is possible for some traits. For example, in metabolic theory, the relationship between fitness and cell size in unicellular organisms, or body size in multicellular ones, has been derived. This derivation predicts a scaling relationship of the form r = r 0 m a , where r is fitness, commonly measured as the per capita population growth rate, r 0 is a constant, m is size, and a is an exponent that varies depending on the taxonomic group. 
The limitation of this approach is that although it is possible to establish a theoretical relationship between size and other traits, such as life history traits, and although many traits empirically correlate with size, there is no general model that relates $r$ to any trait. Therefore, this first-principles approach is only helpful for a few traits. In addition to the above models, it is possible to use the classical allele frequency models. For example, the model $dx/dt = sx(1 - x) - ux$, where $x$ is the frequency of an allele, $s$ is the selection coefficient, and $u$ is the mutation rate [ 62 – 64 ], which is equal in form to Levins’ metapopulation model [ 65 ], can be modified to describe the change in the number of alleles or genes. In our analysis, we found that most cases of both experimental and field data agreed with scenarios of selection, driven by different factors such as antibiotic resistance and stressed medium, among others. In natural populations, the direction of the change — decrease or increase — can be hypothesized to depend on the availability of resources. For example, species that live in hosts, such as symbiont bacteria [ 66 , 67 ], have reduced their genome because they can utilize specific processed metabolites from the host environment. This process is not exclusive to bacteria, but can also occur in multicellular eukaryotes, for example, in herbivore-plant interactions [ 68 ]. There were also many cases of invariant dynamics (see SM) that correspond to instances of just mutation but no selection, where there is a stochastic change in genome size with no trends toward increasing or decreasing. This model has limitations, which are evident from the fact that some patterns are not fitted by the model, such as dynamics with a logistic (S-) shape (e.g., [ 69 – 71 ]), or dynamics with alternative states, such as the transition from haploid to diploid in yeast [ 72 ]. This model could be extended in different ways. 
For example, by integrating this framework with recent developments on the limits of evolutionary rates of traits [ 73 ], or empirical studies that show a universal relation between mutation rate and genome size [ 74 ] and population size [ 75 ]. This model explains previous phenomenological attempts to explain scaling behavior in genomes, such as the scaling of regulatory genes, emerging from coupled exponential dynamics [ 76 ], for example. Additionally, this model serves as a basis for further development, as the parameters that define the steady state, for example, are related to other traits such as body size and temperature [ 77 ]. Tests of the dynamics of traits for a single lineage are scarce, probably because the data are also scarce, except for a few traits such as cell size [ 78 ] or body size [ 79 ]. Not many experimental evolution experiments exist in addition to the LTEE in E. coli ; as far as we know, there is no other in prokaryotes, but there are a few in unicellular eukaryotes (in yeast and algae) [ 18 , 80 ]. Regarding observational evidence, there are not many time series of long-term observations of a trait, but there must be a few. A well-known example is brain size in humans [ 81 ] or, at the interspecific level, body size in mammals [ 82 ]. Beyond biology, this model might be applied to any generic process of mutation and selection, such as the growth of companies, which have been argued to be similar to living organisms as they grow and reproduce, and are under selection pressure dictated by markets, consumers, etc. [ 83 ]. This strongly suggests that a generic equation or set of equations could be formulated to account for generic processes of endogenous creation/destruction with a feedback structure and exogenous influences. 
In conclusion, we formalized the process of evolution of the average size of a genomic component, which originates from processes of duplication/insertion-deletion and selection, commonly including gene families or the entire genome, as well as metabolic pathways, functional categories, and other components. We applied the Breeder’s equation to describe the dynamics of genomic quantitative characters that change due to indel mutations, such as the number of genes in a gene family or the number of genes in a genome, which are traditionally simply measured as genome size. The model is supported by data available in the literature from various gene families and bacterial genomes that have evolved under relatively constant environmental conditions, as well as by data from field observations spanning time. With this study, we demonstrate that a minimal deterministic model can explain the genome evolutionary dynamics of asexual populations. More importantly, it constitutes a basis for extending this framework by integrating it with other theories and making new predictions. Methods Data We searched for data on the change in the number of genes within a gene family or the number of base pairs (i.e., genome size) over time for a single species or groups of species using Google and Google Scholar. We found articles reporting experimental or observational data, or dynamics inferred from phylogenetic ancestral reconstructions of the evolution of the number of base pairs (genome size) or genes in a gene family in all major taxonomic groups in the tree of life: viruses, bacteria, unicellular eukaryotes, fungi, plants, and animals. Some examples include the Long-Term Evolution Experiment (LTEE) in E. coli [ 14 , 84 ], experimental evolution of yeast genome size under different concentrations of stressors [ 18 ], or evolution of the number of duplicated antibiotic resistance genes in response to different antibiotic concentrations [ 20 ] (Supplementary Table 1). 
In many studies, for the same species, there were many populations corresponding to biological replicates that had slightly similar responses. We fitted our model to each population. In our database, we assigned a unique ID to each population, indicating the first author of the article, the year of the publication, the figure and panel, and the specific population, naming it from top to bottom as population 1, 2, 3, etc., using the following nomenclature: “authoryearfigurepanelpopulation”. The total dataset consisted of 31 articles, from which 101 representative time series, each of the size of a gene family or the entire genome, were obtained. Most data on genome size were transformed into Mbp (1 Mbp = $10^{6}$ bp). Some of the genome size data was measured in fluorescence. For simplicity, we just converted the data in the example of yeast, in Figure 1 , to Mbp, simply considering that the haploid genome size for yeast is approximately 12 Mbp and for a diploid is approximately 24 Mbp. There were other cases that could also be converted from cell size, for example, as seen in Gallet et al. 2017 [ 14 ]. For some cases of the time series in our database of lineages above the species level, it is simpler to transform absolute time to generations because the generation times are not too diverse (e.g., Proboscidea, salamanders), but in other cases, such as a time series from bacteria to mammals, the generation time is too diverse. Also, measuring time for viruses in generations is complicated (but see Yarwood 1956 [ 85 ]), so we transformed time in years or weeks into days. Fitting To estimate the parameters of the exponential models derived from equation (7) ; , we defined the reduced parameter q, as . Then equation (7) becomes . After estimating q we can estimate . 
To estimate the parameters q, a , and , we used the Levenberg-Marquardt (LM) non-linear regression method as implemented in the function “nlsLM” of the “minpack.lm” package [ 86 ] in the R language [ 87 ]. The starting parameter values that were used are reported in the Supplementary Material. The LM algorithm was run for a maximum of 100 iterations. To select the best fitting model, we used relative likelihood $L(M_i \mid \text{data})$, where $M_i$ denotes model $i$. The relative likelihood was calculated as $\exp\left((\mathrm{AIC}_{\min} - \mathrm{AIC}_i)/2\right)$, where $\mathrm{AIC}_{\min}$ is the AIC of the model with the lowest AIC. The relative likelihood can be interpreted as being proportional to the probability that the $i$th model minimizes the (estimated) information loss [ 88 ]. A relative likelihood 2 was considered significant [ 89 ]. Data collapse is a classical approach in statistical physics that demonstrates how different curves responding to the same general equation can be plotted in a generic way, with dimensionless parameters all equal to 1, thereby showing that all of them follow the same behavior. Here, for data collapse, we included a few examples of not only fits that were significant but also those with $R^2 > 0.9$. Acknowledgements JIA & CK were supported by SFI and NSF Award Number 2133863. JIA & AM were supported by the Center for Mathematical Modeling (CMM) grant FB210005, which is a Basal fund for centers of excellence from ANID-Chile. AM was supported by the Center for Genome Regulation, which is the Millennium Institute Project ICN2021 044, supported by the Millennium Scientific Initiative of the Ministry of Economy, Development and Tourism (Chile), and Grant Exploración number 13220002. References 1. ↵ Lynch , M. & Walsh , B. The origins of genome architecture ( Sinauer Associates Sunderland, MA , 2007 ). 2. ↵ Zhang , J. Evolution by gene duplication: an update . Trends in ecology & evolution 18 , 292 – 298 ( 2003 ). OpenUrl 3. ↵ Domingues , V. Mutations are not random . 
Nature Ecology & Evolution 7 , 5 – 5 ( 2023 ). OpenUrl PubMed 4. ↵ Monroe , G. Are mutations random? 2023 . 5. ↵ Waldvogel , A.-M. & Pfenninger , M. Temperature dependence of spontaneous mutation rates . Genome Research 31 , 1582 – 1589 ( 2021 ). OpenUrl Abstract / FREE Full Text 6. ↵ Fischer , S. , Bernard , S. , Beslon , G. & Knibbe , C. A model for genome size evolution . Bulletin of mathematical biology 76 , 2249 – 2291 ( 2014 ). OpenUrl CrossRef PubMed 7. ↵ Stevenson , B. S. & Schmidt , T. M. Life history implications of rRNA gene copy number in Escherichia coli . Applied and environmental microbiology 70 , 6670 – 6677 ( 2004 ). OpenUrl Abstract / FREE Full Text 8. Roller , B. R. , Stoddard , S. F. & Schmidt , T. M. Exploiting rRNA operon copy number to investigate bacterial reproductive strategies . Nature microbiology 1 , 1 – 7 ( 2016 ). OpenUrl 9. ↵ Metzl-Raz , E. , Kafri , M. , Yaakov , G. & Barkai , N. Gene transcription as a limiting factor in protein production and cell growth . G3: Genes, Genomes, Genetics 10 , 3229 – 3242 ( 2020 ). OpenUrl 10. ↵ Partensky , F. & Garczarek , L. Prochlorococcus: advantages and limits of minimalism . Annual review of marine science 2 , 305 – 331 ( 2010 ). OpenUrl CrossRef PubMed 11. ↵ McCandlish , D. M. & Stoltzfus , A. Modeling evolution using the probability of fixation: history and implications . The Quarterly review of biology 89 , 225 – 252 ( 2014 ). OpenUrl CrossRef PubMed 12. ↵ Hardison , R. Hemoglobins from bacteria to man: evolution of different patterns of gene expression . Journal of Experimental Biology 201 , 1099 – 1117 ( 1998 ). OpenUrl Abstract 13. ↵ Siozios , S. et al. Genome dynamics across the evolutionary transition to endosymbiosis . Current Biology 34 , 5659 – 5670 ( 2024 ). OpenUrl CrossRef PubMed 14. ↵ Gallet , R. et al. The evolution of bacterial cell size: the internal diffusion-constraint hypothesis . The ISME journal 11 , 1559 – 1568 ( 2017 ). OpenUrl PubMed 15. Robillard , É. , Le Rouzic , A. 
, Zhang , Z. , Capy , P. & Hua-Van , A. Experimental evolution reveals hyperparasitic interactions among transposable elements . Proceedings of the National Academy of Sciences 113 , 14763 – 14768 ( 2016 ). OpenUrl Abstract / FREE Full Text 16. ↵ Willemsen , A. , Zwart , M. P. , Higueras , P. , Sardanyes , J. & Elena , S. F. Predicting the stability of homologous gene duplications in a plant RNA virus . Genome Biology and Evolution 8 , 3065 – 3082 ( 2016 ). OpenUrl CrossRef PubMed 17. ↵ Tenaillon , O. et al. Tempo and mode of genome evolution in a 50,000-generation experiment . Nature 536 , 165 – 170 ( 2016 ). OpenUrl CrossRef PubMed 18. ↵ Gerstein , A. C. , Chun , H.-J. E. , Grant , A. & Otto , S. P. Genomic convergence toward diploidy in Saccharomyces cerevisiae . PLoS genetics 2 , e145 ( 2006 ). OpenUrl 19. ↵ Roessler , K. et al. The genome-wide dynamics of purging during selfing in maize . Nature plants 5 , 980 – 990 ( 2019 ). OpenUrl PubMed 20. ↵ Pereira , C. , Larsson , J. , Hjort , K. , Elf , J. & Andersson , D. I. The highly dynamic nature of bacterial heteroresistance impairs its clinical detection . Communications biology 4 , 521 ( 2021 ). OpenUrl PubMed 21. ↵ Sharov , A. A. Genome increase as a clock for the origin and evolution of life . Biology Direct 1 , 1 – 10 ( 2006 ). OpenUrl PubMed 22. Milinkovitch , M. C. , Helaers , R. , Depiereux , E. , Tzika , A. C. & Gabaldón , T. 2 × genomes-depth does matter . Genome biology 11 , 1 – 12 ( 2010 ). OpenUrl CrossRef 23. ↵ Dolgonosov , B. Knowlledge production, hyperbolic growth and phase transitions in biosystems . Studia Universitatis Babes-Bolyai, Geographia 57 ( 2012 ). 24. ↵ Sela , I. , Wolf , Y. I. & Koonin , E. V. Theory of prokaryotic genome evolution . Proceedings of the National Academy of Sciences 113 , 11399 – 11407 ( 2016 ). OpenUrl Abstract / FREE Full Text 25. ↵ Zhou , Y. & Mishra , B. in Modelling in Molecular Biology 287 – 304 ( Springer , 2004 ). 26. Koonin , E. V. et al. 
Birth and death models of genome evolution . Power Laws, scale-free networks and genome biology , 65 – 85 ( 2006 ). 27. ↵ Bentkowski , P. , Van Oosterhout , C. & Mock , T. A model of genome size evolution for prokaryotes in stable and fluctuating environments . Genome biology and evolution 7 , 2344 – 2351 ( 2015 ). OpenUrl CrossRef PubMed 28. ↵ Wang , J. , Chen , P.-J. , Wang , G. J. & Keller , L. Chromosome size differences may affect meiosis and genome size . Science 329 , 293 – 293 ( 2010 ). OpenUrl Abstract / FREE Full Text 29. ↵ Hsieh , L.-C. , Luo , L. , Ji , F. & Lee , H. Minimal model for genome evolution and growth . Physical review letters 90 , 018101 ( 2003 ). OpenUrl CrossRef PubMed 30. ↵ Karev , G. P. , Berezovskaya , F. S. & Koonin , E. V. Modeling genome evolution with a diffusion approximation of a birth-and-death process . Bioinformatics 21 , iii12 – iii19 ( 2005 ). OpenUrl CrossRef PubMed 31. ↵ Baumdicker , F. , Hess , W. R. & Pfaffelhuber , P. The infinitely many genes model for the distributed genome of bacteria . Genome biology and evolution 4 , 443 – 456 ( 2012 ). OpenUrl CrossRef PubMed 32. ↵ Shadrin , A. A. & Parkhomchuk , D. V. Drake’s rule as a consequence of approaching channel capacity . Naturwissenschaften 101 , 939 – 954 ( 2014 ). OpenUrl PubMed 33. ↵ Petrov , D. A. Mutational equilibrium model of genome size evolution . Theoretical population biology 61 , 531 – 544 ( 2002 ). OpenUrl CrossRef PubMed Web of Science 34. ↵ King , A. J. , van Gorkom , T. , van der Heide , H. G. , Advani , A. & van der Lee , S. Changes in the genomic content of circulating Bordetella pertussis strains isolated from the Netherlands, Sweden, Japan and Australia: adaptive evolution or drift? BMC genomics 11 , 1 – 15 ( 2010 ). OpenUrl CrossRef PubMed 35. ↵ Boscaro , V. et al. Parallel genome reduction in symbionts descended from closely related free-living bacteria . Nature ecology & evolution 1 , 1160 – 1167 ( 2017 ). OpenUrl PubMed 36. ↵ Marquet , P. A. 
et al. On theory in ecology . BioScience 64 , 701 – 710 ( 2014 ). OpenUrl CrossRef Web of Science 37. ↵ Hastings , P. J. , Lupski , J. R. , Rosenberg , S. M. & Ira , G. Mechanisms of change in gene copy number . Nature Reviews Genetics 10 , 551 – 564 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 38. ↵ Queller , D. C. Fundamental theorems of evolution . The American Naturalist 189 , 345 – 353 ( 2017 ). OpenUrl CrossRef PubMed 39. Heywood , J. S. An exact form of the breeder’s equation for the evolution of a quantitative trait under natural selection . Evolution 59 , 2287 – 2298 ( 2005 ). OpenUrl CrossRef PubMed Web of Science 40. ↵ Morrissey , M. B. , Kruuk , L. E. & Wilson , A. J. The danger of applying the breeder’s equation in observational studies of natural populations . Journal of evolutionary biology 23 , 2277 – 2288 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 41. ↵ Cooper , M. & Messina , C. D. Breeding crops for drought-affected environments and improved climate resilience . The Plant Cell 35 , 162 – 186 ( 2023 ). OpenUrl CrossRef PubMed 42. ↵ Lush , J. L. Animal breeding plans . ( 1937 ). 43. ↵ Zeng , Z.-B. & Cockerham , C. C. Mutation models and quantitative genetic variation . Genetics 133 , 729 – 736 ( 1993 ). OpenUrl Abstract / FREE Full Text 44. ↵ Houle , D. The maintenance of polygenic variation in finite populations . Evolution 43 , 1767 – 1780 ( 1989 ). OpenUrl CrossRef PubMed Web of Science 45. Lynch , M. The rate of polygenic mutation . Genetics Research 51 , 137 – 148 ( 1988 ). OpenUrl 46. ↵ Turelli , M. Heritable genetic variation via mutation-selection balance: Lerch’s zeta meets the abdominal bristle . Theoretical population biology 25 , 138 – 193 ( 1984 ). OpenUrl CrossRef PubMed Web of Science 47. Fu , Y.-X. Estimating mutation rate and generation time from longitudinal samples of DNA sequences . Molecular Biology and Evolution 18 , 620 – 626 ( 2001 ). OpenUrl PubMed Web of Science 48. ↵ Felsenstein , J. 
Theoretical evolutionary genetics joseph felsenstein . University of Washington, Seattle ( 2005 ). 49. ↵ Bürger , R. in The Mathematics of Darwin’s Legacy 67 – 89 ( Springer , 2011 ). 50. ↵ West , G. B. , Brown , J. H. & Enquist , B. J. A general model for ontogenetic growth . Nature 413 , 628 – 631 ( 2001 ). OpenUrl CrossRef GeoRef PubMed Web of Science 51. ↵ He , Y. , Tian , S. & Tian , P. Fundamental asymmetry of insertions and deletions in genomes size evolution . Journal of Theoretical Biology 482 , 109983 ( 2019 ). OpenUrl CrossRef PubMed 52. ↵ Solé , R. V. , Pastor-Satorras , R. , Smith , E. & Kepler , T. B. A model of large-scale proteome evolution . Advances in Complex Systems 5 , 43 – 54 ( 2002 ). OpenUrl 53. ↵ Hutchison III , C. A. et al. Design and synthesis of a minimal bacterial genome . Science 351 , aad6253 ( 2016 ). OpenUrl Abstract / FREE Full Text 54. ↵ Lewin , H. A. et al. Earth BioGenome Project: Sequencing life for the future of life . Proceedings of the National Academy of Sciences 115 , 4325 – 4333 ( 2018 ). OpenUrl Abstract / FREE Full Text 55. ↵ Cagan , A. et al. Somatic mutation rates scale with lifespan across mammals . Nature 604 , 517 – 524 ( 2022 ). OpenUrl CrossRef PubMed 56. ↵ Rutkoski , J. E. A practical guide to genetic gain . Advances in agronomy 157 , 217 – 249 ( 2019 ). OpenUrl CrossRef 57. ↵ Lande , R. Natural selection and random genetic drift in phenotypic evolution . Evolution , 314 – 334 ( 1976 ). 58. ↵ Kopp , M. & Matuszewski , S. Rapid evolution of quantitative traits: theoretical perspectives . Evolutionary Applications 7 , 169 – 191 ( 2014 ). OpenUrl PubMed 59. ↵ Abrams , P. A. , Harada , Y. & Matsuda , H. On the relationship between quantitative genetic and ESS models . Evolution , 982 – 985 ( 1993 ). 60. ↵ Lande , R. & Arnold , S. J. The measurement of selection on correlated characters . Evolution , 1210 – 1226 ( 1983 ). 61. ↵ Bürger , R. & Lynch , M. 
Evolution and extinction in a changing environment: a quantitative-genetic analysis . Evolution 49 , 151 – 163 ( 1995 ). OpenUrl CrossRef PubMed Web of Science 62. ↵ Wright , S. Evolution in Mendelian populations . Genetics 16 , 97 ( 1931 ). OpenUrl FREE Full Text 63. Wright , S. et al. Adaptation and selection . Genetics, paleontology and evolution 365 , 389 ( 1949 ). OpenUrl 64. ↵ Nei , M. in Molecular Evolutionary Genetics 327 – 351 ( Columbia University Press , New York Chichester, West Sussex , 1987 ). isbn: 9780231886710 . doi: 10.7312/nei-92038-013 . OpenUrl CrossRef 65. ↵ Levins , R. Some demographic and genetic consequences of environmental heterogeneity for biological control . Bulletin of the ESA 15 , 237 – 240 ( 1969 ). OpenUrl 66. ↵ Moran , N. A. , McLaughlin , H. J. & Sorek , R. The dynamics and time scale of ongoing genomic erosion in symbiotic bacteria . Science 323 , 379 – 382 ( 2009 ). OpenUrl Abstract / FREE Full Text 67. ↵ McCutcheon , J. P. & Moran , N. A. Extreme genome reduction in symbiotic bacteria . Nature Reviews Microbiology 10 , 13 – 26 ( 2012 ). OpenUrl CrossRef PubMed 68. ↵ Greenhalgh , R. et al. Genome streamlining in a minute herbivore that manipulates its host plant . Elife 9 , e56689 ( 2020 ). OpenUrl CrossRef PubMed 69. ↵ Langmüller , A. M. , Haller , B. C. , Nolte , V. & Schlötterer , C. Purifying Selection Shapes the Dynamics of P-element Invasion in Drosophila Populations . bioRxiv , 2024 – 12 ( 2024 ). 70. Beaumont , M. , Selvaraju , D. , Pianezza , R. & Kofler , R. Rapid emergence of hyperparasitic elements may stop P-element invasions in the absence of a piRNA-based host defence . bioRxiv , 2025 – 03 ( 2025 ). 71. ↵ Pal , A. & Andersson , D. I. Bacteria can compensate the fitness costs of amplified resistance genes via a bypass mechanism . Nature Communications 15 , 2333 ( 2024 ). OpenUrl PubMed 72. ↵ Gerstein , A. C. & Otto , S. P. 
Cryptic fitness advantage: diploids invade haploid populations despite lacking any apparent advantage as measured by standard fitness assays . PloS one 6 , e26599 ( 2011 ). OpenUrl CrossRef PubMed 73. ↵ García-Pintos , L. P. Limits on the evolutionary rates of biological traits . Scientific Reports 14 , 11314 ( 2024 ). OpenUrl PubMed 74. ↵ Gregory , T. R. Insertion–deletion biases and the evolution of genome size . Gene 324 , 15 – 34 ( 2004 ). OpenUrl CrossRef PubMed Web of Science 75. ↵ Krašovec , R. et al. Spontaneous mutation rate is a plastic trait associated with population density across domains of life . PLoS biology 15 , e2002731 ( 2017 ). OpenUrl CrossRef PubMed 76. ↵ Van Nimwegen , E. Scaling Laws in the Functional Content of Genomes . Trends Genet . 19 , 479 – 84 ( 2003 ). OpenUrl CrossRef PubMed Web of Science 77. ↵ Brown , J. H. , Gillooly , J. F. , Allen , A. P. , Savage , V. M. & West , G. B. Toward a metabolic theory of ecology . Ecology 85 , 1771 – 1789 ( 2004 ). OpenUrl CrossRef Web of Science 78. ↵ Lenski , R. E. & Travisano , M. Dynamics of adaptation and diversification: a 10,000-generation experiment with bacterial populations . Proceedings of the National Academy of Sciences 91 , 6808 – 6814 ( 1994 ). OpenUrl Abstract / FREE Full Text 79. ↵ Montgomery , S. H. et al. The evolutionary history of cetacean brain and body size . Evolution 67 , 3339 – 3353 ( 2013 ). OpenUrl CrossRef PubMed 80. ↵ Malerba , M. E. , Ghedini , G. & Marshall , D. J. Genome size affects fitness in the eukaryotic alga Dunaliella tertiolecta . Current Biology 30 , 3450 – 3456 ( 2020 ). OpenUrl CrossRef PubMed 81. ↵ DeSilva , J. M. , Traniello , J. F. , Claxton , A. G. & Fannin , L. D. When and why did human brains decrease in size? A new change-point analysis and insights from brain evolution in ants . Frontiers in Ecology and Evolution 9 , 742639 ( 2021 ). OpenUrl 82. ↵ Smith , F. A. et al. The evolution of maximum body size of terrestrial mammals . 
Science 330 , 1216 – 1219 ( 2010 ). OpenUrl Abstract / FREE Full Text 83. ↵ Zhang , J. , Kempes , C. P. , Hamilton , M. J. & West , G. B. Scaling laws and a general theory for the growth of companies . arXiv preprint arXiv: 2109.10379 ( 2021 ). 84. ↵ Barrick , J. E. et al. Genome evolution and adaptation in a long-term experiment with Escherichia coli . Nature 461 , 1243 – 1247 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 85. ↵ Yarwood , C. E. Generation time and the biological nature of viruses . The American Naturalist 90 , 97 – 102 ( 1956 ). OpenUrl CrossRef 86. ↵ Elzhov , T. V. et al. Package ‘minpack. lm’ . Title R Interface Levenberg-Marquardt Nonlinear Least-Sq. Algorithm Found MINPACK Plus Support Bounds ( 2016 ). 87. ↵ Computing, R . et al. R: A language and environment for statistical computing. Vienna: R Core Team ( 2013 ). 88. ↵ Burnham , K. P. & Anderson , D. R. Multimodel inference: understanding AIC and BIC in model selection . Sociological methods & research 33 , 261 – 304 ( 2004 ). OpenUrl CrossRef PubMed Web of Science 89. ↵ Murtaugh , P. A. In defense of P values . Ecology 95 , 611 – 617 ( 2014 ). OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted October 23, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following A general model for genomic traits evolution Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share A general model for genomic traits evolution José Ignacio Arroyo , Alejandro Maass , Pablo A. Marquet , Geoffrey West , Christopher P. Kempes bioRxiv 2025.10.22.684021; doi: https://doi.org/10.1101/2025.10.22.684021 Share This Article: Copy Citation Tools A general model for genomic traits evolution José Ignacio Arroyo , Alejandro Maass , Pablo A. Marquet , Geoffrey West , Christopher P. Kempes bioRxiv 2025.10.22.684021; doi: https://doi.org/10.1101/2025.10.22.684021 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Evolutionary Biology Subject Areas All Articles Animal Behavior and Cognition (7641) Biochemistry (17707) Bioengineering (13904) Bioinformatics (41988) Biophysics (21466) Cancer Biology (18615) Cell Biology (25530) Clinical Trials (138) Developmental Biology (13387) Ecology (19921) Epidemiology (2067) Evolutionary Biology (24335) Genetics (15615) Genomics (22521) Immunology (17748) Microbiology (40424) Molecular Biology (17194) Neuroscience (88665) Paleontology (667) Pathology (2839) Pharmacology and Toxicology (4827) Physiology (7650) Plant Biology (15160) Scientific Communication and Education (2046) Synthetic Biology (4302) Systems Biology (9826) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.