Full text
50,333 characters
· extracted from
preprint-html
· click to expand
marApp: An R package and web portal to calculate mutations- and genetic diversity-area relationship for conservation | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results marApp : An R package and web portal to calculate mutations- and genetic diversity-area relationship for conservation Meixi Lin , Kristy Mualim , Oliver Selmoni , View ORCID Profile Moises Exposito-Alonso doi: https://doi.org/10.1101/2025.09.09.675155 Meixi Lin 1 Department of Integrative Biology, University of California Berkeley , Berkeley, USA 2 Department of Plant Biology, Carnegie Institution for Science , Stanford, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kristy Mualim 1 Department of Integrative Biology, University of California Berkeley , Berkeley, USA 2 Department of Plant Biology, Carnegie Institution for Science , Stanford, California, USA 3 Department of Biology, Stanford University , Stanford, California, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Oliver Selmoni 2 Department of Plant Biology, Carnegie Institution for Science , Stanford, California, USA 4 Department of Geography, University of Zurich , Zurich, Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Moises Exposito-Alonso 1 Department of Integrative Biology, University of California Berkeley , Berkeley, USA 2 Department of Plant Biology, Carnegie Institution for Science , Stanford, California, USA 3 Department of Biology, Stanford University , Stanford, California, USA 5 Department of Global Ecology, Carnegie Institution for Science , Stanford, California, USA 6 Howard Hughes Medical Institute, University of California Berkeley , Berkeley, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Moises Exposito-Alonso For correspondence: moisesexpositoalonso{at}gmail.com Abstract Full Text Info/History Metrics Preview PDF Abstract International conservation policies including the Kunming-Montreal Global Biodiversity Framework now consider genetic diversity of wild species in their targets. However, scalable, theory-driven tools to assess and predict genetic diversity loss are still emerging, limiting their use in conservation planning. Analogous to the species-area relationship, recent work has shown that genetic diversity scales with area, described by the mutations–area relationship (MAR) or genetic–diversity–area relationship (GDAR), can approximate the genetic diversity loss in a species from habitat reduction. To enable application of these insights for conservation practitioners and policy makers, we present the mar R package and marApp web portal, a fast and user-friendly tool for MAR/GDAR analysis. The mar package connects genetic diversity patterns in space to ecological theory, and automates steps from reading genetic and geographic data to simulating various habitat extinction scenarios using MAR/GDAR. The mar package estimates only short-term genetic diversity, providing a lower-limit for the far greater long-term genetic losses, underscoring the need to maximize present-day habitat conservation before it becomes irreversible. As a case example, we showcase predictions of coral ( Acropora sp.) genetic diversity loss from reef coverage impacts. We demonstrate the usage of marApp without requirements for prior genetics or coding experiences, and provide downloadable reports for conservation. Introduction Genetic diversity, the amount of genetic variations within species, is the most fundamental element of biodiversity ( May 1994 ; Gaston 2010 ). Genetic diversity provides the raw material on which evolutionary forces act, and is key to species adaptation to new environments and survival in small populations ( Frankham 1995 ). However, genetic diversity has long been overlooked in international conservation policies ( Laikre et al. 2020 ). Only in 2022, the Kunming – Montreal Global Biodiversity Framework (GBF) adopted the target “to maintain and restore the genetic diversity within and between populations of native, wild and domesticated species to maintain their adaptive potential” ( CBD 2022 ). While genetic data is now available for thousands of species and millions of individuals, a significant gap remains within many biodiversity hotspots ( Paz-Vinas et al. 2023 ), highlighting the need for theories and scalable approaches to quantify existing genetic diversity and predict diversity loss across species at landscape scales with simple and interpretable percentage-based estimates. However, these approaches are still in their nascent phases ( Leigh et al. 2021 ; Hoban et al. 2023 ), limiting the capacity for evidence-based conservation decision-making. In conservation practices, the macro-ecological framework of species-area relationship (SAR) has been widely applied to estimate rates of species diversity loss ( Matthews and Whittaker 2015 ). Interestingly, this principle of “commonness of rarity” applies to not only species in a community but also mutations in a population ( Hu, He, and Hubbell 2006 ; Exposito-Alonso et al. 2022 ). This fundamental similarity between theoretical population genetics and neutral macroecology led to the discovery of scaling relationships between genetic diversity and area, providing theoretical foundations for genetic diversity estimates and predictions in conservation ( Exposito-Alonso et al. 2022 ; Mualim et al. 2024 ). Known as the mutations-area relationship (MAR) or the genetic-diversity-area relationship (GDAR), genetic diversity, either measured as the number of polymorphic/segregating sites (M), or the nucleotide diversity (θ π or GD), increases with the area surveyed (A), following a power-law function: M = cA z and θ π = cA z . The power of this simple mathematical approach, is that upon rapid habitat area loss, one can predict the proportional reduction of genetic diversity by rearranging the MAR equation: 1-(1-A loss ) z ( Exposito-Alonso et al. 2022 ). We introduce the main modules in the mar package using the global Arabidopsis thaliana example: from importing data, fitting species abundance distributions for genetic diversity, to calculating the scaling relationship between genetic diversity and sampled area, and simulating genetic extinction upon habitat loss. To enhance accessibility, we developed marApp , a shiny-based web portal that enables users to explore MAR/GDAR calculations and run extinction simulations on their own datasets, with step-by-step explanations. It also generates downloadable conservation reports to help managers estimate genetic diversity loss or set habitat protection targets with a single click. The mar package The R package mar , available on GitHub ( https://github.com/meixilin/mar ), implements methods for calculating mutations-area relationships (MAR) and genetic-diversity-area relationships (GDAR). Additionally, tools to fit species-abundance distributions (SAD) in genetic data are also provided. The mar package depends on the SeqArray package (≥ v.1.28.0) for genomic data input and manipulation ( Zheng et al. 2017 ), the raster package for spatial analysis and sampling ( Hijmans 2025 ), the sars package for fitting MAR/GDAR relationships ( Matthews et al. 2019 ), and the sads package for SAD model fitting ( Prado, Dantas Miranda, and Chalom 2024 ). Documentation and examples can be found on GitHub. The workflow in mar is implemented through the MARPIPELINE function, which comprises six main steps ( Fig. 1 ): (1) data : read genotype and coordinate files, (2) gm : optional data subsetting, and create genomaps data structure to be used for the sfs , mar and ext steps, (3) sfs : calculate the site-frequency spectrums (SFS) and fit species abundance distribution models on genetic data, (4) mar : build the MAR/GDAR relationship by sampling the genomaps object, (5) ext : simulate the extinction process on the genomaps object and predict genetic diversity loss, and (6) plot : automatic plotting of MARPIPELINE steps. Apart from the plot step, which requires analysis steps executed beforehand, each step in the MARPIPELINE is fully modularized and can be executed separately for fine-tuning and time-saving. Below we introduce the main steps and core functions in MARPIPELINE . Download figure Open in new tab Fig. 1. Overview of the R package mar . The main steps in the workflow function MARPIPELINE are shown. All functions provided by the mar package are grouped by the workflow steps. Data processing and genomaps object creation The data step in MARPIPELINE requires two essential data components: coordinate and genotype of the sequenced samples. The coordinate file should be a 3-column text file containing sample identifier, longitude, and latitude (processed via lonlat_parser function). Genotype data can be provided in three formats: plain text files formatted as a matrix of alternative allele counts by sample and genomic position ( text_parser ), VCF files ( vcf_parser ), or PLINK files ( plink_parser ). To avoid model assumption violation, we require that genotypes should only consist of segregating bi-allelic single nucleotide polymorphisms (SNP) without missing data. Although non-diploid datasets are allowed, we recommend caution in the interpretations of MAR/GDAR results in non-diploid organisms. In the gm step of MARPIPELINE , optional variant and sample filtering based on variant or sample identifiers can be performed. The marmap function then processes the filtered coordinate data to generate a raster map, with each cell value representing the sample count per location. Map resolution and coordinate reference system (CRS) can be specified manually or determined automatically using equation 12 in ( Hengl 2006 ) and the WGS84 CRS. The margeno function organizes the filtered genotype data, using an S3 data structure inspired by the SeqVarGDSClass in SeqArray . Both marmaps and margeno functions create their respective S3 objects with dedicated plot and print methods. Finally, the genomaps function links the marmaps and margeno objects, while verifying sample identifier consistency, creating the core data structure for downstream analyses. Site-frequency spectrum computation The sfs step of MARPIPELINE provides a basic summary of genetic data. The sfs function summarizes the genotypes data in the S3 margeno object to a site frequency spectrum, which is a histogram of mutation abundances, and returns an S3 sfs object. We also implemented the expected SFS under population genetics neutral coalescence (noted as the neutral SFS below) through the expsfs function, which also returns an S3 sfs object. The MARsad function in the MAR package provides full functionality to fit popular species-abundance distribution (SAD) models, including the broken stick, geometric, lognormal, log-series, neutral metacommunity, and weibull distributions to the genotypes data through the fitsad function in the sads package ( Prado, Dantas Miranda, and Chalom 2024 ). For SAD model predictions, to align with the more commonly used SFS visualization in population genetics, we used an in-house function ( .sadpred ) to generate the expected SFS given the inferred SAD parameters. The MARsad function returns a S3 marsad object, consisting of the inferred SAD models, the Akaike Information Criterion (AIC) table of SAD models, and the expected SFS. To facilitate comparisons of SAD-derived SFS, neutral SFS and the genotypes data SFS, we implemented a Poisson likelihood function ll_sfs from the ∂a∂i python package (eq. 4 in ( Gutenkunst et al. 2009 )). Geo-sampling of genetics data and building the MAR/GDAR Building upon the observed summaries of genetic data, we proceed to empirically sample the geographical range of the data, and construct the mutations/genetic diversity-area relationships. In the mar step of MARPIPELINE , the MARsampling function samples genetic diversity across varying spatial scales and returns an S3 marsamp data frame. Based on the raster sample map ( marmaps object) generated during the gm step, MARsampling creates replicated square subsamples of the population distribution, ranging from a single raster cell to the full range size. To account for potential geographic subsampling biases, we implemented five distinct sampling schemes, including random, directional (south-to-north and north-to-south), and focal point-based (inward and outward) methods, using a probability sampling process based on the beta distribution. Area calculations employ two strategies: Asq (square area in degree 2 ) and A (cell area in km 2 ). Asq represents the total square area defined by minimum and maximum latitude/longitude values, while A calculates the sum of occupied grid cell areas using the area function from the raster package. For genetic diversity, mutdiv.gridded implements four metrics: number of segregating/polymorphic sites ( M ), endemic segregating sites ( E ), Watterson’s θ ( θ w ), and nucleotide diversity ( θ π ). Finally, the MARcalc function fits the power-law model M = cA z using the sar_power function from the sars package to establish the MAR/GDAR relationship for selected diversity and area metrics. Extinction simulation and genetic diversity loss prediction The MAR framework can be applied in reverse to project immediate genetic diversity loss associated with rapid habitat reduction. This relationship follows the power-law equation M = cA z . If we note the proportion of area lost as a , and the proportion of genetic diversity lost as m , the MAR can be rearranged to predict the remaining proportion m of genetic diversity: M(1-m) = c[A(1-a)] z , which yields the genetic diversity loss prediction: 1 – m = (1 – a) z . In the extinction step of MARPIPELINE , the MARextinction function simulates the loss of genetic diversity, and returns an S3 marextinct data frame. Based on the raster sample map generated during the gm step, MARextinction removes raster cells iteratively, modeling local extinction scenarios where habitat becomes unavailable, and tracks genetic diversity in the remaining samples. Similar to the MARsampling function, MARextinction allows five extinction schemes (random, south-to-north, north-to-south, inwards and outwards), and computes four genetic diversity metrics. However, unlike MARsampling , individuals are sampled directly from occupied grid cells because square grids can no longer be constructed during the iterative removal. Therefore, results based on Asq and A in MARextinction are similar ( Fig. S3 ). Finally, the MARcalc function then fits the power-law model to establish MAR/GDAR-based predictions for the proportion of remaining genetic diversity and area loss ( Fig. 1 ). Working example: global Arabidopsis 1001G dataset To showcase the mar package usage through command-line, we analyzed the mutations-area relationship using the accompanying data example provided in the mar package: gm1001g . This example dataset is derived from the Arabidopsis 1001 Genomes Project ( 1001 Genomes Consortium 2016 ). We downloaded the imputed SNP matrices, and accession metadata from https://1001genomes.org/ on 12-23-2024. A total of 1004 geo-referenced samples from the native continental range (excluding the USA, Canary Islands or Japan samples) were kept, and 10,000 biallelic polymorphic SNPs were randomly sampled from chromosome 1. The analysis can be executed using a single command: Download figure Open in new tab The random seed does not have to be set, we set it as 91 here for reproducibility in the results described below. MARPIPELINE command outputs key R Data files and PDF-formatted figures while displaying all primary results in the log file ( Table 1 ). View this table: View inline View popup Download powerpoint Table 1. MARPIPELINE output specifications. Examining the results, we found that the pipeline successfully creates a genomaps object, with an automatically determined spatial resolution of 0.9 degree ( Fig. 2A ). A total of 264 raster cells were created for 1004 geo-referenced Arabidopsis samples, with a median of two samples and a maximum of 75 samples falling in the same raster cell. In the sfs step, the SFS constructed from genotype data shows a pattern of commonness of rarity, with most SNPs segregating at lower frequencies ( Fig. 2B ). Comparing all SAD models, the Fisher’s log-series distribution (AIC = 91442.1) and the metacommunity zero-sum multinomial distribution (AIC = 91442.4) fit the data best, while the MacArthur’s broken stick distribution (AIC = 114744.1) performed the worse ( Fig. S1 ). In the mar step, the mutations-area relationship was successfully constructed from spatial sampling ( Fig. 2C ). Using the random spatial sampling method, the scaling factor z for the number of segregating sites and area is 0.50 when using occupied grid cell areas ( A, p < 0.001, R 2 = 0.97), and 0.39 when using the total square area ( Asq, p < 0.001, R 2 = 0.46; Table S1 ). In the extinction step, habitat loss simulations were performed ( Fig. 2D ). Using the random habitat extinction method and occupied grid cell areas ( A ), the remaining genetic diversity is expected to decline as habitat becomes unsuitable, following the expression: 1-m = (1 – a) 0 . 38 ( p < 0.001, R 2 = 0.98). Note here the scaling factor z = 0.38 is constructed from the extinction simulation. While similar to the scaling factor constructed during the MAR sampling step ( z = 0.50 using A and z = 0.39 using Asq ), it is not the same value. For example, if we assume a = 38% global terrestrial area has been transformed according to the Millennium Ecosystem Assessment (MEA 2001) and becomes unsuitable for Arabidopsis , then approximately 83% of genetic diversity, measured in the number of segregating sites, still remains in the population. Once more we note that this prediction estimates current (short-term) loss, while genetic drift will continue eroding genetic diversity over time. Download figure Open in new tab Fig 2. MARPIPELINE outputs from the Arabidopsis thaliana example (A) Raster sample map comprising 1004 geo-referenced samples, with points indicating sample locations and color intensity reflecting sample density per cell. (B) Site frequency spectrum (SFS) of the data. (C) MAR derived from MARsampling , displaying segregating sites ( M ) against cell area (km 2 , A ) with fitted M = cA z curves. (D) Genetic diversity loss predictions from MARextinction , with fitted 1-m = (1 - a) z curves. Panels (B-D) are truncated, for complete results, see Figs. S1 - S3 . Finally, we examine additional genetic diversity metrics. Akin to the species-area relationship (SAR), the mutations-area relationship was derived to approximate a power law from process-based approaches. However, following SAR, researchers have applied power law diversity-area concept to other species diversity metrics, e.g. Simpson’s diversity or other taxonomic levels ( Ma 2018 ). We have also explored and implemented several genetic diversity metrics, including the endemic segregating sites ( E ), Watterson’s θ ( θ w ), and nucleotide diversity ( θ π ), to establish the genetic-diversity-area relationship (GDAR). A phenomenological scaling relationship between such genetic diversity metrics and area still holds, but the scaling factor z and R 2 changes ( Table S1 , Figs. S2 , S3 ). Endemic segregating mutations, which are mutations that only segregate in the sampled regions, saturates slower with area than segregating mutations ( z = 1.28 with A , 0.81 with Asq ). Endemic mutations carry characteristic information for local genetic diversity but converge to allelic richness ( M ) as the study range expands to a species’ full range. Watterson’s θ quantifies the number of segregating sites with a sample size scaling factor, yielding a faster saturation rate of genetic diversity ( z = 0.25 with A , 0.15 with Asq ) compared to M , the number of segregating sites. Lastly, nucleotide diversity quantifies the average pairwise differences between genetic samples. Being less sensitive to rare alleles, θ π saturates the fastest with area ( z = 0.10 with A , 0.07 with Asq ), and its relationship is noisier although significant ( R 2 = 0.50 with A , 0.22 with Asq, p < 0.001). The marApp and Florida coral reef conservation example To facilitate the use of MAR/GDAR in conservation settings, we developed a shiny-based application that implements the MARPIPELINE function described above, available at https://moiexpositoalonsolab.shinyapps.io/marApp/ . To facilitate result interpretation, and provide background information, an introductory snippet is included in the marApp . We showcase visualizations with an example dataset derived from a global Acropora genetic meta-analysis ( Selmoni, Cleves, and Expósito-Alonso 2024 ). This dataset includes 76 Acropora cervicornis (staghorn coral) individuals sampled in the Florida Reef Tract, and sequenced with Genotyping by Sequencing technology ( Drury and Lirman 2021 ; Drury et al. 2019 ). In the “Upload data” tab, select “Custom” dataset, we uploaded a text file containing Acropora sample index, longitude and latitude information in the “Coordinate file” section and a text file containing genotypes in the “Genotype file” section following the instructions listed in marApp . After both files are successfully uploaded, click the “Load data” button. A preview for the uploaded coordinate and genotype data occurred, summarizing the data input. Additionally, an interactive map created by leaflet was available to examine the sample locations and marmaps object created. In this example, samples mapped along the Florida Reef Tract, suggesting successful genomaps object creation ( Fig. 3 ). Download figure Open in new tab Fig 3. Visualization from the marApp “Upload data” tab using the Florida Acropora example dataset. In the “Site Frequency Spectrum” tab, after selecting the species-abundance distribution (SAD) models to fit (defaults are log-normal and Fisher’s log-series distributions), select “Compute SFS and fit SAD”. The statistics for SAD model fits are provided, and the best-fit SAD model is automatically ranked on top. The log-likelihoods computed from SAD-predicted SFSs using the ll_sfs function are also provided. On the right, an interactive viewing of SFS computed from the genotype data (data), the population-genetics based neutral predictions (neutral), and selected SAD models are displayed. In this example, the log-series SAD (AIC = 51,365) fits the data better than the lognormal SADs (AIC = 54,709; Fig. S4 ). In the “Mutations-area relationship” tab, the sampling schemes, genetic diversity metrics, area metrics and number of replicates can be customized and applied also to the “Extinction simulation” tab to ensure results comparability. After selecting “Calculate MAR/GDAR”, the summary table from the MAR sampling process is provided, together with the option to download outputs from the MARsampling function used to calculate the power-law relationship. To animate the sampling process, bounding boxes with various sizes used for MARsampling are overlaid with the sample map raster. Finally, the correlation of number of mutations and area sampled are interactively plotted, with the fitted cA z curves overlaid as a gray line. Users can explore the power-law relationship on a log-scale and use different genetic diversity metrics. In this example, the scaling factor z for the number of segregating sites ( M ) and occupied grid cell area ( A ) is 0.33 ( p < 0.001, R 2 = 0.72; Fig. S5 ) . In the “Extinction simulation” tab, after selecting “Simulate extinction”, the power-law relationship built from the extinction process is provided, together with the option to download outputs from the MARextinction function. To animate the simulated extinction process, at each extinction step, the raster cells that become unavailable to individuals are marked as black on the sample map raster. Towards the end of simulation, all cells become uninhabitable and the population is extinct. Finally, the percentage of habitable area lost is plotted with the percentage of genetic diversity remaining, with the MAR/GDAR-based theoretical predictions (1 – a) z overlaid as a gray line. Users can explore the extinction prediction for different genetic diversity metrics. In this example, the predicted genetic diversity loss rate is (1-a) 0 . 32 ( Fig. S6 ). The final step is the “Conservation estimator” tab, where users can use either the default scaling factor ( z = 0.3), or the inferred scaling factor z for their species or regions of interest using a slider, to estimate the amount of genetic diversity loss or build habitat protection goals, and download the brief report generated. In this Acropora example, we sought to estimate the amount of genetic diversity loss using the scaling factor inferred from the “Extinction simulation” tab ( z = 0.32) given that a combination of factors including disease, consistent ocean warming, and pollution has been threatening the survival of coral reef ecosystems ( Cramer et al. 2020 ). In Florida Keys, healthy coral coverage has declined nearly 90% since the late 1970s ( Gardner et al. 2003 ; Scott 2023 ), and A. cervicornis coverage reduced by 98% from 1983 to 2000 ( Miller, Bourque, and Bohnsack 2002 ). Using the MAR relationship established above, we estimated that approximately 48% of Acropora cervicornis genetic diversity, measured in allelic richness, is still intact, and 52% of genetic diversity is already lost, assuming 90% short-term area loss ( Fig. 4 ). Download figure Open in new tab Fig 4. Screenshots from the marApp “Conservation estimation” tab using the Florida Acropora example dataset. Using the marApp to approximate genetic diversity loss without genetic data The ultimate conservation advantage of the MAR framework is that it uses area to approximate genetic diversity loss in a simple percentage-based estimate. It may be thus helpful to understand the magnitude of genetic diversity loss for species that lack genetic data or for which this data is impossible or impractical to generate. In the marApp , the “Conservation estimation” tab can be utilized as a standalone tool for conservation planning. For example, in a recent multi-national evaluation of demographic-based genetic diversity indicators for 919 taxa, on average, 17.6% of wild populations have been lost per taxa, and 3% of taxa have lost 75% of their historical populations (Proportion of populations Maintained within species (PM) indicator in ( Mastretta-Yanes et al. 2024 )). Assuming population loss is proportional to habitat loss, under 75% area loss, genetic diversity, measured in allelic richness, reduces by 34% assuming a scaling factor of z MAR ∼ 0.3 [34% = 1-(1-75%) 0.3 ]. The scaling factor z MAR describes the rate of genetic diversity loss, reflecting the spatial structure of genetic diversity. The scaling z MAR approaches zero in a panmictic population with minimal spatial structure, and reaches one among completely isolated populations. Based on empirically inferred z MAR values from 29 species ( Exposito-Alonso et al. 2022 ; Mualim et al. 2024 ), z MAR value of 0.3 likely captures large-scale trends of genetic diversity loss ( Exposito-Alonso et al. 2022 ; Mualim et al. 2024 ). Using three representative z MAR values, we offer interpretable genetic diversity loss estimates in nine countries with PM indicators ( Table 2 ). For example, given the percentage population losses averaged across ∼100 species evaluated per country, if we assume a standard z MAR of 0.3, the average remaining genetic diversity in Belgium is of 79% [79% = (45%) 0.3 ], and at most 98% of genetic diversity is still maintained in countries including Japan, Mexico, and South Africa. View this table: View inline View popup Download powerpoint Table 2. Estimated proportion of genetic diversity maintained within taxa for nine countries. Genetic diversity is measured in allelic richness. Estimates are based on short-term effects only, with more severe long-term effects expected. As the MAR/GDAR framework becomes adapted in more study systems with genetic data, we are only beginning to uncover the full extent of variation in z across species. To account for this uncertainty, we provide a reference table with different z MAR and area-loss combinations ( Fig. S7 ). Expanding collections of genetic data remains crucial, as it enables both the documentation of genetic diversity prior to potential loss and facilitates more robust estimations of z MAR among species that are phylogenetically related or share similar life history traits ( Leigh et al. 2021 ). Caution: long-term genetic diversity loss is larger than short-term marApp predictions Importantly, in the “Conservation estimation” tab, we constrained the MAR framework to use only the number of segregating sites or allelic richness ( M ) as measures of genetic diversity, rather than nucleotide diversity ( θ π ), which is more commonly used in population genetics. This choice reflects that MAR/GDAR is a sampling formula describing contemporary genetic diversity, not a process-based temporal model capturing mutation-drift evolutionary dynamics over time ( Mualim et al. 2024 ). Classically, average genetic diversity within a population has been well described by the mathematical formula: θ π = 4 N e μ; which captures the new mutations arising from mutation rate μ and the population size that can maintain them. During contemporary habitat loss, the census size ( N c ) declines instantly, while θ π changes little because it still reflects pre-disturbance population size, N e . This creates a lag in θ π responding to a population reduction, as shown in both the close-to-zero scaling factor z GDAR ∼ 0.04 ( Mualim et al. 2024 ). While allelic richness ( M ) responds quicker to recent population changes, it also suffers from this evolutionary lag ( Mualim et al. 2024 ). Due to this temporal lag of genetic diversity loss, we should not be feeling content on genetic diversity conservation. For example, we demonstrated above that in a scenario of 75% habitat loss, the allelic richness reduced less than the amount of habitat reduced by 34% ( z MAR = 0.3), and θ π reduced even less by 15% (15% = 1 - (1-75%) 0.04 ; z GDAR = 0.04). However, in the long-term, when population reaches a new equilibrium, θ π will eventually reflect the reduced effective population size, and reduce by 75% (z GDAR ∼ 1 in the long-term ( Mualim et al. 2024 )) if habitats are not restored. This is akin to biodiversity conservation at the species richness level. While wildlife has already suffered much impact, only 1008 out of 169,420 (∼0.6%) species evaluated are classified as extinct or extinct in the wild ( IUCN 2025 ). However, ecosystems are in a disequilibrium, 47,187 species are threatened by extinction, a phenomenon called “extinction debt”. Therefore, it is essential to protect as much genetic diversity as possible now and to act swiftly to reverse population declines, thereby preventing long-term genetic erosion and reducing the risk of future species extinction. Conclusion The mar R package and marApp web portal fills an important gap in conservation, offering a theory-driven and percentage-based solution for integrating genetic diversity into conservation planning. By automating the analysis of mutations-area and other genetic diversity-area relationships (MAR/GDAR), mar makes it feasible to estimate genetic diversity loss under habitat reduction across species and landscapes. The user-friendly marApp further lowers the barrier for application by managers and policy makers, requiring no prior coding or genetics expertise. Through case studies in a global common species Arabidopsis thaliana and a local threatened species Acropora cervicornis , we show how MAR-based tools can generate quantitative forecasts, enabling alignment with targets in the Kunming-Montreal Global Biodiversity Framework. We caution that the marApp only provides estimates for genetic diversity in the short-term, and long-term genetic diversity loss is expected to be much more severe. While robust methods for long-term genetic diversity loss are under development for many species, it is essential to preserve as much genetic diversity as possible for the future. As the conservation field moves beyond species diversity toward preserving adaptive potential, mar and marApp provide an essential bridge between genomic data and actionable area-based decisions. Data availability The mar package is available at https://github.com/meixilin/mar . The scripts used to build the marApp is available at https://github.com/meixilin/marApp . The data and scripts used to reproduce the Arabidopsis and Acropora examples are available at https://github.com/meixilin/marApp_paper . All genotype and coordinate data used in the examples are already publicly available. Author contributions M.L. and M.E.-A. conceived the project. M.L. conducted research and wrote the first draft of the manuscript. M.E.-A. developed the beta version of mar package, K.M. performed package testing and O.S. provided scripts for analysis. All authors read, revised and approved the manuscript. Supplemental materials Supplemental tables View this table: View inline View popup Download powerpoint Table S1. Output from the mar step in MARPIPELINE for the Arabidopsis 1001G dataset. MAR were fitted using the M = cA z function, from the MARsampling(scheme = “random”) output. “Mtype” denotes the four genetic diversity metrics considered: M: the number of segregating sites, E: the number of endemic segregating sites, thetaw: Watterson’s theta, and thetapi: nucleotide diversity. “Atype” denotes the two area calculation methods: A, the area of cells (km 2 ) or Asq, the area of squares (º 2 ). “c” and “z” are the inferred parameter values from M = cA z . “P(c)” and “P(z)” are the significant values for the inferred “c” and “z” values respectively. “R 2 ” column shows the adjusted R 2 value for each fitted genetic diversity and area relationship. Supplemental figures Download figure Open in new tab Fig S1. Output from plot.sfs in MARPIPELINE for the Arabidopsis 1001G dataset. The site frequency spectrum (SFS) of the data (gray, topleft), the expected SFS from neutral coalescence (red, topright), and the expected SFS from individual species abundance distributions (SAD). The AIC output is annotated for all SAD models, and the log-likelihood computed from the SFS is annotated for all SFSs. Download figure Open in new tab Fig S2. Output from plot.marsamp in MARPIPELINE for the Arabidopsis 1001G dataset. MAR were constructed from the MARsampling(scheme = “random”) output, using either A , the area of cells (km 2 , on the left), or Asq , the area of squares (º 2 , on the right). Four genetic diversity metrics were considered, from top to bottom, M: the number of segregating sites, E: the number of endemic segregating sites, thetaw: Watterson’s theta, and thetapi: nucleotide diversity. The fitted M = cA z line is overlaid and the equation annotated on the top right in each panel. Download figure Open in new tab Fig S3. Output from plot.marextinct in MARPIPELINE for the Arabidopsis 1001G dataset.. Genetic diversity loss predictions were constructed from the MARextinction(scheme = “random”) output, using either A , the area of cells (km 2 , on the left), or Asq , the area of squares (º 2 , on the right). Four genetic diversity metrics were considered, from top to bottom, M: the number of segregating sites, E: the number of endemic segregating sites, thetaw: Watterson’s theta, and thetapi: nucleotide diversity. The fitted ( 1 - m) = (1 - a) z line is overlaid and the equation annotated on the top right in each panel. Download figure Open in new tab Fig S4. Screenshots from the marApp Site frequency spectrum tab using the Florida Acropora example dataset. Download figure Open in new tab Fig S5. Truncated screenshots from the marApp Mutations-area relationship tab using the Florida Acropora example dataset. Download figure Open in new tab Fig S6. Truncated screenshots from the marApp Extinction simulation tab using the Florida Acropora example dataset. Download figure Open in new tab Fig S7. Reference table for approximating genetic diversity loss without genetic data. (A) Given a scaling factor ( z ), and a known habitat loss scenario, the approximate amount of genetic diversity remaining in the population is plotted, with darker color representing more genetic diversity loss. (B) Given a scaling factor ( z ), and a known genetic diversity conservation target, the maximum allowable percentage of area loss is plotted, with darker color representing more area loss. All estimates are based on the number of segregating sites (allelic richness) in a short-term scenario. Much more genetic diversity loss is expected in the long-term ( Mualim et al. 2024 ). Acknowledgements M.L. is supported by the David H. Smith Conservation Research Fellowship, and the Stanford Center for Computational, Evolutionary and Human Genomics. M.E.-A. is supported by the Office of the Director of the National Institutes of Health’s Early Investigator Award (1DP5OD029506-01), the U.S. Department of Energy, Office of Biological and Environmental Research (DE-SC0021286), by the U.S. National Science Foundation’s DBI Biology Integration Institute WALII (Water and Life Interface Institute, 2213983), by the Carnegie Institution for Science, the Howard Hughes Medical Institute, and the University of California Berkeley. The authors thank Alyssa Phillips and the MOILAB for helpful discussions. Funder Information Declared National Institutes of Health , 1DP5OD029506-01 United States Department of Energy , DE-SC0021286 U.S. National Science Foundation , 2213983 REFERENCES ↵ 1001 Genomes Consortium . 2016 . “ 1,135 Genomes Reveal the Global Pattern of Polymorphism in Arabidopsis Thaliana .” Cell 166 ( 2 ): 481 – 91 . OpenUrl CrossRef PubMed ↵ Assessment , M. E. 2001 . “Millennium Ecosystem Assessment” 2 . https://ser-europe.org/files/2012/08/Harris.pdf . ↵ CBD . 2022 . “Kunming – Montreal Global Biodiversity Framework 2030 Targets.” https://www.cbd.int/gbf/targets . ↵ Cramer , Katie L. , Jeremy B. C. Jackson , Mary K. Donovan , Benjamin J. Greenstein , Chelsea A. Korpanty , Geoffrey M. Cook , and John M. Pandolfi . 2020 . “ Widespread Loss of Caribbean Acroporid Corals Was Underway before Coral Bleaching and Disease Outbreaks .” Science Advances 6 ( 17 ): eaax9395 . OpenUrl FREE Full Text ↵ Drury , Crawford , Justin B. Greer , Iliana Baums , Brooke Gintert , and Diego Lirman . 2019 . “ Clonal Diversity Impacts Coral Cover in Acropora Cervicornisthickets: Potential Relationships between Density, Growth, and Polymorphisms .” Ecology and Evolution 9 ( 8 ): 4518 – 31 . OpenUrl ↵ Drury , Crawford , and Diego Lirman . 2021 . “ Genotype by Environment Interactions in Coral Bleaching .” Proceedings. Biological Sciences 288 ( 1946 ): 20210177 . OpenUrl PubMed ↵ Exposito-Alonso , Moises , Tom R. Booker , Lucas Czech , Lauren Gillespie , Shannon Hateley , Christopher C. Kyriazis , Patricia L. M. Lang , et al. 2022 . “ Genetic Diversity Loss in the Anthropocene .” Science (New York, N.Y .) 377 ( 6613 ): 1431 – 35 . OpenUrl ↵ Frankham , R. 1995 . “ Conservation Genetics .” Annual Review of Genetics 29 ( 1 ): 305 – 27 . OpenUrl CrossRef PubMed Web of Science ↵ Gardner , Toby A. , Isabelle M. Côté , Jennifer A. Gill , Alastair Grant , and Andrew R. Watkinson . 2003 . “ Long-Term Region-Wide Declines in Caribbean Corals .” Science (New York, N.Y .) 301 ( 5635 ): 958 – 60 . OpenUrl ↵ Gaston , Kevin J. 2010 . “ Biodiversity .” In Conservation Biology for All, 27–44 . Oxford University Press . ↵ Gutenkunst , Ryan N. , Ryan D. Hernandez , Scott H. Williamson , and Carlos D. Bustamante . 2009 . “ Inferring the Joint Demographic History of Multiple Populations from Multidimensional SNP Frequency Data .” PLoS Genetics 5 ( 10 ): e1000695 . OpenUrl ↵ Hengl , Tomislav . 2006 . “ Finding the Right Pixel Size .” Computers & Geosciences 32 ( 9 ): 1283 – 98 . OpenUrl ↵ Hijmans Robert J. 2025 . “Raster: Geographic Data Analysis and Modeling.” CRAN: Contributed Packages. The R Foundation . doi: 10.32614/cran.package.raster . OpenUrl CrossRef ↵ Hoban , Sean , Jessica M. da Silva , Alicia Mastretta-Yanes , Catherine E. Grueber , Myriam Heuertz , Margaret E. Hunter , Joachim Mergeay , et al. 2023 . “ Monitoring Status and Trends in Genetic Diversity for the Convention on Biological Diversity: An Ongoing Assessment of Genetic Indicators in Nine Countries .” Conservation Letters 16 ( 3 ). doi: 10.1111/conl.12953 . OpenUrl CrossRef ↵ Hu , Xin-Sheng , Fangliang He , and Stephen P. Hubbell . 2006 . “ Neutral Theory in Macroecology and Population Genetics .” Oikos (Copenhagen, Denmark) 113 ( 3 ): 548 – 56 . OpenUrl ↵ IUCN . 2025 . “ Table 3: Number of Species in Each IUCN Red List Category by Kingdom and Class .” IUCN Red List of Threatened Species . https://www.iucnredlist.org/statistics . ↵ Laikre , Linda , Sean Hoban , Michael W. Bruford , Gernot Segelbacher , Fred W. Allendorf , Gonzalo Gajardo , Antonio González Rodríguez , et al. 2020 . “ Post-2020 Goals Overlook Genetic Diversity .” Science (New York, N.Y .) 367 ( 6482 ): 1083 – 85 . OpenUrl ↵ Leigh , Deborah M. , Charles B. van Rees , Katie L. Millette , Martin F. Breed , Chloé Schmidt Laura D. Bertola , Brian K. Hand , et al. 2021 . “ Opportunities and Challenges of Macrogenetic Studies .” Nature Reviews. Genetics 22 ( 12 ): 791 – 807 . OpenUrl CrossRef PubMed ↵ Mastretta-Yanes , Alicia , Jessica M. da Silva , Catherine E. Grueber , Luis Castillo-Reina , Viktoria Köppä , Brenna R. Forester , W. Chris Funk , et al. 2024 . “ Multinational Evaluation of Genetic Diversity Indicators for the Kunming-Montreal Global Biodiversity Framework .” Ecology Letters 27 ( 7 ): e14461 . OpenUrl CrossRef PubMed ↵ Matthews , Thomas J. , Kostas A. Triantis , Robert J. Whittaker , and François Guilhaumon . 2019 . “ Sars: An R Package for Fitting, Evaluating and Comparing Species–area Relationship Models .” Ecography 42 ( 8 ): 1446 – 55 . OpenUrl CrossRef ↵ Matthews , Thomas J. , and Robert J. Whittaker . 2015 . “ REVIEW: On the Species Abundance Distribution in Applied Ecology and Biodiversity Management .” The Journal of Applied Ecology 52 ( 2 ): 443 – 54 . OpenUrl ↵ May , R. M. 1994 . “ Conceptual Aspects of the Quantification of the Extent of Biological Diversity .” Philosophical Transactions of the Royal Society of London. Series B, Biological Sciences 345 ( 1311 ): 13 – 20 . OpenUrl CrossRef PubMed Web of Science ↵ Ma , Zhanshan Sam . 2018 . “ DAR (diversity-Area Relationship): Extending Classic SAR (species-Area Relationship) for Biodiversity and Biogeography Analyses .” Ecology and Evolution 8 ( 20 ): 10023 – 38 . OpenUrl ↵ Miller , M. , A. Bourque , and J. Bohnsack . 2002 . “ An Analysis of the Loss of Acroporid Corals at Looe Key, Florida, USA: 1983–2000 .” Coral Reefs 21 ( 2 ): 179 – 82 . OpenUrl CrossRef Web of Science ↵ Mualim , Kristy S. , Jeffrey P. Spence , Clemens Weiß, Oliver Selmoni , Meixi Lin , and Moises Exposito-Alonso . 2024 . “ Genetic Diversity Loss in the Anthropocene Will Continue Long after Habitat Destruction Ends .” bioRxivorg . doi: 10.1101/2024.10.21.619096 . OpenUrl Abstract / FREE Full Text ↵ Paz-Vinas , Ivan , Amy Vandergast , Chloe Schmidt , Deborah Leigh , Simon Blanchet , René Clark Eric Crandall , et al. 2023 . “ Uneven Genetic Data Limits Biodiversity Assessments in Protected Areas Globally .” EcoEvoRxiv . doi: 10.32942/x2zc84 . OpenUrl CrossRef ↵ Prado Paulo In’acio , Murilo Dantas Miranda , and Andr ‘e Chalom . 2024 . “Sads: Maximum Likelihood Models for Species Abundance Distributions.” https://CRAN.R-project.org/package=sads . ↵ Scott , Michon . 2023 . “ With ‘Mission: Iconic Reefs’, NOAA Aims to Restore Florida Keys with Climate-Resilient Corals .” NOAA Climate.gov. March 6, 2023 . https://www.climate.gov/news-features/features/mission-iconic-reefs-noaa-aims-restore-florida-keys-climate-resilient-corals . ↵ Selmoni , O. , Phillip A. Cleves , and Moisés Expósito-Alonso. 2024 . “ Global Coral Genomic Vulnerability Explains Recent Reef Losses .” bioRxiv, March . doi: 10.1101/2024.03.25.586253 . OpenUrl Abstract / FREE Full Text ↵ Zheng , Xiuwen , Stephanie M. Gogarten , Michael Lawrence , Adrienne Stilp , Matthew P. Conomos , Bruce S. Weir , Cathy Laurie , and David Levine . 2017 . “ SeqArray-a Storage-Efficient High-Performance Data Format for WGS Variant Calls .” Bioinformatics (Oxford, England) 33 ( 15 ): 2251 – 57 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted September 14, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following marApp: An R package and web portal to calculate mutations- and genetic diversity-area relationship for conservation Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share marApp : An R package and web portal to calculate mutations- and genetic diversity-area relationship for conservation Meixi Lin , Kristy Mualim , Oliver Selmoni , Moises Exposito-Alonso bioRxiv 2025.09.09.675155; doi: https://doi.org/10.1101/2025.09.09.675155 Share This Article: Copy Citation Tools marApp : An R package and web portal to calculate mutations- and genetic diversity-area relationship for conservation Meixi Lin , Kristy Mualim , Oliver Selmoni , Moises Exposito-Alonso bioRxiv 2025.09.09.675155; doi: https://doi.org/10.1101/2025.09.09.675155 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Ecology Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41913) Biophysics (21436) Cancer Biology (18578) Cell Biology (25482) Clinical Trials (138) Developmental Biology (13372) Ecology (19889) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15599) Genomics (22483) Immunology (17728) Microbiology (40365) Molecular Biology (17163) Neuroscience (88540) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15136) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9818) Zoology (2269)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.