SuSiE 2.0: improved methods and implementations for genetic fine-mapping and phenotype prediction

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 51,060 characters · extracted from preprint-html · click to expand
SuSiE 2.0: improved methods and implementations for genetic fine-mapping and phenotype prediction | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results SuSiE 2.0: improved methods and implementations for genetic fine-mapping and phenotype prediction Alexander McCreight , Yanghyeon Cho , Ruixi Li , Daniel Nachun , Hao-Yu Gan , Peter Carbonetto , Matthew Stephens , William R.P. Denault , Gao Wang doi: https://doi.org/10.1101/2025.11.25.690514 Alexander McCreight 1 Center for Statistical Genetics, The Gertrude H. 
Sergievsky Center, Columbia University , New York, NY, USA 2 Department of Biostatistics, Columbia University , New York, NY, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yanghyeon Cho 1 Center for Statistical Genetics, The Gertrude H. Sergievsky Center, Columbia University , New York, NY, USA 3 Department of Mathematics and Statistical Science, University of Idaho , Moscow, ID, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ruixi Li 1 Center for Statistical Genetics, The Gertrude H. Sergievsky Center, Columbia University , New York, NY, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Daniel Nachun 4 Department of Genetics, Stanford University , Stanford, CA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hao-Yu Gan 1 Center for Statistical Genetics, The Gertrude H. Sergievsky Center, Columbia University , New York, NY, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Peter Carbonetto 5 Department of Human Genetics, The University of Chicago , IL, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Matthew Stephens 5 Department of Human Genetics, The University of Chicago , IL, USA 6 Department of Statistics, The University of Chicago , IL, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: wang.gao{at}columbia.edu wdenault{at}uchicago.edu mstephens{at}uchicago.edu William R.P. 
Denault 5 Department of Human Genetics, The University of Chicago , IL, USA 7 Data Science Institute, University of Chicago , IL, USA 8 Oslo Centre for Biostatistics and Epidemiology, Oslo University Hospital , Oslo, Norway Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: wang.gao{at}columbia.edu wdenault{at}uchicago.edu mstephens{at}uchicago.edu Gao Wang 1 Center for Statistical Genetics, The Gertrude H. Sergievsky Center, Columbia University , New York, NY, USA 2 Department of Biostatistics, Columbia University , New York, NY, USA 9 Department of Neurology, Columbia University , New York, NY, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: wang.gao{at}columbia.edu wdenault{at}uchicago.edu mstephens{at}uchicago.edu Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Sum of Single Effects regression (SuSiE) has become widely adopted for genetic fine-mapping, yet its original implementation faces architectural limitations that hinder extensibility and performance. We present SuSiE 2.0, featuring a modular redesign for extensibility, up to 5x speed improvements for summary statistics applications, and several useful extensions including SuSiE-ash, a new method that improves calibration when strong signals coexist with moderate effects. Simulations and real data benchmarks demonstrate performance across diverse genetic architectures, highlighting improved calibration of SuSiE-ash for fine-mapping under complex polygenic backgrounds with 1.5–3x FDR reduction while maintaining power, and revealing SuSiE-based methods as effective yet underappreciated tools for TWAS prediction. 
Background Sum of Single Effects regression (SuSiE) [ 1 ] has emerged as a powerful Bayesian variable selection tool, producing posterior inclusion probabilities (PIPs) and single-effect credible sets (CSs) that quantify uncertainty in selected variables. These make SuSiE particularly suited for genetic fine-mapping, where causal signals are sparse yet genetic variables are highly correlated due to linkage disequilibrium, and capturing all potential causal effects is essential for downstream biological interpretation. The susieR package has received over 180,000 downloads on CRAN (as of 2025), and has inspired numerous methodological extensions [ 2 – 14 ], with integration into major analysis pipelines including COLOC [ 15 ] and large-scale genetic studies such as UK Biobank [ 16 ], GTEx [ 17 ], and FinnGen [ 18 ]. However, the original susieR implementation suffers from architectural limitations that hinder extensibility and performance, and many extensions exist only as standalone command-line tools [ 2 , 6 , 14 , 19 ], making them difficult to integrate into R-based pipelines or benchmark systematically. In applying SuSiE to expression QTL (eQTL) fine-mapping, we observed potential calibration issues under complex genetic architectures where strong regulatory signals coexist with moderate and weak effects. Existing extensions such as SuSiE-inf [ 2 ] model a pervasive infinitesimal effects background, but proved overly conservative in our applications. In this brief report we present SuSiE 2.0, a modular redesign that maintains backward compatibility while enabling seamless integration of extensions and improved performance. Under this framework we developed SuSiE-ash, a new method that places an adaptive shrinkage prior on moderate to weak effects, which proves to improve calibration across diverse genetic architectures. 
We also incorporate several published extensions [ 2 , 7 ], and demonstrate that SuSiE-based methods serve as effective yet underappreciated tools for TWAS prediction. Results and discussion Figure 1A illustrates the SuSiE 2.0 architecture, which organizes the computational workflow into four stages: interface, constructor, workhorse, and refinement. User-facing functions accept individual-level data, sufficient statistics, or summary statistics, harmonized into a common internal representation before executing Iterative Bayesian Stepwise Selection (IBSS). This modular design uses S3 generic dispatch to separate data-type specific operations from core algorithm logic, eliminating code duplication while enabling seamless integration of methodological extensions, as many reduce to customizations in Bayes factor computation or residual variance estimation. New features include (1) new prior on residual variance [ 20 ] for improved coverage particularly in small samples [ 7 ], (2) new model SuSiE-ash using adaptive shrinkage [ 21 , 22 ] to model moderate to weak effects ( Methods and Supplementary Notes ), (3) SuSiE-inf for modeling infinitesimal backgrounds [ 2 ], (4) up to 5x speed improvements for summary statistics with regularized LD matrices ( Figure S1A ), and (5) enhanced model refinement algorithm ( Figure S1B ), flexible convergence criteria, and additional residual estimation methods for greater robustness. SuSiE 2.0 includes comprehensive unit tests covering 99% of code. Download figure Open in new tab Figure 1. SuSiE 2.0 software architecture and performance across sparse and complex genetic architectures. (A) SuSiE 2.0’s modular design unifies individual-level data, sufficient statistics, and summary statistics under a single algorithmic pipeline with two approaches for modeling moderate and weak effects: SuSiE-ash and SuSiE-inf . 
(B-E) Sparse genetic effects (K = 1, 2, 3, 4, 5 causal variants; n = 1,000, p = 5,000 variants, $h^2_{\mathrm{snp}}$ = 0.03; 150 replicates per K). (B) 95% credible set (CS) power across varying total proportion of variance explained (PVE). (C) 95% CS false discovery rate (FDR) with nominal 5% FDR threshold (dotted red line). (D) 5-fold cross-validated prediction accuracy (Pearson’s $R^2$). (E) Variant-level ROC curves pooled across all values of K and replicates with 5% false positive rate (FPR) threshold (dotted red line). (F-I) Oligogenic effects on polygenic background with total 23 causal variants, mimicking a complex yet realistic cis-eQTL scenario (K = 3 strong effects (50% total PVE), 5 moderate effects (35% total PVE), 15 polygenic background (15% total PVE); n = 1,000, p = 5,000, total PVE $h^2_g$ = 0.25; 150 replicates). (F) 95% CS power when considering the N strongest simulated effects as causal variants. (G) 95% CS FDR across top N causal variant thresholds. (H) 5-fold cross-validated prediction accuracy (Pearson’s $R^2$). (I) Variant-level ROC curve with 5% FPR (dotted red line) using top 8 variants by effect size magnitude as causal (to cover the simulated strong and moderate effects). To assess performance, we developed simxQTL , an R package implementing diverse genetic architectures for benchmarking ( Methods ). We evaluated SuSiE, SuSiE-ash, and SuSiE-inf using power and false discovery rate (FDR) at 95% credible set (CS) coverage, ROC curves at variant level, and phenotype prediction accuracy as a proxy for TWAS model performance. Under sparse settings with k = 1–5 causal variants (n = 1,000, p = 5,000), all methods appear reasonably calibrated ( Figure 1C , Figure S2 ), with SuSiE achieving the highest power followed closely by SuSiE-ash, while SuSiE-inf was considerably more conservative ( Figure 1B ). At k = 5, SuSiE-inf maintained the lowest FDR whereas SuSiE-ash offered a more favorable power-FDR tradeoff.
Prediction accuracy was nearly identical across methods, with SuSiE and SuSiE-ash slightly outperforming SuSiE-inf ( Figure 1D ). For variant-level evaluation, SuSiE-ash achieved the best ROC performance at low false positive rates (FPR), followed by SuSiE, with SuSiE-inf substantially lower. Under an oligogenic setting more representative of eQTL architecture (3 strong, 5 moderate effect variants and 15 polygenic background effects; Supplementary Notes S4 ), SuSiE-ash maintained power nearly identical to SuSiE while achieving substantially lower FDR; SuSiE-inf remained the most conservative ( Figure 1F–G , Figure S3 ). Prediction accuracy was comparable across methods, with SuSiE-ash showing a slight advantage ( Figure 1H ). For variant-level ROC performance at low FPR, SuSiE-ash and SuSiE performed similarly, with SuSiE-inf trailing behind ( Figure 1I ). Under settings with stronger infinitesimal backgrounds ( Figure S4 – 5 ), SuSiE-inf, while still conservative in power, achieved the best FDR control and improved ROC performance approaching the other methods, and achieved the best prediction accuracy, though closely followed by SuSiE-ash. SuSiE’s elevated FDR under polygenic architectures can arise from synthetic associations, where non-causal variants accumulate spurious signals through LD with multiple true effect variants. SuSiE interprets these synthetic signals as distinct true effects ( Figure S7 ). SuSiE-ash mitigates this by modeling the polygenic background with adaptive shrinkage, attributing this diffuse signal to residual variance rather than credible sets, improving its sensitivity to sparse effects. We also implemented and evaluated other proposed extensions for improving credible set coverage, including attainable coverage (SparsePro [ 19 ]) and Bayesian Linear Programming [ 23 ]. 
Attainable coverage showed limited benefit in our benchmarks ( Figure S8 ) but is included in SuSiE 2.0 as a convenient alternative for constructing credible sets at different coverage levels post-analysis when LD matrices are not readily available to implement the purity filter. Bayesian Linear Programming provided no improvement and is not included in SuSiE 2.0 ( Supplementary Notes S5 ). While SuSiE-ash was motivated by eQTL fine-mapping where moderate polygenic backgrounds are common, different applications may warrant different approaches. For exploratory genome-wide analysis prioritizing sensitivity, standard SuSiE provides the most signal and can identify candidates for follow-up with SuSiE-ash or SuSiE-inf. For targeted candidate regions, running all three methods helps ensure robustness. Other molecular QTLs and GWAS may exhibit distinct genetic architectures, and method choice ultimately depends on the application and tolerance for false discoveries. Conclusions We present SuSiE 2.0, a modular reimplementation of SuSiE that improves extensibility, performance, and calibration for genetic fine-mapping and TWAS prediction. SuSiE-ash addresses elevated FDR under complex genetic architectures by modeling moderate to weak effects through adaptive shrinkage, achieving improved calibration without sacrificing power. The four-stage architecture readily accommodates future extensions such as generalized linear models or integration into a generalized IBSS framework, and developers can build directly on the SuSiE 2.0 codebase to ensure compatibility with existing workflows. The software is available as an R package with comprehensive documentation and unit tests. 
METHODS Overview of SuSiE-ash Model We model the phenotype as sparse effects targeted for fine-mapping and a background of unmappable moderate to weak effects, aiming to improve power and reduce false discovery by capturing variation unexplained by the sparse component: where vector y is mean-centered and X is the standardized n × p genotype matrix. The sparse component β is represented using the Sum of Single Effects model ( SuSiE ) [ 1 ], where each γ ℓ = ( γ ℓ1 ,…, γ ℓ p ) is a one-hot indicator putative causal variant in the ℓ-th mappable effect, and ( Eqs. S2 – S5 in Supplementary Notes S1 ). The remaining moderate genetic effects, scaled by σ 2 , are modeled using an adaptive-shrinkage mixture-of-normals prior, with a fixed variance grid and mixture weights π . Together, the sparse and unmappable components induce a marginal precision structure of the form where τ 2 = var accounts for variations that SuSiE (essentially setting τ 2 = 0) cannot fine-map (See Supplementary Notes S1 for the full description). Posterior inference proceeds via coordinate-ascent variational inference (VI). Under a mean-field approximation, the evidence lower bound (ELBO) ( Eqs. S8 – S9 ) decomposes into tractable subproblems corresponding to single-effect regression (SER) updates for β and normal-means (NM) updates for θ ( Eqs. S13 – S14 ), as outlined below. Updating β Different from SuSiE, SuSiE-ash updates each single-effect component under a marginal likelihood that incorporates the precision matrix Ω ( Eq. 4 ) following the same formulation used in SuSiE-inf . Conditioning on the current variance components σ 2 and τ 2 , the ℓ-th effect is updated using the leave-one-effect-out residual together with a marginal likelihood involving Ω, ensuring that the SER update is performed under the correct effective noise structure. Given this likelihood, SuSiE-ash computes the posterior inclusion probabilities α ℓ j , posterior means m ℓ j , and variances for each single-effect component. 
This Ω-adjusted update mitigates PIP miscalibration under non-sparse architectures and improves the recovery of strong causal signals ( Eqs. S13 – S14 ). Updating θ and π Conditioned on the current sparse component, we use a data-driven approach to initialize variance grid ( Supplementary Notes S3 ) and update each θ j using normal-means posterior computations from Mr . ASH [ 22 ], yielding posterior mixture weights φ 1 jk , shrinkage-adjusted means µ 1 jk , and variances ( Eqs. S19 – S21 ). Mixture proportions are updated as which yields the updated precision matrix Ω. The complete SuSiE-ash procedure is summarized in Algorithm 1 , with further details in Supplementary Notes S1–S3 . Algorithm 1 Iterative Bayesian Stepwise Selection (IBSS) algorithm for SuSiE-ash Download figure Open in new tab Overview of Simulation Study Design We evaluated SuSiE, SuSiE-ash , and SuSiE-inf using genotype data from UK Biobank. Under sparse settings, we varied the number of causal variants (k=1–5) with fixed per-SNP heritability. Under complex genetic architectures mimicking realistic eQTL settings, we partitioned genetic effects into three components: sparse (3 variants with large effects), oligogenic (5–10 variants with moderate effects), and polygenic background (15 variants with small effects). We also evaluated settings with an infinitesimal background, where instead of polygenic background with limited variants, all remaining variants collectively contribute a small portion of heritability. Total heritability was fixed at h 2 = 0.25 across scenarios. To facilitate reproducible benchmarking, we implemented these simulation designs in simxQTL , an R package providing standardized genetic architectures for systematic evaluation of gene-mapping methods ( https://github.com/StatFunGen/simxQTL ). See Supplementary Notes S4 for full simulation details. Code Availability SuSiE 2.0 is available in the susieR package ( https://github.com/stephenslab/susieR ). 
Simulation functions are provided in the simxQTL package ( https://github.com/StatFunGen/simxQTL ). Real-data analysis scripts and TWAS weight functions are available at https://github.com/StatFunGen/xqtl-protocol and https://github.com/StatFunGen/pecotmr , respectively. Scripts to reproduce all analyses are available at https://github.com/alexmccreight/susieR2.0-paper . Author Contributions GW conceived and designed the experiments. GW, WD and MS jointly supervised the research. YC and AM developed the SuSiE-ash model, and AM developed the SuSiE 2.0 package with input from GW. AM implemented the numerical experiments. RL and AM performed data applications. PC, HG and DN contributed to improvements on the SuSiE 2.0 package. AM, YC and GW wrote the manuscript. Competing Interests The authors declare no competing interests. Supplementary Notes S.1 SuSiE-ash Model and Assumptions SuSiE-ash is a new Bayesian variable selection regression approach that improves fine-mapping by combining SuSiE [ 1 ] for sparse variable selection and Mr.ASH [ 2 ] for adaptive shrinkage estimation of unmappable effects. The key idea is to iteratively update the strong, sparse effect β using SuSiE marginalizing over θ and ϵ , followed by updating the unmappable effects (oligogenic and polygenic) θ using Mr.ASH based on the residual after removing the updated sparse effects. SuSiE-ash is based upon the following model: $$y = X\beta + X\theta + \epsilon, \tag{S1}$$ where y is a centered n × 1 vector of phenotype, X = [ x 1 ,…, x p ] is a standardized n × p matrix of genotypes for p genetic variants in a genomic region of interest, with x j being the j -th column of X , the p -vectors β and θ represent strong sparse effects and oligogenic/polygenic effects, respectively, which are independent of each other, and $\epsilon \sim N(0, \sigma^2 I)$. Here, we construct β by SuSiE to model the strong sparse component [ 1 ].
We assume that precisely L variants have a non-zero effect on the outcome: where β (ℓ) denotes the ℓ-th single-effect vector, is a p -vector indicating the location of the causal SNP in the ℓ-th single effect, with p = ( p 1 ,…, p p ) ′ representing the prior weight that sum to 1, and b ℓ is a scalar representing the causal effect size in the ℓ-th single effect. Note that the single-effect regression (SER) model is a special case of the above-specified model when L = 1. Then we construct θ using an adaptive shrinkage prior for the scaled coefficient ([ 2 ], [ 3 ]) to model the remaining unmappable effects: where π = ( π 1 , … π K ) ′ represents the mixture proportions (non-negative and sum to one), and are a non-negative, increasing, pre-specified grid of component variances such that with set to 0. Remark 1 (Background variance modeling) Conceptually, SuSiE-ash separates sparse effect localization from background variance modeling. Following SuSiE-inf , the background component θ is modeled at the variant level but enters the likelihood only through its marginal variance τ 2 = Var( θ j ), inducing the precision structure Ω = ( σ 2 I + τ 2 XX ⊤ ) −1 . This formulation allows diffuse polygenic effects to be absorbed as structured background variation while preserving a clear inferential focus on sparse effects. At the same time, by modeling θ using a flexible shrinkage prior, SuSiE-ash enables more accurate estimation of the residual variance σ 2 and background effects, yielding a principled compromise between the overly optimistic behavior of SuSiE and the overly conservative behavior of SuSiE-inf . One may consider forming the likelihood using variant-specific background variances obtained from fitting θ via adaptive shrinkage, yielding a precision matrix of the form ( σ 2 I + XDX ⊤ ) −1 , where and denotes the posterior variance of the background effect at variant j . 
However, such a formulation blurs the distinction between sparse and background effects and leads to unstable iterative updates: large values may reflect either unmappable background signal or moderate effects better attributed to the sparse component, while near-zero values lead to numerically unstable inversion of the precision matrix. Moreover, because D is repeatedly updated, such formulations preclude reuse of a fixed spectral decomposition of XX ⊤ , requiring repeated large-scale matrix inversions or decompositions and incurring prohibitive computational cost in high-dimensional settings. In contrast, a global background variance stabilizes the separation between sparse effect localization and background variation, with θ estimated for downstream analyses rather than fine-mapping. S.2 Variational Inference Framework for SuSiE-ash Note that both SuSiE and Mr.ASH adopt the variational approximation (VA) method [ 4 ] to approximate the posterior distribution under their respective models. By assuming a fully factorized variational approximation, they simplify the optimization of the evidence lower bound (ELBO) over joint prior variables, making it tractable. This tractability is achieved by employing the coordinate ascent algorithm [ 5 ], which converts the complex joint optimization problem into a series of simpler tasks. In SuSiE , this coordinate-ascent procedure is implemented through the Iterative Bayesian Stepwise Selection (IBSS) algorithm, in which each single-effect component is updated by fitting a single-effect regression (SER) model to partial residuals. Likewise, Mr.ASH performs analogous coordinate-wise updates for each coefficient under the normal-mean mixture model (NM). 
In SuSiE-ash , we similarly assume that the approximation of the joint posterior q for β and θ is factorized as: Our proposed Algorithm is iteratively optimizing variational approximations q β ( β ) and q θ ( θ ) by maximizing the following ELBOs, respectively, under SuSiE-ash in Eq. (S1): and where and denote the vectors of single-effect and adaptive shrinkage variances, respectively. We define the precision matrix Ω = ( τ 2 XX ⊤ + σ 2 I ) −1 , with being the total prior variance of θ . The functions g β and g θ denote the prior distributions of β and θ , respectively. Finally, , is the residual after removing the current posterior mean of β , . SuSiE-ash is fitted using a generalized iterative Bayesian Stepwise-selection (GIBSS) as outlined in Algorithm 1 . Implementation within an iteration loop is outlined below. Update the strong mappable effect β Given variance components σ 2 and τ 2 , we update each by fitting the following single-effect regression (SER) model: where . The posterior distribution for β ℓ = b ℓ γ ℓ under the SER model ( Eq. S10 ) is: where is the vector of the posterior inclusion probabilities (PIPs). Note that the posterior distribution for the ℓ-th single effect can be obtained by maximizing the following simpler ELBO: where c ℓ is the sum of terms that are not dependent on q ℓ . Then, by taking partial derivatives of with respect to parameters α ℓ , , and , the explicit formulas for this update are expressed as: And For brevity, we introduce the following function that returns arguments of the posterior distribution of β ℓ in Algorithm1 : Update the unmappable effects θ and the mixture proportion π in the shrinkage prior To update θ and π , one could theoretically work with the marginal distribution over β . However, this approach is computationally demanding, particularly for large datasets. As a practical alternative, we leverage the existing Mr.ASH model implementation in the R package mr.ash in a modular fashion incorporated into GIBSS. 
This update involves fitting the Mr.ASH model to the residuals after removing the updated sparse effect, , from y , denoted as Here, each is computed as . In a similar manner to SuSiE, Mr.ASH employs a coordinate-ascent mean field variational inference: (1) the variational distribution of θ is factorized as the coordinate ascent update for each θ j , j = 1,…, p , is given by computing a posterior distribution under the following normal mean model: where . Then, by [ 2 ], the posterior distribution for θ j , denoted as under the normal mean model is given by: Where with , and . For future reference, we define the function NM post , which returns the estimated parameters for the posterior distribution of θ j under the normal mean model: where µ 1 j = ( µ 1 j 1 ,…, µ 1 jK ) ′ , , and . We can also obtain update the mixture proportion π : where . Remark 2 Note that in SuSiE-ash implementation, we used Bayes factor instead of likelihood in (S23) because the two are the same up to a constant term, and Bayes factor aligns with the Mr.ASH implementation, providing a numerically stable expression for evaluating the mixture components: Where S.3 Implementation Details: Shrinkage Grid Construction and Variance Updates We first calculate provisional variance components, denoted as , using a method-of-moments (MoM) estimator following SuSiE-inf . These values are obtained by plugging the posterior moments of q β , the variational posterior means and second moments of the single-effect coefficients, into the MoM equations. Specifically, ( ) are computed by solving: Here, denotes the Euclidean norm. At iteration i , and the residual variance can be updated as: where and with being . Data-driven variance grid for the Adaptive Shrinkage prior To construct the variance grid for the adaptive shrinkage prior, we divide the range of plausible effect-size variances into three log-spaced regions centered around the moment-based estimate of the polygenic variance, . 
We then generate (i) a dense set of very small to near zero variances to to capture very weak effects, (ii) a moderately dense region around ( to ) to model average to moderate unmappable effects, and (iii) a coarse grid from up to the minimum sparse-effect variance estimated by SuSiE , , to allow for additional larger effects not captured by SuSiE ; notice that this is also the upper-bound of the grids. These grids are then combined with a point mass at zero to form the final mixture variance grid used in the Mr.ASH prior for each iteration. S.4 Simulation Study Details Sparse effects simulation design We conducted extensive simulations to evaluate the performance of SuSiE, SuSiE-ash , and SuSiE-inf under a sparse genetic architecture, with slight modifications to the original SuSiE paper’s simulation design. These simulations used genotype data from UK Biobank where we randomly sampled 150 LD blocks across chromosomes 1–22 and derived the corresponding genotype matrices to serve as the basis for our simulation. Each genotype matrix, $X \in \mathbb{R}^{n \times p}$, has n = 1,000 and p = 5,000. Additionally, we required a minor allele frequency greater than 1%, and a missing rate below 5% (using mean imputation for missing data). We generated gene expression levels, $y \in \mathbb{R}^{n}$, based upon the following linear model: $$y = X\beta + \epsilon,$$ where $\beta \in \mathbb{R}^{p}$ denotes the effect size vector and $\epsilon \in \mathbb{R}^{n}$ denotes the noise vector, such that $\epsilon \sim \mathcal{N}(0, \sigma^2 I_n)$ where $I_n$ is the n × n identity matrix. The effect size vector, β , is constructed by first sampling a set of causal variant indices, 𝒮, uniformly at random from {1, …, p }. For each causal variant, j ∈ 𝒮, we set β j = 1, and for all non-causal variants, j ∉ 𝒮, we set β j = 0. Finally, to achieve the desired heritability-level, $h^2$, we set the residual variance, $\sigma^2$, to solve the following equation: $$h^2 = \frac{\operatorname{Var}(X\beta)}{\operatorname{Var}(X\beta) + \sigma^2}.$$ For each simulated effect, we fixed the per-SNP heritability at $h^2_{\mathrm{snp}} = 0.03$.
We then generated simulated data sets varying the number of causal variants k = {1, 2, 3, 4, 5} resulting in $h^2$ = {0.03, 0.06, 0.09, 0.12, 0.15}, respectively. For each of the five scenarios, we generated 150 data sets as replicates. Complex genetic architecture simulation design We also evaluated SuSiE, SuSiE-ash , and SuSiE-inf under multiple complex genetic architectures to mimic a realistic eQTL architecture [ 6 ]. In these settings, gene expression is influenced both by a core set of variants that exert medium to large effects and by a polygenic background in which i) a number of variants (≈15) each contribute a small amount of phenotypic variance, or ii) all remaining variants each contribute a non-zero amount of variation. We used the same preprocessed genotype matrices from the UK Biobank data described in the sparse simulation design section. We generated gene expression levels, y ∈ ℝ n , using a linear model that partitions genetic effects into three distinct components: sparse, oligogenic, and polygenic. The sparse component, 𝒮, includes k = 3 variants which exhibit relatively large effects. These effects are drawn from a normal distribution, , and then scaled to achieve the target heritability . The oligogenic component, 𝒪, has a small number of variants (5–10) that contribute moderate effects. These effects are modeled using a two-component Gaussian mixture distribution to allow for added variability in their magnitudes: for j ∈ 𝒪, where z j ∈ {1, 2} denotes the mixture component assignment, and are the component-specific variances with and π 1 + π 2 = 1. The oligogenic effects are then scaled to achieve the target heritability . The polygenic component, 𝒫, models the contribution of remaining causal variants. Effects for this component are drawn from a normal distribution with small variance, for j ∈ 𝒫, and scaled to achieve the target heritability . Variants not assigned to any component have effects set to zero.
Collectively, these three mutually exclusive sets form a partition of the complete set of variants, i.e., 𝒮 ∪ 𝒪 ∪ 𝒫 = {1, …, p }. These components comprise the effect vector β ∈ ℝ p to create the following linear model: $y = X\beta + \epsilon$, where X ∈ ℝ n×p is the standardized genotype matrix and ϵ ∈ ℝ n is a noise vector with $\epsilon \sim \mathcal{N}(0, \sigma^2 I_n)$, where I n is the n × n identity matrix. The total heritability, $h^2_g$, is partitioned among these components such that $h^2_g = h^2_{\text{sparse}} + h^2_{\text{oligo}} + h^2_{\text{poly}}$, with each h 2 value corresponding to the variance explained by the respective genetic component. We scaled all effects to achieve their desired heritability proportions. Finally, the residual variance, σ 2 , is chosen to ensure that the total variance of y reflects the desired total heritability. In our benchmark, we evaluated three scenarios with varying genetic architectures, all with total heritability $h^2_g = 0.25$ and 150 replicates. The first scenario, oligogenic effects on a polygenic background ( Figure 1F–1I ), included 3 sparse effects (50% of $h^2_g$), 5 oligogenic effects (35% of $h^2_g$), and 15 polygenic effects (15% of $h^2_g$). The second scenario, oligogenic effects on a moderate infinitesimal background ( Figure S4 A–D ), maintained the same heritability proportions (50%, 35%, 15%) and variant counts for sparse and oligogenic components, but assigned the remaining heritability to all remaining variants. The third scenario, oligogenic effects on an extensive infinitesimal background ( Figure S4 E–H ), shifted the architecture toward a stronger infinitesimal contribution: 3 sparse effects (50% of $h^2_g$), 10 oligogenic effects (15% of $h^2_g$), and all remaining variants (35% of $h^2_g$). S.5 Evaluation of Alternative Credible Set Coverage Methods We evaluated two additional proposed approaches for improving credible set coverage: attainable coverage [ 7 ] and Bayesian Linear Programming (BLiP) [ 8 ]. 
Attainable coverage showed limited benefit in our benchmarks ( Figure S8 ), and is included in SuSiE 2.0 as a convenient alternative for constructing credible sets at different coverage levels post-analysis when LD matrices are not readily available to implement the purity filter. We note that the default entropy threshold parameter (ethresh = 20) performed poorly in both our sparse and complex benchmarks. We suspect this default was likely tuned for sparser marker sets, whereas our simulations use dense markers (p = 5,000). We therefore changed the default in SuSiE 2.0 to max(100, 0.10p), where p is the number of variables. BLiP provided no improvement over the default SuSiE implementation and is not incorporated in SuSiE 2.0 . SUPPLEMENTARY FIGURES Download figure Open in new tab Figure S1. Runtime comparison of SuSiE-RSS with regularized LD and refinement between susieR 1.0 vs susieR 2.0. (A-B) Sparse genetic architecture simulation (K = 5 causal variants; n = 1,000, p = 500, 1,000, 2,500 variants, h 2 snp = 0.03; 150 replicates per p). (A) Performance of RSS with regularized LD. (B) Performance of refining model fit. Download figure Open in new tab Figure S2. Sparse effects PIP calibration. Expected PIPs vs observed frequencies of true causal variants (diagonal line represents perfect calibration). Download figure Open in new tab Figure S3. ROC curves for oligogenic effects on a polygenic background using top N = 3 to 22 as causal , matching the “top N as causal” approach to assess FDR and power of 95% CS in Figure 1F and 1G . Download figure Open in new tab Figure S4. SuSiE 2.0 performance across complex genetic architectures with infinitesimal backgrounds. (A-D) Oligogenic effects on a moderate infinitesimal background (K = 3 sparse effects (50% total PVE), 5 oligogenic effects (35% total PVE), remaining variants (15% total PVE); n = 1,000, p = 5,000, total PVE h 2 g = 0.25; 150 replicates) (A) 95% CS power across top N variant thresholds. 
(B) 95% CS FDR across top N causal variant thresholds. (C) 5-fold cross-validated prediction accuracy (Pearson’s R 2 ). (D) Variant-level ROC curve with 5% FPR (dotted red line) using top 8 variants as causal (to cover simulated strong and moderate effects). (E-H) Oligogenic effects on an extensive infinitesimal background (K = 3 sparse effects (50% total PVE), 10 oligogenic effects (15% total PVE), remaining variants (35% total PVE); n = 1,000, p = 5,000, total PVE h 2 g = 0.25; 150 replicates). (E) 95% CS power across top N variant thresholds. (F) 95% CS FDR across top N causal variant thresholds. (G) 5-fold cross-validated prediction accuracy (Pearson’s R 2 ). (H) Variant-level ROC curve with 5% FPR (dotted red line) using top 13 variants as causal (to cover simulated strong and moderate effects). Download figure Open in new tab Figure S5. Proportion of replicates where each method achieves the best cross-validated prediction accuracy (Pearson’s R 2 ) across sparse and complex genetic architectures. (A) Sparse effects. (B) Oligogenic effects on polygenic background. (C) Oligogenic effects on moderate infinitesimal background. (D) Oligogenic effects on extensive infinitesimal background. Download figure Open in new tab Figure S6. Credible set size and median purity across sparse and complex genetic architectures. (A) Sparse effect CS size. (B) Oligogenic effects on a polygenic background CS size. (C) Oligogenic effects on a moderate infinitesimal background CS size. (D) Oligogenic effects on an extensive infinitesimal background CS size. (E) Median purity across different genetic effect architectures. Download figure Open in new tab Figure S7. Fine-mapping performance in the presence of correlated polygenic signals. 
(A-H) Oligogenic effects on polygenic background with total 23 causal variants, mimicking a complex yet realistic cis-eQTL scenario (K = 3 strong effects (50% total PVE), 5 moderate effects (35% total PVE), 15 polygenic background (15% total PVE); n = 1,000, p = 5,000, total PVE h 2 g = 0.25). (A) Absolute simulated effect sizes with causal variants highlighted in red. (B) Marginal association strength (-log 10 p-values). (C-E) Posterior inclusion probabilities (PIPs) for SuSiE, SuSiE-inf, and SuSiE-ash, respectively; colored rings indicate credible set membership, causal variants highlighted in red. (F-H) Credible sets overlaid on absolute effect sizes for SuSiE, SuSiE-inf, and SuSiE-ash, respectively; colored points indicate variants captured in credible sets. Download figure Open in new tab Figure S8. Performance comparison of purity-based and attainable coverage credible sets in sparse fine-mapping scenarios. (A-D) Sparse genetic effects (K = 1, 2, 3 causal variants; n = 1,000, p = 5,000 variants, h 2 snp = 0.03; 150 replicates per K). (A) 95% CS power across varying total PVE. (B) 95% CS FDR with nominal 5% FDR threshold (dotted red line). (C) Purity (minimum absolute correlation among variants within each CS) distribution pooled across K, showing many attainable coverage CS are very low in purity. (D) Credible set size distribution pooled across K. Acknowledgements We thank Angela Helfrich and Mark Bronnimann from Amazon Web Services for providing cloud computing support for real-world data analysis. This work was supported in part by NIH grants R01HG002585 and R35GM153249 (to M.S., P.C.), NIH grants R01AG076901 (to G.W., R.L.), R01AG086467 (to A.M., Y.C.), and a grant from the Urbut Family Foundation (to G.W.). This project is supported by the Eric and Wendy Schmidt AI in Science Postdoctoral Fellowship, a Schmidt Sciences, LLC program. This research was conducted using data from the Religious Orders Study and the Rush Memory and Aging Project (ROSMAP). 
We thank the participants and investigators of these studies. Funder Information Declared NIH Common Fund, https://ror.org/001d55x84 Urbut Family Foundation Footnotes - Revisions to method section - Addition of simulation description to method section - Edits to alternative credible set formation methods - Explanation why SuSiE has elevated FDR under polygenic architecture - Two additional supplemental figures https://github.com/stephenslab/susieR https://github.com/StatFunGen/simxQTL https://github.com/StatFunGen/xqtl-protocol https://github.com/StatFunGen/pecotmr https://github.com/alexmccreight/susieR2.0-paper References [1]. ↵ Wang G , Sarkar A , Carbonetto P , Stephens M. A Simple New Approach to Variable Selection in Regression, with Application to Genetic Fine Mapping . Journal of the Royal Statistical Society Series B: Statistical Methodology . 2020 Dec 1 ; 82 ( 5 ): 1273 – 300 . OpenUrl CrossRef PubMed [2]. ↵ Cui R , Elzur RA , Kanai M , Ulirsch JC , Weissbrod O , Daly MJ , et al. Improving fine-mapping by modeling infinitesimal effects . Nature Genetics . 2024 Jan 1 ; 56 ( 1 ): 162 – 9 . OpenUrl CrossRef PubMed [3]. Gao B , Zhou X. MESuSiE enables scalable and powerful multi-ancestry fine-mapping of causal variants in genome-wide association studies . Nature Genetics . 2024 Jan 1 ; 56 ( 1 ): 170 – 9 . OpenUrl CrossRef PubMed [4]. Lu Z , Wang X , Carr M , Kim A , Gazal S , Mohammadi P , et al. Improved multiancestry fine-mapping identifies cis-regulatory variants underlying molecular traits and disease risk . Nature Genetics . 2025 Aug 1 ; 57 ( 8 ): 1881 – 9 . OpenUrl CrossRef PubMed [5]. Zhao S , Crouse W , Qian S , Luo K , Stephens M , He X. Adjusting for genetic confounders in transcriptome-wide association studies improves discovery of risk genes of complex traits . Nature Genetics . 2024 Feb 1 ; 56 ( 2 ): 336 – 47 . OpenUrl CrossRef PubMed [6]. ↵ Yuan K , Longchamps RJ , Pardiñas AF , Yu M , Chen TT , Lin SC , et al. 
Fine-mapping across diverse ancestries drives the discovery of putative causal variants underlying human complex traits and diseases . Nature Genetics . 2024 Sept 1 ; 56 ( 9 ): 1841 – 50 . OpenUrl CrossRef PubMed [7]. ↵ Denault WRP , Carbonetto P , Li R , The Alzheimer’s Disease Functional Genomics Consortium , Wang G , Stephens M. Accounting for uncertainty in residual variances improves calibration for fine-mapping with small sample sizes . bioRxiv . 2025 Jan 1 ;2025.05.16.654543. [8]. Zou Y , Carbonetto P , Xie D , Wang G , Stephens M. Fast and flexible joint fine-mapping of multiple traits via the Sum of Single Effects model . bioRxiv . 2025 Jan 1 ;2023.04.14.536893. [9]. Denault WRP , Sun H , Carbonetto P , Liu A , De Jager PL , Bennett D , et al. fSuSiE enables fine-mapping of QTLs from genome-scale molecular profiles . bioRxiv . 2025 Jan 1 ;2025.08.17.670732. [10]. Rossen J , Shi H , Strober BJ , Zhang MJ , Kanai M , McCaw ZR , et al. MultiSuSiE improves multi-ancestry fine-mapping in All of Us whole-genome sequencing data . medRxiv . 2024 Jan 1 ;2024.05.13.24307291. [11]. Weissbrod O , Hormozdiari F , Benner C , Cui R , Ulirsch J , Gazal S , et al. Functionally informed fine-mapping and polygenic localization of complex trait heritability . Nature Genetics . 2020 Dec 1 ; 52 ( 12 ): 1355 – 63 . OpenUrl CrossRef PubMed [12]. Zhang X , Jiang W , Zhao H. Integration of expression QTLs with fine mapping via SuSiE . PLOS Genetics . 2024 Jan 25 ; 20 ( 1 ): e1010929 . OpenUrl [13]. Zou Y , Carbonetto P , Wang G , Stephens M. Fine-mapping from summary data with the “Sum of Single Effects” model . PLOS Genetics . 2022 July 19 ; 18 ( 7 ): e1010299 . OpenUrl CrossRef PubMed [14]. ↵ Strober BJ , Zhang MJ , Amariuta T , Rossen J , Price AL . Fine-mapping causal tissues and genes at disease-associated loci . Nature Genetics . 2025 Jan 1 ; 57 ( 1 ): 42 – 52 . OpenUrl CrossRef PubMed [15]. ↵ Rasooly D , Peloso GM , Giambartolomei C. 
Bayesian Genetic Colocalization Test of Two Traits Using coloc . Current Protocols . 2022 Dec 1 ; 2 ( 12 ): e627 . OpenUrl [16]. ↵ Sun BB , Chiou J , Traylor M , Benner C , Hsu YH , Richardson TG , et al. Plasma proteomic associations with genetics and health in the UK Biobank . Nature . 2023 Oct 1 ; 622 ( 7982 ): 329 – 38 . OpenUrl CrossRef PubMed [17]. ↵ Aguet F , Brown AA , Castel SE , Davis JR , He Y , Jo B , et al. Genetic effects on gene expression across human tissues . Nature . 2017 Oct 1 ; 550 ( 7675 ): 204 – 13 . OpenUrl CrossRef PubMed Web of Science [18]. ↵ Kurki MI , Karjalainen J , Palta P , Sipilä TP , Kristiansson K , Donner KM , et al. FinnGen provides genetic insights from a well-phenotyped isolated population . Nature . 2023 Jan 1 ; 613 ( 7944 ): 508 – 18 . OpenUrl CrossRef PubMed [19]. ↵ Zhang W , Najafabadi H , Li Y. SparsePro: An efficient fine-mapping method integrating summary statistics and functional annotations . PLOS Genetics . 2023 Dec 28 ; 19 ( 12 ): e1011104 . OpenUrl PubMed [20]. ↵ Servin B , Stephens M. Imputation-Based Analysis of Association Studies: Candidate Regions and Quantitative Traits . PLOS Genetics . 2007 July 27 ; 3 ( 7 ): e114 . OpenUrl PubMed [21]. ↵ Stephens M. False discovery rates: a new deal . Biostatistics . 2017 Apr 1 ; 18 ( 2 ): 275 – 94 . OpenUrl CrossRef PubMed [22]. ↵ Kim Y , Wang W , Carbonetto P , Stephens M. A flexible empirical Bayes approach to multiple linear regression and connections with penalized regression . Journal of Machine Learning Research . 2024 ; 25 ( 185 ): 1 – 59 . OpenUrl CrossRef PubMed [23]. ↵ Spector A , Janson L. Controlled Discovery and Localization of Signals via Bayesian Linear Programming . Journal of the American Statistical Association . 2025 Jan 2 ; 120 ( 549 ): 460 – 71 . OpenUrl References for Supplementary Notes [1]. ↵ G. Wang , A. Sarkar , P. Carbonetto , and M. Stephens . 
“ A simple new approach to variable selection in regression, with application to genetic fine mapping ”. In: Journal of the Royal Statistical Society Series B: Statistical Methodology 82 . 5 ( 2020 ), pp. 1273 – 1300 . OpenUrl [2]. ↵ Y. Kim , W. Wang , P. Carbonetto , and M. Stephens . “ A flexible empirical Bayes approach to multiple linear regression and connections with penalized regression ”. In: Journal of Machine Learning Research 25 . 185 ( 2024 ), pp. 1 – 59 . OpenUrl [3]. ↵ M. Stephens . “ False discovery rates: a new deal ”. In: Biostatistics 18 . 2 ( 2017 ), pp. 275 – 294 . OpenUrl [4]. ↵ D. M. Blei , A. Kucukelbir , and J. D. McAuliffe . “ Variational inference: A review for statisticians ”. In: Journal of the American statistical Association 112 . 518 ( 2017 ), pp. 859 – 877 . OpenUrl [5]. ↵ D. Bertsekas . Nonlinear Programming. Athena Scientific , 1999 . [6]. ↵ L. R. Lloyd-Jones , A. Holloway , A. McRae , et al. “ The Genetic Architecture of Gene Expression in Peripheral Blood ”. In: American Journal of Human Genetics 100 . 2 ( Feb . 2017 ), pp. 228 – 237 . OpenUrl [7]. ↵ W. Zhang , H. Najafabadi , and Y. Li . “ SparsePro: An efficient fine-mapping method integrating summary statistics and functional annotations ”. In: PLOS Genetics 19 . 12 ( Dec . 2023 ). Publisher: Public Library of Science, e1011104 . OpenUrl [8]. ↵ A. Spector and L. Janson . “ Controlled Discovery and Localization of Signals via Bayesian Linear Programming ”. In: Journal of the American Statistical Association 120 . 549 ( Jan . 2025 ). Publisher: ASA Website, pp. 460 – 471 . ISSN: 0162-1459 . doi: 10.1080/01621459.2024.2347667 . OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted December 25, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. 
Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following SuSiE 2.0: improved methods and implementations for genetic fine-mapping and phenotype prediction Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share SuSiE 2.0: improved methods and implementations for genetic fine-mapping and phenotype prediction Alexander McCreight , Yanghyeon Cho , Ruixi Li , Daniel Nachun , Hao-Yu Gan , Peter Carbonetto , Matthew Stephens , William R.P. Denault , Gao Wang bioRxiv 2025.11.25.690514; doi: https://doi.org/10.1101/2025.11.25.690514 Share This Article: Copy Citation Tools SuSiE 2.0: improved methods and implementations for genetic fine-mapping and phenotype prediction Alexander McCreight , Yanghyeon Cho , Ruixi Li , Daniel Nachun , Hao-Yu Gan , Peter Carbonetto , Matthew Stephens , William R.P. 
Denault , Gao Wang bioRxiv 2025.11.25.690514; doi: https://doi.org/10.1101/2025.11.25.690514 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17690) Bioengineering (13892) Bioinformatics (41935) Biophysics (21451) Cancer Biology (18587) Cell Biology (25499) Clinical Trials (138) Developmental Biology (13377) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24318) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88601) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15152) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-06-15T06:18:04.506796+00:00