ProVADA: Generation of Subcellular Protein Variants via Ensemble-Guided Test-Time Steering

doi:10.1101/2025.07.11.664238

ProVADA: Generation of Subcellular Protein Variants via Ensemble-Guided Test-Time Steering

2025 · doi:10.1101/2025.07.11.664238

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 63,585 characters · extracted from preprint-html · click to expand

ProVADA: Generation of Subcellular Protein Variants via Ensemble-Guided Test-Time Steering | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results ProVADA: Generation of Subcellular Protein Variants via Ensemble-Guided Test-Time Steering View ORCID Profile Wenhui Sophia Lu , View ORCID Profile Xiaowei Zhang , View ORCID Profile Luis S. Mille-Fragoso , Haoyu Dai , View ORCID Profile Xiaojing J. Gao , View ORCID Profile Wing Hung Wong doi: https://doi.org/10.1101/2025.07.11.664238 Wenhui Sophia Lu 1 Department of Statistics, Stanford University , Stanford, CA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Wenhui Sophia Lu For correspondence: sophialu{at}stanford.edu zhangxw{at}stanford.edu xjgao{at}stanford.edu whwong{at}stanford.edu Xiaowei Zhang 2 Department of Bioengineering, Stanford University , Stanford, CA, USA 3 Sarafan ChEM-H, Stanford University , Stanford, CA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Xiaowei Zhang For correspondence: sophialu{at}stanford.edu zhangxw{at}stanford.edu xjgao{at}stanford.edu whwong{at}stanford.edu Luis S. Mille-Fragoso 2 Department of Bioengineering, Stanford University , Stanford, CA, USA 3 Sarafan ChEM-H, Stanford University , Stanford, CA, USA 5 Stanford Bio-X, Stanford University , Stanford, CA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Luis S. Mille-Fragoso Haoyu Dai 4 Department of Chemical Engineering, Stanford University , Stanford, CA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Xiaojing J. Gao 4 Department of Chemical Engineering, Stanford University , Stanford, CA, USA 5 Stanford Bio-X, Stanford University , Stanford, CA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Xiaojing J. Gao For correspondence: sophialu{at}stanford.edu zhangxw{at}stanford.edu xjgao{at}stanford.edu whwong{at}stanford.edu Wing Hung Wong 1 Department of Statistics, Stanford University , Stanford, CA, USA 6 Department of Biomedical Data Science, Stanford University , Stanford, CA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Wing Hung Wong For correspondence: sophialu{at}stanford.edu zhangxw{at}stanford.edu xjgao{at}stanford.edu whwong{at}stanford.edu Abstract Full Text Info/History Metrics Preview PDF Abstract Engineering protein variants to function in exogenous environments remains a significant challenge due to the complexity of sequence and fitness landscapes. Experimental strategies often require extensive labor and domain expertise. While recent advances in protein generative modeling offer a promising in silico alternative, many of these methods rely on differentiable fitness predictors, which limits their applicability. To this end, we introduce Protein Variant ADAptation (ProVADA), an ensemble-guided, test-time steering framework that combines implicit generative priors with fitness oracles via a composite functional objective. ProVADA leverages Mixture-Adaptation Directed Annealing (MADA), a novel sampler integrating population-annealing, adaptive mixture proposals, and directed local mutations. Furthermore, ProVADA requires no gradients or explicit likelihoods, yet efficiently concentrates sampling on high-fitness, low-divergence variants. We demonstrate its effectiveness by in silico redesigning human renin for cytosolic functionality. Our results achieve significant gains in predicted localization fitness while preserving structural integrity. 1: Introduction Protein engineering—the search for sequence variants that exhibit desired functional properties—is a foundational technology in biological engineering. However, purely experimental approaches remain challenging mostly due to the combinatorial 20 L (where L : sequence length) sequence space to be explored.The challenge is further compounded by the complexity of the underlying fitness landscape, a subset of the sequence space where the protein exerts desired functionality. Consequently, experimental approaches often require extensive domain expertise and large-scale, iterative rounds of mutagenesis and screening, both of which are labor-intensive and costly. One particularly challenging instance is engineering protein variants to function in radically different environments,e.g. from extracellular to cytoplasm. This problem is pressing for two reasons. First, protein activity is highly context-dependent: subcellular compartments differ markedly in pH, redox potential, ionic strength, and other physicochemical parameters, all of which can compromise fold stability and catalytic function in a non-native environment ( 4 ). Second, many biotechnological and therapeutic applications demand proteins to operate reliably outside their endogenous environment, yet such repurposing frequently leads to impaired functionalities ( 6 ). Recent advances in machine learning demonstrate great potential in overcoming those bottlenecks. Protein language models trained on millions of sequences capture evolutionary constraints and can propose viable mutations ( 25 ; 27 ). On the other hand, diffusion-based generative models provide an alternative paradigm for efficiently sampling plausible variants. Finally, state-of-the-art, accurate structure prediction methods and downstream inverse-folding networks enable conditional sequence design that preserves a target fold ( 24 ; 1 ; 11 ). By proposing promising candidates, these in silico and hybrid approaches dramatically reduce the search space and, in turn, accelerate experimental protein engineering campaigns ( 19 ; 21 ; 30 ). While pretrained priors capture certain fitness attributes like stability, they offer limited guidance for more complex, higher-order fitness objectives—such as environment- or localization-specific functionality—resulting in inefficient optimization. To this end, we present Pro tein V ariant ADA ptation (ProVADA), an ensemble-guided, test-time steering framework for engineering functional proteins targeted to specific subcellular locations. ProVADA employs a novel Mixture-adaptation-directed Annealing sampling algorithm to efficiently explore the vast sequence space under multiple expert guidance. 1.1. Contributions We summarize our key contributions and the advantages of ProVADA from four perspectives: Likelihood-free, implicit generative priors . ProVADA is compatible with any base generative model, including autoregressive (e.g., ProteinMPNN), transformer-based (e.g., ESM2), or diffusion models, without requiring explicit density or likelihood evaluation. Test-time, ensemble-guided sampling . By optimizing a composite functional objective, ProVADA effectively leverages an “ensemble of expert models” to direct sequence generation in a fully gradient-free manner. Mixture-Adaptation Directed Annealing (MADA) . We develop a novel sampling framework that integrates population annealing and directed local mutations to efficiently explore complex fitness landscapes. Application to the in silico redesign of cytosolic renin . We apply ProVADA to the difficult task of engineering the catalytic domain of human renin for cytosolic functionality. Our results demonstrate significant improvements in predicted localization fitness while preserving structural integrity compared to rejection-sampling baselines. 1.2. Related literature Classifier-guided and plug-and-play methods steer sequence generation by backpropagating through a differentiable surrogate objective ( 14 ; 10 ; 2 ). By applying gradients of this proxy reward during generation, these methods bias a pretrained generative model towards desired properties. However, these approaches rely on differentiable surrogate models and thus cannot accommodate non-differentiable or black-box scoring functions. On the other hand, fine-tuning and preference-learning methods, such as classifier-free guidance ( 20 ) and reinforcement-learning-based fine-tuning, adapt model parameters to optimize downstream reward objectives. While effective, these methods typically require substantial computational resources and large amounts of labeled datasets. Additionally, they can suffer from catastrophic forgetting, where the model loses previously acquired knowledge from pertaining. Sampling-based test-time steering methods instead generate candidate sequences from a base model and evaluate them with external scoring functions. Simple strategies merely filter top-scoring samples, whereas more informed approaches use ProteinMPNN to fill masked positions before applying a selection step ( 30 ). Though easy to implement, these methods often struggle when exploring high-dimensional sequence spaces with complex fitness landscapes, especially when the base model’s distribution poorly aligns with regions of high fitness. 2: ProVADA: Test-Time Steering Ensemble-Guided Protein Variant Adaptation Problem setup Let the protein sequence length be L , and define the discrete sequence space 𝒳= {1, …, 20} L whose elements encode all possible amino-acid strings of length L . We start from a given wild-type reference sequence x 0 ∈ 𝒳 To generate candidate variants, we assume access to a generative model (e.g. ProteinMPNN) that induces an implicit prior p ϕ ( x ) over 𝒳 from which we can efficiently draw samples, even if it may lack an explicit,tractable density form. Additionally, let F : 𝒳 → ℝ be a potentially black-box, gradient-free fitness oracle, where larger values correspond to superior fitness. Our objective is to discover variants x of x 0 that both conform to this prior and yield high scores under the fitness function. At a high level, ProVADA consists of two key components. First, we train a classifier that predicts the target localization on a dataset with labeled sequence-fitness pairs. Second, given a reliable fitness function F ( x ) and an initial wild-type sequence x 0 , we wish to efficiently generate protein variants of x 0 from our generative prior that also achieve high fitness. Specifically, we construct a target sampling distribution proportional to the generative prior exponentially tilted by the tempered fitness score: where D (·,·) measures sequence-level divergence (e.g. Hamming distance), λ > 0 is a tunable penalty coefficient, and τ is a temperature parameter that governs how sharply sampling concentrates on high-fitness subspace. As τ decreases, the sampler becomes more concentrated on the top-scoring sequences, whereas higher τ yields broader exploration. By optimizing a composite functional objective, ProVADA effectively leverages the strength of an ensemble of expert models . The implicit generative prior p ϕ captures broad, low-level constraints—structural integrity, foldability, solubility—while each supervised fitness predictor F ( x ) specializes in a particular design objective (e.g., localization, enzymatic activity, binding affinity). By annealing our sampling distribution over the product of the prior and a tempered, aggregated fitness term, ProVADA concentrates on variants that satisfy the foundational constraints and simultaneously score highly under each expert’s guidance. This ensemble strategy yields high-confidence, multifunctional candidates that are robust to the idiosyncrasies of any single model and affords practitioners the flexibility to tailor and combine arbitrarily many design objectives. Remark 1 . Note that the ProVADA target distribution can be interpreted as a Gibbs posterior (or “pseudo-Bayes” posterior) ( 5 ) induced by the loss function H λ ( x ). In the sections that follow, we first describe how to construct the fitness function F ( x ) for subcellular localization via a classifier that outputs the probability of a sequence residing in the target compartment, and then demonstrate how to adaptively sample from to generate high-fitness variants under our novel sampling procedure. 2.1. Constructing the subcellular fitness score We formulate subcellular localization as a supervised classification task. Let the training set be , where x ( i ) ∈ 𝒳 is a protein sequence and y ( i ) ∈ {0, 1} indicates its corresponding presence in the target compartment. We learn a classifier model F θ ( x ) ∈ [0, 1] that outputs the estimated probability of localization. The model parameters θ are optimized by minimizing the empirical binary cross-entropy loss where γ > 0 controls the strength of the 𝓁 2 regularization. Once training converges, yields the predicted probability of correct localization, which we treat as a black-box score to guide our sampling. For notational brevity, we henceforth omit the explicit dependence on θ and denote our trained predictor simply as F ( x ). 2.2. Specifying the notion divergence To discourage excessive deviation from the wild-type scaffold, we introduce a mismatch penalty term based on the Hamming distance. For any candidate sequence x ∈ 𝒳 and reference x 0 , the Hamming distance, defined as counts the number of positions at which x and x 0 differ. Thus, our target distribution becomes By increasing λ , we amplify the Hamming-distance penalty within the annealed score, so that each additional residue mismatch is penalized more heavily. Consequently, sequences that diverge further from the reference incur exponentially larger penalties in their weights, thereby guiding the sampler toward high-fitness variants that remain close to the reference wild-type. 2.3. Mixture-Adaptation Directed Annealing We now introduce M ixture- A daptation D irected A nnealing (MADA), a novel sampling framework that efficiently explores high-dimensional, complex composite functional landscapes by integrating mixture-based adaptive proposals, directed local mutation kernels, population-annealed sequential importance sampling, and controlled resampling. MADA comprises three main components: selection, mutation , and stabilization . At each iteration, MADA maintains a small mixture of promising particle prototypes that generate N offspring through importance resampling with partial rejection control. This population-based approach simultaneously preserves diversity through parallel exploration while effectively concentrating computational effort on the highest-potential regions. Each offspring is then refined by a single Metropolis–Hastings step via fitness-guided local mutation kernels and a gradually decaying temperature schedule to transition smoothly from exploration to exploitation. These components are integrated into a unified algorithmic procedure to enable sequential and iterative refinement of the entire population. The following subsections provide a detailed description of each sampling step; the complete algorithmic procedure for MADA is provided in Algorithm 1. 2.3.1 Initialization Let p S be the masking probability. We first sample the number of mutated sites and then choose M distinct positions S = { i 1 , …, i M } ⊆ {1,…, L } uniformly at random, i.e., By construction, this guarantees that every proposed variant lies within an M -Hamming neighborhood of the reference wild-type x 0 . Next, we generate an initial population of N particles by masking x 0 at positions given by S and repeatedly sampling candidates from the implicit generative prior p ϕ . Concretely, for each i = 1, …, N , we draw so that its marginal law, conditioned on the masking locations, factorizes as Finally, we initialize the temperature to τ 0 = ∞ , so that 1 /τ 0 = 0 and hence reduces to uniform weights at t = 0, and thus provides a natural warm start with . 2.3.2 Selection Having drawn samples from , we transition to the next tempered distribution via importance resampling. To do so, we first compute the annealed importance weights for each particle : where H λ ( x ) = F ( x ) − λ D ( x 0 , x ) is the adjusted fitness score. We then perform a two-stage resampling to eliminate particles with low importance weights, inspired by the bootstrap filter and partial rejection control ( 12 ): Prototype selection: Draw a small set of K prototype particles by sampling with replacement from according to the normalized annealed weights Population reconstruction: Regenerate a full population of N particles by sampling uniformly withreplacement from the retained K prototypes. This completes one round of selection and yields , which contains at most K distinct proposals for the subsequent mutation stage. As to be shown in Theorem 2, this two-stage resampling procedure preserves the statistical unbiasedness of ordinary importance sampling. The down-sample–up-sample procedure above offers two advantages: it concentrates computational effort on the most promising regions and, by reusing a limited set of prototypes, amortizes costly invocations of the generative prior. In the ProteinMPNN setting, reusing the same mask and sequence context across multiple generations substantially reduces expensive model calls and reduces runtime by approximately 75% (see Figure S2 ). Greedy selection As an alternative to stochastic resampling, one can use a greedy selection strategy: After computing the annealed weights , deterministically retain only the top K particles with the largest weights. Then rebuild the full population of size N by sampling with replacement from the K elites according to their normalized weights. Although this top- K selection procedure introduces a bit of bias through the permanent elimination of lower-weight particles, we observe that it often results in accelerated convergence to high-fitness regions in practice. 2.3.3 Mutation In the mutation stage, each resampled particle undergoes local perturbation under the tempered target. Specifically, we draw a mask size M and select a subset S ⊆ {1, …, L } exactly as in Eq. (1) - Eq. (2) . We then fill the masked positions by sampling , conditioned on the unmasked residues of . We denote this local-move proposal kernel by This mask-then-fill procedure implements a systematic-scan Gibbs mutation kernel. Because some local moves may reduce fitness, each proposed is forwarded to the stabilization stage, where it is accepted or rejected according to the Metropolis–Hastings criterion. 2.3.4 Stabilization After generating each mutated proposal , we apply a Metropolis-Hastings (MH) accept-reject step to stabilize and direct the local exploration according to fitness. Let change in adjusted fitness denote the pre-mutation particle. We compute the change in adjusted fitness which biases acceptance toward moves that increase the fitness score or incur a lower divergence penalty. We then draw U ~ Unif[0, 1] and accept the proposal with probability If accepted, set ; otherwise retain This MH correction step stabilizes the sampler on the annealed target . Theorem 3 shows that the proposed procedure satisfies detailed balance. Algorithm 1: Mixture-Adaptation Directed Annealing Download figure Open in new tab 2.3.5 Annealing Schedule We employ a power-law cooling ( 9 ) schedule to gradually reduce both the masking fraction and the temperature τ t . At step t of T total iterations, define the normalized time . We then update where α > 0 controls the annealing rate, τ max and p S ,max denote the initial temperature and masking fraction, respectively, and τ min and p S ,min their terminal values. This power-law decay smoothly transitions the sampler from broad exploration (high-temperature, heavy masking) to focused exploitation (low-temperature, light masking). Empirically, we observe that this approach accelerates convergence while improving the final solution quality (see Appendix S3 for decay curves under different α ). 3: Application of ProVADA to In Silico Engineering of Cytosolic Renin Variants 3.1. Motivations for a practical example In this section, we conduct an in silico experiment with our pipeline to address a practical design challenge: engineering a cytosolic variant of human peptidase renin. Renin is a secreted protease with high specificity to cleave a defined peptide sequence on its natural substrate angiotensinogen ( 28 ). Owing to this high specificity, there is considerable interest in repurposing renin as a general protease tool for precise control of protein activity via targeted cleavage ( 16 ). However, because renin normally functions in the extracellular environment, our internal data suggest that cytosolic expression leads to misfolded and non-functional renin, which impedes its development as a generalized tool. The described renin engineering campaign is an appropriate test case for our pipeline because i) there are no close homologs of renin that functions in the cytoplasm, i.e., we have little or no evolutionary information to leverage; ii) we start from a zero-activity scaffold, making hybrid directed evolution approaches inefficient; iii) assays for cytosolic renin activity must be performed in live mammalian cells, which limit the throughput and are unaffordable to carry out at large scale, thus favoring test-time steering methodologies. To tackle this design challenge, we begin by constructing a reliable fitness oracle for cytosolic functionality. Motivated by the well-established links between subcellular localization and certain sequence characteristics, such as N-linked glycosylation sites and disulfide bonds ( 18 ; 4 ), we train a binary classifier on protein language model embeddings to predict the probability of cytosolic localization. To ensure our model focuses on those intrinsic protein properties that determine localization-dependent viability, we curate our dataset by removing low-complexity signals, including signal peptides and transmembrane domains. We apply MADA with our trained classifier as the fitness oracle to the human renin catalytic domain; this approach respects structural and evolutionary constraints via our generative prior while driving the search toward variants with high predicted cytosolic compatibility and preserved enzymatic fold. 3.2. Classifier construction, training and evaluation 3.2.1 Dataset preprocessing and classifier construction Dataset acquisition We curate vertebrate cytosolic and extracellular proteins from UniProt Swiss-Prot ( 3 ), restricting to entries with experimental evidence and excluding those localized to lysosomes, endosomes, peroxisomes, or mitochondria. Sequence truncation and filtering To isolate the intrinsic sequence determinants of subcellular fitness, we remove low-complexity localization signals (signal peptides and transmembrane domains) that correlate with localization but do not reflect functional fitness ( 31 ). We retain only mature peptide regions, discarding annotated signal peptides and propeptide segments. When multiple domains are annotated, we keep only individual domains of 50–1000 residues; for transmembrane proteins, we extract extracellular domains within the same size range. To balance protein-family representation, we cluster the sequences at 30% identity using MMSeqs2 ( 29 ), retaining one representative per cluster (reducing the dataset to approximately 40% of its original size). Finally, we stratify and split the data into training, validation, and test sets in a 70:20:10 ratio. Embedding generation and training We embed each protein sequence using the ESM2-650M-UR50D model due to its high downstream-task performance and balanced compute–accuracy trade-off ( 25 ). Unless otherwise specified, all references to ESM2 in this work denote this model. For each sequence, we extract per-residue representations from the final (33rd) transformer layer and mean-pool them into a 1280-dimensional vector. We then train a logistic regression classifier on these embeddings. A graphical overview appears in Figure 2A . Download figure Open in new tab Figure 1. Mixture-Adaptation Directed Annealing (MADA) algorithm overview. At each iteration, the top k sequences are randomly masked and then completed by ProteinMPNN under structural constraints. We sample candidates proportional to their current weights, evaluate fitness, and apply a Metropolis Hastings acceptance step. Accepted sequences are sorted by fitness; the highest k , with updated normalized weights move on to the next iteration. Download figure Open in new tab Figure 2. Subcellular classifier: construction and performance. ( A ) Schematics of classifier training and inference. ( B ) Heatmap of cytosolic log-likelihood ratio (LLR) for all single-point mutations to wild-type renin, shown for residues 40–90 and 250–300. Identified “hot-spot” positions where substitutions markedly increase cytosolic probability correspond to known extracellular features: disulfide bonds C51–C58, C259–C296, and the N75–T77 N-glycosylation motif. ( C ) Mapping of per-position cytosolic LLR onto the ESMFold-predicted renin catalytic domain. ( D ) KDE of all renin point mutations showing ESM2 log-likelihood ratio versus classifier LLR. 3.2.2 Classifier Performance Benchmark Benchmark against existing subcellular location predictors Several existing subcellular localization predictors leverage protein language model representations. To highlight the differences, we benchmark our classifier against three established subcellular localization predictors: DeepLoc2.0-fast (based on ESM-1b) ( 31 ), LocPro (based on ESM2 and other ensemble) ( 35 ), and MULocDeep (based on bidirectional LSTM) ( 22 ). We evaluate performance on two benchmark datasets: (i) non-representative sequences from our curated Swiss-Prot data and (ii) the Human Protein Atlas (HPA) Secretome dataset ( 32 ). Table 1 summarizes results: on Swiss-Prot, we report weighted-average AUROC for cytosolic versus extracellular predictions (see detailed metrics in Table S1 ). Since the HPA-Secretome dataset includes only extracellular proteins, we measure performance by extracellular classification accuracy. View this table: View inline View popup Download powerpoint Table 1. Performance comparison of our classifier against three baselines on two datasets. Boldface indicates the best result for each dataset. As shown in Table 1 , our classifier outperforms existing predictors on the curated Swiss-Prot dataset, as expected given that those methods trained on intact protein sequences focus primarily on signal sequences ( 31 ). Table S1 further demonstrates that the greatest improvement of our classifier performance comes from the high cytosolic label precision and extracellular label recall. Finally, our classifier also surpasses existing models on the orthogonal HPA-Secretome dataset of intact secreted proteins, again indicating that it captures intrinsic sequence determinants of native localization rather than depending on low-complexity signal peptides. Classifier probabilities identify sequence features that align with domain knowledge We assess whether the probabilities output by the classifier reflect a sequence’s propensity to localize to the target compartment. To this end, we perform an in silico deep mutational scan (DMS) on the 340-residue catalytic domain of human renin, comparing each mutant’s predicted probability to that of the wild-type sequence. Analysis of these probability shifts reveals “hot-spot” positions where substitutions strongly favor cytosolic localization over the wild-type sequence. Many of these positions coincide with known extracellular signatures, including two documented NXT N-glycosylation sites (N5-T7, N75-T77) and three disulfide bonds (C51-C58, C217-C221, C259-C296). Figure 2B exemplifies three such positions through heatmaps showing cytosolic probability shifts relative to wild-type renin, with the complete DMS heatmap provided in Figure S1 . By mapping these “hot-spots” to the renin structure via summation of probability shifts by position, Figure 2C demonstrates that these “hot-spots” are dispersed throughout the structure without enrichment in specific regions or surface areas, indicating that efforts to engineer cytosolic renin variants should adopt a global, structure-wide strategy. Moreover, despite identifying “hot-spots”, the small effect of each individual mutation implies that multiple substitutions are required to achieve a meaningful increase in cytosolic probability. Furthermore, the KDE plot in Figure 2D reveals weak correlation between classifier probability shifts and ESM2 logit likelihoods across all single-point mutations. This suggests that, despite training on ESM2 embeddings, the classifier’s localization landscape operates largely orthogonal to ESM2’s intrinsic fitness landscape. These observations underscore the complexity of classifier-guided renin design and motivate our MADA algorithm, which incorporates structural information while enabling broad, global sequence exploration. 3.3 Empirical Evaluations of MADA for Cytosolic Renin Engineering In this subsection, we present in silico results obtained from using MADA to design cytosolic variants of the human renin catalytic domain. The wild-type sequence exhibits a low predicted cytosolic probability of 0.035 by our classifier. We compare ProVADA against two rejection-sampling baselines, and a generative baseline from the built-in guided generation approach in ESM3 ( 17 ). The naïve rejection sampler randomly masks up to 50% of positions and replaces each with a uniformly sampled amino acid. The ProteinMPNN-based rejection sampler uses the same masking scheme (up to 100% of sites) but refills masked positions using ProteinMPNN’s generative prior. All ProteinMPNN-based masked generations in this manuscript are generated with the temperature set at 0.5 with cysteine- and non-canonical residue-free designs. For ESM3-guided generation, we use the ESM3-open model ( 15 ), fix its predicted structural and functional tokens for the renin catalytic domain, and steer decoding with cytosolic probabilities from our classifier. 3.3.1 Runtime cost vs. number of ProteinMPNN fill calls We measure wall-clock time under two regimes to illustrate the efficiency gains of our down-sample–up-sample strategy: i) invoking ProteinMPNN separately for each of 10 masked sequences (10 calls), and ii) a single invocation that returns 10 filled sequences in one batch. We repeat each experiment 10 times and report the average runtimes in Figure S2 , where the results show that our down-sample–up-sample strategy improved runtime efficiency by approximately 4-fold. 3.3.2 Comparison of fitness scores for generated variants We benchmark MADA against the three baselines. We collect 1500 sequences from the final iteration of 50 independent MADA runs (30 iterations each, greedily retaining the top 20% per round). The initial temperature is set to τ 1 = 2.0, and we employ a power-law cooling schedule with α = 3.0 (see Figure S3 ). Both the naïve and ProteinMPNN rejection samplers yield 1,500 sequences each, while the ESM3 generative baseline is limited to 200 sequences for computational traceability. In Figure 3A , we compare cytosolic probability distributions across methods. The fitness distribution of MADA variants exhibits a clear mode around 0.7, with most variants exceeding the 0.5 label-flipping threshold, while both rejection baselines’ outputs remain near the wild-type probability. MADA achieves a 9.5-fold increase in predicted fitness over these baselines. On the other hand, ESM3-generated variants center near 0.4, with only a few surpassing the threshold. Figure 3B shows that MADA variants carry on average 47% mutations, whereas ESM3 variants exceed 50% yet retain higher cosine similarity to wild-type embeddings. The MADA mutation profile thus occupies a viable yet diverged range from the wild-type renin; such distinction is appropriate and necessary given the absence of cytosolic renin homologs. Download figure Open in new tab Figure 3. MADA sampling performance. (A) Cytosolic probability distributions from MADA (1500 samples), ESM3 (200), naïve rejection (1500), and ProteinMPNN prior (1500); red line = WT, dotted = 0.5 threshold. (B) Mutation fraction and ESM2-embedding cosine similarity to WT renin. (C) Example MADA trajectory (100 iterations, 30 chains): cytosolic probability and mutation fraction. (D) Decline of extracellular signatures over iterations; inset Logos of N75–T77 motif at iterations 1, 5, 15. (E) LogoPlots of four extracellular signatures in final top variants. (F) Trajectories of cytosolic probability, adjusted fitness, MPNN score, and mutation fraction across 50 runs; right: KDE of final values (bold = mean). To assess convergence within a single MADA run, we execute 100 iterations with a population of 30 sequences per round, greedily retaining the top 20% at each step. Starting from a masking fraction of 0.2, temperature τ 1 = 2.0, and Hamming-penalty λ = 0.1, we track the best fitness score and mutation fraction over sampling ( Figure 3C ). Cytosolic probability plateaus by iteration 40, while mutation fraction peaks at 0.5 by iteration 20 before declining under the divergence penalty. With our annealing schedule, 30 iterations strike a practical balance, reaching near convergence with manageable computation. We further characterize the top variants from 50 independent MADA runs (30 iterations each). As shown in Figure 3D , counting extracellular signatures in the highest-fitness sequence at each iteration reveals a rapid drop to near zero within 5 iterations. We also track the sequence logo at the N75–T77 N-glycosylation site, which demonstrates progressive loss of the motif pattern over successive iterations. In Figure 3E , sequence logos for four extracellular signatures in the final MADA variants illustrate their elimination—most notably, both NXT glycosylation motifs are efficiently removed. Finally, Figure 3F shows convergence across 50 runs for i) cytosolic probability, ii) adjusted fitness, iii) MPNN score, and iv) mutation fraction. 3.3.3 Sequence analysis of MADA sampled variants We analyze the 50 highest-fitness MADA variants’ sequence features, including keyword annotations, homology relationships, and per-position mutation frequencies. First, we run InterProScan on the 50 MADA variants alongside 50 ESM3-generated sequences ( 23 ). Figure 4A shows that both sets preserve the superfamily and domain-level keywords present in WT renin; all ESM3 sequences and 48 of the 50 MADA variants retain the conserved aspartyl protease active sites. Neither group preserves the renin-family annotation, which is unsurprising given the roughly 50% mutation rate. Both also introduce novel keywords absent from the wild-type sequence, including family-level signatures for pepsin and beta-site APP-cleaving enzymes. Notably, MADA variants frequently recover lysosomal aspartic protease keywords—half even annotate as Cathepsin D, whereas ESM3 sequences favor vacuolar aspartic protease, Pepsin A, and Napsin A keywords. These findings indicate that MADA produces variants retaining core aspartic protease functionality. Download figure Open in new tab Figure 4. Sequence analysis of MADA-sampled renin variants. (A) InterProScan keyword heatmap for 50 MADA vs. ESM3 variants (WT keywords boxed in red). (B) Pairwise Hamming distance distributions: MADA–MADA (orange) and MADA–WT renin (purple). (C) ESM2 embedding cosine similarity: MADA–MADA (orange) vs. natural renin homologs (green). Renin homologs are retrieved by BlastP search on WT renin catalytic domain, then taking representatives by MMSeq2 clustering at 90% similarity threshold ( 29 ). (D) Per-position mutation frequency across 50 MADA variants. Next, we assess MADA output diversity via pairwise Hamming distances and cosine similarities in the ESM2 embedding space. Figure 4B shows that the generated variants exhibit an average pairwise Hamming distance of approximately 120 (30% mutations), which is significantly lower than their Hamming distances to WT renin (Hamming distance around 170, 45% mutations). In Figure 4C , the distributions of pairwise cosine similarity from MADA variants and natural renin homologs (from BlastP search) overlap closely, indicating comparable sequence-space diversity. Figure S4 in the Appendix confirms that MADA variants and homologs form separate clusters. These results indicate that MADA produces a set of distant yet plausible variants, introducing sufficient diversity to enable cytosolic localization without compromising core structural and functional integrity. Finally, we plot per-position mutation frequencies for the MADA renin variants in Figure 4D . Both catalytic Aspartic residues (D38 and D226) remain fully conserved, and the conserved motifs flanking these two catalytic sites are also largely retained. In contrast, the active-site flap (T80–G90) is highly variable; several key residues involved in substrate binding including Y83 and S84 are almost entirely substituted ( 34 ; 26 ). The results above suggest that MADA variants preserve Aspartic protease activity but will likely lose renin-specific substrate selectivity. To maintain realistic renin specificity, it will be necessary to hard fix substrate-contacting residues. 3.3.4 Structural analysis of MADA sampled variants In this subsection, We evaluate the structural viability of MADA-sampled renin variants by predicting their folded structures with ESMFold and assessing their alignment to the native renin catalytic domain, compared against variants generated via rejection sampling as well as ESM3 guided-generation. The same 50 sequences used in Figure 4 were analyzed for MADA and ESM3 generated renin variants. For both baseline rejection sampled variants, top-50 variants on fitness score from a total of 1500 generated sequences are selected. Figure 5A left and middle panel shows the overall structural confidence of these sequences and structural deviation to the input renin structures, and the right panel shows the evolutionary plausibility by ESM2 pseudo-perplexity. It is evident that ProteinMPNN-conditioned variants maintain overall structural integrity without compromising evolutionary plausibility, whereas structural and evolutionary integrity can easily break by random mutation. Likewise, ESM3 generated sequences with structural and keyword constraints also maintain structural integrity very well, yet a clear downshift of ESM2 perplexity is observed. This observation is not surprising since ESM3-based generation are expected to optimize on evolutionary likelihood, yet this feature might not be necessary for optimizations that does not reinforce evolutional constraints. This observation also helps explain the low fitness scores in ESM3-generated sequences, as the model’s evolutionary constraints could hinder optimization of fitness measures that are independent from evolutionary likelihood. Download figure Open in new tab Figure 5. Structural analysis of MADA sampled cytosolic renin variants. ( A ) Comparison of structural metrics and ESM2 pseudo-perplexity of the top 50 sequences (by cytosolic probability). Structures of these sequences are predicted by ESMFold. Left: Comparison of structural confidence as revealed by pLDDT. Middle: Comparison of structural deviation by the aligned root mean square deviation (RMSD). Right: Comparison of ESM2 pseudo-perplexity of the sequences. Red dashed line : WT renin pseudo-perplexity. ( B ) Representative structural alignment of variants generated by i) MADA, ii) ESM3 and iii) Naive rejection to input renin structure (gray). ( C ) Zoomed-in structure of the two catalytic Aspartate residue side chains. Side chains of the catalytic Aspartate on the input renin is highlighted in yellow. To better highlight the structural contrasts between variants generated via different methods, Figure 5B presents representative structures from i) MADA, ii) ESM3, and iii) naive rejection sampling, all aligned to the input renin structure. Both MADA and ESM3-generated variants align very well with the input renin structure, whereas the randomly sampled rejection variant exhibits complete misfolding of a subdomain at the bottom of the structure. Figure 5C provides a detailed view of the catalytic dyad side chains within the three aligned structures, revealing that MADA and ESM3 variants maintain side chain conformations nearly identical to the input structure, while the naive rejection sampled variant completely loses the expected catalytic dyad geometry. These results demonstrate that despite achieving efficient fitness score optimization, MADA-sampled variants successfully retain structural integrity while implicitly preserving evolutionary likelihood. Conclusion In this work, we present ProVADA, a test-time steering, ensemble-guided framework powered by our novel Mixture-Adaptation Directed Annealing (MADA) sampler. ProVADA efficiently generates protein variants tailored to desired fitness objectives while maintaining structural and evolutionary integrity. Leveraging our high-accuracy subcellular localization classifier, we demonstrate ProVADA’s effectiveness through in silico engineering of cytosolic renin variants. Notably, MADA achieves a remarkable 9.5-fold increase in sampling efficiency compared to conventional masked-fill rejection sampling. Our experiments demonstrate that ProVADA delivers diverse renin variants with substantially improved predicted cytosolic localization while preserving both structural stability and evolutionary plausibility. Beyond engineering subcellular variants, ProVADA demonstrates broad applicability across diverse protein design challenges. By training predictors for specific compartments such as endosomes or mitochondria, ProVADA can optimize protein stability for intracellular therapeutics requiring endosomal escape ( 8 ) or enhance the efficiency of mitochondrial base editors ( 13 ). Furthermore, with recent advances in immunogenicity prediction ( 7 ), ProVADA could enable the guided generation of de-immunized variants for therapeutic protein humanization ( 33 ). Collectively, ProVADA provides a versatile, structure-aware framework for directing protein design across varied functional landscapes, offering significant potential for protein variant engineering applications. Several directions warrant consideration for future work. While our approach omits explicit substrate-interaction guidance, potentially compromising substrate specificity, this can be readily addressed by fixing critical contact residues or imposing additional structural constraints. Furthermore, despite ProVADA’s strong in silico performance, experimental validation remains essential to confirm the real-world effectiveness of engineered variants. These directions represent important avenues for future work to fully realize ProVADA’s therapeutic potential. Supplementary Information Theoretical Analysis Theorem 2. Let be a given collection of particles with corresponding normalized importance weights , where and w i ≥ 0. Consider the two-stage resampling procedure described in Section 2.3.2 , which produces samples . For each 𝓁 ∈ {1,…, N }, it holds that , and consequently for any bounded measurable function h, the estimator is unbiased for the weighted expectation Theorem 3. Consider the Markov kernel K defined as: where and . This kernel satisfies detailed balance with respect to , i.e ., for all x, x ′ . Moreover, when , the acceptance ratio simplifies to . Proof of Theorem 2 Proof: Let’s denote by Ƶ = { ζ 1 , ζ 2 , …, ζ K } the set of selected prototypes. For any 𝓁 ∈ {1, 2, …, N }, we have Since is sampled uniformly from Ƶ , we have where n i ( Ƶ ) ~ Binomial( K, w i ) is the number of times x i appears in Ƶ . Now, since each ζ j is drawn independently with replacement according to weights { w i }, the expected number of times x i appears in Ƶ is K · w i . Therefore, Combining these results, we have Hence, , and averaging over 𝓁 yields the unbiasedness of the overall estimator. Proof of Theorem 3 Proof: We verify that the kernel K satisfies detailed balance with respect to . That is, we need to show that the following expression holds If x = x ′ , this holds automatically. For x ≠ x ′ , we have Now fix a subset S . By definition of a ( x, x ′ ): Similarly, for the reverse transition: Hence, these expressions are equal. As this holds for every subset S , we thus have For the simplification of the acceptance ratio, assume We have Since (by definition of q S ), the indicators are both 1. We decompose and similarly for x ′ . This gives Therefore, This completes the proof. Supplementary Figures Download figure Open in new tab Figure S1. Full heatmap of cytosolic classifier probability LLR for every single point mutation on renin catalytic domain to WT sequence. Download figure Open in new tab Figure S2. Average runtime for filling 10 sequences over 10 repeats via ProteinMPNN—multiple separate calls (orange) versus one batched call (green). One batched call achieves approximately a 4x speedup. Download figure Open in new tab Figure S3. Comparison of power-law cooling schedules for temperature τ t and masking fraction p S,t under different decay exponents α. Curves with higher α values exhibit sharper initial declines followed by more gradual tapering. Download figure Open in new tab Figure S4. Circular dendrogram of MADA renin variants with renin homologs and human Cathepsin D by their cosine similarity in embedding space of ESM2. Orange: MADA cytosolic renin variants. Blue: Renin homologs retrieved by BlastP search as mentioned in Figure 4 , highlighted node represents human renin. Red: Human Cathepsin D. Supplementary Tables View this table: View inline View popup Download powerpoint Table S1. Full performance comparison of our localization classifier against three baselines on the curated Swiss-Prot dataset. Boldface indicates the best result for each metric. Acknowledgments The authors would like to thank Brian Trippe, Ben Viggiano, and anonymous reviewers for helpful discussions and insightful remarks. W.S.L. gratefully acknowledges support from the Stanford Data Science Fellowship and NIH grant GM 134483. X.Z. is supported by the Stanford Interdisciplinary Graduate Fellowship affiliated with ChEM-H. X.J.G is supported by the Stanford Bio-X Interdisciplinary Initiatives seed grant program (R12-8, to X.J.G.). Funder Information Declared NIH , GM 134483 Stanford Data Science Fellowship Stanford Interdisciplinary Graduate Fellowship affiliated with ChEM-H Stanford Bio-X Interdisciplinary Initiatives seed grant program , R12-8 References 1. ↵ J. Abramson , J. Adler , J. Dunger , R. Evans , T. Green , A. Pritzel , O. Ronneberger , L. Willmore , A. J. Ballard , J. Bambrick , S. W. Bodenstein , D. A. Evans , C.-C. Hung , M. O’Neill , D. Reiman , K. Tunyasuvunakool , Z. Wu , A. Žemgulytė , E. Arvaniti , C. Beattie , O. Bertolli , A. Bridgland , A. Cherepanov , M. Congreve , A. I. Cowen-Rivers , A. Cowie , M. Figurnov , F. B. Fuchs , H. Gladman , R. Jain , Y. A. Khan , C. M. R. Low , K. Perlin , A. Potapenko , P. Savy , S. Singh , A. Stecula , A. Thillaisundaram ,Tong, S. Yakneen , E. D. Zhong , M. Zielinski , A. Žídek , V. Bapst , P. Kohli , M. Jaderberg , D. Hassabis , and J. M. Jumper . Accurate structure prediction of biomolecular interactions with alphafold 3 . Nature , 630 ( 8016 ): 493 – 500 , May 2024 . OpenUrl CrossRef PubMed 2. ↵ A. Bansal , H.-M. Chu , A. Schwarzschild , S. Sengupta , M. Goldblum , J. Geiping , and T. Goldstein . Universal guidance for diffusion models . In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition , pages 843 – 852 , 2023 . 3. ↵ A. Bateman , M.-J. Martin , S. Orchard , M. Magrane , A. Adesina , S. Ahmad , E. H. Bowler-Barnett , H. Bye-A-Jee ,Carpentier, P. Denny , J. Fan , P. Garmiri , L. J. d. C. Gonzales , A. Hussein , A. Ignatchenko , G. Insana , R. Ishtiaq , V. Joshi , D. Jyothi , S. Kandasaamy , A. Lock , A. Luciani , J. Luo , Y. Lussi , J. S. M. Marin , P. Raposo , D. L. Rice , R. Santos ,Speretta, J. Stephenson , P. Totoo , N. Tyagi , N. Urakova , P. Vasudev , K. Warner , S. Wijerathne , C. W.-H. Yu , R. Zaru , A. J. Bridge , L. Aimo , G. Argoud-Puy , A. H. Auchincloss , K. B. Axelsen , P. Bansal , D. Baratin , T. M. Batista Neto , M.-C. Blatter , J. T. Bolleman , E. Boutet , L. Breuza , B. C. Gil , C. Casals-Casas , K. C. Echioukh , E. Coudert , B. Cuche , E. de Castro , A. Estreicher , M. L. Famiglietti , M. Feuermann , E. Gasteiger , P. Gaudet , S. Gehant , V. Gerritsen , A. Gos , N. Gruaz , C. Hulo , N. Hyka-Nouspikel , F. Jungo , A. Kerhornou , P. L. Mercier , D. Lieberherr , P. Masson , A. Morgat , S. Paesano , I. Pedruzzi , S. Pilbout , L. Pourcel , S. Poux , M. Pozzato , M. Pruess , N. Redaschi , C. Rivoire , C. J. A. Sigrist , K. Sonesson , S. Sundaram , A. Sveshnikova , C. H. Wu , C. N. Arighi , C. Chen , Y. Chen , H. Huang , K. Laiho , M. Lehvaslaiho , P. McGarvey , D. A. Natale , K. Ross , C. R. Vinayaka , Y. Wang , and J. Zhang . Uniprot: the universal protein knowledgebase in 2025 . Nucleic Acids Research , 53 ( D1 ): D609 – D617 , Nov . 2024 . OpenUrl 4. ↵ T. J. Bechtel and E. Weerapana . From structure to redox: The diverse functional roles of disulfides and implications in disease . PROTEOMICS , 17 ( 6 ), Mar . 2017 . 5. ↵ P. G. Bissiri , C. C. Holmes , and S. G. Walker . A general framework for updating belief distributions . Journal of the Royal Statistical Society Series B: Statistical Methodology , 78 ( 5 ): 1103 – 1130 , 2016 . OpenUrl PubMed 6. ↵ F. Cesaratto , A. López-Requena , O. R. Burrone , and G. Petris . Engineered tobacco etch virus (tev) protease active in the secretory pathway of mammalian cells . Journal of Biotechnology , 212 : 159 – 166 , Oct . 2015 . OpenUrl CrossRef PubMed 7. ↵ B. Chen , M. S. Khodadoust , N. Olsson , L. E. Wagar , E. Fast , C. L. Liu , Y. Muftuoglu , B. J. Sworder , M. Diehn , R. Levy , M. M. Davis , J. E. Elias , R. B. Altman , and A. A. Alizadeh . Predicting hla class ii antigen presentation through integrated deep learning . Nature Biotechnology , 37 ( 11 ): 1332 – 1343 , Oct . 2019 . OpenUrl CrossRef PubMed 8. ↵ P. Chen and H. Cabral . Enhancing targeted drug delivery through cell-specific endosomal escape . ChemMedChem , July 2024 . 9. ↵ M. C. Choi . An improved variant of simulated annealing that converges under fast cooling . arXiv preprint arxiv: 1901.10269 , 2019 . 10. ↵ H. Chung , J. Kim , M. T. Mccann , M. L. Klasky , and J. C. Ye . Diffusion posterior sampling for general noisy inverse problems . arXiv preprint arxiv: 2209.14687 , 2022 . 11. ↵ J. Dauparas , I. Anishchenko , N. Bennett , H. Bai , R. J. Ragotte , L. F. Milles , B. I. M. Wicky , A. Courbet , R. J. de Haas , N. Bethel , P. J. Y. Leung , T. F. Huddy , S. Pellock , D. Tischer , F. Chan , B. Koepnick , H. Nguyen , A. Kang , B. Sankaran , A. K. Bera , N. P. King , and D. Baker . Robust deep learning–based protein sequence design using proteinmpnn . Science , 378 ( 6615 ): 49 – 56 , Oct . 2022 . OpenUrl CrossRef PubMed 12. ↵ A. Doucet , N. De Freitas , N. J. Gordon , et al. Sequential Monte Carlo methods in practice , volume 1 . Springer , 2001 . 13. ↵ Editorial . Powering new therapeutics with precision mitochondrial editing . Nature Biotechnology , 43 ( 6 ): 831 – 832 , June 2025 . OpenUrl PubMed 14. ↵ P. Emami , A. Perreault , J. Law , D. Biagioni , and P. St . John. Plug and play directed evolution of proteins with gradient-based discrete mcmc . Machine Learning: Science and Technology , 4 ( 2 ): 025014 , Apr . 2023 . OpenUrl CrossRef 15. ↵ EvolutionaryScale Team . evolutionaryscale/esm , 2024 . 16. ↵ X. J. Gao , L. S. Chong , M. S. Kim , and M. B. Elowitz . Programmable protein circuits in living cells . Science , 361 ( 6408 ): 1252 – 1258 , Sept . 2018 . OpenUrl Abstract / FREE Full Text 17. ↵ T. Hayes , R. Rao , H. Akin , N. J. Sofroniew , D. Oktay , Z. Lin , R. Verkuil , V. Q. Tran , J. Deaton , M. Wiggert , R. Badkundri ,Shafkat, J. Gong , A. Derry , R. S. Molina , N. Thomas , Y. A. Khan , C. Mishra , C. Kim , L. J. Bartie , M. Nemeth , P. D. Hsu , T. Sercu , S. Candido , and A. Rives . Simulating 500 million years of evolution with a language model . Science , 387 ( 6736 ): 850 – 858 , 2025 . OpenUrl CrossRef PubMed 18. ↵ A. Helenius and M. Aebi . Roles of n-linked glycans in the endoplasmic reticulum . Annual Review of Biochemistry , 73 ( 1 ): 1019 – 1049 , June 2004 . OpenUrl CrossRef PubMed Web of Science 19. ↵ B. L. Hie , V. R. Shanker , D. Xu , T. U. J. Bruun , P. A. Weidenbacher , S. Tang , W. Wu , J. E. Pak , and P. S. Kim . Efficient evolution of human antibodies from general protein language models . Nature Biotechnology , 42 ( 2 ): 275 – 283 , Feb 2024 . OpenUrl CrossRef PubMed 20. ↵ J. Ho and T. Salimans . Classifier-free diffusion guidance , 2022 . 21. ↵ K. Jiang , Z. Yan , M. D. Bernardo , S. R. Sgrizzi , L. Villiger , A. Kayabolen , B. J. Kim , J. K. Carscadden , M. Hiraizumi , H. Nishimasu , J. S. Gootenberg , and O. O. Abudayyeh . Rapid in silico directed evolution by a protein language model with evolvepro . Science , 387 ( 6732 ): eadr6006 , 2025 . OpenUrl PubMed 22. ↵ Y. Jiang , D. Wang , Y. Yao , H. Eubel , P. Künzler , I. M. Møller , and D. Xu . Mulocdeep: A deep-learning framework for protein subcellular and suborganellar localization prediction with residue-level interpretation . Computational and Structural Biotechnology Journal , 19 : 4825 – 4839 , 2021 . OpenUrl 23. ↵ P. Jones , D. Binns , H.-Y. Chang , M. Fraser , W. Li , C. McAnulla , H. McWilliam , J. Maslen , A. Mitchell , G. Nuka , S. Pesseat , A. F. Quinn , A. Sangrador-Vegas , M. Scheremetjew , S.-Y. Yong , R. Lopez , and S. Hunter . Interproscan 5: genome-scale protein function classification . Bioinformatics (Oxford, England) , 30 ( 9 ): 1236 – 1240 , May 2014 . OpenUrl CrossRef PubMed Web of Science 24. ↵ J. Jumper , R. Evans , A. Pritzel , T. Green , M. Figurnov , O. Ronneberger , K. Tunyasuvunakool , R. Bates , A. Žídek , A. Potapenko , A. Bridgland , C. Meyer , S. A. A. Kohl , A. J. Ballard , A. Cowie , B. Romera-Paredes , S. Nikolov , R. Jain , J. Adler , T. Back , S. Petersen , D. Reiman , E. Clancy , M. Zielinski , M. Steinegger , M. Pacholska , T. Berghammer , S. Bodenstein , D. Silver , O. Vinyals , A. W. Senior , K. Kavukcuoglu , P. Kohli , and D. Hassabis . Highly accurate protein structure prediction with alphafold . Nature , 596 ( 7873 ): 583 – 589 , July 2021 . OpenUrl CrossRef PubMed 25. ↵ Z. Lin , H. Akin , R. Rao , B. Hie , Z. Zhu , W. Lu , N. Smetanin , R. Verkuil , O. Kabeli , Y. Shmueli , A. dos Santos Costa , M. Fazel-Zarandi , T. Sercu , S. Candido , and A. Rives . Evolutionary-scale prediction of atomic-level protein structure with a language model . Science , 379 ( 6637 ): 1123 – 1130 , 2023 . OpenUrl CrossRef PubMed 26. ↵ S. Ma , J. A. Henderson , and J. Shen . Exploring the ph-dependent structure–dynamics–function relationship of human renin . Journal of Chemical Information and Modeling , 61 ( 1 ): 400 – 407 , Dec . 2020 . OpenUrl PubMed 27. ↵ A. Madani , B. Krause , E. R. Greene , S. Subramanian , B. P. Mohr , J. M. Holton , J. L. Olmos , C. Xiong , Z. Z. Sun , R. Socher , J. S. Fraser , and N. Naik . Large language models generate functional protein sequences across diverse families . Nature Biotechnology , 41 ( 8 ): 1099 – 1106 , Jan . 2023 . OpenUrl CrossRef PubMed 28. ↵ P. B. Persson . Renin: origin, secretion and synthesis . The Journal of Physiology , 552 ( 3 ): 667 – 671 , Nov . 2003 . OpenUrl CrossRef PubMed Web of Science 29. ↵ M. Steinegger and J. Söding . Mmseqs2 enables sensitive protein sequence searching for the analysis of massive data sets . Nature Biotechnology , 35 ( 11 ): 1026 – 1028 , Oct . 2017 . OpenUrl CrossRef PubMed 30. ↵ K. H. Sumida , R. Núñez-Franco , I. Kalvet , S. J. Pellock , B. I. M. Wicky , L. F. Milles , J. Dauparas , J. Wang , Y. Kipnis , N. Jameson , A. Kang , J. De La Cruz , B. Sankaran , A. K. Bera , G. Jiménez-Osés , and D. Baker . Improving protein expression, stability, and function with proteinmpnn . Journal of the American Chemical Society , 146 ( 3 ): 2054 – 2061 , Jan . 2024 . OpenUrl CrossRef PubMed 31. ↵ V. Thumuluri , J. J. Almagro Armenteros , A. R. Johansen , H. Nielsen , and O. Winther . Deeploc 2.0: multi-label subcellular localization prediction using protein language models . Nucleic Acids Research , 50 ( W1 ): W228 – W234 , Apr . 2022 . OpenUrl CrossRef PubMed 32. ↵ M. Uhlén , M. J. Karlsson , A. Hober , A.-S. Svensson , J. Scheffel , D. Kotol , W. Zhong , A. Tebani , L. Strandberg , F. Edfors , E. Sjöstedt , J. Mulder , A. Mardinoglu , A. Berling , S. Ekblad , M. Dannemeyer , S. Kanje , J. Rockberg , M. Lundqvist , M. Malm , A.-L. Volk , P. Nilsson , A. Månberg , T. Dodig-Crnkovic , E. Pin , M. Zwahlen , P. Oksvold , K. von Feilitzen , R. S. Häussler , M.-G. Hong , C. Lindskog , F. Ponten , B. Katona , J. Vuu , E. Lindström , J. Nielsen , J. Robinson , B. Ayoglu , D. Mahdessian , D. Sullivan , P. Thul , F. Danielsson , C. Stadler , E. Lundberg , G. Bergström , A. Gummesson , B. G. Voldborg , H. Tegel , S. Hober , B. Forsström , J. M. Schwenk , L. Fagerberg , and A. Sivertsson . The human secretome . Science Signaling , 12 ( 609 ), Nov . 2019 . 33. ↵ E. Wolfsberg , J.-S. Paul , J. Tycko , B. Chen , M. C. Bassik , L. Bintu , A. A. Alizadeh , and X. J. Gao . Machine-guided dual-objective protein engineering for deimmunization and therapeutic functions . Cell Systems , page 101299 , June 2025 . 34. ↵ Y. Yan , A. Zhou , R. W. Carrell , and R. J. Read . Structural basis for the specificity of renin-mediated angiotensinogen cleavage . Journal of Biological Chemistry , 294 ( 7 ): 2353 – 2364 , Feb . 2019 . OpenUrl Abstract / FREE Full Text 35. ↵ Y. Zhang , L. Zheng , N. You , W. Hu , W. Jiang , M. Lu , H. Xu , H. Dai , T. Fu , and Y. Zhou . Locpro: a deep learning-based prediction of protein subcellular localization for promoting multi-directional pharmaceutical research . Journal of Pharmaceutical Analysis , page 101255 , Mar . 2025 . View the discussion thread. Back to top Previous Next Posted July 17, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following ProVADA: Generation of Subcellular Protein Variants via Ensemble-Guided Test-Time Steering Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share ProVADA: Generation of Subcellular Protein Variants via Ensemble-Guided Test-Time Steering Wenhui Sophia Lu , Xiaowei Zhang , Luis S. Mille-Fragoso , Haoyu Dai , Xiaojing J. Gao , Wing Hung Wong bioRxiv 2025.07.11.664238; doi: https://doi.org/10.1101/2025.07.11.664238 Share This Article: Copy Citation Tools ProVADA: Generation of Subcellular Protein Variants via Ensemble-Guided Test-Time Steering Wenhui Sophia Lu , Xiaowei Zhang , Luis S. Mille-Fragoso , Haoyu Dai , Xiaojing J. Gao , Wing Hung Wong bioRxiv 2025.07.11.664238; doi: https://doi.org/10.1101/2025.07.11.664238 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioengineering Subject Areas All Articles Animal Behavior and Cognition (7630) Biochemistry (17662) Bioengineering (13881) Bioinformatics (41914) Biophysics (21436) Cancer Biology (18578) Cell Biology (25482) Clinical Trials (138) Developmental Biology (13372) Ecology (19891) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15600) Genomics (22483) Immunology (17729) Microbiology (40365) Molecular Biology (17164) Neuroscience (88544) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15136) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9818) Zoology (2269)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-06-15T06:18:04.506796+00:00