Full text
32,826 characters
· extracted from
preprint-html
· click to expand
Design to Data for mutants of β-glucosidase B from Paenibacillus polymyxa: N160L, N160S, N160C, N160M, N160G | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Design to Data for mutants of β-glucosidase B from Paenibacillus polymyxa : N160L, N160S, N160C, N160M, N160G Nicholas Li , Ashley Vater , Justin B. Siegel doi: https://doi.org/10.1101/2025.11.24.690255 Nicholas Li 1 Genome Center, University of California , Davis 95616, California, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ashley Vater 1 Genome Center, University of California , Davis 95616, California, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site Justin B. Siegel 1 Genome Center, University of California , Davis 95616, California, United States 2 Department of Biochemistry & Molecular Medicine, University of California , Davis 95616, California, United States 3 Department of Chemistry, University of California , Davis 95616, California, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: jbsiegel{at}ucdavis.edu Abstract Full Text Info/History Metrics Preview PDF ABSTRACT Protein design is advancing toward quantitative modeling of enzyme function and stability. However, progress remains limited by the scarcity of standardized experimental datasets for training and benchmarking computational models. The Design to Data (D2D) program addresses this need by generating harmonized measurements of catalytic and stability parameters across an extensive β-glucosidase B (BglB) variant library. Here, we expand the D2D dataset with kinetic and thermal characterization of five single-point BglB variants and the wild-type (WT), including soluble expression, Michaelis-Menten constants ( k cat , K M , and k cat /K M ), and melting temperature (T M, ). Foldit Standalone was used to model the structural effects of the mutations. In this study, a weak but consistent association between Foldit total system energy (TSE) and T M was observed, suggesting local energetic effects that may influence stability. Together with the broader D2D corpus, these data enhance the functional mapping of BglB and provide model-ready benchmarks for developing and evaluating data-driven predictors of enzyme activity and stability. INTRODUCTION The advances in artificial intelligence are catalyzing a revolution in the biotechnology field 1 . In this domain, rational enzyme modification enables catalytic functions not found in nature 2 – 5 , providing a foundation for developing next-generation protein therapeutics, biofuels, food science, and bioremediation 6 – 8 . The intersection of artificial intelligence and enzyme research underscores the growing need for protein design software that predicts function, building off the developments around structure prediction capabilities. However, continued breakthroughs in this field are limited by the availability of large, structured, and interoperable datasets 9 . These dataset limitations are particularly relevant to the enzyme space, given the nuanced and complicated nature of the reactions they catalyze. Specifically, the field lacks comprehensive datasets containing quantitative measures of catalytic activity ( k cat , K M , k cat /K M ) and protein unfolding temperature (functional thermal stability, T M ) 10 . To address this challenge, national undergraduate research network Design to Data (D2D) was established in 2019 to systematize dataset expansion. This program enables students to measure and record quantitative kinetic and thermal stability data for single-point mutations of β-glucosidase B (BglB) 11 . Program coordination is based at the University of California, Davis, and has led to the dataset’s growth through contributions from a national consortium of more than 40 institutions. Previous findings by Carlin et al. revealed the current predictive capabilities of various computational methods tested on a dataset of 100 kinetically characterized BglB single-point mutants, demonstrating weak relationships between experimental data and structural metrics such as interface energy 10 . Further analysis showed that T M and algorithm-derived features (Rosetta and FoldX) also exhibited weak correlations 12 . Collectively, the authors of these studies recommend that expanding the BglB dataset may have utility in improving predictive accuracy of newer models. This study analyzed the kinetic properties and thermal stability of five novel single-point mutations (N160L, N160S, N160C, N160M, and N160G) in BglB from Paenibacillus polymyxa . Although residue N160 does not directly contact the ligand, its proximity to the active site suggests potential influence through indirect mechanisms. We investigated the relationship between Foldit Standalone algorithm-derived features and both catalytic performance and thermal stability. METHODS BglB mutant design Five BglB mutants were designed using FoldIt Standalone, an interactive graphical user interface that allows for real-time direct manipulation of protein structures 13 . FoldIt Standalone quantifies protein structure stability using the Rosetta energy function 2 . Mutants were assigned unique total system energy (TSE) scores and individual residue energy values subsequent to the application of both the minimization and repack/shake functions to the residue of interest and 29 neighboring residues. Mutagenesis Using established protocols, the Kunkel method was used for oligonucleotide-directed mutagenesis to produce the five single-point mutants of the already cloned pET29b+ vectors 14 . Subsequently, transformed E. coli DH5α colonies were selected, and plasmid DNA was extracted and purified using standard miniprep protocols. Plasmid DNA was sent for Sanger Sequencing (Genewiz, Arzenta). Sequence data was analyzed (Benchling) to verify the site mutation and confirm that the remaining sequence was error-free. Protein production and purification Transformed E. coli BL21 cells were expanded and the BglB variants were expressed as previously described 12 . Isopropyl β-D-1-thiogalactopyranoside (IPTG) was used to induce expression of the target proteins. The cells were lysed and the histidine tagged BglB proteins were purified with nickel-based immobilized metal affinity chromatography. Variant and WT protein concentration was determined by A280 using a BioTek Epoch spectrophotometer. Protein purity was analyzed using sodium dodecyl sulfate polyacrylamide gel electrophoresis (SDS-PAGE). Michaelis-Menten kinetics and thermal stability of mutants Kinetic activities of enzyme variants were quantified by monitoring hydrolysis of 4-nitrophenyl-β-D-glucopyranoside at 420 nm, following the established protocol 12 with minor modifications. To enhance resolution at lower substrate concentrations, a 3× dilution series (100 mM, 33.33 mM, 11.11 mM, 3.70 mM, 1.23 mM, 0.41 mM, 0.14 mM, and 0 mM) was used in place of the original 4× series, and absorbance was recorded at 1-minute intervals for 15 minutes. The kinetic data was fitted to the Michaelis-Menten model using a SciPy nonlinear curve fitting function. k cat and K M values were calculated for each variant and the catalytic efficiency ( k cat /K M ) was reported. Thermal stability of enzyme variants was quantified using a fluorescence-based protein unfolding assay with the Protein Thermal Shift™ (PTS) dye kit (Applied Biosystems) on a QuantStudio 3 System. Purified enzymes were diluted 2× prior to the addition of 16× PTS dye. Fluorescence was monitored as the temperature increased from 20 °C to 90 °C, and melting temperatures (T M ) were derived from the inflection point of the two-state Boltzmann model. For both mutant and wild-type enzymes, sample sizes for T M determination were ≤3. Because the assumption of equal variances is unreliable in small samples, Welch’s t-test was used to compute test statistics and 95% confidence intervals. Pairwise intervals were constructed at an overall significance level of α = 0.05. RESULTS Mutation Selection Using Foldit Standalone, residue N160 was selected for mutational analysis due to its proximity to the active site. Although N160 does not directly contact the ligand, it lies approximately 12 Å away ( Figure 1 ) and contributes to the stabilization of a local residue network through hydrogen bonding with T116 and T218 ( Figure 2a , yellow dotted lines). Five substitutions (leucine, serine, cysteine, methionine, and glycine) were introduced at this position to assess how differences in side-chain properties influence catalytic performance and thermal stability. Foldit Standalone predictions indicated that all five mutations disrupted the hydrogen-bonding network at N160, thereby eliminating the stabilizing interactions observed in the wild-type enzyme. Consequently, all variants were hypothesized to exhibit reduced catalytic efficiency and thermal stability relative to wild type. Download figure Open in new tab Figure 1. (a) Ribbon diagram of BglB (blue structure) with substrate 4-nitrophenol beta-D glucopyranoside (light indigo structure) created with PyMOL. The residue location, N160, is labeled with a magenta sphere. The ligand is labeled as multiple spheres. (b) Illustration of the BglB reaction of the reporter substrate, p-nitrophenyl-beta-D-glucoside (pPNG). Download figure Open in new tab Figure 2a. PyMOL-generated image of residue, N160 (magenta). Hydrogen bonds and distances are labeled and illustrated as yellow-dashed lines with neighboring residues, T218 (green, 1.8 Å) and T116 (dark cyan, 2.2 Å). The leucine and methionine substitutions introduced bulkier side chains ( Figure 2b ), increasing the clashing score from 0.843 to 5.401 and 15.003, respectively. Both mutations also resulted in positive changes in TSE: +6.46 for leucine and +5.36 for methionine. Given the increased steric clash and elevated TSE values, these variants were predicted to exhibit reduced catalytic efficiency and thermal stability compared with the wild-type enzyme. Download figure Open in new tab Figure 2b. PyMOL generated images of five variant residues shown in magenta. Neighboring T218 and T116 residues are shown as light green and dark cyan, respectively. Alpha carbon distances labeled between variant residues and T218 and T116 for N160S (3.0 Å, 3.9 Å) and N160C (2.6 Å, 3.9 Å). The mutations to cysteine and glycine introduced smaller side chains relative to asparagine ( Figure 2b ), with side-chain scores of 0.098 and 0, respectively, both lower than that of asparagine (2.401). Because of the reduced size and increased flexibility of these residues, the mutations were predicted to diminish catalytic efficiency, as the loss of steric constraints could impair precise substrate positioning. Both cysteine and glycine substitutions also produced positive changes in TSE of +1.64 and +11.66, respectively, suggesting decreased thermal stability. In contrast, substitution of asparagine with serine was expected to cause minimal functional change given the two residues’ similar polarity and hydrophilicity. Further the serine mutation resulted in a positive TSE shift of +1.79, indicating a negligible predicted difference in the protein’s thermodynamic folding favorability. Protein Purity and Expression All mutant proteins exceeded concentration of 1.60 mg/mL as determined by absorbance at 280 nm. SDS–PAGE analysis confirmed the presence and purity of the expressed proteins ( Figure 3 ). Protein concentrations for all mutants and the wild-type ranged from 1.6 to 3.5 mg/mL ( Table 1 ). The consistent and intense band patterns indicated successful expression and sufficient purity for subsequent kinetic and thermal characterization. View this table: View inline View popup Download powerpoint Table 1. Total system energy (TSE), protein concentration, kinetic parameters ( k cat , K M , k cat /K M ), and thermal stability (T M ) of wild-type and variant BglB enzymes. Download figure Open in new tab Figure 3. SDS-PAGE of wild-type and five variant enzymes. BglB wild-type and mutant bands formed around 50 kDA. The order of mutants and protein concentration shown from left to right: N160L, N160S, N160C, N160M, and N160G. Kinetic Activity Building on the BglB dataset, we determined kinetic constants for three novel mutants: N160L, N160M, and N160G; N160S and N160C had been previously characterized but the biological replicate data presented here represents important internal control and validation measures for the larger study. Substrate saturation curves were generated for the wild-type and all variants using a standard kinetic assay ( Figure 4 ). The wild-type enzyme exhibited a catalytic efficiency ( k cat /K M ) of 78.9 ± 5.4 min -1 mM -1 , consistent with previously reported values. N160C and N160G showed higher catalytic efficiencies of 128.5 ± 8.8 min -1 mM -1 and 170.5 ± 9.9 min -1 mM -1 , respectively. In other words, N160C and N160G also had higher turnover rates ( k cat ) and enhanced approximate binding affinity constants (K M ) than the wild-type. The other three mutants had reduced turnover rates ( Table 1 ) and the K M values for N160L and N160G were more than twice that of the wild-type. Download figure Open in new tab Figure 4. Michaelis-Menten graphs of the wild-type (WT) and five mutants. pPNG concentration [mM] is on the horizontal axis and k obs [min -1 ] is on the vertical axis. The triplicate data was fit with nonlinear regression. Thermal Stability Thermal stability was evaluated for the wild-type and all variants using the Protein Thermal Shift assay, performed in triplicate to obtain mean T M values with associated uncertainty ( Figure 5 ). The wild-type enzyme exhibited a T M of 45.9 ± 0.2 °C. Accounting for uncertainty, T M values ranged from 42.4 to 46.7 °C across all samples. Linear regression analysis between T M and TSE produced an R 2 value of 0.65 with a one-sided p-value of 0.03; the estimated regression line was Y = −0.26X − 242.13 ( Figure 5 ). Simultaneous pairwise confidence interval analysis identified statistically significant differences between N160L and N160G, N160C and N160M, N160C and N160G, and N160G and wild-type ( Table 2 ). At the 95% confidence level, the T M of N160C exceeded that of N160M and N160G by between 1.0 and 5.3 °C and between 1.2 and 5.4 °C, respectively. The T M values for N160L and wild-type were greater than that of N160G by between 1.0 and 3.5 °C and between 1.3 and 4.5 °C, respectively ( Table 2 ). View this table: View inline View popup Download powerpoint Table 2. Bonferroni-corrected pairwise confidence intervals (α = 0.05) for T M comparisons among wild-type and mutant BglB variants. Welch’s t-test assuming unequal variances was used to calculate intervals. Statistically significant comparisons are shown in bold. Download figure Open in new tab Figure 5. Foldit Standalone TSE scores plotted against T M values shown with error bars [°C] for the wild-type and each mutant. Error is reported as ± 2s (estimated sample standard deviation from t-distribution). DISCUSSION This study examined the catalytic efficiency and thermal stability of five BglB variants, each containing a single point mutation at residue N160. The mutants were selected to represent a range of biophysical properties and to evaluate the contribution of this residue to the enzyme’s kinetic performance and thermal stability. Protein Purity and Expression Despite the loss of the hydrogen-bonding network observed in the wild-type, all mutants were expressed as soluble proteins with concentrations ranging from 1.6 to 3.5 mg/mL. Kinetic Activity The N160L and N160M variants exhibited reduced catalytic performance relative to the wild-type. Both mutants showed approximately 1.5-fold lower k cat values and 2.5-fold higher K M values, resulting in more than a 4.7-fold decrease in catalytic efficiency ( k cat /K M ). This reduction likely arises from the bulky side chains of leucine and methionine, which may intrude into adjacent residue space and restrict local conformational flexibility. Such steric hindrance could impede the conformational changes required for efficient substrate binding and catalysis. Foldit Standalone modeling supports this interpretation, with clashing scores of 5.4 and 15.0 for N160L and N160M, respectively, compared with 0.8 for the wild-type. These elevated scores indicate greater steric interference with neighboring residues. In contrast, the N160C and N160G variants exhibited improved catalytic performance from the wild-type. Specifically, N160C and N160G showed more than 1.3-fold greater k cat values and approximately 0.8-fold lower K M values compared with the wild type. Consequently, their catalytic efficiencies, expressed as k cat divided by K M , were 1.6-fold and 2.1-fold higher, respectively. The improved catalytic efficiency relative to the wild-type likely reflects the smaller size of cysteine and glycine side chains compared with that of asparagine. Reduced side-chain volume decreases steric constraints, providing additional space for neighboring residues. Based on biophysical principles and our structural model, N160C and N160G appear to impose less side-chain rigidity than N160L and N160M, perhaps permitting greater local mobility that accommodates tighter substrate binding and faster catalysis. Foldit Standalone modeling supports this interpretation, as the side-chain scores quantify each residue’s contribution to predicted folding stability, where lower values are more favorable. N160C and N160G had scores of 0.1 and 0, respectively, compared with 2.4 for asparagine. The Foldit side-chain contribution profiles for N160C, N160G, and the wild-type were consistent with the kinetic data, where lower side-chain contribution scores corresponded to higher catalytic efficiency. Despite the similar hydrophilic and polar properties of serine and asparagine, the N160S mutant showed reduced kinetic performance compared to wild-type. N160S exhibited 0.6-fold lesser values for both k cat and overall catalytic efficiency. We hypothesize this difference is due to the distinct hydrophilic properties of serine versus asparagine. Wherein, the hydroxyl group of serine may alter the water molecule arrangements in the active site required for the substrate reaction ( Figure 1b ); thus, disrupting the active site geometry and reducing catalytic efficiency. Furthermore, the loss of hydrogen bonding in N160S decreases the structural integrity of the residue site, also affecting the precise coordination required for efficient substrate turnover. Thermal Stability The simultaneous pairwise confidence intervals were constructed to identify statistically significant differences in T M while controlling for multiple comparisons. The analysis showed that the T M of N160C was significantly greater than those of N160M and N160G. In addition, both N160L and the wild-type had significantly greater T M values, in this sample set, than N160G. Despite previous findings reporting a negligible Pearson correlation (r < −0.001) between T M and TSE 12 , we found a negative linear relationship between T M and TSE in these data ( Figure 5 ). Building off this preliminary result, we think that certain mutation sites may produce stronger associations between T M and TSE, perhaps making them particularly informative for predictive modeling. It would be interesting to investigate other residue locations that appear to similarly disrupt hydrogen bonding and water arrangements to look for more compelling trends between stability and TSE. Given the independent nature of kinetic and T M parameters 12 , we wonder if this might be similarly true for a special set of sites wherein model outputs (e.g., TSE) are indeed predictive of catalytic performance. CONCLUSION This study presents a novel contribution to the BglB D2D dataset through the kinetic and thermal characterization of five single-point mutants at residue N160, three of which have not been previously reported. By integrating new experimental measurements with computational parameters from Foldit, this study provides initial evidence of a measurable relationship between total system energy and thermal stability at this residue site. These findings highlight the value of more deeply characterizing select sites to uncover predictive features that influence enzyme performance. Expanding datasets with studies of this kind will improve the accuracy and reliability of predictive models, particularly for enzyme systems where experimental data remain limited. Continued generation of high-quality, functionally relevant data will enhance model development and advance applications in protein therapeutics, biofuels, food science, and bioremediation, bringing us one step closer to the full potential of data-driven enzyme design. Acknowledgments This work was supported by the University of California Davis, the National Science Foundation (award nos. # 2315767, 2118138 and 1827246). The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Science Foundation, or UC Davis. Further, this work references and is contextualized by the Design to Data (D2D) dataset, which has been built through contributions of over 1000 undergraduate students over the past decade. REFERENCES (1). ↵ Walker , S. P. ; Yallapragada , V. V. B. ; Tangney , M. Arming Yourself for The In Silico Protein Design Revolution . Trends Biotechnol . 2021 , 39 ( 7 ), 651 – 664 . doi: 10.1016/j.tibtech.2020.10.003 . OpenUrl CrossRef PubMed (2). ↵ Alford , R. F. ; Leaver-Fay , A. ; Jeliazkov , J. R. ; O’Meara , M. J. ; DiMaio , F. P. ; Park , H. ; Shapovalov , M. V. ; Renfrew , P. D. ; Mulligan , V. K. ; Kappel , K. ; Labonte , J. W. ; Pacella , M. S. ; Bonneau , R. ; Bradley , P. ; Dunbrack , R. L. Jr. ; Das , R. ; Baker , D. ; Kuhlman , B. ; Kortemme , T. ; Gray , J. J. The Rosetta All-Atom Energy Function for Macromolecular Modeling and Design . J. Chem. Theory Comput . 2017 , 13 ( 6 ), 3031 – 3048 . doi: 10.1021/acs.jctc.7b00125 . OpenUrl CrossRef PubMed (3). Leman , J. K. ; Weitzner , B. D. ; Lewis , S. M. ; Adolf-Bryfogle , J. ; Alam , N. ; Alford , R. F. ; Aprahamian , M. ; Baker , D. ; Barlow , K. A. ; Barth , P. ; Basanta , B. ; Bender , B. J. ; Blacklock , K. ; Bonet , J. ; Boyken , S. E. ; Bradley , P. ; Bystroff , C. ; Conway , P. ; Cooper , S. ; Correia , B. E. ; Coventry , B. ; Das , R. ; De Jong , R. M. ; DiMaio , F. ; Dsilva , L. ; Dunbrack , R. ; Ford , A. S. ; Frenz , B. ; Fu , D. Y. ; Geniesse , C. ; Goldschmidt , L. ; Gowthaman , R. ; Gray , J. J. ; Gront , D. ; Guffy , S. ; Horowitz , S. ; Huang , P.-S. ; Huber , T. ; Jacobs , T. M. ; Jeliazkov , J. R. ; Johnson , D. K. ; Kappel , K. ; Karanicolas , J. ; Khakzad , H. ; Khar , K. R. ; Khare , S. D. ; Khatib , F. ; Khramushin , A. ; King , I. C. ; Kleffner , R. ; Koepnick , B. ; Kortemme , T. ; Kuenze , G. ; Kuhlman , B. ; Kuroda , D. ; Labonte , J. W. ; Lai , J. K. ; Lapidoth , G. ; Leaver-Fay , A. ; Lindert , S. ; Linsky , T. ; London , N. ; Lubin , J. H. ; Lyskov , S. ; Maguire , J. ; Malmström , L. ; Marcos , E. ; Marcu , O. ; Marze , N. A. ; Meiler , J. ; Moretti , R. ; Mulligan , V. K. ; Nerli , S. ; Norn , C. ; Ó’Conchúir , S. ; Ollikainen , N. ; Ovchinnikov , S. ; Pacella , M. S. ; Pan , X. ; Park , H. ; Pavlovicz , R. E. ; Pethe , M. ; Pierce , B. G. ; Pilla , K. B. ; Raveh , B. ; Renfrew , P. D. ; Burman , S. S. R. ; Rubenstein , A. ; Sauer , M. F. ; Scheck , A. ; Schief , W. ; Schueler-Furman , O. ; Sedan , Y. ; Sevy , A. M. ; Sgourakis , N. G. ; Shi , L. ; Siegel , J. B. ; Silva , D.-A. ; Smith , S. ; Song , Y. ; Stein , A. ; Szegedy , M. ; Teets , F. D. ; Thyme , S. B. ; Wang , R. Y.-R. ; Watkins , A. ; Zimmerman , L. ; Bonneau , R. Macromolecular Modeling and Design in Rosetta: Recent Methods and Frameworks . Nat. Methods 2020 , 17 ( 7 ), 665 – 680 . doi: 10.1038/s41592-020-0848-2 . OpenUrl CrossRef PubMed (4). Khersonsky , O. ; Lipsh , R. ; Avizemer , Z. ; Ashani , Y. ; Goldsmith , M. ; Leader , H. ; Dym , O. ; Rogotner , S. ; Trudeau , D. L. ; Prilusky , J. ; Amengual-Rigo , P. ; Guallar , V. ; Tawfik , D. S. ; Fleishman , S. J. Automated Design of Efficient and Functionally Diverse Enzyme Repertoires . Mol. Cell 2018 , 72 ( 1 ), 178 - 186.e5 . doi: 10.1016/j.molcel.2018.08.033 . OpenUrl CrossRef PubMed (5). ↵ Siegel , J. B. ; Zanghellini , A. ; Lovick , H. M. ; Kiss , G. ; Lambert , A. R. ; St Clair , J. L. ; Gallaher , J. L. ; Hilvert , D. ; Gelb , M. H. ; Stoddard , B. L. ; Houk , K. N. ; Michael , F. E. ; Baker , D. Computational Design of an Enzyme Catalyst for a Stereoselective Bimolecular Diels-Alder Reaction . Science 2010 , 329 ( 5989 ), 309 – 313 . doi: 10.1126/science.1190239 . OpenUrl Abstract / FREE Full Text (6). ↵ Gordon , S. R. ; Stanley , E. J. ; Wolf , S. ; Toland , A. ; Wu , S. J. ; Hadidi , D. ; Mills , J. H. ; Baker , D. ; Pultz , I. S. ; Siegel , J. B. Computational Design of an α-Gliadin Peptidase . J. Am. Chem. Soc . 2012 , 134 ( 50 ), 20513 – 20520 . doi: 10.1021/ja3094795 . OpenUrl CrossRef PubMed (7). Reardon , S. Five Protein-Design Questions That Still Challenge AI . Nature 2024 , 635 ( 8037 ), 246 – 248 . doi: 10.1038/d41586-024-03595-9 . OpenUrl CrossRef PubMed (8). ↵ Yang , J. ; Li , F.-Z. ; Arnold , F. H. Opportunities and Challenges for Machine Learning-Assisted Enzyme Engineering . ACS Cent. Sci . 2024 , 10 ( 2 ), 226 – 241 . doi: 10.1021/acscentsci.3c01275 . OpenUrl CrossRef PubMed (9). ↵ page , M. H. A data bottleneck is holding AI science back, says new Nobel winner . MIT Technology Review . https://www.technologyreview.com/2024/10/15/1105533/a-data-bottleneck-is-holding-ai-science-back-says-new-nobel-winner/ (accessed 2025-10-05 ). (10). ↵ Carlin , D. A. ; Caster , R. W. ; Wang , X. ; Betzenderfer , S. A. ; Chen , C. X. ; Duong , V. M. ; Ryklansky , C. V. ; Alpekin , A. ; Beaumont , N. ; Kapoor , H. ; Kim , N. ; Mohabbot , H. ; Pang , B. ; Teel , R. ; Whithaus , L. ; Tagkopoulos , I. ; Siegel , J. B. Kinetic Characterization of 100 Glycoside Hydrolase Mutants Enables the Discovery of Structural Features Correlated with Kinetic Constants . PLOS ONE 2016 , 11 ( 1 ), e0147596 . doi: 10.1371/journal.pone.0147596 . OpenUrl CrossRef PubMed (11). ↵ D2D CURE | Data | Protein Characterization Data . https://www.d2dcure.com/data/?protein=BglB (accessed 2025-10-05 ). (12). ↵ Carlin , D. A. ; Hapig-Ward , S. ; Chan , B. W. ; Damrau , N. ; Riley , M. ; Caster , R. W. ; Bethards , B. ; Siegel , J. B. Thermal Stability and Kinetic Constants for 129 Variants of a Family 1 Glycoside Hydrolase Reveal That Enzyme Activity and Stability Can Be Separately Designed . PLOS ONE 2017 , 12 ( 5 ), e0176255 . doi: 10.1371/journal.pone.0176255 . OpenUrl CrossRef (13). ↵ Kleffner , R. ; Flatten , J. ; Leaver-Fay , A. ; Baker , D. ; Siegel , J. B. ; Khatib , F. ; Cooper , S. Foldit Standalone: A Video Game-Derived Protein Structure Manipulation Interface Using Rosetta . Bioinformatics 2017 , 33 ( 17 ), 2765 – 2767 . doi: 10.1093/bioinformatics/btx283 . OpenUrl CrossRef PubMed (14). ↵ Rapid and efficient site-specific mutagenesis without phenotypic selection . doi: 10.1073/pnas.82.2.488 . OpenUrl Abstract / FREE Full Text View the discussion thread. Back to top Previous Next Posted November 27, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Design to Data for mutants of β-glucosidase B from Paenibacillus polymyxa: N160L, N160S, N160C, N160M, N160G Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Design to Data for mutants of β-glucosidase B from Paenibacillus polymyxa : N160L, N160S, N160C, N160M, N160G Nicholas Li , Ashley Vater , Justin B. Siegel bioRxiv 2025.11.24.690255; doi: https://doi.org/10.1101/2025.11.24.690255 Share This Article: Copy Citation Tools Design to Data for mutants of β-glucosidase B from Paenibacillus polymyxa : N160L, N160S, N160C, N160M, N160G Nicholas Li , Ashley Vater , Justin B. Siegel bioRxiv 2025.11.24.690255; doi: https://doi.org/10.1101/2025.11.24.690255 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioengineering Subject Areas All Articles Animal Behavior and Cognition (7640) Biochemistry (17706) Bioengineering (13902) Bioinformatics (41978) Biophysics (21465) Cancer Biology (18611) Cell Biology (25528) Clinical Trials (138) Developmental Biology (13387) Ecology (19920) Epidemiology (2067) Evolutionary Biology (24332) Genetics (15615) Genomics (22519) Immunology (17747) Microbiology (40424) Molecular Biology (17194) Neuroscience (88662) Paleontology (667) Pathology (2839) Pharmacology and Toxicology (4827) Physiology (7650) Plant Biology (15160) Scientific Communication and Education (2046) Synthetic Biology (4302) Systems Biology (9826) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.