BiosInt: Biosensor-based smart design of pathway dynamic regulation for industrial biomanufacturing

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 47,602 characters · extracted from preprint-html · click to expand
BiosInt: Biosensor-based smart design of pathway dynamic regulation for industrial biomanufacturing | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results BiosInt: Biosensor-based smart design of pathway dynamic regulation for industrial biomanufacturing View ORCID Profile Hèctor Martín Lázaro , View ORCID Profile Irene Otero-Muras , View ORCID Profile Pablo Carbonell doi: https://doi.org/10.1101/2025.11.17.688676 Hèctor Martín Lázaro 1 Institute of Industrial Control Systems and Computing (AI2), Universitat Politécnica de València (UPV) , 46022 València, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Hèctor Martín Lázaro Irene Otero-Muras 2 Institute for Integrative Systems Biology (I2SysBio), Universitat de València-CSIC , 46980 València, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Irene Otero-Muras Pablo Carbonell 2 Institute for Integrative Systems Biology (I2SysBio), Universitat de València-CSIC , 46980 València, Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Pablo Carbonell For correspondence: pablo.carbonell{at}csic.es Abstract Full Text Info/History Metrics Preview PDF Abstract Industrial-scale production of bio-based chemicals in the circular green bioeconomy still faces inefficiencies arising from scaling up challenges. Biosensor mediated control of metabolic pathways has been proposed as a strategy to improve the performance of those bioprocesses. By linking industrial biosynthetic pathways to measurable metabolites through biochemical transformations, a highly interconnected regulatory design space is unveiled, offering new opportunities for pathway dynamic control. Yet, a systematic approach for selecting and implementing genetic circuits within such large space has remained absent. Here, we introduce BiosInt , an allosteric transcription factor-based genetic biocircuit, with quasi-integral adaptation control capabilities that can be used to increase robustness and performance of biomanufacturing engineered strains. To test the capabilities of the circuit, we have analysed its performance for the full set of metabolic pathway topologies found in the bio-based chemical production space of compounds with industrial interest. For a given implementation of the BiosInt circuit, we carried out a multiobjective optimization process that provides the inverse control solution of optimal topology starting from any given pathway configuration and enzyme expression ratios. Next, synthetic datasets are generated to train machine learning-based predictive models with the data obtained from the simulations of each topology by varying enzyme expression levels and their corresponding concentrations. We validated the models by showing their ability to predict the best control topology for a given set of enzyme ratios. As a proof-of-concept, we show its application to the design of the genetic constructs expressing flavonoid production pathways, providing optimal performance for the BiosInt -mediated dynamic regulation. This circuit inverse design and its application to dynamic control in biomanufacturing paves the way for a future design pipeline in biofoundries delivering more robust and efficient sustainable bioproduction processes. 1 Introduction Microbial cell factories are increasingly been used in bio-based production processes as green alternatives to chemical processes based on fossil resources. In order to make bioproduction processes environmentally sustainable and technoeconomically viable, it is required that they operate efficiently in a circular manner transforming organic feedstocks such as industry and households waste into valuable consumer end products [ 1 ]. However, industrial-scale production of bio-based chemicals in the circular green bioeconomy still faces inefficiencies arising from scaling up challenges. To overcome present biomanufacturing inefficiencies, biofoundries have been established as automated facilities where microbial cell factories operate with high efficiency by means of the systematic application of techniques from synthetic biology, automation, and artificial intelligence [ 2 , 3 ]. Similarly, another cornerstone of efficient microbial biomanufacturing is the introduction of dynamic regulation in production pathways [ 4 ]. A common agreement arising from such strategies is that regulation of synthetic metabolic pathways is becoming necessary in modern manufacturing because production strains might be generally prototyped in controlled laboratory conditions [ 5 ], but the processes would eventually scaled-up into industrial fermentation conditions, where cell factories will be subject to harsh conditions eliciting stress responses, negatively impacting efficiency [ 6 ]. Dynamic control of the synthetic metabolic pathways [ 7 , 8 ], thus, provides regulation strategies mitigating the loss in efficiency through active control of pathway expression. Yet, a systematic approach for selecting and implementing genetic circuits within such large space has remained largely absent. Biosensor-mediated control of metabolic pathways has been proposed as a strategy to improve the performance of those bioprocesses. By linking industrial biosynthetic pathways to measurable metabolites through biochemical transformations, a highly interconnected regulatory design space is unveiled, offering new opportunities for pathway dynamic control. Several strategies for dynamic control have been proposed in metabolic engineering and synthetic biology [ 9 , 10 ]. They are generally based on the introduction of a biosensing system [ 11 ], followed by a genetic control circuit [ 12 , 13 ]. In a similar fashion, several design principles were proposed in different studies, often mimicking what could be understood as the basic functionality of the proportional-integral-derivative (PID) controller in a biological context [ 14 ], mainly due to the versatility of such basic control strategy, although other controller designs are possible in synthetic biology which could be seen closer to the actual biological feedback actions found in nature [ 12 , 13 , 15 ]. Notably, efforts have been focused on achieving integral control functionality, as it allows error-free or perfect adaptation in biomolecular systems [ 16 ]. A well-known early proposal of such funcionality is found in the antithetic feedback motif [ 17 , 18 ]. In addition to the controller circuit, design can also be focused on the use of multiple biosensors or the selection of the topology of the circuits. For instance, several control topologies have been proposed for the naringenin pathway beyond a single feedback loop, including a cascaded control using multiple biosensors [ 19 ] and the use of three control layers [ 20 ]. Here, we introduce BiosInt , an allosteric transcription factor-based genetic biocircuit, with quasi-integral adaptation control capabilities that can be used to increase robustness and performance of biomanufacturing engineered strains (see Figure 1 ). To test the capabilities of the circuit, we have analysed its performance for the set of more than 30,000 metabolic pathway topologies found in the bio-based chemical production and detectable space of main compounds with industrial interest. For a given implementation of the BiosInt circuit, we carried out a multiobjective optimisation process that provides the inverse control solution of optimal topology starting from any given pathway configuration and enzyme expression ratios. Next, synthetic datasets are generated to train machine learning-based predictive models with the data obtained from the simulations of each topology by varying enzyme expression levels and their corresponding concentrations. We validated the models by showing their ability to predict the best control topology for a given set of nominal enzyme ratios. Download figure Open in new tab Figure 1. The general design scheme of this study. The BiosInt circuit regulates the overexpression of enzymes in the pathway through transcription factor TF 2 based on the feedback measurements obtained from transcription factor TF 1 , modulated by the reference Ref . An AI-based predictive system selects the optimal topology for sensing and regulation depending on the initial metabolic pathway design. In that way, strains developed in a controlled lab environment become more robust and can overcome stress in industrial production environments. As a proof-of-concept, we show its application to the design of the genetic constructs expressing flavonoid production pathways, providing optimal performance for the BiosInt -mediated dynamic regulation. By introducing this novel circuit inverse design and its application to dynamic control in biomanufacturing, we pave the way for a future design pipeline in biofoundries delivering more robust and efficient sustainable bioproduction processes. 2 Materials and methods 2.1 Data sets DetSpace sources The list of 484 compounds of industrial interest was obtained from a curated map of bio-based chemical compounds [ 21 ]. This list was reviewed and expanded accordingly. The 436 detectable molecules were sourced from Sensbio, a comprehensive database of biosensors [ 22 ]. These databases were combined and cross-linked to obtain the reference metabolic pathways on the DetSpace web server, through the use of reaction rules [ 23 ]. These reaction rules are able to connect detectable and producible compounds through the use of bioretrosynthesis. Kinetic constants The kinetic constants of the enzymes were obtained from the Brenda database [ 24 ] in the case of specific pathways, like naringenin. For generic enzymes, a range of values between 10 − 2 and 10 5 was used. 2.2 Determination of topologies Starting with the compound of industrial interest, the metabolic pathway was traced backwards bioretrosynthetically through the iterative application of reaction rules until the precursor compounds were found to be present in the host organism, Escherichia coli . With this information, a graph representing the pathway was created, removing compounds that can be considered trivial, such as water or other cofactors, to obtain the Weisfeiler Lehman graph descriptor hash [ 25 ]. This method ensures identical results for isomorphic graphs and different results for non-isomorphic, allowing us to classify the metabolic pathways according to whether they are isomorphic or not. The principal component decomposition (PCA) of the graph set based on the topology descriptor started from 43 components, and they were reduced to 23 maintaining the cumulative explained variance ratio above 0.95. In order to classify the set, the topologies were codified as shown in Figure 2 . For the bioproduction design space, 48 different topologies were found; and nearly 35,000 for the biodetectable space. Download figure Open in new tab Figure 2. Diagram of two topologies present in DetSpace and their corresponding coding. (a) This topology presents a single reaction with one substrate and one product, therefore, the graph is coded as 1 > 1. (b) This topology has two reactions, the second one with two substrates, therefore, the graph is coded as 1 > 2 > 1. 2.3 Modeling the pathway response Reactions in the pathway follow the Michaelis-Menten equation 1 , where [ Product ] and [ Substrate ] are the concentrations of each compound. [ E ] represents the enzyme concentration and k cat and K m its kinetic constants. K d is the degradation constant of the product: When constructing the generic cases, random values within the range [10 − 2 , 10 5 ] mM/s were assigned to the kinetic constants k cat and K m for each of the enzymes in the pathway. A constant value was set for the concentrations of the input compounds, with a degradation constant of 0.01 mM/s. The enzyme concentrations are within the range [0.01, 100] mM. For multiple substrates, we used a generalized Michaelis-Menten equation. 2.4 Modeling the BiosInt dynamic behavior Based on the previously modelled route, a control was implemented on the dynamic response. The steady state values for each metabolite concentration were used as the initial conditions. A designated implementation of the BiosInt parameters was defined and we assumed that such configuration is the one available for the controller device in all experiments. The controller operation requires two user-defined parameters that determine the control topology: the enzyme, whose expression is being regulated by the controller and the metabolite that will be detected by using an available biosensor. The simulation of the genetic circuit consists of two stages. First, the regulated is simulated until it returns to the steady state. In the second stage, a perturbation is introduced into one of the pathway precursor metabolites by lowering its concentration, and the simulation is performed until the system reaches the new equilibrium state. The model equations are as follows: The definitions of each parameter and the value assigned are shown on Table 1 . View this table: View inline View popup Download powerpoint Table 1: Parameters of the BiosInt model. 2.5 Selection of the optimal topology The control topology is defined by two selectable parameters: the reaction being controlled and the metabolite used as a biosensor. Figure 3 shows two examples of possible control topologies for the 1 > 1 > 1 topology. In these examples, the metabolite used as a biosensor, B or C , and the reaction being controlled, R1 or R2 , change. The graph shows how by modifying this configuration we obtain different dynamic response for the final product C . Download figure Open in new tab Figure 3. Two examples of control topologies for 1 > 1 > 1. (a) Topology 1 (top figure) uses C as biosensor and regulates expression of enzyme R 1 . (b) Topology 2 (bottom figure) uses B as biosensor and regulates R 2 . For each of the different pathway topologies, pathway responses were calculated by varying the two control parameters using the models described above. Four values were obtained in each simulation: O 1 N , O 1 P , O 2 N , O 2 P , which are defined as follows: O 1 corresponds to the concentration of the final product of the pathway; O 2 is the average concentration of the enzymes; N and P indicate the two stages of the simulation, with N being the moment prior the perturbation and P being the final moment of the simulation. With these values, a score was assigned to each combination of control parameters as: O 1 P / O 2 P . For each pathway topology, this process was carried out for different enzyme concentrations in order to determine the optimal control topology at different context conditions. 2.6 Predictive model A predictive model based on a feed-forward neural network was constructed from the data previously obtained. For a given topology, the training set consisted of different combinations of enzyme concentrations as inputs, and the outputs were the optimal control topologies, encoded through one-hot-encoding. For each enzyme, 9 concentration values were defined within the range [0.01, 100] and all possible combinations between the different enzymes in the pathway were explored. In the case of the topologies 1 > 1 and 2 > 1, more data need to be generated as there is only one reaction, so 100 values within the range were used. The dataset was divided into 80% training and 20% test. Tensorflow and Keras were used to train the neural network. Accuracy was used as a measure of performance. For each topology, cross-validation was repeated 100 times, and the obtained mean accuracy was used as a result. The neural network has a 4-layer architecture. The intermediate layers consist of a layer of 64 nodes with sigmoid activation function and a dropout layer. The output layer has a softmax activation function. The optimizer is an Adam optimizer and the loss function is categorical cross entropy from Keras . 2.7 Use case: dynamic regulation of the naringenin pathway The described general workflow was also applied in a focused study for the naringenin pathway, see Figure 4 . The pathway consists of 4 reactions, with the third one using two products (topology code 1 > 1 > 2 > 1). k cat and K m values for each reaction were obtained from the Brenda database [ 24 ]. The compounds involved in the pathway, except for naringenin chalcone, can be associated with an allosteric transcription factor, and, therefore, they can be detected by a biosensor for dynamic regulation. In total, thus, there are 20 possible optimal topologies, i.e., (4 reactions × 5 biosensors) . As we considered 9 possible values for each enzyme concentration, the resulting dataset contains 6,500 different combinations, which can be then used to train the predictive model, in a similar fashion as for other topologies considered in the study. Download figure Open in new tab Figure 4. Production pathway of the flavonoid naringenin, showing the enzymes involved in each reaction (TAL, 4CL, CHS, CHI), as well as the allosteric transcription factors activated by metabolites in the pathway (Tyr, PadR, FerC, FapR, FdeR). 3 Results and Discussion 3.1 Determination of the topological distribution of the bioproduction and biodetectable design spaces Synthetic bioproduction pathways are established by a set of enzymes that determine a metabolic route for the conversion of endogenous metabolites into the desired target products. Several studies have determined a consensus reference set of approximately 500 pathways that contains the most industrially-relevant bio-based targets for the bioeconomy [ 21 ]. Those pathways contain a large diversity in terms of their topology content, meaning by this that even though some of the pathways are linear chains of reactions, others contain branched topologies where at least one of the reaction steps depend on two or more reactants. Branched steps need all their reactants to be produced in order for the reaction to proceed, therefore creating more complex pathway topologies. Considering Escherichia coli as the chassis organism, we found 48 distinct topologies contained in the aforementioned reference industrial pathway set. Figure 5 shows their topology distribution, which approximately follows an exponential law. Most of the pathways had a simple linear distribution containing a few number of enzymes. However, branched distributions were also common, especially for low-length pathways. Download figure Open in new tab Figure 5. Distribution of topologies in the top 500 industrially-relevant bio-based targets pathways, expressed in E. coli as chassis organism. Enzymes in the pathway topology are represented by the ‘ > ’ symbol, and are preceded by their number of precursors. Branched steps occur when the number of precursors is greater than one. Our purpose is to evaluate the viability of establishing and automating the design of a dynamic regulation strategy for each of those topologies through the BiosInt circuit, as its behavior that is close to an integral controller should facilitate the development of producing strains with an increased robustness. In order to implement such strategy, we need first to determine those metabolites and intermediates in the pathway that can be sensed through an allosteric transcription factor, and therefore candidates to establishing a feedback loop in the pathway through a regulation circuit that actuates into the expression of one of the genes associated with the enzymes in the pathway. In a previous study, we introduced the tool DetSpace [ 23 ], which provides the full set of metabolites in the bioproduction design space that can be detected, directly or through a sensing pathway by means of an allosteric transcription factor. Therefore, we can calculate by means of the DetSpace tool the topological distribution of the detectable design space, i.e., a metabolic circuit consisting of a production pathway and a detection pathway connecting the target metabolite to an inducible transcription factor. In order to analyze the topological distribution of the collection of circuits for the biosensing pathways under study, a PCA decomposition was performed on the topology descriptor (see Materials and Methods). On the graph were represented the two most important. The resulting distribution is shown in Figure 6 for the two main components, highlighting the exponential distribution of the number of occurrences of each type of topology. Download figure Open in new tab Figure 6. Distribution of topologies present on DetSpace across the two main components PC 1 , PC 2 after a PCA was applied to their descriptor vector. The colors represent the frequency with which each topology appears. In the obtained distribution, some clusters were observed with shared features, suggesting an underlying pattern. For instance, a cluster composed of topologies with a frequency between 500 and 1000 was formed (top right corner in Figure 6 ) whose topologies turned out to be those with the longest average length. Similarly, a cluster with frequencies greater than 1000 is also found on the left side of the figure, where the topologies contained in the cluster are those with the highest presence of bifurcations, highlighting the complexity of the set. In both cases, topologies always presented bifurcations at the initial reactions. For the rest of the distribution groups, no notable clusters are apparent. Unlike in bioproduction, all linear topologies have low frequency, between 1 and 50. This may be explained because DetSpace design space has been expanded to contain as many routes as possible, generating more complex topologies. 3.2 Introducing the BiosInt dynamic regulation circuit Our goal is to develop an allosteric transcription factor-based controller circuit that can implement the dynamic regulation of biosynthetic pathways based on the optimal combination between biosensors and transcriptional actuators of the genes inolved in the pathway. To that end, Figure 7 shows the basic configuration of the BiosInt circuit. The circuit consists of two allosteric transcription factors that work as repressor-activator. Transcription factor TF 1 works as a biosensor for the sensed molecule T , which inhibits the biosensor. The transcription factor activates the promoter Pro 2 that expresses allosteric transcription factor TF 2 , which is continuously inhibited by effector R that provides a continuous reference level and can be either produced by some enzyme REF 1 or fed into the medium. Non-inhibited TF 2 will activate a promoter Pro 3 that, in turn, allows overexpression of a production pathway enzyme E 1 , which in the example represented in Figure 7 is the one that directly overproduces the sensed target T . Download figure Open in new tab Figure 7. The basic configuration of the BiosInt circuit regulates the expression of enzyme E through a cascade interconnection of transcription factors TF 1 and TF 2 , which are, in turn, allosterically inhibited by T and R , respectively. In case that the intracellular availability of target T decreases, TF 1 will increase the expression of TF 2 and therefore, there will be more a higher availability of non-inhibited triggering the overexpression of enzyme E 1 and therefore more target compound T could potentially be produced, provided that the theoretical maximum titer has not been yet reached. This strategy, thus, should be able to correct deviations from the initial production titers due to changes in the medium conditions, as long as the system is kept away from the saturation regime. Interestingly, the BiosInt circuit could be eventually tuned to a desired regulation action by changing the levels of reference species R present in the media, allowing therefore an external control of the system. Figure 8a shows different dynamic responses for a generic pathway with topology 1 > 1, according to the level of reference species R . As mentioned above, a higher value for the reference results in a lower production of metabolite T . Figure 8b shows the dose-response curve of the target for different values of the reference R . In this example, the reference R allows the fine-tuning of the product end-value within a range given approximately by 14-17 mM. Download figure Open in new tab Figure 8. Dynamic responses of a pathway with topology 1 > 1, according to the level of reference species R . a) Product dynamic response; b) Product end values dose-response curve depending on reference R . 3.3 Mapping optimal topologies depending on pathway context Once a target compound has been selected, there is a limited selection of gene variants for each enzymatic step in the production pathway. Such limited selection is therefore a design constraint, meaning that only a reduced set of possible choices is available. Each choice of gene variant, and associated regulatory elements like promoters and RBSs will imply a different balance between the expression levels of the enzymes involved in the pathway. We investigated how this design choice might determine the optimal control topology for the BiosInt closed-loop regulation. The control topology is defined by two parameters: the reaction being controlled and the metabolite detected by the biosensor. The optimal topology is defined as the topology that provides the best control, according to some criteria, for a specific pathway in a particular context, i.e., for the enzymes selected in the pathway. In the study, the best control was defined as the one that achieves the best trade-off between the highest concentration of the final product after a perturbation and the associated metabolic burden (see Materials and Methods). As shown in Figure 9 , the optimal regulation topology depends on the context for each pathway class. In this example, a two-enzyme pathway ( E 1 , E 2 ) was chosen. Depending on the variants, steady-state concentrations of the enzymes in the pathway will vary. We explored how this design choice could impact the selection of the best feedback topology, namely, the selection of the regulated enzyme and feedback metabolite. As expected, low concentrations of E 1 or E 2 are usually compensated by the over-expression of the depleted enzyme and feedback of the enzyme precursor. Interestingly, when concentrations become closer for both enzymes, there is a transition region where the optimal topology is transferred to another metabolite in the pathway. As concentrations become split apart, the optimal topology converges to the enzyme present at lowest concentration. Download figure Open in new tab Figure 9. Optimal topology depending on enzyme concentration. In this example pathway, there are two enzymes E 1 and E 2 , and three metabolites A, B , and C . Depending on the design context, steady-state concentrations of the pathways might vary. The optimal feedback topology for BiosInt -based regulation is shown in the heatmap, where each color code represents a pair regulated enzyme-feedback metabolite. Download figure Open in new tab Figure 10. The naringenin bioproduction pathway and the associated intermediate metabolites. Therefore, there is a transition region where we observe that the problem of choosing the optimal topology has no straightforward solution and shows a complex dependency with respect of the initial enzyme concentrations. Interestingly, the situation becomes more complex as the complexity of the pathway increases, either in length or in the presence of branched steps, as shown in the topology distribution in Figure 5 . 3.4 Prediction of the optimal topology Results from previous section suggest that as the complexity of the pathway increases, the selection of the optimal topology for a given implementation of the BiosInt circuit becomes more challenging. This predictive inverse optimal topology design problem can be stated as follows: Given A target metabolite and a chassis organism; An implementation of a metabolic pathway design, carried out through metabolic engineering and synthetic biology methods; A BiosInt controller design, tuned to a designated regulation function; Find An optimal topology in terms of regulated enzyme and feedback metabolite according to the desired criteria for pathway performance (production, robustness, efficiency, etc.). In order to solve this problem, we introduce here a machine-learning based predictive system. The system should be able to learn an optimal solution to the problem based on the given context and the defined biomanufacturing objectives, which in this case were defined as maximizing the ratio between recovery after a perturbation in the production of the target compounds with respect to the metabolic burden elicited by over-expressing the regulated enzyme. To that end, we generated a training set consisting of the synthetic data obtained from the simulation for each topology and possible combinations of initial pathway configurations, defined by the enzyme concentrations, and the performance indicators obtained by the steady-state values of the regulated pathways (see Materials and Methods). The training set was then use to build and validate a predictive model capable of predicting the optimal topology based on the context. Several machine learning approaches were investigated. In order to assess the complexity of the problem under study, we compared the performance of a Bayesian predictor trained with the generated data, with that of a neural network. Notably, results obtained by the neural network offered a clear improvement with respect to the Bayesian predictor. As shown in Table 2 , we obtained the average performance, based on prediction accuracy in the cross-validation, and its associated standard deviation for each of the topologies present in the training set, both for the neural network and the Bayesian. It can be seen that in all cases the neural network outperformed the Bayesian model, indicating as expected that the problem requires of a powerful training system to be able to solve accurately the topology selection problem. In general, the increase in performance was more apparent for those pathways with higher length and number of branches, with cases with an increase in performance of 20%, like in the case of the 1 > 2 > 2 > 2 > 1 topology. View this table: View inline View popup Download powerpoint Table 2: Performance comparison between neural network model (NN) and Bayesian predictor (Bayesian), according to rhe cross-validation mean accuracy. 3.5 The case of the naringenin pathway The production pathway for the naringenin flavonoid offers an interesting case study, since most of the metabolites involved in the pathway can be detected by a transcription factor and, therefore, they can serve as the feedback point for the BiosInt circuit. As it has been said before, naringenin chalcone is the only one that does not have an allosteric transcriptor factor associated. For this specific study, we have added a variation. In order to produce a perturbation in the compound production, we will study the case in which the perturbation appears as the depletion of one of the compounds involved in the pathway bifurcations. The obtained optimal topologies in the combinatorial design space of initial enzyme concentrations showed that regulation of the CHS and CHI can provide the best control, as they appear in approximately 50% of the combinations. This result may be related to the fact that these two reactions are the ones causing a bottleneck in the pathway. Therefore, it is important actuating on them to alleviate the bottleneck effect. On the other hand, the frequency with which each metabolite appears as a sensor is shown in Table 3 . It can be seen that the compound acting as the best sensor is, in most cases, the one that undergoes the perturbation. According to this observation, it might be reasonable to think that detecting the perturbation directly produces the best results in control compared with other possible indirect measurements. In the case of a tyrosine deficiency, a significant percentage of cases were also found where the second branched compound, malonyl-CoA, present downstream in the pathway with respect to tyrosine, was the optimal feedback compound, which might be due to cases where there is a high availability of the tyrosine-consuming enzyme TAL, and therefore feedback in the dynamic regulation is shifted towards malonyl-CoA. View this table: View inline View popup Download powerpoint Table 3: Ratio of occurrence of each of the compounds in the naringenin pathway acting as biosensors in the optimal topology within the combinatorial design space. The combinatorial design space of the naringenin producing pathway, modeled by their steady-state enzyme concentrations was simulated in order to generate a training set to build and cross-validate a predictive model for optimal dynamic regulation topology selection. As in the previous case, the neural network offered a better performance than the Bayesian model, as shown on Table 4 . For the naringenin case, the obtained increase in performance was in excess of 20%, achieving an accuracy close to 95% in the neural network. View this table: View inline View popup Download powerpoint Table 4: Performance comparison between neural network model (NN) and Bayesian predictor (Bayesian) for the naringenin pathway. 4 Conclusions In summary, we have introduced here a regulation circuit, BiosInt and its associated AI-based design tool biosint.ai , with the ability of selecting the optimal topology for increasing strain robustness given an initial metabolic pathway design. As the demand for industrial biomanufacturing processes increases due to the urging need of transitioning towards a global sustainable bioeconomy, we anticipate a growing number of bio-based production processes having the need of this type of regulation strategies to confer robustness to the system in order to make the overall process viable and sustainable. Our approach differs from other proposed solutions, which depend on the availability of a detailed knowledge of the pathway mechanistic dynamics and assume that the regulation circuit can be easily tuned in function of the chosen design solution. On the contrary, in this study we assume that the regulation circuit, named BiosInt is already built and its functionality has been experimentally characterised and, in addition, the desired pathway has been already designed based on metabolic engineering considerations and well-established methods. Therefore, the main goal of our approach is to develop a predictive system that can make an automated decision system that can select the best regulation topology to choose from within the large combinatorial set, trained on the previously obtained data, rather than having a focus on a particular class of pathways or compounds. We believe that this compound-agnostic approach that solves the inverse design problem based on the availability of pathway biosensor capabilities, will facilitate the adoption of the proposed dynamic regulation framework within a general Design-Build-Test-Learn (DBTL) workflow by easing the overall design process, boosting in that way progress towards advanced next-generation industrial biomanufacturing. Author contributions HM performed the simulations, analyzed the data and generated the figures. PC conceived the study and analyzed the data. IOM contributed to the modeling and performance assessment. HM, IOM, PC wrote the manuscript. All authors have given approval to the final version of the manuscript. Competing interests The authors declare no competing interests. Code availability The code used to generate the results presented in this study is available at the following GitHub repository: https://github.com/dbtl-synbio/biosint . Acknowledgments HM, IOM, PC acknowledge support from Generalitat Valenciana through grant CIAICO/2021/159 (SmartBioFab) and MCIN/AEI /10.13039/501100011033 and European Union NextGenerationEU/ PRTR funding through grant TED2021-131049B-I00 (BioEcoDBTL). The computations were performed on the HPC cluster Garnatxa at Institute for Integrative Systems Biology (I2SysBio), I2SysBio is a mixed research center formed by University of Valencia (UV) and Spanish National Research Council (CSIC), and at the HPC cluster Rigel of the Universitat Politécnica de Valéncia. References [1]. ↵ Otero-Muras and P. Carbonell , “Automated engineering of synthetic metabolic pathways for efficient biomanufacturing,” en , Metab. Eng ., vol. 63 , pp. 61 – 80 , Jan . 2021 . OpenUrl CrossRef PubMed [2]. ↵ P. Carbonell , T. Radivojevic , and H. Garcéa Martén , “Opportunities at the intersection of synthetic biology, machine learning, and automation,” en , ACS Synth. Biol ., vol. 8 , no. 7 , pp. 1474 – 1477 , Jul . 2019 . OpenUrl CrossRef PubMed [3]. ↵ M. C. T. Astolfi et al. , “Automated strain construction for biosynthetic pathway screening in yeast,” en , ACS Synth. Biol ., vol. 14 , no. 10 , pp. 4143 – 4151 , Oct . 2025 . OpenUrl PubMed [4]. ↵ M. Gotsmy , A. Erian , H. Marx , S. Pflügl , and J. Zanghellini , “Predictive dynamic control accurately maps the design space for 2,3-butanediol production,” en , Comput. Struct. Biotechnol. J ., vol. 23 , pp. 3850 – 3858 , Dec . 2024 . OpenUrl PubMed [5]. ↵ C. J. Robinson et al. , “Prototyping of microbial chassis for the biomanufacturing of high-value chemical targets,” en , Biochem. Soc. Trans ., vol. 49 , no. 3 , pp. 1055 – 1063 , Jun . 2021 . OpenUrl PubMed [6]. ↵ M. T. Mohedano , O. Konzock , and Y. Chen , “Strategies to increase tolerance and robustness of industrial microorganisms,” en , Synth. Syst. Biotechnol ., vol. 7 , no. 1 , pp. 533 – 540 , Mar . 2022 . OpenUrl CrossRef PubMed [7]. ↵ C. J. Hartline , A. C. Schmitz , Y. Han , and F. Zhang , “Dynamic control in metabolic engineering: Theories, tools, and applications,” en , Metab. Eng ., vol. 63 , pp. 126 – 140 , Jan . 2021 . OpenUrl CrossRef PubMed [8]. ↵ C. Ni , C. V. Dinh , and K. L. J. Prather , “Dynamic control of metabolism,” en , Annu. Rev. Chem. Biomol. Eng ., vol. 12 , no. 1 , pp. 519 – 541 , Jun . 2021 . OpenUrl PubMed [9]. ↵ I. Ruolo , S. Napolitano , D. Salzano , M. di Bernardo , and D. di Bernardo , “Control engineering meets synthetic biology: Foundations and applications,” en , Curr. Opin. Syst. Biol ., vol. 28 , no. 100397 , p. 100 397, Dec . 2021 . OpenUrl [10]. ↵ J. C. Moore , I. Ramos , and S. Van Dien , “Practical genetic control strategies for industrial bioprocesses,” en , J. Ind. Microbiol. Biotechnol ., vol. 49 , no. 2 , Apr . 2022 . [11]. ↵ M. Pisani and P. Carbonell , “Challenges and opportunities in smart biosensing for biomanufacturing,” en , ACS Synth. Biol ., vol. 14 , no. 9 , pp. 3267 – 3274 , Sep . 2025 . OpenUrl PubMed [12]. ↵ C. Barajas and D. Del Vecchio , “Synthetic biology by controller design,” en , Curr. Opin. Biotechnol ., vol. 78 , no. 102837 , p. 102 837, Dec . 2022 . OpenUrl [13]. ↵ M. Filo , C.-H. Chang , and M. Khammash , “Biomolecular feedback controllers: From theory to applications,” en , Curr. Opin. Biotechnol ., vol. 79 , no. 102882 , p. 102 882, Feb . 2023 . OpenUrl [14]. ↵ M. Chevalier , M. Gómez-Schiavon , A. H. Ng , and H. El-Samad , “Design and analysis of a proportional-integral-derivative controller with biological molecules,” en , Cell Syst ., vol. 9 , no. 4 , 338 – 353.e10 , Oct . 2019 . OpenUrl PubMed [15]. ↵ X. Lv et al. , “New synthetic biology tools for metabolic control,” en , Curr. Opin. Biotechnol ., vol. 76 , no. 102724 , p. 102 724, Aug . 2022 . OpenUrl [16]. ↵ P. Bhattacharya , K. Raman , and A. K. Tangirala , “ Design principles for perfect adaptation in biological networks with nonlinear dynamics ,” Bulletin of Mathematical Biology , vol. 86 , no. 8 , p. 100 , 2024 . OpenUrl PubMed [17]. ↵ A.-A. Baetica , Y. P. Leong , and R. M. Murray , “Guidelines for designing the antithetic feedback motif,” en , Phys. Biol ., vol. 17 , no. 5 , p. 055 002, Aug . 2020 . OpenUrl [18]. ↵ Y. Boada , A. Vignoni , J. Picó, and P. Carbonell , “Extended metabolic biosensor design for dynamic pathway regulation of cell factories,” en , iScience , vol. 23 , no. 7 , p. 101 305, Jul . 2020 . OpenUrl [19]. ↵ T. Jiang , C. Li , Y. Zou , J. Zhang , Q. Gan , and Y. Yan , “ Establishing an autonomous cascaded artificial dynamic (autocad) regulation system for improved pathway performance ,” Metabolic engineering , vol. 74 , pp. 1 – 10 , 2022 . OpenUrl PubMed [20]. ↵ S. Zhou , S.-F. Yuan , P. H. Nair , H. S. Alper , Y. Deng , and J. Zhou , “Development of a growth coupled and multi-layered dynamic regulation network balancing malonyl-CoA node to enhance (2s)-naringenin biosynthesis in escherichia coli,” en , Metab. Eng ., vol. 67 , pp. 41 – 52 , Sep . 2021 . OpenUrl CrossRef PubMed [21]. ↵ W. D. Jang , G. B. Kim , and S. Y. Lee , “An interactive metabolic map of bio-based chemicals,” en , Trends in Biotechnology , vol. 41 , no. 1 , pp. 10 – 14 , Jan . 2023 , issn: 01677799 . doi: 10.1016/j.tibtech.2022.07.013 . OpenUrl CrossRef PubMed [22]. ↵ J. Tellechea-Luzardo , H. Martén Lázaro , R. Moreno López , and P. Carbonell , “Sensbio: An online server for biosensor design,” en , BMC Bioinformatics , vol. 24 , no. 1 , p. 71 , Feb . 2023 , issn: 1471-2105 . doi: 10.1186/s12859-023-05201-7 . OpenUrl CrossRef PubMed [23]. ↵ H. Martén Lázaro , R. Marén Bautista , and P. Carbonell , “ Detspace: A web server for engineering detectable pathways for bio-based chemical production ,” Nucleic Acids Research , vol. 52 , no. W1 , W476 – W480 , 2024 . OpenUrl PubMed [24]. ↵ A. Chang et al. , “ Brenda, the elixir core data resource in 2021: New developments and updates ,” Nucleic acids research , vol. 49 , no. D1 , pp. D498 – D508 , 2021 . OpenUrl CrossRef PubMed [25]. ↵ N. Shervashidze , P. Schweitzer , E. J. Van Leeuwen , K. Mehlhorn , and K. M. Borgwardt , “ Weisfeiler-lehman graph kernels .,” Journal of Machine Learning Research , vol. 12 , no. 9 , 2011 . View the discussion thread. Back to top Previous Next Posted November 17, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following BiosInt: Biosensor-based smart design of pathway dynamic regulation for industrial biomanufacturing Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share BiosInt: Biosensor-based smart design of pathway dynamic regulation for industrial biomanufacturing Hèctor Martín Lázaro , Irene Otero-Muras , Pablo Carbonell bioRxiv 2025.11.17.688676; doi: https://doi.org/10.1101/2025.11.17.688676 Share This Article: Copy Citation Tools BiosInt: Biosensor-based smart design of pathway dynamic regulation for industrial biomanufacturing Hèctor Martín Lázaro , Irene Otero-Muras , Pablo Carbonell bioRxiv 2025.11.17.688676; doi: https://doi.org/10.1101/2025.11.17.688676 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Synthetic Biology Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17697) Bioengineering (13895) Bioinformatics (41951) Biophysics (21456) Cancer Biology (18594) Cell Biology (25520) Clinical Trials (138) Developmental Biology (13381) Ecology (19903) Epidemiology (2067) Evolutionary Biology (24323) Genetics (15612) Genomics (22510) Immunology (17738) Microbiology (40401) Molecular Biology (17184) Neuroscience (88622) Paleontology (667) Pathology (2833) Pharmacology and Toxicology (4825) Physiology (7644) Plant Biology (15158) Scientific Communication and Education (2046) Synthetic Biology (4296) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00