Entropy production constrains information throughput in gene regulation

doi:10.1101/2024.08.17.608427

Entropy production constrains information throughput in gene regulation

2024 · doi:10.1101/2024.08.17.608427

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 174,662 characters · extracted from preprint-html · click to expand

Directed information flow in reaction networks under energy constraints: A framework for communication and optimal design applications | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Directed information flow in reaction networks under energy constraints: A framework for communication and optimal design applications View ORCID Profile Maximilian Gehri , View ORCID Profile Lukas Stelzl , View ORCID Profile Heinz Koeppl doi: https://doi.org/10.1101/2024.08.17.608427 Maximilian Gehri 1 Centre for Synthetic Biology, Technical University of Darmstadt , 64283 Darmstadt, Germany Find this author on Google Scholar Find this author on PubMed Search for this 
author on this site ORCID record for Maximilian Gehri Lukas Stelzl 2 Institute of Molecular Physiology, Johannes Gutenberg University Mainz , 55122 Mainz, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Lukas Stelzl Heinz Koeppl 1 Centre for Synthetic Biology, Technical University of Darmstadt , 64283 Darmstadt, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Heinz Koeppl For correspondence: heinz.koeppl{at}tu-darmstadt.de Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Biological information processing is constrained by energetic costs, making it natural to treat information flow and dissipation jointly. We develop a unified framework for continuous-time chemical reaction networks (CRNs) that couples trajectory-level mutual and directed information between disjoint species sets with process-based stochastic thermodynamics for open, multi-reservoir systems. The formulation covers causal conditioning and indistinguishable reactions at the subnetwork level arising from multiple reservoir coupling or projection. To ensure stringent use of stochastic thermodynamics across disciplines, we compile a unified account that treats coarse-graining and multi-reservoir modeling correctly when estimating dissipation. We also give a conversion from species–reaction graphs to local-independence graphs on reactions and link local independence to causally conditioned directed information. Applications are twofold: First, the framework yields a rigorous continuous-time communication model over a CRN, with messages source-encoded by time-dependent chemostat protocols. Capacity is posed over trajectory distributions under principled thermodynamic costs. 
Graph tools enable a functionality-based classification that distinguishes between encoding reactions and reactions that constitute the transmission path. This facilitates characterizing degrees of freedom in the chemical noisy channel coding problem. Second, we advocate directed information as an objective for naturally evolved and engineered biochemical circuits under finite energy budgets. Case studies with minimal promoter-switching models illustrate framework application and visualize trade-offs between information flow and dissipation. I. INTRODUCTION Energy consumption is integral to gene regulatory mechanisms and intracellular signal transduction in both prokaryotes and eukaryotes. For example, chromatin remodeling complexes in eukaryotes use ATP to maintain gene expression profiles [ 1 , 2 ], while in prokaryotes, ATP conversion to cAMP provides a cofactor for transcription factors such as the catabolite-activating protein (CAP) [ 3 ]. These processes indicate that biological information processing is fundamentally constrained by energetic costs. This observation motivates a closer investigation of how upper bounds on energy dissipation shape the ability of cellular systems to process and transmit information. Gene regulatory networks form an essential part of intracellular information processing [ 4 ], and information-theoretic tools are increasingly used to analyze their signaling capabilities and regulatory architectures [ 5 , 6 ]. There is growing evidence that cells encode information in the spatiotemporal variation of populations of molecular species when responding to internal or external stimuli [ 7 – 10 ]. 
Classical information theory further implies that in systems with feedback and network-mediated memory, meaningful communication can only be properly characterized by information-theoretic quantities defined over entire trajectories, as only these capture the causal temporal structure required to model such signaling processes consistently [ 11 ]. Markovian stochastic chemical reaction networks (CRNs) offer a natural modeling framework in this context, capturing both the discrete and stochastic nature of molecular reactions. Any static or dynamical representation of information is thus accurately captured in the trajectories of a CRN. In complement, CRNs also allow an accurate characterization of thermodynamic costs. In particular, we advocate the use of directed information (DI) between continuous-time stochastic processes [ 11 – 13 ] as a natural measure of causal directional information flow in CRNs. Especially in the context of network-based signal processing and causal feedback encoding of information we will show that the DI represents an upper bound on the information transmission rate between an external stimulus trajectory and the molecular representation of the processed signal. We generalize existing expressions for mutual information (MI) between trajectories to quantify the total information exchange between subnetworks, i.e., subpopulations of species within a CRN. In addition, we derive the corresponding DI, which captures the causal information flow from one subnetwork to another over time. Such subnetwork formulations are especially relevant in settings where multiple external signals are sensed by distinct molecular species and subsequently integrated through internal molecular interactions to produce a response involving multiple downstream species [ 14 , 15 ]. While we do not require the entire CRN to be bipartite or multipartite, we impose the condition that the two subnetworks of interest do not undergo simultaneous state changes [ 16 ]. 
As a result, the joint dynamics of these two subnetworks form a bipartite, but potentially non-Markovian, marginal process, even though the full CRN is assumed to be Markovian and may exhibit non-bipartite structure. For example, consider the species X, A, and Y in a Markovian conversion network X ⇄ A ⇄ Y. The full system is Markovian and non-bipartite, as both {X, A} and {Y, A} can change simultaneously. However, the marginal dynamics of the subnetwork {X, Y} are bipartite and non-Markovian. As a harmonizing model formulation, we use Kurtz’s process-based description of CRNs to align stochastic thermodynamics (sTD) with trajectory-level information theory. This facilitates the analysis of trade-offs between trajectory-level information processing and energy dissipation of metabolic resources in thermodynamic non-equilibrium systems. Such trade-offs between energy dissipation and precision in cellular decision making and adaptation have been described as an emerging general design principle of biological systems [ 17 ]. We provide a comprehensive introduction to stochastic thermodynamics for CRNs. This introduction is certainly longer than is appropriate for a research article. We justify its extent by aiming to serve as a tutorial and thereby promote rigorous thermodynamic modeling across scientific disciplines. An important feature of our exposition is the explicit integration of internal microscopic degrees of freedom, which allows us to distinguish between the physically measurable dissipated heat and the related, but not equivalent, mesoscopic heat. We emphasize the necessity of accurately accounting for multiple driven reaction mechanisms and avoiding violations of underlying coarse-graining assumptions. In time-independent settings, the non-equilibrium steady-state (NESS) heat dissipation rate can be identified with the entropy production rate (EPR). 
In contrast, under time-dependent driving, internal microscopic degrees of freedom must be considered explicitly to correctly quantify entropic balances. Finally, we explore unified applications of information theory and stochastic thermodynamics to CRNs. First, we propose a thermodynamically consistent chemical communication model in which external information is encoded in the temporal dynamics of a subnetwork and processed through intermediate species, allowing for memory, feedback, and physically principled constraints on energy consumption. Second, we propose energetically constrained DI as an objective function for deriving energy-efficient designs of biological circuits. This can serve both as a testable prediction of optimal information flow as an emergent evolutionary trait and as a foundation for synthetic biology design. A key contribution is the identification of network decomposition rules that allow reactions to be classified into “input/output modulating”, “jointly modulating”, “feedforward” and “feedback” reactions. This provides a more detailed understanding of the specific function of a reaction in terms of information processing and makes information-theoretic optimization problems practically accessible. We anticipate that continued developments in this direction will support the construction of genetic circuits that optimally balance information processing and energetic cost. A. Previous related work A powerful formalization of the description of Markovian chemical reaction networks was provided by Kurtz and Anderson [ 18 ]. They demonstrated how counting processes relevant to these networks can be represented using Poisson-type processes, leading to a random time-change representation. Therewith, they obtain a stochastic equation for continuous-time Markov jump processes, i.e., their state-level description. Our introduction to CRNs largely follows their work, supplemented by some thermodynamically relevant concepts. 
Information theory was pioneered by Claude E. Shannon and laid the foundation for the digital age. One of his landmark contributions was the first channel coding theorem [ 19 ], which determines the maximum rate at which information can be reliably transmitted despite noise corrupting the received signal. A key aspect of Shannon’s work was his abstraction of noise corruption through the concept of a transmission channel, characterized by the conditional distribution of the output given the encoded message. He introduced mutual information, a functional of the joint distribution of input and output of the channel, to quantify the stochastic dependency between input X and output Y and thereby the information exchange between them. Channel coding theorems identify the supremum of the mutual information for a given transmission channel, known as information (channel) capacity, with the maximum information encoding rate at which the probability of a decoding error of the sent message from the observed signal Y asymptotically approaches zero in the limit of infinite transmission duration. This limiting rate is known as the coding (channel) capacity. Following Shannon, channel coding theorems and other fundamental theoretical results like source-channel separation theorems [ 20 ] have been derived for various communication channels. Marko developed a theory of bidirectional communication, highlighting that communication, unlike mere transmission, is intrinsically bidirectional [ 21 ]. Building on his ideas, Massey established a consistent theory of information transmission that incorporates the use of feedback by emphasizing the distinction between statistical and causal dependence [ 11 ]. Feedback is not an inherent feature of the channel itself; rather, it is integrated into the encoding of the signals transmitted through the channel. 
In particular, Massey introduced the concept of directed information and demonstrated that, when used in place of mutual information, it could sometimes accurately extend the channel coding theorems for causal communication systems to transmission with the use of feedback. For channels that are used without feedback, directed information from X to Y reduces to the mutual information between them. In channel coding theorems for causal communication systems with feedback, the coding capacity is identified with the information channel capacity, which is defined either as the supremum of the mutual information between the message and the output sequence, or sometimes as the supremum of the directed information between the message-encoding sequence and the output sequence [ 20 , 22 ]. A generalization of the notion of directed information to continuous-time trajectories is due to Weissman et al . [ 12 ]. The supremum for the continuous-time information capacity is taken over the set of distributions of the channel encoding X given the message and then the message representations themselves [ 23 , 24 ]. To the best of our knowledge, no full communication channel model for CRNs has been proposed to date. Uda, for instance, notes that the concept of information transmission in systems biology remains underdeveloped [ 25 ]. Although Barlow [ 26 ] already hypothesized in the early 1960s that biological signal processing systems may function as communication systems, applications of information theory in systems biology have only been developing into a research field for around two decades [ 25 ]. 
Research in this field has addressed a range of problems, including the analysis of connectivity, sensitivity, and cross-talk in signaling pathways [ 27 ]; the identification of optimal cellular adaptation strategies and their adaptation targets [ 28 ]; and the study of gene regulatory motifs at steady state [ 6 ], at specific time points [ 29 ], or in terms of dynamic features such as amplitude and frequency [ 14 ]. Tostevin and ten Wolde first considered the MI between trajectories in the biological context, recognizing that relevant information may be contained in the temporal dynamics of the input signal [ 30 ]. They provide the mutual information rate (MIR), i.e., the asymptotic differential information gain, under a linear-noise Gaussian approximation. Lestas et al . used MI between trajectories to lower bound the cost of suppressing molecular fluctuations via feedback under a Langevin approximation [ 31 ], later extended by Nakahira et al . who identified continuous-time DI as the appropriate measure [ 32 ]; however, the stringency of these lower bounds has been questioned by Parag [ 33 ]. A measure-theoretic account for the mutual information between the trajectories of two species of a class of biochemical reaction networks was first given by Duso and Zechner [ 34 ] and later generalized by Moor and Zechner [ 35 ]. Moor and Zechner further showed that the trajectory mutual information decomposes into a pair of measure-theoretically defined transfer entropies [ 36 ]. The transfer entropy is in fact identical to the DI under certain conditions. See Appendix C for a brief comparison. In this paper, we further generalize the expression for the mutual information in [ 35 ] to encompass the information flow between disjoint subnetworks without simultaneous reactions in the two sets. Without this bipartite-type requirement for the subnetworks of interest, the MI has been argued to diverge [ 16 ]. In contrast to Spinney et al . 
[ 36 ], we equivalently derive the continuous-time DI for CRNs via the structural extremal-based approach of Weissman et al . [ 12 ] instead of explicitly constructing a Radon-Nikodym derivative. Directed information remains well defined without the bipartite assumption, whereas mutual information generally does not. Sinzger et al . and Gehri et al . advanced techniques for analytically and numerically computing the mutual information of the Poisson-type channel modulated by a telegraph process, which can be regarded as a minimal module of a CRN [ 37 – 39 ]. Moor and Zechner [ 35 ] proposed several methods in the context of CRNs: a combination of stochastic simulation (SSA) [ 40 ] and moment closure approximation, a quasi-exact approach combining SSA and numerical integration, and an analytical approximation. Lastly, Reinhardt et al . developed an exact Monte Carlo method called path weight sampling to compute the mutual information between trajectories [ 41 ], which has recently been adapted for the computation of transfer entropies [ 42 ]. Analytical derivations of information channel capacities for the Poisson-type channel under peak and average-constraints on the message waveforms have been put forth by Kabanov [ 23 ], Davis [ 43 ] and Frey [ 24 ], where Frey also considered marked Poisson-type channels. For small biology-motivated systems, channel capacities have been analytically derived by Thomas and Eckford [ 44 – 46 ]. The combination of thermodynamics and information theory of biological information processing has been discussed in several studies. Some early works proposed the entropy production rate (EPR) as a measure of the energetic cost of cellular information processing and decision-making [ 47 , 48 ]. Later, models of diffusive transport were used to assess the thermodynamic cost of moving information-carrying molecules across cellular compartments [ 49 ]. 
More recently, stochastic thermodynamics has been combined with communication theory to analyze finite-state bipartite Markov systems at nonequilibrium steady state, treating each channel use as a readout of a steady-state output symbol [ 50 ]. In that framework, entropy production is not a constraint but a quantity co-optimized alongside information capacity, revealing trade-offs in the steady state. Another approach considers a discrete memoryless channel driven by an i.i.d. binary input, where the output is observed only once sufficient correlation between input and output is established [ 51 ]. The idea that information flow may be an emergent optimization principle in nervous systems, shaped by evolution, was first proposed by Attneave [ 52 ] and later formalized by Barlow [ 26 ] as the “efficient coding hypothesis,” suggesting that neurons evolve to reduce redundancy and maximize information about the environment. This notion was given a concrete mathematical formulation by Linsker through the principle of maximum information preservation (infomax) [ 53 ]. The optimization paradigm was subsequently extended to systems biology, particularly in the context of gene regulation, by Tkačik et al . [ 54 ]. Numerous follow-up studies have built on this foundation, which we do not review here for brevity. B. Outline and notation The paper is organized as follows. In Sec. II we review the process-based description of microscopically reversible Markovian CRNs as developed by Kurtz and Anderson [ 18 ]. The concepts of mutual information and directed information between continuous-time processes are both introduced in a measure-theoretic way in Sec. III. In particular, the MI is defined via a Radon-Nikodym derivative and the DI and its conditional versions are defined via the extremal-based approach of Weissman et al . [ 12 ]. 
We then use a classical expression [ 55 ] that links the mutual information between jump processes to the characterization of the counting processes of the distinct reactions. To obtain the MI between disjoint bipartite-type subnetworks of the CRN, we propose a notion of indistinguishability of different reactions from the perspective of the subnetworks. We also provide a proof that, without the bipartite assumption, mutual information is ill-defined due to the non-existence of the corresponding Radon–Nikodym derivative. A similar expression for the directed information between subnetworks is derived rigorously and linked to McFadden’s point process entropy [ 56 ]. The DI expressions presented are also valid in the case that certain reactions change input and output species simultaneously. The basics of thermodynamically consistent modeling for closed and open chemical reaction networks are outlined in Secs. IV A and IV B. Then, in Sec. IV C we introduce thermodynamic notions on the trajectory-level in line with coarse-graining of a microscopic system (cf. Supp. Sec. S2.1). The common identification of the EPR of the NESS with the heat dissipation rate is given in Sec. IV D. Common decompositions of heat dissipation and entropy production are briefly outlined in Sec. IV E. Precautions to prevent underestimation of the EPR are detailed in Sec. IV F. In Sec. IV G the CRN description of stochastic thermodynamics is linked to the equivalent multi-reservoir description for general CTMCs. As an addition, the relation of thermodynamic irreversibility and time reversal is outlined in Supp. Sec. S2.4, together with a summary of equivalent equilibrium conditions. Section V introduces local independence graphs for CRNs, which will later be used to formulate a chemical communication system, categorize its reactions into different functional classes and separate design degrees of freedom in a chemical reaction network from a fixed channel. 
Combined applications of information theory and stochastic thermodynamics will be discussed in Section VI. The primary focus is laid on a Shannon-theoretic communication model in Sec. VI A, where a chemical reaction network is used both to encode and to transmit information in continuous time from one set of species to another. First, the model of a chemical communication system is introduced. Second, we discuss the noisy channel coding problem in the context of chemical communication systems, which entails rigorous mathematical definitions of chemical codes, information rates and capacity. Third, we introduce quantities from stochastic thermodynamics as physically principled constraints on the allowed energy dissipation rate or chemical work of the system. Fourth, we introduce the dual perspective on information capacity under energy constraints, namely, the minimum energy dissipation required per information unit as a function of the code rate. Lastly, we apply the introduced graph-based tools to decompose a chemical communication system into encoding reactions, which belong to the design degrees of freedom, and those reactions that are associated with the invariant channel law. In Section VI B we discuss how directed information can be used to test the hypothesis that information flow is an emergent optimization principle followed by evolution. Finally, in Sec. VI C, we showcase how directed information and stochastic thermodynamics are combined for two analytically tractable minimal promoter models as case studies. The first is a nonequilibrium two-state promoter model with parallel reaction channels, and the second is a three-state promoter model. Both models describe the switching between a single transcriptionally active state and an inactive state. 
We derive analytical expressions for the directed information (DI) between the promoter’s switching activity and the RNA copy number, and we optimize the directed information rate (DIR) under an upper bound on the promoter’s entropy production rate. In the process, we show that the thermodynamic modeling pitfalls pointed out lead to inconsistent results. In the following let (Ω, ℱ, ℙ) be a probability space with complete filtration ℱ = {ℱ t } t ≥ 0 . The symbol ∨ denotes the join operation for sigma-algebras that yields the smallest sigma-algebra that contains all elements of the individual sigma-algebras [ 57 ]. For any σ -algebra 𝔖 on Ω let $\overline{\mathfrak{S}}$ denote its completion with ℙ-null sets. With $\int_s^t$ we denote the integration over the interval ( s, t ], where s < t . This convention is important when considering counting process integrals. All stochastic pure jump processes, like counting processes and jump processes on a chemical copy number space, are assumed to have càdlàg trajectories. In the information-theoretic context we refer to X and Y as the channel input and output, respectively. In the context of communication channels the symbol X is typically used for the message-encoding signal, but otherwise we do not explicitly fix the meaning of X . In the notation of quantities in stochastic thermodynamics we proximally follow the IUPAC conventions [ 58 ]. II. MARKOVIAN STOCHASTIC CHEMICAL REACTION NETWORKS A stochastic chemical reaction network (CRN) consists of a finite set of chemical species ℒ = {1, ... , |ℒ|} with identities U = {U 1 , ... , U | ℒ| } and a finite set of reaction channels ℛ = {1, ... , |ℛ |} with identities ℜ 1 , ... , ℜ |ℛ | with stoichiometric balance equations $$\mathfrak{R}_r:\quad \sum_{i \in \mathcal{L}} a_i^{(r)}\, \mathrm{U}_i \;\rightleftharpoons\; \sum_{i \in \mathcal{L}} b_i^{(r)}\, \mathrm{U}_i, \qquad r \in \mathcal{R}, \tag{1}$$ wherein $a^{(r)}, b^{(r)} \in \mathbb{N}_0^{|\mathcal{L}|}$ represent the number of reacting molecules of each species and the number of product molecules, respectively, for the forward reaction, and vice versa for the backward reaction. 
The difference of product and reactant species of ℜ r forms the stoichiometric change vector ν r = b ( r ) − a ( r ) of the forward reaction. For each reaction channel, the backward reaction microscopically reverses the forward reaction. We assume that all reactions are elementary [ 59 ], i.e., there are no reaction intermediates and each reaction has only a single transition state in the reaction coordinate diagram as illustrated in Fig. 1 (a) . Download figure Open in new tab FIG. 1. Exemplary reaction coordinate diagrams for (a) elementary reactions and (b) a non-elementary reaction with a single intermediate. E a denotes the activation energy of the reaction, while −Δ g is the amount of energy released by the reaction. Let $U = \{U(t)\}_{t \ge 0}$ be the continuous-time Markov chain of the copy number vector of the chemical species U. Throughout, we sometimes assume that the rates of the reactions, commonly referred to as propensity functions, obey stochastic mass action kinetics [ 18 , 60 ] with and stochastic rate constants and . A conversion of the discrete molecule numbers to concentrations in a constant, finite solution volume V > 0 gives rise to another pair of rate constants , such that due to consistency of physical units in both the deterministic ODE model (of the concentrations) and the probability evolution equation of the stochastic model [ 18 , 61 ]. and can be identified with the rate constants of the deterministic law of mass action by taking the macroscopic limit. The mass action assumption can be dropped in the following if it is not explicitly used. A reaction (channel) ℜ r is called microscopically reversible if , or more generally if implies and vice versa for all u . A CRN is called microscopically reversible if all reactions are microscopically reversible. In the following, a CRN is assumed to be microscopically reversible unless otherwise noted. 
For each reaction channel ℜ r we denote the counting processes of reactions in the forward and reverse direction as with and with , respectively. These can be represented via random time changes [ 18 ] of 2|ℛ| independent unit-rate Poisson processes with for r ∈ ℛ and ε ∈ {+, −} . Then the random state U ( t ) of the CRN with random initial value U (0) obeys The probability mass function p ( u, t ) = ℙ ( U ( t ) = u ) follows the chemical master equation [ 60 , 62 , 63 ] Before proceeding, we recapitulate the model assumptions made so far, as they are fundamental for stochastic thermodynamics of CRNs. Assuming a Markov process description means that the future dynamics of the mesoscopic system is sufficiently described by the current state and does not depend on the past. In particular, the future dynamics also do not depend on the particular microstate the system is in, but only on the current mesostate u . The above assumptions about the system state U have implications for the environment in which the reaction network is embedded. Since the propensity functions are independent of time and space, the embedding environment must be seen as thermally equilibrated and spatially homogeneous from the perspective of the chemical species [ 64 ]. That is, the surrounding of each molecule of U homogenizes spatially on a much faster time scale than the reactions occur. In addition, the temperature of the solvent is constant over time and the volume V. Similarly, fluctuations in the solvent’s molecular composition, such as changes in pH or ionic strength, are assumed to be small enough to leave the mesoscopic rate constants effectively time-invariant. In later sections, isothermal–isobaric conditions will be adopted as a natural assumption for biological cells. Under such conditions, the stochastic rate constants in (2) could, in principle, depend on the mesostate u due to volume fluctuations. 
However, we assume these fluctuations are negligible, allowing the stochastic rate constants to remain effectively constant, except in scenarios involving slow, time-dependent changes such as cellular growth. According to Gillespie, in the light of collision theory the elementary reaction assumption is plausible up to bimolecular reactions and the dynamics is rigorously shown to follow stochastic mass action kinetics under the above assumptions on the environment [ 64 ]. In other cases, in particular when the chemical species represent different conformations of polymers as is common in gene expression models, a more careful justification of the elementary reaction assumption and the Markov assumption may be required. The relevance of the elementary reaction assumption for stochastic thermodynamics of CRNs is discussed at the end of Sec. IV A. III. MUTUAL AND DIRECTED INFORMATION BETWEEN SUBNETWORK TRAJECTORIES A. Definitions of mutual and directed information between continuous-time trajectories Let ( X , Y ) be a pair of ℱ -adapted [ 57 ] stochastic processes with discrete state spaces 𝒳 and 𝒴 over the finite time interval [0, t ], t > 0. Let Σ X and Σ Y denote the power sets of 𝒳 and 𝒴, respectively. These are the smallest sigma-algebras that contain all singleton sets { x }, x ∈ 𝒳, or { y }, y ∈ 𝒴. Then, the measurable space of trajectories X [0, t ] is given by the product space , where , with , and the product-sigma-algebra (or cylinder-sigma-algebra) with Σ X,s = Σ X . The product-sigma-algebra is the smallest sigma-algebra on the product set , that can resolve all singleton sets { x } ∈ Σ X at every time-point [ 65 ]. In turn, the path space of ( X , Y ) [0, t ] is given by the product space . This measurable space can be equipped with induced path-probability measures, either the joint or the product of marginal measures defined as Here, , and similarly denote the preimages of the respective stochastic process trajectories. 
By ℱ XY we denote the internal/natural filtration of the process ( X , Y ), such that for all t ≥ 0 it holds Note that if ( X , Y ) has càdlàg trajectories, then can also be defined as the induced probability measure on the càdlàg space with the trace σ -algebra of , i.e., the σ -algebra only contains subsets of . The MI between the processes X [0, t ] and Y [0, t ] is then defined via the Radon-Nikodym derivative [ 57 ] between the joint and product of the marginal path-probability measures if is absolutely continuous with respect to typically denoted as . The mutual information rate (MIR) is defined as its asymptotic slope with ( X , Y ) = { X ( t ), Y ( t )} t ≥0 . For discrete sequences, the DI was originally defined by Massey [ 11 ] as where X 1: N = { X ( n )} n ∈{1,… N } and Y 1: N ={ Y ( n )} n ∈{1,… N } are random sequences for all N ∈ ℕ. As noted by Newton [ 13 ], this definition was developed in the context of communication channels, where Y n = Y ( n ) is received after X n = X ( n ) is transmitted. Thus, the enumeration index n does not represent physical time for the joint process {( X ( n ), Y ( n ))} n ∈{ 1,… N } . If the index n does denote physical time for the joint sequence, then (4) includes undirected “instantaneous information exchange” terms [ 66 ] of the form 𝕀 ( X n ; Y n | ( X , Y ) 1: n −1 ), which should be excluded when aiming to quantify the causal influence of X 1: N on Y 1: N . To address this, Newton proposed a causally faithful definition of DI for discrete sequences, where the index represents physical time: To generalize this definition to continuous-time trajectories, we follow the structural extremal-based approach of Weissman et al . [ 12 ], who defined the directed information for continuous processes by analogy with Massey’s formulation as where 𝒯 (0, t ) = { t = ( t i ) i ∈ {1,…, n } : 0 < t 1 <· · · < t n = t } is the set of all finite partitions of the interval [0, t ), and n ( t ) denotes the length of the partition. 
However, we modify the definition to follow the causal structure of Newton’s proposal instead. Thus, for continuous-time processes on the right-open interval [0, t ), we define The use of the supremum, in contrast to the infimum, is justified in Supp. Lemma S1.1. Directed information on a closed interval [0, t ] is given by the right-limit and the directed information rate (DIR) is In other works, e.g., [ 13 , 16 ], directed information and transfer entropy have been extended to continuous time by taking the continuity limit of their discrete-time forms to construct a time-local rate, which is then integrated. The drawback of this approach for general continuous-time processes is that boundary terms or discontinuous jumps may be overlooked (although discontinuities do not occur for the typically considered jump processes). In contrast, the approach of Weissman et al . offers a structurally sound alternative, as it is rooted in measure theory and thus defined directly in terms of continuous-time path space probability measures, similar to explicitly measure theoretic definitions (cf. Appendix C or the Appendix of [ 36 ]). In Supp. Sec. S1.2 we show that, under certain continuity conditions, the DI satisfies which is easier to evaluate in practice than definition (5). Similarly, for the continuous generalization of Massey’s DI, we obtain under the same continuity conditions. A similar expression was previously proposed by Weissman et al . [ 12 ], though without accounting for potential discontinuities or including the MI of the initial values, and without a formal proof. If 𝕀 ( X [0, t ) ; Y [0, t ) ) < ∞, then mutual and directed information are related through the information conservation law where the continuous-time instantaneous information exchange is defined as generalizing the corresponding discrete-time definition [ 66 ]. This conservation law has already been addressed in [ 16 ] (cf. Eq. 
(38) ) and follows from its discrete counterpart in [ 13 ], as we show in Supp. Proposition S1.1. In contrast, the Massey DI obeys the more asymmetric conservation law [ 12 ] where the delayed-input Massey DI is defined by for any delay δ ∈ (0, t ]. The conditional DI given a random variable Z , and the causally conditioned DI given an ℱ -adapted stochastic process Z [0, t ) = { Z ( s )} s ∈[0, t ) , are defined respectively as and if 𝕀 (( X , Z ) [0, t ) → Y [0, t ) ) < ∞ and 𝕀 ( Z [0, t ) → Y [0, t ) ) < ∞. Definition (10) is motivated by the chain rules for the discrete-time Newton DI, as detailed in Appendix A. An extremal-based definition is not viable since is not necessarily monotone under refinement of the partition t ∈ 𝒯 (0, t ). However, there exists a sequence of partitions that simultaneously approximates 𝕀 (( X , Z ) [0, t ) → Y [0, t ) ) and 𝕀 ( Z [0, t ) → Y [0, t ) ), and hence also 𝕀 ( X [0, t ) → Y [0, t ) ∥ Z [0, t ) ) . This can be shown in analogy to the proof of Supp. Lemma S1.2. Similarly, we define if the r.h.s. exists. Here, ∥ A denotes “anticipatory” causal conditioning. This definition is again motivated in Appendix A, and it can be shown that there exists a sequence of partitions t ∈ 𝒯 (0, t ) such that 𝕀 ( X [0, t ) → Y [0, t ) ∥ A Z [0, t ) ) is approximated by Lemma 1 It holds that if the quantity on the l.h.s. exists . This lemma follows directly from the nonnegativity of the approximating sequences. B. Trajectory Information between subnetworks of a CRN To simplify notation, we define the set of directed reactions , which has cardinality |ℛ ′ |. The directed reaction channels have stoichiometric balance equations and change vectors . For clarity, we will omit the prime notation in the remainder of this section. Let ℒ X , ℒ Y ⊆ ℒ be disjoint subsets in the index set of chemical species U. They represent the subsets of species between which we want to characterize information exchange. 
Hence, we have X = { U d ( s ): d ∈ ℒ X } s ≥0 and Y = { U d ( s ): d ∈ ℒ Y } s ≥0 with the respective state spaces and . Let be the set of non-zero change vectors and be the coordinate map [ 65 ] to the coordinates of the subnetwork of X and the corresponding set of non-zero change vectors. For Y we analogously define π Y and 𝒱 Y . Definition 1 The directed reaction channels ℜ r and are called X-indistinguishable if and analogously for Y . As an example, consider the set of species X, Y, A and the X -indistinguishable reactions ∅ → X, X → 2X, Y + A → Y + X, which all increase the amount of X by one. Definition 1 allow us to account for the indistinguishability of changes in the marginal subnetwork compared to the entire network. This is important for the definition of the reaction counting processes that generate the trajectories associated with the joint and marginal probability measures and . Let V : ℛ → 𝒱 ∪ {0}, r ↦ ν r denote the mapping from reaction indices to their corresponding change vectors. This function is not necessarily injective, as we will see in Sec. IV B. We now distinguish between reaction counters of changes in X or Y . Set for all ν X ∈ 𝒱 X and ν Y ∈ 𝒱 Y . In this notation the time dependence is shifted to the lower index and the functional arguments are the respective marginal change vectors. The corresponding propensity functions of subsystem changes are With this notation, we obtain for all s ≥ 0 the process equations To compute the MI, we must represent the evolution of X and Y relative to the histories of the marginalized and joint subsystems, specifically , and , rather than relying on a representation that depends on the current state of U . This is done by changing the propensity functions of the reaction counters to stochastic processes, that depend on the history of the respective marginal system. 
Abusing notation, we say that has the ℱ XY -intensity process [ 57 ] λ X ( ν X ) and the ℱ X -intensity with where U ( t − ) = lim s ↗ t U ( s ) (or U ( t − )) denotes the left limit in time. Analogously, has the ℱ XY -intensity λ Y ( ν Y ) and the ℱ Y -intensity with By definition, all jump processes are non-explosive and all intensities are left-continuous processes and therefore satisfy the predictability property [ 55 ] w.r.t. their particular filtration. We now introduce two notions of bipartiteness. Definition 2 Let X and Y be the subnetwork processes as described above. We say that X and Y are weakly bipartite if the probability of a simultaneous jump vanishes, i.e ., where Δ X ( s ) := X ( s ) − X ( s − ) for all s ≥ 0 and analogously for Y . strongly bipartite if ℛ does not contain reactions that simultaneously change X and Y, i.e ., for all ν X ∈ 𝒱 X , ν Y ∈ 𝒱 Y . The weakly bipartite case aligns with the usual bipartiteness definition for (non-)Markovian processes [16, cf. p. 7], i.e., for all s ≥ 0 and for h > 0 it holds That weak bipartiteness does not imply strong bipartiteness can be seen in the trivial case with ℙ (( X , Y )(0) = 0) = 1. However, strong bipartiteness implies weak bipartiteness as can be seen from the discussion in Supp. Sec. S1.1. The reaction counters (14) comprise a multivariate point process in the strongly bipartite case, which directly implies weak bipartiteness. Both X and Y , individually, and ( X , Y ) are “fundamental processes” of the kind as presented in §2.13 of [ 55 ], even without bipartiteness. Given these definitions, the following theorem provides an expression for the mutual information, similar to the previously stated [ 35 ] and excludes a generalization to the relevant non-bipartite cases. 
Theorem 1 If the conditions 𝕀 ( X (0); Y (0)) < ∞, X and Y are strongly bipartite , the intensities meet the requirement and analogously for Y, are satisfied, then the MI is finite and satisfies where ϕ ( a ) := ln( a ) 𝟙 (0,∞) ( a ) with a ≥ 0 and 𝟙 denoting the indicator function of the measurable set in its lower index . If X and Y are not weakly bipartite and there exists r ∈ ℛ with π X ( ν r ), π Y ( ν r ) ≠ 0 such that 𝔼 [ R r ( s )] > 0 for all s > 0, then is not absolutely continuous with respect to for all t > 0. If absolute continuity is not granted, then the Radon–Nikodym derivative does not exist and it is often said that 𝕀 ( X [0, t ] ; Y [0, t ] ) = ∞ by convention. A proof of the Theorem is provided in Supp. Sec. S1.1. The MI naturally decomposes into structurally equivalent parts, which we identify as the directed informations. Theorem 2 Under the conditions of Theorem 1 (strongly bipartite case), it holds and for the Massey DI A formal proof is given in Supp. Sec. S1.3. The expressions for Newton’s DI are well-defined even in the non-bipartite case, but require a different method of proof, which we do not provide here. The instantaneous information exchange and the Massey DI both contain simultaneous jump contributions, which cause them to diverge in this case. For the causally conditioned DI, a similar expression, in terms of intensity processes, follows directly from (10). Let ℒ Z ⊆ ℒ be a set of species and define Z := { U d : d ∈ ℒ Z }. Coordinate maps to the subsystem and projected non-zero change vectors are defined analogously to (12) and (13). Corollary 1. Let (ℒ X ∪ ℒ Z ) ∩ ℒ Y = ∅ . Then where is the ℱ XZY -intensity of and for all s ≥ 0. For completeness, we also introduce the point process entropies and conditional point process entropies, which generalize the concepts of Kramer [ 67 ] to the considered class of continuous-time processes. Kramer first introduced the concept of causally conditioned probabilities, which Spinney et al . 
[ 36 ] generalized to continuous time. These probabilities will be of importance in Sec. VI A, as they define the chemical communication channel. We denote for the probability of Y [0, t ] , causally conditioned on X [0, t ] (cf. Appendix C for a definition). The concept of point process entropy was first introduced by McFadden [ 56 ] via the likelihood of a univariate counting process, given in terms of local Janossy densities [ 68 ] (cf. p. 213). Similarly, a multivariate point process likelihood can be defined for the reaction counters ( R Y ( ν Y ), ν Y ∈ 𝒱 Y ) of a chemical (sub)network via the set of local Janossy densities on [0, t ] × 𝒱 Y [ 68 ] (cf. Ch. 7.3). Such likelihoods are defined on the space ⋃ n =0 (ℝ ≥0 V Y ) n , conditionally on the initial value Y (0). The likelihood associated with is and the likelihood associated with is The marginal point process entropy is then defined as and the causally conditioned point process entropy as Consequently, the DI satisfies the familiar relation (cf. [ 67 ]) Finally, we emphasize the importance of the merging of reaction counters of indistinguishable reactions. Instead of following the above projection procedure, define the index set i.e., the set of all reactions that modify X . While the newly constructed set of reaction counters only satisfies Analogous definitions can be made for Y and ( X , Y ). To build intuition, consider a given trajectory x [0, s ] . From x [0, s ] , one can uniquely construct the corresponding counting process realizations for all ν X ∈ 𝒱 X . Conversely, given these counting process realizations together with the initial condition x (0), the trajectory x [0, s ] can be uniquely reconstructed. Similarly, x [0, s ] can also be reconstructed from x (0) and the realizations of the reaction counters R r , r ∈ ℛ X , on the interval [0, s ]. 
However, the reverse is not generally true: if the system includes X -indistinguishable reactions, the trajectory x [0, s ] does not contain enough information to determine which specific parallel reaction r caused each observed stoichiometric change. A more formal discussion of this equivalence of jump process representations has been provided in Appendix A of [ 39 ]. In conclusion, not merging X -indistinguishable reactions can lead to misrepresentation of ℱ X and thus of the intensity processes . For instance, it must be taken into account in the stochastic filtering equations used to evaluate the various intensity processes [ 69 ]. The corresponding equations are provided in Appendix B. Next, consider the ℱ XY and ℱ X -intensity processes defined for all r ∈ ℛ X by Similarly, for all r ∈ ℛ Y , define the ℱ XY and ℱ Y -intensity processes as Note that and analogous relations hold for Y . By the log-sum inequality, with equality if the sum consists of a single term. It follows that the directed information satisfies with equality holding when |𝒱 X | = |ℛ X | . Here, the tower property of the conditional expectation was used to recast both sides of (25) in the familiar representation that includes ϕ . An analogous inequality holds for 𝕀 ( X [0, t ] → Y [0, t ] ). IV. STOCHASTIC THERMODYNAMICS A. Closed stochastic chemical reaction networks A thermodynamic system is considered closed if it exchanges energy, but not matter, with its environment. In contrast, a chemical reaction network (CRN) is typically called closed if its core species U do not exchange molecules with the surrounding solvent, even if the solvent itself interacts with an external environment. The species in the solution are thus partitioned into two disjoint sets: the core species U, which participate in the CRN, and the solvent species S, which do not. That is, solvent species S do not appear as reactants or products in any reaction involving the core species U. 
Under the assumption that the solvent is maintained in thermal and chemical equilibrium at a constant temperature T , it effectively serves as both a heat bath and a chemically equilibrated environment for the CRN. The solution U ∪ S corresponds to an isothermal closed system whose classical (non-reactive) microscopic dynamics are governed by a Hamiltonian 𝔥. A mesoscopic description, as used in stochastic thermodynamics, becomes valid under the assumption of a time-scale separation: fast equilibration of microstates relative to the slower dynamics of mesoscopic state changes. This assumption is fundamental and enables a coarse-graining procedure in which each mesoscopic state u is assigned a well-defined free energy [ 70 ]. Under constant volume, this potential corresponds to the Helmholtz free energy f ( u ); under constant pressure, as typical for biological systems, it is replaced by the Gibbs free energy g ( u ). The pressure dependence is implicitly encoded in g ( u ) and need not appear explicitly in the mesoscopic description. The derivation of these mesoscopic free energies via coarse-graining from an underlying Hamiltonian model is reviewed in Supp. Sec. S2.1. Alternatively, g ( u ) can be obtained using equilibrium statistical mechanics of ideal dilute solutions [ 60 ], provided that all solute species are accounted for. Since the systems of interest in this work are biological and typically operate under isothermal–isobaric conditions, we use the Gibbs free energy g as the relevant mesoscopic thermodynamic potential throughout. The zeroth law of thermodynamics for chemical reaction networks dictates that a closed, microscopically reversible CRN that accounts for all solute species will always relax to equilibrium [ 60 ]. The equilibrium distribution depends on the initial state U (0) of the system and differs between stoichiometric compatibility classes [ 71 ], the sets of all states that are connected via the reaction channels 1, ... , |ℛ | . 
In the following we assume that any initial distribution assigns positive mass only to states of a single stoichiometric compatibility class, since otherwise the limiting distribution can become a mixture of the equilibrium distributions of individual compatibility classes. Equilibrium statistical mechanics further dictates that the equilibrium distribution of a closed CRN under isothermal-isobaric conditions is (i) a Gibbs–Boltzmann distribution π ( u ) ∝ e − βg ( u ) , where β = ( k B T ) −1 , and (ii) it satisfies detailed balance for all admissible states u and all microscopically reversible reactions ℜ r [ 60 ]. To describe the energetics of each reaction ℜ r we introduce difference operators for functions of mesoscopic states. Definition 3 For each reaction channel ℜ r we introduce the forward and backward difference operator and , such that for any function we define The definition implies . Starting from state u in forward direction, the change in Gibbs free energy of the state associated with reaction ℜ r is given by Plugging the Boltzmann distribution into (26) yields the local detailed balance condition, or thermodynamic consistency relation, for closed CRNs [ 60 ]: for all admissible states u and all microscopically reversible reactions ℜ r . Equation (27) relates kinetic and energetic constants, and therefore it is also satisfied during the transient nonequilibrium and independent of the initial conditions. However, the consistency relation can be broken if reactions are not elementary, which violates the fundamental coarse-graining assumption that mesostates u define local equilibrium states. Any reaction coordinate of a potentially non-elementary reaction that does not decay instantaneously must be accounted for as a species of the CRN. 
Deviations from the well-mixed assumption caused by individual elementary reaction events are neglected under the implicit assumption of instantaneous diffusion, which ensures local equilibrium [ 72 ] within the finite reaction volume. Consequently, the local equilibrium assumption of stochastic thermodynamics aligns with the kinetic discrete-state Markov process model. A common modeling oversight, only recently emphasized in [ 73 ], is the misidentification of forward and backward reactions that in fact correspond to distinct microscopic channels. In such cases, the thermodynamic consistency relation is, quite naturally, not valid. This issue is particularly relevant for models of gene expression, where the production and decay of polymers such as mRNA and proteins typically do not represent elementary reactions and are not microscopic reverses of one another. With a slight abuse of notation, we denote the heat dissipated into the environment due to reaction ℜ r by . Strictly speaking, Q ( u ) is not always a well-defined state function, as heat is a path-dependent quantity. We incorporate the operator into the definition since it satisfies the property . As reviewed in Supp. Sec. S2.1, the mesoscopic Gibbs free energy g ( u ) of the solution can be decomposed into its mesoscopic enthalpy 𝔥( u ) and the internal entropy h i ( u ) of the mesostate u , averaging over the fast equilibrated degrees of freedom (cf. [ 60 ], Eq. (58) ): Following the mesoscopic formulation of the first law of thermodynamics under the local equilibrium assumption (cf. Supp. Sec. S2.1), the heat released to the environment during a state transition can be identified with the decrease in enthalpy: Substituting this into the free energy change yields which shows that the local detailed balance condition (27) links the kinetic parameters of the model with both external and internal entropy changes associated with each reaction. B. 
Open chemical reaction networks – Coupling to chemostats An open CRN allows the reactive exchange of molecules with external reservoirs. Here, we assume that these reservoirs are part of the solvent, which itself needs to be driven by its external environment to comply with the steady state assumption inherent to all reservoirs. The core system U be coupled to L chemostats Z 1 , ... , Z L , which are part of the solvent species S and immediately equilibrate after participation in reactions of the core system U. The energetic coupling is illustrated in Figure 2 . We assume that the copy number of chemostat molecules in the solvent is sufficiently high to be represented by their real-valued concentrations z = ( z 1 , ..., z L ), which are held constant over time. In our case, we imagine a biological cell that strives to maintain homeostasis. Others have discussed chemostats that vary in time according to a predetermined deterministic protocol [ 60 , 63 , 74 ]. For simplicity, we first introduce constant chemostats and include the possibility of temporal variations at the end of this section. Download figure Open in new tab FIG. 2. Visualization of an open CRN, where the core system U is coupled to a number of chemostat species Z l of chemical potential μ l via reactions. The solvent acts as a common heat bath with temperature T for the core system and all chemostats. Thus, each horizontal layer in the illustration can exchange energy with its vertically adjacent layers. The open CRN has stoichiometric balance equations where γ r = t ( r ) − s ( r ) is the stoichiometric change vector of the chemostat species associated with ℜ r . Immediately after each reaction, the chemostat is assumed to correct the copy number of all chemostat species. 
Consequently, under the assumption of stochastic mass action kinetics, the propensity functions of the coupled reactions maintain the form (1) with the adjustment that the stochastic rate constants now depend on the chemostat concentrations insofar as they incorporate a deterministic law of mass action where the units of the deterministic rate constants are adapted accordingly. For any set of Markovian kinetic laws of the form (i.e., not necessarily mass action kinetics), hence z is a fixed parameter of the propensity functions. Therefore, the probability evolution is still described by (3). To describe the energetics of each reaction ℜ r we denote by µ l the chemical potential of Z l , i.e., the energy associated with a single molecule of species Z l . The chemical work done on the system by the chemostat via reaction ℜ r is then where (− r ) denotes the backward reaction. Hence the energy dissipated into the environment per reaction is Intuitively, the dissipated heat is the chemical work done on the system but not “stored” in the system’s enthalpy. Further, expression (31) is consistent with (28) when the joint set of species U ∪ Z is regarded as the core system of a closed CRN instead of regarding Z as a controlled part of S. In open CRNs the thermodynamic consistency relation then becomes (cf. [ 60 ], Eq. (76)) for all admissible states u and all reversible reactions ℜ r . The local detailed balance relation (27) together with (29) follows the same expression as (32), where however the identities of the dissipated energies per reaction differ. Thus, (32) is more general in the sense that the dissipated heat potentially accounts for chemical work through (31) in addition to enthalpy changes. A detailed mesoscopic derivation of (32), including multiple reservoirs, is given in Supp. Sec. S2.2. In Supp. Sec. S2.3 we review the consistency of (32) in the macroscopic limit for an ideal dilute solution, obeying mass action kinetics. 
Lastly, we note that beyond coupling a CRN to a chemostat with constant concentrations, more general time-dependent mechanisms can perturb the system away from equilibrium. Following the terminology of [ 70 ], these perturbations fall into two (not mutually exclusive) categories: (i) “Driving”, which refers to coupling the system to an external agent that exchanges energy with it during state transitions, as in the case of chemostats; and (ii) “Manipulation”, which involves time-dependent control of system parameters, such as changing chemostat concentrations or variations in the solution volume, for example due to cellular growth. Note that this terminology is not used consistently across the literature. In the presence of a time-varying protocol Γ( t ), the propensity functions become explicitly time-dependent, , resulting in a time-inhomogeneous Markov jump process. The structure of the chemical master equation (3) remains valid upon replacing the propensity functions with their timedependent counterparts. Likewise, thermodynamic quantities such as g ( u, t ) and µ l ( t ) acquire explicit time dependence, yielding the generalized local detailed balance condition together with the reaction-wise enthalpy balance relation Eq. (33) follows by introducing the instantaneous stationary distribution π ( u, t ), defined as the limiting distribution obtained by hypothetically freezing the protocol at Γ( t ) and allowing the system to relax. This distribution can then be used to derive (33) via the approach outlined in Supp. Sec. S2.2. This framework naturally extends to stochastic protocols, where the external control parameters are themselves governed by a stochastic process. In such cases, it is essential that the influence of the core system on the protocol, such as feedback on chemostat concentrations, remains negligible. C. 
Thermodynamics of random trajectories A central appeal of stochastic thermodynamics is that quantities like heat dissipation, work, and entropy production can be defined along individual random trajectories. In the following, we review a priori definitions of these quantities, consistent with macroscopic thermodynamic principles. The presentation builds on Kurtz’s process-based formulation of CRNs, which naturally accounts for the instantaneous, finite changes at the mesoscopic scale caused by chemical reactions of U. Let be the random times, when reaction ℜ r happens in forward or backward direction. Heat is dissipated only during reaction events due to “excess” chemical work provided, even if the system is externally manipulated. The heat dissipated along the random trajectory U [0, t ] := { U ( s )} s ∈ [0, t ] of the open CRN within the time interval (0, t ] is then where U ( t −) = lim s ↗ t U ( s ) (or U ( t − )) denotes the left limit in time, such that is the state of the CRN just before the k -th occurrence of reaction ℜ r in forward direction [ 63 ]. The left limit in the second argument of is irrelevant if Γ is a continuous function, but may be relevant for stochastic manipulation protocols. We restrict the discussion to deterministic protocols Γ that are piecewise-differentiable and hence drop the left limit in the second argument. We rewrite (35) via stochastic integrals of the reaction counting processes Defining the forward, backward, and net probability fluxes of reaction ℜ r as the mean heat dissipation rate (also called thermal power [ 58 ]) is as rigorously derived in Supp. Sec. S2.5. In a similar fashion, the chemical work performed on the trajectory U [0, t ] is which gives rise to the mean rate of chemical work by following the steps in the proof of (38). can also be understood as the amount of energy required to maintain the chemostat concentrations, i.e., the energy expenditure of the external agent. 
In a manipulated system, the change of the free energy of the system amounts to another work contribution along U [0, t ] . For simplicity we assume that the internal energy is a continuous function of time, such that [ 70 ] and, hence, the mean rate of manipulated work is The entire work, associated with manipulation and driving, is then In this account of work, we have ignored volume changes under isobaric conditions, since these are already accounted for in the enthalpy. The enthalpy change along the trajectory can be expressed via integrals, which account for both continuous evolution between reactions and changes due to reactions: and likewise also the internal entropy change where we denote the reactive change of the internal entropy as Plugging (34) into (36) and (41) into (40) together yields the enthalpy balance relation referred to as the first law of stochastic thermodynamics on the trajectory-level (cf. [ 60 ], Eq. (69) ). Through the development of stochastic thermodynamics, this law was often reported in a different form, which is sometimes called “first law of stochastic thermodynamics” [ 70 , 75 ] with and . It was first noted by Sekimoto [ 76 ] that the first-law-like equation was not consistent with the macroscopic first law, since the designated “mesoscopic heat exchange” q mes ( t ) contains an entropic part that does not account for heat flow, but a change in the system entropy. Since then, q ( t ) is distinctly called measured heat [ 76 ] or calorimetric heat [ 70 ]. We now turn to a trajectory-level account of the total entropy production of the open system. 
The total entropy production along U [0, t ] comprises entropy changes of the system U ∪ S and the entropy exchange with the environment: The system entropy itself contains two parts: (i) changes in the mesoscopic self-entropy whose mean is simply the population (Shannon) entropy of the core system H [ U ( s )] = 𝔼 [ h U ( U ( s ), s )] = −∑ u p ( u , s ) ln( p ( u , s )) at time s ≥ 0, and (ii) changes in the internal entropy of the mesoscopic states h i . We denote the change of the population entropy along U [0, t ] as and, in turn, define the system entropy as Not much can be said about the change of internal entropy, but the population entropy is a quantity that is given in terms of the kinetic model of the open CRN. Following a decomposition analogous to (42) we obtain where by the use of the CME (3) we may substitute We further have , which follows by pulling the expectation into the integral (Fubini’s theorem) and applying a shift of summation. Using the CME (3) we can express the derivative of the mean population entropy as where the last equality follows from (44) and the proof method for Eq. (38) in Supp. Sec. S2.5. According to macroscopic thermodynamic principles [ 72 ], the heat dissipated into the environment q ( t ) must be consistent with the increase of entropy in the environment h ex , such that Now apply the generalized consistency relation (33) to the expression for the mesoscopic heat exchange (36), which yields This representation shows that mesoscopic heat exchange captures all entropy flow associated with reaction events that is not accounted for by changes in the statistical distribution over mesostates or by external manipulation. As it depends explicitly on the propensity functions, it encodes the local directional bias of each reaction channel ℜ r , relating it to thermodynamic reversibility [ 77 ] as further discussed in Supp. Sec. S2.4. Again following the proof method for Eq.
(38) , the mean mesoscopic heat exchange rate (in units of k B T ) satisfies and thus depends only on the kinetic model of the open CRN. In conclusion, the total entropy production satisfies which decomposes into a part that depends only on the time-dependent population distribution and on the propensity functions, and a term that accounts for internal entropic changes in the microscopic degrees of freedom due to external manipulation. The βq i terms cancel out. In the absence of external manipulation, e.g., when chemostat concentrations remain constant, the total entropy production of the open CRN depends solely on experimentally accessible quantities: the probability distribution over states and the reaction propensities. This key observation has already been emphasized before [ 63 ]. Using equations (44) , (46) , and (37) , we obtain The (total) entropy production rate (EPR) then follows with and the method of proof of (38): which can also be related to other mean rates This decomposition motivated earlier inconsistent identifications of with the time-dependent entropy exchange rate [ 62 , 78 ]. By virtue of ln( x )( x − 1) ≥ 0 for all x ≥ 0 (with convention ln(0) := −∞), each term on the r.h.s. of (50) is non-negative and hence shows consistency with the second law of macroscopic thermodynamics. D. Energy dissipation and entropy production at stationarity Lastly, we look at the entropy production rate at stationarity, which in the following will be identified with the mean energy dissipation rate in a non-equilibrium steady state (NESS). At stationarity, we have lim t →∞ H [ U ( t )] = −∑ u π ( u ) ln( π ( u )), implying .
Consequently, with (51) the entropy production rate at equilibrium or in a NESS satisfies The EPR at NESS hence quantifies the entropy changes in the whole system, consisting of U ∪ S and the environment, with all entropic changes that are associated with the dynamics of the CRN and not just changes in the population distribution. In a second step we note that stationarity of U ( t ) together with the local equilibrium condition Supp. Eq. (S2.5) implies that also the process of all microscopic degrees of freedom Ξ( t ) is stationary (cf. Supp. Sec. S2.1). Then the mean rate of reactive changes of the internal entropy (43) also vanishes: Applying (46) then establishes the identification of the EPR with the energy dissipation rate (in units of k B T ) at stationarity: with mean heat exchange rate ]. Eq. (52) is key to the applications discussed in Sec. VI A and VI B. E. Decompositions of heat dissipation and entropy production The central equality (52) motivates the definition of the so-called housekeeping heat [ 63 , 79 , 80 ] at trajectory level where we again generalized to time-dependent protocols Γ, using the instantaneous stationary distribution π ( u, s ). This quantity captures the portion of the total heat dissipation q ( t ) along a trajectory U [0, t ] that is required to maintain the instantaneous NESS of the system at any time s ∈ [0, t ]. Notably, the definition remains valid even away from stationarity, allowing one to distinguish the excess heat, defined as q ex ( t ) = q ( t ) − q hk ( t ), which quantifies the additional heat dissipated during relaxation toward the steady state. Similarly, there is the notion of adiabatic entropy production h a ( t ) [ 80 ], which is also defined by the r.h.s. of (53), and is supposed to describe the entropy produced by an adiabatic protocol Γ a that satisfies the ad-hoc assumption of instantaneous stationarity p (·, s ) = π (·, s ) for all s ∈ [0, t ] and hence 𝔼 [ h tot ( t )] = 𝔼 [ h a ( t )].
In practice such a protocol is only realizable by a constant Γ at stationarity [ 70 ], where even h tot ( t ) = h a ( t ). The adiabatic entropy production allows for the decomposition of the total entropy production into an adiabatic term and the non-adiabatic entropy production h na ( t ) := h tot ( t ) − h a ( t ). For a slowly manipulated closed system (i.e., without driving) following an adiabatic protocol, instantaneous detailed balance is approximately satisfied at all times. Using detailed balance as an ad-hoc assumption, yields h a ( t ) = 0 and hence h tot ( t ) = h na ( t ). While the intuition for this entropy production decomposition seems to be somewhat unripe, the two quantities exhibit important statistical properties [ 80 ]. F. Effective entropy production rate In practice, the chemostat species of an open CRN are often omitted when formulating the stoichiometric balance equations and the corresponding propensity functions (1). As (30) shows, this simplification does not necessarily affect the definition of the stochastic dynamics of the core process U . However, in the context of stochastic thermodynamics, it is essential to keep track of all species involved in each reaction. In particular, using such “effective” reactions leads to a systematic underestimation of the entropy production rate. In a properly defined open CRN it typically holds that and if r 1 ≠ r 2 . However, the same changes in the core species U can be driven with different chemostat species. Definition 4 Two reaction channels and are called parallel, if and , or and . Let 𝒞 := 1, ... , |𝒞 | with |𝒞 | ≤ |ℛ | be the index set of the parallel reaction classes 𝒞 α . All reactions within a class be properly aligned such that for all r 1 , r 2 ∈ 𝒞 α with r 1 ≠ r 2 . 
Then we define effective reaction counters with effective propensity functions and effective probability fluxes Applying (50) with these effective fluxes yields the “effective EPR” of the model with effective propensities: The effective EPR is a lower bound on the true EPR, i.e., as has been shown in [ 70 ] (cf. Eq. (3.81)), using the multi-reservoir description provided in the following section. We exemplify this inequality in Sec. VI C with a two-state Markov model exhibiting two reaction channels. While in this case, for any choice of rate constants can be positive. G. Entropy Production Rate of driven conversion networks and general CTMCs with multiple reservoirs Much of the literature on entropy production rate employs a formulation based on the infinitesimal generator (transition rate matrix) Λ of a CTMC, rather than a reaction network perspective [ 62 , 70 , 81 ]. A bridge between these two pictures can be constructed by introducing an index set 𝒥 := {1,…, |𝒰 |}, where each mesoscopic state u ∈ 𝒰 is uniquely assigned an index j ∈ 𝒥 (with |𝒰 | = ∞ allowed). This indexing defines a one-hot encoding of the state space, 𝒰 ′ = { e j : j ∈ 𝒥}, where e j denotes the unit vector with a single non-zero entry at position j . Viewed as a population representation, this encoding interprets each state e j as indicating the presence of a single effective species , with all other species absent. This reformulation recasts the original reaction network into a conversion network: a collection of microscopically reversible transitions between discrete one-hot states. Each conversion i → j (with change vector ν ( i, j ) = e j − e i ) may be coupled to one or more reservoirs r , corresponding to the reaction channels ℜ r , as discussed in Supp. Sec. S2.2. For each reservoir r , we denote the set of reversible transitions it mediates as with stoichiometric balance equations and reservoir-specific propensity functions .
The effective Markov generator Λ of such a network, coupled to multiple reservoirs r is then given as the sum of reservoir-wise generators Λ ( r ) , such that the transition rates for i → j (and also diagonal elements) satisfy [ 82 ] For each ( i, j ) ∈ ℛ ( r ) the reservoir-wise propensity functions and generators are related as The reservoir-wise generators then satisfy the local detailed balance relation for ( i, j ) ∈ ℛ ( r ) (cf. Supp. Sec. S2.2 and [ 82 ], Eq. (4) ) By (50) we obtain and hence substituting (54) yields the multi-reservoir EPR of a consistently modeled CTMC [ 82 ] Importantly, neglecting the contribution of individual reservoirs can result in an underestimation of the EPR (cf. Ref. 70, Eq. (3.81)). V. SPECIES-REACTION GRAPHS AND LOCAL INDEPENDENCE GRAPHS FOR CRNS A. Species reaction graphs We define a species-reaction graph (SR-graph) slightly differently from its original notion [ 83 ]. This is necessary to faithfully represent local independence. A reversible SR-graph is a bipartite graph (ℒ, ℛ, ℰ) with edge set ℰ and two vertex types: (i) the species nodes ℒ for all U d , and (ii) the reversible reaction channel nodes ℛ for all reaction channels ℜ r . Here, the SR-graph attribute “reversible” means that we do not resolve forward and backward direction of a single reaction channel ℜ r if the reaction channel is reversible. However, the reversible SR-graph may actually comprise irreversible reaction channels. In contrast, we refer to an SR-graph as irreversible, if all reaction channels are considered irreversible. In a bipartite graph, only vertices of different type can be connected by an edge e ∈ ℰ, i.e., where we assume distinct enumeration ℒ ∩ ℛ = ∅ for notational simplicity. A directed edge ( d, r ) ∈ ℒ × ℛ is drawn from U d to ℜ r if either or explicitly depends on u d . Such edges encapsulate local (in)dependencies of the reaction counters.
A directed edge ( r, d ) ∈ ℛ × ℒ is drawn from ℜ r to U d if ν r is non-zero in the coordinate of U d . These edges encapsulate the time-point wise algebraic equation where π d is the coordinate map to d ∈ ℒ and ℛ d := { r ∈ ℛ | π d ( ν r ) ≠ 0 } . If directed edges in both directions exist, they can be replaced by a single bidirected edge “⟷” in the visualization of the graph. An edge-simple walk (often called a ‘trail’ in classical graph theory [ 84 ]) of length N ∈ ℕ in the SR-graph is a sequence of directed edges that joins a sequence of vertices ( v 0 , ..., v N ), such that all directed edges e k = ( v k − 1 , v k ) are pairwise distinct. An edge-simple walk is said to be from v 0 to v N . We say that it passes through some vertex v if there exists i ∈ {1, ..., N − 1} such that v = v i . For edge-simple walks between sets instead of single vertices we use the following definition. Definition 5 Let ℛ ′ ⊆ ℛ and consider the disjoint species sets ℒ X , ℒ Y ⊂ ℒ . An edge-simple walk of length N ∈ ℕ is said to be (i) from ℒ X to ℒ Y if v 0 ∈ ℒ X , v N ∈ ℒ Y and v 1 , ..., v N −1 ∉ ℒ X ∪ℒ Y ; (ii) from ℛ ′ to ℒ Y if v 0 ∈ ℛ ′ , v N ∈ ℒ Y and v 1 , …, v N −1 ∉ ℛ ′ ∪ℒ Y . By this definition, a ‘trail’ from ℒ X to ℒ Y is always a ‘trail’ on the edge-deleted subgraph (ℒ, ℛ, ℰ X → Y ) with which contains neither edges into ℒ X nor edges that originate from ℒ Y . Similarly, a ‘trail’ from ℛ ′ to ℒ Y is a ‘trail’ on the edge-deleted subgraph with B. Local independence graphs for multivariate counting processes We briefly introduce the concepts of local independence graphs and dynamic Markov properties for multivariate counting processes, as developed by Didelez [ 85 ]. Temporarily assume that ℛ is a set of irreversible reaction indices. We introduce bold notation for the multivariate counting process R := ( R r : r ∈ ℛ). For any A ⊆ ℛ denote the subprocess R A := ( R r : r ∈ A ) and its internal filtration . Further let s ↦ κ s ( r ) denote the (ℱ, ℙ)-intensity process of R r .
Thereby κ s ( r ) may depend on the entire past ℱ s − , i.e., R may be a non-Markovian process. The process ( R , ℙ) is always associated with a probability measure, which determines its intensity processes. Further, it is also associated with a finite observation interval [0, t ], t > 0. In the following, we simply write R unless required. Definition 6 ([ 85 ]). Let A, B,C ⊆ ℛ be pairwise disjoint. We say that a subprocess R B is locally independent of R A given R C over [0, t ] if for all r ∈ B the ℱ A ∪ B ∪ C -intensities are ℱ B ∪ C -intensities. This is denoted by A ↛ B | C. Otherwise , R B is locally dependent on R A given R C , i.e ., A → B | C. If C = ∅, then R B is marginally locally (in)dependent of R A . Although this is not necessary in general, the above definition assumes that local independence is “reflexive”, i.e., each subprocess depends at least on its own past, which is the case for most practical applications. A, B,C ⊆ ℛ, not necessarily disjoint, Now let 𝒢:= (ℛ, 𝒜) be a directed graph (possibly containing cycles) with directed edge set Definition 7 ([ 85 ]). 𝒢 is called a local independence graph of R if R satisfies the pairwise dynamic Markov property with respect to 𝒢, i.e ., for all r ′ , r ∈ ℛ, r ′ ≠ r In the following, we need a few more notions of graph theory [ 86 ] and will mainly adhere to the conventions of Didelez [ 87 ] (cf. Sec. 1.1). To keep these notions abstract, we temporarily denote the finite vertex set as 𝒱 and the edge set as ℰ, where the edge set may contain both directed and undirected edges (denoted as sets), i.e., We consider only reciprocal graphs, where { v ′ , v } ∈ ℰ implies {( v ′ , v ),( v, v ′ )} ∩ ℰ = ∅ . Let 𝒢 := (𝒱, ℰ) denote the graph. For A ⊂ 𝒱 we call 𝒢 A := ( A , ℰ| A ) the induced subgraph, where ℰ | A := ℰ ∩ (( A × A ) ∪ {{ v ′ , v } ⊆ A }).
For B ⊂ 𝒱 we denote with 𝒢 B := (𝒱, ℰ B ) the edge-deleted subgraph with A tuple of distinct vertices v k ∈ 𝒱 (except possibly v 0 = v N ) is called a path of length N ∈ ℕ from v 0 to v N if { v k − 1 , v k }∈ ℰ or ( v k − 1 , v k ) ∈ ℰ for all k ∈ {1, ..., N } . A path is called semidirected if there exists k with ( v k − 1 , v k ) ∈ ℰ and directed if the latter holds for all k ∈ {1, ..., N } . Note that Didelez [ 87 ] also introduced a generalized notion of trails for the given class of graphs [ 88 ]. A chain graph is a (reciprocal) graph that does not contain any (semi)directed cycles. Let a, b ∈ 𝒱, a ≠ b and A, B,C ⊂ 𝒱 . A set C is called an ( a, b )-separator if all paths from a to b intersect C . We say that C separates A from B if C is an ( a, b )-separator for all a ∈ A and b ∈ B. a is a parent of b ( a ∈ pa( b )) if ( a, b ) ∈ ℰ and pa( A ) := ∪ v ∈ A pa( v ) \ A. b is a child of a ( b ∈ ch( a )) if ( a, b ) ∈ ℰ and ch( A ) := ∪ v ∈ A ch( v ) \ A. a is a neighbor of/adjacent to b ( a ∈ nb( b )) if { a, b }∈ ℰ and nb( A ) := ∪ v ∈ A nb( v ) \ A. a and b are non-adjacent if { ( a, b ),( b, a ), { a, b } } ∩ ℰ = ∅ . The union of parents and neighbors is called the boundary bd( A ) = pa( A ) ∪ nb( A ) and cl( A ) := bd( A ) ∪ A is the closure of A . If there is a path from a to b , then a is an ancestor of b ( a ∈ an( b )) and b is a descendant of a ( b ∈ de( a )). For sets an( A ) := ∪ v ∈ A an( v ) \ A , de( A ) := ∪ v ∈ A de( v ) \ A , and additionally the non-descendants nd( A ) := 𝒱 \ (de( A ) ∪ A ). A set A is called ancestral if bd( A ) = ∅ . The smallest ancestral set, containing A is denoted as An ( A ) and satisfies An( A ) = an( A ) ∪ A . The moralized graph 𝒢 m := (𝒱, ℰ m ) is an undirected graph, such that { v ′ , v } ∈ ℰ m if ( v ′ , v ) ∈ ℰ or ( v, v ′ ) ∈ ℰ, or if there exists such that , where undirected path from to v } and pa 𝒢 means “parent with respect to graph 𝒢 ”.
With these notions, and switching back to the graph 𝒢 = (ℛ, 𝒜), an asymmetric separation criterion can be defined. Definition 8 ([ 85 ]). For pairwise disjoint A, B,C ⊆ ℛ we say that C δ-separates A from B in 𝒢 if C separates A from B in the undirected graph . For A, B,C ⊂ ℛ not pairwise disjoint we say that C δ-separates A from B if C \ B δ-separates A \ ( B ∪ C ) from B. Further, by convention ∅ is always δ-separated from B and ∅ δ-separates A from B if A and B are unconnected in . Note that the moralization criterion in the above definition applies only in the disjoint case. With the given graph theory notions two further dynamic Markov properties can be defined. Definition 9 ([ 85 ]). R is said to satisfy the local dynamic Markov property with respect to 𝒢 if for all r ∈ ℛ i.e ., the ℱ R -intensity is a ℱ cl( r ) -intensity . R is said to satisfy the global dynamic Markov property with respect to 𝒢 if for all pairwise disjoint A, B,C ⊆ ℛ Note that the generalization of the global dynamic Markov property to the non-disjoint case is straightforward. Didelez proved two theorems – a criterion for (symmetric) conditional independence and a criterion for the equivalence of all dynamic Markov properties – which we restate in the following. Proposition 1 ([ 85 , 89 ]). Let A, B,C ⊆ ℛ be pairwise disjoint . Let 𝒢 be the local independence graph of R . If C separates A from B on (𝒢 An( A ∪ B ∪ C ) ) m , then If R satisfies conditional measurable separability [ 90 ], i.e ., for all D, E ⊆ ℛ and all s ∈ [0, t ] with convention ℱ ∅ := {∅, Ω}, then (under mild regularity conditions) all dynamic Markov properties are equivalent . Hence, given that 𝒢 is a local independence graph of R , Proposition 1 allows the use of graphical separation criteria to obtain “global” local independence relations. Further, (a) has several implications: (i) A ↛ B | C , (ii) B ↛ A | C , and (iii) for each r ∈ C the ℱ A ∪ B ∪ C -intensity is either a ℱ A ∪ C or a ℱ B ∪ C -intensity.
The application of local independence graphs to the reaction counters of CRNs requires the extension with a non-dynamic node for each species in ℒ, which jointly represent the initial state U (0). In general, local independence graphs can be extended with time-fixed covariates, whose information must then be included in the filtration at the start ℱ 0 . The subgraph induced by non-dynamic nodes must be either a directed acyclic graph or a chain graph [ 85 ]. Edges from dynamic to non-dynamic nodes are not allowed. This construction ensures that the induced subgraph of nondynamic nodes can faithfully represent conditional independence relations and (non-dynamic) Markov properties. In the following, we formalize Let ℐ be the set of ℱ 0 -measurable baseline covariates θ i , i ∈ ℐ, such that ℐ ∩ ℛ = ∅ . These covariates can, for example, be (random) initial conditions or time-dependent parametrizations of intensity processes, which are fully determined at time zero. Again, we introduce a multivariate random variable on a subset I ⊆ ℐ with notation θ I := { θ i : i ∈ I } . As before let ℛ be the set of counting processes. We define the extended graph 𝒢 := (ℐ∪ ℛ; ℬ, 𝒜) with the set of undirected edges between baseline covariates and directed edges with A ∩ (ℛ × ℐ) = ∅ . The induced subgraph 𝒢 ℐ be a chain graph. The graph 𝒢 is equivalent to a dynamic graph [ 87 ]. For any set 𝒢 ⊆ ℐ ∪ ℛ we assign the subfiltration ℱ G such that for all s ∈ [0, t ] On the extended local independence graph 𝒢 we need to generalize the notion of local independence and Markov properties. In particular, dynamic properties can only be defined for subsets of ℛ. Definition 6 (local independence) and 8 ( δ separations) and all dynamic Markov properties can be readily generalized for A,C ⊆ ℐ ∪ ℛ and B ⊆ ℛ pairwise disjoint, using the extended subfiltrations. Definition 10. 
R is said to satisfy the local dynamic Markov property with respect to 𝒢 if for all r ∈ ℛ i.e ., the ℱ ℐ ∪ℛ - intensity is a ℱ cl( r ) -intensity . R is said to satisfy the global dynamic Markov property with respect to 𝒢 if for all A,C ⊆ ℐ ∪ ℛ and B ⊆ ℛ, pairwise disjoint , As one of our key results, we show that local independence is equivalent to a vanishing causally conditioned directed information. The proof is provided in Supp. Sec. S1.4. Theorem 3 Let A,C ⊆ ℐ ∪ ℛ and B ⊆ ℛ be pairwise disjoint. Then A ↛ B | C if and only if We conclude by providing an extension of Definition 7, which is sufficient for our purposes. Definition 11 𝒢 is called an (extended) local independence graph of ( θ, R , ℙ) if the following is satisfied: ℙ ∘ θ −1 satisfies the pairwise chain Markov property [ 86 ], relative to 𝒢 ℐ , i.e ., for any pair i, i ′ of non-adjacent vertices with i ′ ∈ nd( i ) satisfies i ⊥ i ′ | nd( i ) \ { i ′ }; For all j ∈ ℐ ∪ ℛ, r ∈ ℛ, j ≠ r it holds that Given that all deterministic parameters are fixed (and hence do not appear as nodes in the local independence graph), we assume conditional measurable separability for every relevant case in this paper. Hence, we always have equivalence of dynamic Markov properties for the generalized case with random baseline covariates. C. Local independence graphs for Markovian CRNs We have thus far claimed that the SR-graph of the Markovian CRN U faithfully represents local (in)dependencies. Now we derive the extended local independence graph of ( U (0), R ) from the “reversible” SR-graph. Here, any process node r ∈ ℛ represents the collection if the reaction channel ℜ r is reversible and otherwise. For simplicity of notation, assume distinct enumeration of species and reactions, i.e., ℒ ∩ ℛ = ∅ . The local independence graph of the reactions of a CRN is complementary to the related concept of a kinetic independence graph [ 91 ]. The latter is formulated on the level of species instead of reactions. 
In the following, let the subscript 0 for parents, children etc. stand for the respective functions with respect to the SR-graph and not the local independence graph. That is, for any A ⊆ ℛ, pa 0 ( A ) ⊂ ℒ is the set of species that modulate reactions in A and ch 0 ( A ) ⊂ ℒ is the set of species changed by reactions in A . We construct the edge sets 𝒜 and ℬ of the graph from the edge set ℰ by iteratively adding elements. For each r ∈ ℛ add to 𝒜 the in-arrows from its parent’s parents, ignoring reflexive edges. To account for the dependence of reaction counters on the initial copy numbers of species ℒ, add directed edges from the parents. Consequently, Further, we assume the most general case of dependences between initial copy numbers of all species by assuming that ℒ, the covariate set of the graph, is a complete subset relative to 𝒢, i.e., every pair of vertices on the subspace is joined by an undirected edge, such that We verify that 𝒢 is a local independence graph in accordance with Def. 11. The pairwise chain Markov property, relative to 𝒢 ℒ is hence trivially satisfied due to completeness. Next, consider any ( j, r ) ∉ 𝒜 . By construction we have Hence, if j ∈ ℛ, no species that is changed by j modulates r (i.e., ch 0 ( j ) ∩ pa 0 ( r ) = ∅), and if j ∈ ℒ, it does not modulate r since j ∉ pa 0 ( r ). Consequently, { j } ↛ { r } | (ℒ ∪ ℛ) \ { j, r }, i.e., the pairwise dynamic Markov property is satisfied. VI. DISCUSSION OF COMBINED APPLICATIONS We explore two application areas, namely continuous-time communication systems and optimal design principles, where continuous-time path mutual information and directed information are combined with stochastic thermodynamic constraints. Other research traditions also merge information theory with thermodynamics, most notably information thermodynamics and information dynamics.
A detailed comparison with these literatures lies outside the scope of the present work; for a comparison with the concept of learning rates, see Chetrite and Gupta [ 16 ], and for related contributions in information dynamics, see Spinney et al . [ 92 ]. We first adopt a classical information-theoretic viewpoint by interpreting deterministically manipulated chemical reaction networks as communication channels, where information encoding and transmission of information entail a thermodynamic cost in the form of heat dissipation, entropy production or work. Then we discuss the utility of the combined frameworks in evaluating the hypothesis that information flow functions as an emergent optimization principle, implicitly followed by natural selection in biological evolution [ 6 , 93 , 94 ]. As a complementary perspective, we also emphasize the relevance for synthetic biology, where these tools can inform the design of chemical reaction networks that are optimized for energy-efficient information processing. In general, one should be careful with continuous approximations of CRNs, such as the chemical Langevin equation, in both information theory and thermodynamics. While heat dissipation may still be well approximated by diffusion models, stochastic consistency (cf. Supp. Sec. S2.4) is typically lost [ 77 ]. In a similar way, path-information measures are not consistent under continuous approximations because the continuum limit eliminates “discrete reaction information” [ 35 ]. A. Chemical reaction networks as communication channels 1. A causal chemical communication model We introduce the communication model of interest, closely following the marked Poisson-type channel described by Frey [ 24 ]. Let t > 0 be the transmission duration and M : Ω → ℳ be a random variable, ℱ 0 -measurable, on a finite index set ℳ. We call M the message index (or simply message) and ℳ the message set of a (ℳ, t )-code. 
For any q ∈ (1, ∞) denote with the maximum q -ary code rate of (| ℳ|, t )-codes. If M is uniformly distributed, then its entropy satisfies . Traditionally the message M is assumed to be uniformly distributed because the channel coding problem can be reduced to the case of a uniform message by a particular choice of random encoding [ 95 ] (p. 73). For biochemical systems, however, we acknowledge that the meaning of a message may be tied to its encoding, which precludes the traditional reduction method. Therefore, we allow for arbitrary message distributions and define the mean code rate [ 96 ] The symbol R , without any index, is reserved for code rates in this section. For notational simplicity we assume that all parallel reactions r 1 , r 2 ∈ ℛ, r 1 ≠ r 2 , satisfy , i.e., the change vectors have the same orientation for ε = +. For a CRN U with |ℛ | potentially reversible reaction channels we redefine 𝒱 (cf. (11)) to contain only forward change vectors, i.e., As before, assume ℒ X , ℒ Y are disjoint subsets of species. The marginal reaction counters of X are for all ν X ∈ 𝒱 X and ε ∈ { +,−} . Reaction counters for Y are defined analogously. Consequently, we have for all s ∈ [0, t ] and analogously for Y . At time zero, the system U is always prepared with a well-defined initial distribution p ( u , 0) for every independent use of the communication channel (and hence independent of M ). Assigned to each message index m ∈ ℳ is a deterministic manipulation protocol of the core CRN U with bounded, piecewise-differentiable càdlàg trajectories on the interval [0, t ]. This protocol is turned “on” at t = 0. Importantly, be a bijective mapping. Next, we define the ℱ 0 -measurable message process θ , such that for all s ∈ [0, t ] The manipulation protocols have the superscript X as the effect of the manipulation is assumed to solely affect certain reaction channels that exclusively change species X: where is the coordinate map to coordinates of , complementary to ℒ X .
The propensity functions for reaction channels on [0, t ] depend on m ∈ ℳ through the protocol , which we denote as . If the protocols are non-constant, then the propensities are explicitly time-dependent, given m . All other reactions are assumed to have no explicit timedependence for simplicity. While θ can be regarded a (noiseless) source encoding, we refer to X as the (noisy) channel encoding with channel encoding propensities analogous to (15), where we do not distinguish between m dependent and m -independent reactions on the r.h.s. for brevity. We call Y the channel output with output propensities Given the above specifications for the CRN, we introduce intensity processes that depend solely on the history of the general communication model M, θ, X and Y . The ℱ θXY – intensities are called feedback channel encoding intensities, and the ℱ MXY -intensities are called feedback source-channel encoding intensities. Further, the ℱ XY –intensities are called (causal) channel output intensities. As we will discuss later, these channel output intensities represent the causal communication channel. For the evaluation of the relevant mutual information of a channel with feedback (and memory) yet another intensity is required. The ℱ MY –intensities we call source-output intensities. For the given setup with noiseless source encoding it can be shown that the source-channel encoding intensities are equal to the channel encoding intensities. For the formal statement of this equivalence we need the notion of the ℱ -predictable σ -algebra on (0, ∞) × Ω [ 57 ](p. 8): Then ((0, ∞) × Ω, 𝒫 ( ℱ ), µ ⊗ ℙ) is a measure space, where µ denotes the Lebesgue measure. For our purpose we only look at the restricted space (0, t ] × Ω and denote restricted filtrations as ℱ [0, t ] . Proposition 2. If θ is a noiseless source encoding, i.e ., it is defined as specified above, then the channel encoding intensities are equivalent to the source-channel encoding intensities . 
That is, for all ε ∈ { +, − } and ν X ∈ 𝒱 X it holds that Further, the source-output intensities satisfy ( µ ⊗ ℙ) -a.e. on , for all ε ∈ { +, − } and ν Y ∈ 𝒱 Y . A proof is presented in Supp. Sec. S1.6 A. Lastly, let be a deterministic decoding function, which maps every output trajectory back to either of the messages in ℳ or declares an erasure, by mapping to zero. We may associate two distinct conceptions with this decoding function: (i) the extrinsic decoder is a (computationally) learned function by an external observer, based on experimental observations of Y [0, t ] given M ; (ii) the intrinsic decoder refers to the conception that each m ∈ ℳ stands for a pair of input and output of a bijective function, where is the input and the output is a deterministic downstream action implemented by the biological cell. However, in the following we consider an abstract decoder which just determines the function space required for information theoretic analyses of chemical communication models. The (mean) probability of a decoding error be defined as While other performance measures, like the maximum probability of error max m ∈ ℳ ℙ( M ≠ D ( Y [0, t ] ) | M = m ) exist, we focus on the definition (56). In general, the relevant mutual information for a continuous-time chemical communication channel with memory and use of feedback is where we used that 𝕀 ( M ; U (0)) = 0 by construction. Consistently with Proposition 2 we have for a noiseless source-encoding θ [0, t ] . Now let us elaborate on the construction of the communication system by deriving its (extended) local independence graph 𝒢 = (ℐ ∪ ℛ; ℬ, 𝒜). Here, ℐ := { M } ∪ 𝒮, where M is the label for the node of the baseline variable M , whose realization is equivalent to a full trajectory θ [0, t ] . Mainly, 𝒢 is derived from the SR-graph of U as outlined in Sec. V C, augmented with the baseline vertex M. 
Hence, the leastassumptious edge sets are and In the absence of further restrictions on the chemical communication system, choosing (𝒮, ℬ) as a complete subgraph is consistent with the generic conditional independence structure induced by chemical reaction dynamics. A time-unrolled description of U as a dynamic Bayesian network, with species population values ( ... , U (− 2 d s ), U (− d s ), U (0)) at previous infinitesimal time layers, is sparsely connected across adjacent layers, but marginalization over the unobserved reaction history typically produces a fully connected conditional independence graph on U (0) for any non-infinitesimal time horizon. Postulating a missing edge {d, d ′ } ∉ ℬ therefore amounts to requiring that holds uniformly over the admissible choices of stoichiometry, reaction propensities, and preparation protocols for p ( u , 0). A sparser ℬ thus presupposes dynamically decoupled modules (e.g., bivariate subsystems) or a specific ergodic preparation protocol that asymptotically enforces such separations. To keep the framework applicable to general open CRNs with arbitrary preprocessing, we therefore adopt the structurally least restrictive choice and take (𝒮, ℬ) to be complete. In a proper communication model, the message M must be locally independent of U , which we have achieved through source-encoding via ℱ 0 -measurable manipulation protocols. Note that M and U [0, t ] are not independent random variables since M → U . Now assume that i.e., all reaction-co-parents of M are in and all species-co-parents are in 𝒮 X ∪ 𝒮 Y . Then, by construction and the global dynamic Markov property, we have that for all non-empty sets . In particular, we then have If additionally, there are no X - and no Y -indistinguishable reactions, then Theorem 3 implies where the first equality follows since source-encoding is noiseless and bijective. 
The property in (57) is actually an important classification of a communication system that was first introduced by Massey [ 11 ] for discrete-time models. We provide an adaptation of this definition to our model class. Definition 12 For a (|ℳ|, t ) -source code with message M, let θ [0, t ] be a (potentially noisy) source encoding and Y [0, t ] be the channel output. A chemical communication system ( M, θ,U , 𝒮 X , 𝒮 Y ) with designated channel encoding X [0, t ] is called causal if is a -intensity for all ε ∈ { +, − } and all ν Y ∈ 𝒱 Y . A direct consequence is that causal communication systems satisfy For a noiseless source-encoding Definition 12 is actually equivalent to and Theorem 3. Similar to Massey, we obtain the following inequality for any causal chemical communication system. Proposition 3. Let ( θ [0, t ] , X [0, t ] , Y [0, t ] ) be a causal communication system for a (|ℳ|, t ) -source code θ [0, t ] . Then with equality if and only if 𝕀 ( X [0, t ] → Y [0, t ] ∥ θ [0, t ] ) = 0. Proof . Since U (0) is prepared independently of θ (0) it holds that 𝕀( θ (0); Y (0)) = 0. Further, θ [0, t ] is locally independent from Y [0, t ] by construction. Hence 𝕀( Y [0, t ] → θ [0, t ] ) = 0. Consequently, where we used (8) in the first line, non-negativity of the causally conditioned DI in the second line, the chain rule for DI in the third line and (58) in the last line. In that, 𝕀( X [0, t ] → Y [0, t ] ∥ θ [0, t ] ) quantifies the surplus of causal information provided by X to Y without the causal information already provided by θ . We can think of this as the amount of channel encoding noise that is causally transmitted to Y . Since the ℱ θY –intensities usually differ from the ℱ θXY –intensities for any ν Y ∈ 𝒱 Y , ε ∈ { +,− } , this surplus information typically has a positive value. Such positive surplus information can be exemplarily verified with the small models in Sec. 
VI C under the assumption of just two different constant chemostat concentration values as -trajectories. 2. The noisy channel coding problem in chemical communication In the standard noisy channel coding problem for a causal communication system, the causally conditioned channel law is taken as fixed (see Appendix C for a definition). Here, we discuss the notion of channel capacity in the context of chemical communication. The channel capacity is a property of a causal chemical channel (𝒮 X , ℙ Y ∥ X , 𝒮 Y ) with fixed input and output species identities and a fixed channel law. Communicators may choose a code design – an encoder (the dynamics of X ) and a decoder (post-processing of Y ) – but they cannot alter the physical medium or its intrinsic randomness. Accordingly, the fluctuations that drive Y beyond those induced by X are modeled as the channel’s internal noise, which is not under the communicators’ control. This setup cleanly separates “code design” from “channel control”. Allowing the channel law ℙ Y ∥ X to change turns the problem into a joint code–channel co-design task. While such co-design may be a reasonable modeling choice for biochemical systems in some cases, we deliberately focus on the traditional fixed-channel formulation within our chemical communication model. The channel law ℙ Y ∥ X is equivalently represented by the collection of predictable intensities {λ Y ( εν Y ) | ε ∈ { +,− }, ν Y ∈ 𝒱 Y } , which is unique up to stochastic equivalence [ 57 ] (p. 31). This equivalence is readily apparent from the path likelihood (23). Whereas much of the literature formulates such likelihoods on the marked-jump space , we work with an equivalent, and more intuitive, representation on the space of càdlàg trajectories 𝒟 ([0, t ], 𝔛 × 𝒴). To this end, we introduce a collection of predictable non-negative functionals chosen such that on the predictable space . 
Their existence is guaranteed by the factorization lemma [ 97 ], and they can be chosen as the regenerative version specified by Brémaud [ 57 ] (p. 63). With this notation, the causally conditioned path likelihood admits the form where the counting (process) trajectories ρ Y ( εν Y ) are fully determined by y [0, t ] . For coding with transmission duration t and message M , and under the constraint that the channel law ℙ Y ∥ X is kept invariant, the remaining degrees of freedom are: (i) the (|ℳ|, t ) source code, which specifies the functional form of the protocols on [0, t ]; (ii) the feedback channel encoding, i.e., the causal law ℙ X ∥ θ,Y ; and (iii) the initial distribution p ( u , 0), if it can be manipulated. As in the output case, the feedback encoding law ℙ X ∥ θ,Y can be represented by the collection of predictable input intensities {λ X ( εν X ) | ε ∈ { +,− }, ν X ∈ 𝒱 X } (unique up to stochastic equivalence). In particular, we may choose predictable functionals such that, on the predictable space Equivalently, for any fixed protocol and history ( x, y ) [0, t ] , the map specifies a version of the input intensity along that trajectory. Accordingly, the design space of admissible channel codes can be identified with the class of such predictable functionals (subject to regularity constraints). Replacing this family by an alternative collection induces, in general, a different joint measure on (Ω, ℱ MθXY ). The invariance constraint on the physical channel is precisely that the conditional output law remains unchanged, i.e., . A class of such admissible channel codes will be derived in Sec. VI A 5, Theorem 5. Finally, degrees (i) and (ii) may be combined into a single causal feedback source–channel encoding law on [0, t ], which is equivalently represented by predictable intensities for the message-driven input process. Concretely, one may introduce predictable functionals such that A more formal notion of codes is presented in Appendix E. 
Next we turn to the definition of a suitable class of code families, which is required for the formulation of asymptotic limiting properties. In classical channel coding, longer block codes are often built by concatenating codewords in time. Given a (|ℳ a |, t a )-code and a (|ℳ b |, t b )-code, one sends a message ( m a , m b ) ∈ ℳ a × ℳ b by sending a (|ℳ a |, t a )-codeword and, after a channel and feedback reset (or under a memoryless channel), sending a (|ℳ b |, t b )-codeword. Such a code scheme enforces that single codewords are sent via independent uses of the channel. While this works well for memoryless channels, it may result in practically low code rates for channels with memory as the guard/relaxation intervals for resets to a prescribed channel initialization p 0 can be substantial and may dominate the communication time [ 98 ]. Previous continuous-time capacity results have typically assumed a memoryless channel [ 24 , 99 ] and feedback with instantaneous resets [ 12 ]. For channels with memory, where small guard times lead to intersymbol interference across code blocks [ 98 ], it is therefore favorable, both theoretically and practically, to use a single code with a larger codebook, spanning the whole transmission, instead of a sequence of codewords that are decoded independently under intersymbol interference. Continuous-time collections of messages and codes of practical and theoretical interest will hence be in the following class. Definition 13 A collection ( M t ) t ≥0 of messages M t : Ω → ℳ t is called monotonous if for all t ′ > t > 0. Any collection of codes (C t ) t ≥0 for ( M t ) t ≥0 is also called monotonous . While this definition does not ensure that the code rate is a monotonously increasing function, it formally accounts for the time-concatenation idea in continuous time for a single channel use and prevents the maximum code rate R 0 ( t ) from vanishing at finite times. 
For instance, the time-concatenation of potentially interference-aware (|ℳ 0 |, Δ t ) code-slots with |ℳ 0 | ≥ 2 results in a time-dependent maximum code rate Since, unlike in discrete-time lossless coding (where the per-symbol rate is bounded), a continuum-valued protocol can in principle encode countably many messages on an arbitrarily short interval [0, Δ t ), one could make the apparent code rate arbitrarily large by choosing Δ t very small. Having introduced practical issues of code design for chemical communication, we now turn to the notions of achievability and capacity required for coding theorems. The following definitions are adapted from Csiszár and Körner [ 100 ]. Definition 14 Let ε ∈ [0, 1) and the code C be consistent with P. The code C is called an (|ℳ|, t, ε ) -code for the message M if Definition 15 ( ε -achievable rates and ε -capacity). For ε ∈ [0, 1), a nonnegative number R ∗ is an ε -achievable rate for the channel (𝒮 X , ℙ Y ∥ X , 𝒮 Y ) if for all δ > 0 there exists t 0 > 0 such that for all t ≥ t 0 there exists a message M t : Ω → ℳ t such that and there exists an (|ℳ t |, t, ε ) -code for ℳ t . A number R ∗ is an achievable rate if it is ε-achievable for all ε ∈ (0, 1). The supremum of ε-achievable rates, respectively, achievable rates is called ε-capacity C ε , respectively, capacity C of the channel (𝒮 X , ℙ Y ∥ X , 𝒮 Y ). Since C ε decreases as ε ↘ 0, we have In line with the existing literature, we explicitly point out that the collection of messages ( M t ) t ≥0 is subject to design and is not fixed in the noisy channel coding problem. However, as we pointed out when defining code rates, semantic aspects in biochemical systems may constrain admissible message random variables (and their distributions) in practice. 
The information capacity C 𝕀 for any information stable channel (𝒮 X , ℙ Y ∥ X , 𝒮 Y ) be defined as where optimizes over the families of functions , and thereby implicitly over the feedback channel encoding distribution , as well as the initial distribution p ( u , 0) under the constraint that it leaves the channel law invariant. Further, optimizes over the space of source encoding protocol functions and sup optimizes over the cardinality of ℳ t as well as the distribution of M t . The limit in (59) is essential for defining information capacity because, in principle, it enables identification with the operational channel capacity, i.e., the supremum of rates R for which there exists a sequence of codes with P e → 0 as t → ∞. Establishing this equivalence lies beyond the scope of the present work and we confine ourselves to the converse channel coding theorem (cf. Theorem 4). In classical definitions of the channel capacity for discrete-time systems with memory and feedback, the latter two suprema are typically not involved in the definition. Optimizing over θ [0, t ] and M t can actually be subsumed into the optimization over X [0, t ] for causal systems where both channel encoding and source encoding are noiseless (i.e., one-to-one mappings). Since the channel encoding is always noisy for chemical communication, only can be subsumed into for noiseless source coding. Similarly, if a (joint) source-channel code is used instead of a split source-channel code, then the remaining optimization problems are is not explicitly modeled. For intuition, we propose a generalization of information stability (as defined in [ 101 ]) to continuous-time channels, but we leave proving its applicability to future work. Definition 16 The channel (𝒮 X , ℙ Y ∥ X , 𝒮 Y ) is called information stable if there exists a sequence such that for all δ > 0 (i.e., with convergence in probability) . 
Here, we added the sequence of processes/distributions since the capacity is typically not achieved by any element of the optimization space. Information stability is typically satisfied for ergodic communication system processes. What is not explicit in (60) is that the function spaces over which we optimize are typically restricted by additional constraints, such as upper bounds on reaction rates, a total maximum energy budget and confinements on the practical degrees of freedom in for chemical systems. We make energetic constraints explicit in the section VI A 3 and give a converse coding theorem, while practical degrees of freedom are discussed in VI A 5. 3. Channel coding under energetic constraints Let us now introduce a physically principled constraint on the search space of X [0, t ] and θ [0, t ] . For each m ∈ ℳ t we consider the heat q m ( t ) dissipated by the trajectory U [0, t ] . Let Q 0 ≥ 0 be the upper bound on the time-averaged heat, such that Such a constraint is also called input-output cost function [ 95 ], because it considers the dissipative cost of the whole CRN U and not only the energy dissipated by channel encoding into X . Evaluating the finite-time information capacity under this constraint yields the thermodynamic capacity-cost functions Alternatively, we can also constrain the chemical work or the total entropy production with W 0 , ε 0 ≥ 0, leading to other thermodynamic capacity-cost functions. If the protocols are constant protocols, where m just specifies a different concentration of chemostat species, then both the heat dissipation and the entropy production constraint can be rewritten in the limit t → ∞, such that by virtue of (52). Further alternatives, that aim to (i) approximate the cost, if no information about the internal entropy h i is available, or (ii) isolate the thermodynamic cost of the channel encoding reactions (excluding the cost of “pure” transmission) may use the above constraints, but with (46), (39) and (49). 
In the case of pure encoding cost, we would just sum over reaction channels ℜ r that are directly coupled to the protocols . For the given communication channel we can also provide a converse (channel coding) theorem [ 20 , 24 ], which is valid for any of the discussed energy constraints. Importantly, it is also valid for information unstable channels, since it only provides an upper bound on the operational capacity that is not necessarily tight. Lemma 2 Consider an (|ℳ|, t ) -code for the channel (𝒮 X , ℙ Y ∥ X , 𝒮 Y ) with message M : Ω → ℳ. If the code exhibits the code rate R, then where H B ( p ) = − p log( p ) − (1 − p ) log(1− p ) denotes the binary entropy function (for an arbitrary log-base) . Theorem 4 Let R be an achievable rate of the channel (𝒮 X , ℙ Y ∥ X , 𝒮 Y ) under a given set of energy constraints on admissible codes. For simplicity, we exemplarily use the heat dissipation constraint with upper bound Q 0 . Then it holds where C ( Q 0 ) denotes the operational capacity under the dissipation constraint . Proofs of both the Lemma and the Theorem are provided in Supp. Sec. S1.6 A. Lastly, although we formulate these optimization problems under thermodynamic constraints, they should often be complemented by kinetic constraints. For instance, at thermodynamic equilibrium, where the rate constants satisfy (27), one could in principle let all rate constants diverge while maintaining fixed propensity ratios. This, however, is clearly unphysical. Additional assumptions, such as lower bounds on mean sojourn times [ 37 ] or explicit models of the reaction constants [ 64 ], are thus necessary to ensure physical plausibility. In stochastic thermodynamics, the interplay between thermodynamic and kinetic constraints has been explored in the context of thermodynamic and kinetic uncertainty relations [ 102 ]. 4. Energy-per-bit perspective A dual perspective to information capacity is the energy-per-bit-rate function [ 95 ]. 
Define the average energy per information unit (given the code rate R in an arbitrary log-base) as Instead of the heat dissipation bound Q 0 , we can of course also use any other bound. Since the capacity-cost functions Q 0 ⟼ C 𝕀 ( Q 0 ) and Q 0 1 ⟼ c 𝕀 ( t, Q 0 ) are monotonously increasing, the minimum-energy-per-bit-rate function can be defined as their generalized inverse: Given Theorem 4, is actually a lower bound on the minimum amount of heat that must be dissipated per unit of information sent to reliably communicate at code rate R over a given channel. This bound is conjectured to be tight for information stable channels. The following Lemma makes the asymptotic version numerically more accessible. Lemma 3 It holds that A proof is provided in Supp. Sec. S1.6 B. Similarly, we can define the minimum average heat dissipation required for a minimum information rate R : subject to the constraint I( θ [0, t ] ; Y [0, t ] ) ≥ Rt for both infima. Its asymptotic version is The two presented notions of minimum energy dissipation per information unit are shown to be equivalent. Proposition 4 As before, the definitions and equivalence results work analogously with total entropy production, etc. A proof is provided in Supp. Sec. S1.6 B. 5. Degrees of freedom in channel coding Chemical reactions cannot be changed arbitrarily in reality. To find the supremum in the capacity definition, we have just a few options. First, we may apply “thermodynamic parameter tuning” by exchanging certain species with stoichiometrically equivalent ones that nonetheless exhibit different reaction affinities. That is, we change g , such that is modified for a subset of reaction channels. Otherwise, we can change the fixed chemostat concentrations z (not the source encoding protocols ), such that only those reaction channels are changed that leave the channel probabilities invariant. 
Lastly, we can vary the reaction network topology by introducing or erasing reactions, i.e., adding and removing new stoichiometric vectors (including their chemostat couplings). This may also include a change in the number and identities of species that are not in 𝒮 X ∪ 𝒮 Y . Again, all these modifications must not change the channel . This leaves us with the question of which reaction channels are actually allowed to be modified. We address this problem with a network-topological classification based on the species-reaction graph. Definition 17 Consider the reversible species-reaction graph of the open CRN U. A reaction channel ℜ r is classified as ... feedforward ( ℛ ff ) if there is a ‘trail’ from 𝒮 X to 𝒮 Y that passes through node ℜ r . feedback ( ℛ fb ) if there is a ‘trail’ from 𝒮 Y to 𝒮 X that passes through node ℜ r . modulating ( ℛ c ) if either of the following is true: ℜ r is purely modulating if it is neither a feedforward nor a feedback reaction and there exists a ‘trail’ from node ℜ r to 𝒮 X ∪𝒮 Y . ℜ r is feedforward, but not feedback, and there exists a ‘trail’ from ℜ r to 𝒮 X that avoids 𝒮 Y . ℜ r is feedback, but not feedforward, and there exists a ‘trail’ from ℜ r to 𝒮 Y that avoids 𝒮 X . disconnected ( ℛ d ) if there is no ‘trail’ from node ℜ r to 𝒮 X ∪𝒮 Y . input modulating ( ℛ Tx ) if it is modulating and there exists a ‘trail’ from ℜ r to 𝒮 X that avoids 𝒮 Y . output modulating ( ℛ RX ) if it is modulating and there exists a ‘trail’ from ℜ r to 𝒮 Y that avoids 𝒮 X . purely input modulating if . purely output modulating if . The given classification rules are designed to emphasize two relevant functionalities that a reaction channel can exhibit. First, the directional information exchange between X and Y (feedforward and feedback), and second, modulation of either X or Y that does not exploit any information about the other. Disconnected reaction channels do not influence X and Y at all and are hence irrelevant to the communication system. 
The subclassifications within ℛ c serve the practical interpretability. In Appendix D we additionally dissect the above rules in terms of binary “adjacency” matrices that indicate whether directed trails between ℜ r and either 𝒮 X or 𝒮 Y exist. We are led by the following intuition: reaction channels for which no trail from ℜ r to 𝒮 Y exists that avoids 𝒮 X should be modifiable without changing the channel . Only trails that carry influence from a reaction channel to Y without passing through X can alter how inputs are mapped to outputs. Using the matrix representations in Appendix D, one verifies that the following set collects exactly these reactions, and its decomposition makes the functional roles transparent. Definition 18 Consider the reversible species-reaction graph of the open CRN U. Reaction channels in the set are called encoding channels . Thus far, we have taken a reaction-centered perspective, but the species-centered perspective is always implicit. For example, changing a chemostat concentration changes the propensities of a set of reactions, and erasing a core species for the network corresponds to erasing all reactions in which it participates. A degree of freedom in the optimization of that we did not address so far is the initial distribution p ( u , 0). At least for finite transmission duration t we can expect that a well-chosen initial distribution can reduce the decoding error P e for a given source encoding θ [0, t ] . The initial values of certain species have an effect only on X [0, t ] , while others also have a direct effect on , e.g., because they are part of the species that implement feedforward or output modulation. Species that are directly connected to 𝒮 Y in the SR-graph can be identified with , where we introduce “( ... ) C ” (in contrast to “( ... ) c ”) as the notation for the complement on either of the sets ℛ or 𝒮. That is, and are the species that do not have a direct connection to 𝒮 Y . 
Since the Janossy density of ( X,Y ) [0, t ] , represented as a function of trajectories, admits the factorization the channel law is independent of the choice of initial distribution of the species 𝒮 X ∪𝒮 Y . Definition 19 Consider the reversible species-reaction graph of the open CRN U. The species in the set are called precoding (or, reset-encoding) species, where the complements are taken on ℛ and 𝒮, respectively . The following theorem asserts that the intuition about ℛ ← and 𝒮 ← is correct for a particular class of systems. Let ( M, θ,U , 𝒮 X , 𝒮 Y ) describe a causal chemical communication system, as just outlined, but with the extension that manipulation protocols may affect not only , but all of ℛ . Without restriction of generality, we assume that all propensity functions have a non-empty support, i.e., they must admit positive values for some arguments. For all r ∈ ℛ ← let be alternative (reversible) reaction propensities, which may differ from the original such that the stoichiometry of the reaction is unmodified. Either the reaction intensity is set to zero, i.e., for all ε ∈ { +,− } for all arguments, which corresponds to an erasure of the reaction from the network. Else, the reaction remains active, i.e., for some arguments. Then it needs to satisfy two properties: Reaction stoichiometry of ℜ r is unchanged, i.e., for all If ( u, m, t ) satisfies , then . This particularly restricts possible variations of t -dependence, given m . All other reactions keep their original propensities. Now denote S p := ( U d (0): d ∈ 𝒮 ← ) and S c := ( U d (0): d ∈ 𝒮 \ 𝒮 ← ), such that U (0) = ( S p , S c ). The set of alternative propensities, together with a yet to be defined alternative initial distribution and an alternative message distribution, defines an alternative process on the probability space , such that the alternative path measure is given by . 
We stipulate that and allow variations of for any s c , as long as any s p with ℙ( S p = s p | S c = s c ) > 0 also satisfies . Our specifications of the alternative propensities and allowed variations of the initial distribution ensure that the alternative path measure is absolutely continuous with respect to the original path measure. With the alternative probability measure, we define channel output intensities for all s ∈ [0, t ] and all ν Y ∈ 𝒱 Y , ε ∈ { +, − } . Theorem 5 Consider two CRNs that differ only in reaction propensities of encoding reactions ℛ ← and the conditional initial distribution of precoding species 𝒮 ← , as described above. If and there are no X - or Y -indistinguishable reactions, then for all ν Y ∈ 𝒱 Y , ε ∈ { +, − } , and all s ∈ [0, t ]. In particular, the causally conditional path measures satisfy A proof is provided in Supp. Sec. S1.6 C. The equalities in Theorem 5 are stated -a.s. To assert them also ℙ-a.s., an additional dominance condition is required: for every r ∈ ℛ and ε ∈ { +, − } , the alternative propensity must not vanish on the support of the corresponding original propensity . Equivalently, this ensures (in addition to the already guaranteed ), i.e., mutual absolute continuity. This subtlety has little practical impact. One may view ( θ,U , ℙ) as a “super-network” that contains at least all reactions and species present under . Then every possible path is also ℙ-possible, so the set of events with positive probability may be larger under ℙ. What matters for the capacity problem is that, on every event to which both measures assign positive probability, the channel law coincides, and thus the operational conclusion remains unchanged. However, Theorem 5 does not, by itself, certify that changes in ℛ ← exhaust the design space relevant to . A completeness statement would require a converse: any admissible change of propensities for reactions in alters the channel law on a set of positive measure. 
This appears natural in light of the graphical intuition developed above, but establishing the converse lies beyond the scope of this work. Finally, while we did neither stipulate a fully reversible CRN nor elementary reactions in the definition of the reversible SR-graph, we finally assess the consequences of thermodynamically consistent modeling on the set of encoding channels. Theorem 6 For any causal chemical communication model that is based on a reversible CRN it holds that if all reactions are elementary (and physically meaningful). In particular , Proof . The necessary condition for a reversible reaction channel ℜ r to have unidirectional information flow is that it follows either of the structural assumptions depicted in Fig. 3 . However, any effective enzymatic reaction is a non-elementary approximation of at least two elementary reactions. Hence, we can focus on the remaining case of bidirectional zero-order propensities in some species d with π d ( ν r ) ≠ 0. The case of an elementary one-directional zero-order reaction, where the forward reaction does not depend on d , but the reverse reaction does, may arise from exchanges with a reservoir. That is, however, not a bidirectional zero-order case, which we deem not physically meaningful. In conclusion, unidirectional information flow does not happen in a reversible CRN consisting of elementary, physically meaningful reactions. Then, ℛ ff = ℛ fb follows immediately and hence implies Download figure Open in new tab FIG. 3. Species-Reaction subgraphs depicting distinct types of reaction channels that ensure unidirectional information flow. (a) Example of an effective enzymatic reaction, where A modulates the propensities, but is not changed. (b) Example of a (elementary) reaction with bidirectional zero-order propensity w.r.t. species C. 
A major consequence is that variation of feedback is not a degree of freedom that can be used to increase the channel capacity of a thermodynamically consistent chemical communication model. B. Information flow as an optimization principle Using information flow in chemical reaction networks as an optimization objective function yields proposals for the properties of optimized information processing structures or their inputs. These proposals can be used as testable predictions to evaluate the hypothesis that information flow is an emergent optimization objective of evolution. Alternatively, they can be used as principled design suggestions for engineering chemical reaction networks optimized for information transmission. In this work, we introduce trajectory-level information measures between subnetworks that extend earlier steady-state ensemble-level metrics [ 103 ] and Gaussian channel approximations [ 104 ]. By formulating the information measures on the path probabilities of the underlying molecular dynamics, the framework captures the full temporal structure of the process. Thermodynamically consistent modeling enables the replacement of previous ad-hoc cost functions, which constrained mean molecular counts or their variances [ 103 , 104 ], with energetic bounds derived from first principles. Taken together, these extensions allow the paradigm to be explored at a finer molecular-level and temporal resolution while remaining anchored in fundamental physical laws. Further, directed information and causally conditioned path measures add a notion of causal directionality to the mathematical formulation of the optimization principle, distinguishing causal forward information flow from causal feedback. 
The paradigm of information optimization in biological systems has largely been discussed using mutual information, often motivated by the argument that mutual information is a bias-free measure of statistical dependence and thus serves as a minimal requirement for the presence of information flow [ 6 ]. While this rationale supports mutual information as a suitable objective function, it does not capture the causal structure present in many information-processing tasks. Earlier work introduced predictive information as a trajectory-level measure quantifying how much the past output dynamics reveal about future input dynamics [ 105 ]. Although not always framed in explicitly directional terms, this formulation inherently distinguishes the predicted quantity (future input) from the given information (past output), implying a temporal and functional asymmetry. Directed information formalizes such asymmetries by explicitly accounting for causal direction, and thus serves as a natural extension of mutual information when distinguishing forward from feedback pathways is essential. Recalling Proposition 3 and adopting the previously discussed communication system perspective, the directed information can be interpreted as an upper bound, and thus a proxy, for the information flow from an inaccessible external stimulus θ [0, t ] to the output Y [0, t ] , effectively excluding feedback contributions that would otherwise be included in the mutual information 𝕀( X [0, t ] ; Y [0, t ] ). Moreover, Definition 18 of encoding channels can be fruitfully applied in the information-optimization paradigm. It allows specific subsets of the reaction network to be held invariant, enabling the targeted design or analysis of other network components. Recall that the Janossy density, represented as a function of trajectories, admits the factorization in Eq. (65) . Assuming a fixed initial distribution, then implies that fixing leaves as degrees of freedom for optimization, and vice versa. 
Consequently, optimizing over the causally conditioned distribution or the marginal path distribution is equivalent if only one of the causally conditional probabilities is held fixed. In this case, Theorem 5 applies equivalently, when is held invariant the encoding channels are replaced with If, however, the marginal law is fixed, the situation becomes more difficult. Then, we must ensure that every change in the Y [0, t ] -dynamics is either “compensated for” or “not fed back into” the X [0, t ] -dynamics. If no change in Y is fed back to X , then , which maps the optimization problem to the one where the causal conditional probability is fixed. The case where compensation is required is beyond the scope of this work. Bialek [ 93 ] has summarized two mathematical approaches to test the hypothesis that evolutionary processes optimize information flow: (1) distribution matching and (2) network architecture. We extend these frameworks to the setting of path-level directed information subject to thermodynamic energy constraints. In the distribution matching framework, the input distribution is optimized for a given channel. This can be interpreted as an inverse problem to evolutionary network optimization: given the channel , does the actual input distribution or the feedback-encoding distribution maximize forward information flow? To eliminate the degree of freedom associated with the initial distribution, we assume fixed initial conditions X (0) = x (0) ∈ 𝔛 and Y (0) = y (0) ∈ 𝒴 . The two equivalent formulations of the distribution matching problem are then if the optimal distribution exists. Otherwise the solution can be represented as a sequence of distributions achieving the optimum asymptotically. The supremum is taken under any physically principled constraint of the form (61), which can involve the energetic contributions of the entire network or only a subset. 
The optimization involves modifications such as rescaling or removal of encoding reactions ℛ ← , consistent with Theorem 5. Since the path measures are fully determined by the network’s propensity functions, optimization may be carried out directly over these function spaces. In this way, the distribution matching framework becomes one of “matching propensity functions (and possibly network topology) under energetic constraints”. If X [0, t ] is causally independent of Y [0, t ] , that is, 𝕀( Y [0, t ] → X [0, t ] ) = 0, then mutual information equals directed information under fixed initial conditions. Similarly, if X models an external signal such as DNA-damaging photon arrival statistics, rather than a part of the CRN, then energetic constraints on X [0, t ] are irrelevant, and kinetic constraints may be more appropriate. In contrast, the network architecture framework keeps the input process, or feedback encoding, fixed and optimizes the kinetic parameters and topology of the channel. This corresponds more directly to the evolutionary hypothesis, wherein the internal network evolves to optimally process a given environmental input. Again assuming fixed initial conditions and fixing (not ), we consider the problems where again the supremum may only be achieved asymptotically by a sequence of distributions. The resulting channel process, defined by its topology and propensity functions, yields a principled design proposal under varying energy budgets, or a testable prediction in support of the information-optimization hypothesis. Both frameworks can also be formulated in terms of the information rate (6). This information rate perspective may even be more appropriate for the discussed application, as it eliminates the effect of an arbitrarily chosen finite time-window. 
In the following, we illustrate this optimization framework in two small-scale, analytically tractable examples, where the capacity of biochemical channels is evaluated under energetic constraints using promoter-level models of transcriptional regulation. C. Case studies: Accurate energy-accounting and information-dissipation trade-off in small promoter models We now apply the optimization principle to two analytically tractable promoter models. In each model the promoter state trajectory acts as the channel input X and the mRNA transcription record as the output Y . Our task is to maximize the directed information rate subject to an upper bound on the promoter’s entropy-production rate e p . Transcription events are excluded from the thermodynamic budget because (i) their energetic cost is invariant for a fixed gene and does not affect the optimal promoter design, and (ii) the transcription events are not modeled thermodynamically consistent for simplicity and tractability. The resulting constrained problems are solved with fmincon (MATLAB R2022a), providing quantitative examples of the distribution-matching framework in which the input kinetics are tuned to exploit the full dynamic range of the transcriptional output [ 93 ]. Consider a two-state Markov jump process with states OFF and ON, which modulate transcriptional activity by switching between the transcription rates 0 and k tx . For simplicity, we set k tx = 1 in our analysis, thereby fixing the system’s time scale. The state transition diagram of this model, along with transition rates and thermodynamic assumptions, is shown in Fig. 4 . The model features a pair of parallel reaction channels: the first channel involves ATP hydrolysis with and is coupled to corresponding chemostats, while the second reaction channel is thermally “driven” and not coupled to a chemostat. We denote the Gibbs free energy difference between the states by Δ g = g (ON) − g (OFF). 
The entropy production rate at the NESS is Download figure Open in new tab FIG. 4. State transition diagram of a two-state promoter model with two microscopically reversible reaction channels. The first reaction channel involves ATP hydrolysis and is therefore coupled to the chemostats of ATP, ADP and P i . with net probability flux , as we show in Appendix F. This model provides a minimal example of the underestimation of the true entropy production by the effective entropy production (cf. Section IV F), as the effective two-state model always satisfies detailed balance at stationarity, such that its effective entropy production rate vanishes. The three-state promoter model in Figure 5 extends the two-state model by adding an intermediate state, OFF ∗ , which represents a step in a non-elementary reaction involving ATP hydrolysis. In this model, the first reaction channel handles ATP binding and unbinding between OFF and OFF ∗ , while the second channel accounts for the actual ATP hydrolysis, driving the transition from OFF ∗ to ON. The third reaction channel allows direct transitions between OFF and ON and is not coupled to any chemostat species. Distinguishing ATP binding/unbinding and hydrolysis may thereby be a more accurate account of the elementary reaction assumption than the single driven channel in the two-state model. Thermodynamically, the three-state promoter is characterized by Download figure Open in new tab FIG. 5. State transition diagram of a three-state promoter model with three microscopically reversible reaction channels. The upper path from OFF to ON involves ATP hydrolysis. Hence, OFF ∗ is the intermediate of a non-elementary hydrolysis reaction. The EPR satisfies With , which is related to the general form of the EPR for circular state transition diagrams [ 81 ]. We now formulate the maximization problem and use the (information) capacity notation for the maximum DIR. 
For the two-state promoter we compute where are the effective rates of the transitions between OFF and ON. The physical boundary conditions constrain the admissible region of the effective rates: For this admissible region the EPR is thus expressed in terms of the effective rates via The DIR for the model with a two-state promoter has been shown to satisfy [ 39 ] where h ( τ ) is the differential entropy of the random time τ between transcription events in the dynamics of Y . is its dimensionless version. The probability density of τ is which is an affine combination of exponentials and where w 1 , w 2 > 0 are the roots of with w 1 < w 2 . Figure 6 presents the numerical solution of (67) for a fixed value of − β Δ µ = 16, corresponding to the chemical work done on the system via ATP hydrolysis [ 106 ], and a fixed ratio of −Δ g/ Δ µ = 0.5. Thereby, fixing Δ µ corresponds to fixing the ratio of concentrations and fixing Δ g essentially fixes the identities of core species that participate in the promoter switching. The only degree of freedom left for practical parameter tuning is then the absolute concentration [ATP], keeping the ratio fixed. In Figure 6a the information capacity is evaluated for different ε 0 > 0, given that −Δ g/ Δ µ = 0.5. Complementarily, Figure 6b displays a density plot of the EPR e p in the -plane together with (i) its hyperbolic level sets and (ii) the curve of DIR maximizing effective rates for varying ε 0 > 0. The density plot illustrates that upper bounding the EPR does not upper bound the kinetic rates. Download figure Open in new tab FIG. 6. Maximum DIR for the two-state promoter model and fixed − β Δ µ = 16 and −Δ g/ Δ µ = 0.5. In contrast, Figure 7 shows the information capacity for varying −Δ g/ Δ µ and different level sets of e p . Hence, we do not only vary the absolute ATP concentration, but also explore the maximum for a continuous range of possible core species identities. 
In the range −Δ g/ Δ µ ∈ [0, 1 ] the maximum DIR increases monotonically with increasing e p . Download figure Open in new tab FIG. 7. Maximum DIR of the two-state promoter model with − β Δ µ = 16 as a function of −Δ g/ Δ µ for different level sets e p = ε 0 . For −Δ g/ Δ µ ∉ [0, 1 ] the level set e p = 0 in the plane always intersects with the DIR maximizing ratio [ 37 ]. However, −Δ g/ Δ µ > 1 on the other hand corresponds to , i.e., a chemically driven reaction, which additionally absorbs heat from the environment. Both scenarios are biophysically implausible. For the three-state promoter let X represent the subnetwork { ON } , i.e., the switching dynamics between a transcriptionally active and any inactive promoter state { OFF, OFF ∗ } . We define the capacity problem as where Δ g , Δ µ and k tx are fixed. The DIR of this model can be shown to have the form [ 39 ] The corresponding information capacity curves for k tx = 1 in Figure 8 are analogous to the curves of the two-state promoter. Download figure Open in new tab FIG. 8. Maximum DIR for the three-state promoter model with fixed − β Δ µ = 16. In the light of information flow as an optimization principle, the two systems U (one with two-state promoter and the other with three-state promoter) hence have the same information-dissipation trade-off. Note that the channel ℙ Y ∥ X is identical in the two models and the two optimizations (and their combination) can hence be regarded as small examples of the “distribution matching framework”. The two distinct input topologies do not change the information capacity with respect to kinetic rate optimization under an entropy production rate constraint. VII. CONCLUSION AND OUTLOOK This work establishes a trajectory-level framework that links information processing and thermodynamic cost in Markovian chemical reaction networks (CRNs). 
On the information-theoretic side, we formulate mutual information and directed information between continuous-time trajectories and extend these notions to information exchange and causal information flow between arbitrary subnetworks, clarifying when mutual information is ill-defined. On the thermodynamic side, we provide a thermodynamically consistent account for open CRNs in Kurtz’s process formulation and highlight that, at stationarity, the entropy production rate identifies the mean dissipation rate, while parallel reaction channels can invalidate common coarse-grained identifications if not modeled carefully. Building on these foundations, we introduce, to the best of our knowledge, the first Shannon-style communication channel model for CRNs that explicitly separates message, encoding, and decoding and that permits principled energetic constraints on admissible codes. In this setting, the finite-time information capacity and its capacity–cost variants are formulated as optimizations over causal encoding laws subject to physical bounds (e.g., on dissipated heat or entropy production), and we provide a converse coding theorem under such constraints. A key structural contribution is the reactionnetwork classification that isolates design degrees of freedom from an invariant physical channel law. Modifying propensities within the class of encoding reactions preserves the causal channel kernel, whereas changes outside these classes generically alter it. Finally, analytically tractable promoter models illustrate how energetic constraints shape achievable information flow, yielding explicit information-dissipation trade-offs. Several limitations delineate immediate directions for future work. First, we do not prove a full channel coding theorem for the CRN channel class; accordingly, the identification of information capacity with operational capacity is made under an information-stability assumption, and we only provide a converse bound. 
Establishing sufficient conditions for information stability in chemically relevant regimes remains open. Second, while the channel model allows time-dependent message protocols, thermodynamic constraints for general timedependent driving are not available in closed form at the same level of generality. Obtaining them requires a microscopic derivation from the underlying Hamiltonian and is beyond the present scope. Third, the present results are exact at the level of elementary reaction networks. For full gene-expression networks, even a single transcriptional module may correspond to thousands of elementary reactions, and experimental knowledge at this resolution can be limited. An important challenge is therefore to make the framework accessible for non-elementary (coarse-grained) descriptions without losing control of the relevant information and energetic quantities. Conceptually, we view the developed tools as a foundation for a reaction-network-based theory of signal processing: the principled design and analysis of biochemical signal encoding and processing under explicit causal, information-theoretic, and thermodynamic criteria. In this light, the reaction classification that enables a separation between “causal channel” and “causal (feedback) encoding” is best viewed as a tool for importing the classical noisy channel coding problem into the CRN setting. Thereby we have clarified what is meant by “communication through a CRN” and how this notion is invoked in an operational Shannon-sense, rather than a mere metaphorical one. We envision that, prospectively, our tools can serve as a starting point for progressing from the classical noisy channel coding problem, in which the channel is fixed, to more realistic biochemical coding problems. 
Natural next steps include extending the present methods to scenarios where parts of the “channel” are tunable (i.e., joint codechannel co-design) or performance metrics are goal-oriented, as well as developing controlled approximation schemes that bridge elementary and effective network descriptions. DATA AVAILABILITY The code used to perform the optimizations is available from the corresponding author upon reasonable request. ACKNOWLEDGMENTS We thank Nicolai Engelmann for helpful discussions. M.G. developed the methodology, led the formal analysis, contributed to conceptualization, and wrote the original draft; M.G., L.S., and H.K. reviewed and edited the manuscript. L.S. contributed to conceptualization. H.K. supervised the research and led the conceptualization. The authors declare no competing financial interest. Appendix A: Chain rules for directed information The following chain rules for the Newton DI are derived similarly to the original proofs given by Kramer [ 67 ] for the Massey DI. Lemma 4 For the random discrete sequences X 1: N , Y 1: N and Z 1: N the DI satisfies the chain rules where D −1 denotes an anticipatory shift such that Proof . Appendix B: Stochastic filtering equations for the intensity processes In order to compute the MI or DI from the expressions in the Theorems 1 and 2, we need to evaluate the intensity processes and . Computing the MI from similar expressions and approximations thereof has been discussed extensively in [ 35 ]. If, however, there are ( X,Y )-, X or Y indistinguishable reactions, then this needs to be accounted for [ 69 ] (see also the discussion in Sec. III B). To improve accessibility, we provide the relevant expressions ( Eq. (21) and (22) in [ 69 ]) in our notation. We maintain all assumptions from Sec. III B. Let 𝒮 A ⊂ 𝒮 be an arbitrary subset of species indices of the core system U, and its complement 𝒮 B := 𝒮 \ 𝒮 A . 
The set 𝒮 A represents the set of species with respect to whose history we want to filter the propensity functions in Eq. (15) . Hence, we choose 𝒮 A ∈ { 𝒮 X , 𝒮 Y , 𝒮 X ∪ 𝒮 Y } in this context. Define and , together with the natural filtration ℱ A , as well as the coordinate maps π A , π B and the non-zero subnetwork change vectors 𝒱 A , 𝒱 B . In the following we substitute U ( s ) = ( A ( s ), B ( s )). As an example, we want to evaluate the ℱ A -intensity process of the reaction counter R X ( ν X ), ν X ∈ 𝒱 X : for all s ∈ [0, t ], where Π b ( s ) denotes the conditional probability mass function as a function of time, which is actually a piecewise-deterministic stochastic process on the state space [0, 1 ] for all . We refer to as the filtering distribution at time s ≥ 0. It follows the stochastic differential equation such that , where Importantly, the stochastic updates at jumps of the counters R A ( ν A ), ν A ∈ 𝒱 A depend on the correct conditioning. Using the reaction counters R r , r ∈ ℛ X (cf. Eq. (24) ) instead would add the surplus information about which of several X -indistinguishable reactions has happened. In particular, the numerator and denominator of the update equation satisfy with being a set of X -indistinguishable reaction indices. The continuous part of the evolution equation does not depend on the choice of conditioning, as discussed in [ 69 ]. Appendix C: Transfer entropy between subnetworks and causally conditioned probabilities While the primary focus of this paper is on mutual and directed information, we also provide an expression for the transfer entropy between subnetworks to facilitate comparison for an interdisciplinary readership. Following conventional notation, we refer to X as the target and Y as the source. To accommodate the definition of transfer entropy, we extend the definition of stochastic processes from the finite interval [0, t ] to the real line ℝ, such that the CRN is assumed to have started at time −∞ rather than at time zero. 
Consider the families of sigma-algebras and , defined by which represent the sliding-window partial histories of the joint process ( X,Y ) and the marginal process X , respectively. The parameters α, β > 0 specify the history lengths for X and Y . Note that these families are generally not filtrations, since the fixed-length window definitions imply and for s < s ′ in general. They become filtrations only in the limit α, β → ∞. Spinney et al . [ 36 ] generalize the discrete-time definition of transfer entropy to arbitrary stochastic processes indexed by a strictly ordered and uncountable set 𝕋. For continuous-time processes, they assume 𝕋 ⊇ [ t 0 − max( α, β ), t ) with t 0 < t . Their approach constructs a regular version of the probability measure for every finite subset A ⊂ [ t 0 , t ) and every product set . This family of finite-dimensional distributions determines a unique probability measure on the path space over [ t 0 , t ) by Kolmogorov’s Extension Theorem. The notation P(· ∥·) is adopted from the “causally conditioned probability” as originally defined by Kramer [ 107 ]. Note that the definition in (C1) can be applied for arbitrary families of filtrations. Consequently, a causally conditioned probability for continuous-time systems, generalizing Kramer’s definition in the sense of Newton, is given by with short notation . The transfer entropy is then defined measure-theoretically via the Radon-Nikodym derivative of this probability measure with respect to the conditional probability, i.e., This framework also permits a pathwise, random-variable version of transfer entropy, as is common in stochastic thermodynamics. For comparison, we provide a related generalization of Schreiber’s discrete-time transfer entropy [ 108 ] based on the structural, extremal-based approach of Weissman et al . [ 12 ]. Here, we simply conjecture that the two definitions are equivalent for sufficiently regular path spaces. Using the same approach outlined in Supp. Sec. 
S1.3, one can show that the latter definition yields the same expression for the transfer entropy of jump processes as that derived by Spinney et al . [ 36 ]. Using the subnetwork propensities (15) we define the conditional intensity processes Then we obtain In the limit of full history this yields the relation There are two ways to make the transfer entropy equal to the DI. Either artificially set the path of U to a constant for all s < 0 and randomly select U (0). Then Alternatively, the parameters α, β can be interpreted as timedependent functions α ( s ), β ( s ). Setting α ( s ) = β ( s ) = s we obtain for all s ≥ 0, such that Appendix D: Species reaction graph classification To provide intuition for the reaction channel classification in Def. 17 we provide the following binary “adjacency” matrices on the set {𝒮 X , ℜ r , 𝒮 Y } that indicate whether directed trails between ℜ r and either 𝒮 X or 𝒮 Y exist. Thereby, we ignore trails between species sets and cycles. In total, there are 16 distinct configurations. For example, indicates that a trail from 𝒮 X to ℜ r ( 𝒮 X −τ ℜ r ) exists, but neither a trail in the reverse direction nor any trail between ℜ r and 𝒮 Y exists. Hence, for the given example, the connectivity via trails may be visualized as where we use → instead of →, since we reserve the latter arrowhead for the local (in)dependence notation. Then, for categories (a) - (d), we obtain Note that . For the intersection between feedforward and feedback, i.e., 𝒮 X ⇄ ℜ r ⇄ 𝒮 Y , as well as the input and output modulating cases, we identify Appendix E: Formal definition of chemical channel codes We may formalize the notions of the chemical channel coding problem. The key technical point in our setting is that a change of input intensities typically induces a change of the underlying path measure. 
Hence, admissible encoders should be defined as pathwise (measure-free) objects, and only subsequently be related to probability measures via a consistency requirement. Throughout, fix a transmission duration t > 0 and write µ for Lebesgue measure on [0, t ]. Let 𝒟 ([0, t ], 𝔛 × 𝒴 ) denote the space of càdlàg trajectories equipped with its Borel σ -field. For s ∈ [0, t ], denote by r s the restriction operator r s ( z ) := z [0, s ) . For a path z ∈ 𝒟 ([0, t ], · ) let 𝕁( z ) := {s ∈ (0, t ] : z ( s ) ≠ z ( s −) } be its set of jump times. We write L 1 ([0, t ], [0, ∞)) for integrable functions modulo µ -a.e. equality [ 109 ], and use the natural restriction map r s : L 1 ([0, t ], [0, ∞)) → L 1 ([0, s ), [0, ∞)) induced by restricting representatives. Let be a set of admissible protocols, for some fixed N ∈ ℕ, viewed as deterministic control trajectories on [0, t ]. We assume that every Γ ∈ Π is bounded and piecewise differentiable, and has càdlàg sample paths on [0, t ]. For s ∈ [0, t ] we write Π| [0, s ) := { r s (Γ) | Γ ∈ Π } . A (feedback) channel encoder will be defined as an intensity-process map into L 1 ([0, t ], [0, ∞)); this encodes exactly the degree of identification relevant for compensators and likelihoods (integration against µ ), without reference to any ambient probability measure. Predictability and sample-path regularity are imposed deterministically via the existence of suitable representatives. Definition 20 A (| ℳ |, t ) -code C for the message M and the causal chemical channel ( 𝒮 X , P Y ∥ X , 𝒮 Y ) consists of the following objects . For any protocol book Π a noiseless source code is a bijective mapping A channel initialization is a probability mass function p 0 : 𝒰 → [0, 1 ]. Let c be a source code with protocol book Π. 
A feedback channel encoder is a collection of maps These maps are required to be predictable in the following sense: For every s ∈ [0, t ] and all Γ, Γ ′ ∈ Π and ( x, y ), ( x ′ , y ′ ) ∈ 𝒟 ([0, t ], 𝔛 ×𝒴 ), as elements of L 1 ([0, s ), [0, ∞)). Moreover, for each ( ε, ν X ) there exists a representative family (a “predictable regenerative version”) such that for all (Γ, ( x, y )), where is càglàd on [0, t ] and satisfies the (regenerative) jump-time constraint A feedback source–channel encoder is a collection of maps satisfying the analogous deterministic predictability condition (with m ∈ ℳ in place of Γ ) and admitting càglàd representatives whose jump times obey A channel decoder is a mapping A code C = (c, p 0 , f, D ), where f is a channel encoder, is called a split source-channel code. A code C = ( p 0 , f, D ), where f is a source-channel encoder, is called a source-channel code. Note that the variation of 𝒱 X is explicitly allowed. However, the presented definition need not be consistent with all the requirements of the Markovian CRN U | M . This level of detail is lost due to projection and marginalization to ( M, θ, X,Y ). Therefore, while we have just defined the general functional class of codes, not all of those codes can necessarily be realized via a finite CRN. The question of which codes can be realized with such CRNs is addressed in Sec. VI A 5. The following definition relates the code design to its induced probability law for the noisy channel coding problem. Definition 21 Let C be a split source-channel code for the chemical channel ( 𝒮 X , ℙ Y ∥ X , 𝒮 Y ). A probability measure ℙ C on (Ω, ℱ ) is said to be consistent with C if: ℙ C ( U (0) = u ) = p 0 ( u ) for all u ∈ 𝒰 ; the protocol-valued process θ satisfies θ = c( M ); for each ( ε, ν X ), the reaction-counting process R X ( εν X ) admits an ℱ θXY -intensity process λ X ( εν X ) under ℙ C , which satisfies on for the “predictable regenerative version” (as in Def. 
20 (c)); ℙ C ≪ ℙ and -a.s. (where ℙ acts as a reference measure) . Consistency for a source–channel code C = ( p 0 , f, D ) is defined analogously, with in place of and -predictability. Definition 22 The set 𝒞 of all (| ℳ |, t ) -codes C for which there exists at least one consistent measure ℙ C is called the set of (| ℳ |, t ) -codes admissible for the causal chemical channel ( 𝒮 X , ℙ Y ∥ X , 𝒮 Y ). Note that 𝒞 is not necessarily a product set of admissible initializations, encoders and decoders. Appendix F: EPR of a non-equilibrium two-state model To derive the EPR for the two-state model we use Equation (47) for the entropy exchange rate, which is equal to the EPR at the NESS: with and The stationary distribution is the kernel of the generator matrix corresponding to the model. Simplifying the above expression yields (66). To obtain the EPR as a function of the effective rates , we express the rates in the state transition diagram via the effective rates and the potential differences Δ g , Δ µ . We have which implies and therefore also The physical boundary conditions constrain the admissible region of the effective rates to For this region we obtain (68) by substituting the original rates with the above expressions and simplifying. Appendix G: Interarrival time between transcription events for the circular three-state promoter The DI between the promoter dynamics and the RNA copy number was linked to the differential entropy of the interarrival time in (69). Here, we apply Anderson’s filtering theorem for semi-Markov processes similarly as described in [ 39 ]. Define the state space E := { J, ON, OFF ∗ , OFF } , where J is the jump state, which mimics ON in terms of leaving transitions. The Laplace version of the semi-Markov kernel density is where We filter E to the reduced state space { J } which yields the interarrival time distribution f τ of the output. 
Since the geometric series of a 3 × 3-matrix is hard to evaluate, we do the filtering state by state, from right to left. In the first step we identify Thus and In the second step we identify with and . We denote the roots of by w 1 , w 2 . Then Denoting the roots of by q 1 , q 2 we rewrite For the last filtering step we use that and for any a ≠ b . We obtain Denote R ( s ) := k tx ( u OFF + s )( w 1 + s )( w 2 + s ) and let v 1 , ..., v 4 be the roots of . Then Footnotes Substantial revisions in all non-introductory sections. Two added supplemental files: S1 for information theoretic proofs and S2 for further background in stochastic thermodynamics. References [1]. ↵ M. Magaña-Acosta and V. Valadez-Graham , Frontiers in Genetics 11 , 600615 ( 2020 ). OpenUrl PubMed [2]. ↵ S. L. Johnson and G. J. Narlikar , Journal of Molecular Biology 434 , 167653 ( 2022 ). OpenUrl CrossRef PubMed [3]. ↵ W. S. Klug , M. R. Cummings , C. A. Spencer , M. A. Palladino , and D. J. Killian , Concepts of genetics ( 2020 ). [4]. ↵ U. Alon , An introduction to systems biology: design principles of biological circuits ( Chapman and Hall/CRC , 2019 ). [5]. ↵ G. Rieckh and G. Tkačik , Biophysical journal 106 , 1194 ( 2014 ). OpenUrl CrossRef PubMed [6]. ↵ G. Tkačik and A. M. Walczak , Journal of Physics: Condensed Matter 23 , 153102 ( 2011 ). OpenUrl CrossRef PubMed [7]. ↵ M. Behar and A. Hoffmann , Current opinion in genetics & development 20 , 684 ( 2010 ). OpenUrl PubMed [8]. J. E. Purvis and G. Lahav , Cell 152 , 945 ( 2013 ). OpenUrl CrossRef PubMed Web of Science [9]. D. Friedrich , L. Friedel , A. Finzel , A. Herrmann , S. Preibisch , and A. Loewer , Molecular Systems Biology 15 , e9068 ( 2019 ). OpenUrl CrossRef PubMed [10]. ↵ S. Sasagawa , Y.-i. Ozaki , K. Fujita , and S. Kuroda , Nature cell biology 7 , 365 ( 2005 ). OpenUrl CrossRef PubMed Web of Science [11]. ↵ J. Massey et al. , in Proc. Int. Symp. Inf. Theory Applic.(ISITA-90) , Vol. 2 ( 1990 ). [12]. ↵ T. Weissman , Y.-H. 
Kim , and H. H. Permuter , IEEE Transactions on Information Theory 59 , 1271 ( 2012 ). OpenUrl [13]. ↵ N. J. Newton , arXiv preprint arXiv: 1604.01969 ( 2016 ). [14]. ↵ A. S. Hansen and E. K. O’Shea , Elife 4 , e06559 ( 2015 ). OpenUrl CrossRef PubMed [15]. ↵ J. J. Tyson and B. Novák, Annual review of physical chemistry 61 , 219 ( 2010 ). OpenUrl CrossRef PubMed Web of Science [16]. ↵ R. Chetrite , M. Rosinberg , T. Sagawa , and G. Tarjus , Journal of Statistical Mechanics: Theory and Experiment 2019 , 114002 ( 2019 ). OpenUrl [17]. ↵ P. R. Ten Wolde , Nature Physics 8 , 361 ( 2012 ). OpenUrl [18]. ↵ D. F. Anderson and T. G. Kurtz , in Design and analysis of biomolecular circuits: engineering approaches to systems and synthetic biology ( Springer , 2011 ) pp. 3 – 42 . [19]. ↵ C. E. Shannon , The Bell system technical journal 27 , 379 ( 1948 ). OpenUrl CrossRef Web of Science [20]. ↵ J. A. T. Thomas M. Cover , Channel capacity , in Elements of Information Theory ( John Wiley & Sons, Ltd , 2005 ) Chap. 7 , pp. 183 – 241 . OpenUrl [21]. ↵ H. Marko , IEEE Transactions on communications 21 , 1345 ( 1973 ). OpenUrl CrossRef [22]. ↵ Y.-H. Kim , IEEE Transactions on Information Theory 54 , 1488 ( 2008 ). OpenUrl [23]. ↵ Y. M. Kabanov , Theory of Probability & Its Applications 23 , 143 ( 1978 ). OpenUrl CrossRef [24]. ↵ M. R. Frey , Capacity of the Poisson communications channel, Doctoral dissertation, The University of North Carolina at Chapel Hill ( 1990 ). [25]. ↵ S. Uda , Biophysical reviews 12 , 377 ( 2020 ). OpenUrl PubMed [26]. ↵ H. B. Barlow et al. , Sensory communication 1 , 217 ( 1961 ). OpenUrl [27]. ↵ C. Waltermann and E. Klipp , Biochimica et Biophysica Acta (BBA)-General Subjects 1810 , 924 ( 2011 ). OpenUrl [28]. ↵ I. Nemenman , Quantitative biology: from molecular to cellular systems 4 , 73 ( 2012 ). OpenUrl [29]. ↵ R. Cheong , A. Rhee , C. J. Wang , I. Nemenman , and A. Levchenko , science 334 , 354 ( 2011 ). OpenUrl Abstract / FREE Full Text [30]. ↵ F. 
Tostevin and P. R. Ten Wolde , Physical review letters 102 , 218101 ( 2009 ). OpenUrl CrossRef PubMed [31]. ↵ I. Lestas , G. Vinnicombe , and J. Paulsson , Nature 467 , 174 ( 2010 ). OpenUrl CrossRef PubMed Web of Science [32]. ↵ Y. Nakahira , F. Xiao , V. Kostina , and J. C. Doyle , in 2018 Annual American Control Conference (ACC) ( IEEE , 2018 ) pp. 2707 – 2714 . [33]. ↵ K. V. Parag , Journal of theoretical biology 480 , 262 ( 2019 ). OpenUrl PubMed [34]. ↵ L. Duso and C. Zechner , in 2019 IEEE 58th Conference on Decision and Control (CDC) ( IEEE , 2019 ) pp. 6610 – 6615 . [35]. ↵ A.-L. Moor and C. Zechner , Physical Review Research 5 , 013032 ( 2023 ). OpenUrl [36]. ↵ R. E. Spinney , M. Prokopenko , and J. T. Lizier , Physical Review E 95 , 032319 ( 2017 ). OpenUrl [37]. ↵ M. Sinzger , M. Gehri , and H. Koeppl , in 2020 IEEE International Symposium on Information Theory (ISIT) ( IEEE , 2020 ) pp. 2873 – 2878 . [38]. M. Sinzger-D’Angelo and H. Koeppl , IEEE Transactions on Information Theory ( 2023 ). [39]. ↵ M. Gehri , N. Engelmann , and H. Koeppl , in 2024 IEEE International Symposium on Information Theory (ISIT) ( 2024 ) pp. 1931 – 1936 . [40]. ↵ D. T. Gillespie , The journal of physical chemistry 81 , 2340 ( 1977 ). OpenUrl CrossRef PubMed Web of Science [41]. ↵ M. Reinhardt , G. Tkačik , and P. R. Ten Wolde , Physical Review X 13 , 041017 ( 2023 ). OpenUrl [42]. ↵ A. Das and P. R. Ten Wolde , Physical Review Letters 135 , 107404 ( 2025 ). OpenUrl PubMed [43]. ↵ M. Davis , IEEE Transactions on Information Theory 26 , 710 ( 1980 ). OpenUrl [44]. ↵ P. J. Thomas and A. W. Eckford , IEEE Transactions on information Theory 62 , 7358 ( 2016 ). OpenUrl [45]. P. J. Thomas and A. W. Eckford , in 2016 IEEE International Symposium on Information Theory (ISIT) ( IEEE , 2016 ) pp. 1804 – 1808 . [46]. ↵ A. W. Eckford and P. J. Thomas , IEEE Transactions on Molecular, Biological and Multi-Scale Communications 4 , 27 ( 2018 ). OpenUrl [47]. ↵ G. Lan , P. Sartori , S. 
Neumann , V. Sourjik , and Y. Tu , Nature physics 8 , 422 ( 2012 ). OpenUrl PubMed [48]. ↵ P. Mehta and D. J. Schwab , Proceedings of the National Academy of Sciences 109 , 17978 ( 2012 ), https://www.pnas.org/doi/pdf/10.1073/pnas.1207814109 . OpenUrl Abstract / FREE Full Text [49]. ↵ S. J. Bryant and B. B. Machta , Physical review letters 131 , 068401 ( 2023 ). OpenUrl CrossRef PubMed [50]. ↵ F. Tasnim , N. Freitas , and D. H. Wolpert , Physical Review E 110 , 034101 ( 2024 ). OpenUrl [51]. ↵ A. Yadav and D. Wolpert , Minimal thermodynamic cost of communication ( 2025 ) , arXiv: 2410.14920 [cond-mat.stat-mech]. [52]. ↵ F. Attneave , Psychological review 61 , 183 ( 1954 ). OpenUrl CrossRef PubMed Web of Science [53]. ↵ R. Linsker , Computer 21 , 105 ( 1988 ). OpenUrl CrossRef [54]. ↵ G. Tkačik , C. G. Callan Jr , and W. Bialek , Physical Review E—Statistical, Nonlinear, and Soft Matter Physics 78 , 011910 ( 2008 ). OpenUrl [55]. ↵ R. Boel , P. Varaiya , and E. Wong , SIAM Journal on Control 13 , 1022 ( 1975 ). OpenUrl CrossRef [56]. ↵ J. McFadden , Journal of the society for industrial and applied mathematics 13 , 988 ( 1965 ). OpenUrl [57]. ↵ P. M. Brémaud , Point Processes and Queues: Martingale Dynamics , Vol. 50 ( Springer , 1981 ). [58]. ↵ T. Renner , E. R. Cohen , T. Cvitas , J. G. Frey , B. Holmström , K. Kuchitsu , R. Marquardt , I. Mills , F. Pavese , M. Quack , J. Stohner , H. L. Strauss , M. Takami , and A. J. Thor , Quantities, units and symbols in physical chemistry ( The Royal Society of Chemistry , 2007 ). [59]. ↵ International Union of Pure and Applied Chemistry (IUPAC), Elementary reaction , doi: 10.1351/goldbook.E02035 ( 2019 ), IUPAC Compendium of Chemical Terminology (Gold Book). OpenUrl CrossRef [60]. ↵ R. Rao and M. Esposito , The Journal of Chemical Physics 149 ( 2018 ). [61]. ↵ T. G. Kurtz , The Journal of Chemical Physics 57 , 2976 ( 1972 ). OpenUrl CrossRef Web of Science [62]. ↵ H. Qian and H. 
Ge , Stochastic Chemical Reaction Systems in Biology ( Springer International Publishing , 2021 ). [63]. ↵ T. Schmiedl and U. Seifert , The Journal of Chemical Physics 126 ( 2007 ). [64]. ↵ D. T. Gillespie , Physica A: Statistical Mechanics and its Applications 188 , 404 ( 1992 ). OpenUrl [65]. ↵ A. Klenke and A. Klenke , Probability Theory: A Comprehensive Course , 303 ( 2020 ). [66]. ↵ P.-O. Amblard and O. J. Michel , Entropy 15 , 113 ( 2012 ). OpenUrl [67]. ↵ G. Kramer , Directed information for channels with feedback, ETH Series in Information Processing , Vol. 11 ( Hartung-Gorre Verlag , Konstanz, Switzerland , 1998 ). [68]. ↵ D. J. Daley and D. Vere-Jones , An Introduction to the Theory of Point Processes: Volume I: Elementary Theory and Methods ( Springer Science & Business Media , 2006 ). [69]. ↵ L. Bronstein and H. Koeppl , Physical Review E 97 , 062147 ( 2018 ). OpenUrl [70]. ↵ L. Peliti and S. Pigolotti , Stochastic thermodynamics: an introduction ( Princeton University Press , 2021 ). [71]. ↵ D. F. Anderson , G. Craciun , and T. G. Kurtz , Bulletin of mathematical biology 72 , 1947 ( 2010 ). OpenUrl CrossRef PubMed [72]. ↵ D. Kondepudi and I. Prigogine , Modern thermodynamics: from heat engines to dissipative structures ( John Wiley & Sons , 2014 ) Chap. 15. [73]. ↵ A. Kolchinsky , The Journal of Chemical Physics 161 ( 2024 ). [74]. ↵ U. Seifert , Physica A: Statistical Mechanics and its Applications 504 , 176 ( 2018 ). OpenUrl [75]. ↵ C. Van den Broeck and M. Esposito , Physica A: Statistical Mechanics and its Applications 418 , 6 ( 2015 ). OpenUrl CrossRef [76]. ↵ K. Sekimoto , Physical Review E—Statistical, Nonlinear, and Soft Matter Physics 76 , 060103 ( 2007 ). OpenUrl [77]. ↵ J. M. Horowitz , The Journal of chemical physics 143 ( 2015 ). [78]. ↵ J. L. Lebowitz and H. Spohn , Journal of Statistical Physics 95 , 333 ( 1999 ). OpenUrl [79]. ↵ Y. Oono and M. Paniconi , Progress of Theoretical Physics Supplement 130 , 29 ( 1998 ). OpenUrl [80]. ↵ M. 
Esposito and C. Van den Broeck , Physical review letters 104 , 090601 ( 2010 ). OpenUrl PubMed [81]. ↵ J. Schnakenberg , Rev. Mod. Phys . 48 , 571 ( 1976 ). OpenUrl CrossRef Web of Science [82]. ↵ M. Esposito , Physical Review E—Statistical, Nonlinear, and Soft Matter Physics 85 , 041125 ( 2012 ). OpenUrl [83]. ↵ M. Feinberg , Foundations of chemical reaction network theory , 205 ( 2019 ). [84]. ↵ D. B. West , Introduction to graph theory ( 2001 ). [85]. ↵ V. Didelez , Journal of the Royal Statistical Society Series B: Statistical Methodology 70 , 245 ( 2008 ). OpenUrl [86]. ↵ S. L. Lauritzen , Graphical models , Vol. 17 ( Clarendon Press , 1996 ). [87]. ↵ V. Didelez , Graphical models for event history analysis based on local independence ( Logos-Verlag , 2001 ). [88]. ↵ The notion of a trail in the field of probabilistic graphical models differs from the edge-simple walk, the ‘trail’ in classical graph theory. In earlier publications (and their follow-ups) the trail is referred to as ‘chain’ [86]. [89]. ↵ J.-P. Florens , M. Mouchart , and J.-M. Rolin , Elements of bayesian statistics ( 1990 ). [90]. ↵ The completion of σ -algebras in the conditional measurable separability definition has been dropped by Didelez [85, 87], but was part of the original definition by Florens et al. [89] (cf. Definition 5.2.2). The formulation is weaker, but still sufficient to prove the left intersection property for local independence. Hence, local independence still satisfies the asymmetric graphoid relations under this weaker assumption. [91]. ↵ C. G. Bowsher , The Annals of Statistics 38 ( 2010 ). [92]. ↵ R. E. Spinney , J. T. Lizier , and M. Prokopenko , Physical Review E 98 , 032141 ( 2018 ). OpenUrl [93]. ↵ W. Bialek , SciPost Phys. Lect. Notes , 084 ( 2024 ) , doi: 10.21468/SciPostPhysLectNotes.84 . OpenUrl CrossRef [94]. ↵ G. Tkačik and P. R. t. Wolde , Annual Review of Biophysics 54 ( 2025 ). [95]. ↵ A. El Gamal and Y.-H. 
Kim , Network Information Theory ( Cambridge university press , 2011 ). [96]. ↵ To provide intuition, we’ll briefly outline the idea behind information units, using the bit as an example. A bit is the unit of information that can be stored (capacity) in a data storage device with two (equally addressable) states (i.e., no state is a priori more likely than the other). That is, any state of this storage device provides 1 bit of information. Consequently, a data storage device with N equally addressable states can store log 2 (N) bits of information (which equals the number of binary storage units required to achieve the same capacity). Shannon extended this notion to the information content of data or signals. For a message M ∼ p(m) with discrete alphabet M the symbol m contains I(m) := −log 2 (p(m)) bits of information. Now, I(m) is the number of equally addressable binary information units required to represent the same amount of information. That is, we would be equally surprised to find this state in a storage device of I(m) bits, if all states were equally likely. In this sense, H(M)/t = ∑ m p(m)I(m)/t is the mean rate of information of M. The inequality in (55) is a standard result [110]. [97]. ↵ A. Klenke , Probability Theory : A Comprehensive Course , 3rd ed. ( Springer International Publishing , Cham , 2020 ). [98]. ↵ G. Genc , Y. E. Kara , H. B. Yilmaz , and T. Tugcu , IEEE Communications Letters 20 , 1729 ( 2016 ). OpenUrl [99]. ↵ A. D. Wyner , IEEE Transactions on Information Theory 34 , 1449 ( 2002 ). OpenUrl [100]. ↵ I. Csiszár and J. Körner , Information theory: coding theorems for discrete memoryless systems ( Cambridge University Press , 2011 ). [101]. ↵ S. Vembu , S. Verdu , and Y. Steinberg , IEEE Transactions on Information Theory 41 , 44 ( 2002 ). OpenUrl [102]. ↵ T. Van Vu , Y. Hasegawa , et al. , Journal of Physics A: Mathematical and Theoretical 55 , 405004 ( 2022 ). OpenUrl [103]. ↵ E. Ziv , I. Nemenman , and C. H. 
Wiggins , PloS one 2 , e1077 ( 2007 ). OpenUrl CrossRef PubMed [104]. ↵ G. Tkačik , A. M. Walczak , and W. Bialek , Physical Review E—Statistical, Nonlinear, and Soft Matter Physics 80 , 031920 ( 2009 ). OpenUrl [105]. ↵ W. Bialek , I. Nemenman , and N. Tishby , Neural computation 13 , 2409 ( 2001 ). OpenUrl CrossRef PubMed Web of Science [106]. ↵ B. Alberts , A. Johnson , J. Lewis , M. Raff , K. Roberts , and P. Walter , Molecular Biology of the Cell , 6th ed. ( Garland Science , New York, NY , 2017 ). [107]. ↵ G. Kramer , IEEE Transactions on Information Theory 49 , 4 ( 2003 ). OpenUrl [108]. ↵ T. Schreiber , Physical review letters 85 , 461 ( 2000 ). OpenUrl CrossRef PubMed Web of Science [109]. ↵ The elements of L1([0, t], [0, ∞)) are equivalence classes (i.e., sets) of integrable functions, that differ on at most countably many points [97]. the elements of equivalence classes are called representatives or versions. [110]. J. A. T. Thomas M. Cover , Entropy, relative entropy, and mutual information, in Elements of Information Theory ( John Wiley & Sons, Ltd , 2005 ) Chap. 2, pp. 13 – 55 . View the discussion thread. Back to top Previous Next Posted January 21, 2026. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Directed information flow in reaction networks under energy constraints: A framework for communication and optimal design applications Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Directed information flow in reaction networks under energy constraints: A framework for communication and optimal design applications Maximilian Gehri , Lukas Stelzl , Heinz Koeppl bioRxiv 2024.08.17.608427; doi: https://doi.org/10.1101/2024.08.17.608427 Share This Article: Copy Citation Tools Directed information flow in reaction networks under energy constraints: A framework for communication and optimal design applications Maximilian Gehri , Lukas Stelzl , Heinz Koeppl bioRxiv 2024.08.17.608427; doi: https://doi.org/10.1101/2024.08.17.608427 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Systems Biology Subject Areas All Articles Animal Behavior and Cognition (7644) Biochemistry (17728) Bioengineering (13916) Bioinformatics (42037) Biophysics (21489) Cancer Biology (18637) Cell Biology (25553) Clinical Trials (138) Developmental Biology (13401) Ecology (19941) Epidemiology (2067) Evolutionary Biology (24367) Genetics (15622) Genomics (22547) Immunology (17764) Microbiology (40475) Molecular Biology (17208) Neuroscience (88747) Paleontology (667) Pathology (2842) Pharmacology and Toxicology (4834) Physiology (7659) Plant Biology (15175) Scientific Communication and Education (2047) Synthetic Biology (4304) Systems Biology (9835) Zoology (2272)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2024) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00