Cell Trajectory Inference based on Schrödinger Problem and a Mechanistic Model of Stochastic Gene Expression

preprint OA: closed CC-BY-4.0
📄 Open PDF Full text JSON View at publisher
Full text 65,880 characters · extracted from preprint-html · click to expand
Cell Trajectory Inference based on Schrödinger Problem and a Mechanistic Model of Stochastic Gene Expression | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Cell Trajectory Inference based on Schrödinger Problem and a Mechanistic Model of Stochastic Gene Expression Clémence Fournié , Elias Ventre , Ulysse Herbach , View ORCID Profile Aymeric Baradat , View ORCID Profile Olivier Gandrillon , Fabien Crauste doi: https://doi.org/10.1101/2025.11.21.689689 Clémence Fournié 1 Université Paris Cité, CNRS , MAP5, F-75006 Paris, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: clemence.fournie{at}math.cnrs.fr fabien.crauste{at}math.cnrs.fr Elias Ventre 2 COMPutational Pharmacology and Clinical Oncology Department, Inria Sophia Antipolis – Méditerranée, Cancer Research Center of Marseille , Inserm UMR1068, CNRS UMR7258, Aix Marseille University UM105, Marseille, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ulysse Herbach 3 Université de Lorraine, CNRS , Inria, IECL, F-54000 Nancy, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site Aymeric Baradat 4 Univ Lyon, Université Lyon 1, CNRS UMR5208, Institut Camille Jordan , 43 Blvd du 11 Novembre 1918, Villeurbanne-Cedex, F-69622, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Aymeric Baradat Olivier Gandrillon 5 Univ Lyon, ENS de Lyon, Univ Claude Bernard, CNRS UMR 5239, INSERM U1210, Laboratory of Biology and Modelling of the Cell , 46 allée d’Italie Site Jacques Monod, F-69007, Lyon, France 6 Inria Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Olivier Gandrillon Fabien Crauste 1 Université Paris Cité, CNRS , MAP5, F-75006 Paris, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: clemence.fournie{at}math.cnrs.fr fabien.crauste{at}math.cnrs.fr Abstract Full Text Info/History Metrics Preview PDF Abstract Cellular differentiation is the biological process that leads a cell to opt for a particular cellular identity. Recently, single-cell RNA-sequencing has enabled the simultaneous measurement of gene expression levels at specific times for a large number of individual cells and a large number of genes. Repeating such measurements at different time points gives then access to the temporal variation, or transport, of a distribution on a gene expression space. The whole temporal trajectory of distributions thus characterizes the differentiation process at population level, but trajectories of individual cells are still out of reach since most measurement techniques are destructive. The optimal transport theory that has been used so far to infer cellular differentiation trajectories from time-stamped single-cell RNA-seq data involves solving the so-called Schrödinger problem in its most common version. This implies assuming that cells move, in the gene expression space, by diffusion. Yet, real gene dynamics are much more complex. In the present work, we assume that mRNA dynamics are characterized by brief and important production of RNA, with long periods of inactivity in between, and consider the so-called Bursty model of gene dynamics. We use this model to define a reference process for the Schrödinger problem. By comparing the solutions of the Schrödinger problems with a Diffusive and a Bursty reference process, under different conditions, we show that the Bursty model provides a better approximation of the underlying gene dynamics than the standard Diffusive process when inferring cell trajectories. 1 Introduction While two cells in a multicellular organism may possess the same genome, they can express their genes differently, leading to distinct cellular outcomes. Cellular differentiation is the biological process that leads a cell to opt for a particular cellular identity [ 1 ]. In contemporary biology, the cellular identity is defined by the expression of cell markers, mostly surface proteins. Recently, single-cell RNA sequencing (scRNA-seq) has enabled the simultaneous measurement of gene expression levels for up to millions of individual cells and tens of thousand of genes [ 2 , 3 , 4 ]. Thanks to this technique, a cell can be represented in a high-dimensional space, where each dimension corresponds to the expression level of a given gene, so the space dimension equals the number of genes. Repeating such measurements at different time points gives access to the temporal variation, or transport, of a distribution on a gene expression space. The entire temporal trajectory of the distribution characterizes the differentiation process at population level, but trajectories of individual cells are still out of reach since measurement process is destructive [ 5 ]. In practice, gene expression is measured either as the number of mRNA molecules or the level of protein expression. Hence, the above-mentioned gene space is either a mRNA counts space or a protein level space. While the differentiation process is fundamentally a continuous process, most of the experimental measurements provide access only to discrete temporal distributions. In order to study a differentiation process and more specifically to infer differentiation pathways, many works focused on the reconstruction of the continuous transcriptomics evolution based on such distributions. Schiebinger et al. [ 5 ] were pioneers in using the optimal transport (OT) theory to infer such trajectories from time-stamped transcriptomics data, introduced in the Waddington Optimal Transport (WOT) algorithm. WOT enables the reconstruction of a “timeline” between two cell distributions measured at different times. WOT solves the OT problem in a standard way: by minimizing a cost function defined as the squared Euclidean distance between cells in a PCA-reduced space. Since then, several works proposed methods of cell trajectory inference inspired by OT theory [ 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 ]. Authors of OT-based studies chose to work with quadratic OT, which consists in using the squared euclidean distance as a cost in the OT problem (see Section 2.3 ). More specifically, they used the socalled entropic approximation of OT – which is equivalent to solving the Schrödinger problem – in order to use the efficient Sinkhorn algorithm [ 18 , 19 ]. The Schrödinger problem consists in finding the stochastic process that is the closest to a given reference process while fitting the observations. The reference process is in general a Brownian motion, hence it is implicitely assumed that cell trajectories are close to diffusion. In some cases, distances different from the square Euclidean distance have been considered, mostly for taking into account additional information like lineage information [ 11 , 20 ] or coexpression between genes [ 21 ]. Some methods nevertheless used a different cost function when solving the OT problem. For instance, Liu et al. [ 17 ] criticized the use of Euclidean distance when applying OT theory and proposed an implementation of a learned cost function in the OT problem based on the Sinkhorn algorithm [ 18 ]. However, they do not use OT theory to infer cell trajectories, instead they focus on alignment of scRNA-seq datasets. Additionally, the learned cost is non-interpretable, a property that is shared by computational methods based on machine learning. In order to reduce the computational cost and gain interpretability, scEGOT [ 13 ] combines OT with cell state graph. By adding Gaussian mixture distribution on each cell’s dots, scEGOT allows to consider cell clusters instead of individual cells. The Gaussian mixture makes the results interpretable, however their objective are clearly different from ours because the authors use Euclidean distance between Gaussian distributions to infer trajectories, relying once again on a standard approach. The main idea of the present paper is to assert that in order to reconstruct cell differentiation trajectories, a more realistic, quantitative model of stochastic gene expression should be considered as reference process. We focus on solving the Schrödinger problem while using an alternative reference process that would be closer to realistic gene dynamics. Indeed, the Diffusive process generates unrealistic dynamics for proteins and mRNA [ 22 ]. The Schrödinger problem is not seen here as an approximation of a transport problem, but as a way to identify a process close to an a priori quantitative model that would fit the observations. When examined at the single-cell level, gene expression is undoubtedly a stochastic process [ 23 , 24 , 25 , 26 , 27 , 28 ]. The simplest model that can correctly reproduce experimental data distribution is the well-known two-state model of gene expression [ 29 , 30 , 31 ], a refinement of the model introduced in 1991 by Ko [ 32 ]. In this model, a gene is described by its promoter which can be either active or inactive – possibly representing a transcription complex being “bound” or “unbound” although it may be more complicated [ 33 ] – with mRNA being transcribed only during the active periods. Genes may mechanistically influence the activation rate of other genes, thus creating a Gene Regulatory Network (GRN). We consider an extension of the two-state model for several genes, that takes into account the GRN structure. For certain parameter values, mRNA dynamics are characterized by brief and important production of RNA, with long periods of inactivity in between: this regime corresponds to the widely observed phenomenon called transcriptional bursting [ 29 ]. Herbach et al. [ 34 ] introduced a mathematical framework for modeling GRN driven by transcriptional bursting, which we call the Bursty model here. This model has been shown to reproduce realistic gene dynamics at the single-cell level [ 35 , 36 ] and will provide our reference process. In this work we compare the performances of a Diffusive process, associated to standard OT theory, and a reference process based on the Bursty model to solve the Schrödinger problem, when initial and final distributions are based on realistic, simulated data. We first explore the case of a single gene, then of a specific two-gene GRN, the toggle switch, and finally more complex three-gene GRNs. We focus on the influence of some indicators, namely the time difference between the measurements of the two distributions and the number of cells in the distributions. We show that using the Bursty model as a reference process provides a better solution to the Schrödinger problem for all tested networks. 2 Methods 2.1 Gene dynamics We model gene expression using the framework introduced by Herbach et al. [ 34 ], and later simplified in the bursty regime [ 37 , 38 , 39 ]. In this model, a gene is characterized by its amount of mRNA and its level of proteins: mRNA is synthesized through stochastic transcriptional bursts, and degraded at rate d 0 ; proteins are synthesized from mRNA at rate s 1 and degraded at rate d 1 . The Bursty model is a piecewise-deterministic Markov process, with deterministic part given by where M ( t ) is the quantity of mRNA and P ( t ) the quantity of protein at time t , and a stochastic part: mRNA bursts occur at random times with rate k on and have a random size that follows an exponential distribution [ 38 , 39 ]. Within a cell, genes interact with each other via a GRN. Hence, a gene can be activated or inhibited by itself but also by other genes in the GRN. A burst is assumed to occur for gene i at a rate k on,i ( P 1 , …, P G ), where G is the number of genes in the GRN and ( P 1 , …, P G ) is the vector of protein levels of all genes in the GRN. The burst rate of gene i accounts for a basal activity parameter β i and the influence of other genes in the GRN, expressed via a gene-gene interaction matrix θ = { θ ji } j,i ∈{1,…, G } [ 38 , 40 ], so that where k 0, i and k 1, i correspond respectively to the minimum and maximum burst frequencies of gene i , and θ ji is the intensity of gene j action on gene I [ 34 ]. Parameters θ ji can be positive or negative, depending on whether gene j activates or inhibits gene i ( Figure 1 ). Download figure Open in new tab Figure 1: Schematic representation of a two-gene GRN based on the Bursty model of gene dynamics. For each gene i = 1,2, mRNA synthesis is activated with a rate k on,i ( P 1 , P 2 ), defined in (2), and mRNA and protein dynamics follow system (1). Parameters of the GRN are described in the text. In the case of the toggle switch model, the GRN consists of 2 genes that self-activate ( θ 1,1 > 0 and θ 2,2 > 0, green arrows) and inhibit each other ( θ 2,1 < 0 and θ 1,2 < 0, red arrows). Example of GRN: the Toggle Switch We consider a standard GRN made of two genes, the so-called “toggle switch” model. The two genes self-activate and inhibit each other, i.e. This GRN is illustrated in Figure 1 and Figure 2 illustrates the temporal evolutions of mRNA and protein for a toggle switch GRN where gene dynamics follow the Bursty model, computed with parameters values in Table 1 . A deterministic toggle switch model would favor one gene over the other, depending on initial conditions of the system. In this stochastic version of the toggle switch, one observes that one gene activity always prevails, yet the dominant gene switches stochastically. View this table: View inline View popup Download powerpoint Table 1: Default parameter values. Download figure Open in new tab Figure 2: Temporal evolution of protein and mRNA levels for the toggle switch model. Dynamics of one gene are illustrated in yellow, the other in red. Protein levels are displayed on the upper panel, while mRNA dynamics are displayed in the lower panel More generally, by modifying the values of the interaction parameters θ ji in model (1)-(2), any two-gene GRN can be implemented (see, for example, Figure 10 ). Extensions to three-gene GRNs can be straightforwardly implemented (see, for example, Figure 11 ). Simulations of protein dynamics Computations of the temporal evolution of protein and mRNA levels were performed using the Python package Harissa [ 40 ], which implements an exact simulation procedure for the Bursty model [ 37 ] ( Figure 2 ). This allows to simulate one GRN, hence simulating N times the evolution of a GRN results in N independent cell trajectories ( Figure 3 ). Download figure Open in new tab Figure 3: Cell trajectories of four cells driven by a toggle switch. Initial (time t 1 = 0.1h) location of cells is displayed as purple dots, while their final (time t 2 = 0.5h) location is represented by green dots. Each grey line is the temporal trajectory of each cell. Parameters are given in Table 1 . In order to mimic biological experiments aimed at measuring a GRN activity at given observation times, we simulate N times the GRN with initial conditions IC M . and IC P (see Table 1 ), select a simulation time t , and extract all mRNA and proteins concentrations for all genes. Throughout this work, we focus on distributions of protein levels only, yet the case of mRNA distributions is discussed in the last section. We denote by µ the distribution of G protein levels in a population of N cells at time t = t 1 , and ν the distribution of G protein levels in the same population at time t = t 2 , with t 2 > t 1 . They are given by: whereδis the Dirac distribution, , G is the level of gene g ’s protein in cell x i at t 1 and , is the level of gene g ’s protein in cell y j at t 2 . For the sake of simplicity, cells have the same index in distributions µ and ν (i.e., the i -th component of µ is associated to the same cell than the i -th component in ν ). Additionally, the two distributions are jointly normalized (between 0 and 1). Default parameter values are given in Table 1 . 2.2 Reduced model with analytical solution In addition to the model introduced in the previous section, for which couplings will be estimated from simulations, we consider a simplified model ( reduced Bursty model ) that will provide closed-form couplings. This simplification is achieved in two steps. First, a protein-only model is derived from a time scale separation of mRNA and proteins corresponding to d 0 ≫ d 1 (i.e., proteins assumed to be more stable than mRNAs). This model is defined by [ 40 , 41 ]: Where ℰ is the exponential distribution characterizing the random size of each burst (with mean value η ), k on is the burst rate and d 1 the degradation rate of proteins, as before. Second, we consider a particular 1-dimensional case of model (3) with a constant k on : this setting can be interpreted either as a single gene with no feedback, or part of a GRN within a time range sufficiently short so that k on ( P 1 , …, P G ) does not vary significantly during this period. This model turns out to be analytically tractable (see Supplementary File). The time-dependent distribution of P ( t ) knowing P (0) = P 0 is given by Where is the Dirac measure localized at , and ν t has a density function g t ( x | P 0 ) given by where and Φ denotes the confluent hypergeometric (Kummer’s) function of the first kind. For numerical applications, we regularize the singular part in (4) by replacing it with a normal distribution with small standard deviation ε ≪ 1, so that the equality holds when ε → 0. 2.3 Coupling between time points Let’s recall that experimental data would be collections of cell positions in the protein space at distinct time points, and that the objective of cell trajectory inference is to reconstruct cell trajectories in this space. Hence, we restrict the problem to two consecutive time points t 1 and t 2 . Given two collections and , we search to construct a matrix of probabilities Q ij that a cell at position x i at time t 1 reaches the state y j at time t 2 , in the most realistic way. In the following, the matrix Q will be called a coupling between the collections X and Y . By solving the Schrödinger problem (see Section 2.4 ), we look for the coupling that is the closest to a certain reference coupling , obtained using the Bursty model (1), while being compatible with the data X and Y . For either the model (1) or the Diffusive model, the reference coupling R is built by associating to the model a function being an exact or an approximated version of the transition kernel of the underlying theoretical process. Coupling for the reduced Bursty model In the case of a single gene following model (3), we use the regularized analytical solution (6) to compute the probability density of reaching state y starting from x , that is p t ( y | x ) defined by where g t is defined in (5) and Considering this transition kernel p t ( y | x ) and the uniform distribution p ( x ) ≡ 1 /N over initial states ( x 1 , …, x N ), the related reference coupling RB ref ( x, y ), for “reduced Bursty reference process”, is defined as: where the normalizing term is defined so that is a discrete probability distribution over ( y 1 , …, y N ). Coupling for the non-reduced Bursty model Given a GRN whose dynamics are given by a Bursty model (1)-(2), we define B ref ( x, y ) an approximation of its coupling between two collections at two different time points. It is the reference process defined by the probability of reaching y when starting from x , computed from the Bursty model (1)-(2). We estimate B ref using a Gaussian method (illustrated in Figure 4 ). For each cell i in the initial distribution µ at t = t 1 , we simulate the Bursty model (1)-(2) n times with the cell i ’s coordinates as an initial condition. We obtain a distribution denoted at time t = t 2 , representing the potential fates of cell i . An estimated density of is computed using Gaussian approximation, i.e. we create Gaussian distributions centered on cells in the distribution , and we compute the Gaussian kernel of this distribution using the default parameters of the textitgaussian kde() python function. The result is then divided by the total sum to ensure the resulting distribution is a probability distribution. Download figure Open in new tab Figure 4: Schematic of the procedure used for constructing the B ref estimation with a Gaussian method. a . Cells x 1 and x 2 (purple dots) are two cells of the distribution µ at time t 1 in protein space. b . Cells y 1 and y 2 (green dots) are two cells belonging to the distribution ν at time t 2 in the protein space. c . The approximated distribution is computed for n = 6 and cells are displayed as orange circles. d . Gaussian distributions centered on cells generate a smooth distribution (orange areas). e . Probabilities p ( y 1 x 1 ) = 3 /n and p ( y 2 x 1 ) = 2 /n are deduced from the construction process. Coupling for a Diffusive process When the underlying model is a Diffusive process, we consider the natural discrete counterpart used in [ 6 ], that is the reference coupling denoted by D ref is defined by where σ 2 is the diffusion coefficient,∥ x − y ∥is the Euclidean distance (here, in protein space) between x and y , and is the normalizing constant given by The value of the diffusion coefficient was chosen to be both relevant and close to the value used in Schiebinger et al. [ 5 ]. The challenge was to be small enough for the convergence of the Schrödinger problem while using a diffusive process that still predicts couplings ( D ref ( x, y )> 0). 2.4 Schrödinger problem Introduced in 1932 [ 42 ], the Schrödinger problem is an entropic minimization between a stochastic process and a reference process with marginal constraints. This problem has been studied because when the reference process is a Brownian and the diffusion coefficient goes to 0, it converges to the quadratic OT problem [ 43 , 44 ]. Let Π and Ψ be two discrete and finite spaces, e.g. Π = { x 1 , …, x N } and Ψ = { y 1 , …, y N } with N ∈ℕ, the number of cells. We denote by R ∈ 𝒫 (Π×Ψ), the reference process, where 𝒫 (Π×Ψ) is the space of probability measures on Π×Ψ. Since Π and Ψ are discrete spaces of size N , then R is a N × N matrix. We denote by u ∈ 𝒫 (Π) and v ∈ 𝒫 (Ψ) two uniform vectors of size N (each cell has the same weight), and we search for the process Q solution of where H is the relative entropy of Q with respect to R (also called Kullback–Leibler divergence of Q from R ) defined by This quantity will be called entropy throughout this paper for simplicity. The Schrodinger problem (7) can be solved with the Sinkhorn algorithm [ 18 , 19 ]. We solve the Schrödinger problem for the reference processes introduced in Section 2.3 . When R = RB ref , We denote by RB sch its solution, RB sch := Sch ( RB ref ; u, v ). For the reference processes R = B ref , we write We denote by B sch its solution, B sch := Sch ( B ref ; u, v ). When R = D ref , then and we denote D sch its solution, D sch = Sch ( D ref ; u, v ). 2.5 Performances of the models In order to compare the performances of the Bursty model with other models, in particular the Diffusive model of gene dynamics, we rely on the computation of a standard criterion, the entropy (8) between solutions of the Schrödinger problem (7). However, to ensure that results do not depend on the criterion, we also test other criteria measuring the difference between two stochastic processes. Entropy between Schrödinger problem solutions For one-gene GRN, we define i.e. the entropy between B sch or D sch and RB sch . This value characterizes the difference between the ability of a Diffusive process or a Bursty model-based process versus RB sch to predict cell trajectories correctly. The process RB sch is considered as the reference process because it provides an approximation of the exact probability distribution. In parallel, we compute as a control measure in order to estimate the error made when performing stochastic simulations. For two-or-more-gene GRN, we define the entropy between D sch and B sch , in order to evaluate the difference between the ability of a Diffusive process versus a Bursty model-based process to correctly predict cell trajectories. Similarly to the one-gene GRN case above, we introduce to estimate the error made when performing stochastic simulations. Values of H B should be smaller than H D,B values if B ref and B sch are closer than D sch and B sch . If the Bursty model-based reference coupling better predicts individual cell trajectories, the difference between H B and H D,B will be important. Due to stochasticity, we perform 100 iterations to compute these quantities and focus on the mean and standard deviation. Other difference indicators To certify that our results do not depend on the choice of the entropy, we also compute the following criteria, for two processes Q and R : - the Total Variation Distance: - the Jensen–Shannon Divergence: where W = ( Q + R ) / 2; - the Frobenius norm: 3 Results We consider two distributions of protein levels obtained from a fixed number of cells, simulated from the Bursty model (see Section 2.1 ) and measured respectively at times t 1 and t 2 ( t 2 > t 1 ). Noticeably, we focus only on protein levels since protein dynamics, contrary to mRNA dynamics, allow to preserve the markovian property in the Bursty model therefore providing a rigorous framework. We solve the associated Schrödinger problem for several reference processes, including the Bursty process and the standard Diffusive process. Solutions of the Schrödinger problem are compared using an entropy function. If the two processes are similarly able to correctly infer trajectories, the entropy equals or nears zero. When the final distribution is measured a “long” time after the initial distribution, the Bursty model has reached a so-called stationary state, and both initial and final distributions can be considered independent. In this case, trajectory inference should depend less on the selected reference process. Consequently, we focused our study on distributions in transient states, meaning the time difference t 2 − t 1 is small enough and t 1 is close to the initialisation of the simulations, as represented in Figure 3 . Details are provided in Section 2 . 3.1 Schrödinger problem for a single gene We first considered the case of a single gene with no feedback, as a toy model to illustrate the influence of the underlying model of gene dynamics on trajectory inference. Figure 5 shows the solutions of the Schrödinger problem computed with different approximated processes: a Diffusive model, D sch ( Figure 5a ), the non-reduced Bursty model, B sch ( Figure 5b ), and the reduced Bursty model, RB sch ( Figure 5c ). The ground truth ( Figure 5d ) is a coupling of cells with the same index (cell i in the initial distribution is cell i in the final distribution). We observe that RB sch yields the closest result to the ground truth ( Figure 5c ), in the form of a very accurate transition matrix, while D sch yields the furthest ( Figure 5a ). A darker diagonal is observed for B sch ( Figure 5b ), meaning that this process better approximates the ground truth than D sch . Noticeably, a limitation of the representation in Figure 5 is that the coupling could be not expressed (light color) and yet be relevant. For example, if two cells are close in protein space at both t 1 and t 2 , then the coupling between those cells could be mixed up but relevant. Download figure Open in new tab Figure 5: Solution of the Schrödinger problem for a single gene. a . D sch . b . B sch . c . RB sch . d . Ground truth. Each colored square represents the probability of the cell index at t 2 starting from the cell index at t 1 for N = 40 cells: the darker the box, the greater the probability. All matrices sum to 1. Parameters are given in Table 1 . Figure 6 presents the entropy between D sch and RB sch , denoted by H D, R , and the entropy between B sch and RB sch , denoted by H B, R (see Section 2.5 ). The curve H R also appears in Figure 6 , it serves as a control by computing the entropy between the reference process and its approximation. The initial distribution is measured at time t 1 = 0.1 h, and the final distribution is computed at various times t 2 in the range [0.2; 0.5] h. Download figure Open in new tab Figure 6: Entropies between reference processes as a function of the time interval for a one-gene GRN. Mean and standard deviation (straight lines) of entropies H D, R (blue), H B, R (red) and H R (green) are displayed for various values of the time difference t 2 − t 1 , when the gene dynamics model is in transient state. Additionally, minimum and maximum values over 50 iterations (dashed lines) are also displayed. Other parameters are detailed in Table 1 . We observe that quantities H D, R and H B, R decrease as t 2 − t 1 increases and H R remains constant. We also observe that H D, R is always higher than the entropy between B sch and RB sch , and remains much higher than H R even for large values of t 2 − t 1 (here 0.4 h). Entropy H B, R is always larger than H R , yet it is much smaller than H D, R , and for t 2 − t 1 = 0.4 h there is no difference between H B, R and H R . Both the exact temporal distribution of the process and its approximation provide comparable results. The study of this toy model highlights that using the exact temporal distribution of the reference process to solve the Schrödinger problem provides much better results than using the approximation of a gene dynamics model. However, it is a complex, most of the time impossible, task to find the analytical expression of this distribution for a GRN comprising at least two connected genes. Consequently, for a more relevant GRN, we will focus on the estimations provided by the reference process of the non-reduced Bursty model and the Diffusive process. 3.2 Case study: the toggle switch We focused on a two-gene GRN, called ‘toggle switch’, introduced in Section 2.1 and illustrated in Figure 10b (blue GRN). This specific GRN consists of two self-activating genes that inhibit each other. 3.2.1 Influence of the time interval and the number of cells We first investigated the influence of the time between measurements of the initial and the final distributions (i.e. t 2 − t 1 ) on the ability of two models, the Bursty and the Diffusive models, to correctly transport cell distribution. Figure 7a shows values of entropies H D,B and H B as functions of the time difference t 2 − t 1 . The H D,B curve is always above the H B curve and remains approximately constant and independent of the time difference. The entropy between the two processes is the same for all t 2 − t 1 values: whatever the time difference, the Diffusive process infers trajectories with the same uncertainty. For the time difference t 2 − t 1 = 0.2h, the entropy H D,B has a larger standard deviation, due to one extreme value. Download figure Open in new tab Figure 7: Entropies between reference processes for the toggle switch GRN, in the transient state. Mean and standard deviation (straight lines) of ( a .) H D,B (blue) and H B (red) as functions of the time interval t 2 − t 1 , and ( b .) H D,B /N (blue) and H B /N (red) as functions of the number of cells N . Additionally, minimum and maximum values over 100 iterations are represented (dashed lines). The thick vertical grey line in Panel B is the default value N = 100 cells used in other figures. Other parameters are given in Table 1 . Although it is more relevant to focus on the transient state ( Figure 7a ), it may be informative to question what happens in the stationary state. In this case, cells may either remain in their basin of attraction or switch to the other basin of attraction. The basin of attraction is a notion related to deterministic dynamics, consequently in the current framework where we consider stochastic models of gene dynamics it refers to the basin of attraction of the coarse approximation in the form of a system of ordinary differential equations of a piecewise-deterministic Markov process [ 45 ]. Results in the stationary state are illustrated in Supplementary File, they highlight conclusions similar to the ones obtained for the transient state. Although the Diffusive model is less accurate than the Bursty model for inferring trajectories, its ability increases when the final distribution becomes independent of the initial one (later time measurements). Figure 7b illustrates the influence of the number of cells N considered when measuring both initial and final distributions in transient state. To that aim, the entropy has been normalized by the number of cells to avoid a bias due to the size of the collections in the computation of the entropy: the computation of the entropy (8) explicitly depends on the number of cells N . First, we observed that H B /N does not depend on the number of cells. Second, H D,B /N decreases as the number of cells increases. We observed (not shown) that the Diffusive process tends to couple a small sample of cells from the initial collection to all other cells in the final collection. The solution to the Schrödinger problem associated with the reference Diffusive process is then a matrix exhibiting over-coupling for a sample of cells and under- or no-coupling for the rest of the cells. The over-coupled sample is proportionally larger for small numbers of cells, leading to a larger entropy as observed in Figure 7b . Both numerical investigations on the dependence of H D,B and H B on the length of the time interval t 2 − t 1 and the number of cells N highlighted a poor ability of the Diffusive model to reconstruct cell trajectories when the underlying model is the Bursty model of gene dynamics. These results are based on the use of the entropy to measure the differences between solutions of the Schrödinger problem. We then questioned the relevance of the entropy criterion and the parameter values of the Bursty model in reaching this conclusion. 3.2.2 Unspecificity of the entropy indicator and the distribution parameters In Figure 8 , we compared the performance of different indicators when evaluating the difference between the Diffusive and the Bursty process. Figure 8 shows mean and standard deviation of normalized indicators, computed as ( Indic − ControlIndic ) /ControlIndic , where Indic is either H D,B , D TV ( D sch , B sch ), JSD ( D sch , B sch ) or the Frobenius norm ∥ D sch − B sch ∥ F (see Section 2.5 ) and ControlIndic its associated control measure (respectively, H B , D TV ( B ref , B sch ), JSD ( B ref , B sch ) and ∥ B ref − B sch ∥ F ). We observe that for all indicators, the curves are all strictly positive and increasing. So, despite quantitative differences, all indicators yielded the same qualitative conclusion: the Bursty process provides a better trajectory inference than the Diffusive process. Download figure Open in new tab Figure 8: Difference indicators between reference processes as a function of the time interval for the toggle switch GRN. Mean and standard deviation of difference indicators comparing D sch and B sch normalized by the difference indicators comparing B ref and B sch respectively: Entropy (blue), total variation distance (yellow), Jensen–Shannon Divergence (purple), and Frobenius norm (green). For standard deviations < 0.1, error bars are not visible. Parameters are detailed in Table 1 . To challenge the performance of the B ref process, we tested a different Bursty model as reference process than the one used to generate the two distributions. In practice, we computed the initial distribution µ and the final distribution ν using the default parameter values of the toggle switch GRN, but we solved the Schrödinger problem with a different B ref process (different gene interaction matrix). Figure 9a highlights that for all the two-gene-based Bursty models (listed in Figure 9c ), the curves are very close. The H D,B curves are all higher than the H B curves, indicating that any Bursty process, even one with parameter values different from the ones corresponding to the data, provides a better approximation than the Diffusive process. In Figure 9b , one observes that even though H B curves are nearly indistinguishable at early times they become separated as t 2 − t 1 increases. We observed differences when parameter values of the GRN associated with self-activation were reduced (purple and yellow curves and GRN). Download figure Open in new tab Figure 9: Entropy H D,B as a function of the time difference t 2 − t 1 for different Bursty models, in transient state. a . Entropies H B (dashed line) and H D,B (straight line) means and standard deviations. Each color represents a set of interaction parameters of B ref used to compute H D,B (illustrated in Panel c ). b . Zoom in on H B values in Panel a for t 2 − t 1 ∈ [0.2,0.5]. c . Two-gene GRN used to compute B ref . Each scheme depicts interactions between gene 1 and gene 2, with green arrows representing activation ( θ > 0) and red arrows representing inhibition ( θ < 0). The blue-squared GRN is the toggle switch used in previous figures. Parameters values are detailed in Table 1 . Download figure Open in new tab Figure 10: Entropy H D,B as a function of the time difference t 2 − t 1 for various two-gene GRNs. a . Entropy H D,B in transient state, mean and standard deviation. Each color represents a GRN introduced in Panel b .. Parameters are detailed in Table 1 . b .Schematic representations of two-gene GRNs used in Panel a . Green arrows represent activations ( θ > 0) and red arrows represent inhibitions ( θ < 0). Download figure Open in new tab Figure 11: Entropy evolution as a function of the time difference t 2 − t 1 in the transient state for three-gene GRNs. a . H D,B mean and standard deviations (blue line with error bars) as well as min and max values (dash blue lines) and H B mean and standard deviation (red line with error bars) and min and max values (dash red lines), and the associated three-gene GRN. The schematic representation of the GRN highlights positive interactions (activation) between genes. b . H D,B mean and standard deviations (blue line with error bars) as well as min and max values (dash blue lines) and H B mean and standard deviation (red line with error bars) and min and max values (dash red lines), and the associated three-gene GRN (repressilator). The schematic representation of the GRN shows interactions between genes, with activations in green and inhibitions in red. Parameters are detailed in Table 1 . Overall, the Bursty model provides a framework to cell trajectory inference that performs better than standard diffusion. 3.3 Analysis for other GRNs As illustrated in Figure 2 , the toggle switch model of gene dynamics exhibits what can be considered as specific temporal dynamics. We subsequently challenged the Bursty process on other GRNs, first with various two-gene GRNs and then on three-gene GRNs. Results discussed hereafter and illustrated in Figures 10 and 11 have been obtained using various GRNs to compute both the distributions ( µ and ν ) and the reference coupling. This contrasts with the results shown in Figure 9 , where only the reference coupling was modified by the GRNs, not the data. Figure 10 shows the entropy H D,B for different two-gene GRNs. For each GRN, the distributions and the Bursty process were computed from the corresponding Bursty model. The blue curve represents H D,B values computed for the toggle switch. We observed that, whatever the two-gene GRN, dynamics of H D,B were similar, quantitatively and qualitatively: it was mostly constant as the time difference t 2 − t 1 increased, and the variability was higher for small time differences. Figure 11 shows the entropy H D,B for two 3-gene GRNs. In the first one, gene 1 activates gene 2 which activates gene 3 ( Figure 11a ); in the second one, the so-called repressilator model, gene 1 inhibits gene 2, gene 2 inhibits gene 3, and gene 3 inhibits gene 1 ( Figure 11b ). Parameter values are the same than for the toggle switch ( Table 1 ). Figure 11a shows a similar range of variations for H D,B and H B as observed for the two-gene GRNs. Variations about the mean were, however, more important than for the two-gene GRNs, as highlighted by error bars and minimum and maximum values (dashed lines). The H D,B curve remains well above the H B curve, similarly to what has been obtained for two-gene GRNs. The same results held for the Repressilator model ( Figure 11b ), a standard three-gene GRN. Whatever the time difference, the mean of H D,B was always higher than H B . For both GRNs, when t 2 − t 1 was small, the standard deviation of H D,B was larger and the minimum and maximum values were widely separated. The two three-gene GRNs considered generated more differences between the Diffusive and the Bursty processes than the toggle switch model or any 2-gene GRN, as well as more variability. In conclusion, for any two-gene or three-gene GRN, whatever the gene-gene interactions considered, the Bursty process always performed better than the standard Diffusive process in correctly transporting the distribution at time t 1 into the distribution at time t 2. 4 Discussion We explored the possibility that the Bursty model of gene dynamics would represent an alternative to the standard diffusion model used to infer trajectories in OT theory. Based on distributions of protein levels, the Bursty model proved indeed quite efficient in capturing cells trajectories more accurately than the Diffusive model ( Figure 5 ). This would be expected if we considered cells moving around in a Waddington’s landscape [ 46 ] whose shape is constrained by the GRN structure [ 26 , 39 ]. In this case, diffusion could not easily represent the variables acceleration and deceleration due to valleys and hills. Noticeably, we assumed for the sake of simplicity the same number of cells in both initial and final distributions, yet our entire analysis could be extended to the case with different cell numbers by normalizing the distributions. When considering a toy model made of only one gene, we concluded that using the exact time-dependent distribution of the process was the best alternative to diffusion. Although expected, this result indicates that it is possible to very efficiently infer cell trajectories in a transcriptomic space. However, the exact distribution is only explicit for one gene. Extending the reference process RB ref to GRN with multiple genes seems a very challenging task, probably impossible to do for complex gene interactions. An approximation of this distribution could be considered, yet the approximation might rapidly become coarse. For a one-gene GRN, the computation of RB ref and D ref processes is almost instantaneous on a personal computer (64 GB RAM and 13th Gen Intel(R) Core(TM) i7-13700H), whereas the computation of B ref takes approximately 7s (values averaged over 100 simulations). Increasing the number of genes does not impact the computational time for D ref , but it increases for B ref . The computational time is proportional to the number of genes in the GRN with almost no time-variability from one simulation to another, for two-gene up to six-gene GRNs where genes self-activate and inhibit the other genes (similar to the toggle switch model). For two genes, the computational time is 12s, for 3 genes, it is 13s, for 4 genes, 14s, for 5 genes 15.5s and for 6 genes 17s per simulation of the B ref process, on a computational server (High-Throughput Computing plateform with 1 CPU and 2 GiB memories). This provides an optimistic perspective for increasing dimensionality. As mentioned above, all investigations were performed on protein data, even though the Bursty model computes both mRNA and protein levels, which is necessary in order to represent transcriptional bursting and interactions between genes through their expression products (e.g., transcription factors). Although it would have been appropriate to focus our study on mRNA levels, as provided by scRNA-seq datasets, it is not possible to reduce the Bursty model to an mRNA-only model, due to the fact that mRNA are typically much less stable than proteins and thereby would not preserve the Markovian property of the GRN. This, however, makes it possible to reduce the Bursty model to a protein-only model that is well defined, along with quasi-steady-state distributions of mRNA levels conditionally on proteins [ 34 , 37 ]. Moreover, Baradat and Ventre [ 47 ] showed that for such a model it is still possible to define and solve a Schrödinger problem. We therefore decided to focus on distributions of protein levels throughout this work. We nevertheless challenged the Bursty model against RNA data, similarly to what has been done by Schiebinger et al. [ 5 ]. Figure 12 shows that the H D,B curve is higher than the H B curve. Contrary to what was observed with protein level distributions, the H D,B curve decreases as t 2 − t 1 increases, yet remaining far above the control curve. mRNAs are produced and degraded more rapidly than proteins, accelerating the approach to the stationary state. The same conclusions then hold, whether mRNA or protein data are used to infer cell trajectories. Download figure Open in new tab Figure 12: Entropies between reference processes for the toggle switch GRN for mRNA data. Mean and standard deviation (straight lines) of H D,B (blue) and H B (red) are displayed as functions of the time interval t 2 − t 1 . Additionally, minimum and maximum values over 100 iterations are represented (dashed lines). Parameters are given in Table 1 . Therefore, the Bursty model represents a desirable alternative to diffusion for trajectory inference in OT, both when using protein data or mRNA data. In order to use our approach on an experimental dataset, we would need first to infer the GRN that could be used to compute the reference process. For this, we could use CARDAMOM [ 36 ] which computes a Bursty-model GRN and the interaction parameter values from time-stamped scRNA-seq data. For now, degradation and synthesis rates have to be estimated before using the CARDAMOM algorithm but we are currently working on an extension allowing to fully calibrate the model from time-series of singlecell experimental data. Importantly, if the underlying GRN structure is not known, we know from Figure 9 that the method nevertheless correctly catches the trajectories for small GRNs. Although what impact it might have on larger GRNs remains to be explored, the results presented in this article thus pave the way to the extension of GRN inference algorithms to methods combining GRN and trajectory inference in an integrated framework. 5 Code Availability All python codes are available at https://github.com/fourniecl/inference_trajectories_PDMP . Supplementary File 1 Analytical solution for the reduced Bursty model Lets consider the reduced Bursty model where P ( t ) is the protein level at time t , ℰ is the exponential distribution characterizing the random size of each burst (with mean value η ), k on is the burst rate, assumed to be constant, and d 1 is the degradation rate of proteins. We briefly explain how to derive the formula Where is the Dirac measure localized at , and with µ t the probability measure defined by the following density function: where Φ denotes the confluent hypergeometric (Kummer’s) function of the first kind. Hence the probability measure ν t has a density function g t ( x | P 0 ) given by The corresponding Markov process ( X t ) t ≥0 (with values in ℝ + ) is characterized by its infinitesimal generator L defined by [ 1 ]: from which one can derive an evolution equation for the Laplace transform , leading to the general solution [ 1 ]: The first term in this product can be identified as the Laplace transform of , while the second term is the Laplace transform of X t conditional on X 0 = 0 (i.e., ℒ ( X t X 0 = 0)). It turns out that the latter can be inverted explicitly using a classical series known as the confluent hypergeometric function of the first kind (denoted by M ( a ; b ; z )), which is of practical interest since this special function is implemented in standard computing libraries such as SciPy. Manipulation of the series leads to the probability measure where µ t has density function f t ( x ) given in (1), and the final expression (1)-(2) follows. Interestingly, the weight of the singular part (Dirac measure at decreases exponentially with rate k on , in line with the property that in this model, a trajectory ceases to be deterministic precisely when the first burst occurs. 2 The stationary case We studied the influence of the time difference t 2 − t 1 on the solutions of the Schrödinger problem when the system is in a stationary state. To do so, we defined t 1, st = 40 h and t 2, st ∈[ 42 ; 64] h. Results are displayed in Supplementary Figure 1 . Results are similar to the case of the transient state, except that the H D,B curve decreases as t 2, st increases. This shows that despite the Diffusive model being less accurate than the Bursty model to infer trajectories, their performances become comparable when the final distribution becomes independent of the initial one, that is for large t 2, st values. Download figure Open in new tab Supplementary Figure 1: Entropies between reference processes for the toggle switch GRN, in the stationary state. Mean and standard deviation (straight lines) of H D,B (blue) and H B (red) are displayed as functions of the time interval t 2, st − t 1, st . Additionally, minimum and maximum values over 100 iterations are represented (dashed lines). Other parameters are given in Table 1 (main text). 6 Acknowledgments This work has received financial support from the CNRS through the MITI interdisciplinary programs through its exploratory research program (TROPIC project under Action 80PRIME 2023). This work was supported by the PEPR Santé Numérique under Project AI4scMED MultiScale AI for Single Cell-Based Precision Medicine (ANR-22-PESN-0002). We thank the computational center of IN2P3 (Villeurbanne/France), where part of the computations were performed. Funder Information Declared TROPIC project , 80PRIME 2023 EPR Santé Numérique under Project AI4scMED MultiScale AI for Single Cell-Based Precision Medicine , ANR-47122-PESN-0002 References [1]. ↵ Clevers , H. et al. What is your conceptual definition of “cell type” in the context of a mature organism? Cell Systems 4 , 255 – 259 ( 2017 ). OpenUrl PubMed [2]. ↵ Klein , A. M. et al. Droplet barcoding for single-cell transcriptomics applied to embryonic stem cells . Cell 161 , 1187 – 1201 ( 2015 ). URL doi: 10.1016/j.cell.2015.04.044 . OpenUrl CrossRef PubMed [3]. ↵ Tanay , A. & Regev , A. Scaling single-cell genomics from phenomenology to mechanism . Nature 541 , 331 – 338 ( 2017 ). OpenUrl CrossRef PubMed [4]. ↵ Macosko , E. Z. et al. Highly parallel genome-wide expression profiling of individual cells using nanoliter droplets . Cell 161 , 1202 – 1214 ( 2015 ). OpenUrl CrossRef PubMed [5]. ↵ Schiebinger , G. et al. Optimal-transport analysis of single-cell gene expression identifies developmental trajectories in reprogramming . Cell 176 , 928 – 943.e22 ( 2019 ). URL doi: 10.1016/j.cell.2019.01.006 . OpenUrl CrossRef PubMed [6]. ↵ Lavenant , H. , Zhang , S. , Kim , Y.-H. & Schiebinger , G. Towards a mathematical theory of trajectory inference . arXiv preprint ( 2023 ). URL https://arxiv.org/abs/2102.09204.2102.09204 . [7]. ↵ Tong , A. , Huang , J. , Wolf , G. , van Dijk , D. & Krishnaswamy , S. TrajectoryNet: A Dynamic Optimal Transport Network for Modeling Cellular Dynamics . arXiv preprint ( 2020 ). URL http://arxiv.org/abs/2002.04461 . ArXiv:2002.04461 [cs, q-bio, stat]. [8]. ↵ Tronstad , M. , Karlsson , J. & Dahlin , J. S. Multistageot: Multistage optimal transport infers trajectories from a snapshot of single-cell data . arXiv preprint ( 2025 ). URL https://arxiv.org/abs/2502.05241.2502.05241 . [9]. ↵ Prasad , N. , Yang , K. & Uhler , C. Optimal transport using gans for lineage tracing . arXiv preprint ( 2022 ). URL https://arxiv.org/abs/2007.12098.2007.12098 . [10]. ↵ Sha , Y. , Qiu , Y. , Zhou , P. & Nie , Q. Reconstructing growth and dynamic trajectories from single-cell transcriptomics data . Nature Machine Intelligence 6 , 25 – 39 ( 2023 ). URL doi: 10.1038/s42256-023-00763-w . OpenUrl CrossRef PubMed [11]. ↵ Lange , M. et al. Mapping lineage-traced cells across time points with moslin . Genome Biology 25 ( 2024 ). URL doi: 10.1186/s13059-024-03422-4 . OpenUrl CrossRef PubMed [12]. ↵ Zhu , J. , Zhang , K. , Zhang , Z. & Kong , D. Modeling cell type developmental trajectory using multinomial unbalanced optimal transport . arXiv preprint ( 2025 ). URL https://arxiv.org/abs/2501.03501.2501.03501 . [13]. ↵ Yachimura , T. et al. scegot: Single-cell trajectory inference framework based on entropic gaussian mixture optimal transport . bioRxiv ( 2023 ). URL https://www.biorxiv.org/content/early/2023/09/14/2023.09.11.557102 . https://www.biorxiv.org/content/early/2023/09/14/2023.09.11.557102.full.pdf . [14]. ↵ Lotfollahi , M. et al. Predicting cellular responses to complex perturbations in high-throughput screens . Molecular Systems Biology 19 , e11517 ( 2023 ). URL https://www.embopress.org/doi/abs/10.15252/msb.202211517 . https://www.embopress.org/doi/pdf/10.15252/msb.202211517 . OpenUrl CrossRef PubMed [15]. ↵ Bunne , C. , Schiebinger , G. , Krause , A. , Regev , A. & Cuturi , M. Optimal transport for single-cell and spatial omics . Nature Reviews Methods Primers 4 ( 2024 ). URL doi: 10.1038/s43586-024-00334-2 . OpenUrl CrossRef [16]. ↵ Jiang , Q. , Zhang , S. & Wan , L. Dynamic inference of cell developmental complex energy landscape from time series single-cell transcriptomic data . PLOS Computational Biology 18 , 1 – 22 ( 2022 ). URL doi: 10.1371/journal.pcbi.1009821 . OpenUrl CrossRef [17]. ↵ Liu , R. , Balsubramani , A. & Zou , J. Learning transport cost from subset correspondence . arXiv preprint ( 2019 ). URL https://arxiv.org/abs/1909.13203 . [18]. ↵ Sinkhorn , R. Diagonal equivalence to matrices with prescribed row and column sums . The American Mathematical Monthly 74 , 402 – 405 ( 1967 ). URL http://www.jstor.org/stable/2314570 . OpenUrl [19]. ↵ Peyré , G. & Cuturi , M. Computational optimal transport . Foundations and Trends in Machine Learning 11 , 355 – 607 ( 2019 ). OpenUrl CrossRef [20]. ↵ Majima , K. et al. LineageVAE: Reconstructing Historical Cell States and Transcriptomes toward Unobserved Progenitors . bioRxiv ( 2024 ). URL https://www.biorxiv.org/content/10.1101/2024.02.16.580598v1 . [21]. ↵ Du , J. et al. Gene2vec: distributed representation of genes based on coexpression . BMC Genomics 20 ( 2019 ). URL doi: 10.1186/s12864-018-5370-x . OpenUrl CrossRef PubMed [22]. ↵ Magaña-López , G. , Calzone , L. , Zinovyev , A. & Paulevé , L. scboolseq: Linking scrna-seq statistics and boolean dynamics . PLOS Computational Biology 20 , 1 – 25 ( 2024 ). URL doi: 10.1371/journal.pcbi.1011620 . OpenUrl CrossRef [23]. ↵ McCullagh , E. et al. Not all quiet on the noise front . Nat Chem Biol 5 , 699 – 704 ( 2009 ). P39. OpenUrl CrossRef PubMed [24]. ↵ Eldar , A. & Elowitz , M. B. Functional roles for noise in genetic circuits . Nature 467 , 167 – 73 ( 2010 ). P39. OpenUrl CrossRef PubMed Web of Science [25]. ↵ Chalancon , G. et al. Interplay between gene expression noise and regulatory network architecture . Trends Genet 28 , 221 – 232 ( 2012 ). P39. OpenUrl CrossRef PubMed Web of Science [26]. ↵ Huang , S. Cell lineage determination in state space: A systems view brings flexibility to dogmatic canonical rules . PLOS Biol 8 , e1000380 ( 2010 ). P39. OpenUrl CrossRef PubMed [27]. ↵ Niepel , M. , Spencer , S. L. & Sorger , P. K. Non-genetic cell-to-cell variability and the consequences for pharmacology . Curr Opin Chem Biol ( 2009 ). P39 . [28]. ↵ Raj , A. & van Oudenaarden , A. Nature, nurture, or chance: stochastic gene expression and its consequences . Cell 135 , 216 – 26 ( 2008 ). P39. OpenUrl CrossRef PubMed Web of Science [29]. ↵ Suter , D. M. et al. Mammalian genes are transcribed with widely different bursting kinetics . Science 332 , 472 – 474 ( 2011 ). URL https://www.science.org/doi/abs/10.1126/science.1198817 . OpenUrl Abstract / FREE Full Text [30]. ↵ Raj , A. , Peskin , C. S. , Tranchina , D. , Vargas , D. Y. & Tyagi , S. Stochastic mrna synthesis in mammalian cells . PLoS Biol 4 , e309 ( 2006 ). P39. OpenUrl CrossRef PubMed [31]. ↵ Peccoud , J. & Ycart , B. Markovian modelling of gene product synthesis . Theoretical population biology 48 , 222 – 234 ( 1995 ). P40. OpenUrl CrossRef Web of Science [32]. ↵ Ko , M. A stochastic model for gene induction . J Theor Biol . 153 , 181 – 94 ( 1991 ). OpenUrl CrossRef PubMed Web of Science [33]. ↵ Coulon , A. , Chow , C. C. , Singer , R. H. & Larson , D. R. Eukaryotic transcriptional dynamics: from single molecules to cell populations . Nat Rev Genet 14 , 572 – 584 ( 2013 ). P39. OpenUrl CrossRef PubMed [34]. ↵ Herbach , U. , Bonnaffoux , A. , Espinasse , T. & O., G. Inferring gene regulatory networks from single-cell data: a mechanistic approach . BMC Syst Biol 11 ( 2017 ). [35]. ↵ Albayrak , C. et al. Digital quantification of proteins and mrna in single mammalian cells . Molecular Cell 61 , 914 – 924 ( 2016 ). URL doi: 10.1016/j.molcel.2016.02.030 . OpenUrl CrossRef PubMed [36]. ↵ Ventre , E. , Herbach , U. , Espinasse , T. , Benoit , G. & Gandrillon , O. One model fits all: Combining inference and simulation of gene regulatory networks . PLOS Computational Biology 19 , 1 – 28 ( 2023 ). URL doi: 10.1371/journal.pcbi.1010962 . OpenUrl CrossRef [37]. ↵ Gaillard , M. & Herbach , U. Efficient stochastic simulation of gene regulatory networks using hybrid models of transcriptional bursting . Computational Methods in Systems Biology109–125 ( 2025 ). URL doi: 10.1007/978-3-032-01436-8_7 . OpenUrl CrossRef [38]. ↵ Herbach , U. Gene regulatory network inference from single-cell data using a selfconsistent proteomic field . arXiv preprint ( 2021 ). URL https://arxiv.org/abs/2109.14888.2109.14888 . [39]. ↵ Ventre , E. Reverse engineering of a mechanistic model of gene expression using metastability and temporal dynamics . In Silico Biol . 14 , 89 – 113 ( 2021 ). OpenUrl PubMed [40]. ↵ Herbach , U. Harissa: Stochastic simulation and inference of gene regulatory networks based on transcriptional bursting . Computational Methods in Systems Biology 97 – 105 ( 2023 ). URL doi: 10.1007/978-3-031-42697-1_7 . OpenUrl CrossRef [41]. ↵ Friedman , N. , Cai , L. & Xie , X. S. Linking stochastic dynamics to population distribution: An analytical framework of gene expression . Phys. Rev. Lett . 97 , 168302 ( 2006 ). URL https://link.aps.org/doi/10.1103/PhysRevLett.97.168302 . OpenUrl CrossRef PubMed [42]. ↵ Schrödinger , E. Sur la théorie relativiste de l’électron et l’interprétation de la mécanique quantique . Annales de l’institut Henri Poincaré 2 , 269 – 310 ( 1932 ). OpenUrl [43]. ↵ Christian , L. A survey of the schrödinger problem and some of its connections with optimal transport . arXiv preprint ( 2013 ). URL https://arxiv.org/abs/1308.0215.1308.0215 . [44]. ↵ Mariani , M. A gamma-convergence approach to large deviations . arXiv preprint ( 2018 ). URL https://arxiv.org/abs/1204.0640.1204.0640 . [45]. ↵ Ventre , E. et al. Reduction of a stochastic model of gene expression: Lagrangian dynamics gives access to basins of attraction as cell types and metastabilty . Journal of Mathematical Biology 83 ( 2021 ). URL doi: 10.1007/s00285-021-01684-1 . OpenUrl CrossRef [46]. ↵ Waddington , C. H. The Strategy of the Genes ( Allens & Unwin, London, UK , 1957 ). [47]. ↵ Baradat , A. & Ventre , E. Convergence of the Sinkhorn algorithm when the Schrödinger problem has no solution . Annales de la Faculté des sciences de Toulouse: Mathématiques Ser . 6 , 33 , 1297 – 1371 ( 2024 ). URL https://afst.centre-mersenne.org/articles/10.5802/afst.1800/ . OpenUrl References [1]. ↵ Malrieu , F. Some simple but challenging Markov processes . Annales de la Faculté de Sciences de Toulouse 24 , 857 – 883 ( 2015 ). OpenUrl View the discussion thread. Back to top Previous Next Posted November 22, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Cell Trajectory Inference based on Schrödinger Problem and a Mechanistic Model of Stochastic Gene Expression Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Cell Trajectory Inference based on Schrödinger Problem and a Mechanistic Model of Stochastic Gene Expression Clémence Fournié , Elias Ventre , Ulysse Herbach , Aymeric Baradat , Olivier Gandrillon , Fabien Crauste bioRxiv 2025.11.21.689689; doi: https://doi.org/10.1101/2025.11.21.689689 Share This Article: Copy Citation Tools Cell Trajectory Inference based on Schrödinger Problem and a Mechanistic Model of Stochastic Gene Expression Clémence Fournié , Elias Ventre , Ulysse Herbach , Aymeric Baradat , Olivier Gandrillon , Fabien Crauste bioRxiv 2025.11.21.689689; doi: https://doi.org/10.1101/2025.11.21.689689 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Systems Biology Subject Areas All Articles Animal Behavior and Cognition (7636) Biochemistry (17704) Bioengineering (13898) Bioinformatics (41967) Biophysics (21460) Cancer Biology (18599) Cell Biology (25525) Clinical Trials (138) Developmental Biology (13384) Ecology (19909) Epidemiology (2067) Evolutionary Biology (24326) Genetics (15613) Genomics (22512) Immunology (17740) Microbiology (40423) Molecular Biology (17191) Neuroscience (88645) Paleontology (667) Pathology (2835) Pharmacology and Toxicology (4825) Physiology (7646) Plant Biology (15158) Scientific Communication and Education (2046) Synthetic Biology (4302) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-05-23T02:00:01.238055+00:00
License: CC-BY-4.0