Deep dynamical models of single-cell multiomic velocities predict loss-of-function and rescue perturbations in B cells

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 75,592 characters · extracted from preprint-html · click to expand
Deep dynamical models of single-cell multiomic velocities predict loss-of-function and rescue perturbations in B cells | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Deep dynamical models of single-cell multiomic velocities predict loss-of-function and rescue perturbations in B cells View ORCID Profile Alireza Karbalayghareh , Darko Barisic , Christopher R. Chin , Ari Melnick , Christina S. Leslie doi: https://doi.org/10.1101/2025.04.24.650458 Alireza Karbalayghareh 1 Computational and Systems Biology Program, Memorial Sloan Kettering Cancer Center , New York, NY 10065, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Alireza Karbalayghareh Darko Barisic 2 Department of Medicine, Weill Cornell Medicine, Cornell University , New York, NY 10065, USA 3 Meyer Cancer Center, Weill Cornell Medical College , New York, NY 10065, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Christopher R. Chin 2 Department of Medicine, Weill Cornell Medicine, Cornell University , New York, NY 10065, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ari Melnick 2 Department of Medicine, Weill Cornell Medicine, Cornell University , New York, NY 10065, USA 3 Meyer Cancer Center, Weill Cornell Medical College , New York, NY 10065, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Christina S. Leslie 1 Computational and Systems Biology Program, Memorial Sloan Kettering Cancer Center , New York, NY 10065, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: lesliec{at}mskcc.org Abstract Full Text Info/History Metrics Preview PDF Abstract We present DynaVelo, a generative neural ordinary differential equation (ODE) model that learns the joint dynamics of gene expression and transcription factor (TF) motif activities in evolving cell systems using single-cell multiome data. DynaVelo leverages partial RNA velocity information together with single-cell TF motif accessibility data to improve the modeling of cell state dynamics and identification of TF drivers. We show that DynaVelo recovers the complex and bifurcating in vivo dynamics of wildtype murine germinal center (GC) B cells and reveals how these cell dynamics change under loss-of-function mutations in epigenetic regulators. DynaVelo resolves how TF motif activities evolve along latent time trajectories using analysis of training cells or through generated trajectories from the model. In silico perturbation analysis further enables DynaVelo to infer dynamic and cell-state-specific gene regulatory networks (GRNs), recovering many known TF-to-gene edges in the wildtype GC GRN and predicting those that are disrupted in mutants. Finally, in silico gene and TF perturbations allow both the prediction of cell dynamics under loss-of-function genetic mutations and the identification of TF perturbations to rescue loss-of-function dynamic and immunological phenotypes. DynaVelo therefore provides a powerful new deep learning framework for modeling and perturbing dynamic cell systems by harnessing single-cell multiome data sets. Introduction Recent single-cell technologies have succeeded in measuring multiple genome-wide readouts from the same individual cells, often pairing the transcriptome with epigenomic measurements such as chromatin accessibility, histone modifications, TF occupancy, or the 3D chromatin interactome 1 - 14 . These single-cell readouts profile the phenotypic and epigenetic states of cells at a single snapshot of time. However, cells are intrinsically dynamic systems where complex regulatory mechanisms govern the cellular gene expression and epigenetic programs that determine cell fate over time. Since real-time in vivo measurements of these modalities are generally not feasible, computational methods have been developed to infer the dynamics of cells from single-snapshot datasets. In these approaches, single cells from the same experimental timepoint are viewed as observations sampled from a dynamically evolving system, and each cell is assigned an inferred pseudotime or latent time relative to this presumed dynamic process. One prominent idea to emerge from these efforts was the concept of RNA velocity, initially proposed by Velocyto 15 and later enhanced by scVelo 16 , which models spliced and unspliced reads from scRNA-seq using ordinary differential equations (ODEs) to estimate the rate of change of gene expression in an implicit dynamic process. In particular, RNA velocities are useful for providing pseudotemporal directional information in inferring gene expression trajectories, although velocities are only computable for a subset of genes and may lack robustness 17 , 18 . To date, single-cell multiome data – providing both transcriptomic and chromatin accessibility readouts from the same cells (scRNA+ATAC-seq) – have not been exploited in velocity-based methods for inferring cell dynamics. Importantly, scATAC-seq allows the computation of single-cell TF binding motif accessibilities, which can serve as a proxy for the regulatory activity of the corresponding TFs. Therefore, these TF motif accessibilities and their underlying velocities should provide information beyond RNA expression and RNA velocities and enable improved modeling of dynamic cell state changes. Here we present a deep learning model called DynaVelo for inferring cell dynamics from single-cell multiome data that combines both RNA and TF motif accessibility modalities and predicts shifts in cell-fate trajectories under genetic perturbations. Our main hypothesis is that these two modalities influence each other in dynamic cellular systems: the accessibility of TF motifs, as proxies for TF regulatory activity, affects the RNA expression level of target genes, while the RNA expression of TFs, their cofactors, and other epigenetic regulators modulates downstream changes in chromatin state. DynaVelo trains a generative neural ordinary differential equation (ODE) to jointly model the dynamics of both gene expression and TF motif accessibility at a single-cell level, moving beyond RNA velocity methods like scVelo as well as recent machine learning approaches like Dynamo 19 and scDiffEq 20 that model the dynamics of gene expression alone. We reasoned that DynaVelo’s use of single-cell TF motif activity data would be especially relevant in cellular systems with rapid and complex dynamics where cell state transitions are driven by TFs. Furthermore, to be able to validate our findings, we used a setting where loss of function of epigenetic regulators is known to alter these TF activities and cell fate decisions. We therefore applied DynaVelo to murine germinal center (GC) B cells representing three genotypes: in the wildtype (WT) setting and in GC B cells bearing heterozygous loss-of-function mutations in Arid1a ( Arid1aHet ), a subunit of the canonical BAF chromatin remodeling complex that is commonly somatically altered in human B cell lymphomas 21 , 22 , and Ctcf ( CtcfHet ), a key factor involved in 3D chromatin organization 23 , 24 . Here we found that DynaVelo can recapitulate the complex in vivo dynamics of the GC reaction, generate cells and trajectories de novo from the model, and recover evolving TF activities and a dynamic gene regulatory network (GRN) over latent time. Importantly, DynaVelo can both predict the impact of loss-of-function genetic perturbations and identify TF perturbations that rescue loss-of-function dynamics and immunological phenotypes. Thus, DynaVelo provides a powerful generative ML framework for modeling and perturbing dynamic cellular systems from single-snapshot single-cell multiome data. Results DynaVelo learns a latent neural ODE model for single-cell dynamics from multiomic data DynaVelo is a neural ODE framework for learning cellular dynamics from single-cell multiome data, where paired scATAC-seq and scRNA-seq readouts are available in each cell. As TF motif accessibility is an interpretable cell-level readout derived from chromatin accessibility, we chose first to use chromVAR 25 to obtain a cell-by-TF-activity matrix from the scATAC readout. We also obtain a cell-by-gene matrix from scRNA readout after preprocessing, normalization, and filtering out irrelevant genes ( Methods ). DynaVelo learns to model the joint dynamics of RNA expression and TF motif accessibility in each cell. Since solving these ODEs in the original high dimensional input spaces could be challenging, we first embed the cells, using both the gene expression and TF motif accessibility inputs, in a lower dimensional latent space where we use neural ODEs to model the latent dynamics ( Figure 1a ). Single-cell states and velocities learned in the latent space define a vector field over cells, and these values are mapped again to the observational space. In addition to the latent states and velocities, DynaVelo also learns latent times for all cells, providing an ordering of cells in the joint vector field. Similar to variational autoencoders, DynaVelo uses a loss function to reconstruct the RNA and TF motif matrices so that learned initial (latent time t = 0) latent states and cell-specific latent times do not greatly diverge from their specified prior distributions ( Methods ). Additional components to the loss function are used to (1) enforce the similarity between learned RNA velocities and those provided for a subset of genes by a method like scVelo and (2) ensure consistency between RNA and TF motif velocities ( Methods ). Download figure Open in new tab Figure 1. DynaVelo trains a latent neural ODE model to learn the dynamics of single cells using single-cell multiomic data. a . The cell-by-gene gene expression and cell-by-TF motif accessibility matrices are first embedded into a lower dimensional latent space by encoders. The initial latent state z(0) and latent time t of each cell are predicted in the latent space by neural networks and passed to a neural ODE model, which solves the ODE from time t =0 to time t and starting from z(0). After receiving the latent state z( t ) and velocity v z ( t ), which forms the latent vector field, two decoders map the RNA and motif vector fields to the original RNA and motif space, reconstructing the RNA expression and motif accessibility and predicting the RNA and motif velocities. b . DynaVelo can learn dynamic GRNs using two approaches: Jacobian matrices and in silico perturbations. Dynamic GRNs, unlike static ones, capture cell-state-specific gene regulatory relationship by measuring velocity changes after RNA expression or motif accessibility changes. Jacobians measure derivatives of changes while in silico perturbations measure finite changes. c . In silico perturbation of wildtype samples is used to validate the models by comparing the change in velocities with the velocities of knockout (KO) samples. Similarly, in silico perturbation of KO samples is used to identify candidate perturbation targets to rescue loss of function mutations. Once DynaVelo is trained, the model can be used for multiple downstream tasks. One key application of DynaVelo is learning dynamic and cell-state-specific GRNs. To learn dynamic GRNs, we follow two approaches. The first approach is to estimate how a small change in the expression of one gene locally changes the RNA velocity of another gene through the computation of Jacobian matrices at individual cells ( Methods ). The second GRN inference approach is through in silico perturbation (knockout) of RNA expression values and TF motif accessibility scores and calculation of the resulting change in the RNA and motif velocities. When perturbing TFs, we change both gene expression and TF motif accessibility to the minimum possible value across cells, while for non-TF genes, we only set the RNA expression to the minimum value ( Figure 1b ). Both approaches can yield dynamic GRNs by capturing which individual gene perturbations can lead to changes in RNA velocities of all other genes, conditioned on a specific cell state. Additionally, we can also study the effects of gene perturbations on the TF motif velocities, for example to reveal how epigenetic regulators alter the motif accessibility landscapes across cells ( Figure 1b ). Since DynaVelo provides a functional form of the vector field for the learned cell dynamics, the model can be used with a wildtype single-cell dataset to predict the global effect of (germline or somatic) loss-of-function alterations in disease, or given cells bearing such mutations, identify the best perturbation targets to rescue the loss-of-function phenotypes and restore homeostatic dynamics ( Figure 1c ). For this second task, we perform in silico perturbation of all genes in the mutant system and see which ones change the RNA and motif velocities in a direction similar to the velocities of wildtype cells ( Figure 1c ). DynaVelo learns the complex dynamics of germinal center B cells and their latent times We first applied DynaVelo to a single-cell multiome data set profiling wildtype murine germinal center B cells ( N = 7166 cells). To guide the dynamics, we used RNA velocity estimates from scVelo for the subset of genes with sufficient coverage, called velocity genes. DynaVelo then learned RNA velocities of all genes as well as TF motif velocities, which were visualized by projection to the corresponding RNA expression and TF motif accessibility UMAP spaces ( Figure 2a ). The clear branching of these vector fields into two types of trajectories recapitulated the main well-known dynamics of GC B cells. In one branch, the highly proliferative centroblasts (CB, shown in green) in the dark zone of GCs progress through the cell cycle. In the second main branch, CB cells transition into the centrocytes (CC, shown in blue) in the light zone of the GC. A third important and more difficult trajectory to capture is the recirculation of some CCs from the light zone back into the dark zone, visible as flow lines that pass through the recycling CC population (CC Rec, shown in orange). These recycling CC cells have been presented with antigen by follicular dendritic cells and have received T cell help in the form of survival signals from T follicular helper (TFH) cells, then migrate back to the dark zone to undergo further somatic hypermutation and improve their BCR affinity 26 - 28 . Finally, some CCs exit the germinal center reaction towards prememory B cells (shown in purple) or plasmablasts (shown in cyan, Figure 2a ), which will eventually differentiate into memory B cells and plasma cells. Figure 2b summarizes the major GC dynamics and trajectories together with cell type annotations. Download figure Open in new tab Figure 2. DynaVelo learns the complex dynamics of germinal center B cells and their latent times. a . RNA and motif velocities projected to the UMAP embeddings of RNA expression and motif accessibility, respectively, showing RNA and motif vector fields. The most important cell state transitions are uncovered: CB to CC, CB cell cycle, CC to prememory, CC to plasmablast, CC to CB migration. b . The summarized macro dynamics of GC B cells. c . Latent times of the cells learned by DynaVelo and scVelo shown on both RNA and motif UMAPs. DynaVelo latent times better match the dynamics of GC B cells and identify more biologically reasonable initial cells (i.e. cells with latent time t close to 0). d . DynaVelo is used to simulate trajectories capturing the dynamics of two main types: CB cell cycle and CB→CC transition. The generated RNA and motif trajectories along these simulated cells are embedded along with the input cells into the RNA and motif UMAP spaces. e . Dynamics of two critical GC TFs, Spi1 and Nfkb1 in two simulated trajectories, cell cycle and CB→CC. Solid lines and shades show the mean and standard errors based on 20 generated trajectories. Trends of four modalities are shown for each TF: RNA expression, RNA velocity, motif accessibility, and motif velocity. Velocity curves are the time-derivatives of the state curves. We next plotted the latent times of cells learned by DynaVelo, shown both in RNA expression and motif accessibility UMAP spaces, and compared with scVelo’s latent times ( Figure 2c ) . This side-by-side comparison showed DynaVelo’s latent time better recapitulated GC dynamics than those of scVelo. While both methods identified prememory B cells and plasmablasts as the terminal states, scVelo had a harder time in capturing the latent times of CBs and CCs, as they have highly dynamic transcriptional and chromatin changes, and failed to identify the major bifurcation between cells that progress through cell cycle in the dark zone and those that transition to the light zone ( Figure 2c ). This discrepancy was even more evident in the motif space, where DynaVelo’s latent times provided a meaningful pseudo-ordering while scVelo’s latent times did not ( Figure 2c ). Taken together, these results show that by combining RNA expression and TF motif accessibility spaces, DynaVelo’s latent times outperformed scVelo by better capturing intricate and fast-changing cell-type dynamics within GCs. Since DynaVelo is a generative model, it can be used to generate trajectories starting from or ending at any cell and to generate cells de novo along these trajectories. These generated trajectories can be used to analyze cell state transitions along latent time and determine possible cell fates given a starting cell state. We used DynaVelo to generate two example trajectories, both starting in the CBs, one progressing through the cell cycle inside the dark zone, and the other one transitioning towards CCs in the light zone ( Figure 2d ). These two trajectories are colored by the latent time points used in the neural ODEs and are projected to both RNA expression and motif accessibility spaces. These generated latent trajectories were mapped by the decoders to the original high dimensional RNA expression and motif accessibility spaces to generate 100 cells, which were then embedded together with the observed cells to their respective UMAP spaces ( Figure 2d ). Trajectory generation is a unique feature of DynaVelo that can be used to understand the vector field over cells in both RNA and motif spaces and to examine gene expression and TF motif accessibility dynamics in different trajectory types. For instance, we examined the dynamics of Spi1 and Nfkb1, two critical TFs in GC B cells, along both CB→CC and CB cell cycle trajectories ( Figure 2e ). Here the velocity (RNA and motif) curves are the time derivatives of their corresponding state (RNA expression and motif accessibility) curves. A recent study showed using functional assays that Spi1 is required for transitioning of CBs to CCs and acts as a pioneer factor to open chromatin, recruiting chromatin remodeling complexes and subsequently enabling binding of Nfkb factors 29 . Our results validated these findings by showing that Spi1 and Nfkb1 accessibilities are monotonically increasing from CB to CC, but not in CB cell cycle progression where Spi1 motif accessibility is stable while Nfkb1 is monotonically decreasing ( Figure 2e ). Interestingly, motif accessibility demonstrated more dynamics than RNA expression for these TFs, confirming the importance of including this modality for resolving TF activity in the germinal center. These results show that DynaVelo was able to determine the temporal sequence of TF activity and binding in silico , with the potential to discover novel TF co-dependencies. DynaVelo identifies driver TFs for the main GC trajectory types Having confirmed that DynaVelo identifies major classes of GC B trajectories – CB progression through cell cycle, CB to CC transition, and exits towards prememory and plasmablast – we next delved into each trajectory type more carefully by analyzing dynamic patterns of TF expression and motif accessibility along latent time. To this end, we first partitioned the cells based on their trajectories, sorted them according to their latent time, and plotted the RNA expression and motif accessibility temporal patterns of important GC TFs ( Figure 3a ). We expected that driver TFs for each cell type would likely display monotonic patterns of TF expression and motif accessibility, both increasing and decreasing, within the cell type, and that the temporal order of TF activity might also play a role in cell fate decisions. Download figure Open in new tab Figure 3. DynaVelo identifies driver TFs for key GC trajectories. a .The temporal trends of motif accessibility and RNA expression for essential germinal center TFs across DynaVelo latent time in three main trajectory types: CB→CC→prememory, CB→CC→plasmablast, and CB cell cycle. b .RNA expression and motif accessibility of several important TFs in each GC trajectory type shown respectively on RNA and motif UMAPs. Both RNA expression and motif accessibility of TFs are required to understand their roles in each trajectory. For the prememory cell state, in additional to Spi1/Spib and Nfkb1/2, TFs including Runx1 and Irf8 displayed an increasing pattern, suggesting their possible roles in the progression to CC to prememory ( Figure 3a,b ) 29 , 30 . The RNA expression and motif accessibility temporal patterns of Spi1 and Runx1 are depicted on the corresponding UMAP embeddings in Figure 3b , showing that the accessibility of Spi1 gradually increases from CB to CC to prememory B cells but not in plasmablasts. The RNA expression of Spi1 is also absent in plasmablasts. Notably, the Runx1 motif is not accessible in the GC but becomes accessible at a later stage when committing to the prememory fate ( Figure 3b ). Based on similar DynaVelo analysis, some of the important TFs driving the plasmablast fate include Prdm1, Irf4, and Xbp1 30 ( Figure 3a ). Even though TF motif accessibility is a good proxy for single-cell TF activity, it alone appears insufficient to identify TF drivers; we found that a TF’s RNA expression was also required to accurately infer its role. For example, Prdm1 is accessible in both prememory B cells and plasmablasts but is only expressed in plasmablasts, which shows that Prdm1 is only responsible for plasmablast differentiation ( Figure 3b ). Similarly, Xbp1 demonstrates both higher accessibility and expression in plasmablasts, consistent with the fact that Xbp1 is a plasma cell TF. Contrary to prememory B cells and plasmablasts for which important TFs have been studied, less is known about CB-specific drivers of cell cycle in the dark zone. DynaVelo temporal patterns identified Foxo1, Mef2b, Mef2c, and Pou2f2 as TFs that potentially play a role in CB cell cycle progression ( Figure 3a ), all of which are well-known GC TFs based on the literature 30 . These TFs are expressed in both CBs and CCs, but their motif accessibility increases with CB cell cycle progression and decreases when transitioning towards CCs ( Figure 3a,b ). These observations are consistent with known biology of these TFs, which are important for maintaining the GC reaction but play a limited role in GC cell differentiation towards prememory B or plasmablast fates. Heterozygous mutations in Arid1a and Ctcf disrupt the normal trajectories of germinal center B cells In order to determine if DynaVelo can capture changes in cell-fate dynamics upon perturbation of key regulators of the GC reaction, we next turned from wildtype GC B cells to those with reduced expression of Arid1a or Ctcf , which respectively encode a DNA-binding subunit of the BAF chromatin remodeling complex and the insulator protein Ctcf that acts as both a TF and mediator of 3D chromatin looping. To this end, we applied DynaVelo to two multiome samples, Arid1aHet ( N = 6716 cells) and CtcfHet (N = 4688 cells ) , with heterozygous conditional deletion of Arid1a and Ctcf , respectively, in GC B cells 29 . Latent times learned for cells in these samples were similar to the wildtype samples in that they identified a subset of CBs as root cells and prememory B cells and plasmablasts as terminal cells ( Figure 4a ). The learned RNA velocities captured some of the main GC trajectories including CB cell cycle, CB to CC transition, and differentiation towards prememory B cells and plasmablasts ( Figure 4a ). However, one important expected trajectory was lost in both samples, namely the recirculation of CCs to CBs. This is an important expected trajectory in the WT GCs, as it is necessary for B cell receptor (BCR) affinity maturation prior to differentiation to either memory B cells or plasma cells. This suggests that loss of Arid1a or Ctcf leads to preferential exit of the GC reaction towards the memory B cell compartment, as previously reported for Arid1a 29 . Indeed, the percentage of prememory B cells in the mutant samples (20.09% in Arid1aHet , 25.45% in CtcfHet , and 44.96% in Arid1aHom ) is greater than in the wildtype (5.42%) ( Supplementary Figure S1 ). This indicates that in the mutant samples, GC B cells do not have the chance to undergo several rounds of somatic hypermutation in order to improve their BCR affinity, and consequently they have a faster exit as immature memory B cells. Download figure Open in new tab Supplementary Figure S1 Percentages of cell types in each genotype in mice GC B cells. The percentage of Prememory cells increases in both Arid1a and Ctcf mutant samples. Download figure Open in new tab Figure 4. Heterozygous mutations in Arid1a and Ctcf disrupt the normal trajectories of germinal center B cells. a .RNA and motif velocities of Arid1aHet GC B cells and DynaVelo’s latent times projected to the RNA and motif UMAPs. b .RNA and motif velocities of CtcfHet GCB cells and DynaVelo’s latent times projected to the RNA and motif UMAPs. In both ( a ) and ( b ), the critical recycling of CCs from the light back to the dark zone is lost, leading to either apoptosis or to germinal center exit as immature prememory B cells. c .TF motif accessibility trends of wildtype, Arid1aHet , and CtcfHet GC B cells in two main trajectory types: CB→CC→prememory and CB cell cycle. The dynamics of some TFs change more than others in the mutant GCs, highlighted by red and blue arrows for Arid1aHet and CtcfHet , respectively. d . Motif accessibility of TFs whose temporal patterns change more in Arid1aHet and CtcfHet GCs. To identify key transcription factors disrupted in mutant GCs, we analyzed their altered dynamics in CB cell cycle progression and the CB→CC→prememory trajectory type compared to WT ( Figure 4c ). While many TFs displayed subtle changes in dynamics, several TFs were especially notable in their strong changes in motif accessibility in the Arid1aHet sample, including Foxp1, Bach2, Spi1, and Stat3 ( Figure 4c ). The accessibility of Foxp1 and Bach2 decreases in Arid1aHet in both trajectory types compared to WT ( Figure 4c,d ), while the accessibility of Spi1 decreases in Arid1aHet during CB cell cycle progression ( Figure 4c,d ). The accessibility of Stat3 increases significantly in both CBs and CCs in Arid1aHet compared to WT ( Figure 4c,d ). Additionally, we highlighted Prdm1 and Pax5 as potential TFs whose accessibility respectively increases and decreases in CtcfHet compared to WT, especially during CB cell cycle ( Figure 4c,d ). DynaVelo uncovers dynamic TF relationships in GCs upon loss of chromatin regulators Arid1a and Ctcf So far, we have shown that DynaVelo is able to learn the main trajectories of WT GCs and their potential driver TFs and to identify the disrupted trajectories of mutant GC B cells. We next used DynaVelo to derive dynamic GRNs through examination of Jacobian matrices at cells of each trajectory type or through in silico perturbation of genes. We restricted to showing known gene regulatory relationships from literature 30 that are recovered based on the Jacobian matrix J xx , which measures how the change in RNA velocity of one gene depends on the change in expression of another gene, if TF motif accessibilities are left unchanged ( Figure 5a ). Positive and negative Jacobian values correspond to upregulation and downregulation, respectively. One limitation of this assessment is that known TF-to-gene regulatory relationships, i.e. GRN ‘edges’, from the germinal center literature 30 are reported as static rather than cell-state-dependent and dynamic. DynaVelo is able to extract the dynamics of gene regulatory relationships at the single-cell level and display temporal patterns along latent time in CB cell cycle and CB→CC→prememory trajectories in WT, Arid1aHet , or CtcfHet samples ( Figure 5a,b ). One of the important regulatory edges in the WT GC is Spi1→Bcl6 upregulation 30 , which matches the DynaVelo Jacobian in the WT CB→CC→prememory trajectory type. Notably, this regulatory relationship is lost in both Arid1aHet and CtcfHet in this trajectory type ( Figure 5a ). Download figure Open in new tab Figure 5. DynaVelo learns dynamic GRNs of wildtype GCs and identifies disrupted gene regulatory relationship in Arid1a and Ctcf mutant GC. a .Dynamic GRN edges for wildtype, Arid1aHet , and CtcfHet GCs in two trajectory types (CB→CC→prememory and CB cell cycle) based on analysis of the Jacobian Jxx. The majority of the wildtype dynamic regulatory edges match the known static GC GRN edges but provide more information about cell state in which they hold. The wildtype edge, Spi1→Bcl6 upregulation, is missing both in Arid1aHet and CtcfHet GCs. b .Dynamic GRN edges for wildtype, Arid1aHet , and CtcfHet GCs in two trajectory types (CB→CC→prememory and CB cell cycle) by in silico perturbations. Two important edges are lost in mutant GCs: Foxo1→Cxcr4 upregulation is lost in CCs in Arid1aHet , which is required for the migration of CCs from the light to the dark zone; Bcl6→Bcl2 downregulation is gained in prememory B cells in Arid1aHet , preventing normal differentiation toward memory B cells and priming the cells for apoptosis. c .The regulatory edges Foxo1→Cxcr4 and Bcl6→Bcl2 shown in the RNA UMAPs of wildtype, Arid1aHet , and CtcfHet GCs. d .A minimal static GRN of the wildtype GC is summarized from the dynamic GRN analysis. Red arrows represent upregulation; blue arrows, downregulation. The Bcl6→Bcl2 repressive edge is lost in the WT GC to enable differentiation to memory B cells (shown in blue cross). The edges that are lost or gained in mutant GC B cells are shown in purple and green lines and crosses. A second approach to investigate TF regulatory effects is through in silico perturbation, namely setting both the TF RNA expression and motif accessibility to their minimal values and measuring the change in RNA velocities for all other genes. Note that this perturbation strategy is a finite perturbation while Jacobian is an infinitesimal perturbation, and both the TF’s expression and its motif accessibility are perturbed, while the Jacobian method assumes the motif accessibility is intact. Figure 5b shows several important and known gene regulatory relationships in GCs 30 , along with the cell states/trajectory types in which they hold. Similar to the Jacobian analysis, in silico perturbation identified regulatory edges that are lost in Arid1aHet and CtcfHet mutant GCs. One critical transcriptional GRN edge in wildtype GCs is Foxo1→Cxcr4 upregulation, which is necessary for the maintenance of the GC and migration of CCs to the dark zone 30 . Notably, Foxo1→Cxcr4 upregulation is lost in CCs in Arid1aHet but instead is observed in CB cell cycle, as compared to WT ( Figure 5b,c ). This lost regulatory edge may be a reason why CCs in Arid1aHet lose their capability to migrate back to the dark zone and instead exit as immature prememory B cells, as described in our earlier analysis and experimentally validated 29 . Another important observation is that the absence of Bcl6→Bcl2 downregulation in CCs and prememory B cells in the wildtype GC, which is known to drive normal differentiation towards memory B cells 30 , is reversed in the CCs and prememory B cells of both Arid1aHet and CtcfHet ( Figure 5b,c ). Based on previous work 30 , Bcl6 can promote GC B cell apoptosis through repression of expression of Bcl2, an anti-apoptotic TF. Gain of this negative regulatory relationship may explain why CCs in Arid1aHet and CtcfHet GCs appear to undergo increased apoptosis. To summarize our findings, we assembled a minimal GRN governing the transcriptional regulation of critical GC TFs and highlighted regulatory relationships that are compromised in Arid1aHet and CtcfHet GCs ( Figure 5d ). In silico perturbations of Arid1a and Ctcf mutant GCs identify candidate targets to rescue the loss of function One powerful application of DynaVelo is the ability to perform in silico loss-of-function perturbations and determine their effects on GC dynamics and phenotype. We next aimed to globally assess how accurately we could predict the impact of genetic loss-of-function on RNA and motif velocities, and also to determine which loss-of-function perturbations would rescue the phenotypes of Arid1a and Ctcf loss. Notably, the change in the RNA and motif velocities (delta velocities) after in silico perturbation of Arid1a in the WT DynaVelo model suggests that CCs are committed towards the prememory B cell fate with no way to recycle back to the dark zone ( Figure 6a ), matching our previous analyses of Arid1aHet dynamics. It also appeared that CBs cannot properly progress through the cell cycle in the dark zone and instead transition to CCs in the light zone, which may also help explain why immature memory B cells accumulate in Arid1aHet GCs ( Figure 6a , Supplementary Figure S1 ). Download figure Open in new tab Figure 6. In silico perturbations of Arid1a and Ctcf mutant GCs identify candidate targets for rescuing the loss of function. a .Delta RNA and motif velocities after in silico perturbation of Arid1a show the difference between the velocity vectors before and after perturbation. These delta velocities are projected to the RNA and motif UMAPs, where the arrows indicate the direction toward which the cells are inclined to move after the perturbation. These trajectories show that the cells tend to transition from CB to CC and CC to CB recirculation is lost. b .The genes and TFs for which Arid1a delta velocities match the velocity changes in all cell types when comparing wildtype and Arid1aHet velocities. c .The genes and TFs for which Ctcf delta velocities match the velocity changes in all cell types when comparing wildtype and CtcfHet velocities. d .Number of rescued RNA and motif velocities after in silico perturbation of essential TFs in Arid1aHet . The TFs at the top right are the best perturbation targets for rescuing the loss of function phenotype in Arid1aHet . e .Number of rescued RNA and motif velocities after in silico perturbation of essential TFs in CtcfHet . The TFs at top right are the best perturbation targets for rescuing the loss of function phenotype in CtcfHet . f .Delta RNA and motif velocities after in silico perturbation of Tox in Arid1aHet . These perturbations appear to push some of the CCs back to CBs, potentially rescuing the lost CC→CB recirculation trajectory in Arid1aHet . g .Delta RNA and motif velocities after in silico perturbation of Pou2f2 in CtcfHet . These perturbations appear to push some of the CCs back to CBs, potentially rescuing the lost CC→CB recirculation trajectory in CtcfHet . For a more quantitative assessment, after in silico perturbation of Arid1a and Ctcf, we performed one-sample T tests on the both RNA and motif delta velocities in each cell type to test if the mean is significantly different from zero, where positive and negative T-test statistics correspond to significant velocity increases and decreases, respectively. Then we performed two-sample T tests between Arid1aHet and WT GCs in each cell type to test if the mean velocities are significantly different in two populations. We further identified genes and TFs for which these two T test statistics matched in sign in all cell types ( Methods ). Figures 6b and 6c show genes and TFs identified by this procedure whose predicted RNA and motif velocities increase or decrease in a manner consistent with experimental Arid1a and Ctcf perturbations, respectively. Of special note in Arid1a perturbation is the decrease in RNA velocity of Nfkb2 and increase in motif velocity of Irf8, both together suggesting immature differentiation towards prememory B cells ( Figure 6b ). As a final application of DynaVelo, we performed in silico perturbation of important GC TFs and epigenetic regulators in the mutant GC models and identified which perturbation targets could potentially rescue the loss of function in Arid1aHet and CtcfHet GC B cells ( Figure 6d,e ) . Similar to previous analysis ( Figure 6b,c ) , we counted and plotted the number of ‘rescued’ RNA and motif velocities after each in silico perturbation for Arid1aHet and CtcfHet , meaning that the predicted change in velocity across cell types was consistent with a change toward WT ( Figure 6d , e, Methods ). The perturbations leading to higher numbers of both RNA and motif velocity rescues yielded the nominated targets for rescuing the loss of function phenotype. We identified Tox and Bcor as the potential perturbation targets for rescuing Arid1a loss of function ( Figure 6d ) and Pou2f2, Pax5, and Spi1 as the potential perturbation targets for rescuing Ctcf loss of function ( Figure 6e ). Finally, we plotted the delta RNA and motif velocities after Tox and Pou2f2 in silico perturbations in Arid1aHet GC ( Figure 6f ) and CtcfHet GC ( Figure 6g ) . These delta velocities suggest that some of the CCs can migrate back to the dark zone, an ability that has been lost in Arid1aHet and CtcfHet GCs, and consequently not all the CCs transition to immature prememory B cells. Reassuringly, the nominated perturbation targets in Arid1aHet and CtcfHet are the ones that can apparently bring back the lost capability of CCs to recycle back to the dark zone to improve their affinity maturation ( Figure 6f , g ). Discussion A fundamental goal in biology is to understand the dynamic GRNs that govern cell state transitions in evolving cellular systems and to predict how cell-fate decisions change when the network is perturbed. Since most single-cell genomics technologies are destructive, cell dynamics and GRNs are typically inferred from data at a single snapshot of time, using pseudotime strategies such as those based on RNA velocity estimates. However, these velocity estimates are prone to errors and can generally be computed only for a relatively small subset of genes. Another key challenge is to distinguish causal drivers from non-causal ones, as single-cell data provides a high dimensional (and sparse) space of observables comprising many confounding factors. With the advent of single-cell multiome technology, it is now possible to exploit single-cell TF motif activities derived from scATAC-seq in addition to gene expression and RNA velocity in dynamic models of evolving cellular systems. We developed DynaVelo as a robust generative deep learning model that uses single-cell chromatin accessibility and transcriptomic readouts at a single time point to infer dynamic gene regulation patterns, and we applied this model to dissect the complex in vivo dynamics of WT and mutant GC B cells. DynaVelo uses a neural ODE framework where RNA expression and TF motif accessibility inputs are each mapped to latent spaces in which the cell dynamics are learned. Partial RNA velocities from a tool like scVelo are supplied as input to provide pseudotemporal guidance, and the model learns RNA velocities for all modeled genes as well as TF motif velocities. Through Jacobian analysis and in silico perturbation of TFs, DynaVelo can reveal dynamic GRNs, where single-cell regulatory edges between TFs and target genes depend on cell state, moving beyond traditional GRN inference methods that present a single static network per dataset or cell type. Moreover, DynaVelo provides a powerful generative deep learning framework for modeling and intervening in dynamic cellular systems, enabling the generation of cellular trajectories and the analysis of in silico genetic perturbations at both the expression and motif accessibility levels. Other recent work has used machine learning models on single-cell data to infer continuous vector fields and model cell dynamics. For example, Dynamo 19 infers absolute RNA velocity, predicts cell fates and perturbation outcomes, and extracts regulatory relationships using differential geometry. However, training this model requires single-cell RNA-seq paired with metabolic labeling which is not feasible in many experimental systems. Another recent model scDiffEq uses drift-diffusion modeling of cell dynamics but only uses RNA expression without chromatin accessibility information 20 and thus fails to leverage scATAC-derived TF activity dynamics. By contrast, DynaVelo exploits single-cell multiome to powerfully model how gene expression and chromatin accessibility, as summarized by TF motif activities, influence each other in cell dynamics, enabling the accurate prediction of loss-of-function and rescue perturbations. We first used DynaVelo in the WT GCs to uncover all the main dynamics and trajectories of the GC reaction. GC B cells experience rapid and extensive shifts in their chromatin and transcriptional programs as they cycle back and forth between rounds of proliferative bursting in the GC dark zone and selection in the light zone and exit the GC reaction as memory B cells or plasma cells 31 . These rapid cell-fate shifts are dictated by constantly evolving chromatin patterns, presenting a challenge to existing trajectory and velocity inference algorithms. DynaVelo recovered all the expected dynamics of GCs and the driver TFs of CB cell cycle progression, CB to CC transition, and prememory, and plasmablast exits. We showed that Spi1 and Nfkb1 are two driving TFs required for CBs to transition to CCs and then to prememory B cells, consistent with previous findings 29 . Next, we applied DynaVelo to mutant GCs and identified that the main lost transition in both Arid1aHet and CtcfHet GC dynamics is the migration from the light zone back to the dark zone, a critical step required for B cell affinity maturation before differentiation to prememory B cells or plasmablasts. As a result, CCs either undergo apoptosis (dead-end trajectories) or transition towards immature prememory B cells. This is in accordance with the observation that the percentage of prememory B cells is higher in the mutant GCs and that Arid1a haploinsufficiency in the GC leads to a fast exit as immature prememory B cells 29 . Through analysis of TF motif accessibility along inferred latent time, we identified TFs whose dynamics were disrupted in mutant GCs, determining that Bach2, Foxp1, and Spi1 become less accessible and Stat3 becomes highly accessible in Arid1aHet . The change in the dynamics of CtcfHet is more subtle than Arid1aHet , and of note is a decrease in Pax5 and increase in Prdm1 accessibility. Beyond resolving latent time ordering of cells, DynaVelo learns a functional form of the vector field underlying cell dynamics, enabling the inference of dynamic GRNs in the GC through Jacobian and in silico perturbation analysis. We showed that DynaVelo recovered TF-gene regulatory relationships in the WT GC that matched known biology and identified which regulatory edges are lost or gained in mutant GCs. We found that two important TF-gene edges are lost in the mutant setting. First, upregulation of Cxcr4 by Foxo1 is essential for migration of CCs in the light zone back to the dark zone, and this regulatory edge is lost in Arid1aHet CCs, leading to fast and immature exit from the GC. Second, Bcl6 downregulation of Bcl2 must be suppressed in CCs for differentiation towards prememory B cells 30 , as seen in the wildtype GC, but this negative regulatory relationship is gained in both mutants, leading to higher apoptosis. Finally, we used in silico perturbations in the mutant GCs to identify candidate perturbation targets potentially leading to rescue of the loss of function in Arid1aHet and CtcfHet , with Tox and Bcor as potential candidates for Arid1aHet and Pou2f2, Pax5, and Spi1 as candidates for CtcfHet . DynaVelo thus provides a powerful generative neural ODE framework for modeling cell dynamics from single-cell multiome data, enabling identification of TF drivers of cell state transitions, inference of dynamic GRNs, and prediction of loss-of-function and rescue genetic perturbations. Methods scATAC data processing We used ArchR 32 to analyze our scATAC-seq data. We inputted the fragment files of our seven GC samples: three WTs, one Arid1aHet , one CtcfHet , and two Arid1aHom . ArchR tiles the genome with the resolution of 500bp and then calculates the highly variable tiles. We used 25000 highly variable genes and LSI dimensionality reduction to bring the dimension to 30. We then used these clusters to create pseudo-bulk replicates and run the peak calling algorithm by MACS2 33 . Motif matrices from chromVAR After getting the peak matrix from ArchR, we ran chromVAR 25 to get the TF motif accessibility z scores in each single cell. We then smoothed these z scores over 15 nearest cells on the KNN graph derived from LSI dimensions. We used the Vierstra motif set 34 in chromVAR. This yielded the motif matrix for ∼2100 motifs. To filter the motifs with non/low-expressed TFs, we only kept the motifs whose TFs are expressed in at least 500 cells. As the TF motifs could be degenerate and multiple motifs could correspond to one TF, we only kept one motif per TF by choosing the pair that has the maximum Pearson correlation between the motif accessibility scores and RNA expression values. At the end, we ended up with a motif matrix for ∼200 motifs which was used as an input for the DynaVelo model. scRNA data processing We used Scanpy 35 to read the CellRanger outputs for all seven samples. We filtered the cells by keeping the ones with minimum gene count of 200, minimum UMI count of 1000, maximum UMI count of 10000, and percentage of mitochondrial genes less than 25%. We next did total count normalization to the median value of all cells and log1p-normalized (log(x+1)) them. Finally, we kept 2000 highly variable genes to be used in the DynaVelo model. Spliced and unspliced counts We ran velocyto 15 to read the CellRanger outputs and get the spliced and unspliced counts through loom files for each sample. We used Scanpy to read these loom files and merged the spliced and unspliced matrices to the RNA anndata, ready to be used by scVelo for RNA velocity estimates. Estimated RNA velocities by scVelo We used scVelo to get the estimated RNA velocities. Since cells in each sample might have different dynamics, we ran scVelo per sample. scVelo needs sufficient spliced and unspliced counts to have a reliable RNA velocity estimation. We followed the scVelo pipeline and kept the genes that have more than 10 shared spliced and unspliced counts. This filtering step usually got rid of ∼20,000 genes. We normalized the count, spliced, and unspliced matrices based on the median values and log1p-normalized the count matrix. When running scVelo, we specified considering all the genes not only the highly variable ones. The reason for this choice was to have access to maximum number of velocity genes, the genes for which scVelo provides RNA velocity estimates. Depending on the sample, the number of velocity genes was in the range of ∼400 to ∼1000 genes. Genes used in DynaVelo models We chose four subsets of genes to be used in the DynaVelo models: highly variable genes (2000 genes), velocity genes (∼400-1000 genes), expressed TFs (∼200 genes), and genes of interest (∼20 genes). Highly variable genes carry more information about the cell state transitions. Velocity genes provide additional directional information to guide the learned dynamics. We also included the TFs that were expressed in at least 500 cells to be included in the GRNs as well as the genes of interest in GCs such as Arid1a which is an important epigenetic regulator often mutated in the lymphomas. We used the union of these four subsets (∼3000-4000 genes) as our modeled genes. DynaVelo model details chromVAR assigns cell-wise TF motif accessibility scores by genome-wide scanning of accessible peaks for TF motifs. Given the crucial role of TFs in regulating target genes within dynamic systems, we hypothesize that integrating TF motif accessibility scores could enhance the understanding of cellular dynamics across transcriptomic and epigenomic spaces. If we denote log-normalized RNA expression values of m genes in a cell at the latent time t by x ( t ) ∈ R m , and chromVAR z-scores of k TFs in the same cell by y ( t ) ∈ R k , we model the cellular dynamics by a joint ODE as: where f x and f y are functions that model the dynamics (velocities) of RNA expression and TF motif accessibility, respectively. Here, we have full information of x ( t ) and y ( t ) and partial information of RNA velocities dx ( t )/ dt (for “velocity genes”, for which a method like scVelo provides an estimate), and we want to learn the dynamics functions f x and f y and the latent times, t , of the cells. After learning the dynamics functions, we will recover the RNA velocities of all genes (including non-velocity genes) and define the TF motif velocities dy ( t )/ dt to reveal important information regarding the dynamics of the chromatin landscape. Instead of working with ODEs in the original high-dimensional spaces, we define an equivalent joint ODE in low-dimensional latent spaces and , and then map them to the original spaces. We set the default values d x = d y = 50 in the models. The rationale behind this is two-fold. First, we would like to have velocity-aware latent spaces that will be informative in illustrating dynamical patterns of the data. Second, since the high-dimensional data are noisy and sparse, learning the dynamics functions in the original spaces is computationally costly and less robust. We define the joint ODE in the latent space as: where z x ( t ) and z y ( t ) are respectively the latent space of RNA expression and TF motif accessibility of a cell at the latent time t , and and are respectively the dynamics functions of z x and z y . If we denote the concatenations of these vectors by z( t ) = [z x ( t ), z y ( t )] and , we can write the joint latent ODE as , which has the solution and is known as the initial value problem (IVP). Since we do not know the functional form of f z , we define it as a neural network, which leads to a neural ODE introduced in the seminal work 36 . Neural ODEs are very powerful and efficient models for learning dynamics. After learning the joint cellular dynamics in the latent space, we map them back to the original spaces of RNA expression and motif accessibility by conditional distributions: where g x and g y are two neural network decoders mapping the latent space to the original spaces, and and are the known variances of the Gaussian distributions (we assume . We define the RNA and motif velocities as the time derivatives of µ x ( t ) and µ y ( t ): where and are respectively the Jacobian matrices of the decoder functions g x and g y . We assume that the initial points of cells are unknown in order to have a general framework for less studied and more complex biological systems such as GC B cells. Therefore, we assume that z(0) is a random variable with the prior standard Gaussian distribution z(0)∼ N (0, I ). We also assume that the latent times of cells t follow a prior Beta distribution t ∼ Beta (2,2), as the times should be positive. This leads to the expectation E [ t ] = 1 and variance Var [ t ] = 0.05. As such, the problem is to learn the posterior probability distributions of z(0) and t for each cell, the dynamics function f z , and the decoder functions g x and g y . To this end, we use variational inference techniques by first assuming approximate posterior distributions for z(0) and t and then using inference networks (or encoders) to learn the parameters of those approximate posteriors, as shown in Figure 1a . We assume the same family of distributions for the approximate posteriors as the priors, that is, , and t | x ( t ), y ( t )∼ Beta (α t , β t ), where and β t are encoder neural networks that take in the inputs x ( t ) and y ( t ) and output the corresponding parameters of the approximate posterior distributions. In addition to reconstruction loss, we use a KL divergence loss function to ensure that the learned initial (latent time t = 0) latent states and cell-specific latent times do not greatly diverge from a standard normal distribution and a Beta distribution, respectively. There are two additional loss functions used in DynaVelo. The first one is a cosine similarity loss function for the RNA velocities, where any reliable RNA velocity method, like scVelo, is used to provide velocity estimates for a subset of genes and supervise the corresponding learned RNA velocities. The second loss function is a velocity consistency loss to ensure that RNA and TF motif velocities lead to similar trajectories of cells in their respective latent spaces. The total loss function has the following form: where the first term is the reconstruction loss of RNA expression and TF motif accessibility which maximizes the log-likelihoods: The second term is RNA velocity loss which maximizes the cosine similarity of predicted velocities and scVelo velocities for the velocity genes: where G v and denote respectively the indices of velocity genes as reported by scVelo and the estimated RNA velocities by scVelo. The third term is velocity consistency loss which ensures that cells in two RNA and motif spaces follow consistent trajectories: where and store respectively the cosine similarities of RNA and motif velocity vectors of the cells i and j . The velocity consistency loss minimizes the distance between cosine similarity of cells in both RNA and motif spaces and thereby helps retain consistent trajectories in both spaces. The fourth term is regularization loss which minimizes the Kullback–Leibler (KL) divergence between the approximate posterior and prior distributions of z(0) and t : Generating trajectories using DynaVelo Since DynaVelo is a generative model, it can be used to generate cell trajectories. Given any cell { x ( t ), y ( t )}, DynaVelo first maps it to the initial latent state z(0) = [z x (0), z y (0)] and finds its latent time t , which is simply the mean of the posterior Beta distribution: . For generating trajectories, we start from z(0) at time t = 0 and solve the ODEs up to the time t = 1 with the step size of t step = 0.01: which generates a latent trajectory T z = [z(0), z( t 1 ), , … , z( t 100 )] with the length of 101 (including the initial point). We then map this latent trajectory to the observational spaces of RNA expression and motif accessibility using their corresponding decoders µ x ( t ) = g x .z x ( t )0 and µ y ( t ) = g y (z y ( t )) to generate the trajectories T x = [ µ x (0), µ x ( t 1 ), , … , µ x ( t 100 )] and T y = [ µ y (0), µ y ( t 1 ), … , µ x ( t 100 )]. To quantify the uncertainty of these trajectories, we sample N = 50 initial latent states from the learned posterior distributions and and generate N latent trajectories and N corresponding RNA expression and motif accessibility trajectories and for n = 1, … , N . We then calculate the mean and standard error of the mean at each time point of the RNA trajectories as (the calculation is the same for motif accessibility trajectories): where σ( T x ) is the standard deviation at teach time point: Finally, we calculate the 95% confidence interval of the mean trajectory as: Calculating the Jacobian matrices for dynamic GRNs We can calculate the Jacobian matrices, evaluated at individual cells, that represent the derivatives of the RNA and TF motif velocity outputs with respect to the RNA expression and TF motif accessibility inputs. This results in four Jacobian matrices in each cell showing how: (1) RNA expression affects RNA velocity; (2) RNA expression affects motif velocity; (3) TF motif accessibility affects RNA velocity; and (4) TF motif accessibility affects motif velocity. Specifically, once we have learned the velocity functions f x and f y in the joint ODE model, we can define four Jacobian matrices for the cell c as: for c = 1, … , n cells , where J xx and J yx show how the RNA expression of a gene can affect all other RNA and motif velocities, respectively, and J xy and J yy show how the motif accessibility of a TF can affect all other RNA and motif velocities. This allows us to understand the gene regulation in a dynamic manner as these Jacobians are cell specific. Since, all the reported GRNs in the literature are static rather than dynamic, we can calculate summary statistics of these Jacobian matrices in any cell states to get the static cell-state-specific GRNs. For example, we can get the transcriptional GRNs in the cell state S as: To calculate the Jacobians, we use the definition of partial derivatives: for c = 1, … , n cells , where e j is the unit vector of j -th element in either x ( c ) or y ( c ) . We use a small value ϵ = 10 −4 to estimate these Jacobian matrices. In silico perturbations To do in silico perturbation of i -th non-TF gene, we set its RNA expression to the minimum value over the population of all cells and calculate the change in both RNA and motif velocity vectors (delta velocity) as: for c = 1, … , n cells . To do in silico perturbation of i -th TF, we set both its RNA expression and motif accessibility to the minimum value over the population of all cells and calculate the delta velocities as (if the i -th element in both x and y vectors correspond to the same TF): for c = 1, … , n cells . These delta velocities could be projected to any lower dimensional spaces such as UMAP to see what the behavior of cells would be and how their trajectories would change after any perturbation. If we look carefully, we can see that these delta velocities are similar to the Jacobian matrices in that they measure the change in velocity functions after any perturbations. The main difference is that the perturbations are infinitesimal in Jacobians but finite in delta velocities. As a result, we can build two delta velocity matrices: to be used for the dynamic GRN inference. The reason for the negative sign is because the perturbations were knockout (setting to minimum values) instead of over-expression (setting to maximum values), where there is no need for the negative sign. These delta velocities can be used to understand both transcriptional and epigenetic regulation in each cell c = 1, … , n cells , or any cell sate S j : for j = 1, … , n states , where n states is the number of cell states/types. We should note that the dynamic GRNs derived by Jacobians and delta velocities might not be identical due to the nature of perturbations performed in each approach. Calculating RNA and motif velocity rescues Here we describe the details of how we count the number of RNA and motif velocities that are rescued after any TF perturbation in Figure 6 . Let and be the RNA and motif velocity vectors of the mutant sample and and the RNA and motif velocity vectors of the WT sample. We do in silico perturbation of the TF i in the mutant DynaVelo model and perform a one-sample t-test on the delta velocity vectors in each cell type and , to test if the mean of delta velocities are positive or negative, meaning if this perturbation increases or decreases the velocities in cell type S j . To see if the direction of change in velocities after in silico perturbation matches the direction towards the WT sample, we perform another two-sample t-test between the velocity vectors of WT and mutant samples in cell type S j , i.e. and . Now we calculate the number of rescued RNA and motif velocities as the number of genes and motifs that match the sign between and t and between and in all cell types S j . In a nutshell, this shows how much a TF perturbation in a mutant sample can shift its RNA and motif velocities towards the RNA and motif velocities of the WT sample. Similarly, we can perform an in silico perturbation of the TF i in the WT sample, predict the delta velocities, and check how much they shift towards the velocities of the mutant sample. To this end, we perform the one-sample t-tests and and two-sample t-tests and in all cell types cell type S j , j = 1, … , n st a tes . Now we calculate the number of genes for which and match the sign for all S j , and the number of motifs for which and match the sign for all S j . Higher numbers of matches mean more correct velocity predictions after in silico perturbations. Data availability We have used seven multiome samples: Ctcf-29 (WT), Arid1a-C3 (WT), Arid1a-D3 (WT), Arid1a-M3 ( Arid1aHet ), Arid1a-M1 ( Arid1aHom ), Arid1a-C2 ( Arid1aHom ), and Ctcf-30 ( CtcfHet ). The first six samples are available on the NCBI Gene Expression Omnibus via GSE237990, and the last sample ( CtcfHet ) via GSE294051. Code availability https://github.com/karbalayghareh/DynaVelo includes the code package and a tutorial for training DynaVelo models. It also includes Python notebooks to generate the figures of this paper. Author contributions A.K. developed DynaVelo, performed all the analyses, and co-wrote the paper with C.S.L.; C.R.C. annotated the cell types of the GC B cells; D.B. generated the GC B cell multiome datasets; C.S.L. and A.M. supervised the research; C.R.C., D.B., and A.M. provided biological interpretation of computational results. Competing interests A.K., D.B., C.R.C., and C.S.L. declare no competing interests. A.M. has research funding from Janssen, Epizyme and Daiichi Sankyo. A.M. consulted for Exo Therapeutics, Treeline Biosciences, Astra Zeneca, Epizyme. Acknowledgements A.K. and C.L. are funded by IGVF U01 NIH/NHGRI HG012103. D.B. is supported by R00 CA246080. C.R.C. is supported by NIH-NIAMS T32AR071302. Funding NIH, , HG012103 , CA246080 , T32AR071302 References ↵ Zhou , T. et al. GAGE-seq concurrently profiles multiscale 3D genome organization and gene expression in single cells . Nature Genetics 56 , 1701 – 1711 ( 2024 ). doi: 10.1038/s41588-024-01745-3 OpenUrl CrossRef Mimitou , E. P. et al. Multiplexed detection of proteins, transcriptomes, clonotypes and CRISPR perturbations in single cells . Nature Methods 16 , 409 – 412 ( 2019 ). doi: 10.1038/s41592-019-0392-0 OpenUrl CrossRef PubMed Stoeckius , M. et al. Simultaneous epitope and transcriptome measurement in single cells . Nature Methods 14 , 865 – 868 ( 2017 ). doi: 10.1038/nmeth.4380 OpenUrl CrossRef PubMed Ma , S. et al. Chromatin Potential Identified by Shared Single-Cell Profiling of RNA and Chromatin . Cell 183 , 1103 - 1116 .e1120 ( 2020 ). doi: 10.1016/j.cell.2020.09.056 OpenUrl CrossRef PubMed Chen , S. , Lake , B. B. & Zhang , K. High-throughput sequencing of the transcriptome and chromatin accessibility in the same cell . Nature Biotechnology 37 , 1452 – 1457 ( 2019 ). doi: 10.1038/s41587-019-0290-0 OpenUrl CrossRef Cao , J. et al. Joint profiling of chromatin accessibility and gene expression in thousands of single cells . Science 361 , 1380 – 1385 ( 2018 ). doi: 10.1126/science.aau0730 OpenUrl Abstract / FREE Full Text Mimitou , E. P. et al. Scalable, multimodal profiling of chromatin accessibility, gene expression and protein levels in single cells . Nature Biotechnology 39 , 1246 – 1258 ( 2021 ). doi: 10.1038/s41587-021-00927-2 OpenUrl CrossRef Bartosovic , M. & Castelo-Branco , G. Multimodal chromatin profiling using nanobodybased single-cell CUT&Tag . Nature Biotechnology 41 , 794 – 805 ( 2023 ). doi: 10.1038/s41587-022-01535-4 OpenUrl CrossRef Bartosovic , M. , Kabbe , M. & Castelo-Branco , G. Single-cell CUT&Tag profiles histone modifications and transcription factors in complex tissues . Nature Biotechnology 39 , 825 – 835 ( 2021 ). doi: 10.1038/s41587-021-00869-9 OpenUrl CrossRef Zhu , C. et al. Joint profiling of histone modifications and transcriptome in single cells from mouse brain . Nature Methods 18 , 283 – 292 ( 2021 ). doi: 10.1038/s41592-021-01060-3 OpenUrl CrossRef Angermueller , C. et al. Parallel single-cell sequencing links transcriptional and epigenetic heterogeneity . Nature Methods 13 , 229 – 232 ( 2016 ). doi: 10.1038/nmeth.3728 OpenUrl CrossRef PubMed Clark , S. J. et al. scNMT-seq enables joint profiling of chromatin accessibility DNA methylation and transcription in single cells . Nature Communications 9 , 781 ( 2018 ). doi: 10.1038/s41467-018-03149-4 OpenUrl CrossRef PubMed Zhu , C. et al. An ultra high-throughput method for single-cell joint analysis of open chromatin and transcriptome . Nature Structural & Molecular Biology 26 , 1063 – 1070 ( 2019 ). doi: 10.1038/s41594-019-0323-x OpenUrl CrossRef PubMed ↵ Peterson , V. M. et al. Multiplexed quantification of proteins and transcripts in single cells . Nature Biotechnology 35 , 936 – 939 ( 2017 ). doi: 10.1038/nbt.3973 OpenUrl CrossRef PubMed ↵ La Manno , G. et al. RNA velocity of single cells . Nature 560 , 494 – 498 ( 2018 ). doi: 10.1038/s41586-018-0414-6 OpenUrl CrossRef PubMed ↵ Bergen , V. , Lange , M. , Peidli , S. , Wolf , F. A. & Theis , F. J. Generalizing RNA velocity to transient cell states through dynamical modeling . Nature Biotechnology 38 , 1408 – 1414 ( 2020 ). doi: 10.1038/s41587-020-0591-3 OpenUrl CrossRef PubMed ↵ Gorin , G. , Fang , M. , Chari , T. & Pachter , L. RNA velocity unraveled . PLOS Computational Biology 18 , e1010492 ( 2022 ). doi: 10.1371/journal.pcbi.1010492 OpenUrl CrossRef PubMed ↵ Bergen , V. , Soldatov , R. A. , Kharchenko , P. V. & Theis , F. J. RNA velocity—current challenges and future perspectives . Molecular Systems Biology 17 , e10282 ( 2021 ). doi: 10.15252/msb.202110282 OpenUrl CrossRef PubMed ↵ Qiu , X. et al. Mapping transcriptomic vector fields of single cells . Cell 185 , 690 - 711 .e645 ( 2022 ). doi: 10.1016/j.cell.2021.12.045 OpenUrl CrossRef PubMed ↵ Vinyard , M. E. , Rasmussen , A. W. , Li , R. , Getz , G. & Pinello , L. scDiffEq: drift-diffusion modeling of single-cell dynamics with neural stochastic differential equations . bioRxiv , 2023.2012.2006.570508 ( 2023 ). doi: 10.1101/2023.12.06.570508 OpenUrl Abstract / FREE Full Text ↵ Centore , R. C. , Sandoval , G. J. , Soares , L. M. M. , Kadoch , C. & Chan , H. M. Mammalian SWI/SNF Chromatin Remodeling Complexes: Emerging Mechanisms and Therapeutic Strategies . Trends in Genetics 36 , 936 – 950 ( 2020 ). doi: 10.1016/j.tig.2020.07.011 OpenUrl CrossRef PubMed ↵ Kadoch , C. et al. Proteomic and bioinformatic analysis of mammalian SWI/SNF complexes identifies extensive roles in human malignancy . Nature Genetics 45 , 592 – 601 ( 2013 ). doi: 10.1038/ng.2628 OpenUrl CrossRef PubMed ↵ Phillips , J. E. & Corces , V. G. CTCF: Master Weaver of the Genome . Cell 137 , 1194 – 1211 ( 2009 ). doi: 10.1016/j.cell.2009.06.001 OpenUrl CrossRef PubMed Web of Science ↵ Rao , Suhas S. P. et al. A 3D Map of the Human Genome at Kilobase Resolution Reveals Principles of Chromatin Looping . Cell 159 , 1665 – 1680 ( 2014 ). doi: 10.1016/j.cell.2014.11.021 OpenUrl CrossRef PubMed Web of Science ↵ Schep , A. N. , Wu , B. , Buenrostro , J. D. & Greenleaf , W. J. chromVAR: inferring transcription-factor-associated accessibility from single-cell epigenomic data . Nature Methods 14 , 975 – 978 ( 2017 ). doi: 10.1038/nmeth.4401 OpenUrl CrossRef PubMed ↵ De Silva , N. S. & Klein , U. Dynamics of B cells in germinal centres . Nature Reviews Immunology 15 , 137 – 148 ( 2015 ). doi: 10.1038/nri3804 OpenUrl CrossRef PubMed Mesin , L. , Ersching , J. & Victora Gabriel D. Germinal Center B Cell Dynamics . Immunity 45 , 471 – 482 ( 2016 ). doi: 10.1016/j.immuni.2016.09.001 OpenUrl CrossRef PubMed ↵ Victora , G. D. & Nussenzweig , M. C. Germinal Centers . Annual Review of Immunology 40 , 413 – 442 ( 2022 ). doi: 10.1146/annurev-immunol-120419-022408 OpenUrl CrossRef ↵ Barisic , D. et al. ARID1A orchestrates SWI/SNF-mediated sequential binding of transcription factors with ARID1A loss driving pre-memory B cell fate and lymphomagenesis . Cancer Cell 42 , 720 – 722 ( 2024 ). doi: 10.1016/j.ccell.2024.03.012 OpenUrl CrossRef PubMed ↵ Laidlaw , B. J. & Cyster , J. G. Transcriptional regulation of memory B cell differentiation . Nature Reviews Immunology 21 , 209 – 220 ( 2021 ). doi: 10.1038/s41577-020-00446-2 OpenUrl CrossRef PubMed ↵ Mlynarczyk , C. , Fontán , L. & Melnick , A. Germinal center-derived lymphomas: The darkest side of humoral immunity . Immunological Reviews 288 , 214 – 239 ( 2019 ). doi: 10.1111/imr.12755 OpenUrl CrossRef PubMed ↵ Granja , J. M. et al. ArchR is a scalable software package for integrative single-cell chromatin accessibility analysis . Nature Genetics 53 , 403 – 411 ( 2021 ). doi: 10.1038/s41588-021-00790-6 OpenUrl CrossRef PubMed ↵ Zhang , Y. et al. Model-based Analysis of ChIP-Seq (MACS) . Genome Biology 9 , R137 ( 2008 ). doi: 10.1186/gb-2008-9-9-r137 OpenUrl CrossRef PubMed ↵ Vierstra , J. et al. Global reference mapping of human transcription factor footprints . Nature 583 , 729 – 736 ( 2020 ). doi: 10.1038/s41586-020-2528-x OpenUrl CrossRef PubMed ↵ Wolf , F. A. , Angerer , P. & Theis , F. J. SCANPY: large-scale single-cell gene expression data analysis . Genome Biology 19 , 15 ( 2018 ). doi: 10.1186/s13059-017-1382-0 OpenUrl CrossRef PubMed ↵ Chen , R. T. Q. , Rubanova , Y. , Bettencourt , J. & Duvenaud , D. K. Neural ordinary differential equations . Advances in neural information processing systems 31 ( 2018 ). View the discussion thread. Back to top Previous Next Posted April 26, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Deep dynamical models of single-cell multiomic velocities predict loss-of-function and rescue perturbations in B cells Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Deep dynamical models of single-cell multiomic velocities predict loss-of-function and rescue perturbations in B cells Alireza Karbalayghareh , Darko Barisic , Christopher R. Chin , Ari Melnick , Christina S. Leslie bioRxiv 2025.04.24.650458; doi: https://doi.org/10.1101/2025.04.24.650458 Share This Article: Copy Citation Tools Deep dynamical models of single-cell multiomic velocities predict loss-of-function and rescue perturbations in B cells Alireza Karbalayghareh , Darko Barisic , Christopher R. Chin , Ari Melnick , Christina S. Leslie bioRxiv 2025.04.24.650458; doi: https://doi.org/10.1101/2025.04.24.650458 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Systems Biology Subject Areas All Articles Animal Behavior and Cognition (7636) Biochemistry (17697) Bioengineering (13896) Bioinformatics (41957) Biophysics (21457) Cancer Biology (18595) Cell Biology (25522) Clinical Trials (138) Developmental Biology (13381) Ecology (19903) Epidemiology (2067) Evolutionary Biology (24323) Genetics (15612) Genomics (22511) Immunology (17738) Microbiology (40401) Molecular Biology (17184) Neuroscience (88624) Paleontology (667) Pathology (2833) Pharmacology and Toxicology (4825) Physiology (7644) Plant Biology (15158) Scientific Communication and Education (2046) Synthetic Biology (4296) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00