Pairwise Causal Discovery in Biochemical Network: A Survey on Directionality Inference within Complex Networks from Stationary Observations

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 53,475 characters · extracted from preprint-html · click to expand
Pairwise Causal Discovery in Biochemical Network: A Survey on Directionality Inference within Complex Networks from Stationary Observations | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Pairwise Causal Discovery in Biochemical Network: A Survey on Directionality Inference within Complex Networks from Stationary Observations Nava Leibovich , Miroslava Cuperlovic-Culf doi: https://doi.org/10.1101/2025.07.22.666141 Nava Leibovich a National Research Council of Canada, NRC-Fields Mathematical Sciences Collaboration Centre , 222 College street, Toronto, ON, M5T 3J1, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on 
this site For correspondence: Nava.Leibovich{at}nrc-cnrc.gc.ca Miroslava Cuperlovic-Culf b Digital Technologies Research Centre, Bldg M50, National Research Council Canada , 1200 Montreal Road, Ottawa, ON, K1A 0R6, Canada c Department of Biochemistry, Microbiology and Immunology, Faculty of Medicine University of Ottawa , 451 Smyth Road, Ottawa, ON, K1H 8M5, Canada d Ottawa Institute of Systems Biology, University of Ottawa , 451 Smyth Road, Ottawa, ON, K1H 8M5, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Preview PDF Abstract Metabolic networks map complex biochemical reactions within organisms, which is crucial for understanding cellular processes and metabolite flow. This study focuses on inferring the directionality of interactions in metabolomics networks. Given the challenge of using steady-state data, we benchmark various methods, including statistical scores and neural network approaches, on synthetic yet realistic biological models. Our findings highlight the relative success of a few methods in some cases where the interaction mechanism is known, whereas other methods show limited effectiveness. 1. Introduction Metabolic networks function as intricate maps of biochemical reactions within an organism, illustrating the complex web of interactions that govern cellular processes. Understanding the directionality of these reactions is crucial for tracking the flow of metabolites through pathways and for illuminating the functional dynamics of cellular metabolism. This information is relevant across various fields, including systems biology, biotechnology, and drug discovery [ 1 – 5 ]. In this context, we concentrate on the interpretation of the metabolomics network. The metabolome consists of hundreds of correlated compounds, offering substantial information within the metabolomics network that extends beyond the mere levels of individual metabolites [ 6 – 8 ]. 
Focusing solely on individual metabolites can obscure potential intervention targets due to issues with collinearity and confounding factors; however, network models can effectively incorporate these interrelationships within the metabolome [ 6 – 10 ]. The significance of biological networks in human diseases has been widely acknowledged [ 11 , 12 ], particularly in their role in identifying causal associations [ 13 ]. Interaction networks are typically inferred using time series measurements or pseudo-time trajectories, employing statistical tools such as Bayesian inference and maximum likelihood alongside machine learning algorithms [ 14 – 24 ]. Notably, a wide range of recent studies have explored methods for inferring networks from temporal data, as highlighted in the partial list as follows [ 25 – 35 ]. However, it is important to note that these methods require recorded synchronized ordered data, which is often unmeasurable in various observational contexts. Here we aim to infer the direction of interactions from stationary data associated with the analyzed dataset, see Fig. 1 . In studies of biochemical reaction networks, one can derive the functional connectivity, which provides valuable insights into the statistical dependencies arising from the collective dynamics of interactions between pairs of units [ 36 – 42 ]. These statistical dependencies among components can be characterized by well-known measures such as correlation, mutual information, or their analyses using silencing methods [ 40 – 44 ]. However, these measures are symmetric and consequently do not capture the direction of interactions, resulting in the inference of non-directed networks. In contrast, non-symmetric metrics, such as partial correlations and dependency analyses, necessitate recorded data from all other interacting variables within the network [ 45 – 49 ]. Such comprehensive data is often unavailable in many systems. 
Download figure Open in new tab Figure 1: The main goal is to infer the direction of interaction between two variables within a metabolic network. Our focus is on methods designed to uncover the directional influence between these two variables. However, determining the orientation of the network from static data poses a great challenge, necessitating keen analytical and computational techniques. In particular, we recognize that the complete metabolic network under consideration has not been fully characterized. As a result, neither the overall network topology nor other confounding variables have been adequately described or measured. Consequently, we focus on methods designed to infer the pairwise causal direction between two correlated variables. Furthermore, we emphasize that earlier benchmark analyzes of pairwise inference methods [ 50 – 52 ] evaluate performance with respect to various causal mechanisms between pairs of variables, without taking into account the context of interaction networks in biological processes. Such networks can introduce intricate influences due to signal propagation, even when the two variables do not directly interact with one another. Determining the network orientation from steady-state data is therefore challenging. First, we examine various methods for deciphering interaction directions using steady-state data and highlight their domains of failure and success. We survey both statistical scores which allow inference from small data sets, and methods based on machine learning and neural network approaches which require large data sets for training the models. We tested these methods on multiple interaction mechanisms, with and without additional noise. 2. Main Results As mentioned, we aim to infer the direction of interaction, within a partially observed interaction network between biochemical components, to gain insights into some biological processes. 
To do so, we benchmark existing methods to infer directionality from pairwise data. Our contribution lies in the following. Previous comparisons between methods collate various interaction mechanisms, combining linear and non-linear effects, and additive and multiplicative noise. Nevertheless, these previously examined mechanisms do not necessarily capture the complexity of biochemical reaction networks, which possess multiple confounders that may remain unknown, including biochemical cycles and interaction loops, complex signal propagation throughout the network, and pairwise interaction mechanisms that are themselves unknown. 2.1. Benchmarking Methods for Analyzing Synthetic Data We use several interaction mechanisms within complex networks. Specifically, we use the Michaelis-Menten model, which describes gene interaction networks [ 53 ], as well as networks of coupled Rössler oscillators, and coupled Goodwin oscillators which represent a prototypical biological oscillator that characterizes various biological processes such as circadian clocks [ 54 , 55 ]. Further details on these models can be found in Appendix A . The interaction networks are random graphs following the Erdős–Rényi model. In Fig. 2 we show the observations of pairs for the interaction mechanisms considered. Given that both variables are affected by additional influences from other network variables, along with inherent noise for each variable which also propagates throughout the network, determining the direction of the interaction is highly challenging. Download figure Open in new tab Figure 2: Realizations of pairs of interacting variables $(y_i, y_j)$ for different interaction mechanisms: Goodwin, Rössler, and Michaelis-Menten models (left to right, respectively). For consistent visualization, we present the direction of interaction $y_i \to y_j$ for all panels, where for each we sample $N = 10^2$ data points. 
We investigated a range of methods, including those grounded in statistical scores, stacking techniques, and artificial neural networks (NNs). The statistical scores we analyzed include the Additive Noise Model (ANM), Conditional Distribution Similarity (CDS), Information Geometric Causal Inference (IGCI), and Regression-Error-based Causal Inference (RECI). Additionally, we examined various stacking methods, such as voting, eXtreme Gradient Boosting (XGBoost), and a recently introduced Causal Ensemble Learning method based on Support Measure Machines (CEMM). Furthermore, we explored several proposed deep neural network architectures. A comprehensive overview and description of these inference methods can be found in Table 1 and in Appendix B . The implementations were based on [ 56 ] wherever applicable. View this table: View inline View popup Download powerpoint Table 1: Pairwise interaction orientation methods. a For uniform notation we assume that the variable x affects the variable y , i.e. x → y . The noise term is marked with η . b Abbreviations: NN = neural networks, RKHS = reproducing kernel Hilbert space, DAG = directed acyclic graph, FiLM = Feature-wise Linear Modulation, XGBoost = eXtreme Gradient Boosting, SMM = Support Measure Machine. The techniques that have been tested provide a rich composition of methods, each incorporating different approaches to inference. This variety offers a solid foundation for benchmarking assessment. However, we have not examined all possible methods that have been proposed in the literature. Certain methods were excluded for several reasons, including limitations in computational capacity, the unavailability of published code from the relevant articles, and a lack of applicability to our data. Specifically, some methods require information that is inaccessible or involve data types that do not align with the data of our interest. As mentioned, the methods are reviewed in Table 1 and Appendix B . 2.1.1. 
Performance on Random Synthetic Networks We show in Fig. 3 that for the biochemical reaction networks we examined, most methods we assessed struggle with direction determination, as no method presents excellent accuracy, say beyond 0.9. This somewhat inferior accuracy is expected, especially for the statistical scores, since they rely on strong assumptions about the statistical characteristics of the data, which are not necessarily satisfied in our systems. In general, accuracy around 0.5 indicates a random choice between two equally probable cases, while accuracy lower than that implies systematic errors in the statistical scores arising from their unfulfilled assumptions. Download figure Open in new tab Figure 3: Results for the tested directionality inference methods for various interaction mechanisms. However, we found that some methods perform better than others. In particular, the NCC method yields results that surpass the baseline of random choice located at 0.5, with accuracies ranging from 0.7 to 0.9. We find that the NCC results exceed other methods for various interaction mechanisms and noise scales that were considered. We further investigate insights from these results in the next subsections. Interestingly, for the Rössler coupled oscillators, we found that the CEMM, XGBoost, and CDS methods produced relatively successful results, albeit falling short when compared to the NCC method (see Fig. 3 ). Among the stacking methods, XGBoost and CEMM demonstrated accuracy that places them secondary to NCC, achieving an approximate accuracy of 0.7. Additionally, for the Rössler coupled oscillators, the statistical score from CDS exhibits similar accuracy for larger values of N. In contrast, for the other interaction mechanisms we examined, the CDS, CEMM, and XGBoost stacking methods, along with the other inference approaches tested—excluding NCC—failed to accurately infer the direction of the interaction. 
Moreover, our findings indicate some dependence on the noise level. We tested the performance for three noise levels. For data synthesized with the Goodwin mechanism, a higher noise level seems preferable, and success was found for the NCC method. Other dynamic mechanisms show comparable results for all noise levels tested. Additionally, the finite number of sampled data points, N, induces additional measurement noise from the finite sampling itself. For small N, most methods show accuracy close to 0.5, which signifies the chance accuracy of a dummy classifier for the two possible directions. As N increases, some methods improve their accuracy, while for others the accuracy stays at random chance or even decreases below it, due to the systematic errors that emerge from the statistical assumptions. Given that observations are typically on the order of $10^2$–$10^3$ samples, we do not consider a larger set of data points. 2.1.2. Further inspection of the NCC model To expand the analyses even further, we have tested the NCC method as a candidate for a directionality inference method. We first examine the performance of the model trained on a given dataset drawn from one mechanism, to infer interaction direction from another mechanism, see Fig. 4 . As expected, each NCC model performs better on the data mechanism it was trained on. Obviously, for a trained model to be effective, the training data must be similar to the data on which inference is to be performed. Nevertheless, in the biological processes under question, one does not necessarily have such information. Download figure Open in new tab Figure 4: Testing data vs trained NCC models. As expected, using training and testing datasets from different mechanisms is ineffective. We also trained an additional model using simulated data generated with randomly selected interaction mechanisms, referred to as ‘mixed’. Unlike before, we did not specify particular mechanisms. 
This new model yielded slightly improved results, exceeding the random accuracy for two different mechanisms we examined. The analysis also considers the correlation coefficients of Kendall’s τ and Spearman’s ρ , along with the mutual information between the two variables. These measures evaluate the observed dependence between the two variables in question. Higher values suggest that the dynamics of one variable depend on the other. We found that a strong dependence between the variables results in a higher probability of successfully determining the interaction direction, see Fig. 5 . In our synthetic data, all examined edges represent defined interactions between the nodes, where one variable categorically influences the other. However, weak interactions may be empirically detected due to sampling errors, the propagation of noise from other parts of the network, and the influence of various confounding factors on the dynamics of both variables. Download figure Open in new tab Figure 5: Dependence quantities influence the probability for meaningful training of the NCC model and thus to a successful inference. Results for the Kendall τ coefficient (left), Spearman ρ coefficient (middle), and mutual information (right) show that stronger dependence between the two variables corresponds to increased success rates in determining the direction of interaction between them. 2.1.3. Model Performance for Realistic Biological Systems We test the applicability of the above models for inference of the interaction direction within realistic biological processes. To do so, we simulate these biological processes and test the performance of the inferred interaction direction. Specifically, we first use a previously proposed biologically realistic system - a model that is based on the E. coli gene regulatory network (GRN) provided in the DREAM challenge [ 57 – 59 ]. 
In addition, we simulated the Bile Acid Synthesis Pathway (BASP) model specified in [ 60 ] for describing part of the cholesterol metabolism. Finally, we simulate sphingolipid metabolism using computationally determined enzyme kinetic parameters and a detailed metabolic network based on KEGG: map00600. The details of all models are provided in Appendix A . Testing the models on synthetic yet realistic biological process data enables us to evaluate the anticipated performance of the inference method in a controlled environment before applying it to actual data of interest. We utilize our pre-trained NCC models and apply them to the biological processes mentioned above, see Fig. 6 . Our simulation results indicate that the inference approaches explored demonstrate limited success in elucidating the interactions within the examined biological network. Download figure Open in new tab Figure 6: Performance for Realistic Biological Systems. The models used for direction inference are NCC with training data as specified in each panel. Models are detailed in Appendix A. 3. Discussion and Summary Inferring the direction of interaction between two components within a complex biochemical network from time-independent observations is an important, yet challenging, task. One key significance of inferring directed biochemical networks in human health research lies in the ability to control and recover metabolic performance in faulty or sub-optimally operating cells, such as those with mutations [ 5 ]. Additionally, it is essential to understand how signal transduction networks influence cancer cell behavior, including proliferation, survival, invasiveness, and drug resistance [ 3 ]. Therefore, the study of biological interaction networks, and in particular the inference of the direction of the biochemical reactions, is essential as it may eventually lead to new therapeutic approaches and personalized treatments. 
Here we examined various methods to determine the interaction directionality while considering the characteristics of the biological observations of metabolites; the small data set, many variables that are unknown or unobserved, the multiplicity of confounders, the noise propagation through the network, the unknown dynamics mechanism between the variables, and more. Since many variables are undetermined, we concentrated on pairwise directionality inference. We compared various approaches and found that they are limited in effectively determining the direction of influence. This limitation may stem from unfulfilled assumptions, such as the presence of directed acyclic graphs or the introduction of additional noise. Furthermore, the complexity can increase due to different interaction mechanisms, multiple undetermined confounders, and various sources of noise. Therefore, we suggest that methods for determining interaction orientation should be specifically tailored to the system being studied whenever possible. The inference of pairwise directionality from multiple snapshots, especially in the absence of temporal information, should be approached with caution. In particular, one must be wary of interpreting the interaction directionality as indicative of ‘causality.’ This term refers to scenarios where direct changes or interventions in one variable influence the state of another variable. Consequently, ‘causality’ is typically discussed in the context of temporal data, where direct interventions and their subsequent effects can be observed over time. However, while some argue that ‘causality’ should only apply to temporal data, others extend its definition to include an interpretation based on static observations - this dissension remains open for further research. Supplementary Materials Appendix A. Synthetic Data generation For numerical demonstrations, we generate a random directed graph Ĝ that holds the topology of the network. 
Mathematically, the graph is described by the adjacency matrix with elements G ij = 1 where the state of j affects the dynamics of node i , i.e., j → i , and G ij = 0 otherwise. We examined systems with Erdös Rényi random networks similarly to [ 64 ]. The synthetic data is generated as follows. First, we generated a long temporal trajectory of ( y i , y j ) following Here Ĝ is the adjacency matrix of the Erdös Rényi random interaction network. The function Int[ y i , y j ] defines both the mechanism of the interaction, the effect of variable j ≠ i upon the dynamics of variable i , and the self-regulation, the effect of the level y i on oneself dynamics. The noise term can be either additive noise or multiplicative noise. We note that we do not aim to determine the interaction mechanism Int[ y i , y j ], and we do not subject ourselves to any specific form of it. Moreover, we note that Int[ y i , y j ] defines only direct interaction between the variables. However, indirect effects might present as well, especially where the networks we examine might be cyclic graphs due to the nature of the biochemical process. Appendix A.1. Models Continuum Michaelis–Menten Regulatory Network The dynamic in the Michaelis–Menten (MM) regulatory network is given by the where the interaction function for a given edge is given by where we choose between the activation (top) and suppression (bottom) randomly, with equal probability to either choice. For variables that are not affected by any other variable, i.e. ∑ j G ij = 0, we use; f = 1. The argument of half-maximum rate is determined as K = 1. All variables are self-regularized by the term − β i y i , where β i represents the degradation rate of molecule i and is chosen in the simulation to be drawn from Gaussian distribution with mean 1 and noise scale of 0.1. 
Coupled Goodwin Oscillators The dynamic for the triplet ( x i ( t ), y i ( t ), z i ( t )) is given by the where the numeric simulation took place with the parameters K = 1, a i = b i = c i = 0.4, v o = 1, n = 17 as the same as [ 64 ]. Rössler Oscillators The dynamic for the triplet ( x i ( t ), y i ( t ), z i ( t )) is given by the where the numeric simulation took place with the parameters K = 1, a i = 0.1, b i = 1 and c i = 18.0 for every i as the same as [ 34 ]. Bile Acid Synthesis Pathway The Bile Acid Synthesis Pathway (BASP) is modeled by with parameters given in Table below. View this table: View inline View popup Download powerpoint The variables are the concentrations of the intercellular cholesterol [IC], bile acid [BA], the Cholesterol 7 α Hydroxylase [C7H], and the Returned Bile Acids [RBA]. The ‘external signal’ is modeled by uniform distribution, means D ∼ Uniform[0, 1]. Gene Regulatory Interactions in E. coli As mentioned we also examine more biologically realist model which is based on a subset of an E. coli gene regulatory network and was provided in the DREAM challenge [ 57 – 59 ]. There, the in-silico network inference challenge investigated how well gene networks can be deduced from simulated data. The network is derived as subgraphs from the recognized E. coli and S. cerevisiae gene regulation networks [ 72 ]. Meaning that the results presented are thus biologically realistic, i.e., aiming to capture a reasonable network, but not given from a real observation, and the gene indexes are thus arbitrary. The E. coli gene regulatory network is defined and simulated with the following equations: with parameters given in Table below. View this table: View inline View popup Download powerpoint These parameters are taken from [ 57 – 59 ]. The ‘external disturbance’ is modeled by uniform distribution, means D, U ∼ Uniform[0, 1]. 
Sphingolipid Metabolism The model includes reactions involved in sphingomyelin hydrolysis, de novo synthesis of ceramide, and the salvage pathway, where we are specifically following three fatty acid chain molecular species. Specific reactions included in the model, as well as enzymes included in the model, are shown in Figure 6 (C). The abbreviations used in this figure are provided as follows. CER: Ceramide, HEXCER: Hexosylceramide, PC: Phosphatidylcholine, S1P: Sphinganine-1-phosphate, SM: Sphingomyelin, SPH: Sphinganine. The model aims to include the number of enzymes shown to be involved in these reactions (following reaction information obtained from KEGG: map00600, UNIPROT, and Rhea), and does not represent any specific biological situation. The enzyme kinetics are calculated using the method developed by Kroll et al. [ 73 ]. The kinetic rate for each enzyme involved in the reaction is calculated separately for specific reactant and product pairs, where the model uses the enzyme sequence and reactant and product SMILES strings to predict the kinetic rate. The mass action model of reaction is calculated using the sum of kinetic parameters for all enzymes involved in the reaction. The model is run until reaching the steady state where the values for serine, 3-ketodihydrosphingosine, and PC are kept constant, as necessary metabolites for the system. For other metabolites, we are assuming a closed system. This model was simulated using MATLAB (MathWorks, Inc.). The MATLAB code for this model will be made available upon reasonable request. Appendix B. Overview of Pairwise Orientation Methods Several notable methods have been proposed to address the bivariate causal discovery problem. Early techniques relied on strong assumptions about the dynamic mechanisms, such as additive noise or functional models. However, biochemical variables may not meet these assumptions, particularly when they evolve within complex interaction networks. 
To determine which methods might be suitable for network orientation analyses, we examine various approaches using synthetic data. The techniques we examined are based on three features: (1) statistical scores quantified for both directions and presume some statistical dependence (e.g., additional noise model), (2) modeling using neural networks, and (3) an ensemble of models using the concept of meta-learning. Briefly, the statistical scores require us to assume the dynamic mechanism between the two variables but allow inference from a relatively low number of data points. Conversely, methods involving the stacking of statistical features or neural network modeling are heavily data-consuming. Appendix B.1. Statistical Scores Methods for interaction direction inference that involve some statistical scores aim to quantify the asymmetric conditional dependence between the two variables. However, these statistical scores are strongly based on functional causal models (FCM) between the two continuous variables. It means that for two variables x and y , the edge x → y means that y = f ( x, θ, η ) where θ is the parameters sets, and η is a noise term. The causal structure is identifiable whenever there are no unobserved confounders, it belongs to a restricted functional class, and suitable constraints are imposed on η . It has been shown that without any further assumption on the function f , the causal direction is not identifiable because for both directions one can find an independent noise term [ 74 , 75 ] Inspired by [ 51 ], we test in the benchmark the following statistical scores. Additive Noise Model (ANM) The additive noise model (ANM) assumes that y = f ( x ) + η where the noise η and variable x are independent. The additive noise model is one of the most popular approaches for pairwise causality. It is based on the fitness of the data to the additive noise model in one direction and the rejection of the model in the other direction. 
The data is assumed to be continuous [ 61 ]. Conditional Distribution Similarity statistics (CDS) Assume that the shape of the conditional distribution p ( Y | X = x ) tends to be similar for different values of x if the random variable X is the cause of Y (i.e., if X → Y ). Then, one of the quantities that captures this variability is the standard deviation of the scaled values of y after binning in the x direction. A lower standard deviation indicates x → y . This measure is defined as: [equation missing from the extracted text]. Information Geometric Causal Inference (IGCI) This approach considers a deterministic process that follows y = f ( x ), where it is assumed that the function f is invertible, which means that f −1 exists. The direction of interaction is then determined by measuring irregularities by the distance to an exponential family using the information statistics metric, i.e., Kullback-Leibler statistics [ 63 ]. Furthermore, the authors show the applicability of such statistical measures for processes evolving with small additive noise [ 63 ]. Regression Error based Causal Inference (RECI) The approach is based on the assumption that a regression fit in the true causal direction yields smaller errors, on average, than when fitting the model in the opposite direction. It allows non-deterministic nonlinear relations between the cause and the effect. The analysis takes place by fitting a least squares regression in both possible causal directions, and the causal direction is chosen to be the direction with the lower mean-squared errors (MSE) [ 65 ]. For the regression implementations, we use polynomial regression of degree 3 [ 56 ]. Appendix B.2. Machine Learning Techniques Beyond the statistical methods presented above, which, as mentioned, require some strong assumptions or prior knowledge about the causal mechanism, different approaches use machine learning (ML) techniques to detect the direction of the interaction. 
These methods, however, pose some challenges since they need prior learning of a labeled training set and require large datasets that are not necessarily accessible. Moreover, these methods require additional cost, the need for graphics processing units (GPUs), and computational time due to the complexity of the algorithms. Generally, ML methods aim to infer the direction of interaction from hidden patterns within the training data, without explicitly coding the features required for inference. It can be done by using neural network (NN) modeling, or by stacking many properties of the system in the so-called meta-learning techniques. Causal Generative Neural Networks (CGNN) The method aims to learn the multivariate functional causal model f using a generative NN. The use of an NN to learn f allows for not explicitly restricting the class of functions allowed. Particularly, it models f for both directions, y → x and x → y , with a neural network with one hidden layer. Then it chooses the direction that provides the best fit - using a non-parametric score, the Maximum Mean Discrepancy [ 67 ]. Randomized Causation Coefficient (RCC) The method utilizes a NN that is constructed with two parts - kernel embedding layers and a classifier. The former part is based on the projection of the observational distributions into Reproducing Kernel Hilbert Space (RKHS) using random cosine embedding, and the classifier is a random forest [ 69 ]. Neural Causation Coefficient (NCC) The neural network (NN) is designed with embedding layers that facilitate the learning of feature maps, which are essential for understanding the data. Following the embedding layers, the architecture includes binary classifier layers, both of which are structured as multilayer perceptrons [ 68 ]. 
Stacking with Gradient Boosting Classification - JARFo This method is an ensemble learning algorithm that combines many statistical features, including statistical measures, information measures, and measures of the conditional probability variability. The stacking of all these features is done with gradient-boosting classification. In the literature, it is named after its designer - José A. R. Fonollosa (JARFo) - [ 62 ]. Causal Ensemble Measure Machine (CEMM) This method stacks the statistical scores using the support measure machine (SMM) as follows. First, an SMM is trained to classify each score as to whether it successfully detects the direction or not. The stacking then involves ‘flipping’ the wrongly assigned direction [ 51 ]. References [1]. ↵ S. P. Cornelius , W. L. Kath , A. E. Motter , Realistic control of network dynamics , Nature communications 4 ( 1 ) ( 2013 ) 1942 . OpenUrl PubMed [2]. M. Timme , J. Casadiego , Revealing networks from dynamics: an introduction , Journal of Physics A: Mathematical and Theoretical 47 ( 34 ) ( 2014 ) 343001 . OpenUrl [3]. ↵ W. Kolch , M. Halasz , M. Granovskaya , B. N. Kholodenko , The dynamic control of signal transduction networks in cancer cells , Nature Reviews Cancer 15 ( 9 ) ( 2015 ) 515 – 527 . OpenUrl CrossRef PubMed [4]. T. M. Karrer , J. Z. Kim , J. Stiso , A. E. Kahn , F. Pasqualetti , U. Habel , D. S. Bassett , A practical guide to methodological considerations in the controllability of structural brain networks , Journal of neural engineering 17 ( 2 ) ( 2020 ) 026031 . OpenUrl PubMed [5]. ↵ A. E. Motter , N. Gulbahce , E. Almaas , A.-L. Barabási , Predicting synthetic rescues in metabolic networks , Molecular systems biology 4 ( 1 ) ( 2008 ) 168 . OpenUrl Abstract / FREE Full Text [6]. ↵ W. Weckwerth , Metabolomics in systems biology , Annual review of plant biology 54 ( 1 ) ( 2003 ) 669 – 689 . OpenUrl CrossRef PubMed Web of Science [7]. J. Gao , V. G. Tarcea , A. Karnovsky , B. R. Mirel , T. E. Weymouth , C. W. Beecher , J. D. 
Cavalcoli , B. D. Athey , G. S. Omenn , C. F. Burant , et al. , Metscape: a cytoscape plug-in for visualizing and interpreting metabolomic data in the context of human metabolic networks , Bioinformatics 26 ( 7 ) ( 2010 ) 971 – 973 . OpenUrl CrossRef PubMed Web of Science [8]. ↵ A. Karnovsky , T. Weymouth , T. Hull , V. G. Tarcea , G. Scardoni , C. Laudanna , M. A. Sartor , K. A. Stringer , H. Jagadish , C. Burant , et al. , Metscape 2 bioinformatics tool for the analysis and visualization of metabolomics and gene expression data , Bioinformatics 28 ( 3 ) ( 2012 ) 373 – 380 . OpenUrl CrossRef PubMed Web of Science [9]. W. Weckwerth , K. Morgenthal , Metabolomics: from pattern recognition to biological interpretation , Drug discovery today 10 ( 22 ) ( 2005 ) 1551 – 1558 . OpenUrl CrossRef PubMed Web of Science [10]. ↵ L. Perez De Souza , S. Alseekh , Y. Brotman , A. R. Fernie , Network-based strategies in metabolomics data analysis and interpretation: from molecular networking to biological interpretation , Expert Review of Proteomics 17 ( 4 ) ( 2020 ) 243 – 255 . OpenUrl PubMed [11]. ↵ A.-L. Barabasi , Z. N. Oltvai , Network biology: understanding the cell’s functional organization , Nature reviews genetics 5 ( 2 ) ( 2004 ) 101 – 113 . OpenUrl CrossRef PubMed Web of Science [12]. ↵ M. Vidal , M. E. Cusick , A.-L. Barabási , Interactome networks and human disease , Cell 144 ( 6 ) ( 2011 ) 986 – 998 . OpenUrl CrossRef PubMed Web of Science [13]. ↵ E. E. Schadt , J. Lamb , X. Yang , J. Zhu , S. Edwards , D. GuhaThakurta , S. K. Sieberts , S. Monks , M. Reitman , C. Zhang , et al. , An integrative genomics approach to infer causal associations between gene expression and disease , Nature genetics 37 ( 7 ) ( 2005 ) 710 – 717 . OpenUrl CrossRef PubMed Web of Science [14]. ↵ E. Y. Su , A. Spangler , Q. Bian , J. Y. Kasamoto , P. 
Cahan , Reconstruction of dynamic regulatory networks reveals signaling-induced topology changes associated with germ layer specification , Stem Cell Reports 17 ( 2 ) ( 2022 ) 427 – 442 . OpenUrl PubMed [15]. G. Guillén-Gosálbez , A. Miró , R. Alves , A. Sorribas , L. Jiménez , Identification of regulatory structure and kinetic parameters of biochemical networks via mixed-integer dynamic optimization , BMC systems biology 7 ( 1 ) ( 2013 ) 1 – 11 . OpenUrl PubMed [16]. G. Michailidis , F. d’Alché-Buc , Autoregressive models for gene regulatory network inference: Sparsity, stability and causality issues , Mathematical biosciences 246 ( 2 ) ( 2013 ) 326 – 334 . OpenUrl PubMed [17]. H. Schmidt , K.-H. Cho , E. W. Jacobsen , Identification of small scale biochemical networks based on general type system perturbations , The FEBS Journal 272 ( 9 ) ( 2005 ) 2141 – 2151 . OpenUrl CrossRef PubMed [18]. C. Kirst , M. Timme , D. Battaglia , Dynamic information routing in complex networks , Nature communications 7 ( 1 ) ( 2016 ) 11061 . OpenUrl PubMed [19]. J. Kim , D. G. Bates , I. Postlethwaite , P. Heslop-Harrison , K.-H. Cho , Least-squares methods for identifying biochemical regulatory networks from noisy measurements , BMC bioinformatics 8 ( 1 ) ( 2007 ) 8 . OpenUrl PubMed [20]. A. Wang , J. Pang , Iterative structural inference of directed graphs , Advances in Neural Information Processing Systems 35 ( 2022 ) 8717 – 8730 . OpenUrl [21]. H. Gong , J. Klinger , K. Damazyn , X. Li , S. Huang , A novel procedure for statistical inference and verification of gene regulatory subnetwork , BMC bioinformatics 16 ( 7 ) ( 2015 ) 1 – 10 . OpenUrl CrossRef PubMed [22]. J. E. Larvie , M. G. Sefidmazgi , A. Homaifar , S. H. Harrison , A. Karimoddini , A. Guiseppi-Elie , Stable gene regulatory network modeling from steady-state data , Bioengineering 3 ( 2 ) ( 2016 ) 12 . OpenUrl PubMed [23]. D. J. Warne , R. E. Baker , M. J. 
Simpson , Simulation and inference algorithms for stochastic biochemical reaction networks: from basic concepts to state-of-the-art , Journal of the Royal Society Interface 16 ( 151 ) ( 2019 ) 20180943 . OpenUrl PubMed [24]. ↵ D. Yu , M. Righero , L. Kocarev , Estimating topology of networks , Physical Review Letters 97 ( 18 ) ( 2006 ) 188701 . OpenUrl PubMed [25]. ↵ L. Wang , S. Huang , S. Wang , J. Liao , T. Li , L. Liu , A survey of causal discovery based on functional causal model , Engineering Applications of Artificial Intelligence 133 ( 2024 ) 108258 . OpenUrl [26]. C. Gong , C. Zhang , D. Yao , J. Bi , W. Li , Y. Xu , Causal discovery from temporal data: An overview and new perspectives , ACM Computing Surveys 57 ( 4 ) ( 2024 ) 1 – 38 . OpenUrl [27]. D. Bhaskar , D. S. Magruder , M. Morales , E. De Brouwer , A. Venkat , F. Wenkel , G. Wolf , S. Krishnaswamy , Inferring dynamic regulatory interaction graphs from time series data with perturbations , in: Learning on Graphs Conference , PMLR , 2024 , pp. 22 – 1 . [28]. A. Theocharous , G. G. Gregoriou , P. Sapountzis , I. Kontoyiannis , Temporally causal discovery tests for discrete time series and neural spike trains , IEEE Transactions on Signal Processing ( 2024 ). [29]. J. Wan , J. Kataoka , J. Sivakumar , E. Peña , Y. Che , H. Sayama , C. Cheng , Sparse bayesian learning for sequential inference of network connectivity from small data , IEEE Transactions on Network Science and Engineering ( 2024 ). [30]. J. Casadiego , D. Maoutsa , M. Timme , Inferring network connectivity from event timing patterns , Physical review letters 121 ( 5 ) ( 2018 ) 054101 . OpenUrl PubMed [31]. L. Rouillard , L. Ambrogioni , D. Wassermann , Robust and highly scalable estimation of directional couplings from time-shifted signals , arXiv preprint arxiv: 2406.02545 ( 2024 ). [32]. J. Yang , J. Huang , Q. 
Chen , Construction of time series causal network based on partial rank correlation , Knowledge-Based Systems 295 ( 2024 ) 111865 . OpenUrl [33]. N. Irribarra , K. Michell , C. Bermeo , W. Kristjanpoller , A multi-head attention neural network with non-linear correlation approach for time series causal discovery , Applied Soft Computing 165 ( 2024 ) 112062 . OpenUrl [34]. ↵ J. Casadiego , M. Nitzan , S. Hallerberg , M. Timme , Model-free inference of direct network interactions from nonlinear collective dynamics , Nature communications 8 ( 1 ) ( 2017 ) 2192 . OpenUrl PubMed [35]. ↵ A. Wang , T. P. Tong , A. Mizera , J. Pang , Benchmarking structural inference methods for interacting dynamical systems with synthetic data , Advances in Neural Information Processing Systems 37 ( 2025 ) 135129 – 135185 . OpenUrl [36]. ↵ J. Pearl , Causal inference in statistics: An overview , Statistics Surveys 3 (none) ( 2009 ) 96 – 146 . doi: 10.1214/09-SS057 . URL 10.1214/09-SS057 OpenUrl CrossRef [37]. F. Liu , L. Chan , Causal inference on discrete data via estimating distance correlations , Neural computation 28 ( 5 ) ( 2016 ) 801 – 814 . OpenUrl PubMed [38]. A. Marx , J. Vreeken , Telling cause from effect using mdl-based local and global regression , in: 2017 IEEE international conference on data mining (ICDM) , IEEE , 2017 , pp. 307 – 316 . [39]. A. Marx , J. Vreeken , Telling cause from effect by local and global regression , Knowledge and Information Systems 60 ( 2019 ) 1277 – 1305 . OpenUrl [40]. ↵ A. Rosato , L. Tenori , M. Cascante , P. R. De Atauri Carulla , V. A. Martins dos Santos , E. Saccenti , From correlation to causation: analysis of metabolomics data using systems biology approaches , Metabolomics 14 ( 2018 ) 1 – 20 . OpenUrl CrossRef PubMed [41]. L. Peel , T. P. Peixoto , M. De Domenico , Statistical inference links data and theory in network science , Nature Communications 13 ( 1 ) ( 2022 ) 6794 . OpenUrl PubMed [42]. ↵ O. M. Cliff , A. G. Bryant , J. T. 
Lizier , N. Tsuchiya , B. D. Fulcher , Unifying pairwise interactions in complex dynamics , Nature Computational Science 3 ( 10 ) ( 2023 ) 883 – 893 . OpenUrl CrossRef PubMed [43]. B. Barzel , A.-L. Barabási , Network link prediction by global silencing of indirect correlations , Nature biotechnology 31 ( 8 ) ( 2013 ) 720 – 725 . OpenUrl CrossRef PubMed [44]. ↵ C. Glymour , K. Zhang , P. Spirtes , Review of causal discovery methods based on graphical models , Frontiers in genetics 10 ( 2019 ) 524 . OpenUrl PubMed [45]. ↵ M. Paluş , M. Vejmelka , Directionality of coupling from bivariate time series: How to avoid false causalities and missed connections , Physical Review E 75 ( 5 ) ( 2007 ) 056211 . OpenUrl [46]. M. Eichler , R. Dahlhaus , J. Sandkühler , Partial correlation analysis for the identification of synaptic connections , Biological cybernetics 89 ( 4 ) ( 2003 ) 289 – 302 . OpenUrl CrossRef PubMed Web of Science [47]. R. Aghdam , M. Ganjali , X. Zhang , C. Eslahchi , Cn: a consensus algorithm for inferring gene regulatory networks using the sorder algorithm and conditional mutual information test , Molecular BioSystems 11 ( 3 ) ( 2015 ) 942 – 949 . OpenUrl CrossRef PubMed [48]. X. Zhang , J. Zhao , J.-K. Hao , X.-M. Zhao , L. Chen , Conditional mutual inclusive information enables accurate quantification of associations in gene regulatory networks , Nucleic acids research 43 ( 5 ) ( 2015 ) e31 – e31 . OpenUrl CrossRef PubMed [49]. ↵ Y. Jacob , Y. Winetraub , G. Raz , E. Ben-Simon , H. Okon-Singer , K. Rosenberg-Katz , T. Hendler , E. Ben-Jacob , Dependency network analysis (depna) reveals context related influence of brain network nodes , Scientific Reports 6 ( 1 ) ( 2016 ) 27444 . OpenUrl PubMed [50]. ↵ C. Käding , J. Runge , Distinguishing cause and effect in bivariate structural causal models: A systematic investigation , Journal of Machine Learning Research 24 ( 278 ) ( 2023 ) 1 – 144 . OpenUrl [51]. ↵ G. Varando , S. Catsis , E. Diaz , G. 
Camps-Valls , Pairwise causal discovery with support measure machines , Applied Soft Computing 150 ( 2024 ) 111030 . OpenUrl [52]. ↵ W. Niu , Z. Gao , L. Song , L. Li , Comprehensive review and empirical evaluation of causal discovery algorithms for numerical data , Journal of Machine Learning Research ( 23 ) ( 2024 ) 1 – 78 . [53]. ↵ G. Karlebach , R. Shamir , Modelling and analysis of gene regulatory networks , Nature reviews Molecular cell biology 9 ( 10 ) ( 2008 ) 770 – 780 . OpenUrl CrossRef PubMed Web of Science [54]. ↵ B. C. Goodwin , Oscillatory behavior in enzymatic control processes , Advances in enzyme regulation 3 ( 1965 ) 425 – 437 . OpenUrl CrossRef PubMed [55]. ↵ P. Ruoff , M. Vinsjevik , C. Monnerjahn , L. Rensing , The goodwin oscillator: on the importance of degradation reactions in the circadian clock , Journal of biological rhythms 14 ( 6 ) ( 1999 ) 469 – 479 . OpenUrl CrossRef PubMed Web of Science [56]. ↵ D. Kalainathan , O. Goudet , R. Dutta , Causal discovery toolbox: Uncovering causal relationships in python , Journal of Machine Learning Research 21 ( 37 ) ( 2020 ) 1 – 5 . OpenUrl PubMed [57]. ↵ T. Schaffter , D. Marbach , D. Floreano , Genenetweaver: in silico benchmark generation and performance profiling of network inference methods , Bioinformatics 27 ( 16 ) ( 2011 ) 2263 – 2270 . OpenUrl CrossRef PubMed Web of Science [58]. F. Liu , S.-W. Zhang , W.-F. Guo , Z.-G. Wei , L. Chen , Inference of gene regulatory network based on local bayesian networks , PLoS computational biology 12 ( 8 ) ( 2016 ) e1005024 . OpenUrl [59]. ↵ M. Foo , J. Kim , D. G. Bates , Modelling and control of gene regulatory networks for perturbation mitigation , IEEE/ACM Transactions on Computational Biology and Bioinformatics 16 ( 2 ) ( 2018 ) 583 – 595 . OpenUrl [60]. ↵ F. Zhang , B. Macshane , R. Searcy , Z. Huang , Mathematical models for cholesterol metabolism and transport , Processes 10 ( 1 ) ( 2022 ) 155 . OpenUrl [61]. ↵ P. Hoyer , D. Janzing , J. M. 
Mooij , J. Peters , B. Schölkopf , Nonlinear causal discovery with additive noise models , Advances in neural information processing systems 21 ( 2008 ). [62]. ↵ J. A. Fonollosa , Conditional distribution variability measures for causality detection , Cause Effect Pairs in Machine Learning ( 2019 ) 339 – 347 . [63]. ↵ P. Daniušis , D. Janzing , J. Mooij , J. Zscheischler , B. Steudel , K. Zhang , B. Schölkopf , Inferring deterministic causal relations , arXiv preprint arxiv: 1203.3475 ( 2012 ). [64]. ↵ M. Nitzan , J. Casadiego , M. Timme , Revealing physical interaction networks from statistics of collective dynamics , Science advances 3 ( 2 ) ( 2017 ) e1600396 . OpenUrl FREE Full Text [65]. ↵ P. Blöbaum , D. Janzing , T. Washio , S. Shimizu , B. Schölkopf , Analysis of cause-effect inference by comparing regression errors , PeerJ Computer Science 5 ( 2019 ) e169 . OpenUrl [66]. N. Leibovich , Determining interaction directionality in complex biochemical networks from stationary measurements , bioRxiv ( 2024 ) 2024 – 04 . [67]. ↵ O. Goudet , D. Kalainathan , P. Caillou , I. Guyon , D. Lopez-Paz , M. Sebag , Learning functional causal models with generative neural networks , Explainable and interpretable models in computer vision and machine learning ( 2018 ) 39 – 80 . [68]. ↵ D. Lopez-Paz , R. Nishihara , S. Chintala , B. Scholkopf , L. Bottou , Discovering causal signals in images , in: Proceedings of the IEEE conference on computer vision and pattern recognition , 2017 , pp. 6979 – 6987 . [69]. ↵ D. Lopez-Paz , K. Muandet , B. Schölkopf , I. Tolstikhin , Towards a learning theory of cause-effect inference , in: International Conference on Machine Learning , PMLR , 2015 , pp. 1452 – 1461 . [70]. Y. Yuan , X. Ding , Z. Bar-Joseph , Causal inference using deep neural networks , arXiv preprint arxiv: 2011.12508 ( 2020 ). [71]. J.-F. Ton , D. Sejdinovic , K. Fukumizu , Meta learning for causal direction , in: Proceedings of the AAAI conference on artificial intelligence , Vol. 35 , 2021 , pp. 9897 – 9905 . OpenUrl [72]. ↵ D. Marbach , T. Schaffter , C. 
Mattiussi , D. Floreano , Generating realistic in silico gene networks for performance assessment of reverse engineering methods , Journal of computational biology 16 ( 2 ) ( 2009 ) 229 – 239 . OpenUrl CrossRef PubMed Web of Science [73]. ↵ A. Kroll , S. Ranjan , M. K. Engqvist , M. J. Lercher , A general model to predict small molecule substrates of enzymes based on machine and deep learning , Nature communications 14 ( 1 ) ( 2023 ) 2787 . OpenUrl PubMed [74]. ↵ A. Hyvärinen , P. Pajunen , Nonlinear independent component analysis: Existence and uniqueness results , Neural networks 12 ( 3 ) ( 1999 ) 429 – 439 . OpenUrl CrossRef PubMed Web of Science [75]. ↵ K. Zhang , Z. Wang , J. Zhang , B. Schölkopf , On estimation of functional causal models: general results and application to the post-nonlinear causal model , ACM Transactions on Intelligent Systems and Technology (TIST) 7 ( 2 ) ( 2015 ) 1 – 22 . OpenUrl View the discussion thread. Back to top Previous Next Posted July 26, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Pairwise Causal Discovery in Biochemical Network: A Survey on Directionality Inference within Complex Networks from Stationary Observations Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Pairwise Causal Discovery in Biochemical Network: A Survey on Directionality Inference within Complex Networks from Stationary Observations Nava Leibovich , Miroslava Cuperlovic-Culf bioRxiv 2025.07.22.666141; doi: https://doi.org/10.1101/2025.07.22.666141 Share This Article: Copy Citation Tools Pairwise Causal Discovery in Biochemical Network: A Survey on Directionality Inference within Complex Networks from Stationary Observations Nava Leibovich , Miroslava Cuperlovic-Culf bioRxiv 2025.07.22.666141; doi: https://doi.org/10.1101/2025.07.22.666141 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Systems Biology Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41910) Biophysics (21436) Cancer Biology (18576) Cell Biology (25480) Clinical Trials (138) Developmental Biology (13368) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15598) Genomics (22482) Immunology (17726) Microbiology (40360) Molecular Biology (17163) Neuroscience (88534) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15129) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00