Elucidating Protein Dynamics through the Optimal Annealing of Variational Autoencoders

doi:10.1101/2025.01.08.632051

Elucidating Protein Dynamics through the Optimal Annealing of Variational Autoencoders

2025 · doi:10.1101/2025.01.08.632051

preprint OA: closed CC-BY-NC-4.0

📄 Open PDF Full text JSON View at publisher

Full text 54,921 characters · extracted from preprint-html · click to expand

Elucidating Protein Dynamics through the Optimal Annealing of Variational Autoencoders | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Elucidating Protein Dynamics through the Optimal Annealing of Variational Autoencoders Subinoy Adhikari , Jagannath Mondal doi: https://doi.org/10.1101/2025.01.08.632051 Subinoy Adhikari 1 Tata Institute of Fundamental Research , Hyderabad 500046, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jagannath Mondal 1 Tata Institute of Fundamental Research , Hyderabad 500046, India Find this author on Google Scholar Find 
this author on PubMed Search for this author on this site For correspondence: jmondal{at}tifrh.res.in Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Proteins traverse intricate conformational landscapes, with transitions and long-lived states that hold the key to their biological function. Yet, unraveling these dynamics remains a formidable challenge. An emerging approach has been to train the conformational ensemble via deep Variational autoencoders (VAEs) in a bid to machine learn the underlying reduced dimensional representation. However, training VAEs typically involves a fixed β value of 1, where β acts as the crucial weighting factor between the reconstruction and regularization terms. This static setup can often lead to posterior collapse, which significantly hinders the model’s ability to capture complex protein dynamics accurately. To mitigate this issue, annealing the β parameter offers a potential alternative. However, this approach frequently falls short in fully addressing the problem, largely due to the arbitrary choice of the upper bound of β and of the annealing schedule. In this work, we introduce an innovative approach for selecting the β parameter by utilizing the Fraction of Variance Explained (FVE) score to identify its optimal value. We demonstrate that training annealed VAEs at their optimum β in a single cycle consistently outperformed their non-annealed counterparts, as evident from their higher Variational Approach for Markov Processes-2 and Generalized Matrix Rayleigh Quotient scores and distinct free energy surface minima on both folded and intrinsically disordered proteins. The improved latent space representations significantly improve state space discretization, thereby refining Markov State Models and providing more accurate insights into conformational landscapes as reflected in distinct contact maps. 
These findings not only underscore the potential of annealed VAEs in resolving complex conformational spaces but also highlight the critical interplay between annealing schedules and latent space structures. Dimension reduction is a crucial method in understanding long molecular dynamics (MD) data. For a protein with N atoms, the system’s dimensionality is 3N. This high dimensionality makes it challenging to identify conformational changes such as domain closure or flexible loop movements 1 which may be lost in the small rapid fluctuations of each atom in the high dimension space. High dimensionality includes both short-term fluctuations and long-term conformational changes, which can mask the identification of stable, long-lived states. 2 , 3 In the full 3N-dimensional space, tracking and interpreting binding 4 and unbinding events, 5 allosteric effect, 6 or induced fit mechanisms in response to a ligand 7 or protein partner can be obscured by the vast number of atomic positions. These interactions often cause specific, localized changes that may go unnoticed in high-dimensional space. Therefore, there is a need to better represent the data in a fewer number of dimensions. Several dimension reduction methods have been actively employed to preserve the key properties and changes in the original space. 8 For instance, Principal Component Analysis (PCA) 9 and time-lagged Independent Component Analysis (tICA) 10 , 11 are popular for capturing variance and slow motions but are limited by their linearity. Nonlinear methods like t-SNE (t-distributed Stochastic Neighbor Embedding) 12 , 13 and UMAP (Uniform Manifold Approximation and Projection) 14 excel at revealing complex, nonlinear patterns in protein dynamics. More recently, deep learning based approaches such as autoencoders 15 – 17 have gained popularity over other non linear dimension reduction methods. 
An autoencoder compresses the input data to a latent space, which typically contains a few dimensions, followed by its decompression to reconstruct the input data. The training objective is to minimize a loss function, ensuring that the latent space captures essential features of the input while discarding noise and redundancies. However, it has often been observed that learning with autoencoders leads to overfitting. To address this challenge, Variational Autoencoders (VAEs) 18 , 19 were developed as a probabilistic extension of traditional autoencoders. Although originally conceived for generative modeling, VAEs have also found applications in dimensionality reduction, especially for molecular systems 20 . By imposing a prior distribution, typically a multivariate Gaussian, on the latent space, VAEs introduce regularization into the learning process. 21 This regularization prevents overfitting by encouraging the latent space to be smooth and structured, leading to better generalization. This modifies the loss function ( $\mathcal{L}_{\mathrm{VAE}}$ ) to incorporate both the reconstruction loss and the Kullback-Leibler (KL) divergence term, which regularizes the latent space and is given as $$\mathcal{L}_{\mathrm{VAE}} = -\mathbb{E}_{q_{\varphi}(z|x)}\left[\log p_{\theta}(x|z)\right] + \beta\, D_{\mathrm{KL}}\left(q_{\varphi}(z|x)\,\|\,p(z)\right)$$ The KL divergence is scaled by the β weight, enabling control over the trade-off between reconstruction accuracy and regularization. 22 , 23 A central focus of this work lies in examining the selection of the β parameter within the VAE framework. We explore the crucial role this parameter plays in effectively balancing the competing objectives of accurate data reconstruction and appropriate regularization of the latent space. A higher value of β encourages a more regularized and disentangled latent space at the cost of reconstruction quality, whereas a lower β prioritizes accurate reconstruction but may lead to an entangled latent space. 
A representation is considered disentangled if it can be broken down into multiple subspaces, each of which aligns with a unique symmetry transformation and can be independently transformed without affecting the others. 24 Therefore, β must be tuned either through visual inspection heuristics or by using disentanglement metrics, such as those obtained by training a simple linear classifier on the latent space of a trained VAE in a supervised manner. However, in the case of MD trajectories, where the data is unsupervised, disentanglement must be assessed through alternative approaches. These include evaluating the clustering of latent representations into meaningful macrostates, analyzing free energy surfaces reconstructed from the latent space, or assessing the ability of the model to capture slow collective motions that are critical to MD studies. While disentanglement focuses on separating independent factors of variation, the latent space in MD studies must also preserve thermodynamic and dynamical relevance, which requires a careful choice of β parameter. Metrics such as the Generalized Matrix Rayleigh Quotient (GMRQ) score 25 and Variational Approach to Markov Processes-2 (VAMP2) score 26 are essential tools for evaluating the quality of the latent space in terms of its ability to capture slow collective variable, and have been used to monitor the effect of β in training VAEs in this work. We have also examined on how to improve the VAE model’s ability to learn disentangled representations. 27 – 30 Early efforts to achieve disentangled representations often relied on prior knowledge of the factors driving data generation. 31 – 39 While several fully unsupervised methods have been developed to learn disentangled factors, these approaches have generally struggled to scale effectively to datasets beyond toy datasets. One of the ways to improve disentanglement is by annealing the β parameter. 
40 , 41 While annealing presents an alternative, it does not provide a universal solution. The effectiveness of annealing often depends on the specific dataset and the chosen annealing schedule, and posterior collapse may still occur. 42 , 43 Finding the optimal settings can vary significantly across different problems and requires parameter sweeps, which can be computationally expensive and time-consuming, especially for high dimensional systems. In this work, we introduce a novel approach for determining the optimal β parameter for training VAEs. Instead of arbitrarily choosing β values, we utilize the Fraction of Variance Explained (FVE) score to identify the optimal β that maximizes the model’s ability to represent the underlying data distribution and avoid posterior collapse. Our findings have been applied to two systems, Trp-cage and α -Synuclein ( α S). Trp-cage is a 20 residue, fast folding miniprotein, which folds in ≈ 4 µs . 44 α S 45 is an intrinsically disordered protein and is associated with movement disorders such as Parkinson’s disease and multiple system atrophy, and with neurodegenerative diseases such as dementia with Lewy bodies. 46 Identifying slower collective variables (CVs) is critical for understanding the folding mechanism as in Trp-cage 47 or capturing the key dynamics of α S that drive its functional flexibility. We have trained VAEs with the optimal β using three different annealing methods and compared them with the regular VAE model, and found that the annealed models outperformed their non-annealed counterparts in capturing slower collective variables. Our approach highlights the utility of annealed VAEs in uncovering meaningful latent representations that refine state space discretization, which is critical for subsequent building of Markov State Models (MSM). Results Probabilistic Approach to Dimensionality Reduction using Variational autoencoder VAE is composed of two parts, an encoder and a decoder . 
The encoder compresses the high dimensional input data x into a lower dimensional continuous latent vector z which is sampled from a learned approximate posterior distribution. This posterior is regularized to be close to a predefined prior distribution p ( z ), typically a standard normal distribution. The job of the decoder is to reconstruct the original data from the conditional distribution p θ ( x | z ). Thus the marginal likelihood of x is given as $$p_{\theta}(x) = \int p_{\theta}(x|z)\, p(z)\, dz$$ However, the integral is intractable because it involves integrating over all possible values of the latent variable z to normalize the posterior distribution, which is given as $$p_{\theta}(z|x) = \frac{p_{\theta}(x|z)\, p(z)}{p_{\theta}(x)}$$ To address this, variational inference is used, in which the true posterior p θ ( z | x ) is approximated by a variational distribution q φ ( z | x ), which is parameterized by the encoder using a mean vector and a variance vector. The closeness of the approximated and the true posterior can be quantified using the KL divergence between them and is given as $$D_{\mathrm{KL}}\left(q_{\varphi}(z|x)\,\|\,p_{\theta}(z|x)\right) = \mathbb{E}_{q_{\varphi}(z|x)}\left[\log q_{\varphi}(z|x) - \log p_{\theta}(z|x)\right]$$ Upon expanding and rearranging, we have $$\log p_{\theta}(x) - D_{\mathrm{KL}}\left(q_{\varphi}(z|x)\,\|\,p_{\theta}(z|x)\right) = \mathbb{E}_{q_{\varphi}(z|x)}\left[\log p_{\theta}(x|z)\right] - D_{\mathrm{KL}}\left(q_{\varphi}(z|x)\,\|\,p(z)\right)$$ In VAEs, we want to maximize the log likelihood of the true data, log p ( x ), and simultaneously minimize the distance between the true and approximated posterior. Thus the training objective becomes minimizing the negative of the left hand side of the above equation. The loss function $\mathcal{L}_{\mathrm{VAE}}$ becomes, $$\mathcal{L}_{\mathrm{VAE}} = -\mathbb{E}_{q_{\varphi}(z|x)}\left[\log p_{\theta}(x|z)\right] + D_{\mathrm{KL}}\left(q_{\varphi}(z|x)\,\|\,p(z)\right)$$ The loss function is the negation of the Evidence Lower Bound (ELBO). Minimizing the loss is equivalent to maximizing the ELBO, which serves as a lower bound to log p ( x ). This encourages accurate reconstructions while enforcing a structured latent space which aligns the approximate posterior to the prior. Learning disentangled representation using β -VAE In many cases, a standard VAE does not ensure that the latent variables capture independent features of the data. This limitation led to the development of β -VAE, which explicitly aims to disentangle the factors of variation in the data. 
This is achieved by constraining the distance between the approximate posterior and the prior to remain below a small constant, δ, $$\min_{\theta,\varphi}\; -\mathbb{E}_{q_{\varphi}(z|x)}\left[\log p_{\theta}(x|z)\right] \quad \text{subject to} \quad D_{\mathrm{KL}}\left(q_{\varphi}(z|x)\,\|\,p(z)\right) < \delta$$ To handle the constraint, the Lagrangian is defined as $$\mathcal{F}(\theta, \varphi, \beta) = -\mathbb{E}_{q_{\varphi}(z|x)}\left[\log p_{\theta}(x|z)\right] + \beta\left(D_{\mathrm{KL}}\left(q_{\varphi}(z|x)\,\|\,p(z)\right) - \delta\right)$$ where β is the Lagrange multiplier. Using the complementary slackness condition from the Karush-Kuhn-Tucker (KKT) conditions, the necessary conditions for optimality in constrained optimization problems can be derived. Since δ is fixed, − β δ is simply a constant offset in the Lagrangian. Constant terms do not affect the gradient with respect to the model parameters, so we can safely omit − β δ in the final objective. Consequently, the loss function of the VAE 48 reduces to $$\mathcal{L}_{\beta\text{-VAE}} = -\mathbb{E}_{q_{\varphi}(z|x)}\left[\log p_{\theta}(x|z)\right] + \beta\, D_{\mathrm{KL}}\left(q_{\varphi}(z|x)\,\|\,p(z)\right)$$ Augmenting disentanglement through annealing of the β parameter The choice of β parameter plays a critical role in balancing the trade-off between reconstruction quality and the regularization of the latent space. A higher value of β encourages a more regularized and disentangled latent space at the cost of reconstruction quality, whereas a lower β prioritizes accurate reconstruction but may lead to an entangled latent space. In dimension reduction, the primary objective is to preserve as much information from the original data as possible, which can be accomplished by using a lower value of β . However, this may lead the approximate posterior to collapse to the prior, a phenomenon known as the KL vanishing problem. To better understand the impact of the β parameter and its role in balancing these trade-offs, we examined the D KL term, 49 – 52 by decomposing it into two components: $$\mathbb{E}_{p(x)}\left[D_{\mathrm{KL}}\left(q_{\varphi}(z|x)\,\|\,p(z)\right)\right] = I_{q}(x, z) + D_{\mathrm{KL}}\left(q_{\varphi}(z)\,\|\,p(z)\right)$$ The mutual information (MI) term $I_{q}(x, z)$ measures the degree to which the encoded representation z captures the unique features of the input data x , so that the decoder can faithfully reconstruct it. A higher MI enhances the correlation between the latent variables and the data, promoting a reduction in the extent of KL vanishing. 
The marginal KL term, D KL ( q φ ( z )‖ p ( z )), evaluates how well the aggregated posterior, q φ ( z ), aligns with the prior distribution. Combining the above two equations, the loss function can be written as In case of a standard VAE, where β = 1, the first term disappears, leaving only the KLD term. Training the model focusses solely on aligning the aggregated posterior with the prior with no incentive to maximize the MI, which leads to posterior collapse. As a result the latent space captures no meaningful information about the input data. However annealing of the β parameter 40 , 41 can alleviate such problems thereby providing a better latent representation, which retains the best of both worlds. To begin with, we start with a low value of β which allows the model to capture the underlying pattern in the data more freely without emphasising much on the prior i . e . the model emphasizes on maximizing the mutual information. This creates a latent representation that is meaningful but not regularized. As the training progresses, the increase in the β value makes the latent space more regularized thereby aligning it to the prior distribution, rather than solely focussing on reconstructing the input data. When β reaches 1, the weighted loss function is equivalent to the true variational lower bound. This gradual approach of annealing β , enables the model to alleviate overlapping or entangled features, thereby improving the model’s ability to learn disentangled factors of the latent space. Our proposed protocol for optimizing β with FVE While annealing β to 1 improves the model’s ability to balance reconstruction and regularization, in practice, this may not always yield optimal results. One way to improve is to cyclically anneal the β value to 1. 
41 But it requires selection of the number of cycles and also needs to be trained for a large number of epochs (so that the annealing rate is slow and the risk of posterior collapse is reduced), thereby increasing training time. In this work, we give an alternate way in which we anneal the β parameter in just one annealing cycle. Instead of annealing β to 1, we anneal it to a lower value. A literature survey 53 , 54 indicates that the β parameter has been arbitrarily set during the training of standard VAE models. However, this is ineffective as the results may change upon choosing a different β value. We have instead used the FVE score 55 as a metric to find an optimal value of β for training the model, which is given as $$\mathrm{FVE} = 1 - \frac{\sum_{i=1}^{n} (x_i - \hat{x}_i)^2}{\sum_{i=1}^{n} (x_i - \bar{x})^2}$$ where $x$, $\hat{x}$, $\bar{x}$ and $n$ represent the input, output, mean of the input and total number of features respectively. The FVE score is calculated after each training epoch, and we select the β value corresponding to the highest value of FVE score in the full training cycle. We chose this value as this score suggests that the model at that specific β has been able to retain the maximum variance of the data during reconstruction. In this work, the rate at which β increases is tuned using three different schedules, namely linear 56 , 57 ( equation 13 ), cosine 58 ( equation 14 ) and logistic 40 ( equation 15 ) as defined by the respective equations $$\beta_t = \beta_0 + (\beta_f - \beta_0)\,\frac{t}{T-1} \tag{13}$$ $$\beta_t = \beta_0 + \frac{\beta_f - \beta_0}{2}\left[1 - \cos\left(\frac{\pi t}{T-1}\right)\right] \tag{14}$$ $$\beta_t = \beta_0 + \frac{\beta_f - \beta_0}{1 + \exp\left[-k\left(t - \frac{T-1}{2}\right)\right]} \tag{15}$$ where t is the current epoch, T is the total number of epochs, β 0 is the starting value of β, β t is the β at epoch t and β f is the final value of β. k determines the steepness of the logistic curve and is set to 10/( T − 1). Training of VAEs using our method The training protocol begins with using the C α pairwise distance matrix as the input feature vector for the model. The VAE is then trained using different annealing schemes, with the β parameter gradually annealed to a value of 1. As a control, a standard VAE model is also trained with β fixed to 1. 
During training, the FVE score is monitored as a function of β , and the optimal β value is determined as the one corresponding to the maximum FVE score. Once the optimal β values are identified, the VAE is retrained using each annealing scheme at its respective optimal β . The best-performing annealed VAE model is selected based on the highest VAMP2 score, calculated from the mean vector and compared with the non-annealed counterpart (see Figure 1(d) ). The mean vector from the selected model is then used to construct MSM. Download figure Open in new tab Figure 1. (a) Schematic of a β Annealed Variational AutoEncoder (b) Linear, Cosine and Logistic annealing schedules (c) Annealing of the β parameter. (d) Protocol to determine the optimum β value from FVE score and selection of the best model to construct MSM. Cartoon snapshot of the Trp-cage mini protein and (f) α S protein. The different colours correspond to different residues. Discussion Optimizing VAE for Improved Representation of Protein Free Energy Landscapes We initially trained a standard VAE model ( β fixed to 1) and annealed VAE model ( β annealed to 1) for the Trp-cage protein (refer to ‘Method (Overview of training data) section’). The mean vector produced by the encoder in the latent space of the trained model was used to construct the free energy surface (FES) and shown in Figure 2 , for both the standard VAE and annealed VAE across all annealing schemes. The FES revealed a single minimum for all models, indicative of posterior collapse, where the model fails to capture distinct states. Notably, the standard VAE exhibited a more pronounced and collapsed minimum compared to the annealed VAE. This suggests that while the annealing process partially mitigates posterior collapse, it ultimately fails to avoid it, indicating room for further improvement in balancing reconstruction and regularization. Download figure Open in new tab Figure 2. 
FES of Trp-cage using the latent space of (a) Standard VAE (b) Linearly annealed VAE (c) Cosine annealed VAE and (d) Logistically annealed VAE. β is annealed to 1. To investigate this collapse, we have monitored the FVE score over the entire training cycle as shown in Figure 3(a) . The initial negative FVE score reflects that the model’s reconstruction is poor at the start of training. This is expected, as the VAE starts with randomly initialized parameters, and the latent space is not yet structured in a way that effectively captures the data variance. As the training progresses, the model learns to reconstruct the data more effectively, leading to an increase in the FVE score. The peak corresponds to the point where the model has reached an optimal balance between the variance and regularization. After this point, as β continues to increase, the weight of the KL divergence term grows, leading to stronger regularization of the latent space. This regularization forces the latent variables to conform more closely to the prior distribution. Consequently, the ability of the model to explain the variance decreases, resulting in a decline in the FVE score. Eventually, as β reaches to 1, the regularization fully dominates, and the model prioritizes aligning the latent space with the prior, over capturing meaningful variance from the data. At this stage, the FVE score flattens out to near zero values, indicating that the latent space is no longer effectively representing the data. The latent variables contribute no useful information, and the decoder essentially defaults to reconstructing the mean of the data. In comparison, the standard VAE is trained with β =1 from the begining, which forces the model to balance reconstruction and regularization throughout training. This early regularization restricts the model’s capacity to capture meaningful variance, leading to a more severe posterior collapse compared to the annealed VAE. Download figure Open in new tab Figure 3. 
(a) Variation of FVE value with respect to epochs for VAE and the annealing schemes. (b) Change in FVE as a function of β for different annealing schemes. The optimum value of β is labelled by the diamond marker which is 0.061, 0.025 and 0.029 for linear, cosine and logistic annealing respectively. (c) Variation of FVE values with respect to epochs for models trained at their optimum β values. (d) Comparison of the VAMP2 score between PCA, non-annealed VAE and annealed VAE for different annealing schemes for lag time of 10 ns. To address posterior collapse, we retrained the VAE model using the optimal β value corresponding to the highest FVE score achieved by the annealed VAE across all annealing schemes. Similarly, each VAE model was trained by annealing the β to its respective optimal β value (see Figure 3(b) ). We evaluated six models in total: three from the non-annealed VAE and three from the annealed VAE. We analyzed the FVE scores of the non-annealed VAE and annealed VAE and found that while the FVE scores for both models eventually plateau to a similar value by the end of training, their trajectories differ significantly. The annealed VAE demonstrates a rapid increase in the FVE score during the initial stages of the training before plateauing. In contrast, the non-annealed VAE exhibits a slower, more gradual increase in the FVE score, reflecting its difficulty in capturing the variance, and lags behind the annealed VAE during the entire training cycle (see Figure 3(c) ). While both models eventually converge to a similar FVE score, the annealed VAE’s training trajectory suggests it is more efficient at capturing meaningful data variance early on, mitigating posterior collapse, and structuring the latent space in a way that may be more useful for downstream tasks. 
To evaluate the effectiveness of the two models, we compared their VAMP2 scores, which measure the ability of the latent space to capture slow collective variables, and compared it with the top two principal components from PCA. Our results demonstrate that the five-fold cross-validated VAMP2 scores for the annealed VAE consistently outperformed those of the non-annealed VAE and PCA across all annealing schemes (see Figure 3(d) ). This highlights that annealing the β parameter significantly enhances the model’s ability to capture slow collective variables compared to both non-annealed VAEs and PCA. To further assess the quality of the latent space, we analyzed its disentanglement by clustering the FES using a Gaussian Mixture Model (GMM) (see Figure 4(a)-(b) and Figure S1). The optimal number of clusters was determined by evaluating Bayesian Information Criterion (BIC) scores across different cluster counts (see Figure 4(c) and Figure S2). The slope of the BIC curve indicated that four clusters provided the best trade-off between model complexity and goodness of fit for different annealing schemes. After clustering, we assessed the quality of the identified clusters using the silhouette score, which quantifies how well a data point fits within its assigned cluster compared to others. Higher silhouette scores indicate better-defined and more distinct clusters. Our analysis revealed that the silhouette scores for the four clusters derived from the annealed VAE were consistently higher across all annealing schemes compared to the non-annealed VAE (see Figure 4(d) ). This suggests that annealing the β parameter enhances the disentanglement of representations in the latent space, resulting in more distinct and interpretable clusters. The points corresponding to each cluster were color-coded, highlighting the distinct regions of the latent space occupied by the four clusters. 
In the case of the non-annealed VAE, the clusters appeared less well-separated, reflecting the challenges in disentangling representations without annealing. By contrast, the annealed VAE exhibited a more distinct and organized clustering in the latent space, underscoring the improved separation and representation achieved through annealing (see Figure 4(e)-(f) ), as further supported by the consistently higher silhouette scores (see Figure 4(c)-(d) and Figure S3). Download figure Open in new tab Figure 4. FES of Trp-cage using the latent space of (a) Non-annealed cosine VAE (b) Cosine annealed VAE (c) BIC score as a function of GMM clusters for the Non-annealed cosine and Cosine annealed VAE (d) Silhouette score as a function of GMM clusters for all the annealing schemes. The clusters identified by the GMM from the latent space of (e) Non-annealed Cosine VAE and (f) Cosine annealed VAE. The cluster centers are marked in red triangles. Refining Kinetic and Structural Understanding of α S with Annealed VAEs We then investigated the effect of annealing on a more complex system, α S (refer to ‘Method (Overview of training data) section’). To achieve this, we identified the optimal β parameter and trained both non-annealed and annealed VAE models using this value (see Figure S4). We then compared the VAMP2 scores derived from the latent space of the models. Consistent with previous observations, the annealed VAE outperformed the non-annealed VAE, achieving higher VAMP2 scores across all annealing schemes (see Figure S5). The FES constructed from the latent space of the annealed VAE demonstrated a more dispersed distribution of latent representations compared to the non-annealed VAE along with an increased number of local minima, a characteristic feature of IDPs (see Figure 5(a)-(b) and Figure S6). 
To better understand the dynamic transitions between the metastable states and gain insights into the underlying kinetics, we performed kinetic modeling by constructing a Markov State Model (MSM). We used k-means clustering to discretize the latent space into microstates, with the number of clusters ranging from 50 to 1000. To determine the optimal number of microstates, we employed both the GMRQ score and VAMP2 score as metrics. In both of these metrics, the annealed VAE outperformed the non-annealed VAE (see Figure 5(c) and Figure S7-S8), indicating that the annealing process enhanced the quality of the latent space representation. We chose 500 microstates to ensure a sufficiently detailed representation of the system’s dynamics. A transition matrix was then built by counting the number of transitions among the microstates at lag times ranging from 1 ns to 200 ns. Using this matrix, we computed the implied timescales (ITS) for both the non-annealed and annealed VAE (see Figure 5(e)-(f) ). The ITS values for the annealed VAE were higher than those for the non-annealed VAE, indicating that the annealed VAE captures slower relaxation processes more effectively. This suggests that the annealing process enhances the model’s ability to identify transitions between more kinetically distinct or stable states. Similar trends were observed across other annealing schemes as well (see Figure S9). Download figure Open in new tab Figure 5. FES of α S using the latent space of (a) Non-annealed linear VAE (b) Linear annealed VAE. GMRQ scores of the Non-annealed linear VAE and Linear annealed VAE for different number of (c) microstates and (d) macrostates. ITS of the (e) Non-annealed linear VAE and (f) Linear annealed VAE. To coarse-grain the microstates, Perron Cluster Cluster Analysis (PCCA+) was performed at a lag time of 100 ns. The number of macrostates was varied between 2 and 10. 
The five-fold cross-validated GMRQ scores from the training dataset indicate that, regardless of the number of macrostates, the annealed VAE consistently achieves higher GMRQ scores across all annealing schemes (see Figure 5(d) and Figure S7). Finally, we discretized the MSM for all the annealing schemes into six macrostates (see Figure 6 and Figure S10) and a Chapman-Kolmogorov (CK) test was performed to verify the Markovianity of the model for all non-annealed and annealed VAEs (see Figure S11-S13). The CK test showed that the linear and cosine annealed VAEs outperformed their non-annealed counterparts, with prediction and re-estimation in good agreement. However, the logistically annealed VAE performed slightly worse than the non-annealed case. For the linearly annealed VAE, the macrostates were characterized using inter-residue contact maps. These contact maps revealed distinct structural features for each state, confirming their uniqueness and highlighting the ability of annealed VAE to capture structurally significant conformations of the system (see Figure 7(a)-(f) ). Download figure Open in new tab Figure 6. Coarse grained MSM macrostates of α S using the latent space of Linear annealed VAE, with their representative structures and their population. The bar below represents different regions of α S. Download figure Open in new tab Figure 7. (a)-(f) Ensemble averaged backbone contact map for the six Linearly annealed VAE states AS1-AS6. MFPT among the macrostates of (g) Non-annealed linear VAE and (h) Linearly annealed VAE. The residue-wise contact map for the linearly annealed states of α S highlights significant interactions across its four structural regions. The N-terminal region, characterized by a highly conserved hexamer motif (KTKEGV), exhibits a tendency to form helices and can be divided into two helical segments: H1 (residues 1-30) and H2 (residues 31-60) when associated with micelles. 
This is followed by a central hydrophobic nonamyloid- β component (NAC) region (residues 61 to 95) and C terminal region (residues 96 to 140). The different annealed states (AS) obtained from the MSM macrostate, reveal distinctive structural motifs and inter-domain interactions within α S. In AS1, notable β -sheet contacts are observed between ALA17-GLU28 in the H1 region and VAL37-VAL49 spanning the H2 region, while the NAC region forms β sheets with LYS60-ALA69 interacting with ALA78-SER87, emphasizing the structural organization in this aggregation-prone area. Transitioning to AS2, a β -sheet between VAL49-ALA56 (H2) and GLU83-ALA90 (NAC) links these regions, with MET5-ALA17 (H1) and PRO120-ASP135 (CTER) forming another β sheet, highlighting inter-domain interactions and structural connectivity across the regions. In AS3, significant β -sheet contacts between ALA30-TYR39 (H2) and PRO108-LEU113 (CTER) suggest long range connectivity, while MET1-ALA17 (H1) and ASP115-ASP135 (CTER) indicate long-range NTER-CTER interactions. The AS4 state reveals an antiparallel β -sheet between ASP4-ALA17 (H1) and LYS45-HIS50 (H2), alongside long-range contacts between ALA19-VAL40 (H1) and ALA124-GLY132 (CTER), suggesting a well-organized structural alignment across multiple domains. In AS5, inter-region interactions are highlighted by contacts between MET5-LYS43 (H1) and THR64-VAL82 (NAC), as well as β sheets between ALA90-LEU100 (NAC) and GLY106-VAL118 (CTER), which contribute to the stability of the structure. Extended β -sheet arrangements are also seen between MET5-GLY14 (H1) and GLU123-TYR133 (CTER), linking distant regions of α S. 
Finally, the AS6 state, characterized by a large population, demonstrates widespread inter-region connectivity, with notable interactions between GLY68-ALA78 (NAC) and GLU123-GLU137 (CTER), as well as GLU20-LYS32 (H1) and TYR125-TYR136 (CTER), further illustrating the complex and dynamic network of interactions that govern the conformational behavior of α S across different annealed states (see Figure 7(a)-(f) ). We have further calculated the Mean First Passage Time (MFPT) between the metastable states for both the non-annealed VAE and the annealed VAE. Our analysis reveals that the timescales reported by the non-annealed VAE are an order of magnitude lower compared to those from the annealed VAE. This significant discrepancy in timescales indicates that the non-annealed VAE latent space over-simplifies the system’s dynamical landscape resulting in overestimated rates of conformational changes. Consequently, the system’s kinetics seems distorted where transitions between states appear unrealistically fast (see Figure 7(g)-(h) ). This study demonstrates the efficacy of annealed VAEs in capturing the complex conformational landscapes of proteins, both for a model Trp-cage protein and an intrinsically disordered system, α S. By systematically selecting the annealing parameter β through the FVE score we were able to mitigate posterior collapse and ensure the generation of meaningful latent representations. For both Trp-cage and α S, annealed VAEs outperformed their non-annealed counterparts in various metrics, including VAMP2 and GMRQ scores, FES analysis, and Markov State Model analysis. The slower convergence velocities and the presence of more distinct minima in the FES for annealed VAEs highlight their ability to capture finer details of the conformational space. The annealed VAE, with its higher reported timescales, captures a more faithful representation of the long-lived states and slow transitions, which are often critical for understanding system behavior. 
Overall, this work provides compelling evidence that annealed VAEs offer a more robust framework for exploring the conformational dynamics of proteins. The interplay between β annealing schedules, the latent space structure, and the performance metrics across both Trp-cage and α S systems underscores the importance of optimizing these parameters to capture meaningful and interpretable biological insights. Methods Neural Network Architecture To represent protein structures, we utilized C α -C α pairwise distances as input features. For Trp-cage, distances were calculated using all C α atoms, whereas for α S, distances were computed by skipping every third residue. The neural network architecture for Trp-cage consisted of layers with 190, 128, 64, 32, and 16 neurons, while for α S, it included layers with 1081, 512, 256, 64, and 12 neurons. In both cases, the latent space was represented by 2 neurons. The models were trained using stochastic gradient descent (SGD) as the optimizer with a learning rate of $1 \times 10^{-3}$ and mean squared error (MSE) was used as the loss function, weighted by a factor of 10. A batch size of 5000 and 100 was used for Trp-cage and α S, respectively. Hyperbolic tangent (tanh) activation was used in hidden layers, while the output layer utilized a sigmoid activation. Weights were initialized using the Glorot uniform method. The training was performed for 1000 epochs, with 90% of the data allocated for training and 10% for validation. A schematic of the architecture is shown in Figure 1(a)-(c) . The implementation of the β annealed VAE is made publicly available on the GitHub page ( https://github.com/subinoyadhikari/beta_annealed_VAE ) Overview of training data The training of VAEs was carried out using high-quality, long-timescale molecular dynamics simulation trajectories of two distinct protein systems: Trp-cage and α -Synuclein. 
Specifically, a 100 µ s unbiased simulation trajectory of Trp-cage and a 73 µ s simulation trajectory of α -Synuclein were utilized, both provided by D. E. Shaw Research. 59 , 60 These datasets are well-suited for exploring protein dynamics due to their exceptional temporal resolution and extensive sampling of conformational space. The representative structures are shown in Figure 1(e) and (f), respectively. Data and code availability All data supporting the findings are included within the manuscript. Additionally, the code for training the model can be accessed on GitHub via the following link: https://github.com/subinoyadhikari/beta_annealed_VAE Acknowledgments We acknowledge the support of the Department of Atomic Energy, Government of India, under Project Identification No. RTI 4007. We sincerely acknowledge Tata Institute of Fundamental Research Hyderabad, India for providing the support of computing resources. We thank D. E. Shaw Research for providing us the long MD simulation trajectories of Trp-cage and α -Synuclein. 59 , 60 JM acknowledges Core Research grants provided by the Department of Science and Technology (DST) of India (CRG/2023/001426). Footnotes ↵ * E-mail: subinoyadhikari{at}tifrh.res.in ; jmondal{at}tifrh.res.in ,+914020203091 References (1). ↵ Karplus , M. ; Kuriyan , J. Molecular dynamics and protein function . Proceedings of the National Academy of Sciences 2005 , 102 , 6679 – 6685 . OpenUrl Abstract / FREE Full Text (2). ↵ Plattner , N. ; Noé , F. Protein conformational plasticity and complex ligand-binding kinetics explored by atomistic simulations and Markov models . Nature communications 2015 , 6 , 7653 . OpenUrl CrossRef PubMed (3). ↵ Bowman , G. R. ; Pande , V. S. Protein folded states are kinetic hubs . Proceedings of the National Academy of Sciences 2010 , 107 , 10890 – 10895 . OpenUrl Abstract / FREE Full Text (4). ↵ Paul , F. ; Wehmeyer , C. ; Abualrous , E. T. ; Wu , H. ; Crabtree , M. D. ; Schöneberg , J. ; Clarke , J. 
; Freund , C. ; Weikl , T. R. ; Noé , F. Protein-peptide association kinetics beyond the seconds timescale from atomistic simulations . Nature communications 2017 , 8 , 1095 . OpenUrl CrossRef PubMed (5). ↵ Tiwary , P. ; Mondal , J. ; Berne , B. J. How and when does an anticancer drug leave its binding site? Science advances 2017 , 3 , e1700014 . OpenUrl FREE Full Text (6). ↵ Yuan , Y. ; Deng , J. ; Cui , Q. Molecular dynamics simulations establish the molecular basis for the broad allostery hotspot distributions in the tetracycline repressor . Journal of the American Chemical Society 2022 , 144 , 10870 – 10887 . OpenUrl CrossRef PubMed (7). ↵ Sherman , W. ; Day , T. ; Jacobson , M. P. ; Friesner , R. A. ; Farid , R. Novel procedure for modeling ligand/receptor induced fit effects . Journal of medicinal chemistry 2006 , 49 , 534 – 553 . OpenUrl CrossRef PubMed Web of Science (8). ↵ Velliangiri , S. ; Alagumuthukrishnan , S. , et al. A review of dimensionality reduction techniques for efficient computation . Procedia Computer Science 2019 , 165 , 104 – 111 . OpenUrl CrossRef (9). ↵ Abdi , H. ; Williams , L. J. Principal component analysis . Wiley interdisciplinary reviews: computational statistics 2010 , 2 , 433 – 459 . OpenUrl CrossRef (10). ↵ Molgedey , L. ; Schuster , H. G. Separation of a mixture of independent signals using time delayed correlations . Physical review letters 1994 , 72 , 3634 . OpenUrl CrossRef PubMed Web of Science (11). ↵ Naritomi , Y. ; Fuchigami , S. Slow dynamics of a protein backbone in molecular dynamics simulation revealed by time-structure based independent component analysis . The Journal of Chemical Physics 2013 , 139 . (12). ↵ Hinton , G. E. ; Roweis , S. Stochastic neighbor embedding . Advances in neural information processing systems 2002 , 15 . (13). ↵ Van der Maaten , L. ; Hinton , G. Visualizing data using t-SNE . Journal of machine learning research 2008 , 9 . (14). ↵ McInnes , L. ; Healy , J. ; Melville , J. 
Umap: Uniform manifold approximation and projection for dimension reduction . arXiv preprint arxiv: 1802.03426 2018 , (15). ↵ Rumelhart , D. E. ; Hinton , G. E. ; Williams , R. J. Learning internal representations by error propagation, parallel distributed processing, explorations in the microstructure of cognition, ed. de rumelhart and j. mcclelland. vol. 1. 1986 . Biometrika 1986 , 71 , 6 . OpenUrl (16). Bengio , S. ; Bengio , Y. Taking on the curse of dimensionality in joint distributions using neural networks . IEEE Transactions on Neural Networks 2000 , 11 , 550 – 557 . OpenUrl CrossRef PubMed (17). ↵ Adhikari , S. ; Mondal , J. Machine learning subtle conformational change due to phosphorylation in intrinsically disordered proteins . The Journal of Physical Chemistry B 2023 , 127 , 9433 – 9449 . OpenUrl CrossRef PubMed (18). ↵ Kingma , D. P. Auto-encoding variational bayes . arXiv preprint arxiv: 1312.6114 2013 , (19). ↵ Rezende , D. J. ; Mohamed , S. ; Wierstra , D. Stochastic backpropagation and approximate inference in deep generative models . International conference on machine learning . 2014 ; pp 1278 – 1286 . (20). ↵ Menon , S. ; Adhikari , S. ; Mondal , J. An Integrated Machine Learning Approach Delineates an Entropic Expansion Mechanism for the Binding of a Small Molecule to α-Synuclein . eLife 2024 , 13 . (21). ↵ Goodfellow , I. Deep learning . 2016 . (22). ↵ Burgess , C. P. ; Higgins , I. ; Pal , A. ; Matthey , L. ; Watters , N. ; Desjardins , G. ; Lerchner , A. Understanding disentangling in beta-VAE . arXiv preprint arxiv: 1804.03599 2018 , (23). ↵ Alemi , A. ; Poole , B. ; Fischer , I. ; Dillon , J. ; Saurous , R. A. ; Murphy , K. Fixing a broken ELBO . International conference on machine learning . 2018 ; pp 159 – 168 . (24). ↵ Higgins , I. ; Amos , D. ; Pfau , D. ; Racaniere , S. ; Matthey , L. ; Rezende , D. ; Lerchner , A. Towards a definition of disentangled representations . arXiv preprint arxiv: 1812.02230 2018 , (25). ↵ McGibbon , R. T. 
; Pande , V. S. Variational cross-validation of slow dynamical modes in molecular kinetics . The Journal of chemical physics 2015 , 142 . (26). ↵ Wu , H. ; Noé , F. Variational approach for learning Markov processes from time series data . Journal of Nonlinear Science 2020 , 30 , 23 – 66 . OpenUrl CrossRef (27). ↵ Yang , Z. ; Hu , Z. ; Salakhutdinov , R. ; Berg-Kirkpatrick , T. Improved variational autoencoders for text modeling using dilated convolutions . International conference on machine learning . 2017 ; pp 3881 – 3890 . (28). Dieng , A. B. ; Kim , Y. ; Rush , A. M. ; Blei , D. M. Avoiding latent variable collapse with generative skip models . The 22nd International Conference on Artificial Intelligence and Statistics . 2019 ; pp 2397 – 2405 . (29). Zhao , T. ; Zhao , R. ; Eskenazi , M. Learning discourse-level diversity for neural dialog models using conditional variational autoencoders . arXiv preprint arxiv: 1703.10960 2017 , (30). ↵ Kim , H. ; Mnih , A. Disentangling by factorising . International conference on machine learning . 2018 ; pp 2649 – 2658 . (31). ↵ Hinton , G. E. ; Krizhevsky , A. ; Wang , S. D. Transforming auto-encoders . Artificial Neural Networks and Machine Learning–ICANN 2011: 21st International Conference on Artificial Neural Networks, Espoo, Finland, June 14-17, 2011, Proceedings, Part I 21 . 2011 ; pp 44 – 51 . (32). Goroshin , R. ; Mathieu , M. F. ; LeCun , Y. Learning to linearize under uncertainty . Advances in neural information processing systems 2015 , 28 . (33). Karaletsos , T. ; Belongie , S. ; Rätsch , G. Bayesian representation learning with oracle constraints . arXiv preprint arxiv: 1506.05011 2015 , (34). Rippel , O. ; Adams , R. P. High-dimensional probability estimation with deep density models . arXiv preprint arxiv: 1302.5125 2013 , (35). Reed , S. ; Sohn , K. ; Zhang , Y. ; Lee , H. Learning to disentangle factors of variation with manifold interaction . International conference on machine learning . 
2014 ; pp 1431 – 1439 . (36). Zhu , Z. ; Luo , P. ; Wang , X. ; Tang , X. Multi-view perceptron: a deep model for learning face identity and view representations . Advances in neural information processing systems 2014 , 27 . (37). Yang , J. ; Reed , S. E. ; Yang , M.-H. ; Lee , H. Weakly-supervised disentangling with recurrent transformations for 3d view synthesis . Advances in neural information processing systems 2015 , 28 . (38). Kulkarni , T. D. ; Whitney , W. F. ; Kohli , P. ; Tenenbaum , J. Deep convolutional inverse graphics network . Advances in neural information processing systems 2015 , 28 . (39). ↵ Cheung , B. ; Livezey , J. A. ; Bansal , A. K. ; Olshausen , B. A. Discovering hidden factors of variation in deep networks . arXiv preprint arxiv: 1412.6583 2014 , (40). ↵ Bowman , S. R. ; Vilnis , L. ; Vinyals , O. ; Dai , A. M. ; Jozefowicz , R. ; Bengio , S. Generating sentences from a continuous space . arXiv preprint arxiv: 1511.06349 2015 , (41). ↵ Fu , H. ; Li , C. ; Liu , X. ; Gao , J. ; Celikyilmaz , A. ; Carin , L. Cyclical annealing schedule: A simple approach to mitigating kl vanishing . arXiv preprint arxiv: 1903.10145 2019 , (42). ↵ Rezende , D. J. ; Viola , F. Taming vaes . arXiv preprint arxiv: 1810.00597 2018 , (43). ↵ Lucas , J. ; Tucker , G. ; Grosse , R. B. ; Norouzi , M. Don’t blame the elbo! a linear vae perspective on posterior collapse . Advances in Neural Information Processing Systems 2019 , 32 . (44). ↵ Zhou , R. Trp-cage: folding free energy landscape in explicit water . Proceedings of the National Academy of Sciences 2003 , 100 , 13280 – 13285 . OpenUrl Abstract / FREE Full Text (45). ↵ Goedert , M. Alpha-synuclein and neurodegenerative diseases . Nature Reviews Neuroscience 2001 , 2 , 492 – 501 . OpenUrl CrossRef PubMed Web of Science (46). ↵ Butler , B. ; Sambo , D. ; Khoshbouei , H. Alpha-synuclein modulates dopamine neurotransmission . Journal of chemical neuroanatomy 2017 , 83 , 41 – 49 . OpenUrl PubMed (47). 
↵ Juraszek , J. ; Bolhuis , P. G. Sampling the multiple folding mechanisms of Trp-cage in explicit solvent . Proceedings of the National Academy of Sciences 2006 , 103 , 15859 – 15864 . OpenUrl Abstract / FREE Full Text (48). ↵ Higgins , I. ; Matthey , L. ; Pal , A. ; Burgess , C. P. ; Glorot , X. ; Botvinick , M. M. ; Mohamed , S. ; Lerchner , A. beta-vae: Learning basic visual concepts with a constrained variational framework . ICLR (Poster) 2017 , 3 . (49). ↵ Makhzani , A. ; Shlens , J. ; Jaitly , N. ; Goodfellow , I. ; Frey , B. Adversarial Autoencoders . 2016 ; https://arxiv.org/abs/1511.05644 . (50). Chen , R. T. ; Li , X. ; Grosse , R. B. ; Duvenaud , D. K. Isolating sources of disentanglement in variational autoencoders . Advances in neural information processing systems 2018 , 31 . (51). Hoffman , M. D. ; Johnson , M. J. Elbo surgery: yet another way to carve up the variational evidence lower bound . Workshop in Advances in Approximate Bayesian Inference, NIPS . 2016 . (52). ↵ Li , C. ; Liu , H. ; Chen , C. ; Pu , Y. ; Chen , L. ; Henao , R. ; Carin , L. Alice: Towards understanding adversarial learning for joint distribution matching . Advances in neural information processing systems 2017 , 30 . (53). ↵ Mansoor , S. ; Baek , M. ; Park , H. ; Lee , G. R. ; Baker , D. Protein Ensemble Generation through Variational Autoencoder Latent Space Sampling . Journal of Chemical Theory and Computation 2024 , 20 , 2689 – 2695 . OpenUrl CrossRef (54). ↵ Luo , Z. ; Wang , R. ; Sun , Y. ; Liu , J. ; Chen , Z. ; Zhang , Y.-J. Interpretable feature extraction and dimensionality reduction in ESM2 for protein localization prediction . Briefings in Bioinformatics 2024 , 25 , bbad534 . OpenUrl CrossRef PubMed (55). ↵ Boattini , E. ; Marín-Aguilar , S. ; Mitra , S. ; Foffi , G. ; Smallenburg , F. ; Filion , L. Autonomously revealing hidden local structures in supercooled liquids . Nature communications 2020 , 11 , 5479 . OpenUrl CrossRef PubMed (56). ↵ Fraccaro , M. 
; Sønderby , S. K. ; Paquet , U. ; Winther , O. Sequential neural models with stochastic layers . Advances in neural information processing systems 2016 , 29 . (57). ↵ Alias Parth Goyal , A.G. ; Sordoni , A. ; Côté , M.-A. ; Ke , N. R. ; Bengio , Y. Z-forcing: Training stochastic recurrent networks . Advances in neural information processing systems 2017 , 30 . (58). ↵ Lai , G. ; Li , B. ; Zheng , G. ; Yang , Y. Stochastic wavenet: A generative latent variable model for sequential data . arXiv preprint arxiv: 1806.06116 2018 , (59). ↵ Robustelli , P. ; Piana , S. ; Shaw , D. E. Developing a molecular dynamics force field for both folded and disordered protein states . Proceedings of the National Academy of Sciences 2018 , 115 , E4758 – E4766 . OpenUrl Abstract / FREE Full Text (60). ↵ Shaw , D. E. ; Dror , R. O. ; Salmon , J. K. ; Grossman , J. ; Mackenzie , K. M. ; Bank , J. A. ; Young , C. ; Deneroff , M. M. ; Batson , B. ; Bowers , K. J. , et al. Millisecond-scale molecular dynamics simulations on Anton . Proceedings of the conference on high performance computing networking, storage and analysis . 2009 ; pp 1 – 11 . View the discussion thread. Back to top Previous Next Posted January 13, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Elucidating Protein Dynamics through the Optimal Annealing of Variational Autoencoders Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Elucidating Protein Dynamics through the Optimal Annealing of Variational Autoencoders Subinoy Adhikari , Jagannath Mondal bioRxiv 2025.01.08.632051; doi: https://doi.org/10.1101/2025.01.08.632051 Share This Article: Copy Citation Tools Elucidating Protein Dynamics through the Optimal Annealing of Variational Autoencoders Subinoy Adhikari , Jagannath Mondal bioRxiv 2025.01.08.632051; doi: https://doi.org/10.1101/2025.01.08.632051 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Biophysics Subject Areas All Articles Animal Behavior and Cognition (7622) Biochemistry (17650) Bioengineering (13871) Bioinformatics (41881) Biophysics (21424) Cancer Biology (18566) Cell Biology (25461) Clinical Trials (138) Developmental Biology (13365) Ecology (19866) Epidemiology (2067) Evolutionary Biology (24290) Genetics (15590) Genomics (22476) Immunology (17713) Microbiology (40331) Molecular Biology (17148) Neuroscience (88473) Paleontology (666) Pathology (2827) Pharmacology and Toxicology (4816) Physiology (7635) Plant Biology (15114) Scientific Communication and Education (2044) Synthetic Biology (4286) Systems Biology (9815) Zoology (2268)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-24T02:00:01.246996+00:00

License: CC-BY-NC-4.0