Full text
73,164 characters
· extracted from
preprint-html
· click to expand
Harnessing protein-folding algorithms to drug intrinsically disordered epitopes | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Harnessing protein-folding algorithms to drug intrinsically disordered epitopes View ORCID Profile Jakub Lála , View ORCID Profile Stefano Angioletti-Uberti doi: https://doi.org/10.1101/2025.11.11.687846 Jakub Lála 1 Department of Materials, Imperial College London , London, UK 2 Thomas Young Centre, Imperial College London , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jakub Lála Stefano Angioletti-Uberti 1 Department of Materials, Imperial College London , London, UK 2 Thomas Young Centre, Imperial College London , London, UK 3 Nanograb Ltd, W105JJ London, UK 4 Aminoanalytica Ltd , MK45 1DL Bedford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Stefano Angioletti-Uberti For correspondence: sangiole{at}imperial.ac.uk Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Due to their lack of a specific structure and dynamical nature, targeting of epitopes that are part of an intrinsically disordered region of a protein is a notoriously difficult task. Here, we describe a computational approach to overcome this problem, based on the use of a protein-folding algorithm and its confidence metrics within a Monte Carlo optimization pipeline to generate peptide-based binders. For different protein targets, we show by accurate free energy calculations that our approach is able to design peptides with binding free energies on the order of tens of k B T , i.e., with strengths comparable to covalent interactions. Direct observation of the bound complex through molecular simulations shows that the targeted epitope folds into structured domains with lowered thermal fluctuations upon binding, while remaining unstructured and dynamic in the unbound state, suggesting that the protein-folding algorithm must have learned the principles of induced (co-)folding. Given the ubiquitous presence of unstructured regions in proteins, our results suggest a potential pathway to design drugs targeting a large variety of previously untargetable epitopes, and open new possibilities for therapeutic intervention in diseases where disordered proteins play a key role. SIGNIFICANCE Small-molecule drugs that bind to a protein via a lock-and-key mechanism require the targeted epitope to form a well-structured, stable binding pocket, thereby preventing binding to unstructured regions. To overcome this limit, we present a general approach, based on a protein-folding algorithm, to find peptide sequences that induce the formation of such a binding interface when no pocket is normally present. In other words, we show how a protein-folding algorithm can be used to program epitope recognition by induced folding instead of rigid lock-and-key matching. In this way, we show that we can extend druggable epitopes to intrinsically disordered, dynamical regions of proteins. II. INTRODUCTION Intrinsically disordered regions (IDRs) of proteins are regions that lack a stable secondary structure, e.g., an alpha-helix or a beta-sheet. These regions are highly dynamic and participate in transient but specific interactions with other biomolecules, making them essential for biological functions. In fact, despite (or, maybe, because of) this characteristic, IDRs play critical roles in cellular processes, including cell signaling, regulation, and molecular recognition. 1 – 3 Recently, they have also been implicated in liquid-liquid phase transitions that are proving critical to understanding how cells respond to stressors. 4 , 5 Compared to well-structured regions, which are amenable to many of the tools developed by structural biologists over the past decades, the dynamical nature of IDRs makes it difficult to characterize them, and thus to understand their behavior, complicating the targeting of these regions with small-molecule drugs. One of the central challenges is that the absence of a stable structure hinders the application of one of drug design’s most successful paradigms: the lock-and-key model. As a result, traditional drug design strategies such as docking, 6 which rely on well-defined binding pockets and stable secondary structures, are intrinsically inadequate for IDRs. For this reason, epitopes within these regions have often been defined as undruggable. 7 RNA is also a highly flexible molecule, and, for similar arguments, designing drugs to target this nucleic acid has proven a hard problem. It has been shown, however, that for targeting RNA, some drugs achieve strong and selective binding by stabilizing a specific configuration. 8 , 9 This mechanism suggests, at least in principle, a potential pathway toward designing binders for proteins’ IDRs. Considering the practically infinite design space available, finding a generic molecule that stabilizes a specific fold in a protein might seem a prohibitively large task. To simplify the problem, we restrict ourselves to protein-based binders. By doing so, we aim to exploit a known mechanism, induced (co-)folding, whereby one or both otherwise unstructured proteins undergo a coordinated transition into a structured conformation upon binding. This mechanism has long been invoked as a way to mediate protein-protein interactions. 10 , 11 In fact, it has already been suggested that signaling through interacting IDRs exploits induced folding to achieve selective binding. 2 Whether or not two unstructured chains of amino acids can co-fold upon finding each other will depend on the formation of structural patterns, e.g., hydrogen bonds or patches of opposite charge, that can stabilize such a folded structure, and lead to attractive interactions between the two molecules. We notice that the same principle equally applies when considering the interaction between two distant parts of the same protein chain. Based on this premise, we hypothesize that to understand how proteins fold into their native structures, protein-folding algorithms must have also learned about induced folding. If this hypothesis is correct, one should be able to use such algorithms to design binders by looking for protein sequences that induce folding of a target IDR. A. Binder design strategy and system description For a binder consisting of N canonical amino acids, there are 20 N potential candidate sequences to consider, making a random search computationally intractable for all but the shortest sequences. Instead, here we use Monte Carlo optimization as a strategy for the directed exploration of the binder sequence space, driven by the minimization of an energy function that, crucially, contains as a key ingredient the predicted Local Distance Difference Test (pLDDT) 12 , 13 of the targeted IDR, a quantity returned by modern protein-folding algorithms. This metric is central to our purpose because sequence-to-structure models, such as AlphaFold2 13 and ESMFold, 14 have been shown to indirectly predict the dynamical, disordered nature of a residue’s local environment through the pLDDT. 15 , 16 The reason for this correlation is readily explained. The pLDDT was originally designed to convey the algorithm’s confidence in the prediction of a local environment around a given residue. However, one can never truly decouple two qualitatively different cases: low pLDDT values can either be the result of a truly inaccurate prediction, or, alternatively, indicate that the algorithm cannot associate a specific structure to such a region, exactly because no dominant one is present. The latter behavior is exactly what is expected for a dynamic region, i.e., an IDR. We exploit this observation to design protein sequences that can bind to a target IDR through induced folding in the following way (details in the Methods section), schematically represented in Figure 1 . We start from a random amino acid sequence for our binder and make single point mutations. We refold the protein in the presence of the mutated binder, and evaluate an energy that depends on the structure associated with the new sequence. This energy contains a term that increases if the proposed mutation decreases the pLDDT of the target IDR, and is used within a Metropolis acceptance criterion to either accept or reject such mutation. In this way, in practice, we favor mutations that increase the pLDDT of the target IDR. We iterate this algorithm until convergence, arriving at the final designed sequence. As a proof of principle, we apply the protocol to four different interesting targets: α-synuclein (ASYN), CD28, p53 (P53) and one of the small ubiquitin-like modifiers (SUMO). Possibly due to the lack of clear binding pockets, these proteins have been difficult to target using small-molecule drugs. Here, we selected these proteins due to the presence of an IDR in their structure, which serves as the binding epitope in our design protocol, and because they regulate biological processes whose disruption has clear downstream pathological consequences, making them compelling therapeutic targets. For example, ASYN has been shown to aggregate into toxic species that are implicated in the pathogenesis of Parkinson’s disease. 17 CD28 can provide a co-stimulatory signal necessary for T-cell activation and survival, making it relevant in modulating immune responses in various diseases. 18 The tumor suppressor protein (P53) can have its function restored by inactivating its interaction with negative regulators like MDM2, prevalent in many cancers, resulting in the inhibition of tumor growth. 19 Lastly, SUMO proteins are an attractive drug target as SUMO’s modification of various substrates is involved in key cellular processes, while dysregulation of SUMOylation has been linked to diseases including cancer. 20 Download figure Open in new tab Figure 1. Schematic summary of our approach, showcasing inducing secondary structure and high model confidence upon the introduction of a binder (red-to-white-to-blue color represents low-to-high pLDDT values per residue). A protein target in its apo (unbound) form on the left has a disordered, unfolded region (white-to-red color). In the holo (bound) form on the right, upon interacting with a designed sequence (magenta), the previously unstructured region is induced to fold. The change in structuring is measured by a jump in the local pLDDT of the target region. A (positive) change in this local pLDDT is the main driving force during our Monte Carlo optimization in search of a candidate binder sequence. Figure 2 compares the experimentally determined structures of the targets from the Protein Data Bank 21 with the structures predicted by ESMFold for the isolated proteins. With the exception of ASYN, which is fully disordered 22 and exhibits a high C α Root Mean Square Deviation (RMSD) of 18.2Å and a low Template Modeling Score (TM-score) 23 of 0.210, ESMFold accurately reconstructs the structured, non-IDR regions of CD28, P53, and SUMO, yielding low RMSDs (2.06, 2.12, and 0.683Å) and high TM-scores (0.762, 0.917, and 0.719), respectively. Aside from ASYN, the pLDDT is consistently high for the ordered regions (pLDDT-OR in Fig. 2 ), while being notably lower for their respective IDRs (pLDDT-IDR), reflecting reduced model confidence in the disordered parts. Importantly, for CD28, P53, and SUMO, the IDRs are absent in the experimental structures due to crystallographic challenges in resolving flexible regions. The selected targets exhibit varying degrees of disorder: ASYN lacks stable tertiary structure but exhibits transient secondary-structure propensity; 24 CD28 contains a well-defined IDR; P53 has two disordered regions, both of which are selected as epitope targets; and SUMO contains two IDRs, of which we target only one. Molecular dynamics simulations further support the disordered nature of these regions, showing reduced intrinsic secondary structure content, while exhibiting elevated thermal fluctuations (see the Supplementary Information). Together, these observations strongly suggest that low model confidence (low pLDDT) for these systems correlates with intrinsic disorder, rather than reflecting the protein-folding algorithm’s failure to predict a well-structured region. Download figure Open in new tab Figure 2. Comparison of experimental (top row, orange) and predicted (bottom row) structures by ESMFold for the four targets selected in this study. Predicted structures are colored per residue by pLDDT, using a red-to-white-to-blue gradient corresponding to values from 0 to 1. We report the average pLDDT over the ordered, rigid regions (pLDDT-OR) and the intrinsically disordered regions (pLDDT-IDR). Dashed teal boxes highlight the IDRs of interest (we omit SUMO’s second IDR for clarity). ESMFold accurately reproduces the apo form for all proteins except the fully disordered ASYN. IDRs forming unstructured coils are either unresolved in experimental structures (CD28, P53, SUMO), aligned well with experiment (P53), or poorly correlated (ASYN). For P53, we consider both termini (IDR1, IDR2) for epitope targeting. Although ESMFold predicts IDR1 with moderately high pLDDT, its coil-like geometry and enhanced thermal fluctuations (see the Supplementary Information) indicate a disordered character. On average, pLDDT-IDR is lower than pLDDT-OR (excluding ASYN), supporting the interpretation that ESMFold’s low confidence reflects intrinsic disorder rather than poor predictive accuracy. These disordered regions are subsequently used as epitope targets. B. ESMFold associates co-folding with an increase in pLDDT Using our Monte Carlo optimization, we designed 11 binders across the four different targets previously described. Figure 3 presents the predicted structure and the per-residue pLDDT for targets in an isolated (unbound) form and in the presence of the binders designed using our protocol, highlighting the change in pLDDT upon binding. Both in the structure and in the pLDDT histogram, the region of the protein presented in teal corresponds to the region targeted during the design procedure. It is clear that, in general, upon binding the confidence level in the structural prediction increases for the targeted region. Apart from P53-1, where an increase in confidence level does not change the character of the secondary structure of the IDR, which remains a random coil, in all other targets presented here the change in pLDDT is also associated with a change in secondary structure, with the partial formation of alpha-helices or beta-sheets in the targeted region. While our procedure allows us to find binders for all targets, they also show different behaviors. The low pLDDT of ASYN, suggesting no stable tertiary structure despite some secondary structure propensity, is significantly increased in the presence of the binder, potentially correlating with an increased likelihood of obtaining a single dominant tertiary structure. In the case of CD28 and SUMO instead, the positive change in pLDDT is very much confined to the targeted IDR only (residues Gly121–Arg140 for CD28 and residues Gly1–Gly21 for SUMO). Finally, P53 represents yet another different situation, where the binder successfully induces a higher pLDDT region in residues Pro1–Val5, part of the targeted region, but at the same time decreases the pLDDT score in another, separate IDR (residues His200–His226). While this effect shows that our binders only induce structuring in their putative target region, it is unclear whether the change to lower values in other regions is a genuine physical mechanism of induced disorder (which we do not investigate further here), or from limitations in the pLDDT metric itself. As previously discussed, pLDDT can conflate model confidence with structural order. The presence of the binder might require ESMFold to attempt to predict complexes from sequence space farther away from its training data, simply lowering its confidence, particularly in regions that the model was already relatively less confident in even in the isolated state (as in the case of P53). Before we continue, we notice that while for ease of visualization we report here the results for a single binder per protein only, these sequences were chosen purely as representative examples. The very same trends are observed across multiple designs (see the Supplementary Information), confirming the robustness of our arguments and the design protocol. There we also show that for P53, we designed P53-E that targets the other IDR epitope, highlighting the flexibility of our method. Download figure Open in new tab Figure 3. Four examples of peptide binder (magenta) designs. The targeted IDRs are colored in teal. The bottom bar plots for each structure show the pLDDT as an average over 5 contiguous residues (blue for the whole structure except the targeted IDR, in teal). Remaining designs are provided in Figure 7 in the Supplementary Information. C. Molecular simulations confirm high-affinity binding to the disordered regions In Figure 4 , we show the free energy profile as a function of distance from the binding site for all our different binders, together with the estimated binding affinity Δ G . We computed these free energy profiles using coarse-grained molecular dynamics (MD) in combination with umbrella sampling and replica exchange, see Methods for details. Compared to experimental techniques, molecular simulations provide us with direct mechanistic insights about the binding modality. For example, besides measuring binding affinity, we also obtain a clear microscopic picture of the binder-epitope interface. We use as a simple yet informative collective variable (CV) to describe the binding reaction: the distance r between the center of mass (COM) of the binder and that of the rigid , structured region of the target. This region is defined by first considering all close contacts of the binder to the target, and then discarding all the ones that involve the residues on the IDR. Fig. 4 shows clear deep minima at low r , of tens of k B T , for all target-binder systems. The variability in the width of this minimum across the different target-binder pairs can be explained by the fact that the epitope is part of an IDR, whose flexibility allows it to move away from the COM of the rigid part of the protein, while still being bound to the binder. This effect can then be different for each target-binder pair. In Figure 5 , we display representative snapshots from the MD trajectories, clearly showing the unbinding process at larger CV values and supporting our interpretation of the shape of the free energy profiles. Moreover, we provide the reweighted thermal fluctuations (RMSF) analysis as a function of the distance in Figure 8 in the Supplementary Information, showing reduced RMSF values for the IDR in the bound region of the CV space. Download figure Open in new tab Figure 4. A–D) Free energy profiles along the distance r , computed using umbrella sampling with replica exchange. The dashed vertical lines represent the cutoff r c . E) Definition of the collective variable used in the free energy calculations. We measure the free energy as a function of the distance r between the centers of mass – depicted by the spheres – of the binder (lemon) and non-IDR part of the target (orange). F) Binding affinities (Δ G ) for each system computed from the free energy surface (FES) projections in A–D) using Eq. 2 , grouped in color by the protein target. Uncertainty is computed with block analysis, 25 i.e., standard deviation of Δ G across 20 blocks. Download figure Open in new tab Figure 5. Four examples of binder-target systems together with snapshots from the trajectories at specific restraint distances r t shown on the free energy surface (FES) as a black dot, with the visualization as an inset. At larger distances, the binder (magenta) is restrained away from the target (blue). The teal residues of the target show the disordered region. An important aspect to highlight is that the affinity we obtain is typically associated with very strong binders such as antigen-antibody pairs. We expect these simulations to reproduce experimental binding measurements, such as surface plasmon resonance (SPR) or isothermal titration calorimetry (ITC), only within a few k B T , i.e., two or even three orders of magnitude in the dissociation constant K D . While various approximations in the coarse-grained description (e.g., the use of an implicit solvent) might have over-stabilized binding, the affinities we observe are large enough that even an over-estimation of many k B T would still strongly suggest that our sequences represent true, high-affinity binders. Additionally, it should be pointed out that MD simulations (more precisely, free energy calculations using MD), based on a description of the physico-chemical forces in the system, represent an independent, orthogonal approach compared to a deep learning-based protein-folding algorithm, which indirectly extracts information from existing databases of sequences and experimental structures. That two independent methods, i.e., ESMFold and MD, predict binding of our designed sequences to the target epitope strengthens our belief that these results will stand experimental validation, which, however, remains outside the scope of this work. D. Discussion We have shown proof-of-concept of a design protocol for peptide binders targeting disordered regions on clinically important protein targets. Our protocol is based on the definition of an energy function, which can be evaluated from the structure predicted by a protein-folding algorithm (here, ESMFold), and its minimization as a function of the binder sequence, using a Monte Carlo approach. An important ingredient of such a function is the confidence metric associated with the intrinsically disordered region of the protein we target, whose increase we hypothesized is a signal for induced folding. Free energy calculations via molecular dynamics with a classical force field widely used for proteins confirm the bona fide nature of our designed binders, as well as strong binding specifically to their putative target epitope. In particular, our in silico validation shows promising binding affinities, providing good candidates that could be experimentally tested in a lab. In spirit, the approach we presented here is similar to what was recently shown by Baker and co-workers. 26 These authors first designed a template library of peptide fragments that recognize specific repeating amino acid patterns, and then stitched these fragments into small proteins that could recognize extended patterns of disordered sequences. These small proteins were shown to bind strongly and selectively to their targets, with low cross-reactivity, and various proof-of-principle downstream applications were provided, from cell lysate separation to inhibition of cancer-associated proteins. Similarly to our work, their work showed that targeting of IDR is possible, our approach differs mostly for its simplicity: instead of a multi-step protocol with different models and several human interventions between different steps, we rely on a single-step design purely driven by an energy function directly evaluated from the output of a single protein-folding algorithm. We also notice that the binders we generated with our protocol, of only 30–50 residues, are much smaller than those designed in the aforementioned publication. 26 The smaller size could provide translational advantages. First, it allows obtaining large quantities of such binders at high purity using solid-phase peptide synthesis techniques. Second, and perhaps more importantly, shorter lengths are associated with weaker interactions with other proteins and generally make these binders harder to recognize by the immune system, decreasing their immunogenic potential. Overall, having the ability to design small peptide binders on demand for disordered regions of proteins, previously considered untargetable via typical approaches, opens up a range of possibilities across the field of applied biotechnology and drug design. III. METHODS A. Binder-design protocol The practical implementation of our protocol builds on Monte Carlo optimization, as implemented using the open-source BAGEL package. 27 While an in-depth description of the protocol can be read in the associated paper, 27 we describe here the general idea for completeness. The starting point is the definition of an energy (loss) function that depends on structural constraints imposed on different parts of the protein-binder complex. In particular, let X ∈ χ denote a candidate folded structure (i.e., the 3D atomic coordinates of the complex) and Φ ∈ Φ the associated folding-confidence metrics – e.g., pLDDT, pTM, Predicted Aligned Error (PAE) – predicted by a structure prediction oracle. We then define the total energy E : χ × Φ → ℝ as a weighted sum of individual energy (loss) terms, each of which is a function of either the structure or its confidence metrics: Here, E i ( X ) captures geometric or inter-residue constraints acting directly on the coordinates X, E j (Φ) captures folding-confidence penalties or rewards based on the metrics Φ, and w i , w j control their relative importance respectively. In our implementation, we use a total of eight distinct terms (i.e., | ℐ | + | 𝒥 | = 8), whose different combinations produce various designs (detailed in the Supplementary Information). Besides terms previously reported for general protein design purposes, e.g., steering the design toward high confidence structures – high pLDDT, high pTM, low PAE – and soluble sequences (important to aid expression and experimental verification), our loss function contains additional custom terms that drive the designed binder to bind the selected IDR (rather than generically the protein), while also forcing the latter to fold upon binding. A detailed description of each term and associated weights w is reported in Table I in the Supplementary Information. View this table: View inline View popup Download powerpoint Table I. Energy terms of the protein design guided optimization of a binder targeting an IDR. First set of energy terms relates to structure-related properties E i ( X ), while the second set shows the folding-metric associated terms E j (Φ). For detailed implementation, refer to the BAGEL package and technical report. 27 Once the energy function is defined, we search for the binder sequence by minimizing such energy through an adaptation of a Monte Carlo tempering algorithm. Even for short amino acid sequences, the number of potential peptide candidates is immense. Whereas this large selection increases the possibility that at least one candidate with the right properties exists, it is also an impossibly large space to search without any type of guidance. We solve this issue by using the energy function to drive the search process by a slightly modified version of a classical Monte Carlo algorithm called simulated tempering, 28 to help sample the sequence space in search of the optimal (global) solution. We start from a random sequence, and at each iteration generate a new potential candidate via a random point mutation, choosing one of the 20 canonical amino acids with equal probability, except for cysteine (because of problems associated with expression and thus related to an eventual experimental validation). A mutation is accepted or rejected based on the Metropolis criterion, and this protocol is iterated until a maximum number of steps is reached, on the order of 10 4 steps. If s ∈ 𝒮 is the current protein sequence, and s ′ is a candidate mutated sequence, the acceptance probability is given by where ( X , Φ) = ℱ ( s ) and ( X ′ , Φ ′ ) = ℱ ( s ′ ) are the folded structures and folding confidence metrics corresponding to sequences s and s ′ respectively. In Monte Carlo tempering, the system cycles between a low temperature ( T = 0.1) to promote exploitation and a high temperature ( T = 1.0) to encourage exploration, using the acceptance probability defined in Eq. 1 . Since we are interested in optimization rather than sampling of the energy landscape according to a Boltzmann distribution, instead of trying to swap between the two temperatures using an appropriate acceptance criterion to maintain detailed balance, we simply cycle between the low and high temperature regimes sequentially (usually every n low = 400 and n high = 100 steps, although the exact values are system-dependent). At the end of each high temperature regime, we also switch back to the system (i.e., the sequence) with the lowest energy sampled so far. In general, while in the low temperature regime the system efficiently explores its current local minimum, at high temperature, the system can jump between minima separated by larger energy barriers more easily, providing an efficient balance between local and global exploration. While other schemes could be used, such as parallel tempering, 29 a comprehensive benchmark is beyond the scope of this work. We report the values of the different energy terms for the 11 binders we designed in Table VI in the Supplementary Information. We notice that the final values obtained for the different energy terms obtained at the end of the optimization for our designs are consistent with high-quality binders, with interface PAE (iPAE) values around or below 0.2, (local) pLDDT scores around or above 0.75, and binder–epitope distances around or under 1.0 nm. B. Molecular simulations 1. Simulation setup and force field We describe the interactions with the Amber ff14SB force field 30 and an implicit water model. 31 To simulate the system, we used OpenMM 32 with PLUMED. 33 After fixing the final structures of the Monte Carlo optimization from ESMFold with pdbfixer , 32 we minimized the bound complex with L-BFGS in two phases: first, we restrained the non-hydrogen atoms and minimized the rest of the complex; second, we minimized all atoms together. We ran 10 ns of equilibration in the NVT ensemble, extracting the final structures as the starting point for our free energy calculations. Given we used an implicit solvent, we did not use periodic boundary conditions, while non-bonded interactions were cut off at a distance of 2 nm, with the solute dielectric set to 1.0 and the solvent dielectric to 78.5 (dimensionless relative permittivities). All simulations are performed with a timestep of 2 fs. We recognize that using an implicit solvent and cutoff electrostatics are suboptimal compared to describing the energetics of the system using an explicit water model with periodic boundary conditions and a proper treatment of long-range effects using schemes such as Particle Mesh Ewald. 34 However, we chose our simpler setup to at least ensure accurate statistical sampling and converged free energy profiles, which would instead be problematic with more computationally expensive (but still inaccurate) models. While we initially explored more accurate energetic descriptions, including explicit solvent simulations with periodic boundary conditions, the presence of highly flexible IDRs would have required prohibitively large simulation boxes - a limitation avoided by using an implicit solvent model. Although using an implicit solvent model could lead to an inaccuracy of several k B T in the calculated binding free energy, our aim is to qualitatively confirm the presence of a deep minimum associated with the bound state, and that this minimum is associated with a formation of a folded structure. Given the values of several tens of k B T we obtain for our minima, we believe these predictions to be qualitatively robust to changes in the simulation setup. 2. Free energy calculations and binding affinity estimation To validate our results, in particular, to verify that the designed sequences have appreciable binding to their target, we employed free energy calculations using a combination of umbrella sampling 35 and replica exchange. The former is employed to obtain the free energy as a function of the distance between the center of mass (COM) of the target and the binder, since its integral in the bound state can be used to gauge the strength of binding, and is formally related to the experimental binding/dissociation constants. 36 More precisely, one can estimate the free energy of binding Δ G for an observed one-dimensional CV free energy profile F obs ( r ) with the formula where , k B is the Boltzmann’s constant, ρ ° is the standard reference concentration, i.e., 1 mol L −1 , and r c is the cutoff distance defining the boundary between the bound and unbound states. is the effective estimate of F (∞), where is the mean of the free energy averaged between its values recorded between r c and max({ r t }). The boundary r c is defined as the distance above which the free energy F ( r ) remains constant, and thus is equal to its value at infinite distance, where the protein-binder pairs do not interact. We introduced the average to remove the effect of random fluctuations in the estimates of this plateau. A full derivation of the above formula and an in-depth discussion of the different terms is provided in the Supplementary Information. While doing umbrella sampling, we bias the system using as a collective variable (CV) the distance r between the COMs, x target and x binder , of groups of C α atoms for residues on the target and binder respectively. We defined the group on the target by finding the nearby residues close to the binder after the initial equilibration. We excluded the residues that form a part of the targeted IDR epitope, as otherwise the large fluctuations of the IDR made x target lie outside of the rigid domain of the target, leading to degeneracies in r . With this definition, the bias used during umbrella sampling is which forces the system to sample close to the region r ≈ r t , allowing one to obtain an accurate value for the local free energy. As customary in umbrella sampling, different simulations (windows) are repeated with biases differing in the value of r t , and then stitched together using the Weighted Histogram Analysis Method (WHAM) algorithm 37 to obtain the free energy profile. The exact values of all the parameters used in our simulations, including the values of the different bias centers { r t } or the bound region cutoffs r c , can be found in the Supplementary Information. To further improve sampling, especially of the other slow degrees of freedom involved in the binding transition, e.g., torsional angles describing the state of the IDR and the binder, we employed replica exchange for each window. In particular, for each value of the external bias used in umbrella sampling, we run four replicas in parallel using OpenMMTools. 38 The replicas have different temperatures, separated by 5 kelvin each, ranging from 310 K to 325 K. During the simulations, we attempt n 3 swaps between two replicas chosen randomly every 10 ps, where n is the number of replicas. Overall, after initial 10 ns of equilibration within the replica exchange setting, all systems were simulated for 30 ns for each (bias, replica temperature) combination, which amounts to about 10 µs of total simulation time per protein-binder pair. Details of acceptance rates and sampling convergence can be found in the Supplementary Information. C. Data, Materials, and Software Availability All data needed to evaluate the conclusions in the paper are present in the paper and/or the Supplementary Information. We used the open-source Python package BAGEL 27 available at https://github.com/softnanolab/bagel , with version v0.1.0 archived in the Zenodo repository. 39 The templates to obtain the binders can be found in the GitHub and Zenodo repositories in the folder scripts/technical-report/disordered/ . E. Author contributions S.A.-U. designed the research; J.L. and S.A.-U performed the research, analyzed the data and wrote the manuscript. F. Competing interests S.A.-U. is Chief Scientist at Aminoanalytica Ltd and at Nanograb Ltd and acts as a scientific consultant for both companies. The current study has not been financed by any of the aforementioned companies, nor do its results contain any IP related to these companies. SUPPLEMENTARY INFORMATION 1. Analysis of IDR secondary structure and fluctuations To compute the TM-score of the binder-target complex as predicted by ESMFold, we use the tmalign PyMOL implementation of the TM-Align package. 40 For RMSD calculation we use biotite , 41 and for Root Mean Square Fluctuations (RMSF) 42 and secondary structure assignment (through DSSP 43 ) we use MDAnalysis 44 . 45 Figure 6 shows the thermal fluctuations of the disordered regions of interest, highlighted in teal, including the RMSF and dominant secondary structure per residue. For the per-residue RMSF computation, we use the average C α positions aligned to the non-IDR parts only. For P53, we focus on two different IDRs to showcase the flexibility of our design protocol - one of which is clearly more disordered (with a high RMSF, on the right), while the other has a relatively moderate RMSF, but contains coil-like characteristics (on the left). Download figure Open in new tab Figure 6. Molecular dynamics of 500 ns of the four targets at 310 K in the apo form, using otherwise the same simulation parameters (force field, etc.) as the enhanced sampling simulations in the free energy calculations, but without the harmonic restraint, nor the replica exchange. IDRs (teal shade) of the protein have elevated C α RMSF and have generally greater coil-like nature in their secondary structure compared to the rest of the protein. 2. Details of Monte Carlo optimization Table I describes all energy terms employed in the protocol. Tables II to V then provide the specific energy terms, E i and E j , and their weights, w i and w j respectively, used for the different design protocols, including the details of the optimization. The exact weights need to be carefully considered and tuned to achieve the desired behavior. We have conceived of the terms and their weights through rational design and trial-and-error. Note that given the stochastic nature of the optimization, using these optimization parameters will not yield the exact binders as discussed here. For full description of these energy terms, consult the BAGEL package and its associated technical report. 27 View this table: View inline View popup Download powerpoint Table II. Peptide binders for ASYN, specifically P37840 (SYUA HUMAN). Hotspot residues are Leu100–Ala140. All binders have a length of 50 residues. View this table: View inline View popup Download powerpoint Table III. Peptide binders for CD28, specifically P10747 (CD28 HUMAN). Hotspot residues Gly121–Arg140. All binders have a length of 30 residues. 3. Further results for the optimal binders Table VI reports the most relevant metrics typically used to analyze protein-protein complexes and their binding interface, for the 11 final binders designed, in the bound state with their target. As observable from the weights of the different energy terms involved in the optimization, as reported in Tables II to V , the energy terms associated with iPAE, and the local pLDDT (over the binder and over the epitope) are the key metrics driving our minimization protocol. Finally, Figure 7 provides the remaining designs and pLDDT analysis (as in Figure 3 in the main text), for the binders not shown in the main body of the text. View this table: View inline View popup Download powerpoint Table IV. Peptide binders for P53, specifically P04637 (P53 HUMAN). P53-1 and P53-2 target epitope residues Pro1–Phe18, while P53-E targets the other disordered epitope at His205–His227. All binders have a length of 30 residues. View this table: View inline View popup Download powerpoint Table V. Peptide binders for SUMO, specifically P63165 (SUMO1 HUMAN). Epitope residues are Gly1–Gly21. All binders have a length of 30 residues. View this table: View inline View popup Download powerpoint Table VI. Metrics for selected energy (loss) terms in the IDR-guided design protocol. We report the actual values for these (unweighted) rather than the (weighted) energy term values. iPAE is normalized. Download figure Open in new tab Figure 7. All remaining peptide binder (magenta) designs, targeting the epitope (teal) of the target (blue). The bottom bar plots for each structure also show the pLDDT as an average over 5 contiguous residues. Download figure Open in new tab Figure 8. ℝoot Mean Square Fluctuations (RMSF) for all systems as a function of the CV distance. The CV space is divided into 10 equal-sized bins and reweighted according to the restraint bias. Each point here represents the mean RMSF across all residues of the IDR;, and the vertical bars show the standard deviation across those residues. 4. Simulation and analysis details for free energy calculations a. Collective variable We define the distance r ( x target , x binder ) between the centroids of two C α atom groups, X target and X binder . Here, x target and x binder denote the centroids of their respective groups. Each group comprises all residues whose C α atoms lie within 1.5 nm of any C α in the other group. To focus on ordered regions, any residues identified as part of an IDR – by visual inspection of the equilibrated structure – are excluded from X target . This was done in order to prevent artifacts introduced by large thermal fluctuations of the IDR, which would often lie outside of the rigid domain. Moreover, when x target and x binder overlap, there would be an inherent degeneracy, where different binding modes collapse into a single point on the free energy surface. b. Umbrella sampling Table VII provides the details of window placements for umbrella sampling. We chose these restraints based on a first of set of well-tempered metadynamics simulations 46 not included here, which suggested the width of the bound basin in our CV space. In the presumed bound region of the CV space, we place windows at a higher density (narrow) compared to the unbound region with a lower density (wide). Starting configurations of the simulations were retrieved by displacing the binder along the vector r pointing from x target to x binder to the respective values of r t for that specific window. Subsequent equilibration within the replica exchange setting ensured the initial conformation remains physical. If the binder displacement caused steric clashes, e.g., when displacing the binder closer to the target, the nearest valid non-sterically clashing starting configuration was used instead, where then the associated harmonic restraints alone are responsible for moving the binder to the local region of the CV space for that particular window within the equilibration phase. View this table: View inline View popup Download powerpoint Table VII. Umbrella sampling with replica exchange simulation windows. Narrow windows use κ narrow = 412.40 kJ mol −1 nm −2 with 0.125 nm spacing and wide windows use κ wide = 103.10 kJ mol −1 nm −2 with 0.250 nm spacing. The bound/unbound cutoff r c is defined from visual inspection of the plateau in the free energy profiles and is used to compute Δ G . c. Extracting the binding free energy Δ G from the free energy profiles: full derivation Here, we assume knowledge of textbook statistical mechanics and provide a semi-exhaustive derivation of Eq. 3 , specifically considering all the corrections necessary for the free energy profile in a one-dimensional collective variable of a molecular system of 3 N total coordinates, where N is the number of atoms in the system. Let r be the vector connecting the centers of mass of the binder and the rigid part of the target, i.e., connecting groups X target and X binder , with r = ∥ r ∥. The three–dimensional potential of mean force (PMF) F true ( r ) is defined by the probability distribution Notice that F true ( r ) is a free energy because it is itself the result of integrating over all the degrees of freedom of the protein and the binder, at a specific value of r . The standard–state binding free energy for a bimolecular association is: Rewriting the previous equation in spherical coordinates we have where we define Here, the integral is over the solid angle S 2 with a measure d Ω = sin θ dθ dϕ , where . In our simulations, we reconstruct a one–dimensional free energy profile F obs ( r ) with WHAM. This free energy differs from the radial PMF defined by Eq. 9 by the Jacobian of the change of variables r ≡ ( x, y, z ) → ( r, θ, ϕ ): with C being an arbitrary additive constant. We refer to this as the entropic contribution arising from a larger density of states at larger r . Substituting Eq. (10) into Eq. (8) gives To eliminate the non–observable constants, we can evaluate Eq. (10) at the unbound plateau r = r c , where F true ( r c ) = F true (∞). This yields Inserting Eq. (12) into Eq. (11) gives where we replace F obs ( r c ) by the mean value over the unbound plateau to reduce noise from fluctuations. Combining Eq. (13) with Δ G = − k B T ln( ρ ° Z bound ) reproduces Eqs. (2) and (3) in the main text. Remarks. (i) The Jacobian term 4 πr 2 ( Eq. 8 ) cancels out exactly against the entropic contributions ( Eq. 10 ), leaving the constant prefactor outside the integral. (ii) Any additive constant in F obs is immaterial; only differences relative to the unbound plateau matter. (iii) ρ ° Z bound is dimensionless: dr has units of length, of length 2 . Z bound has thus units of length 3 , while ρ ° (1 M) has units of length −3 , converting the “bound volume” ( Z bound ) to the standard state. We work in nanometers, hence ρ ° = 0.602214 nm −3 . d. Thermal fluctuations upon binding We also produce a reweighted histogram of the Root Mean Square Fluctuations (RMSF) of the targeted epitopes along the CV. In other words, we first bin all trajectory snapshots into 10 equal-width bins along the distance CV. Then, we align all snapshots within each bin using the Kabsch algorithm on the structured (non-IDR) residues, and compute the RMSF over the IDR residues only. Each snapshot n is assigned an importance weight where the weights are normalized such that ∑ n w n = 1. The per-residue RMSF is where x n,a are the C α position of the IDR amino acid (residue) a in snapshot n and µ a = ∑ n w n x n,a , i.e., the weighted mean position. For each bin, we report the RMSF taken as the mean and standard deviation across all the residues in the IDR. For all systems, this shows that by introducing the binder, i.e., at a low value of the CV, we reduce the thermal fluctuations of the IDR, and thus impose partial structuring. For some systems (P53-1, P53-2 or SUMO-1A), we can also see a wider distribution (i.e., greater standard deviation) of RMSF across the residues, potentially suggesting that near the transition, i.e., near r c , some parts of the IDR might fluctuate more than other parts, those likely still remaining bound to the peptide. We do not investigate this further. Lastly, additional analysis not displayed here did not show any significant increase in the secondary structure content imposed due to binding at low CV distances, such as alpha-helices or beta-sheets in the epitope. e. Detailed results for the numerical convergence of the FES calculations For each system, in Figures 9 to 19 we provide detailed plots describing the statistical sampling achieved in our simulations, as well as the convergence of the binding affinity Δ G . When plotting the FES, we only show the region below the plateau on the left-hand side, i.e., we crop high (positive) energies at low distances. The uncertainty of the FES (shown in the shaded region) is computed at each bin of the histogram by taking the standard deviation of the free energy surfaces retrieved from block analysis 25 with 20 blocks. We checked, not shown here, that the Δ G estimate is invariant to the number of blocks, from 2 to 20 blocks. We show the convergence of Δ G as a function of simulation time. To demonstrate that our umbrella sampling simulations can properly reconstruct the FES, we compute the overlap of neighboring windows as: where p i ( j ) and p i +1 ( j ) are the normalized histogram probabilities of window i and i + 1 in bin j , and N b is the total number of histogram bins. We perform all binning at 100 bins, both here and during FES reconstruction in WHAM. Last, we also report the transition probabilities between replicas with a reasonable 20% acceptance rate between neighboring replicas. Download figure Open in new tab Figure 9. Free energy calculation details for ASYN-A. Download figure Open in new tab Figure 10. Free energy calculation details for ASYN-G. Download figure Open in new tab Figure 11. Free energy calculation details for CD28-A. Download figure Open in new tab Figure 12. Free energy calculation details for CD28-B. Download figure Open in new tab Figure 13. Free energy calculation details for CD28-G. Download figure Open in new tab Figure 14. Free energy calculation details for CD28-P. Download figure Open in new tab Figure 15. Free energy calculation details for P53-1. Download figure Open in new tab Figure 16. Free energy calculation details for P53-2. Download figure Open in new tab Figure 17. Free energy calculation details for P53-E. Download figure Open in new tab Figure 18. Free energy calculation details for SUMO-1A. Download figure Open in new tab Figure 19. Free energy calculation details for SUMO-1C. Acknowledgments We thank Dr Daniele Visco for insightful discussions about the problem and implementation of some of the simulation techniques used. J.L. acknowledges the President’s PhD scholarship at Imperial College London for funding. We acknowledge computational resources and support provided by the Imperial College Research Computing Service ( http://doi.org/10.14469/hpc/2232 ). We are grateful to the UK Materials and Molecular Modelling Hub for computational resources, which is partially funded by EPSRC (EP/T022213/1, EP/W032260/1 and EP/P020194/1). This research was also supported by grants from NVIDIA and utilized NVIDIA’s GPU access through the Academic Grant for Simulation and Modeling. Funder Information Declared Imperial College London, https://ror.org/041kmwe10 , President’s PhD Scholarship EPSRC , EP/T022213/1 , EP/W032260/1 , EP/P020194/1 NVIDIA , Academic Grant for Simulation and Modeling Footnotes https://github.com/softnanolab/bagel https://doi.org/10.5281/zenodo.15808839 REFERENCES ↵ M Madan Babu , Robin van der Lee , Natalia Sanchez de Groot , and Jörg Gsponer . Intrinsically disordered proteins: regulation and disease . Current Opinion in Structural Biology , 21 ( 3 ): 432 – 440 , June 2011 . ISSN 0959-440X . doi: 10.1016/j.sbi.2011.03.011 . URL http://dx.doi.org/10.1016/j.sbi.2011.03.011. OpenUrl CrossRef PubMed ↵ Peter E. Wright and H. Jane Dyson . Intrinsically disordered proteins in cellular signalling and regulation . Nature Reviews Molecular Cell Biology , 16 ( 1 ): 18 – 29 , December 2014 . ISSN 1471-0080 . doi: 10.1038/nrm3920 . URL http://dx.doi.org/10.1038/nrm3920. OpenUrl CrossRef ↵ Alex S. Holehouse and Birthe B. Kragelund . The molecular basis for cellular function of intrinsically disordered protein regions . Nature Reviews Molecular Cell Biology , 25 ( 3 ): 187 – 211 , November 2023 . ISSN 1471-0080 . doi: 10.1038/s41580-023-00673-0 . URL http://dx.doi.org/10.1038/s41580-023-00673-0. OpenUrl CrossRef ↵ David S.W. Protter and Roy Parker . Principles and properties of stress granules . Trends in Cell Biology , 26 ( 9 ): 668 – 679 , September 2016 . ISSN 0962-8924 . doi: 10.1016/j.tcb.2016.05.004 . URL http://dx.doi.org/10.1016/j.tcb.2016.05.004. OpenUrl CrossRef PubMed ↵ Shana Elbaum-Garfinkle , Younghoon Kim , Krzysztof Szczepaniak , Carlos Chih-Hsiung Chen , Christian R. Eckmann , Sua Myong , and Clifford P. Brangwynne . The disordered p granule protein laf-1 drives phase separation into droplets with tunable viscosity and dynamics . Proceedings of the National Academy of Sciences , 112 ( 23 ): 7189 – 7194 , May 2015 . ISSN 1091-6490 . doi: 10.1073/pnas.1504822112 . URL http://dx.doi.org/10.1073/pnas.1504822112. OpenUrl Abstract / FREE Full Text ↵ Joseph M. Paggi , Ayush Pandit , and Ron O. Dror . The art and science of molecular docking . Annual Review of Biochemistry , 93 ( 1 ): 389 – 410 , August 2024 . ISSN 1545-4509 . doi: 10.1146/annurev-biochem-030222-120000 . URL http://dx.doi.org/10.1146/annurev-biochem-030222-120000. OpenUrl CrossRef PubMed ↵ Caolitao Qin , Yun-Long Wang , Jian Zheng , Xiang-Bo Wan , and Xin-Juan Fan . Current perspectives in drug targeting intrinsically disordered proteins and biomolecular condensates . BMC Biology , 23 ( 1 ), May 2025 . ISSN 1741-7007 . doi: 10.1186/s12915-025-02214-x . URL http://dx.doi.org/10.1186/s12915-025-02214-x. OpenUrl CrossRef ↵ Laura R. Ganser , Megan L. Kelly , Neeraj N. Patwardhan , Amanda E. Hargrove , and Hashim M. Al-Hashimi . Demonstration that small molecules can bind and stabilize low-abundance short-lived rna excited conformational states . Journal of Molecular Biology , 432 ( 4 ): 1297 – 1304 , February 2020 . ISSN 0022-2836 . doi: 10.1016/j.jmb.2019.12.009 . URL http://dx.doi.org/10.1016/j.jmb.2019.12.009. OpenUrl CrossRef PubMed ↵ Yuquan Tong , Jessica L. Childs-Disney , and Matthew D. Disney . Targeting rna with small molecules, from rna structures to precision medicines: Iuphar review: 40 . British Journal of Pharmacology , 181 ( 21 ): 4152 – 4173 , September 2024 . ISSN 1476-5381 . doi: 10.1111/bph.17308 . URL http://dx.doi.org/10.1111/bph.17308. OpenUrl CrossRef PubMed ↵ Paul H. Kussie , Svetlana Gorina , Vincent Marechal , Brian Elenbaas , Jacque Moreau , Arnold J. Levine , and Nikola P. Pavletich . Structure of the mdm2 oncoprotein bound to the p53 tumor suppressor transactivation domain . Science , 274 ( 5289 ): 948 – 953 , November 1996 . ISSN 1095-9203 . doi: 10.1126/science.274.5289.948 . URL http://dx.doi.org/10.1126/science.274.5289.948. OpenUrl Abstract / FREE Full Text ↵ W. Sean Davidson , Ana Jonas , David F. Clayton , and Julia M. George . Stabilization of α-synuclein secondary structure upon binding to synthetic membranes . Journal of Biological Chemistry , 273 ( 16 ): 9443 – 9449 , April 1998 . ISSN 0021-9258 . doi: 10.1074/jbc.273.16.9443 . URL http://dx.doi.org/10.1074/jbc.273.16.9443. OpenUrl Abstract / FREE Full Text ↵ Valerio Mariani , Marco Biasini , Alessandro Barbato , and Torsten Schwede . lddt: a local superposition-free score for comparing protein structures and models using distance difference tests . Bioinformatics , 29 ( 21 ): 2722 – 2728 , August 2013 . ISSN 1367-4811 . doi: 10.1093/bioinformatics/btt473 . URL http://dx.doi.org/10.1093/bioinformatics/btt473. OpenUrl CrossRef PubMed Web of Science ↵ John Jumper , Richard Evans , Alexander Pritzel , Tim Green , Michael Figurnov , Olaf Ronneberger , Kathryn Tunyasuvunakool , Russ Bates , Augustin Žídek , Anna Potapenko , Alex Bridgland , Clemens Meyer , Simon A. A. Kohl , Andrew J. Ballard , Andrew Cowie , Bernardino Romera-Paredes , Stanislav Nikolov , Rishub Jain , Jonas Adler , Trevor Back , Stig Petersen , David Reiman , Ellen Clancy , Michal Zielinski , Martin Steinegger , Michalina Pacholska , Tamas Berghammer , Sebastian Bodenstein , David Silver , Oriol Vinyals , Andrew W. Senior , Koray Kavukcuoglu , Pushmeet Kohli , and Demis Hassabis . Highly accurate protein structure prediction with alphafold . Nature , 596 ( 7873 ): 583 – 589 , July 2021 . ISSN 1476-4687 . doi: 10.1038/s41586-021-03819-2 . URL http://dx.doi.org/10.1038/s41586-021-03819-2. OpenUrl CrossRef PubMed ↵ Zeming Lin , Halil Akin , Roshan Rao , Brian Hie , Zhongkai Zhu , Wenting Lu , Nikita Smetanin , Robert Verkuil , Ori Kabeli , Yaniv Shmueli , Allan dos Santos Costa , Maryam Fazel-Zarandi , Tom Sercu , Salvatore Candido , and Alexander Rives . Evolutionary-scale prediction of atomic-level protein structure with a language model . Science , 379 ( 6637 ): 1123 – 1130 , March 2023 . ISSN 1095-9203 . doi: 10.1126/science.ade2574 . URL http://dx.doi.org/10.1126/science.ade2574. OpenUrl CrossRef PubMed ↵ Hao-Bo Guo , Alexander Perminov , Selemon Bekele , Gary Kedziora , Sanaz Farajollahi , Vanessa Varaljay , Kevin Hinkle , Valeria Molinero , Konrad Meister , Chia Hung , Patrick Dennis , Nancy Kelley-Loughnane , and Rajiv Berry . Alphafold2 models indicate that protein sequence determines both structure and dynamics . Scientific Reports , 12 ( 1 ), June 2022 . ISSN 2045-2322 . doi: 10.1038/s41598-022-14382-9 . URL http://dx.doi.org/10.1038/s41598-022-14382-9. OpenUrl CrossRef PubMed ↵ Carter J. Wilson , Wing-Yiu Choy , and Mikko Karttunen . Alphafold2: A role for disordered protein/region prediction? International Journal of Molecular Sciences , 23 ( 9 ): 4591 , April 2022 . ISSN 1422-0067 . doi: 10.3390/ijms23094591 . URL http://dx.doi.org/10.3390/ijms23094591. OpenUrl CrossRef PubMed ↵ Derya Emin , Yu P. Zhang , Evgeniia Lobanova , Alyssa Miller , Xuecong Li , Zengjie Xia , Helen Dakin , Dimitrios I. Sideris , Jeff Y. L. Lam , Rohan T. Ranasinghe , Antonina Kouli , Yanyan Zhao , Suman De , Tuomas P. J. Knowles , Michele Vendruscolo , Francesco S. Ruggeri , Franklin I. Aigbirhio , Caroline H. Williams-Gray , and David Klenerman . Small soluble α-synuclein aggregates are the toxic species in parkinson’s disease . Nature Communications , 13 ( 1 ), September 2022 . ISSN 2041-1723 . doi: 10.1038/s41467-022-33252-6 . URL http://dx.doi.org/10.1038/s41467-022-33252-6. OpenUrl CrossRef PubMed ↵ Sijing Xia , Qin Chen , and Bing Niu . Cd28: A new drug target for immune disease . Current Drug Targets , 21 ( 6 ): 589 – 598 , April 2020 . ISSN 1389-4501 . doi: 10.2174/1389450120666191114102830 . URL http://dx.doi.org/10.2174/1389450120666191114102830. OpenUrl CrossRef PubMed ↵ Sylvain Peuget , Xiaolei Zhou , and Galina Selivanova . Translating p53-based therapies for cancer into the clinic . Nature Reviews Cancer , 24 ( 3 ): 192 – 215 , January 2024 . ISSN 1474-1768 . doi: 10.1038/s41568-023-00658-3 . URL http://dx.doi.org/10.1038/s41568-023-00658-3. OpenUrl CrossRef ↵ Antti Kukkula , Veera K. Ojala , Lourdes M. Mendez , Lea Sistonen , Klaus Elenius , and Maria Sundvall . Therapeutic potential of targeting the sumo pathway in cancer . Cancers , 13 ( 17 ): 4402 , August 2021 . ISSN 2072-6694 . doi: 10.3390/cancers13174402 . URL http://dx.doi.org/10.3390/cancers13174402. OpenUrl CrossRef ↵ H. M. Berman . The protein data bank . Nucleic Acids Research , 28 ( 1 ): 235 – 242 , January 2000 . ISSN 1362-4962 . doi: 10.1093/nar/28.1.235 . URL http://dx.doi.org/10.1093/nar/28.1.235. OpenUrl CrossRef PubMed Web of Science ↵ Mohammad A. A. Fakhree , Ine Segers Nolten , Christian Blum , and Mireille M. A. E. Claessens . Different conformational subensembles of the intrinsically disordered protein α-synuclein in cells . The Journal of Physical Chemistry Letters , 9 ( 6 ): 1249 – 1253 , February 2018 . ISSN 1948-7185 . doi: 10.1021/acs.jpclett.8b00092 . URL http://dx.doi.org/10.1021/acs.jpclett.8b00092. OpenUrl CrossRef PubMed ↵ Yang Zhang and Jeffrey Skolnick . Scoring function for automated assessment of protein structure template quality . Proteins: Structure, Function, and Bioinformatics , 57 ( 4 ): 702 – 710 , October 2004 . ISSN 1097-0134 . doi: 10.1002/prot.20264 . URL http://dx.doi.org/10.1002/prot.20264. OpenUrl CrossRef PubMed Web of Science ↵ Do-Hyoung Kim , Jongchan Lee , K. Mok , Jung Lee , and Kyou-Hoon Han . Salient features of monomeric alpha-synuclein revealed by nmr spectroscopy . Biomolecules , 10 ( 3 ): 428 , March 2020 . ISSN 2218-273X . doi: 10.3390/biom10030428 . URL http://dx.doi.org/10.3390/biom10030428. OpenUrl CrossRef PubMed ↵ Fangqiang Zhu and Gerhard Hummer . Convergence and error estimation in free energy calculations using the weighted histogram analysis method . Journal of Computational Chemistry , 33 ( 4 ): 453 – 465 , November 2011 . ISSN 1096-987X . doi: 10.1002/jcc.21989 . URL http://dx.doi.org/10.1002/jcc.21989. OpenUrl CrossRef PubMed ↵ Kejia Wu , Hanlun Jiang , Derrick R. Hicks , Caixuan Liu , Edin Muratspahić , Theresa A. Ramelot , Yuexuan Liu , Kerrie McNally , Sebastian Kenny , Andrei Mihut , Amit Gaur , Brian Coventry , Wei Chen , Asim K. Bera , Alex Kang , Stacey Gerben , Mila Ya-Lan Lamb , Analisa Murray , Xinting Li , Madison A. Kennedy , Wei Yang , Zihao Song , Gudrun Schober , Stuart M. Brierley , John O’Neill , Michael H. Gelb , Gaetano T. Montelione , Emmanuel Derivery , and David Baker . Design of intrinsically disordered region binding proteins . Science , 389 ( 6757 ): eadr8063 , 2025 . doi: 10.1126/science.adr8063 . URL https://www.science.org/doi/abs/10.1126/science.adr8063 . OpenUrl CrossRef PubMed ↵ Jakub Lála , Ayham Al-Saffar , and Stefano Angioletti-Uberti . Bagel: Protein engineering via exploration of an energy landscape . July 2025 . doi: 10.1101/2025.07.05.663138 . URL http://dx.doi.org/10.1101/2025.07.05.663138. OpenUrl Abstract / FREE Full Text ↵ E Marinari and G Parisi . Simulated tempering: A new monte carlo scheme . Europhysics Letters (EPL) , 19 ( 6 ): 451 – 458 , July 1992 . ISSN 1286-4854 . doi: 10.1209/0295-5075/19/6/002 . URL http://dx.doi.org/10.1209/0295-5075/19/6/002. OpenUrl CrossRef Web of Science ↵ Koji Hukushima and Koji Nemoto . Exchange monte carlo method and application to spin glass simulations . Journal of the Physical Society of Japan , 65 ( 6 ): 1604 – 1608 , June 1996 . ISSN 1347-4073 . doi: 10.1143/jpsj.65.1604 . URL http://dx.doi.org/10.1143/JPSJ.65.1604. OpenUrl CrossRef ↵ James A. Maier , Carmenza Martinez , Koushik Kasavajhala , Lauren Wickstrom , Kevin E. Hauser , and Carlos Simmerling . ff14sb: Improving the accuracy of protein side chain and backbone parameters from ff99sb . Journal of Chemical Theory and Computation , 11 ( 8 ): 3696 – 3713 , July 2015 . ISSN 1549-9626 . doi: 10.1021/acs.jctc.5b00255 . URL http://dx.doi.org/10.1021/acs.jctc.5b00255. OpenUrl CrossRef PubMed ↵ Hai Nguyen , Daniel R. Roe , and Carlos Simmerling . Improved generalized born solvent model parameters for protein simulations . Journal of Chemical Theory and Computation , 9 ( 4 ): 2020 – 2034 , March 2013 . ISSN 1549-9626 . doi: 10.1021/ct3010485 . URL http://dx.doi.org/10.1021/ct3010485. OpenUrl CrossRef PubMed ↵ Peter Eastman , Jason Swails , John D. Chodera , Robert T. McGibbon , Yutong Zhao , Kyle A. Beauchamp , Lee-Ping Wang , Andrew C. Simmonett , Matthew P. Harrigan , Chaya D. Stern , Rafal P. Wiewiora , Bernard R. Brooks , and Vijay S. Pande . Openmm 7: Rapid development of high performance algorithms for molecular dynamics . PLOS Computational Biology , 13 ( 7 ): e1005659 , July 2017 . ISSN 1553-7358 . doi: 10.1371/journal.pcbi.1005659 . URL http://dx.doi.org/10.1371/journal.pcbi.1005659. OpenUrl CrossRef PubMed ↵ Gareth A Tribello , Massimiliano Bonomi , Davide Branduardi , Carlo Camilloni , and Giovanni Bussi . PLUMED 2: New feathers for an old bird . Comput. Phys. Commun ., 185 ( 2 ): 604 – 613 , February 2014 . OpenUrl CrossRef Web of Science ↵ Ulrich Essmann , Lalith Perera , Max L. Berkowitz , Tom Darden , Hsing Lee , and Lee G. Pedersen . A smooth particle mesh ewald method . The Journal of Chemical Physics , 103 ( 19 ): 8577 – 8593 , November 1995 . ISSN 1089-7690 . doi: 10.1063/1.470117 . URL http://dx.doi.org/10.1063/1.470117. OpenUrl CrossRef Web of Science ↵ G.M. Torrie and J.P. Valleau . Nonphysical sampling distributions in monte carlo free-energy estimation: Umbrella sampling . Journal of Computational Physics , 23 ( 2 ): 187 – 199 , February 1977 . ISSN 0021-9991 . doi: 10.1016/0021-9991(77)90121-8 . URL http://dx.doi.org/10.1016/0021-9991(77)90121-8. OpenUrl CrossRef PubMed Web of Science ↵ Patrick Varilly , Stefano Angioletti-Uberti , Bortolo M. Mognetti , and Daan Frenkel . A general theory of dna-mediated and other valencelimited colloidal interactions . The Journal of Chemical Physics , 137 ( 9 ), September 2012 . ISSN 1089-7690 . doi: 10.1063/1.4748100 . URL http://dx.doi.org/10.1063/1.4748100. OpenUrl CrossRef PubMed ↵ Shankar Kumar , John M. Rosenberg , Djamal Bouzida , Robert H. Swendsen , and Peter A. Kollman . The weighted histogram analysis method for free-energy calculations on biomolecules. i. the method . Journal of Computational Chemistry , 13 ( 8 ): 1011 – 1021 , October 1992 . ISSN 1096-987X . doi: 10.1002/jcc.540130812 . URL http://dx.doi.org/10.1002/jcc.540130812. OpenUrl CrossRef ↵ John Chodera , Andrea Rizzi , Levi Naden , Kyle Beauchamp , Patrick Grinaway , Mike Henry , Iván Pulido , Josh Fass , Alex Wade , Gregory A. Ross , Andreas Kraemer , Hannah Bruce Macdonald Jaimergp , Bas Rustenburg , David W.H. Swenson , Ivy Zhang , Dominic Rufa , Andy Simmonett , Mark J. Williamson, Hb0402, Jake Fennick, Sander Roet, Benjamin Ries, Ian Kenney, Irfan Alibay, Richard Gowers, and Simon Boothroyd . choderalab/openmmtools: 0.24.1 , 2025 . URL doi: 10.5281/zenodo.596622 . OpenUrl CrossRef ↵ Jakub Lála , Arnav Cheruku , and Stefano Angioletti-Uberti . softnanolab/bagel: v0.1.0 - first public release , July 2025 . URL doi: 10.5281/zenodo.15808839 . OpenUrl CrossRef ↵ Y. Zhang . Tm-align: a protein structure alignment algorithm based on the tm-score . Nucleic Acids Research , 33 ( 7 ): 2302 – 2309 , April 2005 . ISSN 1362-4962 . doi: 10.1093/nar/gki524 . URL http://dx.doi.org/10.1093/nar/gki524. OpenUrl CrossRef PubMed Web of Science ↵ Patrick Kunzmann , Tom David Müller , Maximilian Greil , Jan Hendrik Krumbach , Jacob Marcel Anter , Daniel Bauer , Faisal Islam , and Kay Hamacher . Biotite: new tools for a versatile python bioinformatics library . BMC Bioinformatics , 24 ( 1 ), June 2023 . ISSN 1471-2105 . doi: 10.1186/s12859-023-05345-6 . URL http://dx.doi.org/10.1186/s12859-023-05345-6. OpenUrl CrossRef PubMed ↵ B. P. Welford . Note on a method for calculating corrected sums of squares and products . Technometrics , 4 ( 3 ): 419 – 420 , August 1962 . ISSN 1537-2723 . doi: 10.1080/00401706.1962.10490022 . URL http://dx.doi.org/10.1080/00401706.1962.10490022. OpenUrl CrossRef Web of Science ↵ Wolfgang Kabsch and Christian Sander . Dictionary of protein secondary structure: Pattern recognition of hydrogen-bonded and geometrical features . Biopolymers , 22 ( 12 ): 2577 – 2637 , December 1983 . ISSN 1097-0282 . doi: 10.1002/bip.360221211 . URL http://dx.doi.org/10.1002/bip.360221211. OpenUrl CrossRef PubMed Web of Science ↵ Naveen Michaud-Agrawal , Elizabeth J. Denning , Thomas B. Woolf , and Oliver Beckstein . Mdanalysis: A toolkit for the analysis of molecular dynamics simulations . Journal of Computational Chemistry , 32 ( 10 ): 2319 – 2327 , April 2011 . ISSN 1096-987X . doi: 10.1002/jcc.21787 . URL http://dx.doi.org/10.1002/jcc.21787. OpenUrl CrossRef PubMed ↵ Richard Gowers , Max Linke , Jonathan Barnoud , Tyler Reddy , Manuel Melo , Sean Seyler , Jan Domański , David Dotson , Sébastien Buchoux , Ian Kenney , and Oliver Beckstein . Mdanalysis: A python package for the rapid analysis of molecular dynamics simulations . In Proceedings of the 15th Python in Science Conference, SciPy , page 98 – 105 . SciPy, 2016 . doi: 10.25080/majora-629e541a-00e . URL http://dx.doi.org/10.25080/majora-629e541a-00e. OpenUrl CrossRef ↵ Alessandro Barducci , Giovanni Bussi , and Michele Parrinello . Well-tempered metadynamics: A smoothly converging and tunable free-energy method . Physical Review Letters , 100 ( 2 ), January 2008 . ISSN 1079-7114 . doi: 10.1103/physrevlett.100.020603 . URL http://dx.doi.org/10.1103/PhysRevLett.100.020603. OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted November 13, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Harnessing protein-folding algorithms to drug intrinsically disordered epitopes Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Harnessing protein-folding algorithms to drug intrinsically disordered epitopes Jakub Lála , Stefano Angioletti-Uberti bioRxiv 2025.11.11.687846; doi: https://doi.org/10.1101/2025.11.11.687846 Share This Article: Copy Citation Tools Harnessing protein-folding algorithms to drug intrinsically disordered epitopes Jakub Lála , Stefano Angioletti-Uberti bioRxiv 2025.11.11.687846; doi: https://doi.org/10.1101/2025.11.11.687846 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41911) Biophysics (21436) Cancer Biology (18578) Cell Biology (25482) Clinical Trials (138) Developmental Biology (13371) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15599) Genomics (22483) Immunology (17728) Microbiology (40364) Molecular Biology (17163) Neuroscience (88537) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15129) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.