Ensemblify: a user-friendly tool for generating ensembles of intrinsically disordered regions of AlphaFold and user-defined models

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 40,389 characters · extracted from preprint-html · click to expand
Ensemblify: a user-friendly tool for generating ensembles of intrinsically disordered regions of AlphaFold and user-defined models | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Ensemblify: a user-friendly tool for generating ensembles of intrinsically disordered regions of AlphaFold and user-defined models View ORCID Profile Nuno P. Fernandes , View ORCID Profile Tiago Gomes , View ORCID Profile Tiago N. Cordeiro doi: https://doi.org/10.1101/2025.08.26.672300 Nuno P. Fernandes a Instituto de Tecnologia Química e Biológica António Xavier, Universidade Nova de Lisboa , Av. 
da República, 2780-157 Oeiras Portugal Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Nuno P. Fernandes Tiago Gomes a Instituto de Tecnologia Química e Biológica António Xavier, Universidade Nova de Lisboa , Av. da República, 2780-157 Oeiras Portugal Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tiago Gomes For correspondence: tiago.gomes{at}itqb.unl.pt tiago.cordeiro{at}itqb.unl.pt Tiago N. Cordeiro a Instituto de Tecnologia Química e Biológica António Xavier, Universidade Nova de Lisboa , Av. da República, 2780-157 Oeiras Portugal Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tiago N. Cordeiro For correspondence: tiago.gomes{at}itqb.unl.pt tiago.cordeiro{at}itqb.unl.pt Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Motivation Intrinsically disordered proteins (IDPs) and regions (IDRs) challenge structural characterization due to their dynamic conformational ensembles and lack of stable structure. Existing computational approaches for modelling these ensembles are either computationally intensive, limited in flexibility, or inaccessible to non-experts, especially when dealing with multi-domain or multi-chain proteins. Results We present Ensemblify, an open-source, user-friendly Python package for generating and analyzing conformational ensembles of IDPs/IDRs. Ensemblify uses a Monte Carlo algorithm coupled with neighbour-aware sampling of dihedral angles from curated or user-defined fragment libraries to explore conformational space. It directly incorporates information from AlphaFold’s confidence metrics as flexible energy restraints in PyRosetta to guide the sampling. It supports multi-chain and multi-domain proteins and can sample N-terminal, C-terminal, and inter-domain linkers while preserving folded regions. 
Ensemble quality can be validated and refined against experimental data such as SAXS via Bayesian/Maximum Entropy (BME) reweighting. Interactive dashboards provide in-depth structural analysis and comparison. Testing across 10 diverse proteins demonstrated Ensemblify’s accuracy, flexibility, and ability to recover experimentally observed structural features. Incorporating AlphaFold confidence metrics shows potential to improve the ensemble-data agreement. Availability Ensemblify is freely available at https://github.com/CordeiroLab/ensemblify , along with detailed installation instructions and usage tutorials. Ensemblify can be used for scripting through its Python API or directly through the provided command-line interface (CLI). Complete documentation is available within the source-code and CLI and on Ensemblify’s official documentation page ( https://ensemblify.readthedocs.io ). Contact tiago.gomes{at}itqb.unl.pt , tiago.cordeiro{at}itqb.unl.pt Supplementary information Supplementary data is available online. Introduction Intrinsically disordered proteins (IDPs) are a unique class of proteins that lack a stable, well-defined three-dimensional structure under physiological conditions ( Holehouse and Kragelund 2024 ). Unlike their folded counterparts, IDPs exist as highly dynamic conformational ensembles ( Tesei et al . 2024 ) but often exhibit nonrandom structural heterogeneity that enables specific interactions with various biological partners ( Tompa et al . 2015 , Csizmok et al . 2016 ). The emergence of AlphaFold has enabled accurate prediction of folded protein structures at the proteome scale ( Jumper et al . 2021 , Varadi et al . 2024 ). However, reliably modelling IDPs remains a major challenge ( Ruff and Pappu 2021 , Lane 2023 ). Notably, AlphaFold’s predicted Local Distance Difference Test (pLDDT) metric has emerged as a reliable indicator of disorder, with low pLDDT values correlating with unstructured regions ( Alderson et al . 2023 ). 
Despite this, AlphaFold’s potential to identify and characterize disordered regions has been largely underutilized in probing the conformational landscapes of IDPs and intrinsically disordered regions (IDRs). Consequently, the conformational properties of IDPs/IDRs remain poorly understood and challenging to predict, also owing to their low sequence conservation and limited experimental characterization. Thus, ensemble generation remains essential for capturing their dynamic nature and for modelling their interactions with binding partners ( Moses et al . 2023 ). Exploring the conformational landscape of IDPs/IDRs is computationally intensive and often requires expert knowledge. The current state of the art has evolved into three main approaches: (i) structural database sampling (SDS), which samples dihedral angles from probability distributions derived from experimental data ( Feldman and Hogue 2000 , Ozenne et al . 2012 , Estaña et al . 2019 , Harmat, Dudola and Gáspári 2021 , Teixeira et al . 2022 , Liu et al . 2023 , Pajkos et al . 2025 ); (ii) molecular dynamics (MD) simulations using physical force fields ( Pietrek, Stelzl and Hummer 2020 , Shrestha, Smith and Petridis 2021 , Thomasen et al . 2024 ); and (iii) artificial intelligence/machine learning (AI/ML) methods ( Del Alamo et al . 2022 , Janson et al . 2023 , Aupič et al . 2024 , Wayment-Steele et al . 2024 , Kalakoti and Wallner 2024 , Wu et al . 2024 ). All-atom MD and advanced AI/ML methods, while offering high resolution and predictive power, are computationally expensive and typically require domain expertise. In contrast, coarse-grained (CG)-MD is more computationally efficient and allows access to broader conformational sampling of IDPs ( Cao et al . 2024 , Wang et al . 2025 ). However, this comes at the cost of reduced atomic resolution and underestimation of local structural detail, which can be critical for modelling IDPs accurately. 
SDS methods often prioritize accessibility through web servers ( Ziegler et al . 2016 , Pajkos et al . 2025 ) at the expense of versatility and user customization, or they offer flexibility with complex setups and limited documentation, making them inaccessible to less experienced researchers ( Ozenne et al . 2012 , Harmat, Dudola and Gáspári 2021 , Teixeira et al . 2022 , Liu et al . 2023 ). Moreover, current methods are usually not applicable to multi-domain or multi-chain proteins, whose structural dynamics remain challenging to model due to the IDRs that make up inter-domain linkers or terminal tails. AlphaFold’s confidence metrics, such as the pLDDT and the predicted aligned error (PAE) ( Guo et al . 2022 , Brotzakis et al . 2025 ), convey structural information that could help overcome these limitations, but they are rarely exploited in ensemble generation. Likewise, the nonrandom secondary structure propensities in IDPs/IDRs are not always captured by available methods. To address these challenges, we introduce Ensemblify, a Python package for generating protein conformational ensembles. Ensemblify is fast, versatile, open-source, and easily extensible, making it suitable for users with differing levels of expertise. It supports diverse protein structural architectures, directly incorporates information from AlphaFold’s confidence metrics, and realistically samples fractional secondary structure propensities in disordered regions. Additionally, it features robust documentation, a user-friendly HTML parameters form, and a command-line interface (CLI) that offers easy access to its key features. By combining ensemble generation, analysis, experimental validation, and refinement through user-friendly dashboards, Ensemblify provides a powerful, accessible resource for both research and teaching. 
Description and functionality Workflow & Implementation To generate a conformational ensemble, Ensemblify requires a parameters file with mandatory input fields, including the input structure or sequence and a dihedral angle database ( Fig. 1 ). This file can be easily created using the provided user-friendly HTML interface (Supplementary Fig. S6). The input structure can be a predicted model from AlphaFold, optionally including its PAE matrix, a user-defined model, or an amino acid sequence in the case of full IDPs ( Fig. 1A ). Users may also combine experimentally determined structures of folded protein domains with sequences of disordered linker or terminal regions to create a full-length starting structure. To do this, the user must provide the sequences of all protein regions (folded and disordered) in FASTA format, and the structures of folded domains as PDB files, in order from N- to C-terminal. Alternatively, a UniProt accession number can be provided in place of a structure and/or PAE matrix to automatically retrieve the corresponding file from the AlphaFold Protein Structure Database ( Varadi et al . 2024 ). Download figure Open in new tab Figure 1. General workflow of the Ensemblify software tool. A . Input structure can be provided in various forms: an AF-2 model, with or without its PAE matrix, a UniProt accession number, a user-defined model, or a protein sequence. B . All mandatory inputs like the input structure, dihedral angle database and target sampling regions are defined through a user-friendly HTML form, which outputs a properly formatted YAML parameters file for ensemble generation. C . Based on the YAML file, conformational ensembles are generated by sampling dihedral angles from the user-provided database and inserting them into the protein backbone using a Monte Carlo algorithm. This is followed by side chain repacking using FASPR and a steric clash check using PULCHRA. 
Ensembles can sample flexible N- and/or C-terminal tails and/or inter-domain linkers across diverse protein architectures. D . Interactive HTML dashboards enable exploration of ensemble-averaged structural properties and intuitive data visualization. E . Experimental data ( e . g . SAXS) can be used to reweight the ensemble via the BME method. Structural metrics of reweighted ensembles are compared to their uniformly weighted counterparts through a dedicated reweighting dashboard. Target sampling regions can be manually appointed for each protein chain in the input structure using the parameters file. Any regions not selected are restrained to their initial configuration ( Fig. 1B ). Each designated sampling region must be assigned a database and a sampling mode: tripeptide or single residue ( Estaña et al . 2019 , González-Delgado et al . 2022 ), which respectively dictate if neighbouring residues are considered or ignored during sampling. By default, all residues within the specified ranges are sampled. When using an AlphaFold model, users can also choose to automatically restrict sampling to residues within these regions with pLDDT below a certain threshold (default is 70). Ensemble generation in Ensemblify relies on PyRosetta, the Python interface for the widely used Rosetta molecular modelling suite ( Chaudhury, Lyskov and Gray 2010 ). Prior to sampling, the input structure is processed by converting side-chains to a ‘centroid’ representation to increase sampling efficiency (Supplementary Information S1.1). Ensembles are generated through protein backbone conformational sampling ( Fig. 1C , Supplementary Information S1.2). This process is guided by energy restraints that preserve the structure of folded domains and inter-chain contacts, with optional additional restraints to bias sampling towards desired secondary structure properties (Supplementary Information S1.2.2). 
During sampling, Ensemblify performs a Monte Carlo move targeting each residue within a defined sampling region, continuing until all residues have been targeted. In each move, the protein’s backbone is perturbed by sampling dihedral angles φ, ψ and ω from a normal distribution centered around values found in a dihedral angle database. Ensemblify offers a three-residue fragment dihedral angle database but also supports user-defined databases. Multiple databases can be used in the same sampling protocol, and strategies to reduce memory usage of large databases are detailed in Supplementary Information S1.2.3. After each Monte Carlo move, the energy of the newly generated conformation is evaluated using a Rosetta score function. This includes a weak Van der Waals repulsive term to penalize steric clashes and additional terms to penalize the violation of applied structural restraints. The move is then accepted or rejected according to the standard Metropolis criterion ( Metropolis et al . 1953 ) (Supplementary Information S1.2.1, Supplementary Equation S1). Once all target residues have been sampled, the resulting structure undergoes a minimization protocol before output. To restore atomic detail, side chains of the sampled regions are repacked using FASPR, an ultra-fast and accurate program for deterministic protein side chain packing ( Huang, Pearce and Zhang 2020 ). Final structures are then processed with PULCHRA ( Rotkiewicz and Skolnick 2008 ) to check for steric clashes using a 2 Å threshold, and any structures with clashes are discarded. The sampling cycle is repeated until the desired number of valid sampled structures is reached. Strategies used to further increase variability between sampled structures are detailed in Supplementary Information S1.2.4. Given a protein conformational ensemble, Ensemblify can create an interactive structural analysis dashboard that supports zooming, panning and trace selection for all figures ( Figure 1D , Supplementary Fig. 
S7). When multiple ensembles are provided, they can be easily compared through a similar analysis dashboard (Supplementary Fig. S8). Analysis metrics include a contact frequency map with a 4.5 Å threshold, a C α average distance matrix, and a secondary structure assignment frequency plot following the simplified DSSP ( Kabsch and Sander 1983 ) classifications of α-helix, β-sheet and random coil. Additionally, the dashboard provides probability density distributions for the radius of gyration ( R g ), maximum C α distance ( D max ) and end-to-end distance ( D ee ), with optional center-of-mass distance distributions between any two user-defined protein regions. Calculation of additional analysis metrics can be easily implemented by exploiting Ensemblify’s modular architecture. When provided with experimental small angle X-ray scattering (SAXS) data, Ensemblify can reweight an existing structural ensemble through a Bayesian/Maximum Entropy (BME) approach ( Bottaro, Bengtsen and Lindorff-Larsen 2018 ) (Supplementary Information S2). This process outputs an interactive graphical dashboard that compares the original and reweighted ensembles based on the previously mentioned analysis metrics, as well as their agreement with SAXS data ( Fig. 1E , Supplementary Fig. S9). The BME method involves finding the set of optimal conformer weights (Supplementary Equation S4) that achieve good agreement with experimental data ( i . e ., low reduced χ 2 ; Supplementary Equation S5) while maximizing relative entropy ( i . e ., high ϕ eff ; Supplementary Equations S6-S7). Tuning the scaling parameter θ in Supplementary Equation S4 results in different values of χ 2 and ϕ eff . Consequently, plotting ϕ eff versus χ 2 as θ changes leads to the optimal value of θ , located at the point of maximum curvature, i . e . the “elbow” of the plot ( Fig. 1E , Supplementary Fig. S5). Ensemble generation in Ensemblify is parallelized using Ray ( Moritz et al . 
2017 ), a distributed computing library for scaling Python applications. Ensemble analysis and reweighting calculations rely on the MDAnalysis ( Michaud-Agrawal et al . 2011 , Gowers et al . 2016 ), MDtraj ( McGibbon et al . 2015 ), Pandas ( McKinney 2010 ), NumPy ( Harris et al . 2020 ) and SciPy ( Virtanen et al . 2020 ) Python libraries in combination with the multiprocessing Python library to speed up calculations. Interactive graphical dashboards are created using Plotly ( Inc 2015 ), a browser-based Python graphing library. All the calculated structural analysis data used in the creation of interactive dashboards, including back-calculated SAXS profiles and their fitting to experimental SAXS data, are provided as plain text files, allowing users to apply their own analysis pipelines. Applications Ensemblify has been successfully applied to proteins of varying size and structural architectures (Supplementary Fig. S10, Supplementary Information S3.2), showcasing its ability to sample N-terminal and C-terminal disordered tails, as well as inter-domain linkers, while preserving the structure of folded domains and, for multi-chain proteins, inter-chain interfaces. Full IDPs tested (Supplementary Figs S10A, S11-S12, Supplementary Information 3.2.1) include: ( 1 ) Histatin5 (Hst5) , an antimicrobial peptide found naturally in human saliva ( Thomasen et al . 2024 , Jephthah et al . 2019 ), and ( 2 ) α-synuclein (aSyn) , a presynaptic neuronal protein implicated in Parkinson’s disease ( Thomasen et al . 2024 , Ahmed et al . 2021 ). Multi-domain proteins with flexible linkers (Supplementary Figs S10B, S13-S17, Supplementary Information 3.2.2): ( 3 ) A truncated construct of cardiac myosin-binding protein C (cMyBP-C) , which plays important roles in the muscle sarcomere by interacting with myosin and actin, composed of the tri-helix bundle of the m-domain connected to the C2 domain by a flexible linker (cMyBP-C mTHB-C2 ) ( Thomasen et al . 2024 , Michie et al . 
2016 ); ( 4 ) Linear tetraubiquitin (Ubq 4 ) ( Thomasen et al . 2024 , Jussupow et al . 2020 ); ( 5 ) A C-terminal truncated construct of TIA1 , a regulator of transcription and RNA translation ( Thomasen et al . 2024 , Sonntag et al . 2017 ); and ( 6 ) SMAD4 , a mediator of TGF-β signal transduction ( Gomes et al . 2021 ). Proteins with folded domains and long disordered tails (Supplementary Figs S10C, S18-S20, Supplementary Information 3.2.3): ( 7 ) a construct of the SH4, Unique and SH3 domains of non-receptor tyrosine kinase Src (USH3) ( Arbesú et al . 2017 ), implicated in cell signaling pathways related to cell growth, migration, invasion, and survival; ( 8 ) Galectin-3 (Gal-3) ( Thomasen et al . 2024 , Lin et al . 2017 ), a lectin that binds to β-galactoside and can regulate cell signaling; and ( 9 ) heterogeneous nuclear ribonucleoprotein A1 (hnRNPA1) ( Thomasen et al . 2024 , Martin et al . 2021 ), that has an intrinsically disordered low-complexity domain where mutations can lead to amyotrophic lateral sclerosis. Multi-chain protein (Supplementary Figs S10D, S21, Supplementary Information 3.2.4): ( 10 ) a homohexamer construct of the C-terminal product subunit of the endolysin of S. thermophilus phage P7951 (LysP7951) ( Jumper et al . 2021 , Pinto et al . 2022 ). A comprehensive description and analysis of these case studies is provided in Supplementary Information 3.2. Theoretical SAXS profiles calculated from the ensembles of most of the tested proteins show reasonable agreement with experimental SAXS data, and, in all cases, this fitting improves after BME ensemble reweighting. The time required to generate ensembles of 10,000 structures per protein, along with references for the protein structures and corresponding experimental SAXS data, are provided in Supplementary Tables S3-S4. Ensemblify offers a way to convert the structural information from AlphaFold’s PAE matrix ( Guo et al . 2022 , Brotzakis et al . 
2025 ) into energy restraints that guide conformational sampling (Supplementary Information S1.2.5). We evaluated this approach by generating a Gal-3 ensemble from an AF-2 prediction, using both pLDDT and PAE metrics to inform the sampling process. This ensemble was compared to one generated from a user-defined model (Supplementary Information 3.3). The latter, composed predominantly of extended conformations (average R g of ∼33 Å), initially showed poor agreement with experimental SAXS data ( χ 2 = 20.16), though this improved substantially following BME reweighting ( χ 2 reweighted = 1.50). Incorporating AF-2-derived information into the sampling led to a more compact ensemble (average Rg of ∼24 Å) with better SAXS agreement ( χ 2 = 11.95). However, after BME reweighting, the fit ( χ 2 reweighted = 2.45) remained inferior to that of the ensemble generated without AF input — likely due to the presence of overly compact conformations. This motivated the generation of a third ensemble in which selected inter-residue energy restraints derived from the PAE matrix were attenuated by a factor of 30, as described in Supplementary Information S1.2.5. This adjustment yielded an ensemble of moderately compact conformations (average Rg of ∼26 Å), leading to improved SAXS agreement both before ( χ 2 = 4.96) and after BME reweighting ( χ 2 reweighted = 1.32), surpassing the performance of the ensemble generated without AF-2 information. These results underscore the importance of tuning the scaling of PAE-derived restraints (parameter γ in Supplementary Equation S3) and highlight the potential for users to optimize this parameter to achieve better agreement with experimental data. Conclusions We present Ensemblify, a user-friendly computational framework that enables the generation of conformational ensembles for IDPs/IDRs using neighbour-aware structural database sampling. 
It offers ways to combine the experimentally determined structured domains with disordered segments into unified hybrid globular/disordered protein structures, which can serve as sampling starting points and enable a holistic exploration of a protein’s conformational space. Ensemblify seamlessly integrates ensemble analysis with experimental validation through interactive dashboards. It is also well-documented, intuitive, and modular, allowing for easy addition of new features. Testing Ensemblify on a diverse benchmark set of ten proteins demonstrated that the generated ensembles closely match experimental SAXS data and accurately capture experimentally determined secondary structure propensities in disordered regions. In all cases, BME reweighting further improved agreement with experiments. Unlike existing approaches, Ensemblify uniquely integrates AlphaFold-derived confidence metrics to inform conformational sampling, by using pLDDT scores to define flexible regions and converting PAE matrix values into tunable energy restraints. This strategy led to ensembles that better fit experimental SAXS data, highlighting the potential of leveraging AlphaFold towards improved ensemble accuracy. By bridging predictive modelling with experimental validation in an accessible and extensible platform, Ensemblify holds promise as a valuable resource for both research and education in structural biology. Funding information TNC is the recipient of the CEECIND/01443/2017 grant. National funds funded this work through FCT: Project MOSTMICRO-ITQB (UIDB/04612/2020, UIDP/04612/2020), FEDER Funds through COMPETE 2020 (0145-FEDER-007660), LS4FUTURE (LA/P/0087/2020), and a SR&TD project (PTDC/BIA-BFS/0391/2021). Acknowledgements We thank Manuel N. Melo (ITQB NOVA) for his insightful help, suggestions, and access to the in-house computer cluster. We thank the members of the Dynamic Structural Lab (ITQB NOVA) for testing this tool. 
Funder Information Declared Fundação para a Ciência e Tecnologia, https://ror.org/00snfqn58 , PTDC/BIA-BFS/0391/2021 , LA/P/0087/2020 , UIDP/04612/2020 , UIDB/04612/2020 , CEECIND/01443/2017 Footnotes https://github.com/CordeiroLab/ensemblify https://ensemblify.readthedocs.io References ↵ Ahmed MC , Skaanning LK , Jussupow A et al. Refinement of α-Synuclein Ensembles Against SAXS Data: Comparison of Force Fields and Methods . Front Mol Biosci 2021 ; 8 : 654333 . OpenUrl PubMed ↵ Alderson TR , Pritišanac I , Kolarić Đ et al. Systematic identification of conditionally folded intrinsically disordered regions by AlphaFold2 . Proc Natl Acad Sci USA 2023 ; 120 : e2304302120 . OpenUrl CrossRef PubMed ↵ Arbesú M , Maffei M , Cordeiro TN et al. The unique domain forms a fuzzy intramolecular complex in src family kinases . Structure 2017 ; 25 : 630 - 640.e4 . OpenUrl CrossRef PubMed ↵ Aupič J , Pokorná P , Ruthstein S et al. Predicting conformational ensembles of intrinsically disordered proteins: from molecular dynamics to machine learning . J Phys Chem Lett 2024 ; 15 : 8177 – 86 . OpenUrl PubMed ↵ Bottaro S , Bengtsen T , Lindorff-Larsen K. Integrating molecular simulation and experimental data: A bayesian/maximum entropy reweighting approach . BioRxiv 2018 , DOI: 10.1101/457952 . OpenUrl Abstract / FREE Full Text ↵ Brotzakis ZF , Zhang S , Murtada MH et al. AlphaFold prediction of structural ensembles of disordered proteins . Nat Commun 2025 ; 16 : 1632 . OpenUrl CrossRef PubMed ↵ Cao F , von Bülow S , Tesei G et al. A coarse-grained model for disordered and multi-domain proteins . Protein Sci 2024 ; 33 : e5172 . OpenUrl CrossRef PubMed ↵ Chaudhury S , Lyskov S , Gray JJ . PyRosetta: a script-based interface for implementing molecular modeling algorithms using Rosetta . Bioinformatics 2010 ; 26 : 689 – 91 . OpenUrl CrossRef PubMed Web of Science ↵ Csizmok V , Follis AV , Kriwacki RW et al. Dynamic protein interaction networks and new structural paradigms in signaling . 
Chem Rev 2016 ; 116 : 6424 – 62 . OpenUrl CrossRef PubMed ↵ Del Alamo D , Sala D , Mchaourab HS et al. Sampling alternative conformational states of transporters and receptors with AlphaFold2 . eLife 2022 ; 11 , DOI: 10.7554/eLife.75751 . OpenUrl CrossRef PubMed ↵ Estaña A , Sibille N , Delaforge E et al. Realistic Ensemble Models of Intrinsically Disordered Proteins Using a Structure-Encoding Coil Database . Structure 2019 ; 27 : 381 - 391.e2 . OpenUrl CrossRef ↵ Feldman HJ , Hogue CW . A fast method to sample real protein conformational space . Proteins 2000 ; 39 : 112 – 31 . OpenUrl CrossRef PubMed Web of Science ↵ Gomes T , Martin-Malpartida P , Ruiz L et al. Conformational landscape of multidomain SMAD proteins . Comput Struct Biotechnol J 2021 ; 19 : 5210 – 24 . OpenUrl CrossRef PubMed ↵ González-Delgado J , Bernadó P , Neuvial P et al. Statistical proofs of the interdependence between nearest neighbor effects on polypeptide backbone conformations . J Struct Biol 2022 ; 214 : 107907 . OpenUrl PubMed ↵ Gowers R , Linke M , Barnoud J et al. Mdanalysis: A python package for the rapid analysis of molecular dynamics simulations . Proceedings of the 15th Python in Science Conference. SciPy , 2016 , 98 – 105 . ↵ Guo H-B , Perminov A , Bekele S et al. AlphaFold2 models indicate that protein sequence determines both structure and dynamics . Sci Rep 2022 ; 12 : 10696 . OpenUrl CrossRef PubMed ↵ Harmat Z , Dudola D , Gáspári Z. DIPEND: An Open-Source Pipeline to Generate Ensembles of Disordered Segments Using Neighbor-Dependent Backbone Preferences . Biomolecules 2021 ; 11 , DOI: 10.3390/biom11101505 . OpenUrl CrossRef ↵ Harris CR , Millman KJ , van der Walt SJ et al. Array programming with NumPy . Nature 2020 ; 585 : 357 – 62 . OpenUrl CrossRef PubMed ↵ Holehouse AS , Kragelund BB . The molecular basis for cellular function of intrinsically disordered protein regions . Nat Rev Mol Cell Biol 2024 ; 25 : 187 – 211 . OpenUrl CrossRef PubMed ↵ Huang X , Pearce R , Zhang Y. 
FASPR: an open-source tool for fast and accurate protein side-chain packing . Bioinformatics 2020 ; 36 : 3758 – 65 . OpenUrl CrossRef PubMed ↵ Inc PT . Collaborative data science . 2015 . ↵ Janson G , Valdes-Garcia G , Heo L et al. Direct generation of protein conformational ensembles via machine learning . Nat Commun 2023 ; 14 : 774 . OpenUrl CrossRef PubMed ↵ Jephthah S , Staby L , Kragelund BB et al. Temperature dependence of intrinsically disordered proteins in simulations: what are we missing? J Chem Theory Comput 2019 ; 15 : 2672 – 83 . OpenUrl CrossRef PubMed ↵ Jumper J , Evans R , Pritzel A et al. Highly accurate protein structure prediction with AlphaFold . Nature 2021 ; 596 : 583 – 9 . OpenUrl CrossRef PubMed ↵ Jussupow A , Messias AC , Stehle R et al. The dynamics of linear polyubiquitin . Sci Adv 2020 ; 6 , DOI: 10.1126/sciadv.abc3786 . OpenUrl FREE Full Text ↵ Kabsch W , Sander C. Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features . Biopolymers 1983 ; 22 : 2577 – 637 . OpenUrl CrossRef PubMed Web of Science ↵ Kalakoti Y , Wallner B. AFsample2: Predicting multiple conformations and ensembles with AlphaFold2 . BioRxiv 2024 , DOI: 10.1101/2024.05.28.596195 . OpenUrl Abstract / FREE Full Text ↵ Lane TJ . Protein structure prediction has reached the single-structure frontier . Nat Methods 2023 ; 20 : 170 – 3 . OpenUrl CrossRef PubMed ↵ Lin Y-H , Qiu D-C , Chang W-H et al. The intrinsically disordered N-terminal domain of galectin-3 dynamically mediates multisite self-association of the protein through fuzzy interactions . J Biol Chem 2017 ; 292 : 17845 – 56 . OpenUrl Abstract / FREE Full Text ↵ Liu ZH , Teixeira JMC , Zhang O et al. Local Disordered Region Sampling (LDRS) for ensemble modeling of proteins with experimentally undetermined or low confidence prediction segments . Bioinformatics 2023 ; 39 , DOI: 10.1093/bioinformatics/btad739 . 
OpenUrl CrossRef ↵ Martin EW , Thomasen FE , Milkovic NM et al. Interplay of folded domains and the disordered low-complexity domain in mediating hnRNPA1 phase separation . Nucleic Acids Res 2021 ; 49 : 2931 – 45 . OpenUrl CrossRef PubMed ↵ McGibbon RT , Beauchamp KA , Harrigan MP et al. MDTraj: A modern open library for the analysis of molecular dynamics trajectories . Biophys J 2015 ; 109 : 1528 – 32 . OpenUrl CrossRef PubMed ↵ McKinney W. Data structures for statistical computing in Python . Proceedings of the 9th Python in Science Conference. SciPy , 2010 , 56 – 61 . ↵ Metropolis N , Rosenbluth AW , Rosenbluth MN et al. Equation of state calculations by fast computing machines . J Chem Phys 1953 ; 21 : 1087 . OpenUrl CrossRef Web of Science ↵ Michaud-Agrawal N , Denning EJ , Woolf TB et al. MDAnalysis: a toolkit for the analysis of molecular dynamics simulations . J Comput Chem 2011 ; 32 : 2319 – 27 . OpenUrl CrossRef PubMed ↵ Michie KA , Kwan AH , Tung C-S et al. A Highly Conserved Yet Flexible Linker Is Part of a Polymorphic Protein-Binding Domain in Myosin-Binding Protein C . Structure 2016 ; 24 : 2000 – 7 . OpenUrl CrossRef ↵ Moritz P , Nishihara R , Wang S et al. Ray: A Distributed Framework for Emerging AI Applications . arXiv 2017 , DOI: 10.48550/arxiv.1712.05889 . OpenUrl CrossRef ↵ Moses D , Ginell GM , Holehouse AS et al. Intrinsically disordered regions are poised to act as sensors of cellular chemistry . Trends Biochem Sci 2023 ; 48 : 1019 – 34 . OpenUrl CrossRef PubMed ↵ Ozenne V , Bauer F , Salmon L et al. Flexible-meccano: a tool for the generation of explicit ensemble descriptions of intrinsically disordered proteins and their associated experimental observables . Bioinformatics 2012 ; 28 : 1463 – 70 . OpenUrl CrossRef PubMed Web of Science ↵ Pajkos M , Clerc I , Zanon C et al. AFflecto: A web server to generate conformational ensembles of flexible proteins from AlphaFold models . J Mol Biol 2025 : 169003 . ↵ Pietrek LM , Stelzl LS , Hummer G. 
Hierarchical ensembles of intrinsically disordered proteins at atomic resolution in molecular dynamics simulations . J Chem Theory Comput 2020 ; 16 : 725 – 37 . OpenUrl CrossRef PubMed ↵ Pinto D , Gonçalo R , Louro M et al. On the Occurrence and Multimerization of Two-Polypeptide Phage Endolysins Encoded in Single Genes . Microbiol Spectr 2022 ; 10 : e0103722 . OpenUrl ↵ Rotkiewicz P , Skolnick J. Fast procedure for reconstruction of full-atom protein models from reduced representations . J Comput Chem 2008 ; 29 : 1460 – 5 . OpenUrl CrossRef PubMed Web of Science ↵ Ruff KM , Pappu RV . AlphaFold and implications for intrinsically disordered proteins . J Mol Biol 2021 ; 433 : 167208 . OpenUrl CrossRef PubMed ↵ Shrestha UR , Smith JC , Petridis L. Full structural ensembles of intrinsically disordered proteins from unbiased molecular dynamics simulations . Commun Biol 2021 ; 4 : 243 . OpenUrl PubMed ↵ Sonntag M , Jagtap PKA , Simon B et al. Segmental, Domain-Selective Perdeuteration and Small-Angle Neutron Scattering for Structural Analysis of Multi-Domain Proteins . Angew Chem Int Ed 2017 ; 56 : 9322 – 5 . OpenUrl CrossRef ↵ Teixeira JMC , Liu ZH , Namini A et al. IDPConformerGenerator: A flexible software suite for sampling the conformational space of disordered protein states . J Phys Chem A 2022 ; 126 : 5985 – 6003 . OpenUrl PubMed ↵ Tesei G , Trolle AI , Jonsson N et al. Conformational ensembles of the human intrinsically disordered proteome . Nature 2024 ; 626 : 897 – 904 . OpenUrl CrossRef PubMed ↵ Thomasen FE , Skaalum T , Kumar A et al. Rescaling protein-protein interactions improves Martini 3 for flexible proteins in solution . Nat Commun 2024 ; 15 : 6645 . OpenUrl CrossRef PubMed ↵ Tompa P , Schad E , Tantos A et al. Intrinsically disordered proteins: emerging interaction specialists . Curr Opin Struct Biol 2015 ; 35 : 49 – 59 . OpenUrl CrossRef PubMed ↵ Varadi M , Bertoni D , Magana P et al. 
AlphaFold Protein Structure Database in 2024: providing structure coverage for over 214 million protein sequences . Nucleic Acids Res 2024 ; 52 : D368 – 75 . OpenUrl CrossRef PubMed ↵ Virtanen P , Gommers R , Oliphant TE et al. SciPy 1.0: fundamental algorithms for scientific computing in Python . Nat Methods 2020 ; 17 : 261 – 72 . OpenUrl CrossRef PubMed ↵ Wang L , Brasnett C , Borges-Araújo L et al. Martini3-IDP: improved Martini 3 force field for disordered proteins . Nat Commun 2025 ; 16 : 2874 . OpenUrl CrossRef PubMed ↵ Wayment-Steele HK , Ojoawo A , Otten R et al. Predicting multiple conformations via sequence clustering and AlphaFold2 . Nature 2024 ; 625 : 832 – 9 . OpenUrl CrossRef PubMed ↵ Wu KE , Yang KK , van den Berg R et al. Protein structure generation via folding diffusion . Nat Commun 2024 ; 15 : 1059 . OpenUrl CrossRef PubMed ↵ Ziegler Z , Schmidt M , Gurry T et al. Mollack: a web server for the automated creation of conformational ensembles for intrinsically disordered proteins . Bioinformatics 2016 ; 32 : 2545 – 7 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted August 30, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Ensemblify: a user-friendly tool for generating ensembles of intrinsically disordered regions of AlphaFold and user-defined models Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Ensemblify: a user-friendly tool for generating ensembles of intrinsically disordered regions of AlphaFold and user-defined models Nuno P. Fernandes , Tiago Gomes , Tiago N. Cordeiro bioRxiv 2025.08.26.672300; doi: https://doi.org/10.1101/2025.08.26.672300 Share This Article: Copy Citation Tools Ensemblify: a user-friendly tool for generating ensembles of intrinsically disordered regions of AlphaFold and user-defined models Nuno P. Fernandes , Tiago Gomes , Tiago N. Cordeiro bioRxiv 2025.08.26.672300; doi: https://doi.org/10.1101/2025.08.26.672300 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17691) Bioengineering (13892) Bioinformatics (41936) Biophysics (21452) Cancer Biology (18588) Cell Biology (25504) Clinical Trials (138) Developmental Biology (13378) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24320) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88605) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15153) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper: AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citing papers typically take a year or two to appear, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00