Balancing Speed and Precision in Protein Folding: A Comparison of AlphaFold2, ESMFold, and OmegaFold

doi:10.1101/2025.06.20.660709

Balancing Speed and Precision in Protein Folding: A Comparison of AlphaFold2, ESMFold, and OmegaFold

2025 · doi:10.1101/2025.06.20.660709

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 43,913 characters · extracted from preprint-html · click to expand

Balancing Speed and Precision in Protein Folding: A Comparison of AlphaFold2, ESMFold, and OmegaFold | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Balancing Speed and Precision in Protein Folding: A Comparison of AlphaFold2, ESMFold, and OmegaFold Anna Hýskova , Eva Maršálková , View ORCID Profile Petr Šimeček doi: https://doi.org/10.1101/2025.06.20.660709 Anna Hýskova 1 CEITEC, Masaryk University , Kamenice 5, 62500, Brno, Czechia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Eva Maršálková 1 CEITEC, Masaryk University , Kamenice 5, 62500, Brno, Czechia 2 National 
Centre for Biomolecular Research, Faculty of Science, Masaryk University , Kamenice 5, 625 00, Brno, Czech Republic Find this author on Google Scholar Find this author on PubMed Search for this author on this site Petr Šimeček 1 CEITEC, Masaryk University , Kamenice 5, 62500, Brno, Czechia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Petr Šimeček For correspondence: simecek{at}mail.muni.cz Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract We compared the performance of three widely used protein structure prediction tools—AlphaFold2, ESMFold, and OmegaFold—using a dataset of over 1,300 newly created records from the PDB database. These structures, resolved between July 2022 and July 2024, ensure unbiased evaluation, as they were unavailable during the training of these tools. Using metrics such as root mean square deviation (RMSD), template modeling score (TM-score), and predicted local distance difference test (pLDDT), we found that AlphaFold2 consistently achieves the highest accuracy but depends on high-quality sequence alignments. In contrast, ESMFold and OmegaFold provide faster predictions and excel in challenging cases, such as rapidly evolving or designed proteins with limited sequence homology. Comparing ESMFold and OmegaFold, ESMFold achieves higher confidence scores (pLDDT) and structural similarity (TM-score). OmegaFold is competitive in specific contexts, such as de novo-designed proteins or sequences with limited evolutionary information. Additionally, we demonstrate that machine learning models trained on protein language model embeddings and pLDDT confidence scores can predict potential structure prediction failures, helping to identify challenging cases early in the pipeline. 
Introduction All living organisms—from simple bacteria and algae to plants, fungi, animals, and humans—contain a multitude of proteins that participate in virtually every cellular process [ 1 , 2 ]. These molecular machines must fold into specific three-dimensional structures, organized hierarchically at four distinct levels ( Fig. 1 ): from the linear sequence of amino acids (primary structure), through local folding patterns of α -helices and β -sheets (secondary structure), to the complete three-dimensional arrangement of these elements (tertiary structure), and finally to the assembly of multiple chains into functional complexes (quaternary structure). While the amino acid sequence alone determines the final structure, protein misfolding often leads to disease [ 3 ]. Experimental structure determination through X-ray crystallography, cryo-EM, or NMR spectroscopy remains the gold standard [ 4 , 5 , 6 ], but these methods are time-consuming, expensive, and not always feasible. This creates an urgent need for reliable computational prediction methods, particularly as the gap between known protein sequences and solved structures continues to widen—with over 254 million sequences known (UniProtKB) but only about 230,444 experimentally determined structures available in the Protein Data Bank (as in January, 2025). Download figure Open in new tab Fig. 1. Hierarchical organization of protein structure. (a) Primary structure: the linear sequence of amino acids. (b) Secondary structure: local conformations including α -helices and β -sheets stabilized by hydrogen bonds. (c) Tertiary structure: the complete three-dimensional fold. (d) Quaternary structure: assembly of multiple chains into functional complexes. The field of protein structure prediction has been transformed by artificial intelligence approaches. The introduction of AlphaFold2 in 2020 marked a watershed moment, achieving near-experimental accuracy [ 7 ]. 
This success has spurred the development of alternative approaches, particularly language model-based predictors like ESMFold and OmegaFold that can generate predictions without requiring multiple sequence alignments [ 8 , 9 ]. These newer methods promise faster predictions and potentially better performance on challenging targets like designed or rapidly evolving proteins. Despite these advances, the field lacks a comprehensive comparison of these tools’ performance on truly novel proteins—structures solved after the tools’ training cutoff dates [ 10 ]. Such evaluation is crucial for understanding each method’s strengths and limitations, particularly as these tools become increasingly integrated into structural biology workflows. While the Critical Assessment of Structure Prediction (CASP) [ 11 ] and Continuous Automated Model EvaluatiOn (CAMEO) [ 12 ] provide valuable benchmarks, they are limited to participating methods and may not reflect real-world usage patterns. Here, we present a systematic comparison of AlphaFold2, ESMFold, and OmegaFold using a dataset of over 1,300 protein structures deposited in the PDB between 2022 and 2024. Using multiple evaluation metrics including RMSD [ 13 ], TM-score [ 14 ], and pLDDT [ 15 ], we assess both overall performance and specific challenging cases. Our analysis reveals that while AlphaFold2 achieves the highest average accuracy, ESMFold and OmegaFold excel in particular niches, especially for proteins with limited homology information. We also identify protein families and structural features that correlate with prediction success or failure, providing practical guidance for the structural biology community. Methods Dataset We compiled a benchmark dataset of 1,327 protein structures deposited in the Protein Data Bank (PDB) between July 2022 and July 2024. This temporal restriction ensures no overlap with training data used by AlphaFold2 (cutoff April 2020), ESMFold (June 2020), or OmegaFold (2021). 
The dataset contains three distinct groups: (1) single-chain monomers (980 structures), (2) small multi-chain complexes (245 structures with 2-6 chains), and (3) de novo designed proteins whose sequence does not naturally occur in any living organism (102 structures). Structures were selected using the RCSB PDB Search API [ 16 , 17 ] with the following criteria: (i) deposition date between July 2022 and July 2024, (ii) protein-only structures without nucleic acids or oligosaccharides, (iii) chain lengths between 20 and 400 amino acids to ensure compatibility with all prediction tools, and (iv) availability of structural information in PDB format. To ensure diversity, structures within monomer and de novo protein groups were filtered to have at most 70% pairwise sequence identity. We developed a custom PDB file parsing pipeline to extract complete amino acid sequences and experimental C α coordinates. The pipeline addresses common challenges in PDB files, including non-standard residue numbering, insertion codes, and post-translational modifications. For modified residues, we reconstructed the original amino acid sequence using BioPython’s extended residue dictionary and MODRES records. Structures containing non-standard residues without clear mapping to canonical amino acids (26 cases) were excluded from the analysis. Each structure was annotated with protein family classifications using UniProt and PDBe APIs to map PDB identifiers to Pfam and InterPro database entries. These annotations enable analysis of prediction tools’ performance across different protein families and structural motifs. The numbers of protein structures the dataset contained in various stages of the experiment are stated in Table 1 . The final curated dataset, including all protein sequences, structures, and family annotations, is available at Hugging Face Hub repository. View this table: View inline View popup Download powerpoint Table 1. 
Size of the dataset in various stages of the experiment. Structure Prediction Tools Three tools were selected for protein structure prediction: AlphaFold2, ESMFold, and OmegaFold. While alignment-based AlphaFold2 is an obvious choice, considering how widely used it is [ 10 ], language model-based ESMFold and OmegaFold were chosen because they provide promising results with much lower requirements on time and computational power, making them more suitable for large-scale applications [ 8 , 9 ]. AlphaFold2 We used AlphaFold v2.1.1 running on the university e-INFRA CZ infrastructure with its monomer model and reduced database settings to optimize computational resources. The model architecture consists of two main components: (i) an Evoformer module, which processes multiple sequence alignments (MSAs) and pairwise representations through 48 transformer blocks, and (ii) a structure module that converts the refined representations into 3D coordinates through 8 equivariant transformer blocks with Invariant Point Attention. MSAs were generated using Uniref90, BFD, and MGnify databases. For each sequence, five model predictions were generated and ranked by predicted confidence, with the highest-confidence model (ranked 0.pdb) selected for evaluation. ESMFold Predictions were obtained via REST API calls to the ESM Metagenomic Atlas. ESMFold combines two components: (i) the ESM-2 protein language model with 15B parameters, pre-trained on masked sequence prediction, and (ii) a folding head consisting of 48 folding blocks that process sequence and pairwise representations. Unlike AlphaFold2, ESMFold predicts structures directly from single sequences without requiring MSA generation. OmegaFold Predictions were performed using OmegaFold v1.0 running on university computational cluster with NVIDIA A40 GPU. 
OmegaFold employs: (i) OmegaPLM, a 670M parameter language model trained on masked protein sequences, and (ii) a Geoformer architecture that refines the language model representations to be geometrically consistent before structure prediction. Like ESMFold, OmegaFold operates on single sequences without MSA requirements. All predictions were made for individual protein chains, as both ESMFold and OmegaFold do not support prediction of protein complexes. While AlphaFold2 offers a multimer model, we used its monomer model to ensure fair comparison. Source code, configuration files, and prediction outputs are available at GitHub repository. Evaluation Metrics We employed three complementary metrics to assess prediction quality: RMSD measuring atomic distance deviation, TM-score evaluating topological similarity, and pLDDT reflecting model confidence. Root Mean Square Deviation (RMSD) RMSD quantifies the average distance between corresponding C α atoms in superimposed structures: $$\mathrm{RMSD} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\delta_i^{2}}$$ where $n$ is the number of aligned C α atom pairs and $\delta_i$ is the distance between atoms in the $i$-th pair. To compute RMSD, we first extract C α coordinates from both experimental and predicted structures, then determine the optimal superposition using the Bio.SVDSuperimposer module from BioPython [ 18 ], which finds the rotation and translation matrices minimizing the RMSD value. While RMSD is widely used, it is sensitive to protein size and can be disproportionately affected by local structural deviations. Template Modeling Score (TM-score) TM-score evaluates the topological similarity of protein structures while accounting for protein length: $$\text{TM-score} = \max\left[\frac{1}{L_N}\sum_{i=1}^{L_T}\frac{1}{1+\left(d_i/d_0\right)^{2}}\right]$$ where $L_N$ is the length of the reference structure, $L_T$ is the number of aligned residues, $d_i$ is the distance between the $i$-th pair of aligned residues after superposition, and $d_0 = 1.24\sqrt[3]{L_N - 15} - 1.8$ is a length-dependent scaling factor. TM-score ranges from 0 to 1, with values above 0.5 indicating proteins share the same fold and 1 representing perfect structural alignment. 
Unlike RMSD, TM-score is length-normalized and less sensitive to local structural variations. Predicted LDDT (pLDDT) The predicted local distance difference test (pLDDT) is a confidence metric provided by each prediction tool. For each residue, it estimates the expected agreement between predicted and experimental structures on a 0 to 100 scale. Scores above 90 indicate high prediction confidence. Scores above 70 suggest at least reliable backbone prediction. For our analysis, we used the mean pLDDT across all residues in each protein chain. While pLDDT correlates with prediction accuracy, high confidence scores do not guarantee correct structure prediction, particularly for challenging targets like intrinsically disordered regions or proteins with limited homology information. Statistical Analysis and Annotation We compared these metrics across our dataset using Kruskal-Wallis tests followed by Dunn’s method with Bonferroni correction for multiple comparisons. The correlation between metrics was assessed using Spearman’s rank correlation coefficient. Protein chains were mapped to functional annotations using UniProt and PDBe APIs. For family-specific analysis, we focused on Pfam and InterPro families with at least 10 member proteins in our dataset. The experimental method of structure determination (X-ray crystallography, cryo-EM, or NMR) was recorded for each chain to assess potential biases in prediction accuracy. Predictions were classified as “poor” if they met any of the following criteria: average pLDDT < 70, TM-score < 0.5, or RMSD > 9 Å. The 9 Å RMSD threshold was chosen to match the resolution cutoff used in training AlphaFold2. Statistical significance of family-specific enrichment in poor predictions was assessed using Fisher’s exact test with Benjamini-Hochberg correction for multiple comparisons. 
Implementation and Availability All analysis code was implemented in Python using BioPython for structure manipulation, tmtools for TM-score calculation, and scipy.stats for statistical testing. The complete dataset, including protein sequences, experimental structures, predictions, and evaluation results is available at https://huggingface.co/datasets/hyskova-anna/proteins . Source code and documentation are provided at https://github.com/ML-Bioinfo-CEITEC/CAoPSPT . Results The structure of 1,337 protein chains was predicted using AlphaFold2, ESMFold, and OmegaFold. Of these, AlphaFold2 failed to predict a single chain (8B2M:A), which was excluded from the evaluation; all remaining predictions were obtained successfully. Selected examples of predictions aligned with their experimental structures are visualized in Figure 2 . Download figure Open in new tab Fig. 2. Examples of structure predictions from AlphaFold2 (red), ESMFold (blue) and OmegaFold (yellow) aligned with corresponding experimentally determined structures (green). (a) An example of a poorly predicted structure (8P4Y:A) by AlphaFold2. (b) Structure of protein 8PTF:A showing varying prediction quality across tools. Comparative Performance Analysis All three tools demonstrated generally satisfactory performance, with AlphaFold2 achieving the highest accuracy across all metrics ( Figure 3 ). AlphaFold2 predictions showed the highest median TM-score (0.96) and lowest median RMSD (1.30Å), followed by ESMFold (TM-score: 0.95, RMSD: 1.74Å) and OmegaFold (TM-score: 0.93, RMSD: 1.98Å). Consistently, AlphaFold2 displayed the highest confidence in its predictions with median pLDDT of 92.65, compared to 87.40 for ESMFold and 89.00 for OmegaFold. Download figure Open in new tab Fig. 3. Performance comparison across prediction tools. Distribution of (a) RMSD values, (b) TM-scores, and (c) pLDDT scores. Box plots show median, quartiles, and outliers. 
All pair comparisons have been statistically significant ( p < 0.01). Metric Correlations and Their Dependencies on Sequence Length And Other Factors We observed significant correlations between prediction confidence (pLDDT) and accuracy metrics ( Figure 4 ). Most notably, there was a negative correlation between average pLDDT and RMSD (Spearman’s ρ = − 0.87, − 0.87, and − 0.88 for AlphaFold2, ESMFold, and OmegaFold, respectively) and a positive correlation between average pLDDT and TM-score ( ρ = 0.60, 0.66, and 0.71). The correlation was strongest for ESMFold and OmegaFold, suggesting that their confidence scores more accurately reflect prediction quality than those of AlphaFold2. Download figure Open in new tab Fig. 4. Correlation analysis between prediction metrics. Heatmaps show Spearman’s correlation coefficients between average pLDDT, RMSD, and TM-score for each prediction tool. All correlations are statistically significant ( p < 0.001). While low-confidence predictions rarely achieved good accuracy metrics, we found numerous cases of incorrect structures with high pLDDT scores across all tools ( Figure 5 ) Download figure Open in new tab Fig. 5. Dependency of RMSD and TM-score on average pLDDT of structures generated by different tools. The LOESS curve (red) was obtained by locally estimated scatterplot smoothing. Sample points with RMSD greater than 40 Å are omitted from the visualization for better clarity. Analysis of sequence length dependency also revealed interesting patterns. While RMSD showed weak correlation with sequence length, TM-scores displayed stronger positive associations, particularly for AlphaFold2 ( ρ = 0.41, p < 0.001). This suggests that predictions for shorter proteins ( < 100 amino acids) tend to achieve lower TM-scores across all tools, though this trend is less pronounced in RMSD values due to the metric’s inherent length dependency. 
ESMFold and OmegaFold showed weaker but still significant correlations with sequence length ( ρ = 0.29 and ρ = 0.28, respectively, for TM-score). The experimental method used for structure determination significantly influenced prediction accuracy ( Figure 7 ). All tools performed best on X-ray crystallography structures (median RMSD: 1.24Å, 1.65Å, and 1.89Å for AlphaFold2, ESMFold, and OmegaFold, respectively) but struggled with NMR-determined structures (median RMSD: 2.31Å, 2.89Å, and 3.12Å). This pattern likely reflects both the inherent flexibility of proteins amenable to NMR analysis and the predominance of X-ray structures in training data. Download figure Open in new tab Fig. 6. Dependency of average pLDDT, TM-score, and RMSD on the type of protein chain being predicted. The differences between groups were tested by Kruskal-Wallis test, post-hoc comparisons were done using Dunn’s method with a Bonferroni correction for multiple tests. Statistical significance visualized by difference in letter codes. Sample points with RMSD greater than 30 Å are omitted from the visualization for better clarity. Download figure Open in new tab Fig. 7. Dependency of RMSD and TM-score on the experimental method of acquisition of the protein chain structure. The differences between groups were tested by Kruskal-Wallis test, post-hoc comparisons were done using Dunn’s method with a Bonferroni correction for multiple tests. Statistical significance visualized by difference in letter codes. Sample points with RMSD greater than 40Å are omitted from the visualization for better clarity. When comparing performance across different protein types (monomers, complexes, and de novo proteins), we observed an interesting pattern, see Figure 6 . While all tools generally performed similarly across these categories, there are two notable exceptions. First, ESMFold and OmegaFold achieved significantly lower RMSD values for de novo proteins compared to natural proteins. 
Second, AlphaFold2 showed a unique weakness with de novo proteins, achieving significantly lower TM-scores for these proteins compared to monomers and complexes. This suggests that language model-based tools may have an advantage in predicting structures of artificial proteins where evolutionary information is limited. Analysis of Prediction Failures We classified predictions as incorrect if they met any of the following criteria: average pLDDT < 70, TM-score < 0.5, or RMSD > 9 Å. AlphaFold2 produced the fewest incorrect predictions (8.9% of total), followed by ESMFold (13.0%) and OmegaFold (16.8%). The overlap of prediction failures between tools was limited, suggesting complementary strengths ( Figure 8 ). Download figure Open in new tab Fig. 8. Venn diagrams comparing the overlap of poorly predicted protein chains based on three evaluation criteria: (a) average pLDDT < 70, TM-score < 0.5, and RMSD > 9 Å for AlphaFold2, ESMFold, and OmegaFold, and (b) the overlap of predictions that fail across the three metrics for each tool individually. Analysis of protein families revealed that proteins lacking Pfam annotations were particularly challenging for AlphaFold2 but not for ESMFold or OmegaFold, highlighting the importance of evolutionary information in AlphaFold2’s predictions. Conversely, viral proteins, especially from coronavirus, were better predicted by AlphaFold2 than by the language model-based tools. All tools showed reduced accuracy for proteins containing leucine-rich repeats or von Willebrand factor A-like domains, suggesting these structural motifs pose particular challenges for current prediction methods. The analysis of protein family associations revealed distinctive patterns in prediction accuracy. Notably, AlphaFold2 showed significantly reduced performance for proteins lacking Pfam family annotations (odds ratio = 0.67, p < 0.01), while ESMFold and OmegaFold maintained consistent performance regardless of family assignments. 
This pattern was also observed with InterPro annotations, highlighting AlphaFold2’s dependence on evolutionary information. Certain protein families were consistently well-predicted across all tools. These included protein kinase domains (PF00069, IPR000719), the SH2 domain (IPR000980), and the NAD(P)-binding domain superfamily (IPR036291). Conversely, all tools struggled with leucine-rich repeats (IPR001611, IPR003591) and von Willebrand factor A-like domains (IPR036465), suggesting these structural motifs remain challenging for current prediction methods. Interestingly, several protein families showed tool-specific prediction patterns. AlphaFold2 excelled at predicting viral protein families, particularly the viral RNA-dependent RNA polymerase (PF00680, IPR001205) and coronavirus-specific proteins (PF05409, IPR043503), achieving significantly better accuracy than ESMFold or OmegaFold ( p < 0.001). Conversely, the S-adenosyl-L-methionine-dependent methyltransferase superfamily (IPR029063) showed markedly different prediction quality between AlphaFold2 (odds ratio = 1.83, p < 0.05) and the language model-based tools (odds ratio = 0.64 and 0.51 for ESMFold and OmegaFold respectively, p < 0.001). 
Prediction of Structure Determination Success Using Machine Learning To help identify potential failures in structure prediction, we trained gradient boosting LightGBM [ 19 ] models for AlphaFold2, ESMFold, and OmegaFold, respectively, using ProtBert BFD embeddings [ 20 ] calculated from protein sequences and pLDDT scores. The models were trained to predict whether a structure prediction would likely be unsuccessful, allowing early identification of challenging cases. The trained models and source code are available on GitHub repository, enabling to assess potential challenges early in structure prediction pipelines. As shown in Figure 9 , it is typically pLDDT, the length of the sequence, and a few selected embedding elements that have the greatest influence on prediction. Download figure Open in new tab Fig. 9. SHAP values of LightGBM model for AlphaFold2. Discussion Since the beginning of this decade, structural biology and protein structure prediction fields have undergone a significant transition. Currently, there are two large projects dealing with this issue: CASP [ 11 ] and CAMEO [ 12 ]. While AlphaFold2 has participated in both CASP14 and CAMEO, ESMFold has entered only CASP15, and OmegaFold has not been included in either. There are also a few publications dealing with the comparison of protein structure prediction tools, but they usually focus mainly on AlphaFold2 and similar tools (e.g. ColabFold) [ 21 ] or perform the evaluation on a particular set of proteins, namely human proteins [ 22 ], snake venom toxins [ 21 ], and nanobodies [ 23 ]. This paper tries to increase our understanding by creating an inclusive dataset of protein structures recently added to PDB. The key finding of this work is that AlphaFold2 outperforms ESMFold and OmegaFold on a majority of proteins in the dataset, measured by both RMSD and TM-score. 
When comparing the two protein language-based models, ESMFold seems to be a slightly better choice, as it produced fewer incorrect structures than OmegaFold and achieved significantly better median RMSD and TM-score. Still, the difference in performance between ESMFold and OmegaFold is much smaller compared to the gap between both of these tools and AlphaFold2. While all three tools rarely produce a good prediction with low confidence, wrong structures with a high average pLDDT are outputted quite frequently. Our analysis revealed that prediction accuracy is influenced by various factors. All three tools performed best when predicting proteins whose experimental structure was determined by X-ray crystallography, while structures determined by NMR proved to be the most challenging. Because NMR is typically used to determine the structures of small proteins, a corresponding decrease in prediction accuracy is observed for shorter sequences. Interestingly, proteins without family annotations proved particularly difficult for AlphaFold2 but did not change the performance of ESMFold and OmegaFold. A possible explanation is that proteins belonging to no family lack homologs with a known structure, which AlphaFold2 could use as a template during the prediction. In contrast, ESMFold and OmegaFold do not rely on MSAs and modelling templates, so their performance remained largely unaffected. Our analysis shows several key insights, yet certain constraints of our study must be noted. First, the dataset does not contain only proteins whose experimental structure was previously unknown but also proteins that were just recently analyzed again, usually in different conditions. This might be an advantage for AlphaFold2, which uses a reduced PDB database for template searching during the prediction process. 
Moreover, the whole analysis focuses only on single protein chains without the context of their interacting partners, which might be crucial for structure formation, especially in protein complexes. Last but not least, all the protein chains in the dataset have a maximum length of 400 amino acids due to using ESMAtlas API. The field continues to evolve rapidly. Recently, DeepMind released AlphaFold3 [ 24 ], followed by several replication efforts including Boltz-1 [ 25 ] and HelixFold3 [ 26 ]. The code has been made available for noncommercial research purposes in November 2024 [ 27 ]. In the future, it would be interesting to predict the structure of whole protein complexes using these newer models and compare the results with predictions of single chains. The analysis could also be extended by including protein chains of all lengths, exploring additional metrics such as the variance of pLDDT, or testing newer models as they become available. Competing interests No competing interest is declared. Author contributions statement A.H. was responsible for dataset preparation and the majority of coding. A.H. and P.S. collaborated on writing the manuscript. E.M. and P.S. provided critical manuscript review and oversaw project management. Acknowledgments The project was supported by the OPUS LAP program of the Czech Science Foundation, project no. 23-04260L “Biological code of knots – identification of knotted patterns in biomolecules via AI approach”). Computational resources were supplied by the project “e-Infrastruktura CZ” (e-INFRA CZ LM2018140) supported by the Ministry of Education, Youth and Sports of the Czech Republic. Funder Information Declared Czech Science Foundation , 23-04260L Footnotes https://huggingface.co/datasets/hyskova-anna/proteins References 1. ↵ B Alberts . Molecular Biology of the Cell . W.W. Norton , 2017 . ISBN 978-1-317-56375-4 . URL https://books.google.cz/books?id=jK6UBQAAQBAJ . 2. ↵ GM Cooper . The Cell: A Molecular Approach . 
Sunderland (MA): Sinauer Associates , 2000 . ISBN 0-87893-106-6 . 3. ↵ DJ Selkoe . Folding proteins in fatal ways . Nature , 426 ( 6968 ): 900 – 904 , December 2003 . ISSN 1476-4687 . doi: 10.1038/nature02264 . URL https://www.nature.com/articles/nature02264 . OpenUrl CrossRef PubMed Web of Science 4. ↵ MS Smyth and JHJ Martin . x Ray crystallography . Molecular Pathology , 53 ( 1 ): 8 – 14 , February 2000 . ISSN 1366-8714 . URL https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1186895/ . OpenUrl Abstract / FREE Full Text 5. ↵ JLS Milne , MJ Borgnia , A Bartesaghi , EEH Tran , LA Earl , DM Schauder , J Lengyel , J Pierson , A Patwardhan , and S Subramaniam . Cryo-electron microscopy: A primer for the non-microscopist . The FEBS journal , 280 ( 1 ): 28 – 45 , January 2013 . ISSN 1742-464X . doi: 10.1111/febs.12078 . URL https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3537914/ . OpenUrl CrossRef PubMed 6. ↵ Y Hu , K Cheng , L He , X Zhang , B Jiang , L Jiang , C Li , G Wang , Y Yang , and M Liu . NMR-Based Methods for Protein Analysis . Analytical Chemistry , 93 ( 4 ): 1866 – 1879 , February 2021 . ISSN 0003-2700 . doi: 10.1021/acs.analchem.0c03830 . URL https://doi.org/10.1021/acs.analchem.0c03830. Publisher: American Chemical Society . OpenUrl CrossRef 7. ↵ J Jumper , R Evans , A Pritzel , T Green , M Figurnov , O Ronneberger , K Tunyasuvunakool , R Bates , A Žídek , A Potapenko , A Bridgland , C Meyer , SAA Kohl , AJ Ballard , A Cowie , B Romera-Paredes , S Nikolov , R Jain , J Adler , T Back , S Petersen , D Reiman , E Clancy , M Zielinski , M Steinegger , M Pacholska , T Berghammer , S Bodenstein , D Silver , O Vinyals , AW Senior , K Kavukcuoglu , P Kohli , and D Hassabis . Highly accurate protein structure prediction with AlphaFold . Nature , 596 ( 7873 ): 583 – 589 , August 2021 . ISSN 1476-4687 . doi: 10.1038/s41586-021-03819-2 . URL https://www.nature.com/articles/s41586-021-03819-2 . OpenUrl CrossRef PubMed 8. 
↵ Z Lin , H Akin , R Rao , B Hie , Z Zhu , W Lu , N Smetanin , R Verkuil , O Kabeli , Y Shmueli , A dos Santos Costa , M Fazel-Zarandi , T Sercu , S Candido , and A Rives . Evolutionary-scale prediction of atomic-level protein structure with a language model . Science , 379 ( 6637 ): 1123 – 1130 , March 2023 . doi: 10.1126/science.ade2574 . URL https://www.science.org/doi/10.1126/science.ade2574 . OpenUrl CrossRef PubMed 9. ↵ R Wu , F Ding , R Wang , R Shen , X Zhang , S Luo , C Su , Z Wu , Q Xie , B Berger , J Ma , and J Peng . High-resolution de novo structure prediction from primary sequence . preprint, Bioinformatics , July 2022 . URL http://biorxiv.org/lookup/doi/10.1101/2022.07.21.500999 . 10. ↵ O Kovalevskiy , J Mateos-Garcia , and K Tunyasuvunakool . AlphaFold two years on: Validation and impact . Proceedings of the National Academy of Sciences , 121 ( 34 ): e2315002121, August 2024 . doi: 10.1073/pnas.2315002121 . URL https://www.pnas.org/doi/10.1073/pnas.2315002121 . OpenUrl CrossRef PubMed 11. ↵ J Moult , JT Pedersen , R Judson , and K Fidelis . A large-scale experiment to assess protein structure prediction methods . Proteins: Structure, Function, and Bioinformatics , 23 ( 3 ): ii – iv , 1995 . ISSN 1097-0134 . doi: 10.1002/prot.340230303 . URL https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.340230303 . OpenUrl CrossRef PubMed 12. ↵ X Robin , J Haas , R Gumienny , A Smolinski , G Tauriello , and T Schwede . Continuous Automated Model EvaluatiOn (CAMEO)—Perspectives on the future of fully automated evaluation of structure prediction methods . Proteins: Structure, Function, and Bioinformatics , 89 ( 12 ): 1977 – 1986 , 2021 . ISSN 1097-0134 . doi: 10.1002/prot.26213 . URL https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.26213 . OpenUrl CrossRef 13. ↵ I Kufareva and R Abagyan . Methods of protein structure comparison . Methods in molecular biology (Clifton, N.J .), 857 : 231 – 257 , 2012 . ISSN 1064-3745 . doi: 10.1007/978-1-61779-588-6_10 . 
URL https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4321859/ . OpenUrl CrossRef PubMed 14. ↵ Y Zhang and J Skolnick . Scoring function for automated assessment of protein structure template quality . Proteins: Structure, Function, and Bioinformatics , 57 ( 4 ): 702 – 710 , 2004 . ISSN 1097-0134 . doi: 10.1002/prot.20264 . URL https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.20264 . OpenUrl CrossRef PubMed Web of Science 15. ↵ K Tunyasuvunakool , J Adler , Z Wu , T Green , M Zielinski , A Žídek , A Bridgland , A Cowie , C Meyer , A Laydon , S Velankar , GJ Kleywegt , A Bateman , R Evans , A Pritzel , M Figurnov , O Ronneberger , R Bates , SAA Kohl , A Potapenko , AJ Ballard , B Romera-Paredes , S Nikolov , R Jain , E Clancy , D Reiman , S Petersen , AW Senior , K Kavukcuoglu , E Birney , P Kohli , J Jumper , and D Hassabis . Highly accurate protein structure prediction for the human proteome . Nature , 596 ( 7873 ): 590 – 596 , August 2021 . ISSN 1476-4687 . doi: 10.1038/s41586-021-03828-1 . URL https://www.nature.com/articles/s41586-021-03828-1 . OpenUrl CrossRef PubMed 16. ↵ Y Rose , JM Duarte , R Lowe , J Segura , C Bi , C Bhikadiya , L Chen , AS Rose , S Bittrich , SK Burley , and JD Westbrook . RCSB Protein Data Bank: Architectural Advances Towards Integrated Searching and Efficient Access to Macromolecular Structure Data from the PDB Archive . Journal of Molecular Biology , 433 ( 11 ): 166704 , May 2021 . ISSN 0022-2836 . doi: 10.1016/j.jmb.2020.11.003 . URL https://www.sciencedirect.com/science/article/pii/S0022283620306227 . OpenUrl CrossRef PubMed 17. ↵ S Bittrich , C Bhikadiya , C Bi , H Chao , JM Duarte , S Dutta , M Fayazi , J Henry , I Khokhriakov , R Lowe , DW Piehl , J Segura , B Vallat , M Voigt , JD Westbrook , SK Burley , and Y Rose . RCSB Protein Data Bank: Efficient Searching and Simultaneous Access to One Million Computed Structure Models Alongside the PDB Structures Enabled by Architectural Advances . 
Journal of Molecular Biology , 435 ( 14 ): 167994 , July 2023 . ISSN 0022-2836 . doi: 10.1016/j.jmb.2023.167994 . URL https://www.sciencedirect.com/science/article/pii/S0022283623000505 . OpenUrl CrossRef PubMed 18. ↵ PJA Cock , T Antao , JT Chang , BA Chapman , CJ Cox , A Dalke , I Friedberg , T Hamelryck , F Kauff , B Wilczynski , and MJL de Hoon . Biopython: freely available Python tools for computational molecular biology and bioinformatics . Bioinformatics , 25 ( 11 ): 1422 – 1423 , June 2009 . ISSN 1367-4803 . doi: 10.1093/bioinformatics/btp163 . URL https://doi.org/10.1093/bioinformatics/btp163. OpenUrl CrossRef PubMed Web of Science 19. ↵ Guolin Ke , Qi Meng , Thomas Finley , Taifeng Wang , Wei Chen , Weidong Ma , Qiwei Ye , and Tie-Yan Liu . LightGBM: A highly efficient gradient boosting decision tree . Advances in neural information processing systems , 30 , 2017 . 20. ↵ Nadav Brandes , Dan Ofer , Yam Peleg , Nadav Rappoport , and Michal Linial . ProteinBERT: a universal deep-learning model of protein sequence and function . Bioinformatics , 38 ( 8 ): 2102 – 2110 , 2022 . OpenUrl CrossRef PubMed 21. ↵ K Kalogeropoulos , MF Bohn , DE Jenkins , J Ledergerber , C Sørensen , N Hofmann , J Wade , T Fryer , G Thi Tuyet Nguyen , U auf dem Keller , AH Laustsen , and TP Jenkins . A comparative study of protein structure prediction tools for challenging targets: Snake venom toxins . Toxicon , 238 : 107559 , February 2024 . ISSN 0041-0101 . doi: 10.1016/j.toxicon.2023.107559 . URL https://www.sciencedirect.com/science/article/pii/S0041010123003707 . OpenUrl CrossRef 22. ↵ M Manfredi , C Savojardo , G Iardukhin , D Salomoni , A Costantini , PL Martelli , and R Casadio . Alpha&ESMhFolds: A Web Server for Comparing AlphaFold2 and ESMFold Models of the Human Reference Proteome . Journal of Molecular Biology , 436 ( 17 ): 168593 , September 2024 . ISSN 0022-2836 . doi: 10.1016/j.jmb.2024.168593 . URL https://www.sciencedirect.com/science/article/pii/S0022283624001888 . 
OpenUrl CrossRef PubMed 23. ↵ MS Valdés-Tresanco , ME Valdés-Tresanco , DE Jiménez-Gutiérrez , and E Moreno . Structural Modeling of Nanobodies: A Benchmark of State-of-the-Art Artificial Intelligence Programs . Molecules , 28 ( 10 ): 3991 , January 2023 . ISSN 1420-3049 . doi: 10.3390/molecules28103991 . URL https://www.mdpi.com/1420-3049/28/10/3991 . OpenUrl CrossRef PubMed 24. ↵ Josh Abramson , Jonas Adler , Jack Dunger , Richard Evans , Tim Green , Alexander Pritzel , Olaf Ronneberger , Lindsay Willmore , Andrew J Ballard , Joshua Bambrick , et al. Accurate structure prediction of biomolecular interactions with AlphaFold 3 . Nature , pages 1 – 3 , 2024 . 25. ↵ Jeremy Wohlwend , Gabriele Corso , Saro Passaro , Mateo Reveiz , Ken Leidal , Wojtek Swiderski , Tally Portnoi , Itamar Chinn , Jacob Silterra , Tommi Jaakkola , et al. Boltz-1: Democratizing biomolecular interaction modeling . bioRxiv , pages 2024 – 11 , 2024 . 26. ↵ Lihang Liu , Shanzhuo Zhang , Yang Xue , Xianbin Ye , Kunrui Zhu , Yuxin Li , Yang Liu , Wenlai Zhao , Hongkun Yu , Zhihua Wu , et al. Technical report of HelixFold3 for biomolecular structure prediction . arXiv preprint arXiv:2408.16975 , 2024 . 27. ↵ Ewen Callaway . AI protein-prediction tool AlphaFold3 is now more open . Nature , 635 ( 8039 ): 531 – 532 , 2024 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted June 21, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following Balancing Speed and Precision in Protein Folding: A Comparison of AlphaFold2, ESMFold, and OmegaFold Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Balancing Speed and Precision in Protein Folding: A Comparison of AlphaFold2, ESMFold, and OmegaFold Anna Hýskova , Eva Maršálková , Petr Šimeček bioRxiv 2025.06.20.660709; doi: https://doi.org/10.1101/2025.06.20.660709 Share This Article: Copy Citation Tools Balancing Speed and Precision in Protein Folding: A Comparison of AlphaFold2, ESMFold, and OmegaFold Anna Hýskova , Eva Maršálková , Petr Šimeček bioRxiv 2025.06.20.660709; doi: https://doi.org/10.1101/2025.06.20.660709 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7618) Biochemistry (17633) Bioengineering (13857) Bioinformatics (41841) Biophysics (21399) Cancer Biology (18529) Cell Biology (25422) Clinical Trials (138) Developmental Biology (13352) Ecology (19860) Epidemiology (2067) Evolutionary Biology (24282) Genetics (15582) Genomics (22462) Immunology (17700) Microbiology (40295) Molecular Biology (17140) Neuroscience (88421) Paleontology (666) Pathology (2823) Pharmacology and Toxicology (4813) Physiology (7632) Plant Biology (15107) Scientific Communication and Education (2042) Synthetic Biology (4284) Systems Biology (9808) Zoology (2267)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00