Full text
67,180 characters
· extracted from
preprint-html
· click to expand
On inputs to deep learning for RNA 3D structure prediction | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results On inputs to deep learning for RNA 3D structure prediction Marcell Szikszai , View ORCID Profile Marcin Magnus , Sachin Kadyan , View ORCID Profile Elena Rivas doi: https://doi.org/10.1101/2025.02.14.638364 Marcell Szikszai 1 Department of Computer Science and Software Engineering, The University of Western Australia , Crawley, WA 6009, Australia 2 Department of Molecular and Cellular Biology, Harvard University , Cambridge, MA 02138, USA Find this author on Google Scholar 
Find this author on PubMed Search for this author on this site Marcin Magnus 2 Department of Molecular and Cellular Biology, Harvard University , Cambridge, MA 02138, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Marcin Magnus Sachin Kadyan 3 Department of Systems Biology, Columbia University , New York 10027, NY, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Elena Rivas 2 Department of Molecular and Cellular Biology, Harvard University , Cambridge, MA 02138, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Elena Rivas For correspondence: elenarivas{at}fas.harvard.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Today, there are several effective deep learning models for predicting the 3D structure of proteins. Building on their success, models have been developed for predicting the 3D structure of non-coding RNAs. Unfortunately, these models are much less accurate than their protein counterparts. In this paper, we highlight differences between protein and RNA structure, and demonstrate methods for deep learning targeted at addressing those differences, with the aim of prompting discussion on these topics. We present an RNA-specific pipeline for generating structural Multiple Sequence Alignments (MSAs). Derived from the structural alignments, we introduce engineered evolutionary features that strongly inform RNA structure. Further, from the crystal structure, we derive structural features describing RNA base pairing. These evolutionary and structural features can be used in loss functions at different stages of training. Finally, we discuss different cropping strategies informed by RNA structure. 1 I ntroduction The prediction of 3D protein structure was revolutionized via deep learning by AlphaFold in 2018 ( Senior et al., 2020 ). 
Since then, the number of tools that apply deep learning to broader problems in structural biology has skyrocketed. It is no surprise that researchers immediately began adapting the lessons from AlphaFold toward non-coding ribonucleic acid (RNA) 3D structure prediction ( Chen et al., 2020 ; Wang et al., 2021 ; Sato et al., 2021 ; Fu et al., 2021 ; Pearce et al., 2022 ; Shen et al., 2022 ; Baek et al., 2022 ; Feng et al., 2022 ; Li et al., 2022 ; Abramson et al., 2024 ). The core problems are essentially analogous: take a 1D polymer sequence as input, and predict the 3D conformation of the molecule. For both proteins and structured RNAs, the 3D structure is a consequence of the 1D polymer sequence, and the 3D structure has strong ties to the molecule’s function. With increasing interest in 3D prediction of the RNA structure, there was a need for more robust tools to benchmark performance. The blind-assessment competition CASP15 ( Kryshtafovych et al., 2023 ) joined RNA-Puzzles ( Cruz et al., 2012 ; Miao et al., 2015 ; 2017 ; 2020 ) in 2022 to include RNA-only targets in the competition. These assessments of novel RNA structures indicate that to this day, deep-learning methods have yet to catch up to other existing traditional methods for RNA structure prediction, as reported by the latest CASP16 and RNA-Puzzles ( Miao et al., 2020 ) competitions. The amount of RNA structural data available to perform deep-learning RNA 3D structure prediction pales in comparison to that available for protein structure prediction ( Szikszai et al., 2024 ). Methods like RNA3DB ( Szikszai et al., 2024 ) have been created recently to exhaustively characterize structural homologies in existing RNA PDB chains, and to provide flexible tools to avoid structural homology overlap when designing training and testing sets for robust benchmarking. 
As we learn from the successes in protein structure prediction, several additional considerations have to be taken into account when creating deep-learning methods for RNA structure prediction, owing to the distinct properties of RNA compared to proteins. We investigate some of these considerations in detail in this manuscript. Our contribution is a method to generate RNA structural alignments, and evolutionary and structural features that can be used to inform the training of RNA 3D structure prediction methods. See Figure 1 . Download figure Open in new tab Figure 1: Overview of RNA structural inputs for training deep-learning methods. (a) The structural alignments are generated by structural homology search to the Rfam database ( Ontiveros-Palacios et al., 2024 ), with the Infernal ( Nawrocki & Eddy, 2013 ) method. We build one structural alignment for each Rfam family with significant homology to the query (E-value < 1e-5). See Appendix section B for details. From each structural alignment, we extract evolutionary features (both observed and expected covariation) using R-scape ( Rivas et al., 2017 ; 2020 ). (b) From the query PDB file, we extract structural properties related to the base pairing geometry (including all possible base-pair types) using RNAview ( Yang et al., 2003 ). 2 R fam - based structural multiple sequence alignments The idea behind using multiple sequence alignments (MSAs) as the input to AlphaFold-like deep learning models is the expectation that evolutionary information informs structure. This is true for both proteins and RNAs. However, with RNAs, the degree to which alignments inform structure varies highly by type. Structured RNAs, such as tRNA and rRNA, rely on specific conformations to perform their function. As a result, their structure is highly conserved. This conservation is easily detectable in their MSAs. 
Base pairing in particular can be inferred from alignments by looking at positive and negative evolutionary information ( Rivas, 2020 ). On the other hand, some RNAs, like mRNAs, rely primarily on their conserved codon organization to determine their function. These mRNAs still form base pairs and fold into some 3D conformation, but these structures are generally not conserved (even though certain folded configurations may be more stable). Since most interest in RNA 3D structure is focused on structured RNAs, alignments should be made with evolutionary conservation of structure in mind. However, the methods used by current deep learning models either assume all RNAs are structural, or do not incorporate structural conservation into their alignments. There are, broadly speaking, two pipelines representative of the approaches used by all existing methods: the rMSA ( Zhang et al., 2023 ) pipeline that fits alignments to proposed structures, or structure-agnostic HMMER-based ( Eddy, 2008 ; 2009 ; 2011 ) pipelines as used by AlphaFold 3. The pipeline used by AlphaFold 3 relies on a large database of clustered representative RNA sequences from Rfam ( Ontiveros-Palacios et al., 2024 ), RNAcentral ( RNAcentral Consortium, 2021 ), and Nucleotide Collection ( Sayers et al., 2023 ). HMMER is then used to find homologous sequences in this database. HMMER uses profile hidden Markov models to search sequence databases for homologues using sequence only. It does not consider the secondary structure of RNAs in the homology search. It is well established that alignments for structural RNAs can be improved by using both sequence and secondary structure ( Freyhult et al., 2007 ; Nawrocki, 2009 ), using structural homology methods such as Infernal ( Nawrocki & Eddy, 2013 ). Infernal works by constructing profile stochastic context-free models (covariance models or CMs) of RNA families, which are trained from a family-specific MSA along with a consensus secondary structure. 
The rMSA pipeline, used by RoseTTAFoldNA ( Baek et al., 2023 ) and others ( Wang et al., 2023 ), starts out with an initial HMMER alignment to first identify homologous sequences. Then it creates an Infernal covariance model using a predicted RNA secondary structure. This Infernal covariance model is then used for homology searches to arrive at a final alignment. This pipeline does consider secondary structure, but a relatively unreliable one, since the consensus is found through thermodynamic folding ( Lorenz et al., 2016 ). Importantly, this approach assumes that the query sequence conserves its secondary structure, which may not be the case (e.g. mRNAs or synthetic constructs). Additionally, Infernal is used with an E-value cut-off of 10, which is prone to favor the inclusion of false positive homologues. The artifacts created by the high E-value cut-off and the assumption of conserved secondary structure were previously documented by Gao et al. (2022) . For known structural RNAs used in training, we propose to take advantage of the Rfam database ( Ontiveros-Palacios et al., 2024 ). Rfam compiles a database of Infernal covariance models which classifies structural RNAs into families. Each family has a seed alignment , the MSA used to build the covariance model, along with a carefully created consensus secondary structure. Our method described in Figure 1a starts by finding Rfam families that show statistically significant homology to the query RNA sequence. For each homologous RNA family, a structural alignment including the query and sequences that belong to the Rfam family can be constructed. For the PDB database, as reported by RNA3DB (2024-12-04-full-release) ( Szikszai et al., 2024 ), of the 1,869 RNA representative chains (clustered at 99% identity), 67% (1,198) have homology to at least one Rfam family with an E-value cutoff of 1e-5. 
RNA chains without homology to Rfam usually fall in the category of synthetic sequences, mRNAs or small fragments lacking structure. From our structural alignments, we can directly feed covariation evolutionary information into the model in order to provide a signal about secondary and tertiary structure. See Figure 1a and Appendix Section A for a discussion. Figure 2 presents a comparison of the performance of the alignment methods on a 5S rRNA structural RNA chain and a Purine riboswitch aptamer. Even though the AlphaFold-like alignment includes many more sequences, the alignment is less accurate identifying the positions that are base paired. Similar results are presented in the supplement for two other structural RNAs. Other examples are given in Appendix Section B. Download figure Open in new tab Figure 2: Comparison of alignment methods. For a 5S rRNA PDB chain and a Purine riboswitch aptamer, we show: (a) A HMMER alignment made against Rfam, RNAcentral and the nucleotide databases. (b) A structural alignment created with our Rfam-based method. (c) A curated structural alignment for the RNA family from Rfam. We show the evolutionary information present in each alignment as the significantly covarying base pairs (depicted in green) found using R-scape. Covarying base pairs are given in the context of a CaCoFold ( Rivas, 2020 ) consensus secondary structure that incorporates all significantly covarying pairs found in the alignment. These examples show the power of using Rfam’s covariance models, and building MSAs using Infernal with appropriate E-value cutoffs. In cases where no Rfam CM produces a significant hit, it may be difficult to determine whether the RNA is structural or not. As a result, alignments made with HMMER (as done by AlphaFold 3) will be the most adequate and informative without making assumptions about a secondary structure that could introduce circular analysis artifacts ( Gao et al., 2022 ). 
3 Loss functions associated with RNA base pairing RNA folding is hierarchical ( Tinoco & Bustamante, 1999 ), meaning that the secondary structure (that is, the collection of base pairs) is more stable and forms faster than the 3D structure ( Banerjee et al., 1993 ; Mathews et al., 1997 ; Onoa et al., 2003 ). The RNA secondary structure heavily informs the 3D structure ( Shapiro et al., 2007 ; Miao et al., 2020 ). As a result, it is often argued that for a model to predict 3D correctly, it must correctly predict the 2D structure first ( Kerpedjiev et al., 2015 ). However, there is little discussion from the RNA 3D structure prediction community about loss functions that target RNA base pairing specifically. AlphaFold-like models distinguish two different kinds of losses. Structural losses rely on the entire 3D structure. These are usually end-to-end, and are evaluated at the end of the structure module or the diffusion module in the case of AlphaFold 3, e.g. Frame Aligned Point Error (FAPE) ( Jumper et al., 2021 ). Also, there are auxiliary losses that apply to linear projections from the internal pair representation, usually just before the structure module and after the Pairformer or Evoformer, e.g. distogram loss ( Jumper et al., 2021 ). Here we propose two loss functions, one structural and one auxiliary to inform specifically about RNA base pairing. We propose a loss termed pairtogram loss—a play on AlphaFold’s distograms 1 . A pairtogram describes the base pairing geometry and can be used as an auxiliary loss. To construct a pairtogram, we extract an augmented Leontis-Westhof base pair geometry classification matrix ( Leontis & Westhof, 2001 ) for all N res × N res pairs. This classification provides 12 basic geometric types, distinguishing between Watson-Crick, Hoogsteen, or Sugar-edge interacting edges, and cis or trans bond orientations. For instance, canonical RNA base pairs A:U , G:C are cis interactions between the Watson-Crick edges of both residues. 
The standard annotation is augmented with whether the pair is stacked (but not any of the defined base pairs), or a contact (defined as residues at a distance smaller than 8 Å), or neither. See Appendix section C for details on how the pairtogram loss is calculated. Our RNA structural loss considers the dihedral angle between the planes of the two nucleotide bases. Pyrimidine bases are completely planar, while the base in purines is nearly planar (but can be functionally treated as such) ( Callahan, 2011 ). As a result, we can assign a plane corresponding to a base via three atoms in all residues. In principle, any three atoms can be used since the bases are largely planar, but we consider two planes for both pyrimidines and purines for redundancy. The planes for purines are defined by C1 ′ -N9-C4 and C8-N9-C4, while the planes for pyrimidines are defined by C1 ′ -N1-C2 and C6-N1-C2 (see Figure 3 ). These planes are then used to calculate the dihedral angles between bases. Residues forming Watson-Crick base pairs will have angles close to 180 ° . Moreover, residues in one side of a canonical helix will also have angles close to 180 ° with residues on the other side of the helix. On the other hand, residues in one side of the helix will have very small base angles amongst each other. Download figure Open in new tab Figure 3: Between-Base Angle Planes. An example of how the planes are constructed for a purine (left, adenine) and a pyrimidine (right, uracil). The first planes are shown in blue, while the second planes are shown in orange. The example base pair is from a tRNA (PDB: 1ehz_A ) ( Shi & Moore, 2000 ) and drawn with Mol* ( Sehnal et al., 2021 ). Let the plane for a residue i be defined by three atom coordinates ( A C1’/C8/C6 , A N9/N1 , A C4/C2 ) where A ∈ ℝ 3 . 
Then, we calculate the normal of the plane, $$\mathbf{n}_i = \frac{(\mathbf{A}_{\mathrm{N9/N1}} - \mathbf{A}_{\mathrm{C1'/C8/C6}}) \times (\mathbf{A}_{\mathrm{C4/C2}} - \mathbf{A}_{\mathrm{C1'/C8/C6}})}{\lVert (\mathbf{A}_{\mathrm{N9/N1}} - \mathbf{A}_{\mathrm{C1'/C8/C6}}) \times (\mathbf{A}_{\mathrm{C4/C2}} - \mathbf{A}_{\mathrm{C1'/C8/C6}}) \rVert}.$$ For each pair of planes i and j , we calculate the sine and cosine of the dihedral angle, $$\cos\theta_{ij} = \mathbf{n}_i \cdot \mathbf{n}_j, \qquad \sin\theta_{ij} = \lVert \mathbf{n}_i \times \mathbf{n}_j \rVert.$$ Then we define our loss, Between Base Angle Error (BBAE) as, $$\mathrm{BBAE} = \frac{1}{N_{\mathrm{res}}^{2}} \sum_{i,j} \left[ \left(\sin\theta_{ij} - \sin\theta_{ij}^{\mathrm{TRUE}}\right)^{2} + \left(\cos\theta_{ij} - \cos\theta_{ij}^{\mathrm{TRUE}}\right)^{2} \right],$$ where $\theta$ are the dihedral angles for the predicted structure, and $\theta^{\mathrm{TRUE}}$ are the dihedral angles for the ground-truth structure. 4 Residue cropping A key consideration for deep learning models is memory. Recently, this issue has received a lot of attention, since large-scale state-of-the-art models (such as large language models) may have hundreds of billions of parameters ( Kaplan et al., 2020 ; Grattafiori et al., 2024 ), which must be loaded into memory during both inference and training. In the case of 3D structure prediction models, the number of parameters is much more manageable (on the order of around 100M parameters for AlphaFold 2 ( Jumper et al., 2021 ), for example), but the memory consumption is still relatively high, and can scale cubically or quadratically with sequence length ( Vaswani et al., 2017 ; Senior et al., 2020 ; Jumper et al., 2021 ). As a result, deep learning models for 3D structure prediction often have to crop the sequences to shorter sub-sequences during training and process these crops one at a time. This is commonly referred to as residue cropping . Initially, the AlphaFold 1 convolutional neural network (CNN) produced crops of length 64 in order to generate 64 × 64 distograms ( Senior et al., 2020 ). This cropping strategy is easy to motivate for proteins: existing literature has shown that protein contact prediction only needs a limited context window ( Jones & Kandathil, 2018 ; Senior et al., 2020 ). Unfortunately, this is not the case for RNAs. Some structural RNA families, such as SSU and LSU rRNA subunits, contain long-range base pairs that are more than 500 residues apart, and are difficult to predict for traditional dynamic programming algorithms ( Huang et al., 2019 ). 
In 2021, AlphaFold 2 moved from a CNN-based architecture to a transformer model, which requires $O(n^3)$ memory for a sequence of length n . This memory requirement comes from triangular self-attention. As a result, AlphaFold 2 takes highly restrictive crops of 256 residues during the initial training phase and crops of 384 residues during fine-tuning. The starting position of these crops is randomly sampled from 𝒰 {1, N res − crop_size + 1 } where N res is the length of the sequence. These crops are contiguous in sequence, so that given a starting position i , the window contains residues [ i, i + crop_size − 1]. While this strategy works well for proteins, since they only need small context windows, it is suboptimal at best for RNAs. AlphaFold 2 contiguous crops break RNA base pairs, and include only one half of the RNA canonical helices that are the foundation of any RNA 3D structure ( Figure 4a ). Despite this, some methods for RNA 3D structure prediction, such as DRfold ( Li et al., 2022 ) use contiguous cropping for training. Consequently, DRfold also restricts their test set to RNAs with lengths between 14 and 392 nucleotides ( Li et al., 2022 ), likely masking the performance degradation of their cropping strategy. Download figure Open in new tab Figure 4: Different cropping strategies. Example for a 30S rRNA (PDB: 5no2_A ) ( López-Alonso et al., 2017 ). The red regions indicate the cropped sequence. The starting position is 1,100 with crop_size = 384, with Watson-Crick base pairs extracted using RNApdbee 2.0 ( Zok et al., 2018 ) and RNAview ( Leontis & Westhof, 2001 ). The visualisations were created with Mol* ( Sehnal et al., 2021 ). When DeepMind debuted AlphaFold-Multimer for protein complex prediction in 2022, they introduced spatial crops where residues are selected by their spatial distance in the 3D structure. For these spatial crops, a starting position is sampled from 𝒰 {1, N res }. 
A reference atom is chosen (in the case of AlphaFold-Multimer C α atoms), from which all distances are measured. Then, the crop_size nearest residues, measured by Euclidean distance to the reference atoms, are taken as the crop ( Figure 4b ). In AlphaFold-Multimer, spatial crops are chosen in a 50:50 ratio along with contiguous crops with a crop_size of 384. Although only used by AlphaFold-Multimer for multimer interface residues, the concept can be easily adapted to monomers. Beyond spatial crops, RoseTTAFoldNA ( Baek et al., 2023 ) also developed an alternate strategy for cropping nucleic acid–protein complexes and RNA monomers to explicitly avoid breaking base pairs and to pick context windows better suited to preserve RNA canonical helices composed of stacked Watson-Crick base pairs ( Figure 4c ). For RNA monomers, RoseTTAFoldNA builds a weighted undirected graph of the sequence where sequential residues have edges with a weight of one, and Watson–Crick base pairs have a weight of zero. As before, a random starting position is sampled from 𝒰 { 1, N res } , and minimum-weight graph traversal is used to find the nearest crop_size = 256 residues based on the graph-distance. We suggest a combined method using contiguous cropping together with RoseTTAFoldNA’s interaction graph-based and AlphaFold 3 spatial crops. The ratios of the different cropping categories can be customized for the different training sets and different environments, with the goal of balancing time and performance with structure prediction accuracy. 5 S ummary The prediction of RNA 3D structure from sequence by deep learning methods is challenged by the small amount of structural data existing to train the models in comparison to proteins. 
This manuscript aims at lowering the impact of such a fundamental problem by making sure that the information obtained from the existing inputs extracts the maximal amount of structural RNA specific properties, both structural and at the level of RNA base pairing. We have presented structural alignments for PDB RNA chains that capture significantly more pairing information than other agnostic homology methods. We have introduced losses capturing RNA base pairing information, including non-canonical base pairs, and also structural losses that capture the stacked nature of the 3D helices formed by the RNA canonical base pairs. Finally, we have discussed base pairing-aware cropping strategies. By introducing these topics in this manuscript, we hope to encourage further research and discussion on RNA-specific models in the field of RNA 3D structure prediction. A S tructural E volutionary features As discussed in Section 2 , the purpose of feeding MSAs into deep learning structure prediction pipelines is to provide evolutionary context about the residues. In traditional RNA structure prediction pipelines, MSAs can allow the model to identify covariation resulting from the presence of conserved RNA structure. For structured RNAs, the covariation derived from their alignments has been shown to be highly informative towards both the secondary and tertiary structures ( Rivas et al., 2017 ; Rivas, 2020 ; Rivas et al., 2020 ; Rivas, 2023 ; Karan & Rivas, 2024 ). Here we describe a method for how these features can be used by an AlphaFold-like architecture. Using the software R-scape ( Rivas et al., 2017 ; 2020; Rivas, 2020 ), for any pair of positions, we can compute both the statistically significant covariation above phylogenetic expectation or observed covariation (in the form of an expected E-value), as well as the expected covariation (or power) given the number of total substitutions in the pair. 
Both E-value and power are binned into 8 bins, and E-value is clamped to the range [0, 10.0]. The feature tensor produced is of shape [ N res , N res , 16]. This is then linearly projected to the pair representation channel dimension c z , and added to the input of, for example, the main transformer block. This method also allows for calculating the E-value and power from multiple MSAs by computing the minimum, mean, and maximum across the MSAs, and producing a tensor of shape [ N res , N res , 16 × 3]. Under ideal circumstances, it may seem unnecessary to explicitly feed in these features. We may expect that since deep learning is highly effective at representation learning, i.e. the ability to learn the useful representations from the raw data, we can just directly input the raw alignments and the network can learn these features implicitly as part of its internal representation. Currently we have no evidence to conclusively show whether inputting these features directly improves performance, however, there are two other possible motivations for explicitly calculating these. First, these evolutionary maps provide an efficient way of embedding covariation information from a high-dimensional MSA in a way that is independent of the MSA dimensionality. AlphaFold 2’s Evoformer memory cost is , where N seq is the number of sequences in the alignment 3 . To address this, the model reduced the depth of the alignments using MSA clustering , where a relatively small number of sequences ( N clust = 128 during the initial training phase, N clust = 512 during fine-tuning 4 ) cluster centres are picked. Then the remaining sequences in the MSA are assigned to their closest cluster by Hamming distance, and a number of statistics (e.g. distribution of amino acids) are computed for the cluster. Our proposed evolutionary features avoid the effect of any dimensionality reduction. 
Second, while we currently suggest using these features as inputs, they may also be useful as auxiliary losses. It would be easy to create an auxiliary head that linearly projects the internal representation into the desired dimension (either [ N res , N res , 16] or [ N res , N res , 16 × 3]), and calculates the averaged cross-entropy loss as done for distograms by AlphaFold 2. We further suggest that these features may also be a useful auxiliary head for other models such as RNA language models like RiNALMo ( Penić et al., 2024 ). B S tructural alignments For our Rfam-based MSAs, each family-specific alignment includes the query sequence, and up to 256 seed sequences from the Rfam family, or up to 512 full sequences if there are fewer than 256 seed sequences in the family. In Figure 5 , we describe two other examples of structural RNAs and the comparison of our Rfam/Infernal based structural alignments and the structure-agnostic alignments used by AlphaFold 3. For the two selected PDB chains: 5ddp_A (a glutamine riboswitch aptamer) ( Ren et al., 2015 ) and 2qus_A (a Hammerhead_3 ribozyme) ( Chi et al., 2008 ), we observe that Infernal is able to find multiple homologs in the Rfam database, and produces a structural alignment of quality comparable to that of the Rfam seed. On the other hand, a structure-agnostic search in the same database renders few homologs and very sequence-conserved alignments that offer no evolutionary information about the secondary structures. Download figure Open in new tab Figure 5: Comparison of alignment methods. (a) The AlphaFold 3-like alignments were created in-house using the following database: Nucleotide collection (nt) 112,177,963 sequences; 2,688,129,930,104 total bases (Feb 2, 2025 4:42 AM); BLASTDB Version: 5; RNAcentral Release 24, 07/03/2024. (b) Our structural alignments were created using Rfam v15.0, and Infernal v 1.1.4. (c) We compare to the Rfam seed alignment for the RNA family to which the queries belong. 
Evolutionary conserved base pairs are depicted in green. For other details, see caption of Figure 2 . C P airtogram loss details Pairtogram data is extracted from the 3D structures using R-scape ( Rivas et al., 2017 ; 2020; Rivas, 2020 ), which includes a modified version of the software RNAView ( Leontis & Westhof, 2001 ). The final pairtogram matrix has 14 total dimensions for each pair (see Table 1 and Figure 1b for a breakdown of the features). We treat these 14 dimensions as four separate one-hot vectors, and calculate the average cross-entropy loss across the four features between the ground-truth and a linear projection from the pair representation. View this table: View inline View popup Download powerpoint Table 1: Structural features used in the pairtogram loss. The final tensor has size [ N res , N res , 14]. Footnotes ↵ 1 Distance histograms , an output of discretised pairwise distances between all atoms. ↵ 3 This is a simplification from the original AlphaFold 2 paper. We omit templates from our explanations. ↵ 4 This is a simplification. N seq = N clust + N templ . † Note that . R eferences ↵ Josh Abramson , Jonas Adler , Jack Dunger , Richard Evans , Tim Green , Alexander Pritzel , Olaf Ronneberger , Lindsay Willmore , Andrew J. Ballard , Joshua Bambrick , Sebastian W. Bodenstein , David A. Evans , Chia-Chun Hung , Michael O’Neill , David Reiman , Kathryn Tunyasuvunakool , Zachary Wu , Akvilė Žemgulytė , Eirini Arvaniti , Charles Beattie , Ottavia Bertolli , Alex Bridgland , Alexey Cherepanov , Miles Congreve , Alexander I. Cowen-Rivers , Andrew Cowie , Michael Figurnov , Fabian B. Fuchs , Hannah Gladman , Rishub Jain , Yousuf A. Khan , Caroline M. R. Low , Kuba Perlin , Anna Potapenko , Pascal Savy , Sukhdeep Singh , Adrian Stecula , Ashok Thillaisundaram , Catherine Tong , Sergei Yakneen , Ellen D. Zhong , Michal Zielinski , Augustin Žídek , Victor Bapst , Pushmeet Kohli , Max Jaderberg , Demis Hassabis , and John M. Jumper . 
Accurate structure prediction of biomolecular interactions with AlphaFold 3 . Nature , 630 ( 8016 ): 493 – 500 , June 2024 . ISSN 1476-4687 . doi: 10.1038/s41586-024-07487-w . URL https://www.nature.com/articles/s41586-024-07487-w . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed ↵ Minkyung Baek , Ryan McHugh , Ivan Anishchenko , David Baker , and Frank DiMaio . Accurate prediction of nucleic acid and protein-nucleic acid complexes using RoseTTAFoldNA , September 2022 . URL https://www.biorxiv.org/content/10.1101/2022.09.09.507333v1 . Pages: 2022.09.09.507333 Section: New Results. ↵ Minkyung Baek , Ryan McHugh , Ivan Anishchenko , Hanlun Jiang , David Baker , and Frank DiMaio . Accurate prediction of protein–nucleic acid complexes using RoseTTAFoldNA . Nature Methods , pp. 1 – 5 , November 2023 . ISSN 1548-7105 . doi: 10.1038/s41592-023-02086-5 . URL https://www.nature.com/articles/s41592-023-02086-5 . Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed ↵ A. R. Banerjee , J. A. Jaeger , and D. H. Turner . Thermal unfolding of a group I ribozyme: the low-temperature transition is primarily disruption of tertiary structure . Biochemistry , 32 ( 1 ): 153 – 163 , January 1993 . ISSN 0006-2960 . doi: 10.1021/bi00052a021 . OpenUrl CrossRef PubMed ↵ Michael P. Callahan . Nucleic Acid Base . In Muriel Gargaud , Ricardo Amils , José Cernicharo Quintanilla , Henderson James (Jim) Cleaves , William M. Irvine , Daniele L. Pinti , and Michel Viso (eds.), Encyclopedia of Astrobiology , pp. 1138 – 1140 . Springer , Berlin, Heidelberg , 2011 . ISBN 978-3-642-11274-4 . doi: 10.1007/978-3-642-11274-4_1080 . URL https://doi.org/10.1007/978-3-642-11274-4_1080 . OpenUrl CrossRef ↵ Xinshi Chen , Yu Li , Ramzan Umarov , Xin Gao , and Le Song. RNA Secondary Structure Prediction By Learning Unrolled Algorithms . 
In International Conference on Learning Representations , 2020 . doi: 10.48550/arXiv.2002.05810 . URL https://openreview.net/forum?id=S1eALyrYDH . OpenUrl CrossRef ↵ Young-In Chi , Monika Martick , Monica Lares , Rosalind Kim , William G. Scott , and Sung-Hou Kim . Capturing Hammerhead Ribozyme Structures in Action by Modulating General Base Catalysis . PLOS Biology , 6 ( 9 ): e234 , September 2008 . ISSN 1545-7885 . doi: 10.1371/journal.pbio.0060234 . OpenUrl CrossRef PubMed ↵ José Almeida Cruz , Marc-Frédérick Blanchet , Michal Boniecki , Janusz M. Bujnicki , Shi-Jie Chen , Song Cao , Rhiju Das , Feng Ding , Nikolay V. Dokholyan , Samuel Coulbourn Flores , Lili Huang , Christopher A. Lavender , Véronique Lisi , François Major , Katarzyna Mikolajczak , Dinshaw J. Patel , Anna Philips , Tomasz Puton , John Santalucia , Fredrick Sijenyi , Thomas Hermann , Kristian Rother , Magdalena Rother , Alexander Serganov , Marcin Skorupski , Tomasz Soltysinski , Parin Sripakdeevong , Irina Tuszynska , Kevin M. Weeks , Christina Waldsich , Michael Wildauer , Neocles B. Leontis , and Eric Westhof . RNA-Puzzles: A CASP-like evaluation of RNA three-dimensional structure prediction . RNA , 18 ( 4 ): 610 – 625 , April 2012 . ISSN 1355-8382 . doi: 10.1261/rna.031054.111 . URL https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3312550/ . OpenUrl Abstract / FREE Full Text ↵ Sean R. Eddy . A Probabilistic Model of Local Sequence Alignment That Simplifies Statistical Significance Estimation . PLOS Computational Biology , 4 ( 5 ): e1000069 , May 2008 . ISSN 1553-7358 . doi: 10.1371/journal.pcbi.1000069 . URL https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000069 . Publisher: Public Library of Science . OpenUrl CrossRef PubMed ↵ Sean R. Eddy . A new generation of homology search tools based on probabilistic inference . Genome Informatics. International Conference on Genome Informatics , 23 ( 1 ): 205 – 211 , October 2009 . ISSN 0919-9454 . OpenUrl PubMed ↵ Sean R. Eddy . 
Accelerated Profile HMM Searches . PLOS Computational Biology , 7 ( 10 ): e1002195 , October 2011 . ISSN 1553-7358 . doi: 10.1371/journal.pcbi.1002195 . URL https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1002195 . Publisher: Public Library of Science . OpenUrl CrossRef PubMed ↵ Chenjie Feng , Wenkai Wang , Renmin Han , Ziyi Wang , Lisa Ye , Zongyang Du , Hong Wei , Fa Zhang , Zhenling Peng , and Jianyi Yang . Accurate de novo prediction of RNA 3D structure with transformer network , October 2022 . URL https://www.biorxiv.org/content/10.1101/2022.10.24.513506v1 . Pages: 2022.10.24.513506 Section: New Results. ↵ Eva K. Freyhult , Jonathan P. Bollback , and Paul P. Gardner . Exploring genomic dark matter: A critical assessment of the performance of homology search methods on noncoding RNA . Genome Research , 17 ( 1 ): 117 – 125 , January 2007 . ISSN 1088-9051 . doi: 10.1101/gr.5890907 . URL https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1716261/ . OpenUrl Abstract / FREE Full Text ↵ Laiyi Fu , Yingxin Cao , Jie Wu , Qinke Peng , Qing Nie , and Xiaohui Xie . UFold: fast and accurate RNA secondary structure prediction with deep learning . Nucleic Acids Research , pp. gkab1074 , November 2021 . ISSN 0305-1048 . doi: 10.1093/nar/gkab1074 . URL https://doi.org/10.1093/nar/gkab1074 . OpenUrl CrossRef ↵ W. Gao , A. Yang , and E. Rivas . Thirteen dubious ways to detect conserved structural RNAs . IUBMB Life , 75 : 471 – 492 , 2022 . doi: 10.1002/iub.2694 . URL https://iubmb.onlinelibrary.wiley.com/doi/10.1002/iub.2694 . 
OpenUrl CrossRef PubMed ↵ Aaron Grattafiori , Abhimanyu Dubey , Abhinav Jauhri , Abhinav Pandey , Abhishek Kadian , Ahmad Al-Dahle , Aiesha Letman , Akhil Mathur , Alan Schelten , Alex Vaughan , Amy Yang , Angela Fan , Anirudh Goyal , Anthony Hartshorn , Aobo Yang , Archi Mitra , Archie Sravankumar , Artem Korenev , Arthur Hinsvark , Arun Rao , Aston Zhang , Aurelien Rodriguez , Austen Gregerson , Ava Spataru , Baptiste Roziere , Bethany Biron , Binh Tang , Bobbie Chern , Charlotte Caucheteux , Chaya Nayak , Chloe Bi , Chris Marra , Chris McConnell , Christian Keller , Christophe Touret , Chunyang Wu , Corinne Wong , Cristian Canton Ferrer , Cyrus Nikolaidis , Damien Allonsius , Daniel Song , Danielle Pintz , Danny Livshits , Danny Wyatt , David Esiobu , Dhruv Choudhary , Dhruv Mahajan , Diego Garcia-Olano , Diego Perino , Dieuwke Hupkes , Egor Lakomkin , Ehab AlBadawy , Elina Lobanova , Emily Dinan , Eric Michael Smith , Filip Radenovic , Francisco Guzmán , Frank Zhang , Gabriel Synnaeve , Gabrielle Lee , Georgia Lewis Anderson , Govind Thattai , Graeme Nail , Gregoire Mialon , Guan Pang , Guillem Cucurell , Hailey Nguyen , Hannah Korevaar , Hu Xu , Hugo Touvron , Iliyan Zarov , Imanol Arrieta Ibarra , Isabel Kloumann , Ishan Misra , Ivan Evtimov , Jack Zhang , Jade Copet , Jaewon Lee , Jan Geffert , Jana Vranes , Jason Park , Jay Mahadeokar , Jeet Shah , Jelmer van der Linde , Jennifer Billock , Jenny Hong , Jenya Lee , Jeremy Fu , Jianfeng Chi , Jianyu Huang , Jiawen Liu , Jie Wang , Jiecao Yu , Joanna Bitton , Joe Spisak , Jongsoo Park , Joseph Rocca , Joshua Johnstun , Joshua Saxe , Junteng Jia , Kalyan Vasuden Alwala , Karthik Prasad , Kartikeya Upasani , Kate Plawiak , Ke Li , Kenneth Heafield , Kevin Stone , Khalid El-Arini , Krithika Iyer , Kshitiz Malik , Kuenley Chiu , Kunal Bhalla , Kushal Lakhotia , Lauren Rantala-Yeary , Laurens van der Maaten , Lawrence Chen , Liang Tan , Liz Jenkins , Louis Martin , Lovish Madaan , Lubo Malo , Lukas Blecher , Lukas 
Landzaat , Luke de Oliveira , Madeline Muzzi , Mahesh Pasupuleti , Mannat Singh , Manohar Paluri , Marcin Kardas , Maria Tsimpoukelli , Mathew Oldham , Mathieu Rita , Maya Pavlova , Melanie Kambadur , Mike Lewis , Min Si , Mitesh Kumar Singh , Mona Hassan , Naman Goyal , Narjes Torabi , Nikolay Bashlykov , Nikolay Bogoychev , Niladri Chatterji , Ning Zhang , Olivier Duchenne , Onur Çelebi , Patrick Alrassy , Pengchuan Zhang , Pengwei Li , Petar Vasic , Peter Weng , Prajjwal Bhargava , Pratik Dubal , Praveen Krishnan , Punit Singh Koura , Puxin Xu , Qing He , Qingxiao Dong , Ragavan Srinivasan , Raj Ganapathy , Ramon Calderer , Ricardo Silveira Cabral , Robert Stojnic , Roberta Raileanu , Rohan Maheswari , Rohit Girdhar , Rohit Patel , Romain Sauvestre , Ronnie Polidoro , Roshan Sumbaly , Ross Taylor , Ruan Silva , Rui Hou , Rui Wang , Saghar Hosseini , Sahana Chennabasappa , Sanjay Singh , Sean Bell , Seohyun Sonia Kim , Sergey Edunov , Shaoliang Nie , Sharan Narang , Sharath Raparthy , Sheng Shen , Shengye Wan , Shruti Bhosale , Shun Zhang , Simon Vandenhende , Soumya Batra , Spencer Whitman , Sten Sootla , Stephane Collot , Suchin Gururangan , Sydney Borodinsky , Tamar Herman , Tara Fowler , Tarek Sheasha , Thomas Georgiou , Thomas Scialom , Tobias Speckbacher , Todor Mihaylov , Tong Xiao , Ujjwal Karn , Vedanuj Goswami , Vibhor Gupta , Vignesh Ramanathan , Viktor Kerkez , Vincent Gonguet , Virginie Do , Vish Vogeti , Vítor Albiero , Vladan Petrovic , Weiwei Chu , Wenhan Xiong , Wenyin Fu , Whitney Meers , Xavier Martinet , Xiaodong Wang , Xiaofang Wang , Xiaoqing Ellen Tan , Xide Xia , Xinfeng Xie , Xuchao Jia , Xuewei Wang , Yaelle Goldschlag , Yashesh Gaur , Yasmine Babaei , Yi Wen , Yiwen Song , Yuchen Zhang , Yue Li , Yuning Mao , Zacharie Delpierre Coudert , Zheng Yan , Zhengxing Chen , Zoe Papakipos , Aaditya Singh , Aayushi Srivastava , Abha Jain , Adam Kelsey , Adam Shajnfeld , Adithya Gangidi , Adolfo Victoria , Ahuva Goldstand , Ajay Menon , Ajay 
Sharma , Alex Boesenberg , Alexei Baevski , Allie Feinstein , Amanda Kallet , Amit Sangani , Amos Teo , Anam Yunus , Andrei Lupu , Andres Alvarado , Andrew Caples , Andrew Gu , Andrew Ho , Andrew Poulton , Andrew Ryan , Ankit Ramchandani , Annie Dong , Annie Franco , Anuj Goyal , Aparajita Saraf , Arkabandhu Chowdhury , Ashley Gabriel , Ashwin Bharambe , Assaf Eisenman , Azadeh Yazdan , Beau James , Ben Maurer , Benjamin Leonhardi , Bernie Huang , Beth Loyd , Beto De Paola , Bhargavi Paranjape , Bing Liu , Bo Wu , Boyu Ni , Braden Hancock , Bram Wasti , Brandon Spence , Brani Stojkovic , Brian Gamido , Britt Montalvo , Carl Parker , Carly Burton , Catalina Mejia , Ce Liu , Changhan Wang , Changkyu Kim , Chao Zhou , Chester Hu , Ching-Hsiang Chu , Chris Cai , Chris Tindal , Christoph Feichtenhofer , Cynthia Gao , Damon Civin , Dana Beaty , Daniel Kreymer , Daniel Li , David Adkins , David Xu , Davide Testuggine , Delia David , Devi Parikh , Diana Liskovich , Didem Foss , Dingkang Wang , Duc Le , Dustin Holland , Edward Dowling , Eissa Jamil , Elaine Montgomery , Eleonora Presani , Emily Hahn , Emily Wood , Eric-Tuan Le , Erik Brinkman , Esteban Arcaute , Evan Dunbar , Evan Smothers , Fei Sun , Felix Kreuk , Feng Tian , Filippos Kokkinos , Firat Ozgenel , Francesco Caggioni , Frank Kanayet , Frank Seide , Gabriela Medina Florez , Gabriella Schwarz , Gada Badeer , Georgia Swee , Gil Halpern , Grant Herman , Grigory Sizov , Guangyi , Zhang , Guna Lakshminarayanan , Hakan Inan , Hamid Shojanazeri , Han Zou , Hannah Wang , Hanwen Zha , Haroun Habeeb , Harrison Rudolph , Helen Suk , Henry Aspegren , Hunter Goldman , Hongyuan Zhan , Ibrahim Damlaj , Igor Molybog , Igor Tufanov , Ilias Leontiadis , Irina-Elena Veliche , Itai Gat , Jake Weissman , James Geboski , James Kohli , Janice Lam , Japhet Asher , Jean-Baptiste Gaya , Jeff Marcus , Jeff Tang , Jennifer Chan , Jenny Zhen , Jeremy Reizenstein , Jeremy Teboul , Jessica Zhong , Jian Jin , Jingyi Yang , Joe Cummings , Jon 
Carvill , Jon Shepard , Jonathan McPhie , Jonathan Torres , Josh Ginsburg , Junjie Wang , Kai Wu , Kam Hou U , Karan Saxena , Kartikay Khandelwal , Katayoun Zand , Kathy Matosich , Kaushik Veeraraghavan , Kelly Michelena , Keqian Li , Kiran Jagadeesh , Kun Huang , Kunal Chawla , Kyle Huang , Lailin Chen , Lakshya Garg , Lavender A , Leandro Silva , Lee Bell , Lei Zhang , Liangpeng Guo , Licheng Yu , Liron Moshkovich , Luca Wehrstedt , Madian Khabsa , Manav Avalani , Manish Bhatt , Martynas Mankus , Matan Hasson , Matthew Lennie , Matthias Reso , Maxim Groshev , Maxim Naumov , Maya Lathi , Meghan Keneally , Miao Liu , Michael L. Seltzer , Michal Valko , Michelle Restrepo , Mihir Patel , Mik Vyatskov , Mikayel Samvelyan , Mike Clark , Mike Macey , Mike Wang , Miquel Jubert Hermoso , Mo Metanat , Mohammad Rastegari , Munish Bansal , Nandhini Santhanam , Natascha Parks , Natasha White , Navyata Bawa , Nayan Singhal , Nick Egebo , Nicolas Usunier , Nikhil Mehta , Nikolay Pavlovich Laptev , Ning Dong , Norman Cheng , Oleg Chernoguz , Olivia Hart , Omkar Salpekar , Ozlem Kalinli , Parkin Kent , Parth Parekh , Paul Saab , Pavan Balaji , Pedro Rittner , Philip Bontrager , Pierre Roux , Piotr Dollar , Polina Zvyagina , Prashant Ratanchandani , Pritish Yuvraj , Qian Liang , Rachad Alao , Rachel Rodriguez , Rafi Ayub , Raghotham Murthy , Raghu Nayani , Rahul Mitra , Rangaprabhu Parthasarathy , Raymond Li , Rebekkah Hogan , Robin Battey , Rocky Wang , Russ Howes , Ruty Rinott , Sachin Mehta , Sachin Siby , Sai Jayesh Bondu , Samyak Datta , Sara Chugh , Sara Hunt , Sargun Dhillon , Sasha Sidorov , Satadru Pan , Saurabh Mahajan , Saurabh Verma , Seiji Yamamoto , Sharadh Ramaswamy , Shaun Lindsay , Shaun Lindsay , Sheng Feng , Shenghao Lin , Shengxin Cindy Zha , Shishir Patil , Shiva Shankar , Shuqiang Zhang , Shuqiang Zhang , Sinong Wang , Sneha Agarwal , Soji Sajuyigbe , Soumith Chintala , Stephanie Max , Stephen Chen , Steve Kehoe , Steve Satterfield , Sudarshan Govindaprasad , 
Sumit Gupta , Summer Deng , Sungmin Cho , Sunny Virk , Suraj Subramanian , Sy Choudhury , Sydney Goldman , Tal Remez , Tamar Glaser , Tamara Best , Thilo Koehler , Thomas Robinson , Tianhe Li , Tianjun Zhang , Tim Matthews , Timothy Chou , Tzook Shaked , Varun Vontimitta , Victoria Ajayi , Victoria Montanez , Vijai Mohan , Vinay Satish Kumar , Vishal Mangla , Vlad Ionescu , Vlad Poenaru , Vlad Tiberiu Mihailescu , Vladimir Ivanov , Wei Li , Wenchen Wang , Wenwen Jiang , Wes Bouaziz , Will Constable , Xiaocheng Tang , Xiaojian Wu , Xiaolan Wang , Xilun Wu , Xinbo Gao , Yaniv Kleinman , Yanjun Chen , Ye Hu , Ye Jia , Ye Qi , Yenda Li , Yilin Zhang , Ying Zhang , Yossi Adi , Youngjin Nam , Yu , Wang , Yu Zhao , Yuchen Hao , Yundi Qian , Yunlu Li , Yuzi He , Zach Rait , Zachary DeVito , Zef Rosnbrick , Zhaoduo Wen , Zhenyu Yang , Zhiwei Zhao , and Zhiyu Ma . The Llama 3 Herd of Models , November 2024 . URL http://arxiv.org/abs/2407.21783 . arxiv: 2407.21783 [cs]. ↵ Liang Huang , He Zhang , Dezhong Deng , Kai Zhao , Kaibo Liu , David A Hendrix , and David H Mathews . LinearFold: linear-time approximate RNA folding by 5’-to-3’ dynamic programming and beam search . Bioinformatics , 35 ( 14 ): i295 – i304 , July 2019 . ISSN 1367-4803 . doi: 10.1093/bioinformatics/btz375 . URL https://doi.org/10.1093/bioinformatics/btz375 . OpenUrl CrossRef PubMed ↵ David T Jones and Shaun M Kandathil . High precision in protein contact prediction using fully convolutional neural networks and minimal sequence features . Bioinformatics , 34 ( 19 ): 3308 – 3315 , October 2018 . ISSN 1367-4803 . doi: 10.1093/bioinformatics/bty341 . URL https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6157083/ . OpenUrl CrossRef PubMed ↵ John Jumper , Richard Evans , Alexander Pritzel , Tim Green , Michael Figurnov , Olaf Ronneberger , Kathryn Tunyasuvunakool , Russ Bates , Augustin Žídek , Anna Potapenko , Alex Bridgland , Clemens Meyer , Simon A. A. Kohl , Andrew J. 
Ballard , Andrew Cowie , Bernardino Romera-Paredes , Stanislav Nikolov , Rishub Jain , Jonas Adler , Trevor Back , Stig Petersen , David Reiman , Ellen Clancy , Michal Zielinski , Martin Steinegger , Michalina Pacholska , Tamas Berghammer , Sebastian Bodenstein , David Silver , Oriol Vinyals , Andrew W. Senior , Koray Kavukcuoglu , Pushmeet Kohli , and Demis Hassabis . Highly accurate protein structure prediction with AlphaFold . Nature , 596 ( 7873 ): 583 – 589 , August 2021 . ISSN 1476-4687 . doi: 10.1038/s41586-021-03819-2 . URL https://www.nature.com/articles/s41586-021-03819-2 . Number: 7873 Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed ↵ Jared Kaplan , Sam McCandlish , Tom Henighan , Tom B. Brown , Benjamin Chess , Rewon Child , Scott Gray , Alec Radford , Jeffrey Wu , and Dario Amodei . Scaling Laws for Neural Language Models , January 2020 . URL http://arxiv.org/abs/2001.08361 . arxiv: 2001.08361 [cs]. ↵ Aayush Karan and Elena Rivas . All-at-once RNA folding with 3D motif prediction framed by evolutionary information , December 2024 . URL https://www.biorxiv.org/content/10.1101/2024.12.17.628809v1 . Pages: 2024.12.17.628809 Section: New Results. ↵ Peter Kerpedjiev , Christian Höner zu Siederdissen , and Ivo L Hofacker . Predicting RNA 3D structure using a coarse-grain helix-centered model . RNA , 21 : 1110 – 1121 , 2015 . OpenUrl Abstract / FREE Full Text ↵ Andriy Kryshtafovych , Torsten Schwede , Maya Topf , Krzysztof Fidelis , and John Moult . Critical assessment of methods of protein structure prediction (CASP)-Round XV . Proteins , 91 ( 12 ): 1539 – 1549 , December 2023 . ISSN 1097-0134 . doi: 10.1002/prot.26617 . OpenUrl CrossRef PubMed ↵ NB Leontis and E Westhof . Geometric nomenclature and classification of RNA base pairs . RNA , 7 ( 4 ): 499 – 512 , April 2001 . ISSN 1355-8382 . URL https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1370104/ . OpenUrl Abstract ↵ Yang Li , Chengxin Zhang , Chenjie Feng , Peter L. 
Freddolino , and Yang Zhang . Integrating end-to-end learning with deep geometrical potentials for ab initio RNA structure prediction , December 2022 . URL https://www.biorxiv.org/content/10.1101/2022.12.30.522296v1 . Pages: 2022.12.30.522296 Section: New Results. ↵ Ronny Lorenz , Ivo L. Hofacker , and Peter F. Stadler . RNA folding with hard and soft constraints . Algorithms for Molecular Biology , 11 ( 1 ): 8 , April 2016 . ISSN 1748-7188 . doi: 10.1186/s13015-016-0070-z . URL https://doi.org/10.1186/s13015-016-0070-z . OpenUrl CrossRef ↵ Jorge Pedro López-Alonso , Tatsuya Kaminishi , Takeshi Kikuchi , Yuya Hirata , Idoia Iturrioz , Neha Dhimole , Andreas Schedlbauer , Yoichi Hase , Simon Goto , Daisuke Kurita , Akira Muto , Shu Zhou , Chieko Naoe , Deryck J. Mills , David Gil-Carton , Chie Takemoto , Hyouta Himeno , Paola Fucini , and Sean R. Connell . RsgA couples the maturation state of the 30S ribosomal decoding center to activation of its GTPase pocket . Nucleic Acids Research , 45 ( 11 ): 6945 – 6959 , June 2017 . ISSN 0305-1048 . doi: 10.1093/nar/gkx324 . URL https://doi.org/10.1093/nar/gkx324 . OpenUrl CrossRef PubMed ↵ D. H. Mathews , A. R. Banerjee , D. D. Luan , T. H. Eickbush , and D. H. Turner . Secondary structure model of the RNA recognized by the reverse transcriptase from the R2 retrotransposable element . RNA (New York, N.Y.), 3 ( 1 ): 1 – 16 , January 1997 . ISSN 1355-8382 . OpenUrl Abstract ↵ Zhichao Miao , Ryszard W. Adamiak , Marc-Frédérick Blanchet , Michal Boniecki , Janusz M. Bujnicki , Shi-Jie Chen , Clarence Cheng , Grzegorz Chojnowski , Fang-Chieh Chou , Pablo Cordero , José Almeida Cruz , Adrian R. Ferré-D’Amaré , Rhiju Das , Feng Ding , Nikolay V. Dokholyan , Stanislaw Dunin-Horkawicz , Wipapat Kladwang , Andrey Krokhotin , Grzegorz Lach , Marcin Magnus , François Major , Thomas H. Mann , Benoît Masquida , Dorota Matelska , Mélanie Meyer , Alla Peselis , Mariusz Popenda , Katarzyna J. 
Purzycka , Alexander Serganov , Juliusz Stasiewicz , Marta Szachniuk , Arpit Tandon , Siqi Tian , Jian Wang , Yi Xiao , Xiaojun Xu , Jinwei Zhang , Peinan Zhao , Tomasz Zok , and Eric Westhof . RNA-Puzzles Round II: assessment of RNA structure prediction programs applied to three large RNA structures . RNA , 21 ( 6 ): 1066 – 1084 , June 2015 . ISSN 1355-8382, 1469-9001 . doi: 10.1261/rna.049502.114 . URL http://rnajournal.cshlp.org/content/21/6/1066 . Company: Cold Spring Harbor Laboratory Press Distributor: Cold Spring Harbor Laboratory Press Institution: Cold Spring Harbor Laboratory Press Label: Cold Spring Harbor Laboratory Press Publisher: Cold Spring Harbor Lab . OpenUrl Abstract / FREE Full Text ↵ Zhichao Miao , Ryszard W. Adamiak , Maciej Antczak , Robert T. Batey , Alexander J. Becka , Marcin Biesiada , Michał J. Boniecki , Janusz M. Bujnicki , Shi-Jie Chen , Clarence Yu Cheng , Fang-Chieh Chou , Adrian R. Ferré-D’Amaré , Rhiju Das , Wayne K. Dawson , Feng Ding , Nikolay V. Dokholyan , Stanisław Dunin-Horkawicz , Caleb Geniesse , Kalli Kappel , Wipapat Kladwang , Andrey Krokhotin , Grzegorz E. Łach , François Major , Thomas H. Mann , Marcin Magnus , Katarzyna Pachulska-Wieczorek , Dinshaw J. Patel , Joseph A. Piccirilli , Mariusz Popenda , Katarzyna J. Purzycka , Aiming Ren , Greggory M. Rice , John Santalucia , Joanna Sarzynska , Marta Szachniuk , Arpit Tandon , Jeremiah J. Trausch , Siqi Tian , Jian Wang , Kevin M. Weeks , Benfeard Williams , Yi Xiao , Xiaojun Xu , Dong Zhang , Tomasz Zok , and Eric Westhof . RNA-Puzzles Round III: 3D RNA structure prediction of five riboswitches and one ribozyme . RNA , 23 ( 5 ): 655 – 672 , May 2017 . ISSN 1355-8382, 1469-9001 . doi: 10.1261/rna.060368.116 . URL http://rnajournal.cshlp.org/content/23/5/655 . 
Company: Cold Spring Harbor Laboratory Press Distributor: Cold Spring Harbor Laboratory Press Institution: Cold Spring Harbor Laboratory Press Label: Cold Spring Harbor Laboratory Press Publisher: Cold Spring Harbor Lab . OpenUrl Abstract / FREE Full Text ↵ Zhichao Miao , Ryszard W. Adamiak , Maciej Antczak , Michał J. Boniecki , Janusz Bujnicki , Shi-Jie Chen , Clarence Yu Cheng , Yi Cheng , Fang-Chieh Chou , Rhiju Das , Nikolay V. Dokholyan , Feng Ding , Caleb Geniesse , Yangwei Jiang , Astha Joshi , Andrey Krokhotin , Marcin Magnus , Olivier Mailhot , Francois Major , Thomas H. Mann , Paweł Piątkowski , Radoslaw Pluta , Mariusz Popenda , Joanna Sarzynska , Lizhen Sun , Marta Szachniuk , Siqi Tian , Jian Wang , Jun Wang , Andrew M. Watkins , Jakub Wiedemann , Yi Xiao , Xiaojun Xu , Joseph D. Yesselman , Dong Zhang , Yi Zhang , Zhenzhen Zhang , Chenhan Zhao , Peinan Zhao , Yuanzhe Zhou , Tomasz Zok , Adriana Żyła , Aiming Ren , Robert T. Batey , Barbara L. Golden , Lin Huang , David M. Lilley , Yijin Liu , Dinshaw J. Patel , and Eric Westhof . RNA-Puzzles Round IV: 3D structure predictions of four ribozymes and two aptamers . RNA , 26 ( 8 ): 982 – 995 , August 2020 . ISSN 1355-8382 . doi: 10.1261/rna.075341.120 . URL https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7373991/ . OpenUrl Abstract / FREE Full Text ↵ Eric P. Nawrocki and Sean R. Eddy . Infernal 1.1: 100-fold faster RNA homology searches . Bioinformatics , 29 ( 22 ): 2933 – 2935 , November 2013 . ISSN 1367-4803 . doi: 10.1093/bioinformatics/btt509 . URL https://doi.org/10.1093/bioinformatics/btt509 . OpenUrl CrossRef PubMed Web of Science ↵ Eric Paul Nawrocki . Structural RNA homology search and alignment using covariance models . Washington University in St. Louis , 2009 . ↵ Bibiana Onoa , Sophie Dumont , Jan Liphardt , Steven B. Smith , Ignacio Tinoco , and Carlos Bustamante . Identifying kinetic barriers to mechanical unfolding of the T. thermophila ribozyme . 
Science (New York, N.Y .), 299 ( 5614 ): 1892 – 1895 , March 2003 . ISSN 1095-9203 . doi: 10.1126/science.1081338 . OpenUrl Abstract / FREE Full Text ↵ N. Ontiveros-Palacios , E. Cooke , E. P. Nawrocki , S. Triebel , M. Marz , E. Rivas , S. Griffiths-Jones , A. I. Petrov , A. Bateman , and B. Sweeney . Rfam 15: RNA families database in 2025 . NAR, gkae1023 , 2024 . doi: 10.1093/nar/gkae1023 . OpenUrl CrossRef ↵ Robin Pearce , Gilbert S. Omenn , and Yang Zhang . De Novo RNA Tertiary Structure Prediction at Atomic Resolution Using Geometric Potentials from Deep Learning , May 2022 . URL https://www.biorxiv.org/content/10.1101/2022.05.15.491755v1 . Pages: 2022.05.15.491755 Section: New Results. ↵ Rafael Josip Penić , Tin Vlašić , Roland G. Huber , Yue Wan , and Mile Šikić . RiNALMo: General-Purpose RNA Language Models Can Generalize Well on Structure Prediction Tasks , February 2024 . URL http://arxiv.org/abs/2403.00043 . arxiv: 2403.00043 [cs, q-bio]. ↵ Aiming Ren , Yi Xue , Alla Peselis , Alexander Serganov , Hashim M. Al-Hashimi , and Dinshaw J. Patel . Structural and Dynamic Basis for Low-Affinity, High-Selectivity Binding of L-Glutamine by the Glutamine Riboswitch . Cell Reports , 13 ( 9 ): 1800 – 1813 , December 2015 . ISSN 2211-1247 . doi: 10.1016/j.celrep.2015.10.062 . OpenUrl CrossRef PubMed ↵ Elena Rivas . RNA structure prediction using positive and negative evolutionary information . PLOS Computational Biology , 16 ( 10 ): e1008387 , October 2020 . ISSN 1553-7358 . doi: 10.1371/journal.pcbi.1008387 . URL https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1008387 . Publisher: Public Library of Science . OpenUrl CrossRef PubMed ↵ Elena Rivas . RNA covariation at helix-level resolution for the identification of evolutionarily conserved RNA structure . PLOS Computational Biology , 19 ( 7 ): e1011262 , July 2023 . ISSN 1553-7358 . doi: 10.1371/journal.pcbi.1011262 . 
URL https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1011262 . Publisher: Public Library of Science . OpenUrl CrossRef PubMed ↵ Elena Rivas , Jody Clements , and Sean R. Eddy . A statistical test for conserved RNA structure shows lack of evidence for structure in lncRNAs . Nature Methods , 14 ( 1 ): 45 – 48 , January 2017 . ISSN 1548-7105 . doi: 10.1038/nmeth.4066 . OpenUrl CrossRef PubMed ↵ Elena Rivas , Jody Clements , and Sean R Eddy . Estimating the power of sequence covariation for detecting conserved RNA structure . Bioinformatics , 36 ( 10 ): 3072 – 3076 , May 2020 . ISSN 1367-4803 . doi: 10.1093/bioinformatics/btaa080 . URL https://doi.org/10.1093/bioinformatics/btaa080 . OpenUrl CrossRef PubMed ↵ RNAcentral Consortium . RNAcentral 2021: secondary structure integration, improved sequence search and new member databases . Nucleic Acids Research , 49 ( D1 ): D212 – D220 , January 2021 . ISSN 0305-1048 . doi: 10.1093/nar/gkaa921 . URL https://doi.org/10.1093/nar/gkaa921 . OpenUrl CrossRef PubMed ↵ Kengo Sato , Manato Akiyama , and Yasubumi Sakakibara . RNA secondary structure prediction using deep learning with thermodynamic integration . Nature Communications , 12 ( 1 ): 941 , February 2021 . ISSN 2041-1723 . doi: 10.1038/s41467-021-21194-4 . URL https://www.nature.com/articles/s41467-021-21194-4 . Bandiera_abtest: a Cc_license_type: cc_by Cg_type: Nature Research Journals Number: 1 Primary_atype: Research Publisher: Nature Publishing Group Subject_term: Machine learning;Non-coding RNAs;RNA;Structure determination Subject_term_id: machine-learning;non-coding-rnas;rna;structure-determination. OpenUrl CrossRef PubMed ↵ Eric W. Sayers , Evan E. Bolton , J. Rodney Brister , Kathi Canese , Jessica Chan , Donald C. Comeau , Catherine M. Farrell , Michael Feldgarden , Anna M. Fine , Kathryn Funk , Eneida Hatcher , Sivakumar Kannan , Christopher Kelly , Sunghwan Kim , William Klimke , Melissa J. Landrum , Stacy Lathrop , Zhiyong Lu , Thomas L. 
Madden , Adriana Malheiro , Aron Marchler-Bauer , Terence D. Murphy , Lon Phan , Shashikant Pujar , Sanjida H. Rangwala , Valerie A. Schneider , Tony Tse , Jiyao Wang , Jian Ye , Barton W. Trawick , Kim D. Pruitt , and Stephen T. Sherry . Database resources of the National Center for Biotechnology Information in 2023 . Nucleic Acids Research , 51 ( D1 ): D29 – D38 , January 2023 . ISSN 1362-4962 . doi: 10.1093/nar/gkac1032 . OpenUrl CrossRef PubMed ↵ David Sehnal , Sebastian Bittrich , Mandar Deshpande , Radka Svobodová , Karel Berka , Václav Bazgier , Sameer Velankar , Stephen K Burley , Jaroslav Koča , and Alexander S Rose . Mol* Viewer: modern web app for 3D visualization and analysis of large biomolecular structures . Nucleic Acids Research , 49 ( W1 ): W431 – W437 , July 2021 . ISSN 0305-1048 . doi: 10.1093/nar/gkab314 . URL https://doi.org/10.1093/nar/gkab314 . OpenUrl CrossRef PubMed ↵ Andrew W. Senior , Richard Evans , John Jumper , James Kirkpatrick , Laurent Sifre , Tim Green , Chongli Qin , Augustin Žídek , Alexander W. R. Nelson , Alex Bridgland , Hugo Penedones , Stig Petersen , Karen Simonyan , Steve Crossan , Pushmeet Kohli , David T. Jones , David Silver , Koray Kavukcuoglu , and Demis Hassabis . Improved protein structure prediction using potentials from deep learning . Nature , 577 ( 7792 ): 706 – 710 , January 2020 . ISSN 1476-4687 . doi: 10.1038/s41586-019-1923-7 . URL https://www.nature.com/articles/s41586-019-1923-7 . Number: 7792 Publisher: Nature Publishing Group . OpenUrl CrossRef PubMed ↵ Bruce A. Shapiro , Yaroslava G. Yingling , Wojciech Kasprzak , and Eckart Bindewald . Bridging the gap in RNA structure prediction . Current Opinion in Structural Biology , 17 ( 2 ): 157 – 165 , April 2007 . ISSN 0959-440X . doi: 10.1016/j.sbi.2007.03.001 . 
OpenUrl CrossRef PubMed Web of Science ↵ Tao Shen , Zhihang Hu , Zhangzhi Peng , Jiayang Chen , Peng Xiong , Liang Hong , Liangzhen Zheng , Yixuan Wang , Irwin King , Sheng Wang , Siqi Sun , and Yu Li . E2Efold-3D: End-to-End Deep Learning Method for accurate de novo RNA 3D Structure Prediction , July 2022 . URL http://arxiv.org/abs/2207.01586 . arxiv: 2207.01586 [cs, q-bio]. ↵ Huijing Shi and Peter B. Moore . The crystal structure of yeast phenylalanine tRNA at 1.93 Å resolution: A classic structure revisited . RNA , 6 ( 8 ): 1091 – 1105 , August 2000 . ISSN 1355-8382 . doi: 10.1017/S1355838200000364 . URL https://www.cambridge.org/core/journals/rna/article/abs/crystal-structure-of-yeast-phenylalanine-trna-at-193-a-resolution-a-classic-structAC4EBBDBBABEEC91D6B0D48E511B707C . Publisher: Cambridge University Press . OpenUrl Abstract ↵ Marcell Szikszai , Marcin Magnus , Siddhant Sanghi , Sachin Kadyan , Nazim Bouatta , and Elena Rivas . RNA3DB: A structurally-dissimilar dataset split for training and benchmarking deep learning models for RNA structure prediction . Journal of Molecular Biology , pp. 168552 , March 2024 . ISSN 0022-2836 . doi: 10.1016/j.jmb.2024.168552 . URL https://www.sciencedirect.com/science/article/pii/S0022283624001475 . OpenUrl CrossRef ↵ I. Tinoco and C. Bustamante . How RNA folds . Journal of Molecular Biology , 293 ( 2 ): 271 – 281 , October 1999 . ISSN 0022-2836 . doi: 10.1006/jmbi.1999.3001 . OpenUrl CrossRef PubMed Web of Science ↵ Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N Gomez , Łukasz Kaiser , and Illia Polosukhin . Attention is All you Need . In I. Guyon , U. Von Luxburg , S. Bengio , H. Wallach , R. Fergus , S. Vishwanathan , and R. Garnett (eds.), Advances in Neural Information Processing Systems , volume 30 . Curran Associates, Inc., 2017 . 
URL https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf . ↵ Linyu Wang , Xiaodan Zhong , Shuo Wang , Hao Zhang , and Yuanning Liu . A novel end-to-end method to predict RNA secondary structure profile based on bidirectional LSTM and residual neural network . BMC bioinformatics , 22 ( 1 ): 169 , March 2021 . ISSN 1471-2105 . doi: 10.1186/s12859-021-04102-x . OpenUrl CrossRef PubMed ↵ Wenkai Wang , Chenjie Feng , Renmin Han , Ziyi Wang , Lisha Ye , Zongyang Du , Hong Wei , Fa Zhang , Zhenling Peng , and Jianyi Yang . trRosettaRNA: Automated prediction of RNA 3D structure with transformer network . Nature Communications , 14 : 7266 , November 2023 . ISSN 2041-1723 . doi: 10.1038/s41467-023-42528-4 . OpenUrl CrossRef PubMed ↵ H. Yang , F. Jossinet , N. Leontis , L. Chen , J. Westbrook , H. M. Berman , and E. Westhof . Tools for the automatic identification and classification of RNA base pairs . NAR , 31 . 13 : 3450 – 3460 , 2003 . OpenUrl CrossRef PubMed Web of Science ↵ Chengxin Zhang , Yang Zhang , and Anna Marie Pyle . rMSA: A Sequence Search and Alignment Algorithm to Improve RNA Structure Modeling . Journal of Molecular Biology , 435 ( 14 ): 167904 , July 2023 . ISSN 0022-2836 . doi: 10.1016/j.jmb.2022.167904 . URL https://www.sciencedirect.com/science/article/pii/S0022283622005241 . OpenUrl CrossRef PubMed ↵ Tomasz Zok , Maciej Antczak , Michal Zurkowski , Mariusz Popenda , Jacek Blazewicz , Ryszard W Adamiak , and Marta Szachniuk . RNApdbee 2.0: multifunctional tool for RNA structure annotation . Nucleic Acids Research , 46 ( W1 ): W30 – W35 , July 2018 . ISSN 0305-1048 . doi: 10.1093/nar/gky314 . URL https://doi.org/10.1093/nar/gky314 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted February 17, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. 
NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following On inputs to deep learning for RNA 3D structure prediction Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share On inputs to deep learning for RNA 3D structure prediction Marcell Szikszai , Marcin Magnus , Sachin Kadyan , Elena Rivas bioRxiv 2025.02.14.638364; doi: https://doi.org/10.1101/2025.02.14.638364 Share This Article: Copy Citation Tools On inputs to deep learning for RNA 3D structure prediction Marcell Szikszai , Marcin Magnus , Sachin Kadyan , Elena Rivas bioRxiv 2025.02.14.638364; doi: https://doi.org/10.1101/2025.02.14.638364 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7637) Biochemistry (17705) Bioengineering (13899) Bioinformatics (41968) Biophysics (21460) Cancer Biology (18603) Cell Biology (25526) Clinical Trials (138) Developmental Biology (13385) Ecology (19910) Epidemiology (2067) Evolutionary Biology (24328) Genetics (15614) Genomics (22513) Immunology (17741) Microbiology (40423) Molecular Biology (17193) Neuroscience (88646) Paleontology (667) Pathology (2835) Pharmacology and Toxicology (4827) Physiology (7647) Plant Biology (15160) Scientific Communication and Education (2046) Synthetic Biology (4302) Systems Biology (9825) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.