Universe of Lasso Proteins: Exploring the limit of entanglement and folding landscape of proteins predicted by AlphaFold

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 57,267 characters · extracted from preprint-html · click to expand
Universe of Lasso Proteins: Exploring the limit of entanglement and folding landscape of proteins predicted by AlphaFold | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Universe of Lasso Proteins: Exploring the limit of entanglement and folding landscape of proteins predicted by AlphaFold View ORCID Profile Fernando Bruno da Silva , View ORCID Profile Agata P. Perlinska , View ORCID Profile Jacek Płonka , Erica Flapan , View ORCID Profile Joanna I. 
Sulkowska doi: https://doi.org/10.1101/2025.03.21.644650 Fernando Bruno da Silva 1 Centre of New Technologies, University of Warsaw , Banacha 2c, 02-097, Warsaw, Poland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Fernando Bruno da Silva Agata P. Perlinska 1 Centre of New Technologies, University of Warsaw , Banacha 2c, 02-097, Warsaw, Poland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Agata P. Perlinska Jacek Płonka 1 Centre of New Technologies, University of Warsaw , Banacha 2c, 02-097, Warsaw, Poland 2 Faculty of Chemistry, University of Warsaw , Pasteura 1, 02-093, Warsaw, Poland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jacek Płonka Erica Flapan 3 Pomona College , Claremont, CA 91711, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Joanna I. Sulkowska 1 Centre of New Technologies, University of Warsaw , Banacha 2c, 02-097, Warsaw, Poland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Joanna I. Sulkowska For correspondence: j.sulkowska{at}cent.uw.edu.pl Abstract Full Text Info/History Metrics Preview PDF Abstract Knots and lasso topology represent a class of natural motifs found in proteins which are characterized by a threaded structure. Proteins with a lasso motif represent a macroscopic version of the peptide lasso, which are known for their high stability and offer tremendous potential for the development of novel therapeutics. Here, based on AlphaFold, we have shown the limit of topological complexity of naturally occurring protein structures with cysteine bridges. 
Based on 176 million high confidence (pLDDT > 70) AlphaFold-predicted protein models and a detailed analysis of the conservation of the motif in a family, we found four new lasso motifs, including L 4 and LS 4 LS 3 topologies, and the first examples of knotted lasso proteins: L 1 K3 1 and L 3 #K3 1 . We show that in the case of natural proteins, there are no lassos with 5 threadings but there exist some with 6. Families possessing proteins with more than 6 threadings did not exceed the conservation threshold of 10%. Moreover, we propose a probable folding mechanism for the LS 4 LS 3 lasso motif, enhancing our view on protein folding and stability. This work expands the topological space of lasso type motifs in proteins but also suggests that more complex structures could be unfavorable for proteins. Highlights Discovery of novel non-trivial lasso motifs: the L 4 , supercoiling of both tails LS 4 LS 3 , and the first knotted lasso proteins: L 1 K3 1 and L 3 #K3 1 . The knotted lasso motifs are in membrane proteins. Lasso topologies with 5 or more crossings are not conserved in protein families, and more complex motifs do not exist. 472 new InterPro entries with a high probability of non-trivial lasso motif. Potential folding pathway for proteins with complex supercoiled lasso motif LS 4 LS 3 . Download figure Open in new tab Introduction A lasso in the microscopic world is a concept similar to its equivalent in the macro world – a loop made on a rope. In proteins, the rope is a polypeptide chain, and the loop is made by a covalent link between two cysteine residues, the disulfide bridge. However, lassos are found not only in proteins but also in peptides [ 1 ] which form very stable structures. The peptide lassos, due to their small size, are a well-studied group of molecules with a known function and a wide range of possible applications [ 2 , 3 ]. 
On the other hand, lassos in proteins are less known, even though they are shown to be present in several different protein families. Their size, complexity, and difficulty in assessing their function hinder experimental studies [ 4 , 5 , 6 ]. Here, using a theoretical approach and structural data obtained from machine learning algorithms, we characterize protein lasso motifs and show novel entangled topologies that provide insight into the complexity of protein structures towards practical applications [ 7 ]. The topic of protein entanglement is important and has started to have real-world applications. It was shown that topological engineering in the form of the introduction of catenation holds great promise in the development of therapeutic proteins (artificial antibody with enhanced affinity and in vivo stability [ 8 ]). With progress in topological engineering for proteins, the lasso motif gives a new pathway in the development of therapeutic proteins [ 7 ]. Lasso topology in proteins is divided into types depending on the complexity of the motif. It is measured by the number of times the polypeptide chain pierces the lasso loop. Note that in proteins, after the lasso loop is formed, there are two parts of the remaining chain, one closer to the N- and the other to the C-terminus (N- and C-tail). Therefore, when determining the exact topology, we take into account which tail threads the loop. The first large-scale classification of protein lasso motifs was done in 2016 in the LassoProt database [ 9 ]. In the case of one tail threading, four main motifs were found in proteins: L 1 , L 2 , L 3 , and L 6 (note that there are lasso proteins with six piercings, but none with four or five) [ 10 , 9 ]. There are also instances when both tails pierce the loop (a two-sided lasso): L 1,1 , L 1,2 , L 2,1 , and L 4 , 3 [ 9 , 11 ]. 
The most complex type of lasso known to date, based on proteins deposited in the PDB and LassoProt, is supercoiling when the tail is piercing and wrapping around the loop. There are three motifs with supercoiling known: LS 2 , LS 3 , and LS 4 [ 11 ]. When none of the tails pierce the loop, the lasso is trivial and is denoted L 0 . Lasso proteins of this type are the majority; however, they are not classified as having a complex and entangled topology – they just possess a disulfide bridge. Besides lasso, there are other types of entanglement present in protein structures [ 12 ]. Several loops closed (but not pierced) by cysteine bridges form a topologically trivial structure called a cysteine knot [ 13 , 14 ]. When the loops are pierced by each other, we speak of links [ 15 ]. Structures with multiple bridges with closed loops (intersecting or not), forming a θ shape, are called theta curves [ 16 ]. Entanglement made solely on the polypeptide chain and not locked by cysteine bonds is called a knot. Interestingly, among all these topologies, the largest group is formed by proteins with a single-pierced lasso motif (L 1 ) [ 6 ], even though they are not a well-researched group. Based on experimentally obtained protein structures, it was shown that 39% of the lasso proteins are classified as enzymes [ 11 ]. Specifically, their function seems to depend on the type of the lasso motif. In individual cases, such as leptin, its L 1 lasso is known to influence the activity, dynamics of the native state, and stability of the protein [ 17 ]. Recently, computational studies have explored and identified favorable folding pathways for leptin [ 18 , 19 ]. However, the folding mechanisms of lasso topology proteins are still poorly understood, requiring further experimental and theoretical investigations. 
As the machine learning revolution also affected structural biology, we now have several orders of magnitude more 3D structures available to explore, including structures we had not previously expected to exist. For example, AlphaFold predicted new types of knotted [ 20 , 21 , 22 ] and linked proteins [ 23 ], coming from either known knotted families or not. Importantly, experimental studies have already confirmed the existence of several new knotted types: double knotted 3 1 #3 1 [ 24 ] and 7 1 [ 20 ] – the first type of knot which is neither a twist type nor has unknotting number one. Moreover, this amazing achievement was possible without any homologous structures (the 7 1 knot). Given AlphaFold’s capability to predict new entangled folds, we explore whether new lasso motifs are also feasible. The topology should be conserved in the majority of the proteins in a given family, if it is important for the biological function or the environmental conditions [ 25 , 26 , 27 , 28 ]. Along with the amount of available structural data, this provides an opportunity for both quantitative and qualitative analysis of predicted protein structures. It is particularly important for the lasso motifs, since they are less understood than knots. Specifically, it is known that a knot is perfectly conserved in a protein family [ 29 , 22 ], which has not yet been studied for protein families containing lasso motifs. Herein, we have conducted a comprehensive review of 176 million high-quality structures predicted by AlphaFold 2. For each protein structure, we determined the dominant lasso (in the AlphaLasso database [ 30 ]) and knot type (in the AlphaKnot database [ 31 ]). As a result, we found 2.5 million proteins with non-trivial lasso topology that we further analyzed. Based on this data and rigorous analysis of all the proteins in the given family, we show new types of lassos and the first knotted lasso proteins. 
Results We based our study on all protein structures predicted by AlphaFold that possess a non-trivial topology such as a lasso or knot. We used two databases: AlphaLasso [ 30 ] and AlphaKnot [ 31 ], where all such structures are deposited. In total, we evaluated over 2.5 million high-quality (pLDDT over 70) protein models with a lasso topology. Since one protein structure can have more than one lasso motif, we identified 3.1 million lasso motifs. In the case of proteins with knots, we collected information about 700 thousand structures. We classified each protein by a family (using the InterPro database, see Methods section for details) to analyze the level of conservation of the topology within groups of similar proteins. Moreover, we used it to find new and robust lasso motifs that are preserved in a given family ( Figure 1 ). Download figure Open in new tab Figure 1: Known and new lasso types. The first row shows the simplest lasso types (L 0 with no piercings, and L 1 with a single piercing through the loop) that are known to be present in protein structures. The remaining are new lasso types presented in this study and found in modeled protein structures. L 4 with a single tail piercing the loop four times, LS 4 LS 3 with both tails piercing and wrapping around the loop (supercoil) four and three times, L 1 K3 1 with a lasso and knot intertwined, and L 3 #K3 1 with first a lasso loop pierced three times and next the 3 1 knot. Orange beads and lines connecting them depict a cysteine bridge that forms the lasso loop. The knot is green-colored. Statistical analysis of lasso motif conservation in the family Since it was shown that AlphaFold is capable of predicting new knot topologies in proteins [ 32 , 20 ], we now ask whether the same can be done for new types of lassos and, in general, how well the lasso motif is conserved in a given family [ 33 ]. 
However, unlike the knotted topology, which is strictly conserved within a family [ 33 , 29 ], the existence of lassos can depend on a single cysteine mutation. The second difficulty is the identification of the cysteine bridge, which is not annotated in the 3D structures provided by AlphaFold. Therefore, we use a distance cutoff for the bridge – the thiol groups have to be within 3 Å of each other. To carry out a qualitative analysis of lasso motifs provided by AlphaFold, we focused on the most prevalent protein families. Since not every protein has an AlphaFold-generated model, some of the families have less structural information than others. Given that the probability of correctness of the new topology is higher, the better its conservation in the family, we opted for the families for which we had sufficient topological information – meaning at least 30% of the family members were analyzed by the AlphaLasso web server. 1041 families fulfilled these criteria. We then categorized the families by lasso topology, excluding L 0 . On this basis, we selected only 6 motifs, L 1 , L 2 , L 3 , L 4 , L 6 (no L 5 ), out of the 24 possible ones that the AF predicted. First, we analyzed the level of conservation of the lasso motif in protein groups (based on InterPro) that have at least a single experimental confirmation of the motif ( Figure 2 ). Download figure Open in new tab Figure 2: Conservation level of the main lasso types in known entangled protein groups. Shown is the number of InterPro entries possessing at least one experimentally confirmed lasso structure. The conservation level is calculated as the number of proteins exhibiting a given topology to the number of analyzed proteins in this InterPro entry and presented as bins (e.g. conservation of 10% covers values in range 10%-19%). Only entries where at least 50% (minimum 100) of all proteins present in the InterPro entry were analyzed and with conservation at least 10% are considered. 
The results show a great variety – we encountered virtually every conservation level. However, for the L 1 and L 2 lasso types, there are two leading values: 10-20% that show no conservation, and more than 90% that show strict conservation of the lasso. Therefore, the level of conservation of the lasso depends on the given protein group, contrary to the protein knots whose presence is strictly conserved in a family. Comparing data from AlphaLasso to data deposited in the PDB, we found that in 569 families (InterPro entries) indeed, the topology predicted by AlphaLasso agrees with the topology of the structure deposited in the PDB. On the other hand, this means that we have found 472 new entries where the topology may be correct, and its correctness may be suggested by the motif behavior in the family. Figure 3 also shows a clear pattern: the more complex a lasso topology, the less it is conserved. In fact, when lassos have more than six piercings, they are not conserved (present in less than 10% of proteins from a given family). During the analysis, we found proteins with very complex lasso motifs with as many as 18 intersections. This suggests that very complex lasso shapes might be harder to maintain during evolution. However, it should be noted that there may be some isolated exceptions where only one or two sequences from the whole family have such a complex topology. These data are available for further research. Download figure Open in new tab Figure 3: Distribution of loop size by lasso types. A distribution of loop size is shown, divided by the lasso type and its conservation across the InterPro entry. Only the entries where AlphaLasso analyzed at least 50% (minimum 100) of all proteins annotated in the InterPro database were considered. Lassos of the same type present in the same protein were grouped based on their position and an average of their loop size was taken. 
Having such a verified dataset ( Figure 3 ), we found that the most common lasso loops are small, between 5 and 100 amino acids long. These smaller loops may give proteins an advantage by making them more stable. To the best of our knowledge, due to a lack of data, this has never been shown before. New lasso motifs in proteins Among 2.5 million protein structures with lassos, we identified four new motifs ( Figure 1 ). To identify these motifs, we lowered the criterion of preservation in the family, but this is not always necessary. We found a family with a double supercoiled motif, where both ends wrap around the lasso loop three and four times (LS 4 LS 3 ), families with a loop pierced four times (L 4 ), a family where the lasso is embedded inside the knot (L 1 K3 1 ), and an example of how both the lasso and the knot topology can exist on a single chain (L 3 #K3 1 ). The existence of lasso-knot topologies provides clues on how such structures can be folded. To further ensure that these topologies are well predicted, for all of the new topological categories, a sample of five proteins was taken and recomputed using the AlphaFold 3 prediction model [ 34 ], and analyzed using the AlphaLasso web server. The models proved to have the same topology as those predicted using AlphaFold 2. Below we characterize each of the new motifs in detail. The new lasso topology – L 4 We identified two groups of proteins with a new L 4 lasso topology – L +4C and L -4C ( Figure 4 ). In AlphaLasso we identified 1,082 proteins possessing the L + 4C lasso and InterPro family ID IPR014635. This number corresponds to 82% of all proteins analyzed for this InterPro family, 61% of its entirety. Evidence suggests that such topology is strongly conserved in this family. On top of that, we found 1,249 proteins with L -4C and InterPro family ID IPR043538, which correspond to 23% of the analyzed proteins with the same InterPro family. 
While this ratio is rather small in comparison to the first family, it is still visible as an outlier in our compiled data. These two families are not the only ones in which we found L 4 structures, however, other families did not contain such high numbers of lassoed proteins or lacked a significant representation in the InterPro database. Download figure Open in new tab Figure 4: Proteins with the new L 4 lasso type. Left: L +4 C lasso in Alpha-Amylase protein. Cartoon and schematic representation of AlphaFold model of Escherichia coli alpha-amylase (UniProtKB id: P25718). The orange color shows the position of the lasso loop. The orange bead and line connecting them depict a cysteine bridge that forms the lasso loop. The lower panel shows the position of the lasso motif. The black dots mark the four residues that pierce the lasso loop (Tyr547, Ala564, Gln578, Phe623). Right: L -4 C lasso in Xylosyltransferase 1 protein from Homo sapiens (UniProtKB id: Q86Y38). The first 142 residues are not shown in the figure (schematically shown with a dashed line). The black dots mark the four residues that pierce the lasso loop (Gly491, Gln519, Thr534, Met545). L + 4C in Alpha-Amylase Proteins The L +4C lasso type protein identified with InterPro ID IPR014635 belongs to the family 13 (GH13) of the glycosyl hydrolases from alpha-amylase proteins, which consists of more than 20 types of enzymes [ 35 ]. They can be found in a variety of taxonomic groups, like Archaea, Bacteria, Fungi, Plants, and Animals [ 36 , 37 , 38 ]. However, all proteins with L +4C identified belong to Bacteria. Enzymes from this family act on starch and starch derivatives with hydrolysis or transferring activity [ 39 ]. Most alpha-amylases consist of three domains, an N-domain, a catalytic domain, and a C-domain, whereas conventional small amylases only have a catalytic domain and a C-domain. 
The L +4C proteins found in AlphaLasso present an N-terminal extension and instead of three, there are four domains (N’-domain, N-domain, catalytic domain, and a C-domain). The overall structures share similarities with the first two reported proteins with the same conformation, Pyrococcus furiosus thermostable amylase (PFTA), a hyperthermophilic amylase isolated from the archaeon Pyrococcus furiosus [ 40 ] and Archaic Hyperthermophilic Maltogenic Amylase (SMMA) from Staphylothermus marinus [ 41 ]. Both proteins, PFTA and SMMA, display four domains, with an N’-domain containing ten and nine beta-sheets for PFTA and SMMA, respectively. As a representative protein for the alpha-amylase family, we chose a Cronobacter turicensis alpha-amylase (UniProtKB ID: C9Y338) with global pLDDT equal to 93.3 ( Figure 4 ). The protein has 663 residues and the cysteine bridge is located between residues 108 and 524, with loop length equal to 417 residues. The average pLDDT for the residues within the lasso corresponds to 95.7. Figure 4 exemplifies the L 4 topology identified in the Alpha-amylase, in which the loop region formed by the S-S bridge is pierced four times. L -4C in Xylosyltransferase The InterPro family ID IPR043538 adopts an L -4 C lasso type in only 24% of proteins in this family. This protein family is functionally associated with xylosyltransferases. This family of enzymes catalyzes the transfer of xylose residues to specific acceptor molecules, primarily proteins. This process, known as xylosylation, is crucial for biological functions in organisms [ 42 ]. The expected taxonomic range for this enzyme is Eukaryota and Bacteria. However, all L -4C lasso proteins identified with ID IPR043538 display only members from Eukaryota. 
In AlphaLasso, L -4C lasso proteins correspond to xylosyltransferase 1, 2 (XT1 and XT2), and type II transmembrane proteins (single-pass transmembrane protein) consisting of a short amino-terminal region in the cytosol, a single transmembrane helix, a stem region required for Golgi localization, a catalytic GT-A domain, and a unique C-terminal domain of unknown function [ 43 ]. The Xylosyltransferase 1 protein, AlphaFold ID A0A0S7LW33 with global pLDDT equal to 96.75, Figure 4 , represents the protein family with InterPro ID IPR043538. The protein has 397 residues and the cysteine bridge is located between residues 5 and 200, and the loop length is equal to 417 residues. The average pLDDT for the residues within the lasso corresponds to 95.7. Knotted proteins with lasso topology We took our analysis further and cross-referenced the proteins containing a lasso with those containing a knot. The knotted topology is observed substantially less frequently than the lasso (1 out of 250 proteins is knotted [ 22 ]), therefore, we did not expect a significant overlap. We found two families with a high content of proteins with the mixed lasso-knot topology. Both families have the simplest 3 1 knot, the most frequent knot type in the protein world. They differ in the complexity of the lasso topology, measured by the number of lasso piercings – L 1 and L 3 . Interestingly, both of these families represent membrane proteins. Lasso intertwined with a knot – L 1 K3 1 Sodium/calcium exchanger protein family is a known group of knotted membrane proteins [ 44 , 33 ]. Even though the presence of the 3 1 knot in these proteins is known, the fact that the majority of them can also simultaneously form a lasso topology has been unrecognized until now. In particular, we found that 2,952 proteins (65% of the analyzed proteins in the family; based on AlphaFold models of the InterPro family with id: IPR004836) form a single pierced loop closed via a cysteine bridge (L +1C type). 
Figure 5 shows the example SLC8 human protein. It has a 3 1 knot spanned on the majority of its multi-domain structure. The knot starts and ends in one of the domains – the transmembrane core domain, common for all the proteins of the family. The lasso loop in the SLC8 is made by the cysteine bridge (Cys55-Cys827) and is pierced by the remaining C-terminal chain (Val855 is the piercing residue; it also lies within the knotted region). Given the position of the knot and the lasso in the structure, the topologies are structurally connected and intertwined ( Figure 5 ). Moreover, the remaining tails (the residues outside either of the entangled motifs) are over 50 amino acids long, thus the motifs are deeply embedded in the structure, which indicates its topological stability. Importantly, the same topologies can be found in the experimental structures of this protein (e.g. PDB id: 8sgj [ 45 ]). Download figure Open in new tab Figure 5: L +1C lasso intertwined with 3 1 knot in a sodium/calcium exchanger protein. Cartoon and schematic representation of AlphaFold model of the human Sodium/calcium exchanger 1 protein (UniProtKB id: P32418). The green color shows the position of the knot. Orange beads and lines connecting them depict a cysteine bridge that forms the lasso loop. The lower part of the figure shows the positions of the entangled motifs in the protein sequence. A black dot marks the position of the residue that pierces the lasso loop (Val855). Lasso and knot as separate motifs in a single structure – L 3 #K3 1 The second family with lasso-knot topology is calcium-activated potassium channels (InterPro id: IPR047871). In this case, the lasso and the knot are present in separate domains of the protein. The lasso is in the N-terminal part of the protein (the transmembrane domain), and the knot is located after the lasso motif (the cytoplasmic domain; Figure 6 ). The lasso is formed by a short loop (Cys79-Cys206; based on UniProtKB id: Q12791). 
Note that Cys79 is in an unmodeled region in the experimental structure of this protein (e.g. PDB id: 8z3s) and in the AlphaFold model the pLDDT of this residue is low (53.8). The loop is pierced three times by the C-terminal chain (Ala235, Phe253, Trp268) which makes it an L +3C lasso type. The 3 1 knot is positioned between Cys413 and Leu1046 and thus needs most of the structure to form (the knot is also present in the experimental structure). The knot is not located in the transmembrane domain like in the case of the sodium/calcium exchanger protein family ( Figure 6 ). Note that both of these entangled motifs are positioned deeply in the protein structure, and any thermal fluctuations of the N- or C-tails will not change the topology of the protein. Download figure Open in new tab Figure 6: L +3C lasso and 3 1 knot in a calcium-activated potassium channel. Cartoon and schematic representation of AlphaFold model of the human Calcium-activated potassium channel subunit alpha-1 (UniProtKB id: Q12791). The orange color shows the position of the lasso loop and the green shows the position of the knot. Orange beads and lines connecting them depict a cysteine bridge that forms the lasso loop. The lower part of the figure shows the positions of the entangled motifs in the protein sequence. The black dots mark the position of the three residues that pierce the lasso loop (Ala235, Phe253, Trp268). The positions of the transmembrane and cytoplasmic domains are taken from the UniProtKB. The lasso-knot topology is not commonly found in this family (31% of the proteins). However, our topological analysis shows that an additional 15% possess knots (but without the lasso). The next 5% only have the lasso topology. Given that the two motifs are found in separate domains, it appears that the overall topology relies on the protein architecture, and in the case of this family, it is not particularly conserved. 
The most complex lasso motif – LS 4 LS 3 During our extensive search of lasso proteins among AlphaFold models, we encountered a group of proteins with the most complex lasso motif – a double supercoiled lasso with four and three crossings (from N and C-tail, respectively; LS 4 LS 3 ; Figure 7 ). Not only are both tails repeatedly piercing the lasso loop, but they are also wrapped around the chain of the loop (supercoil). Download figure Open in new tab Figure 7: Schematic representation of the protein with the most complex lasso topology (LS 4 LS 3 ). Superimposed AlphaFold model (left) and PDB structure (8hnd; right) of Q9Y6L6 protein. The chain of the lasso loop is colored with orange. The black dots mark the position of the residues that pierce the lasso loop (N-tail: Ile47, Phe59, Val80, Arg93, and C-tail: Trp470, Gly552, Ser576). This complex motif is present in a family of membrane transporters (Organic-Anion-Transporting Polypeptides; InterPro id: IPR004156). The representative protein of this set is human Solute carrier organic anion transporter family member 1B1 (OATP1B1; UniProtKB id: Q9Y6L6), which has structural information available (6 cryo-EM PDB structures). The structures were obtained in different conditions (ligands), leading to their different conformations (outward- and inward-open). We observed that regardless of the conformation, the structures show a complicated lasso topology; however, not as complex as the AlphaFold model (Table S1). The difference lies in the number of times the tails pierce the lasso loop. In particular, the PDB structure 8k6l has one less N-tail crossing than the AlphaFold model (LS 3 LS 3 vs. LS 4 LS 3 ). Both structures are high-quality (the PDB structure 8k6l has 2.9Å resolution and the AlphaFold model has an average pLDDT 80.8) and they superimpose well (C-alpha RMSD 1.5Å based on 320 residues). 
A closer investigation of the structures shows that the PDB structure lacks a portion of the lasso loop residues (Pro280-Val322). The location of this gap is directly responsible for the missing N-tail crossing. The lasso loop of OATP1B1 protein is made with a cysteine bridge between Cys142 and Cys463, which are part of different external loops (Cys142: EL3-4 called N-lariat, and Cys463: EL9-10 called Kazal domain). This disulfide bond is present throughout all the structures we analyzed, both experimental (including their different conformations) and theoretical ones (Table S1). The AlphaFold model of the protein is of high quality, including the region with the lasso loop (average pLDDT value 82.6). The model resembles the inward-open conformation of the protein as evidenced by the comparison with the experimental structures (1.7Å with the inward-open conformation (PDB id: 8hnd; Figure 7 ) and 5.9Å with the outward-open conformation (PDB id: 8k6l), based on 560 and 547 C-alpha atoms, respectively). Potential folding pathway The identification of new types of lasso motifs is also relevant for the fundamental processes of protein folding and degradation. The size (length) of these proteins is too large to conduct quantitative and qualitative simulations of folding even in a coarse-grained model such as Go-like. However, it is possible to propose plausible pathways, and we will present one for the most complex lasso motif – $LS_4LS_3$ ( Figure 8 ). Download figure Open in new tab Figure 8: Proposed protein folding mechanism of the most complex lasso topology ($LS_4LS_3$). Positions of the cysteine residues are marked with orange circles. To the best of our knowledge, for only one protein with a supercoiling ($LS_3$) motif, the tangling mechanism has been proposed based on numerical simulation. 
It was found with structure-based models [ 46 ] and molecular dynamics that native contacts are sufficient to self-coil a protein [ 47 ] via threading one tail around a closed loop, even when the cysteine bridge was created first. In the case of $LS_4LS_3$ motif, the only viable option is to wrap the termini around the preformed lasso loop and then close it via a cysteine bridge. The wrapping can be done by a triple or quadruple twisting and then locating the protein in the membrane and closing the loop, see Figure 8 . Conclusions Herein, we have shown that AlphaFold 2 has the capability of predicting novel lasso topologies. The motifs presented in this paper have a significant representation in their InterPro families, suggesting a strong evolutionary conservation. Apart from the $L_4$ motif, each new lasso is present in a single protein family. The $L_4$ is found in two enzymatic families: alpha-amylases and xylosyltransferases. Even though they share the same type of lasso topology, the motif itself is resolved differently in each family, e.g. there is a twofold difference in length. By compiling all predicted 3D structures with lasso and knot topologies, we found two families containing proteins with mixed lasso-knot topologies ($L_1K3_1$ and $L_3\#K3_1$). Therefore, for the first time, we show that two complex, entangled motifs can be present in a single structure. Moreover, the motifs are located deeply in the protein structure, indicating their topological stability. Both of these families represent membrane proteins. The most complex lasso motif ($LS_4LS_3$) is the first two-sided supercoiled lasso found in protein structures. As such, it gives new insight into the protein folding mechanism. We propose a pathway that includes the formation of a supercoiled structure first (by twisting), and then closure by a cysteine bridge. Based on the newly available data, we show that the lasso topologies are best conserved for loops that are 5-100 amino acids long. 
By comparing the data generated by the AF to experimentally determined structures available in LassoProt, we confirmed that the AF correctly predicts lasso topologies for already known structures. Computational approaches, such as AlphaFold, have proven advantageous in predicting proteins with previously unseen types of non-trivial topology. Taking into account data presented here, AF can be further used to predict new, previously unseen lasso topologies in proteins with new desired properties, so that, for example, they perform functions under unnatural conditions. Materials and methods Data set As a data source for our analysis, we utilized AlphaFold-predicted models of protein structures with an average pLDDT ≥ 70 to ensure high-quality data. Using the AlphaFold Protein Structure Database (4th version) we generated a list of over 176 million models that met this criterion. The structural data of AlphaFold-predicted models were analyzed in search of cysteine bridges and lasso topologies [ 11 ] during the creation of the AlphaLasso database [ 30 ]. The detection of cysteine bridges was based on a distance criterion between sulfur atoms (≤ 3 Å) using Biopython (version 1.79) for processing PDB files. The Topoly (version 1.0.2) [ 48 ] Python package was utilized to determine lasso topology. Similarly, such data was processed during the development of the AlphaKnot database [ 31 ], which also employs the Topoly Python package for knot detection. Using the AlphaLasso database, we retrieved proteins with lasso topologies (2.6M individual proteins, 3.1M lassos) along with their corresponding InterPro family IDs and stored them in a PostgreSQL (version 16.2) database. Additionally, data on knotted proteins (681K proteins) were imported from the AlphaKnot database. This integration allowed for efficient querying and aggregation of the data. UniProt IDs were mapped to InterPro family IDs, using the protein2ipr dataset (1.4B pairs) obtained from the InterPro website. 
Such curated data was then processed using SQL and the pandas (version 2.2.2) package to quantify the proteins within each InterPro family that contained either knots, lassos, or both motifs. For each InterPro family, the ratio of proteins having interesting features to the total (present in InterPro DB) and the analyzed number of proteins was calculated. Only InterPro entries for which we had run the calculations for at least 30% of all the representative proteins were taken into consideration. Data used in this work is available in the supplementary material. For the data used for discovery of new topologies see S1.1, data used for the comparison with experimental data see S1.2, data used for the analysis of loop size see S1.3. From each category of topology present in this paper, a sample of 5 proteins was taken and recomputed using the AlphaFold 3 prediction model [ 34 ]. The obtained models were then analyzed using the AlphaLasso web server. The models proved to have the same topology as those predicted using AlphaFold 2. Supplementary material S1.1 File SI_AlphaLasso.xlsx – spreadsheet containing information about InterPro families present in the AlphaLasso database. Field summary: InterPro ID – entry ID in the InterPro database. Type – lasso type. Lasso Type (LT) – number of entries of given InterPro ID and Type present in the AlphaLasso database. Represents count of lasso topology. E.g. protein possessing two lassos of L1 type would add 2 to this number. Unique Lasso Type (ULT) – number of proteins of given InterPro ID and Type present in the AlphaLasso database. Represents count of proteins possessing at least one lasso of given type. E.g. protein possessing two lassos of L1 type would add 1 to this number. Unique in AlphaLasso (AL) – number of proteins of given InterPro ID present in the AlphaLasso database. Includes all types of lassos. In InterPro (IP) – number of proteins of given InterPro ID present in InterPro database. 
Analyzed by AL (AI) – number of proteins of given InterPro ID that were analyzed by AlphaLasso web server. ULT/IP – ratio of Unique Lasso Type (ULT) to In InterPro (IP). Represents conservation of all known proteins of this InterPro ID. ULT/AI – ratio of Unique Lasso Type (ULT) to Analyzed by AL (AI). Represents conservation of all analyzed proteins of this InterPro ID. This metric is used as a conservation value in the paper. AI/IP – ratio of Analyzed by AL (AI) to In InterPro (IP). Represents how much of the data available in InterPro was processed by AlphaLasso. S1.2 File SI_Experimental_validation.xlsx – spreadsheet containing information about InterPro families present in both AlphaLasso and LassoProt databases. Field summary: InterPro ID – entry ID in the InterPro database. Type – lasso type. Unique Lasso Type (ULT) – number of proteins of given InterPro ID and Type present in the AlphaLasso database. Represents count of proteins possessing at least one lasso of given type. E.g. protein possessing two lassos of L1 type would add 1 to this number. In InterPro (IP) – number of proteins of given InterPro ID present in InterPro database. Analyzed by AL (AI) – number of proteins of given InterPro ID that were analyzed by AlphaLasso web server. Conservation – ratio of Unique Lasso Type (ULT) to Analyzed by AL (AI). Represents conservation of all analyzed proteins of this InterPro ID. This metric is used as a conservation value in the paper. Represented as percentage in increments of 10. AI/IP – ratio of Analyzed by AL (AI) to In InterPro (IP). Represents how much of the data available in InterPro was processed by AlphaLasso. S1.3 File SI_Loop_size.xlsx – spreadsheet containing information about loop size distribution in InterPro families present in AlphaLasso database. This data consists only of InterPro entries for which at least 50% (minimum 100) of entries present in the InterPro database were calculated. 
Field summary: InterPro ID – entry ID in the InterPro database. Type – lasso type. Conservation – ratio of Unique Lasso Type (ULT) to Analyzed by AL (AI). Represents conservation of all analyzed proteins of this InterPro ID. This metric is used as a conservation value in the paper. Represented as percentage in increments of 10. Loop size – mean loop size (in amino acids) of all proteins in the given InterPro entry. S1 Table Detailed topology information about different structures of the protein with the most complex lasso motif (UniProtKB id: Q9Y6L6). The summary of available PDB and AlphaFold structures, comparing calculated topology for proteins with different ligands. ACKNOWLEDGMENTS This work was supported by the National Science Centre 2022/47/B/NZ1/03480 (to J.I.S.). This research was carried out with the support of the Interdisciplinary Centre for Mathematical and Computational Modelling (ICM) University of Warsaw under computational allocation no. GS82-12. References [1]. ↵ Julian D Hegemann , Marcel Zimmermann , Xiulan Xie , and Mohamed A Marahiel . Lasso peptides: an intriguing class of bacterial natural products . Accounts of chemical research , 48 ( 7 ): 1909 – 1919 , 2015 . OpenUrl CrossRef PubMed [2]. ↵ Gabriel CA da Hora , Myongin Oh , Marcus C Mifflin , Lori Digal , Andrew G Roberts , and Jessica MJ Swanson . Lasso peptides: Exploring the folding landscape of nature’s smallest interlocked motifs . Journal of the American Chemical Society , 146 ( 7 ): 4444 – 4454 , 2024 . OpenUrl CrossRef PubMed [3]. ↵ Helena Martin-Gómez and Judit Tulla-Puche . Lasso peptides: chemical approaches and structural elucidation . Organic & biomolecular chemistry , 16 ( 28 ): 5065 – 5080 , 2018 . OpenUrl PubMed [4]. ↵ Jiyeon Lee , Dahye Im , Yajie Liu , Jing Fang , Xibao Tian , Minsu Kim , Wen-Bin Zhang , and Jongcheol Seo . Distinguishing protein chemical topologies using supercharging ion mobility spectrometry-mass spectrometry . 
Angewandte Chemie , 135 ( 51 ): e202314980 , 2023 . OpenUrl [5]. ↵ Yajie Liu , Wen-Hao Wu , Sumin Hong , Jing Fang , Fan Zhang , Geng-Xin Liu , Jongcheol Seo , and Wen-Bin Zhang . Lasso proteins: modular design, cellular synthesis, and topological transformation . Angewandte Chemie , 132 ( 43 ): 19315 – 19323 , 2020 . OpenUrl [6]. ↵ Bartosz Ambrožy Greń , Pawel Dabrowski-Tumanski , Wanda Niemyska , and Joanna Ida Sulkowska . Lasso proteins—unifying cysteine knots and miniproteins . Polymers , 13 ( 22 ): 3988 , 2021 . OpenUrl PubMed [7]. ↵ Emiko Mihara , Satoshi Watanabe , Nasir K Bashiruddin , Nozomi Nakamura , Kyoko Matoba , Yumi Sano , Rumit Maini , Yizhen Yin , Katsuya Sakai , Takao Arimori , et al. Lasso-grafting of macrocyclic peptide pharmacophores yields multi-functional proteins . Nature Communications , 12 ( 1 ): 1543 , 2021 . OpenUrl PubMed [8]. ↵ Wen-Hao Wu , Xilin Bai , Yu Shao , Chao Yang , Jingjing Wei , Wei Wei , and Wen-Bin Zhang . Higher order protein catenation leads to an artificial antibody with enhanced affinity and in vivo stability . Journal of the American Chemical Society , 143 ( 43 ): 18029 – 18040 , 2021 . OpenUrl PubMed [9]. ↵ Pawel Dabrowski-Tumanski , Wanda Niemyska , Pawel Pasznik , and Joanna I Sulkowska . Lassoprot: server to analyze biopolymers with lassos . Nucleic acids research , 44 ( W1 ): W383 – W389 , 2016 . OpenUrl CrossRef PubMed [10]. ↵ Ellinor Haglund , Joanna I Sulkowska , Jeffrey K Noel , Heiko Lammert , José N Onuchic , and Patricia A Jennings . Pierced lasso bundles are a new class of knot-like motifs . PLoS computational biology , 10 ( 6 ): e1003613 , 2014 . OpenUrl [11]. ↵ Wanda Niemyska , Pawel Dabrowski-Tumanski , Michal Kadlof , Ellinor Haglund , Piotr Sulkowski , and Joanna I Sulkowska . Complex lasso: new entangled motifs in proteins . Scientific reports , 6 ( 1 ): 36895 , 2016 . OpenUrl PubMed [12]. ↵ Joanna Ida Sulkowska . On folding of entangled proteins: knots, lassos, links and θ-curves . 
Current opinion in structural biology , 60 : 131 – 141 , 2020 . OpenUrl PubMed [13]. ↵ Neil W Isaacs . Cystine knots . Current opinion in structural biology , 5 ( 3 ): 391 – 395 , 1995 . OpenUrl CrossRef PubMed Web of Science [14]. ↵ Pawel Dabrowski-Tumanski , Pawel Rubach , Dimos Goundaroulis , Julien Dorier , Piotr Sulkowski , Kenneth C Millett , Eric J Rawdon , Andrzej Stasiak , and Joanna I Sulkowska . Knotprot 2.0: a database of proteins with knots and other entangled structures . Nucleic acids research , 47 ( D1 ): D367 – D375 , 2019 . OpenUrl PubMed [15]. ↵ Pawel Dabrowski-Tumanski , Aleksandra I Jarmolinska , Wanda Niemyska , Eric J Rawdon , Kenneth C Millett , and Joanna I Sulkowska . Linkprot: a database collecting information about biological links . Nucleic acids research , page gkw976, 2016 . [16]. ↵ Pawel Dabrowski-Tumanski , Dimos Goundaroulis , Andrzej Stasiak , and Joanna I Sulkowska . θ-curves in proteins . arXiv preprint arXiv: 1908.05919 , 2019 . [17]. ↵ Ellinor Haglund , Anna Pilko , Roy Wollman , Patricia Ann Jennings , and José Nelson Onuchic . Pierced lasso topology controls function in leptin . The Journal of Physical Chemistry B , 121 ( 4 ): 706 – 718 , 2017 . OpenUrl PubMed [18]. ↵ Claudio Perego and Raffaello Potestio . Searching the optimal folding routes of a complex lasso protein . Biophysical journal , 117 ( 2 ): 214 – 228 , 2019 . OpenUrl PubMed [19]. ↵ Fernando Bruno da Silva , Jennifer M Simien , Rafael G Viegas , Ellinor Haglund , and Vitor Barbanti Pereira Leite . Exploring the folding landscape of leptin: Insights into threading pathways . Journal of Structural Biology , 216 ( 1 ): 108054 , 2024 . OpenUrl PubMed [20]. ↵ Maarten A Brems , Robert Runkel , Todd O Yeates , and Peter Virnau . Alphafold predicts the most complex protein knot and composite protein knots . Protein Science , 31 ( 8 ): e4380 , 2022 . OpenUrl PubMed [21]. 
↵ Agata P Perlinska , Wanda H Niemyska , Bartosz A Gren , Marek Bukowicki , Szymon Nowakowski , Pawel Rubach , and Joanna I Sulkowska . Alphafold predicts novel human proteins with knots . Protein Science , 32 ( 5 ): e4631 , 2023 . OpenUrl PubMed [22]. ↵ Agata P Perlinska , Maciej Sikora , and Joanna I Sulkowska . Everything alphafold tells us about protein knots . Journal of Molecular Biology , 436 ( 19 ): 168715 , 2024 . OpenUrl PubMed [23]. ↵ Yingnan Hou , Tengyu Xie , Liuqing He , Liang Tao , and Jing Huang . Topological links in predicted protein complex structures reveal limitations of alphafold . Communications Biology , 6 ( 1 ): 1098 , 2023 . OpenUrl PubMed [24]. ↵ Agata P Perlinska , Mai Lan Nguyen , Smita P Pilla , Emilia Staszor , Iwona Lewandowska , Agata Bernat , Elžbieta Purta , Rafal Augustyniak , Janusz M Bujnicki , and Joanna I Sulkowska . Are there double knots in proteins? prediction and in vitro verification based on trmd-tm1570 fusion from c. nitroreducens . Frontiers in Molecular Biosciences , 10 : 1223830 , 2024 . OpenUrl PubMed [25]. ↵ Joanna I Sulkowska , Eric J Rawdon , Kenneth C Millet , Jose N Onuchic , and Andrzej Stasiak . Conservation of complex knotting and slipknotting patterns in proteins . Biophysical Journal , 102 ( 3 ): 253a , 2012 . OpenUrl [26]. ↵ Peter Virnau , Leonid A Mirny , and Mehran Kardar . Intricate knots in proteins: Function and evolution . PLoS computational biology , 2 ( 9 ): e122 , 2006 . OpenUrl PubMed [27]. ↵ Raffaello Potestio , Cristian Micheletti , and Henri Orland . Knotted vs. unknotted proteins: evidence of knot-promoting loops . PLoS computational biology , 6 ( 7 ): e1000864 , 2010 . OpenUrl PubMed [28]. ↵ Thomas Christian , Reiko Sakaguchi , Agata P Perlinska , Georges Lahoud , Takuhiro Ito , Erika A Taylor , Shigeyuki Yokoyama , Joanna I Sulkowska , and Ya-Ming Hou . Methyl transfer by substrate signaling from a knotted protein fold . 
Nature structural & molecular biology , 23 ( 10 ): 941 – 948 , 2016 . OpenUrl PubMed [29]. ↵ Maciej Sikora , Eva Klimentova , Dawid Uchal , Denisa Sramkova , Agata P Perlinska , Mai Lan Nguyen , Marta Korpacz , Roksana Malinowska , Szymon Nowakowski , Pawel Rubach , et al. Knot or not? identifying unknotted proteins in knotted families with sequence-based machine learning model . Protein Science , 33 ( 7 ): e4998 , 2024 . OpenUrl PubMed [30]. ↵ P. Rubach , J. Plonka , B. A. Gren , F. B. da Silva , M. Korpacz , and J. I. Sulkowska . Alphalasso: Lassos in alphafold generated structures . Accessed 11.03.2025 . [31]. ↵ Pawel Rubach , Maciej Sikora , Aleksandra I Jarmolinska , Agata P Perlinska , and Joanna I Sulkowska . Alphaknot 2.0: a web server for the visualization of proteins’ knotting and a database of knotted alphafold-predicted models . Nucleic Acids Research, page gkae443 , 2024 . [32]. ↵ Fernando Bruno da Silva , Iwona Lewandowska , Anna Kluza , Szymon Niewieczerzal , Rafal Augustyniak , and Joanna I Sulkowska . First crystal structure of double knotted protein trmd-tm1570–inside from degradation perspective . bioRxiv , pages 2023 – 03 , 2023 . [33]. ↵ Vasilina Zayats , Maciej Sikora , Agata P Perlinska , Adam Stasiulewicz , Bartosz A Gren , and Joanna I Sulkowska . Conservation of knotted and slipknotted topology in transmembrane transporters . Biophysical Journal , 122 ( 23 ): 4528 – 4541 , 2023 . OpenUrl PubMed [34]. ↵ Josh Abramson , Jonas Adler , Jack Dunger , Richard Evans , Tim Green , Alexander Pritzel , Olaf Ronneberger , Lindsay Willmore , Andrew J Ballard , Joshua Bambrick , et al. Accurate structure prediction of biomolecular interactions with alphafold 3 . Nature , pages 1 – 3 , 2024 . [35]. ↵ Mark R Stam , Etienne GJ Danchin , Corinne Rancurel , Pedro M Coutinho , and Bernard Henrissat . Dividing the large glycoside hydrolase family 13 into subfamilies: towards improved functional annotations of α-amylase-related proteins . 
Protein Engineering, Design and Selection , 19 ( 12 ): 555 – 562 , 2006 . OpenUrl CrossRef PubMed Web of Science [36]. ↵ Štefan Janeček , Emmanuel Lévêque , Abdel Belarbi , and Bernard Haye . Close evolutionary relatedness of α-amylases from archaea and plants . Journal of molecular evolution , 48 : 421 – 426 , 1999 . OpenUrl CrossRef PubMed Web of Science [37]. ↵ M Machius , G Wiegand , and R Huber . Crystal structure of calcium-depleted Bacillus licheniformis α-amylase at 2.2 Å resolution . Journal of molecular Biology , 246 ( 4 ): 545 – 559 , 1995 . OpenUrl CrossRef PubMed Web of Science [38]. ↵ RM Van der Kaaij , Š Janeček , MJEC van der Maarel , and L Dijkhuizen . Phylogenetic and biochemical characterization of a novel cluster of intracellular fungal α-amylase enzymes . Microbiology , 153 ( 12 ): 4003 – 4015 , 2007 . OpenUrl CrossRef PubMed [39]. ↵ Sunil Dutt , Abinash Mohapatra , Shashi Pandey , and Vikas Tyagi . A decade update on the promiscuity of α-amylase in organic synthesis . Tetrahedron , page 133905 , 2024 . [40]. ↵ Jong-Tae Park , Hyung-Nam Song , Tae-Yang Jung , Myoung-Hee Lee , Sung-Goo Park , Eui-Jeon Woo , and Kwan-Hwa Park . A novel domain arrangement in a monomeric cyclodextrin-hydrolyzing enzyme from the hyperthermophile pyrococcus furiosus . Biochimica et Biophysica Acta (BBA)-Proteins and Proteomics , 1834 ( 1 ): 380 – 386 , 2013 . OpenUrl [41]. ↵ Tae-Yang Jung , Dan Li , Jong-Tae Park , Se-Mi Yoon , Phuong Lan Tran , Byung-Ha Oh , Štefan Janeček , Sung Goo Park , Eui-Jeon Woo , and Kwan-Hwa Park . Association of novel domain in active site of archaic hyperthermophilic maltogenic amylase from staphylothermus marinus . Journal of Biological Chemistry , 287 ( 11 ): 7979 – 7989 , 2012 . OpenUrl Abstract / FREE Full Text [42]. ↵ C Götting , J Kuhn , and K Kleesiek . Human xylosyltransferases in health and disease . Cellular and molecular life sciences , 64 : 1498 – 1517 , 2007 . OpenUrl CrossRef PubMed Web of Science [43]. 
↵ David C Briggs and Erhard Hohenester . Structural basis for the initiation of glycosaminoglycan biosynthesis by human xylosyltransferase 1 . Structure , 26 ( 6 ): 801 – 809 , 2018 . OpenUrl [44]. ↵ Aleksandra I Jarmolinska , Agata P Perlinska , Robert Runkel , Benjamin Trefz , Helen M Ginn , Peter Virnau , and Joanna I Sulkowska . Proteins’ knotty problems . Journal of molecular biology , 431 ( 2 ): 244 – 257 , 2019 . OpenUrl PubMed [45]. ↵ Jing Xue , Weizhong Zeng , Yan Han , Scott John , Michela Ottolia , and Youxing Jiang . Structural mechanisms of the human cardiac sodium-calcium exchanger ncx1 . Nature communications , 14 ( 1 ): 6181 , 2023 . OpenUrl PubMed [46]. ↵ Jeffrey K Noel , Mariana Levi , Mohit Raghunathan , Heiko Lammert , Ryan L Hayes , José N Onuchic , and Paul C Whitford . Smog 2: a versatile software package for generating structure-based models . PLoS computational biology , 12 ( 3 ): e1004794 , 2016 . OpenUrl PubMed [47]. ↵ Szymon Niewieczerzal and Joanna I Sulkowska . Supercoiling in a protein increases its stability . Physical review letters , 123 ( 13 ): 138102 , 2019 . OpenUrl PubMed [48]. ↵ Pawel Dabrowski-Tumanski , Pawel Rubach , Wanda Niemyska , Bartosz Ambrozy Gren , and Joanna Ida Sulkowska . Topoly: Python package to analyze topology of polymers . Briefings in Bioinformatics , 22 ( 3 ): bbaa196 , 2021 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted March 22, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following Universe of Lasso Proteins: Exploring the limit of entanglement and folding landscape of proteins predicted by AlphaFold Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Universe of Lasso Proteins: Exploring the limit of entanglement and folding landscape of proteins predicted by AlphaFold Fernando Bruno da Silva , Agata P. Perlinska , Jacek Płonka , Erica Flapan , Joanna I. Sulkowska bioRxiv 2025.03.21.644650; doi: https://doi.org/10.1101/2025.03.21.644650 Share This Article: Copy Citation Tools Universe of Lasso Proteins: Exploring the limit of entanglement and folding landscape of proteins predicted by AlphaFold Fernando Bruno da Silva , Agata P. Perlinska , Jacek Płonka , Erica Flapan , Joanna I. Sulkowska bioRxiv 2025.03.21.644650; doi: https://doi.org/10.1101/2025.03.21.644650 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Biophysics Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17697) Bioengineering (13894) Bioinformatics (41951) Biophysics (21456) Cancer Biology (18594) Cell Biology (25515) Clinical Trials (138) Developmental Biology (13380) Ecology (19903) Epidemiology (2067) Evolutionary Biology (24322) Genetics (15612) Genomics (22510) Immunology (17737) Microbiology (40400) Molecular Biology (17183) Neuroscience (88619) Paleontology (667) Pathology (2833) Pharmacology and Toxicology (4825) Physiology (7644) Plant Biology (15158) Scientific Communication and Education (2046) Synthetic Biology (4296) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00