Full text
67,092 characters
· extracted from
preprint-html
· click to expand
Functional and structural characterization of AtAbf43C: An exo-1,5-⍺-L-arabinofuranosidase from Acetivibrio thermocellus DSM1313 | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Functional and structural characterization of AtAbf43C: An exo-1,5-⍺-L-arabinofuranosidase from Acetivibrio thermocellus DSM1313 View ORCID Profile Joey L. Galindo , Philip D. Jeffrey , Angela Zhu , View ORCID Profile A. James Link , View ORCID Profile Jonathan M. Conway doi: https://doi.org/10.1101/2025.04.06.647456 Joey L. 
Galindo 1 Department of Chemical and Biological Engineering, Princeton University , Princeton, NJ 08544, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Joey L. Galindo Philip D. Jeffrey 2 Department of Molecular Biology, Princeton University , Princeton, NJ 08544, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Angela Zhu 1 Department of Chemical and Biological Engineering, Princeton University , Princeton, NJ 08544, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site A. James Link 1 Department of Chemical and Biological Engineering, Princeton University , Princeton, NJ 08544, USA 2 Department of Molecular Biology, Princeton University , Princeton, NJ 08544, USA 3 Department of Chemistry, Princeton University , Princeton, NJ 08544, USA 4 Omenn-Darling Bioengineering Institute, Princeton University , Princeton, NJ 08544, USA 5 Andlinger Center for Energy and the Environment, Princeton University , Princeton, NJ 08544, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for A. James Link Jonathan M. Conway 1 Department of Chemical and Biological Engineering, Princeton University , Princeton, NJ 08544, USA 2 Department of Molecular Biology, Princeton University , Princeton, NJ 08544, USA 4 Omenn-Darling Bioengineering Institute, Princeton University , Princeton, NJ 08544, USA 5 Andlinger Center for Energy and the Environment, Princeton University , Princeton, NJ 08544, USA 6 High Meadows Environmental Institute, Princeton University , Princeton, NJ 08544, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jonathan M. 
Conway For correspondence: jmconway{at}princeton.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract The Acetivibrio thermocellus DSM1313 genome codes for seven predicted glycoside hydrolase family 43 (GH43) enzymes, four of which remain uncharacterized. This study describes the function and structure of one such enzyme, AtAbf43C, from GH43 subfamily 26 (GH43_26) which acts as an ⍺-L-arabinofuranosidase (EC 3.2.1.55). AtAbf43C is active on para-nitrophenol-⍺-L-arabinofuranoside (pNPAra), with optimal activity observed at pH 5.5 and 65 ℃. Multiple crystal structures of AtAbf43C were obtained, in which an N-terminal carbohydrate binding module family 42 (CBM42) domain displays a ß-trefoil type fold and the C-terminal GH43 domain displays a canonical 5-bladed ß-propeller motif. One structure, which was solved with two L-arabinofuranose molecules bound to ß- and ɣ-subdomains of the CBM42, builds upon previous literature suggesting the ⍺-binding pocket of the AtAbf43C CBM42 is non-functional. Furthermore, structural alignment with the substrate bound structure of a closely homologous GH43_26 exo-⍺-1,5-arabinofuranosidase, SaAraf43A from Streptomyces avermitilis (PDB 3AKH), allowed for identification of the conserved catalytic triad via site-directed mutagenesis in AtAbf43C, as well as insight into the deep-narrow topology of the AtAbf43C binding pocket that suggested it would be active on similar arabino-oligosaccharide (AOS) substrates as SaAraf43A. Subsequent liquid chromatography-mass spectrometry (LC-MS) analysis of polysaccharides and oligosaccharides hydrolyzed by AtAbf43C provides experimental evidence confirming this enzyme acts in an exo manner primarily towards ⍺-1,5 linked arabino-oligosaccharides. Introduction Plant cell walls are comprised of large biopolymers, primarily cellulose, hemicellulose and lignin, which form the basis for lignocellulosic biomass ( 1 – 4 ). 
Efficient degradation of the various components of lignocellulose has important applications in many bioprocesses, most notably for the sustainable bioproduction of fuels and chemicals from renewable lignocellulosic feedstocks, but also in commercial food and beverage processing ( 2 , 3 , 5 ). Complete deconstruction of lignocellulose requires a diverse array of Carbohydrate Active enZymes (CAZymes) ( 3 – 8 ). The domains from these CAZymes have been categorized into various families by homology in the CAZy database ( 7 ). Glycoside hydrolase (GH) domains, which cleave O -glycosidic bonds, are the dominant catalytic players in lignocellulose degradation ( 4 , 6 – 9 ). Additionally, Carbohydrate Binding Module (CBM) domains are a type of non-catalytic domain, commonly encoded in the same protein as catalytic CAZy domains, that bind to carbohydrate substrates and heavily influence enzymatic activity and specificity ( 4 , 6 – 9 ). ⍺-L-arabinofuranosidases (EC 3.2.1.55), a type of GH enzyme which hydrolyze terminal non-reducing ⍺-1,2, ⍺-1,3, or ⍺-1,5 linked arabinofuranose residues, play an important role in the degradation of arabinose-containing hemicelluloses such as arabinoxylan, arabinan, and arabinogalactan ( 3 – 5 , 10 ). Owing to the diversity of these polysaccharides, ⍺-L-arabinofuranosidases vary widely in their specificities towards a given substrate. Some ⍺-L-arabinofuranosidases are only active on small substrates such as short arabino-oligosaccharides (AOS) or arabino-xylo-oligosaccharides (AXOS) as well as the synthetic compound para-nitrophenol-α-L-arabinofuranoside (pNPAra), while other ⍺-L-arabinofuranosidases are active primarily on large polysaccharides like arabinan or arabinoxylan ( 3 , 5 , 10 , 11 ). Additionally, a subset of this latter category of ⍺-L-arabinofuranosidases, also known as arabinoxylan arabinofuranohydrolases, specifically cleave arabinofuranosyl residues from arabinoxylans ( 3 , 5 , 10 , 11 ). 
⍺-L-arabinofuranosidases can be found across several GH families including GH2, 3, 43, 51, 54, 62, and 159 ( 3 , 5 ). The large GH family 43, which was recently divided into 37 subfamilies, commonly contains galactan 1,3-β-galactosidases (EC 3.2.1.145), β-xylosidases (EC 3.2.1.37), and endo-⍺-L-arabinanases (EC 3.2.1.99), in addition to ⍺-L-arabinofuranosidases ( 8 , 12 , 13 ). Several families of CBMs are also commonly found associated with GH43 ⍺-L-arabinofuranosidases, most notably CBM6 and 42 ( 8 , 12 – 14 ). CBM6 domains commonly bind directly to polysaccharides like xylan and amorphous cellulose, while CBM42 domains typically recognize small arabino-oligosaccharides or the terminal non-reducing arabinofuranosyl residues of arabinan or arabinoxylan ( 8 , 13 , 14 ). Acetivibrio thermocellus (basionym: Clostridium thermocellum ) is a thermophilic Gram-positive obligate anaerobic bacterium that has been heavily studied for its ability to efficiently break down lignocellulose by natively producing a variety of cellulose and hemicellulose degrading enzymes ( 2 , 4 , 12 , 15 ). To date, three ⍺-L-arabinofuranosidases from A. thermocellus have been characterized, including an intracellular GH51 family ⍺-L-arabinofuranosidase active towards AOS and AXOS, and two extracellular cellulosomal GH43 ⍺-L-arabinofuranosidases ( 4 , 12 , 16 , 17 ). The first of these GH43 ⍺-L-arabinofuranosidases ( Ct 43Araf; subfamily 16) was most active towards arabinoxylan polysaccharides, while the second (AxB8; subfamily 29) acted as a bi-functional β-xylosidase/⍺-L-arabinofuranosidase with primary activity towards small AXOS ( 4 , 12 , 17 ). Furthermore, of the seven predicted GH43 genes in the A. thermocellus genome, only three have been characterized, including the two aforementioned ⍺-L-arabinofuranosidases as well as a 1,3-β-galactosidase (1,3Gal43A; subfamily 24) ( 4 , 12 , 17 , 18 ). Additionally, three of the four uncharacterized A. 
thermocellus GH43 proteins contain CBM42 domains ( 4 , 14 ). Previously, Ribeiro et al. expressed these three CBM42 domains as isolated truncations and tested their binding of various natural substrates, finding they most strongly bind arabinoxylan and arabinan ( 14 ). In this work we describe the unique crystal structure and function of a fourth A. thermocellus GH43 ⍺-L-arabinofuranosidase (AtAbf43C; subfamily 26) that acts primarily in an exo manner towards the ⍺-1,5 linkages present in arabinan and smaller AOS substrates rather than those of arabinoxylan and associated AXOS. We solved the crystal structure of the full enzyme and each of its domains, including a structure of its CBM42 bound to arabinose. We characterized the optimal activity of AtAbf43C on a variety of substrates and demonstrated that its activity is exo in nature with AOSs as its primary substrate. Taken together, our work provides new insight into the structure and function of a thermophilic ⍺-L-arabinofuranosidase. Results Sequence Analysis and Diversity of Characterized GH43_26 Enzymes The gene encoding the AtAbf43C protein in A. thermocellus DSM1313 (Locus Tag: Clo1313_2794 ; GenBank Protein accession #: ADU75776.1) has an open reading frame of 1743 bp and is identical in sequence to the Cthe_2138 gene in A. thermocellus ATCC27405. The predicted 580 amino acid protein consists of an N-terminal signal peptide (residues 1-19), a CBM42 domain (residues 29-160), a GH43 domain (residues 179-488), and a C-terminal dockerin I domain (residues 511-568) ( Fig. 1a ). This suggested Clo1313_2794 likely codes for a secreted enzyme that is part of the extracellular A. thermocellus cellulosome. The top non-identical BLASTp hits (67-93% identity) to AtAbf43C are almost all GH43 enzymes from other Acetivibrio species, many of which are predicted to be putative β-xylosidases or ⍺-L-arabinofuranosidases (Table S1). 
Ten bacterial GH43_26 enzymes (Table S2) cataloged in the CAZy database have previously been characterized ( 19 – 25 ). All of these GH43_26 enzymes are ⍺-1,5-arabinofuranosidases active towards AOS or arabinan. Individually, these enzymes have 49-63% identity to AtAbf43C, and a phylogenetic tree constructed after a multiple protein sequence alignment shows their diversity ( Fig. 1b ; Table S2). Download figure Open in new tab Figure 1. (a) The general architecture of the Clo1313_2794 gene. (b) A phylogenetic tree constructed after a multiple protein sequence alignment using ClustalOmega of the GenBank protein accession # (ADU75776.1) corresponding to Clo1313_2794 (★) with the other characterized members of the GH43_26 subfamily. (c) Protein gel of the purified full length AtAbf43C protein and its truncated versions: AtAbf43C_CBM42 and AtAbf43C_GH43. AtAbf43C Enzymatic Activity To determine the biochemical properties of AtAbf43C, recombinant AtAbf43C (Residues 21-511, lacking the signal peptide and dockerin I domain) and truncation mutants AtAbf43C_CBM42 (Residues 21-167) and AtAbf43C_GH43 (Residues 167-494) were expressed and purified ( Fig. 1a & c; Table S3 ). Using para -nitrophenol-⍺-L-arabinofuranoside (pNPAra) as the substrate, we investigated the optimal reaction conditions for AtAbf43C. AtAbf43C showed optimal activity at pH 5.5 and 65 ℃, though the enzyme retained >75% of its maximum activity within a range of pH 5-6.5 and a temperature range of 55-70 ℃ ( Fig. 2a & b ). After 6 hours of incubation at elevated temperatures, AtAbf43C retains at least 46-48% of its initial activity at 55-60 ℃ and at least 28% at 65 ℃ ( Fig. 2c ). This is consistent with the results of the temperature optimization assay in which enzyme activity begins to diminish at 70 ℃ and above ( Fig. 2b ). The effects of additives on the activity of AtAbf43C were also tested ( Fig. 2d ). 
Some divalent ions, including Ca²⁺, Mg²⁺, and Co²⁺, appeared to have a positive effect on enzymatic activity, with Ca²⁺ addition having the largest effect. In contrast, Zn²⁺ and Cu²⁺ caused a significant decrease in activity. The effects observed with other additives were not statistically significant at the p-value ≤ 0.05 threshold ( Table S4 ). Download figure Open in new tab Figure 2. Effect of environmental conditions on AtAbf43C activity on pNPAra. Error bars represent standard deviations between triplicate technical replicates at each reaction condition. (a) pH optimization performed at 55°C. (b) Temperature optimization performed at pH 5.5. (c) Thermostability test of AtAbf43C. (d) Effect of various 10 mM additives on enzymatic activity. Asterisks indicate statistical significance (P≤0.05) compared with the untreated condition. Substrate Specificity At the optimal pH and temperature, AtAbf43C and AtAbf43C_GH43 had specific activities of 4.95±0.28 U/mg and 5.67±0.63 U/mg respectively on pNPAra, while AtAbf43C_CBM42 showed no activity on pNPAra, with U defined as mM/s of released para -nitrophenol (pNP) ( Table S5a ). This confirmed that AtAbf43C_GH43 contained the active catalytic domain. In addition to pNPAra, the activity of AtAbf43C and its truncated versions was tested on several other substrates. These included natural substrates: wheat arabinoxylan (WAX), beechwood xylan (BX), and sugar beet arabinan (SBA), which were tested using the dinitrosalicylic acid (DNS) reducing sugar assay; as well as three additional synthetic pNP glycosides: para-nitrophenol-ß-D-xylopyranoside (pNPXy), para-nitrophenol-⍺-D-galactopyranoside (pNP⍺Gal), and para-nitrophenol-ß-D-galactopyranoside (pNPßGal). However, no activity on any of these other substrates was detected ( Table S5a & b ). Kinetic parameters were then determined at optimal conditions on pNPAra for AtAbf43C and AtAbf43C_GH43 ( Table 1 , Table S6 ). 
Full substrate saturation could not be achieved for either protein, as the $K_m$ values were so high as to approach the solubility limit of pNPAra in aqueous solution. View this table: View inline View popup Download powerpoint Table 1. Kinetic parameters determined for AtAbf43C and AtAbf43C_GH43 on pNPAra at optimal conditions. Crystal Structure and Mutagenesis Study of AtAbf43C Crystal structures were obtained for the full-length AtAbf43C protein (PDB code: 9NXG) at a resolution of 1.32 Å as well as individual domains AtAbf43C_CBM42 (PDB code: 9NXI) and AtAbf43C_GH43 (PDB code: 9NXJ) at 1.75 Å and 2.32 Å respectively. Additionally, a 1.75 Å crystal structure (PDB code: 9NXH) was solved for AtAbf43C soaked in L-arabinose immediately prior to freezing and mounting, in which two L-arabinofuranose molecules were found bound to the CBM42 domain. A summary of refinement statistics for all 4 structures can be found in Table 2 . View this table: View inline View popup Download powerpoint Table 2. Summary of data and refinement statistics for the structures of AtAbf43C. The full length AtAbf43C arabinose-soaked structure (PDB 9NXH) consists of the smaller N-terminal CBM42 domain connected via a short 19 amino acid linker to the larger C-terminal catalytic GH43 domain ( Fig. 3a ). In addition to the two arabinose molecules bound to the CBM42 domain, the structure also contains three glycerol molecules and a central magnesium ion in the GH43 domain. The catalytic GH43 domain displays the 5-bladed ß-propeller fold typical of GH43 enzymes ( 11 , 13 , 24 – 30 ) ( Fig. 3b ). The GH43 domain of AtAbf43C does not have the C-terminal ß-jelly roll domain found in some subfamilies of GH43 proteins; however, the N-terminal strand of the domain appears to form part of the 5th blade (blade V) in what is colloquially termed a “molecular velcro” ( 11 , 25 , 26 , 28 , 30 ) ( Fig. 3b ). 
This closure in the structure, thought to provide extra structural stability, is not found in many GH43 enzymes, in which the 5th blade in the beta propeller only consists of residues found in the C-terminal strand ( 11 , 27 , 28 , 30 ). Download figure Open in new tab Figure 3. The major structural features of AtAbf43C shown using the arabinose-soaked structure (PDB code: 9NXH). Magnesium (Mg²⁺) is shown as a purple sphere, glycerol molecules are shown as green stick structures, and arabinose molecules are shown as dark grey stick structures. (a) The overall structure of AtAbf43C with the GH43 domain shown in yellow and the CBM42 domain shown in red. (b) The 5-bladed ß-propeller structure of the GH43 domain, with each blade shown in a distinct color. (c) The 3 catalytic residues labeled and shown as stick structures within the GH43 structure. (d) The ß-trefoil structure of the CBM42 domain, with each subdomain shown in a distinct color. (e) Arabinose bound in the ß-pocket of the CBM42 domain. (f) Arabinose bound in the ɣ-pocket of the CBM42 domain. Contacted residues in (e & f) are labeled and shown as stick structures. Hydrogen bonds between the residues and arabinose molecules are shown as dashed cyan lines, with bond lengths labeled in Å. GH43 enzymes operate via an inverting mechanism in which three catalytic residues are highly conserved: an aspartate acting as a base, a glutamate acting as an acid, and a second aspartate acting as a pKa modulator ( 9 , 13 , 24 – 30 ). In AtAbf43C these residues were identified to be D168, D283, and E344, found closely grouped at the base of the beta propeller structure ( Fig. 3c ). 
To demonstrate the importance of these residues to catalytic activity, site-directed mutagenesis was used to generate versions of AtAbf43C (AtAbf43C_D168A, AtAbf43C_D283A, AtAbf43C_E344A, and AtAbf43C_H408A) in which each of the three active site residues, as well as a histidine (H408) initially thought to interact with the magnesium ion, was individually mutated to alanine (Fig. S1a & b). When the mutants were purified and tested alongside wild-type AtAbf43C, activity on pNPAra was completely eliminated in every version of AtAbf43C in which one of the three catalytic residues (D168, D283, or E344) was mutated to alanine (Fig. S1b; Table S7), indicating these sites are critical to enzymatic function. The H408A mutant appeared to have a small and statistically significant (p-value ≤ 0.05) increase in activity relative to the wild-type enzyme, suggesting this histidine residue is not critical for activity (Table S7). The CBM42 domain of AtAbf43C displays the typical ß-trefoil structure found in other CBM42 and similar CBM13 family proteins, consisting of three 40-50 amino acid subdomains (⍺, ß, and ɣ), each of which harbors a potential sugar binding pocket ( Fig. 3d ) ( 13 , 14 , 24 ). However, it has been observed in other CBM42 proteins that one of these three pockets may become nonfunctional ( 13 , 14 , 24 ). In the solved structure of arabinose-soaked AtAbf43C_CBM42 (PDB 9NXH), arabinose was only found bound in the ß and ɣ pockets of the CBM42 domain ( Fig. 3d ). In the ß pocket the arabinose molecule formed hydrogen bonds with three residues, Y73, H70, and D89 ( Fig. 3e ). Similarly, the arabinose in the ɣ pocket contacted Y121, H118, and D136, as well as an additional residue N120 ( Fig. 3f ). In the previous study of this CBM42 by Ribeiro et al., residues D39, D91, and D136 were individually altered to alanine ( 14 ). 
While versions with the D91A and D136A substitutions showed significantly decreased binding affinity for arabinoxylan and arabinose relative to the wild-type protein, the D39A mutation, which corresponds to the ⍺ pocket, did not significantly affect binding ( 14 ). The binding pattern of arabinose observed in our structure would thus appear to support the conclusion that the ⍺ pocket is non-functional or does not contribute to arabinose binding in the AtAbf43C CBM42 domain. Of the ten previously characterized GH43_26 enzymes ( Fig. 1b ; Table S2), SaAraf43A from the bacterium Streptomyces avermitilis is the only GH43_26 enzyme other than AtAbf43C with a crystal structure containing both the CBM42 and GH43 domains. Previously, SaAraf43A was extensively characterized by Ichinose et al. and Fujimoto et al., including multiple crystal structures (PDB: 3AKF, 3AKG, 3AKH, 3AKI) of the full-length protein complexed with various substrates, and a mutagenesis study identifying its catalytic residues ( 24 , 31 ). SaAraf43A is an exo-1,5-⍺-L-arabinofuranosidase, with activity primarily towards AOSs such as arabinotriose, arabinotetraose, and arabinopentaose ( 24 , 31 ). Like AtAbf43C, SaAraf43A contains both a GH43 and CBM42 domain, but in SaAraf43A the GH43 domain resides at the N-terminus of the protein followed by a C-terminal CBM42 ( 24 ). Interestingly, despite the reverse ordering of these domains between SaAraf43A and AtAbf43C, the individual GH43 and CBM42 domains closely align. Structural alignment of the GH43 and CBM42 domains in AtAbf43C individually to the arabinotriose-complexed structure of SaAraf43A (PDB code: 3AKH) results in RMSDs of 0.475 Å (269 common C⍺ atoms) and 0.532 Å (111 common C⍺ atoms) respectively ( Fig. 4a ; Fig. S2a). As such, residues D168, D283, and E344 in the GH43 domain of AtAbf43C closely aligned with corresponding catalytic residues D20, D135, and E196 in SaAraf43A ( Fig. 4b ), which when individually changed to alanine by Fujimoto et al. 
eliminated enzymatic activity on pNPAra ( 24 ). Furthermore, this structure of SaAraf43A contained an arabinose and an arabinobiose molecule within the binding pocket of its GH43 domain. The topology of this binding pocket, characteristic of exo-acting GH43 enzymes, is such that the three catalytic residues are positioned at the bottom of a deep, narrow opening that sterically limits access to larger or branched substrates ( 24 ). This is in contrast to endo-acting GH43s active on polysaccharides like arabinoxylan and arabinan, which possess a much more exposed binding cleft ( 24 – 28 , 30 ). This narrowed binding pocket results from an extended loop structure in the 5th blade of the beta propeller, which AtAbf43C appears to possess ( Fig. 3a & b) ( 24 , 25 ). When overlaid with the apo structure of the AtAbf43C GH43 domain, the bound arabinose and arabinobiose from SaAraf43A fit neatly within the surface structure of the AtAbf43C binding pocket ( Fig. 4c ), suggesting AtAbf43C may be active on AOS substrates similar to those of SaAraf43A. The binding domain of AtAbf43C also closely aligned with the CBM42 domain in SaAraf43A (Fig. S2a). However, while both structures had sugars in their ß-subdomains, the SaAraf43A CBM42 had an arabinobiose bound in its ⍺-pocket, and an unliganded ɣ-pocket (Fig. S2b-d). Furthermore, alignment of binding residues in the ⍺-pocket differed significantly between the two proteins, with AtAbf43C possessing a proline and two glutamines at positions where SaAraf43A possesses glutamine, histidine, and aspartate residues, respectively (Fig. S2b). These differences could account for a non-functional ⍺-binding pocket in the AtAbf43C CBM42. Download figure Open in new tab Figure 4. (a) Structural alignment of the GH43 domain of the arabinose-soaked AtAbf43C structure (GH43 domain in yellow, CBM42 domain in red) with the structure of SaAraf43A complexed with arabinotriose (PDB Code: 3AKH) shown in light blue. 
(b) Alignment of the catalytic residues in SaAraf43A (light blue) and co-crystallized arabinose and arabinobiose molecules in the SaAraf43A structure with the active site residues of AtAbf43C (yellow). (c) Superimposition of the arabinose and arabinobiose molecules from the SaAraf43A structure onto the surface structure of AtAbf43C, with the AtAbf43C catalytic residues shown in yellow. In (a-c) sodium (Na⁺) is shown as an orange sphere, chloride (Cl⁻) is shown as a green sphere, glycerol molecules are shown as green stick structures, and sugar molecules are shown as dark grey stick structures. LC-MS Analysis of Natural Polysaccharide and Oligosaccharide Hydrolysis Finally, using liquid chromatography-mass spectrometry (LC-MS), the activity of AtAbf43C at optimal conditions was tested on the three natural substrates used previously (WAX, BX, SBA), as well as the following oligosaccharides: arabinobiose (A2), arabinotriose (A3), arabinotetraose (A4), arabinopentaose (A5), 2³-α-L-arabinofuranosyl-xylotriose (A²XXX), and 3³-α-L-arabinofuranosyl-xylotetraose (XA³XXX). Based on these results, AtAbf43C appears to be primarily active towards α-1,5 linked AOS and to a lesser extent on arabinan, where it seems to act in an exo manner to release free arabinose ( Table 3 ; Figure S3 ). While marginal activity was detected on WAX, no significant activity was detected on BX or the AXOS tested, suggesting AtAbf43C does not degrade xylan-based substrates ( Table 3 ). View this table: View inline View popup Download powerpoint Table 3. Summary of results from LC-MS analysis of AtAbf43C hydrolysis of various natural substrates and oligosaccharides. Mass spectra produced from the LC-MS based experiment and used for subsequent analysis can be found in the supporting information ( Fig. S3 ). Discussion AtAbf43C is the fourth GH43 enzyme to be characterized from A. thermocellus , and only the third with an experimentally-determined structure ( 12 , 17 , 18 , 29 , 30 ). 
Furthermore, AtAbf43C is the first enzyme in the GH43_26 subfamily from A. thermocellus , and its structure is only the second crystal structure of a GH43_26 enzyme that includes the CBM domain ( 12 , 17 , 18 , 24 , 25 ) ( Table S2 ). Natively, AtAbf43C is likely an extracellular cellulosomal enzyme based on the presence of an N-terminal signal peptide and C-terminal dockerin I domain ( 4 , 12 , 17 , 18 ). Characterization of the activity of AtAbf43C shows that it is an ⍺-arabinofuranosidase with activity towards pNPAra. The optimal pH and temperature for AtAbf43C on pNPAra (pH 5.5 and 65°C, Fig 2a & b ) are generally consistent with those of other characterized A. thermocellus enzymes ( 4 , 12 , 17 ). Kinetic parameters determined on pNPAra show AtAbf43C had a $K_m$ so high as to approach the solubility limit of the substrate ( Table 1 ), a result which was also observed in the previously characterized A. thermocellus GH43 enzyme AxB8, an ⍺-arabinofuranosidase that was also primarily active on oligosaccharides ( 12 ). The larger $k_{cat}/K_m$ value observed in the full length AtAbf43C protein versus the AtAbf43C_GH43 domain would suggest the CBM42 domain may aid in substrate specificity to pNPAra; however, this is not definitive because substrate saturation could not be achieved owing to the solubility limit of pNPAra. AtAbf43C was not active on the other pNP glycosides tested ( Table S5a ), indicating it acts primarily as an ⍺-arabinofuranosidase and does not have a secondary function as a ß-xylosidase as has been reported for other GH43 enzymes ( 3 – 5 , 12 ). Meanwhile, testing on natural hemicellulose substrates (WAX, BX, and SBA) using the DNS assay suggested that AtAbf43C was inactive towards xylan and arabinan polysaccharides ( Table S5b ). Solution of multiple AtAbf43C crystal structures provided some insight into the enzyme’s preferred substrate and mode of action. 
First, the arabinose-bound structure of full-length AtAbf43C builds upon previous work by Ribeiro et al., by providing structural evidence for a non-functional ⍺-binding pocket, with arabinose only bound in the ß- and ɣ-subdomains ( 14 ). Subsequent alignment with the homologous CBM42 structure in SaAraf43A further supports this, as significant differences in corresponding binding residues were observed between the two proteins. Next, alignment of the GH43 domain in AtAbf43C with SaAraf43A allowed for identification of the three conserved active site residues in AtAbf43C (D168, D283, and E344), and insight into the binding modality of its binding pocket ( 24 ). Mutation of these catalytic residues in AtAbf43C confirms their involvement in its activity, with every single-point mutant of these residues losing activity ( Fig. S1, Table S7 ). The deep-narrow topology of this pocket ( Fig 3c ), which limits access to the active site, is very similar to that of SaAraf43A ( Fig. 4b & c ), which acts in an exo manner towards ⍺-1,5-linked AOS. Subsequent LC-MS analysis of the hydrolysis products of AtAbf43C on natural substrates shows that this enzyme is capable of liberating some arabinose from SBA and, to a lesser extent, from WAX ( Table 3 , Fig S3 ), though notably not enough to be detected by the DNS assay, indicating that it likely acts in an exo manner at the ends of polysaccharides. LC-MS analysis of oligosaccharide hydrolysis ( Table 3 , Fig S3 ) showed AtAbf43C is active towards ⍺-1,5-linked AOS but not AXOS. This is consistent with activities observed in other members of the GH43_26 subfamily, which all act as arabinan- or AOS-degrading ⍺-1,5-arabinofuranosidases ( Table S2 ). Taken together, our structural and functional characterization demonstrates that AtAbf43C is an active arabinofuranosidase, specialized in degrading AOSs to arabinose in an exo manner. AtAbf43C from A. 
thermocellus is the most thermophilic GH43_26 enzyme characterized to date, thus deepening our understanding of this important subfamily of arabinofuranosidase enzymes. Experimental Procedures Sequence and Phylogenetic Analysis The nucleotide and protein sequences for AtAbf43C (Locus tag: Clo1313_2794 , GenBank Protein accession: ADU75776.1) as well as its predicted domains were found using the CAZy database and the National Center for Biotechnology Information (NCBI) database from the A. thermocellus DSM1313 genome ( 7 , 32 ). The N-terminal signal peptide was predicted using the SignalP 6.0 software ( 33 ). The protein-protein BLAST search and pairwise alignment were conducted with the full amino acid sequence for Clo1313_2794 using the NCBI blastp tool. Characterized GH43 subfamily 26 proteins were found as annotated in the CAZy database, and their protein sequences were obtained using their primary accession number in the NCBI database. Multiple sequence alignment of these proteins was performed using the ClustalOmega algorithm from which the phylogenetic tree was constructed using the IQTREE webserver and visualized using the Interactive Tree of Life (iTOL) online tool ( 34 – 36 ). Cloning of AtAbf43C A table of oligonucleotide primers used to construct the plasmids in this study can be found in the supporting information ( Table S8 ). The gene for AtAbf43C was amplified from A. thermocellus DSM1313 genomic DNA purchased from the Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures via PCR ( Table S8; Primers JLG005-006). Amplification removed a predicted N-terminal signal peptide and C-terminal dockerin I domain from the Clo1313_2794 gene encoding the AtAbf43C module while adding overlap regions for subsequent Gibson assembly into the pET28b(+) expression vector (EMD Millipore) that added a C-terminal -LEHHHHHH purification tag. 
Truncated versions of AtAbf43C (AtAbf43C_CBM42 and AtAbf43C_GH43) were amplified from this resulting vector with overlaps for Gibson assembly into the pET28b(+) vector ( Table S8 ; Primers JLG005 & JLG036-38). Site-directed mutagenesis of AtAbf43C was carried out via PCR amplification of the full-length expression plasmid using mutagenic primers which added overlap regions for subsequent recircularization via Gibson assembly ( Table S8 ; Primers JLG195-202). Gibson Assemblies were performed by incubating amplified DNA fragments at 50 ℃ for 1 hour with NEBuilder HiFi DNA Assembly Master mix (New England Biolabs) as per the manufacturer’s recommended protocol. Bacterial Strains and Culture Conditions Plasmids were cloned in NEB chemically competent Escherichia coli DH5⍺ (New England Biolabs). Plasmids were isolated using ZymoPURE miniprep kits (Zymo Research), and plasmid sequences were confirmed by sequencing (Azenta Genewiz). Sequence-confirmed plasmids were then transformed into chemically competent E. coli BL21 (DE3) pRosetta2 (EMD Millipore) for protein expression. E. coli cultures were maintained in enriched Luria-Bertani (LB) Medium (24 g/L yeast extract, 10 g/L tryptone, 5 g/L NaCl) or on LB agar plates (5 g/L yeast extract, 10 g/L tryptone, 5 g/L NaCl, 15 g/L (1.5% w/v) agar) with 50 µg/ml kanamycin (IBI Scientific), or 50 µg/ml kanamycin (IBI Scientific) and 33 µg/ml chloramphenicol (RPI), as appropriate. Protein Expression and Purification Protein expression was induced by inoculating ZYM-5052 autoinduction media with overnight cultures of the transformed E. coli BL21 DE3 Rosetta strains ( 37 ). Cells were harvested by centrifugation at 6000 x g for 10 minutes after 18-22 hours of growth at 37 ℃ in a shaking incubator at 250 rpm. 
Cell pellets were resuspended in lysis buffer (20 mM Sodium Phosphate pH 7.4, 500 mM NaCl, 10 mM imidazole) before being lysed via sonication on ice using a Branson SFX 550 Sonifier® in cycles of 10s on at 20 kHz and 10s off for 10 minutes total. This lysate was then centrifuged for 30 minutes at 30,000 x g at 4 ℃, and the resulting supernatant was passed through a 0.22 µm filter. Full length AtAbf43C and its truncated versions were then purified via Immobilized Metal Affinity Chromatography (IMAC) using 5 ml EconoFit IMAC columns (Bio-Rad) on an NGC Chromatography System (Bio-Rad) and fractionated in elution buffer (20 mM Sodium Phosphate pH 7.4, 500 mM NaCl, 250 mM imidazole). Fractions of IMAC purified full length AtAbf43C protein were combined and further purified by size exclusion chromatography via FPLC using a HiLoad TM 26/600 Superdex TM 200 pg column (Cytiva) in a mobile phase of 50 mM sodium phosphate pH 7.0, 150 mM NaCl buffer. Mutant versions of AtAbf43C and wild type AtAbf43C protein used to test the effect of mutagenesis were purified via immobilized metal ion chromatography using His-Spin Protein Mini Prep Kits (Zymo Research) as per the manufacturer’s instructions. After purifications, all proteins were buffer exchanged into pH 6.0 100 mM sodium phosphate buffer using 30 kDa MWCO Centrifugal Filter Units (CELLTREAT®) or 10 kDa MWCO Spin-X® UF 20 mL Centrifugal Concentrators (Corning®). Purity of the resulting proteins was assessed by SDS-PAGE using 4–20% Mini-PROTEAN® TGX Stain-Free TM Protein Gels (Bio-Rad) with Precision Plus TM unstained protein standards (Bio-Rad). Protein concentration was evaluated by measuring the A280nm of the resulting buffer exchanged proteins using a Nanodrop One spectrophotometer (Thermo Scientific) and calculating protein concentration using Beer’s law and the calculated A280nm extinction coefficient of each individual protein ( 38 – 40 ). 
Aliquots of full length AtAbf43C at a protein concentration of 9 mg/mL were frozen at -80°C prior to further analysis. Substrates Para-nitrophenol-⍺-L-arabinofuranoside (pNPAra) and para-nitrophenol-ß-D-xylopyranoside (pNPXy) were obtained from Sigma-Aldrich and EMD Millipore, respectively. Para-nitrophenol-⍺-D-galactopyranoside (pNP⍺Gal) and para-nitrophenol-ß-D-galactopyranoside (pNPßGal) were obtained from TCI Chemicals. Natural substrates with purities >95% including Wheat Arabinoxylan (WAX), Beechwood Xylan (BX), and Sugar Beet Arabinan (SBA) were obtained from Megazyme (Neogen). Oligosaccharides including Arabinobiose, Arabinotriose, Arabinotetraose, Arabinopentose, 2 3 -α-L-Arabinofuranosyl-xylotriose (A 2 XXX), and 3 3 -α-L-Arabinofuranosyl-xylotetraose (XA 3 XXX) were obtained from Megazyme (Neogen). Arabinose and Xylose were purchased from Fisher Scientific. Temperature and pH optimization Prior to all assays, AtAbf43C protein at a final concentration of 9 mg/ml in pH 6 100 mM sodium phosphate buffer was diluted to 0.025 mg/ml in an appropriate reaction buffer, described further below. Reactions were initiated by adding 45 µl of 5 mM pNPAra solution dissolved in appropriate buffer to 5 µl of the 0.025 mg/ml enzyme solution in PCR strip tubes, before immediately being moved to a thermocycler for incubation. After 10 minutes, reactions were stopped with the addition of 100 µl of 1 M sodium carbonate. The absorbance at 405 nm of 100 µl of each reaction was measured in a flat-bottomed clear 96 well plate using a BioTek SynergyH1 microplate reader (Agilent). pH optimization was first performed at 55 ℃ with 100 mM sodium acetate buffer used for pH 4-5.5 conditions, and 100 mM sodium phosphate buffer for pH 6-8 conditions. Temperature optimization was then performed at the optimal observed pH of 5.5 in 100 mM sodium acetate buffer. Activity was calculated as a percentage relative to the highest observed activity. 
All reaction conditions were performed in triplicate. Thermostability and Effect of Additives To test the thermostability of AtAbf43C, protein was first diluted as described previously in pH 5.5 100 mM sodium acetate buffer, before being incubated in a thermocycler at temperatures of 55-70 ℃. Aliquots of protein were then removed at 30-minute, 1-hour, 3-hour, and 6-hour timepoints. Activity was then tested as described above at the optimal observed temperature of 65 ℃ using 5 mM pNPAra dissolved in pH 5.5 100 mM sodium acetate buffer. Residual activity was calculated as a percentage relative to that of unincubated AtAbf43C. To test the effects of various salts and chelating agents, AtAbf43C was first diluted in pH 5.5 100 mM sodium acetate buffer containing the additive at 10 mM and pre-incubated at room temperature for 1 hour prior to adding substrate. Activity was then tested as described previously at optimal temperature and pH except the 5 mM pNPAra substrate solutions also contained 10 mM of the specific additive. Additives were as follows: NaCl, CaCl 2 , KCl, MgSO 4 *7H 2 O, MnCl 2 *4H 2 O, ZnSO 4 *7H 2 O, CuCl 2 *2H 2 O, FeCl 3 *6H 2 O, CoCl 2 , NiCl 2 *6H 2 O, EDTA tetrasodium dihydrate. Activity was calculated as a percentage relative to AtAbf43C incubated and tested with no additive. Additionally, the absorbance at 405 nm measured from blank solutions containing additives was subtracted from that observed in the corresponding enzymatic reaction conditions to control for the variation in absorbance due to the addition of the specific metal salt or chelating agent. All reaction conditions were performed in triplicate. Statistical significance of additive effects was determined by running a Brown-Forsythe and Welch ANOVA test on the collected data in GraphPad Prism version 10.0 (GraphPad Software, Boston, Massachusetts USA). 
Substrate Specificity Activity against pNP-Glycosides (pNPAra, pNPXy, pNP⍺Gal, and pNPßGal) was tested at optimal pH and temperature as described above for pNPAra on AtAbf43C, AtAbf43C_GH43, and AtAbf43C_CBM42. 4-nitrophenol (pNP) released was quantified via Beer’s law with a 405 nm extinction coefficient of 18500 L/(mol*cm) and path length calculated empirically as per the manufacturer’s recommendation ( 41 , 42 ). Specific activity was then calculated as U/mg of protein, where U is defined as µM/s of released pNP. Activity against natural substrates Wheat Arabinoxylan (WAX), Beechwood Xylan (BX), and Sugar Beet Arabinan (SBA), was then tested by incubating the proteins as described above at optimal conditions except with substrate solutions containing natural substrate dissolved at 1% (w/v) in buffer and incubation time lengthened to 1 hour. Activity was detected as described previously by Conway et al. using the dinitrosalicylic acid (DNS) reducing assay with L-arabinose used as a standard for oligosaccharide release ( 43 , 44 ). Kinetic parameters were determined for AtAbf43C and AtAbf43C_GH43 by incubating the proteins as described above except with varying concentrations of pNPAra (0.9-36 mM) and with reaction times shortened to 2 minutes to ensure linear initial rates of reaction. pNP released was quantified as described above with velocities calculated as µM pNP/s. Using the predicted molar mass of each protein, kinetic parameters were then calculated via non-linear regression using the “determine kcat” model in GraphPad Prism version 10.0 (GraphPad Software, Boston, Massachusetts USA). Testing of AtAbf43C Mutants To test the importance of certain residues to catalytic activity, purified AtAbf43C with residues D168, D283, E344, and H408 individually mutated to alanine were tested alongside wild type AtAbf43C on pNPAra. 
Proteins were prepared and incubated in pH 5.5 sodium acetate with 5 mM pNPAra substrate as described above for 10 minutes at 65 ℃ at a final reaction concentration of 0.0025 mg/ml protein. Activity was calculated as a percentage relative to that of wild type AtAbf43C. Statistical significance was determined by running a Brown-Forsythe and Welch ANOVA test on the collected data in GraphPad Prism version 10.0 (GraphPad Software, Boston, Massachusetts USA). LC-MS Analysis of Hydrolyzed Products To further investigate the activity of AtAbf43C, reactions were initiated by adding 90 µL of substrate solution dissolved in pH 5.5 100 mM sodium acetate buffer to 10 µL of enzyme diluted in the same buffer as described above at 0.025 mg/ml or 10 µL of blank buffer. Samples were incubated at 65 ℃ for 1 hour in a thermocycler after which the reactions were stopped by heating at 95 ℃ for 5 minutes to inactivate the enzyme. Natural substrates WAX, BX, and SBA were dissolved at a concentration of 1% (w/v), while oligosaccharides and sugars were dissolved at 0.1% (w/v). LC-MS analysis was performed using an Agilent 6530 QTOF connected to an Agilent 1260 LC system. Mass spectra were acquired using electrospray ionization (ESI) with the instrument in positive ion mode. Reaction samples were run on an Agilent HI-PLEX Na (Octo) 300 x 7.7 mm column heated to 80 °C. The mobile phase was ultrapure water, and sample runs were 30 minutes long with a flow rate of 0.5 mL/min. Data were analyzed using Agilent MassHunter software; mass spectra and extracted ion chromatograms (EICs) for species of interest ([M+Na] + adducts) were then obtained. Crystallization, data collection, and structure refinement Purified AtAbf43C, AtAbf43C_CBM42, and AtAbf43C_GH43 proteins at concentrations of 9.0 mg/ml, 4.4 mg/ml, and 8.5 mg/ml respectively were crystallized over the course of several days via the sitting drop vapor diffusion method. 
AtAbf43C was crystallized in the form of thin needle-like blades in space group P2 1 with two molecules in the asymmetric unit from a solution of 15-20% v/v propanol, 25% w/v PEG3350, 50 mM AmSO 4 , and 0.1 M HEPES pH 7.7. AtAbf43C crystals soaked in L-arabinose, crystallized in similar conditions, were in space group C2 with one molecule in the asymmetric unit. AtAbf43C_CBM42 was crystallized in space group P2 1 2 1 2 with one molecule in the asymmetric unit in the form of small flat plates from a solution of 25% w/v PEG3350 and 0.1 M citric acid pH 3.0. AtAbf43C_GH43 was crystallized in space group P6 5 with two molecules in the asymmetric unit in the form of bi-pyramidal crystals in 25% w/v polyethylene glycol monomethyl ether (pegM) 5000, 0.25 M AmSO 4 , and 0.1 M Bis-Tris pH 5.5. Crystals were mounted in nylon loops (Hampton Research) and flash-cooled in liquid nitrogen after brief (<30 seconds) equilibration in cryoprotectant solutions. Cryoprotectant solutions corresponded to crystallization solutions supplemented with 27-30% v/v glycerol. For the arabinose soak of AtAbf43C crystals the cryoprotectant solution was also supplemented with 5% w/v arabinose and the crystal equilibration time in this solution was extended to 3 minutes before flash-cooling. Data were collected on beam lines 17-ID-1 (AMX) and 17-ID-2 (FMX) at Brookhaven National Lab, Upton, New York, USA. Data were processed using XDS and scaled using AIMLESS ( 45 , 46 ). Data collection and processing statistics are shown in Table 2 . Structures of AtAbf43C, AtAbf43C_CBM42, and AtAbf43C_GH43 proteins were determined by molecular replacement using the program PHASER starting from the AlphaFold model of AtAbf43C and partially refined versions thereof ( 47 ). AtAbf43C_GH43 was first solved using an edited AlphaFold model, and the partially refined structure was used as a model for AtAbf43C which was then completed using the AlphaFold model for AtAbf43C_ CBM42. 
Partial refinement of this full-length AtAbf43C model at 1.3 Å resolution was then used as the starting point for the solution of AtAbf43C_CBM42. Initial models were iteratively rebuilt and refined using COOT and PHENIX.REFINE respectively ( 47 , 48 ). An apparent metal binding site was added based on local site geometry and surrounding ligands and assigned as Mg 2+ . Arabinose binding to AtAbf43C was assessed by model-phased difference (||Fobs|-|Fcalc||) maps revealing two arabinose molecules in the furanose conformation bound to AtAbf43C. We were not successful in obtaining arabinose bound to AtAbf43C_GH43 in soaking experiments. Persistent difference density at the expected active site of AtAbf43C_GH43, too large to be the cryoprotectant used, was modeled using a carbohydrate group formally named as an unknown ligand (residue name UNL). No oligomerization of the AtAbf43C, AtAbf43C_CBM42, and AtAbf43C_GH43 proteins was observed within the crystal lattices of the four crystal forms. Final refinement statistics are shown in Table 2 . The four structures have been deposited in the Protein Data Bank (AtAbf43C with id 9NXG, AtAbf43C:Arabinose with id 9NXH, AtAbf43C_CBM42 with id 9NXI, and AtAbf43C_GH43 with id 9NXJ) ( 49 ). Visualizations were prepared and structural alignments were performed using the PyMOL Molecular Graphics System, Version 2.5.8 (Schrödinger, LLC). Data Availability Protein crystal structures obtained in our study are deposited to the Protein Data Bank with accession numbers 9NXG, 9NXH, 9NXI, 9NXJ. All other data are contained within the manuscript and supplementary information. Supporting Information This article contains supporting information. Funding and additional information This work was supported by the Energy Research Fund administered by the Andlinger Center for Energy and the Environment at Princeton University and startup funds from the Department of Chemical and Biological Engineering at Princeton University to J.M.C. 
Mass spectrometry data were collected on an instrument purchased with a supplement to NIH grant GM107036 to A.J.L. A.Z. was supported by an NSF Graduate Research Fellowship Program under Grant DGE-2039656. Conflict of Interest The authors declare that they have no conflicts of interest with the contents of this article. Acknowledgements This research used resources of the National Synchrotron Light Source II, a U.S. Department of Energy (DOE) Office of Science User Facility operated for the DOE Office of Science by Brookhaven National Laboratory under Contract No. DE-SC0012704. The Center for BioMolecular Structure (CBMS) is primarily supported by the National Institutes of Health, National Institute of General Medical Sciences (NIGMS) through a Center Core P30 Grant (P30GM133893), and by the DOE Office of Biological and Environmental Research (KP1605010). Abbreviations and nomenclature (CAZymes) Carbohydrate Active enZymes (GH) Glycoside hydrolase (CBM) Carbohydrate Binding Module (AOS) arabino-oligosaccharides (AXOS) arabino-xylo-oligosaccharides (pNP) 4-nitrophenol (pNPAra) para-nitrophenol-α-L-arabinofuranoside (pNPXy) para-nitrophenol-ß-D-xylopyranoside (pNP⍺Gal) para-nitrophenol-⍺-D-galactopyranoside (pNPßGal) para-nitrophenol-ß-D-galactopyranoside (DNS) dinitrosalicylic acid (WAX) wheat arabinoxylan (BX) beechwood xylan (SBA) sugar beet arabinan (A2) Arabinobiose (A3) Arabinotriose (A4) Arabinotetraose (A5) Arabinopentose (A 2 XXX) 2 3 -α-L-Arabinofuranosyl-xylotriose (XA 3 XXX) 3 3 -α-L-Arabinofuranosyl-xylotetraose References 1. ↵ Chen , H. ( 2014 ) Chemical Composition and Structure of Natural Lignocellulose . in Biotechnology of Lignocellulose: Theory and Practice ( Chen , H. ed), pp. 25 – 71 , Springer Netherlands, Dordrecht , doi: 10.1007/978-94-007-6898-7_2 OpenUrl CrossRef 2. ↵ Bing , R. G. , Sulis , D. B. , Wang , J. P. , Adams , M. W. W. , and Kelly , R. M . 
( 2021 ) Thermophilic microbial deconstruction and conversion of natural and transgenic lignocellulose . Environmental Microbiology Reports . 13 , 272 – 293 OpenUrl CrossRef PubMed 3. ↵ Poria , V. , Saini , J. K. , Singh , S. , Nain , L. , and Kuhad , R. C . ( 2020 ) Arabinofuranosidases: Characteristics, microbial production, and potential in waste valorization and industrial applications . Bioresource Technology . 304 , 123019 OpenUrl CrossRef PubMed 4. ↵ Hamann , P. R. V. , and Noronha , E. F . ( 2022 ) Xylan-breakdown apparatus of Clostridium thermocellum . Cellulose . 29 , 7535 – 7553 OpenUrl CrossRef 5. ↵ Numan , M. T. , and Bhosle , N. B . ( 2006 ) α-l-Arabinofuranosidases: the potential applications in biotechnology . Journal of Industrial Microbiology and Biotechnology . 33 , 247 – 260 OpenUrl CrossRef 6. ↵ Gilbert , H. J . ( 2010 ) The Biochemistry and Structural Biology of Plant Cell Wall Deconstruction . Plant Physiology . 153 , 444 – 455 OpenUrl FREE Full Text 7. ↵ Drula , E. , Garron , M.-L. , Dogan , S. , Lombard , V. , Henrissat , B. , and Terrapon , N . ( 2022 ) The carbohydrate-active enzyme database: functions and literature . Nucleic Acids Research . 50 , D571 – D577 OpenUrl CrossRef PubMed 8. ↵ Mewis , K. , Lenfant , N. , Lombard , V. , and Henrissat , B . ( 2016 ) Dividing the Large Glycoside Hydrolase Family 43 into Subfamilies: a Motivation for Detailed Enzyme Characterization . Applied and Environmental Microbiology . 82 , 1686 – 1692 OpenUrl Abstract / FREE Full Text 9. ↵ Vuong , T. V. , and Wilson , D. B . ( 2010 ) Glycoside hydrolases: Catalytic base/nucleophile diversity . Biotechnology and Bioengineering . 107 , 195 – 205 OpenUrl CrossRef PubMed 10. ↵ Sturgeon , R. J . ( 1997 ) Advances in Macromolecular Carbohydrate Research, Advances in Macromolecular Carbohydrate Research , JAI Press 11. ↵ Vandermarliere , E. , Bourgois , T. M. , Winn , M. D. , van Campenhout , S. , Volckaert , G. , Delcour , J. A. , Strelkov , S. V. , Rabijns , A. 
, and Courtin , C. M. ( 2009 ) Structural analysis of a glycoside hydrolase family 43 arabinoxylan arabinofuranohydrolase in complex with xylotetraose reveals a different binding mechanism compared with other members of the same family . Biochemical Journal . 418 , 39 – 47 OpenUrl Abstract / FREE Full Text 12. ↵ de Camargo , B. R. , Claassens , N. J. , Quirino , B. F. , Noronha , E. F. , and Kengen , S. W. M. ( 2018 ) Heterologous expression and characterization of a putative glycoside hydrolase family 43 arabinofuranosidase from Clostridium thermocellum B8 . Enzyme and Microbial Technology . 109 , 74 – 83 OpenUrl CrossRef 13. ↵ The CAZypedia Consortium ( 2018 ) Ten years of CAZypedia: a living encyclopedia of carbohydrate-active enzymes . Glycobiology . 28 , 3 – 8 OpenUrl CrossRef PubMed 14. ↵ Ribeiro , T. , Santos-Silva , T. , Alves , V. D. , Dias , F. M. V. , Luís , A. S. , Prates , J. A. M. , Ferreira , L. M. A. , Romão , M. J. , and Fontes , C. M. G. A . ( 2010 ) Family 42 carbohydrate-binding modules display multiple arabinoxylan-binding interfaces presenting different ligand affinities . Biochimica et Biophysica Acta (BBA) - Proteins and Proteomics . 1804 , 2054 – 2062 OpenUrl CrossRef 15. ↵ Blumer-Schuette , S. E. , Brown , S. D. , Sander , K. B. , Bayer , E. A. , Kataeva , I. , Zurawski , J. V. , Conway , J. M. , Adams , M. W. W. , and Kelly , R. M . ( 2014 ) Thermophilic lignocellulose deconstruction . FEMS Microbiology Reviews . 38 , 393 – 448 OpenUrl CrossRef PubMed 16. ↵ Taylor , E. J. , Smith , N. L. , Turkenburg , J. P. , D’Souza , S. , Gilbert , H. J. , and Davies , G. J . Structural insight into the ligand specificity of a thermostable family 51 arabinofuranosidase, Araf51, from Clostridium thermocellum . doi: 10.1042/BJ20051780 OpenUrl Abstract / FREE Full Text 17. ↵ Ahmed , S. , Luis , A. S. , Bras , J. L. A. , Ghosh , A. , Gautam , S. , Gupta , M. N. , Fontes , C. M. G. A. , and Goyal , A . 
( 2013 ) A Novel α-L-Arabinofuranosidase of Family 43 Glycoside Hydrolase (Ct43Araf) from Clostridium thermocellum . PLOS ONE . 8 , e73575 OpenUrl CrossRef PubMed 18. ↵ Ichinose , H. , Kuno , A. , Kotake , T. , Yoshida , M. , Sakka , K. , Hirabayashi , J. , Tsumuraya , Y. , and Kaneko , S . ( 2006 ) Characterization of an Exo-β-1,3-Galactanase from Clostridium thermocellum . Applied and Environmental Microbiology . 72 , 3515 – 3523 OpenUrl Abstract / FREE Full Text 19. ↵ Liu , Y. , Angelov , A. , Feiler , W. , Baudrexl , M. , Zverlov , V. , Liebl , W. , and Vanderhaeghen , S . ( 2022 ) Arabinan saccharification by biogas reactor metagenome-derived arabinosyl hydrolases . Biotechnology for Biofuels and Bioproducts . 15 , 121 OpenUrl CrossRef 20. Matsuo , N. , Kaneko , S. , Kuno , A. , Kobayashi , H. , and Kusakabe , I . ( 2000 ) Purification, characterization and gene cloning of two alpha-L-arabinofuranosidases from Streptomyces chartreusis GS901 . Biochem J . 346 , 9 – 15 OpenUrl Abstract / FREE Full Text 21. Michlmayr , H. , Hell , J. , Lorenz , C. , Böhmdorfer , S. , Rosenau , T. , and Kneifel , W . ( 2013 ) Arabinoxylan Oligosaccharide Hydrolysis by Family 43 and 51 Glycosidases from Lactobacillus brevis DSM 20054 . Applied and Environmental Microbiology . 79 , 6747 – 6754 OpenUrl Abstract / FREE Full Text 22. Cartmell , A. , McKee , L. S. , Peña , M. J. , Larsbrink , J. , Brumer , H. , Kaneko , S. , Ichinose , H. , Lewis , R. J. , Viksø-Nielsen , A. , Gilbert , H. J. , and Marles-Wright , J . ( 2011 ) The Structure and Function of an Arabinan-specific α-1,2-Arabinofuranosidase Identified from Screening the Activities of Bacterial GH43 Glycoside Hydrolases* . Journal of Biological Chemistry . 286 , 15483 – 15495 OpenUrl Abstract / FREE Full Text 23. Kang , Y. , Choi , C.-Y. , Kang , J. , Ju , Y.-R. , Kim , H. B. , Han , N. S. , and Kim , T.-J . 
( 2024 ) Functional Characterization of Endo- and Exo-Hydrolase Genes in Arabinan Degradation Gene Cluster of Bifidobacterium longum subsp. suis . International Journal of Molecular Sciences . 25 , 3175 OpenUrl CrossRef PubMed 24. ↵ Fujimoto , Z. , Ichinose , H. , Maehara , T. , Honda , M. , Kitaoka , M. , and Kaneko , S . ( 2010 ) Crystal Structure of an Exo-1,5-α-l-arabinofuranosidase from Streptomyces avermitilis Provides Insights into the Mechanism of Substrate Discrimination between Exo- and Endo-type Enzymes in Glycoside Hydrolase Family 43 * . Journal of Biological Chemistry . 285 , 34134 – 34143 OpenUrl Abstract / FREE Full Text 25. ↵ Linares-Pastén , J. A. , Falck , P. , Albasri , K. , Kjellström , S. , Adlercreutz , P. , Logan , D. T. , and Karlsson , E. N . ( 2017 ) Three-dimensional structures and functional studies of two GH43 arabinofuranosidases from Weissella sp . strain 142 and Lactobacillus brevis . The FEBS Journal . 284 , 2019 – 2036 OpenUrl CrossRef PubMed 26. ↵ Falck , P. , Linares-Pastén , J. A. , Adlercreutz , P. , and Karlsson , E. N . ( 2016 ) Characterization of a family 43 β-xylosidase from the xylooligosaccharide utilizing putative probiotic Weissella sp. Strain 92 . Glycobiology . 26 , 193 – 202 OpenUrl CrossRef PubMed 27. ↵ Cartmell , A. , McKee , L. S. , Peña , M. J. , Larsbrink , J. , Brumer , H. , Kaneko , S. , Ichinose , H. , Lewis , R. J. , Viksø-Nielsen , A. , Gilbert , H. J. , and Marles-Wright , J . ( 2011 ) The Structure and Function of an Arabinan-specific α-1,2-Arabinofuranosidase Identified from Screening the Activities of Bacterial GH43 Glycoside Hydrolases* . Journal of Biological Chemistry . 286 , 15483 – 15495 OpenUrl Abstract / FREE Full Text 28. ↵ Till , M. , Goldstone , D. , Card , G. , Attwood , G. T. , Moon , C. D. , and Arcus , V. L . ( 2014 ) Structural analysis of the GH43 enzyme Xsa43E from Butyrivibrio proteoclasticus . Acta Cryst F . 70 , 1193 – 1198 OpenUrl CrossRef 29. ↵ Jiang , D. , Fan , J. , Wang , X. 
, Zhao , Y. , Huang , B. , Liu , J. , and Zhang , X. C . ( 2012 ) Crystal structure of 1,3Gal43A, an exo-β-1,3-galactanase from Clostridium thermocellum . Journal of Structural Biology . 180 , 447 – 457 OpenUrl CrossRef PubMed 30. ↵ Goyal , A. , Ahmed , S. , Sharma , K. , Gupta , V. , Bule , P. , Alves , V. D. , Fontes , C. M. G. A. , and Najmudin , S . ( 2016 ) Molecular determinants of substrate specificity revealed by the structure of Clostridium thermocellum arabinofuranosidase 43A from glycosyl hydrolase family 43 subfamily 16 . Acta Cryst D . 72 , 1281 – 1289 OpenUrl CrossRef 31. ↵ Ichinose , H. , Yoshida , M. , Fujimoto , Z. , and Kaneko , S . ( 2008 ) Characterization of a modular enzyme of exo-1,5-α-l-arabinofuranosidase and arabinan binding module from Streptomyces avermitilis NBRC14893 . Appl Microbiol Biotechnol . 80 , 399 – 408 OpenUrl CrossRef PubMed 32. ↵ Sayers , E. W. , Beck , J. , Bolton , E. E. , Brister , J. R. , Chan , J. , Comeau , D. C. , Connor , R. , DiCuccio , M. , Farrell , C. M. , Feldgarden , M. , Fine , A. M. , Funk , K. , Hatcher , E. , Hoeppner , M. , Kane , M. , Kannan , S. , Katz , K. S. , Kelly , C. , Klimke , W. , Kim , S. , Kimchi , A. , Landrum , M. , Lathrop , S. , Lu , Z. , Malheiro , A. , Marchler-Bauer , A. , Murphy , T. D. , Phan , L. , Prasad , A. B. , Pujar , S. , Sawyer , A. , Schmieder , E. , Schneider , V. A. , Schoch , C. L. , Sharma , S. , Thibaud-Nissen , F. , Trawick , B. W. , Venkatapathi , T. , Wang , J. , Pruitt , K. D. , and Sherry , S. T . ( 2024 ) Database resources of the National Center for Biotechnology Information . Nucleic Acids Research . 52 , D33 – D43 OpenUrl CrossRef PubMed 33. ↵ Teufel , F. , Almagro Armenteros , J. J. , Johansen , A. R. , Gíslason , M. H. , Pihl , S. I. , Tsirigos , K. D. , Winther , O. , Brunak , S. , von Heijne , G. , and Nielsen , H. ( 2022 ) SignalP 6.0 predicts all five types of signal peptides using protein language models . Nat Biotechnol . 
40 , 1023 – 1025 OpenUrl CrossRef PubMed 34. ↵ Letunic , I. , and Bork , P . ( 2024 ) Interactive Tree of Life (iTOL) v6: recent updates to the phylogenetic tree display and annotation tool . Nucleic Acids Research . 52 , W78 – W82 OpenUrl CrossRef PubMed 35. Trifinopoulos , J. , Nguyen , L.-T. , von Haeseler , A. , and Minh , B. Q. ( 2016 ) W-IQ-TREE: a fast online phylogenetic tool for maximum likelihood analysis . Nucleic Acids Research . 44 , W232 – W235 OpenUrl CrossRef PubMed 36. ↵ Sievers , F. , and Higgins , D. G . ( 2018 ) Clustal Omega for making accurate alignments of many protein sequences . Protein Science . 27 , 135 – 145 OpenUrl CrossRef PubMed 37. ↵ Studier , F. W . ( 2005 ) Protein production by auto-induction in high-density shaking cultures . Protein Expression and Purification . 41 , 207 – 234 OpenUrl CrossRef PubMed Web of Science 38. ↵ Noble , J. E. , and Bailey , M. J. A. ( 2009 ) Chapter 8 Quantitation of Protein . in Methods in Enzymology ( Burgess , R. R. , and Deutscher , M. P. eds), pp. 73 – 95 , Guide to Protein Purification, 2nd Edition, Academic Press , 463 , 73–95 OpenUrl CrossRef PubMed Web of Science 39. Protein Extinction Coefficient and Concentration Calculation [online] https://www.novoprolabs.com/tools/protein-extinction-coefficient-calculation # (Accessed February 16, 2025) 40. ↵ Gill , S. C. , and von Hippel , P. H. ( 1989 ) Calculation of protein extinction coefficients from amino acid sequence data . Analytical Biochemistry . 182 , 319 – 326 OpenUrl CrossRef PubMed Web of Science 41. ↵ Bowers , G. N. , McComb , R. B. , Christensen , R. G. , and Schaffer , R . ( 1980 ) High-purity 4-nitrophenol: purification, characterization, and specifications for use as a spectrophotometric reference material . Clin Chem . 26 , 724 – 729 OpenUrl Abstract / FREE Full Text 42. ↵ Multi-Volume Based Protein Quantification Methods 43. ↵ Conway , J. M. , Pierce , W. S. , Le , J. H. , Harper , G. W. , Wright , J. H. , Tucker , A. L. 
, Zurawski , J. V. , Lee , L. L. , Blumer-Schuette , S. E. , and Kelly , R. M . ( 2016 ) Multidomain, Surface Layer-associated Glycoside Hydrolases Contribute to Plant Polysaccharide Degradation by Caldicellulosiruptor Species* . Journal of Biological Chemistry . 291 , 6732 – 6747 OpenUrl Abstract / FREE Full Text 44. ↵ Miller , G. L . ( 1959 ) Use of Dinitrosalicylic Acid Reagent for Determination of Reducing Sugar . Anal. Chem . 31 , 426 – 428 OpenUrl CrossRef 45. ↵ Kabsch , W . ( 2010 ) XDS . Acta Crystallogr D Biol Crystallogr . 66 , 125 – 132 OpenUrl CrossRef PubMed Web of Science 46. ↵ Evans , P. R. , and Murshudov , G. N . ( 2013 ) How good are my data and what is the resolution? Acta Crystallogr D Biol Crystallogr . 69 , 1204 – 1214 OpenUrl CrossRef PubMed Web of Science 47. ↵ Emsley , P. , Lohkamp , B. , Scott , W. G. , and Cowtan , K . ( 2010 ) Features and development of Coot . Acta Crystallogr D Biol Crystallogr . 66 , 486 – 501 OpenUrl CrossRef PubMed Web of Science 48. ↵ Afonine , P. V. , Grosse-Kunstleve , R. W. , Echols , N. , Headd , J. J. , Moriarty , N. W. , Mustyakimov , M. , Terwilliger , T. C. , Urzhumtsev , A. , Zwart , P. H. , and Adams , P. D . ( 2012 ) Towards automated crystallographic structure refinement with phenix.refine . Acta Crystallogr D Biol Crystallogr . 68 , 352 – 367 OpenUrl CrossRef PubMed Web of Science 49. ↵ Berman , H. M. , Westbrook , J. , Feng , Z. , Gilliland , G. , Bhat , T. N. , Weissig , H. , Shindyalov , I. N. , and Bourne , P. E . ( 2000 ) The Protein Data Bank . Nucleic Acids Research . 28 , 235 – 242 OpenUrl CrossRef PubMed Web of Science View the discussion thread. Back to top Previous Next Posted April 06, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. 
Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Functional and structural characterization of AtAbf43C: An exo-1,5-⍺-L-arabinofuranosidase from Acetivibrio thermocellus DSM1313 Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Functional and structural characterization of AtAbf43C: An exo-1,5-⍺-L-arabinofuranosidase from Acetivibrio thermocellus DSM1313 Joey L. Galindo, Philip D. Jeffrey, Angela Zhu, A. James Link, Jonathan M. Conway bioRxiv 2025.04.06.647456; doi: https://doi.org/10.1101/2025.04.06.647456 Share This Article: Copy Citation Tools Functional and structural characterization of AtAbf43C: An exo-1,5-⍺-L-arabinofuranosidase from Acetivibrio thermocellus DSM1313 Joey L. Galindo, Philip D. Jeffrey, Angela Zhu, A. James Link, Jonathan M.
Conway bioRxiv 2025.04.06.647456; doi: https://doi.org/10.1101/2025.04.06.647456 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Molecular Biology Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41913) Biophysics (21436) Cancer Biology (18578) Cell Biology (25482) Clinical Trials (138) Developmental Biology (13372) Ecology (19889) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15599) Genomics (22483) Immunology (17728) Microbiology (40365) Molecular Biology (17163) Neuroscience (88540) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15130) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9818) Zoology (2269)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.