Full text
68,406 characters
· extracted from
preprint-html
· click to expand
How do bacterial extracellular Contractile Injection Systems bind target cells? A remarkable diversity of receptor binding domains | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results How do bacterial extracellular Contractile Injection Systems bind target cells? 
A remarkable diversity of receptor binding domains Nimrod Nachmias , Zhiren Wang , Xiao Feng , Feng Jiang , View ORCID Profile Asaf Levy doi: https://doi.org/10.1101/2025.05.13.653841 Nimrod Nachmias 1 Department of Plant Pathology and Microbiology, Institute of Environmental Science, The Faculty of Agriculture, Food, and Environment, The Hebrew University of Jerusalem , Rehovot, Israel Find this author on Google Scholar Find this author on PubMed Search for this author on this site Zhiren Wang 2 NHC Key Laboratory of Systems Biology of Pathogens, Key Laboratory of Pathogen Infection Prevention and Control (Ministry of Education), State Key Laboratory of Respiratory Health and Multimorbidity, National Institute of Pathogen Biology, Chinese Academy of Medical Sciences & Peking Union Medical College , Beijing, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Xiao Feng 2 NHC Key Laboratory of Systems Biology of Pathogens, Key Laboratory of Pathogen Infection Prevention and Control (Ministry of Education), State Key Laboratory of Respiratory Health and Multimorbidity, National Institute of Pathogen Biology, Chinese Academy of Medical Sciences & Peking Union Medical College , Beijing, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Feng Jiang 2 NHC Key Laboratory of Systems Biology of Pathogens, Key Laboratory of Pathogen Infection Prevention and Control (Ministry of Education), State Key Laboratory of Respiratory Health and Multimorbidity, National Institute of Pathogen Biology, Chinese Academy of Medical Sciences & Peking Union Medical College , Beijing, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: alevy{at}mail.huji.ac.il jiangfenguva{at}126.com Asaf Levy 1 Department of Plant Pathology and Microbiology, Institute of Environmental Science, The Faculty of Agriculture, Food, and 
Environment, The Hebrew University of Jerusalem , Rehovot, Israel Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Asaf Levy For correspondence: alevy{at}mail.huji.ac.il jiangfenguva{at}126.com Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Extracellular contractile injection systems (eCISs) represent a diverse family of bacteriophage tail-derived toxin delivery complexes in prokaryotes. eCISs are vital for microbial interaction with hosts, and they use tail fiber proteins for target cell binding. However, the identity of the tail fiber proteins and their target cells are unknown. Here, we conducted a comprehensive exploration of eCIS tail fiber genes, providing insights into their remarkable diversity, target cells, functional adaptations, and intriguing evolutionary dynamics. We identified 3,445 eCIS tail fiber proteins found in 2,585 eCIS loci from 1,069 microbes. These fibers can be categorized by five new N-terminal domains responsible for tail fiber attachment to eCIS baseplates. Importantly, we leveraged structure prediction to divide the fibers into 276 structural clusters and dissected 1,177 domain fold families which likely mediate glycan and protein binding on the cell surface of eukaryotes or bacterial targets. These rapidly evolving domains are likely acquired from diverse genomes of eukaryotes, bacteria, and viruses. Finally, we experimentally showed that a candidate tail fiber from Paenibacillus eCIS can bind and direct effector injection into THP-1 human monocyte-like cells, and may bind D-mannose on the cell surface. This study reveals the exceptional diversity of eCIS target specificity determinants, suggests new eCIS target cells in Nature, and provides thousands of proteins that adhere to different cell types. 
Introduction Extracellular contractile injection systems (eCISs) are a class of bacteriophage tail-like protein delivery systems encoded in bacteria and archaea that affect virulence and host development, and potentially intermicrobial interactions 1 – 5 . eCISs are utilized by bacteria for delivery of various cargo (effector) proteins, mostly toxins, to recipient cells and are emerging as a promising biotechnological tool for modular protein delivery 6 . eCIS cargo proteins display highly adaptive versatility with an N-terminal signal-dependent effector loading mechanism 7 . Initially discovered as insecticidal agents in Serratia entomophila , eCISs were denoted as the antifeeding prophage (Afp), which causes amber disease in the New Zealand grass grub 3 , 8 . Similar eCIS operons were independently identified in entomopathogenic nematode symbionts as the Photorhabdus virulence cassettes (PVC), which deliver insecticidal toxins 1 , 9 , and in Pseudoalteromonas luteoviolacea, where they induce metamorphosis of its host, the marine tubeworm 2 , 10 . eCIS complexes have been recently identified in Algoriphagus machipongonensis 5 , which contain unique structural features. Notably, some eCIS related systems function intracellularly such as in Streptomyces 11 , 12 , where they deliver toxins for self-elimination in response to stress, and in the multicellular cyanobacterium Anabaena , where the system is thylakoid-anchored (denoted as thylakoidCIS, tCIS) 13 via membrane bound tail-fibers. These fascinating discoveries highlight the versatility of CISs and their presence and significance within various microbes, both extracellularly and intracellularly. eCIS-related genomic loci have been extensively studied computationally, which involved analysis of taxonomic and ecological distribution and classification of various eCIS 4 , 14 . 
A wide genomic analysis revealed that approximately 2.2% of sequenced bacteria and archaea contain these systems, with a high prevalence in terrestrial, aquatic, and invertebrate-associated bacteria and unexplained depletion in mammalian pathogens 4 . Nevertheless, identification of tail fiber genes, which are encoded by genes such as Afp13 and Pvc13 of Afp and PVC, respectively, is lacking in most eCIS operons. This missing piece is significant, as tail fibers are the putative target specificity determinants of these systems, likely adhering to specific receptors. Identifying tail fiber genes is challenging due to their rapid diversification 15 under selective pressure to recognize varied targets 16 . This sequence divergence, coupled with their inherent flexibility preventing structural resolution, complicates traditional homology-based discovery. Here, we addressed this challenge utilizing a domain-based computational approach to identify rapidly evolving tail fiber genes, discovering 3,445 tail fibers across 2,585 eCIS systems from 1,069 microbial genomes. Our analysis revealed five novel N-terminal baseplate anchor domains and categorized the fibers into 276 structural clusters comprising over 1,000 domain fold families. Functional validation showed that a receptor binding domain from a Paenibacillus sp. URHA0014 tail fiber that was engineered into the PVC complex enabled specific targeting of the human monocyte-like cell line THP-1. Pretreatment with D-mannose or mutations in key residues in the engineered tail fiber inhibit the target cell recognition of engineered eCIS, pointing to a plausible mechanism of surface glycan binding. These findings illuminate eCIS target specificity mechanisms across the bacterial tree of life, and establish a foundation for engineering programmable protein delivery systems. 
Results Identification of Conserved Domains Leads to Discovery of Novel eCIS Tail Fiber Genes Tail fibers likely determine eCIS target specificity and have the potential to elucidate target identity of largely unexplored eCIS particles. This study aimed to maximize the identification of novel eCIS tail fiber gene candidates to allow characterization of target cell binding machinery. First, we searched the eCIStem database, comprising 1,425 eCIS operons across 1,249 bacterial and archaeal genomes 4 . Our initial sequence similarity search identified 629 genes resembling Afp13 and Pvc13, covering only 44% of eCIS operons, but their homology was partial and of low confidence. Structural predictions using AlphaFold2-multimer 17 , 18 revealed that Afp13 resembles a human adenovirus shaft with a beta-spiraled shaft and Ig-like knob, while Pvc13 exhibits a chimeric structure of the same shaft with a phage needle-like C-terminal region ( Fig. 1a-c ), as was solved in the course of our study 19 . These findings highlight the modular nature of tail fiber genes. To address this inherent variability feature, we used all-against-all BLAST 20 and MMseqs 21 clustering to generate Hidden Markov Models (HMMs) 22 of conserved protein domains ( Fig. 2a ). This led to the identification of novel tail fiber domains. The domain organization suggested high conservation of N-terminal segments, thus we used these regions in jackhmmer search 22 and retrieved 1,269 additional fiber genes, nearly doubling our scope of fiber harboring loci coverage to 68%. Download figure Open in new tab Fig. 1: Structural prediction and analysis of Afp13 and Pvc13 proteins. Structural prediction of Afp13 ( a ) and Pvc13 ( b ) in trimeric form. Both genes contain a shaft region characterized by adenovirus-like repeats forming a characteristic beta-spiral domain. 
Afp13 seems to harbor an Ig-like knob domain reminiscent of adenovirus spike, while in Pvc13, this region seems to be replaced by a phage collar domain, forming a unique chimeric protein. c A Foldseek-multimer-based structural comparison of Afp13 with human adenovirus spike protein (PDB:1QIU). Download figure Open in new tab Fig. 2: Computational identification and characterization of novel tail fibers and their constituting domains. a Bioinformatics workflow for identifying novel eCIS tail fiber genes. Known tail fiber sequences were used to search and generate HMM profiles of conserved domains based on the eCIStem database. N-terminal domains were used in jackhmmer iterative searches against the nr combined database retrieving 3,445 novel tail fiber candidates harboring five major N-terminal baseplate anchor domains termed eBAP1-5 (eCIS Baseplate Anchor Proteins) and various domains. b Schematic representation of an eCIS particle reconstructed from the PVC solved structures (6j0b,6j0f,6j0m,6j0n) 24 with tail fibers containing different eBAP domains at their N-termini anchoring them to the baseplate hub. The baseplate acts as a mounting point for the tail fibers to project outwards and engage target receptors on recipient cells via their C-terminal binding modules. c Structural prediction and comparison highlighting the conserved bundled α-helical folds of the eBAP1 (from PVC13, blue) and eBAP2 (from IMG gene 2505186636, purple) baseplate anchor protein domains, which share structural similarity despite distinct sequences (structure overlay below). d Predicted trimeric structure overlay comparison of a representative eBAP3 domain (from gene 2541268533, green) from eBAP3-containing tail fiber with eBAP4 domain (from tCIS crown gene, PDB:7b5h, brown). “Shoulders” domains are shown with arrows. 
e Domain scheme representation with structure prediction of eBAP5 (DUF6519, magenta) with the distinctive inserted (evolved) domain (highlighted in orange in gene diagram and 3D predicted structure). Analysis of retrieved genes revealed five new conserved N-terminal domains likely serving as baseplate attachment sites, which we termed e CIS B aseplate A nchor P rotein (eBAP1-5) domains ( Fig. 2b ). Striving to maximize tail fiber gene candidates, we expanded the search to a non-redundant combined database which contains 86,785 non-redundant bacterial and archaeal genomes. We identified 2,585 putative eCIS genomic loci using a Pfam-based scoring system that prioritized clusters with multiple eCIS-enriched domains (Supplementary Tables 1,2), with higher scores given to distinctive Pfams such as DUF4157, Pvc16_N, and CIS_tube. This yielded 3,445 fiber-encoding genes while excluding loci with Type-6-secretion related Pfams. We then divided the fibers by N-terminal domains into five groups: eBAP1 and eBAP2 (∼50 amino acids in length) are structurally related and are present in 362 and 603 gene candidates, respectively ( Fig. 2c ). A previously unknown ∼200-amino-acid domain, found in 1,420 genes, contains a “shoulders” 23 structural feature like the tCIS crown domain and was designated eBAP3. The first 200 amino acids of the tCIS crown gene, widespread in 402 genes, were classified as eBAP4 ( Fig. 2d ). Additionally, the DUF6519 domain (eBAP5) was identified in 658 genes, sharing partial similarity with eBAP3/eBAP4 but featuring a distinct 200-amino-acid insertion ( Fig. 2e ). Its presence in 1,114 genes in the InterPro database suggests it may serve as a novel tail fiber marker in phage or other systems as well. These findings expand our ability to characterize eCIS evolution and examine microbial host specificity. 
By defining five novel eBAP domains (Supplementary Tables 3-7), we establish a foundation for future phylogenetic, structural and functional analyses, closing the gap on this elusive structural feature. Phylogenetic Distribution and Genomic Architecture of eCIS Tail Fibers We further sought to explore the distribution of tail fibers across eCIS phylogeny in the light of the newly discovered eBAP domains. Our eCIS phylogeny analysis, based on the phylogenetic tree of the conserved structural protein Afp8 amino acid sequence (which encodes a VgrG protein), showed eBAPs are congruent with the structural evolution of eCIS ( Fig. 3a ). The eBAP1-5 containing eCIS loci were grouped and dispersed each to specific branches, indicating five major events of structural and possibly functional divergence in eCIS evolution, that were likely followed by horizontal gene transfers, as shown by the diverse phyla present in each clade ( Fig. 3a ). We further dissected our observation for each eBAP related clade ( Fig. 3b ). All eBAPs are present in at least some Pseudomonadota eCIS. eBAP1 is exclusive to Pseudomonadota. eBAP2 and eBAP4 are also distributed in Bacteroidota and slightly in Cyanobacteriota, in addition to Pseudomonadota. eBAP3 is evenly distributed in Bacillota and Actinobacteriota, while eBAP5 is correlated with Actinobacteriota. Notably, finding large amounts of fiber genes in Actinobacteriota systems is intriguing as it was previously assumed, based on Streptomyces CIS, that this phylum relies on intracellular CIS alone which should lack tail fibers 11 , 25 . Nevertheless, loci that lacked fibers by our analysis were left out of the study as they are considered non-extracellular. Download figure Open in new tab Figure 3. Phylogenetic distribution and evolutionary dynamics of the tail-fiber genes. a. eCIS phylogenetic tree compiled from the afp8 (VgrG) gene sequences. 
The circles at the branch tips represent the presence of eBAP1-5 domains, displaying grouped distribution. The outer ring shows bacterial phylum of the bacterial genome harboring the system, displaying horizontal transfer. b. A stacked bar-plot displaying the normalized share of phylum from a , divided by eBAP groups. Color scheme as in a. c. A stacked bar-plot displaying the gene functions found upstream to the fiber genes, indicating their conserved genomic neighborhood and putative baseplate adaptors. Baseplate_J (parallel to Afp11), GPW_gp25 (parallel to Afp12) and Tail_P2_I Pfams stand out as putative baseplate binding partners (Supplementary Fig. 3). d. Network representation of eBAP1-5 and Pfam domains found on the fiber genes. Node size represents the number of hmmscan hits found on the fiber genes, an edge is drawn each time domains are found on the same gene while edge thickness represents number of connections. eBAPs are colored by their unique colors, 37 discovered sequence-based domains are in light blue and Pfam domains are in pink. e. C1q domains as an example of a HGT candidate. Structural prediction of a C1q harboring fiber gene, domains are marked with lines above identified domains. f. Phylogenetic tree made from all domain sequence alignment of the C1q domains in Pfam database (InterPro API) showing the bacterial branches containing two types of eCIS fibers are nested within eukaryotic clades. g. AF2 predicted structure comparison of two C1q domains found adjacent on the tree. From eCIS fiber (IMG gene ID:2606531960) and from a metazoan domain (A0A9D4MG84) from Dreissena polymorpha (the zebra mussel). We further investigated the fiber’s occurrences and genomic neighborhoods to test its position conservation within the operon by evaluating the fiber loci within eCIS operons and their upstream genes. 
Out of 2,585 gene loci that contain fibers, 23.4% contain more than one fiber gene per locus, with the maximum being five fibers per locus (Supplementary Fig. 2a). We examined sequence and predicted structural similarity in fiber loci that contain multiple genes. We found instances of both structural resemblance yet with remarkable sequence divergence and completely diverse sets in terms of domain composition, demonstrating the versatile evolution of eCIS baseplate composition (Supplementary Fig. 2c-d). Further analysis of each specific fiber found in eCIS loci may shed light on its putative role in target binding. The fact that some eCIS operons acquired multiple tail fiber genes or possess evolved duplicates highlights the versatility and rapid evolution of this locus. Evaluation of operonic organization revealed that the fiber gene position is relatively conserved within the loci: it is found downstream to the tail_P2_I and the AFP11/12 (Baseplate J and gp25) gene homologs in ∼90% of the cases ( Fig. 3c ). This is in line with PVC solved structures demonstrating anchoring of the fibers to Pvc12, which is the homologue of AFP12. We used AlphaFold3 (AF3)-multimer 17 to assess whether the upstream gene serves as the eBAPs’ baseplate adaptor as well. Our prediction raises the possibility that eBAP2 is anchored to the Pvc11 homolog rather than Pvc12 (Supplementary Fig. 3). Additionally, eBAP3 and eBAP5 demonstrated binding to the Tail_P2_I harboring gene suggesting it serves as an adapter similar to the fiber attachment point in R-pyocins 26 . This suggests that the anchoring mechanism of eBAPs might differ, as well, and is mediated by extending loops from core components. Pfam domain analysis reveals functional diversity and possible evolutionary origins of fiber domains To characterize the functional architecture of fiber genes, we performed HMMscan 27 against the Pfam database and visualized domain co-occurrence within the protein as a network ( Fig. 3d ). 
This analysis revealed distinct domain association patterns across the five eBAP groups. Although eCIS operons are characterized by a group of only 11 core domains 4 , we identified 165 Pfam domains (above 12 bit score) and defined 37 novel domains (based on all-against-all BLAST) that are found within the eCIS tail fiber genes that might be responsible for its versatile shape, size, and receptor binding. eBAP1 domains showed restricted association, primarily linked to Av_adeno_fiber domains found in well-studied Photorhabdus PVC and Serratia Afp systems. In contrast, eBAP2-5 domains anchor diverse functional modules. eBAP2 also co-occurred with adenovirus shaft repeat as well as Phage T4 gp36, C1q domain that is prevalent in animal proteins 28 , and the Peptidase_S74 chaperone. The eBAP3 group displayed the most diverse associations with several types of domains. Some domains serve as enzymes, e.g., Peptidase_S74, amidase and the lactonase domains. Others might imply target binding such as H-lectin, SWM_repeat (responsible for adhesion 29 ), Laminin_II, BIG_2/5 domains, etc. Intriguingly, eBAP3 also co-occurred with Collagen-like domains which are known for promoting elasticity in connective tissue yet their role here is unclear. eBAP4 formed a distinct network module, strongly associating with immunoglobulin-like fold domains including PKD, Big11, von Willebrand factor type A (VWA), and carboxypeptidase-regulatory (PKD) domains as well as Invasin_D3 and Cadherin_II attachment proteins, suggesting this type of fibers is involved in target attachment (Supplementary Fig. 4a). Notably, the overall fabric of the network seems to be interconnected by Pfam and newly defined domains suggesting high domain swapping and/or frequent acquisitions. We further used the detected Pfam tail fiber domains for detecting the phylogenetic origin of the domains. Strikingly, we found 64 Pfam domains that display phylogenetic incongruence with bacteria. 
Namely, over 90% of proteins in the InterPro taxonomy database are non-bacterial, which might point out putative donors for cross-kingdom acquisitions (Supplementary Fig. 4b). Notably, several bacterial eCIS sequences (e.g. adenovirus shafts, C1q domains) formed nested clades within metazoan and viral phylogenetic branches rather than clustering as sister groups, a pattern consistent with cross-kingdom horizontal gene transfer rather than shared ancestry ( Fig. 3 e-g, Supplementary Fig. 4b-d). The analysis reveals that eCIS tail fibers evolved functional plasticity through horizontal acquisition of receptor-binding domains from eukaryotes, viruses, and phages. Incorporation of immune-related domains (C1q, immunoglobulins, PKD, VWA) suggests bacterial strategies like host immunity mimicking or interference. This modular evolution mirrors phage adaptability, enabling eCIS systems to target diverse hosts through acquired functional modules. Structural Prediction and Classification of Fibers and Fiber-Domains The complexity and versatility of the observed fibers prompted us to apply structure prediction and alignment tools for comprehensive analysis at two levels: whole-fiber clustering and identification of receptor-binding domains. Our workflow began with clustering the complete fiber database at 70% similarity and coverage, effectively reducing the dataset from 3,445 to 1,098 representative sequences. Since tail fibers are ordered in trimers 30 – 32 we predicted trimeric structures for these representatives using AF2-multimer, establishing the eCIS-fiber 3D-database for further analysis. The structural characterization proceeded through parallel pathways (Supplementary Fig. 5a): Foldseek structural clustering of complete fibers (at 25% sequence similarity and 70% coverage) and detailed structure-based extraction of all globular units and subunits followed by clustering with equivalent parameters. 
The whole-fiber analysis produced 276 distinct structures, while our domain-level analysis identified 3,515 individual units that further grouped into 1,177 fold families decorating the fibers ( Fig. 4a , Supplementary Fig. 5b). We then conducted a Foldseek search of the novel domains against PDB which identified a variety of shaft-region fiber domains ( Fig. 4b ) and putative C terminal receptor-binding domains ( Fig. 4c ). Download figure Open in new tab Figure 4. Structural analysis of fiber proteins unveils highly polymorphic modules decorated with various putative carbohydrate and protein binding domains. a. Domain Foldseek clusters found on fiber genes in scatter plot representation. Each circle on the plot represents a cluster of domains found on the fibers visualizing cluster size (circle diameter), relative average positions (rainbow color range from violet to red colors), and midpoint of the domain in obtained coordinates (by average). Labels show clusters which represent eBAP1-5 N-terminal domains. eBAP5+MG is a fusion of eBAP5 with adjacent macroglobulin domains. b-c. Exemplary structure overlays representing the middle region ( b ) and the C-terminal region ( c ). d-h. Examples of predicted structures from prominent structural architectures of eBAP1-5 containing genes. d. eBAP1: Adenovirus shaft from Mycetohabitans rhizoxinica HKI 454 (IMG gene ID 650723730). e. eBAP2: Mini-fibers from Derxia gummosa DSM 723 (2529305320), C1q from Aquimarina sp. AU119 (2606531960), S74 from Microscilla marina ATCC 23134 (2639240252). f. eBAP3: C3b/BIG from isolate An92 sp002159175 (ID: NFGZ01000037_8), SLL lectins from Pseudoxanthomonas broegbernensis (2861529714) and Embleya scabrispora (KB889561.1_365), beta-propeller/ collagen from Aromatoleum buckelii (WTVH01000027.1_19). g. eBAP4: Bulky shafts from Cellulomonas sp000688475 and helical extensions from Malonomonas rubra (2588100597). h. 
eBAP5: Macroglobulin and lectin - like domains from Desulfococcus multivorans (ATHJ01000059_23), Mega pectin lyase from Rhizobium grahamii (2535518109), Macroglobulin and pectin lyase from Pseudogulbenkiania ferrooxidans A (644377952) and with hemagglutinin from Thioflavicoccus mobilis (2507114133). We observed that eBAP1, the group that includes tail fibers from the AFP and PVC, almost uniformly carries adeno-shaft repeat folds ( Figure 4d ) followed by collar and needle domains, while in some cases the C-terminus displays some variety, harboring domains with an Ig-like cadherin fold not detected by Pfam search. The eBAP2 group features tail fibers with intertwined triple beta-sheathed shafts resembling phage tail fibers ( Fig. 4e ). Surprisingly, this group encompasses short mini-fibers, which is the most abundant group in our database, harboring putative saccharide-binding domains similar to an RBP from Acinetobacter phage (PDB:6E1R). As described previously, we observed in this group fibers carrying a C1q domain typically associated with human complement systems 28 , 33 and the S74 fold (a chaperone of endosialidase). eBAP3-5 share common “shoulders” substructure yet define distinct groups and putative binding module arsenals Despite their similar “shoulder” regions resembling phage receptor-binding proteins 23 , eBAP3-5 domains differ in sequence identity and overall architecture. We noticed the “shoulder” region backbone chain might be intertwined with adjacent domains (Supplementary Fig. 6) observed in the various subtypes, which might enhance protein stability while promoting evolutionary divergence. eBAP3 fibers display remarkable structural diversity, from minimal attachment-only modules to elongated phage-like β-sheathed shafts ( Fig. 4f ). Their diverse binding domains—including SLL2-like lectins, bacterial Ig folds, and hemagglutinin structures—suggest specialized glycan-binding functions. 
Notably, many eBAP3 fibers combine beta-propeller lectins with collagen-like triple helix shafts connected to a TNFR-like domain, likely providing enhanced flexibility during target engagement. eBAP4 represents the most structurally distinct group, characterized by unique bulky shaft regions first described in tCIS crown proteins 13 ( Fig. 4g ). These domains typically feature 1-2 large α+β globular domains or α-helical extensions, connected to flexible chains decorated with multiple Ig, lectin, and PKD attachment modules in various combinations and copy numbers. The utilization of flexible adhesion chains represents a unique attachment strategy that poses a challenge to solve using cryo-EM techniques. eBAP5 domains contain a signature insertion forming miniature β-barrel and β-sandwich folds, with shaft regions often composed of a macroglobulin-like domain (single or repeated) ( Fig. 4h ). Their C-terminal regions frequently display beta-helix pectin-lyase folds alongside specialized lectin and hemagglutinin domains. Collectively, our structural analysis reveals that eCIS particles employ a diverse array of structural frameworks that include carbohydrate and protein-binding domains to recognize and adhere to target cell surfaces, providing insights that could facilitate future eCIS engineering applications. Engineered Fiber-PVC Constructs for Targeted Cell Recognition We hypothesized that the novel tail fiber genes identified in this study could enable engineering of eCIS to achieve programmable cell targeting. Using the PVC particles expressed on a vector in Escherichia coli as a modular chassis, we computationally screened and identified three candidate tail fiber proteins from major clusters (Supplementary Table 8). Comparative analysis of these candidates against wild-type fibers revealed conserved baseplate-attachment domains and variable receptor-binding regions. We prioritized fiber protein-Pb from Paenibacillus sp. 
URHA0014 for experimental validation, which represents a widespread structural group hosting eBAP3 and intriguing domains with C3b and hemagglutinin-like folds. Hemagglutinins are glycoproteins commonly found on the surface of viruses that infect human cells, such as influenza 34 and measles 35 , and thus we predicted that the Paenibacillus tail fiber protein may similarly adhere to human cells. Two additional fibers, fiber-Mr and fiber-Am, were selected as secondary candidates which represent predominant clusters. To test targeting specificity, we engineered a chimeric fiberPb-PVC system by swapping the predicted receptor-binding protein (RBP) fragment of fiberPb with the cognate fragment of the PVC tail fiber ( Fig. 5a ). Positive control PVCs harboring the adenovirus 5 knob domain (R7PVC) were loaded with the plant-derived toxin TcsT that targets eukaryotic cells 7 . CCK-8 cell viability assays on THP-1 human cells demonstrated that treatment with fiberPb-PVC-TcsT reduced cell viability by 62% after 48 hours (p < 0.0001 vs. empty R7PVC controls; Fig. 5b ), with slightly less cell killing than the R7PVC-TcsT positive control. Western blotting confirmed proper TcsT loading in both PVC particles ( Fig. 5c ), while negative-stain electron microscopy observations validated intact assembly of the chimeric PVCs ( Fig. 5d ). The direct binding of fiberPb-PVC to THP-1 membranes was further visualized ( Fig. 5e , red arrows). Importantly, fiberPb-PVC exhibited no detectable killing of human A549, HEK293T or HeLa cells (Supplementary Fig. 7a), demonstrating high THP-1 target cell binding specificity by the engineered PVC. This result is in contrast to R7PVC, which showed low specificity and injected the TcsT toxin into multiple cell lines (Supplementary Fig. 7a). We ran similar tests with our secondary candidates (Supplementary Fig. 
7b-h) but they demonstrated mild toxin injection into the four tested cell lines, again demonstrating the specificity of fiberPb towards the THP-1 cell line. Download figure Open in new tab Figure 5. PVC-Compatible Tail Fiber Engineering. (a) A diagram of the construction of fiberPb-modified PVC. The yellow part represents the predicted receptor recognition fragment from Paenibacillus sp. URHA0014 fiber protein. R7 represents the adenovirus 5 (Ad5) binding domain (Ad5-knob(RGD/PK7)). (b) Killing of THP-1 cells by PVC complexes at 0.5 mg/mL concentration after 48h. Empty R7PVC was used as negative control and R7PVC loaded with TcsT was used as positive control. The experiment was conducted in quadruplicate and analyzed with two-sided corrected Welch’s t-test. (c) Western blot validation of PVC assembly and protein loading. The presence of the Pvc16 protein indicated that the PVCs were assembled in accordance with the established structure. The flag bands confirmed that the PVCs were loaded with the flag-tagged payload (TcsT). (d) Negative-stain electron micrographs of engineered PVC particles. Scale bar, 100 nm. (e) TEM images of THP-1 cells treated with fiberPb-PVC. Scale bar, 100 nm. The red arrows point to engineered fiberPb-PVC bound to the cell membrane. Mannose-Dependent Inhibition of fiberPb-PVC Cytotoxicity To explore the mechanistic basis of THP-1 cell recognition by the agglutinin-like domain of fiberPb, we examined potential interactions with cell surface carbohydrates. Competitive inhibition assays revealed that pre-incubation of fiberPb-PVC-TcsT with D-mannose increased THP-1 viability by 32% and 42% in a concentration-dependent manner (p = 0.0032 and p = 0.0002, respectively; Fig. 6a ), whereas galactose, N-acetylglucosamine, and fucose showed no inhibitory effects (Supplementary Fig. 8a–c). This suggests that D-mannose may interfere with fiberPb-PVC engagement of THP-1 cells. Download figure Open in new tab Figure 6. 
Mannose-Based Inhibition of Target Cell Recognition. (a) Killing of THP-1 cells by 0.05 mg/mL fiberPb-PVC pre-incubated with D-Mannose gradient after 48h. The cell activity was analyzed with two-sided corrected Welch’s t-test. (b) Identification of putative functional residues on the fiberPb domain. We identified two conserved motifs (“VDIT” and “SGEIVH”) that form a pocket, a hydrophobic extruding loop with an “IAVSPF” motif, and five scattered lysines (Ks) that come together to form a positively charged surface. The bold and colored letters as well as the five lysines mark residues that were mutated for the following experiments. (c) Killing of THP-1 cells by fiberPb-PVC and its sugar-binding mutants at 0.05 mg/mL concentration after 48h. The putative sugar binding sites (VDIT, IVF, K5) were predicted as in (b). (d) Western blot validation of TcsT loading of the fiberPb-PVC variants in (c). (e) Negative-stain electron micrographs of the fiberPb-PVC variants in (c). Scale bar, 100 nm. AlphaFold2 structural predictions identified three putative target engagement motifs (VDIT, IVF, K5; Fig. 6b ). Mutating the VDIT, IVF and K5 residues reduced cytotoxicity by 82%, 53% and 51%, respectively (p < 0.0001, p < 0.0001, p = 0.0068, Figure 6c and Supplementary Table 10), with the VDIT mutation having the most significant effect. Western blots and negative-stain electron microscopy observation confirmed retained TcsT loading ( Fig. 6d ) and structural integrity across mutants ( Fig. 6e ). These data imply that the VDIT/IVF/K5 motifs may mediate glycan recognition and membrane interactions, although we acknowledge that mannose-based inhibition acting via alternative mechanisms (e.g., steric hindrance from mannose binding to THP-1 receptors) cannot be excluded. Notably, R7PVC-TcsT activity remained unaffected by D-mannose (Supplementary Fig. 8d-f), indicating that the inhibition is specific to fiberPb binding to THP-1 cells. 
This finding implies a potential structural or functional specificity in the recognition mechanism between fiberPb and cell surfaces. Further validation via glycan arrays or surface plasmon resonance is required to conclusively establish mannose as a direct ligand for fiberPb. Discussion eCIS have been studied for 20 years now 36 in a small number of prokaryotes with a focus on their caused phenotypes, assembly, 3D structures, and translocated effectors. However, there is hardly any data about the binding mechanisms of eCIS to the target cells. This study reveals the scope of receptor binding capacity harbored by bacterial eCIS. eCIS achieves target specificity through modular tail fiber gene repertoire with a spectacular structural diversity for a single gene locus. We defined the evolutionarily conserved novel baseplate-anchoring domains (eBAP1–5) and highly diverse receptor-binding modules. The eBAPs divide the eCIS into five separated eCIS structural subtypes that were massively horizontally transferred within bacterial phyla ( Fig. 3a ). By combining computational domain discovery with structural prediction, we identified 3,445 tail fiber proteins across 2,585 eCIS operons, resolving a critical gap in understanding how these systems recognize host cells. Our complex predictions suggest eBAP domains anchor fibers to eCIS base plates via flexible anchor points, enabling integration of structurally plastic C-terminal architectures probably acquired via horizontal gene transfers (HGTs) from viruses, bacteria, and eukaryotes. Functional validation of Paenibacillus fiberPb against a human cell line underscores the biotechnological potential of these findings in examining and redirecting the binding specificity of eCIS. 
Given that eCIS is encoded by a large variety of environmental bacteria from plants, invertebrates, fungi, soil, and aquatic environments 4 , we propose that the endogenous tail fibers can be used for certain environmental and agricultural applications, and perhaps can be used to bind and enable transfer of proteins into eukaryotic cells that lack genetic systems, such as those of different crops, fungi, and insect pests. Competition assays demonstrated that D-mannose treatment increases THP-1 viability in a dose-dependent manner, while mutagenesis of predicted glycan-binding motifs (VDIT/IVF/K5) reduced cytotoxicity. These results align with structural predictions of putative binding pockets but require validation via direct binding assays. We look forward to the elucidation of more binding mechanisms, as our large-scale identification can serve as a resource for the scientific community. The structural diversity of eCIS fibers is staggering: clustering revealed 276 distinct fiber types and 1,177 domain folds, many resembling eukaryotic immune or adhesion proteins. This suggests eCIS systems employ molecular mimicry to adhere to host cells—a strategy parallel to bacterial and viral cell entry. For instance, eBAP3 fibers combine β-propeller lectins with a flexible collagen-like helical shaft, which probably allows flexible movement of the connected TNFR-like C-terminal domain, a valuable clue for flexible target engagement yet to be described in the phage or virus literature. eBAP5 fibers incorporate macroglobulin-like repeats, which might play an additional role such as immune interference. Such modularity enables niche specialization, as evidenced by eBAP5’s dominance in Actinobacteriota, a phylum previously thought to rely solely on intracellular systems. Overall, the eCIS specificity determinant displays plasticity reminiscent of what is observed in organisms involved in evolutionary arms races, such as phages and viruses. 
This work establishes eCIS tail fibers as versatile scaffolds for synthetic biology. We acknowledge the lack of solved atomic structures validating our findings. Future efforts should prioritize resolving atomic-level whole-fiber and fiber-receptor interactions and leveraging the 1,177 identified domain folds for therapeutic and biotechnological design. By bridging evolutionary insights with functional validation, we provide a roadmap for harnessing eCIS diversity in targeted protein delivery and ecological studies. Methodology Computational Identification of Tail Fiber Domains To systematically identify conserved domains in eCIS tail fibers, we developed a multi-stage computational workflow. Initial sequence similarity searches using all-versus-all BLAST (-evalue 1e-10, -ungapped, filtered for low coverage of 50% and fragment size between 10-500) identified candidate fiber genes across all gene candidates. Conserved regions were subsequently clustered using MMseqs2 21 and CD-HIT 37 at gradient identity thresholds (40%-30%) to account for sequence divergence. Clusters with fewer than 5 members were discarded. Multiple sequence alignments generated via Clustal Omega formed the basis for constructing Hidden Markov Models (HMMs) using HMMER 38 , which were integrated with the Pfam 35.0 database 39 through hmmpress. The combined HMM database was scanned with hmmscan (e-value ≤1e-5), and domain boundaries were resolved using the CATH 40 structural classification toolkit (cath-resolve-hits tool). Identification of eCIS clusters within nr combined database For comprehensive eCIS identification in our non-redundant combined database, we employed a Pfam-anchored search strategy. We screened genomes for gene clusters (max. 5 genes apart) housing Pfams found to be enriched in eCIS operons in a previous study 4 . 
We scored putative eCIS loci by Pfam presence and gave a higher score for marker eCIS Pfams such as DUF4157, Pvc16_N, CIS_tube, DUF6519, etc. (see Supplementary Table 1). We then pooled all amino acid sequences of genes in loci range plus five genes from the two edges. We searched the five newly defined baseplate-anchoring (eBAP1-5) domains as markers using jackhmmer iterative searches (set to 3 iterations), enabling detection of rapidly evolving fiber genes that traditional homology searches often miss. This approach successfully identified 2,585 eCIS clusters containing 3,445 fiber-encoding genes across 1,069 microbial genomes. Clusters lacking identifiable fiber genes or containing capsid (0) or T6SS (18) Pfams were manually excluded as potentially non-eCIS (usually found near a true locus). Phylogenetic Analysis To examine evolutionary relationships of eCIS tail fibers, we constructed a phylogenetic tree using Afp8 (VgrG protein) sequences, a highly conserved component showing highest presence across nearly all identified eCIS loci and previously proven to reliably represent eCIS phylogeny, similarly to the afp11 core gene 4 . Sequences were aligned with Clustal-omega 41 , and a maximum-likelihood tree was built with IQ-TREE 42 using the LG+I+G4 substitution model. Branch support was assessed through 1,000 ultrafast bootstrap replicates. The resulting tree, visualized on the iTOL 43 website with tree display datasets derived from jackhmmer 22 results followed by cath-resolve-hits filtering, revealed clade-specific distribution patterns of the five eBAP domains, suggesting distinct evolutionary trajectories. Phylogenetic lineages of genomes were retrieved from our database, enabling domain-based phylogenetic profiling to identify potential horizontal gene transfer events between taxonomically distant microorganisms. 
Examination of fiber genes evolutionary dynamics Examination of multi-fiber operons (23.4% of loci) through pairwise alignment and structural comparison revealed both gene duplication events with subsequent diversification and acquisition of entirely different fiber architectures within the same operon, highlighting the dynamic evolutionary processes shaping eCIS target specificity. Genomic neighborhood analysis of genes upstream to fiber loci was conducted by evaluating the dominant strand that encodes most of the eCIS loci (most eCIS operons display operonic directionality) pooling both upstream genes and screening them against the Pfam database using hmmscan 44 . Domain Co-occurrence Network Construction To analyze functional relationships between Pfam domains in eCIS tail fibers, we constructed a domain co-occurrence network using Python’s NetworkX library (version 2.8.4) 45 . First, we performed hmmscan (HMMER 3.3.2) against the Pfam database (version 35.0) 39 with fiber protein sequences using an e-value threshold of 1e-5. Domain hits were filtered to remove overlapping regions, prioritizing matches with higher bit scores when domains competed for the same region. For network construction, each Pfam domain and newly identified eBAP domain was represented as a node. An edge was drawn between two domains if they co-occurred within the same protein sequence. Edge weights were calculated based on the frequency of co-occurrence. Node sizes were scaled proportionally to the number of occurrences of each domain across all fiber genes, and edge thicknesses reflected co-occurrence frequencies. Node colors were assigned based on domain classification: eBAP domains (five distinct colors), novel sequence-based domains (light blue), and Pfam domains (light red). The network was visualized using a force-directed layout algorithm (Kamada-kawai 46 ) to emphasize meaningful interactions of functional domain clusters. 
Identification of Potential Horizontal Gene Transfer Events To identify Pfam domains potentially acquired through horizontal gene transfer (HGT), we evaluated the taxonomic distribution of protein family members for each domain. We first extracted all protein sequences and taxonomic lineages associated with each Pfam domain from the Pfam database and retrieved their taxonomic classifications from the InterPro 47 host API. Domains were flagged as potential HGT candidates when their taxonomic distribution showed phylogenetic incongruence—defined as cases where >90% of protein family members belonged to a non-bacterial kingdom despite being found in our bacterial fiber dataset. For each candidate domain: We calculated kingdom-level representation ratios by dividing the number of sequences from each kingdom (Bacteria, Archaea, Eukaryota, Viruses) by the total number of sequences associated with the domain. We performed sequence alignment between our fiber domain instances and representative sequences from the dominant kingdom using Clustal-omega 41 . We constructed maximum-likelihood phylogenetic trees using IQ-TREE 42 with LG+I+G4 model selection to visualize the phylogenetic relationships between bacterial and non-bacterial sequences. Generated tree was presented with a taxonomic dataset using the ITOL website 43 . Structural prediction and domain detection To elucidate the three-dimensional architecture of eCIS tail fibers, we employed AlphaFold2-multimer 17 to predict trimeric structures for 1,098 representative sequences (clustered at 70% similarity from the full dataset of 3,445 proteins). Trimeric modeling reflected the natural oligomeric state of viral fiber proteins and significantly improved prediction quality compared to monomeric predictions and is widely practiced 30 – 32 . 
For comprehensive domain characterization, we implemented dual structural analysis workflows: whole-fiber clustering using Foldseek (25% sequence similarity threshold) yielded 276 distinct structural groups, while extraction and clustering of individual domains identified 1,177 fold families. For protein domain dissection we designed an algorithm which identifies domains by analyzing secondary structure interactions through a graph-based approach. First, it extracts helices and β-strands using DSSP 48 and represents them as nodes in a graph. Each residue on the secondary structures is assessed for its closest interacting residue (best friend), edges therefore representing spatial proximity of selected interacting residues (8Å threshold between Cα atoms). Connected components in this graph represent a cluster of secondary structure interacting in the manner described and therefore define potential domains, with further refinement through overlap analysis and size filtering. Prediction confidence was assigned using pLDDT scores outputted in AlphaFold models 49 . The tool outputs domain boundaries in tabular format, enabling systematic characterization of predicted structures and facilitating downstream functional analysis. This approach effectively identifies structural domains without relying on sequence homology, making it particularly valuable for novel protein families. To extract (or dissect) protein domains from AlphaFold-predicted structures, we developed a Python-based extraction pipeline utilizing BioPython’s PDB module 50 . The extraction pipeline filters redundant domains with similar boundaries (±4 residue positions), prioritizing regions with higher confidence scores. Each domain is saved as an individual PDB file with preserved residue numbering and chain identifiers, enabling direct mapping to original coordinates. 
These predicted structures were clustered and compared against the Protein Data Bank 51 using Foldseek’s threading-based algorithm to identify structural homologs and infer potential receptor-binding functions. This approach overcame limitations of sequence-based methods for these rapidly evolving proteins, enabling identification of domains with structural similarity to carbohydrate-binding modules and receptor-binding proteins despite low sequence conservation. Bacterial Strains and Cell Culture Escherichia coli strains DH5α and EPI300 were cultured in Luria-Bertani (LB) broth at 37°C with shaking at 200 rpm. Mammalian cell lines were maintained at 37°C in a humidified 5% CO₂ atmosphere. HeLa and HEK293T cells were cultured in Dulbecco’s Modified Eagle’s Medium (DMEM, Gibco) supplemented with 10% fetal bovine serum (FBS, Gibco), while A549 and THP-1 cells were cultured in RPMI 1640 medium (Gibco) supplemented with 10% FBS. Plasmid Construction Predicted tail fiber fragments were commercially synthesized (Tsingke Biotechnology, China) and cloned into the PVC-expressing plasmid pCNM3 (see supplementary table 9). Site-directed mutations were introduced using homologous recombination. PCR amplification was performed using Tks Gflex™ DNA Polymerase (Takara), and fragments were assembled using T4 DNA Ligase (New England Biolabs). To produce functional PVC complexes, EPI300 cells were co-transformed with three plasmids: modified pCNM3_fibers (structural components), pBR-LysR (regulatory elements), and pBBRN_TcsT (TcsT toxin payload). Control strains were generated with either empty pBBRN (negative control) or wild-type pCNM3 with pBBRN_TcsT (positive control). Plasmids and sequences are listed in Supplementary Tables 2 and 3, respectively. PVC Purification PVC complexes were purified according to previously described methods with modifications. Briefly, transformed EPI300 cells were grown in 200 mL LB broth at 30°C with shaking at 200 rpm for 24 h. 
Cells were harvested and lysed in 12 mL buffer P (25 mM Tris pH 7.4, 140 mM NaCl, 3 mM KCl, 200 μg/mL lysozyme, 50 μg/mL DNase I, 0.5% Triton X-100, 5 mM MgCl₂, and 1× protease inhibitor) for 45 min at 37°C. After centrifugation at 12,000 rpm for 6 min, the supernatant was ultracentrifuged at 200,000 × g for 75 min at 4°C. The pellet was resuspended in 1 mL ice-cold PBS and centrifuged at 20,000 × g for 10 min at 4°C. The resulting supernatant underwent a second ultracentrifugation at 200,000 × g for 75 min at 4°C. The final pellet was resuspended in 400 μL ice-cold PBS, clarified by centrifugation at 20,000 × g for 10 min at 4°C, and the supernatant containing PVC particles was stored at 4°C. PVC Verification PVC loading was verified by Western blotting. Purified PVC complexes were mixed with 2× Laemmli Sample Buffer (Bio-Rad), heated at 99°C for 10 min, and separated on 12% SDS-PAGE gels (Vazyme) at 150 V for 1 h. Proteins were transferred to PVDF membranes, probed with appropriate antibodies (Supplementary Table 2), and visualized using a Bio-Rad ChemiDoc system. Cell Viability Assays Cell viability was assessed using the Cell Counting Kit-8 (CCK8, MedChemExpress). HeLa, HEK293T, and A549 cells were seeded in 96-well plates at 1×10⁵ cells/mL and incubated overnight. THP-1 cells were seeded at 1×10⁶ cells/mL and differentiated with 0.1 μg/mL phorbol-12-myristate-13-acetate (PMA). Cells were treated with purified PVC complexes (50 μg/mL) for 48 h at 37°C. Culture medium was then replaced with fresh medium containing CCK8 solution and incubated for 1 h at 37°C. Absorbance at 450 nm was measured using a TECAN microplate reader. Electron Microscopy PVC Assembly Verification PVC assembly was confirmed by negative staining electron microscopy. Briefly, 5 μL of purified PVC sample was applied to a glow-discharged holey-carbon-coated copper TEM grid (Quantifoil) for 60 s. 
After removing excess liquid, the grid was stained twice with 2% uranyl acetate and air-dried at room temperature before imaging with an FEI Tecnai (G2 Spirit TWIN) electron microscope at 80 kV. Cell-Binding Visualization To visualize PVC binding to target cells, cells were seeded in 12-well plates and incubated with PVC complexes (0.5 mg/mL) for 3 h. After gentle scraping, the cells were collected by centrifugation at 1,000 rpm for 5 min, and the supernatant was replaced with 2.5% glutaraldehyde for fixation. The pellet was fixed for 2 h at room temperature and stored at 4°C. Following dehydration, resin infiltration, embedding, and ultrathin sectioning, samples were placed on glow-discharged carbon-coated gold TEM grids, stained with 2% uranyl acetate, and imaged using an FEI Tecnai (G2 Spirit TWIN) electron microscope at 100 kV. Data Availability Raw data: 10.5281/zenodo.15274396 eCIS fibers 3D database and clustered domains: 10.5281/zenodo.15277912 Code Availability https://github.com/Nimrod198/eCIS_tail_fibers Acknowledgements This work is supported by the National Key R&D Program of China (2023YFE0113400). A.L. is generously supported by the Israel Science Foundation (grants 1535/20 and 3062/20), the Volkswagen Foundation (ZN4041), and the Israeli Ministry of Innovation, Science, and Technology (China-Israel collaboration, proposal 005652). F.J. is also supported by the CAMS Innovation Fund for Medical Sciences (2021-I2M-1-037 and 2023-I2M-2-001) and the Non-profit Central Research Institute Fund of Chinese Academy of Medical Sciences (2023-PT310-04). We thank the Core Facilities and Service Centers, and Dr. Jingdong Song, NIPB, CAMS&PUMC for assistance with ultracentrifugation and electron microscopy work. Reference 1. ↵ Yang , G. , Dowling , A. J. , Gerike , U. , ffrench-Constant , R. H . & Waterfield , N. R . Photorhabdus virulence cassettes confer injectable insecticidal activity against the wax moth . J. Bacteriol . 188 , 2254 – 2261 ( 2006 ). 
OpenUrl Abstract / FREE Full Text 2. ↵ Shikuma , N. J. et al. Marine tubeworm metamorphosis induced by arrays of bacterial phage tail-like structures . Science 343 , 529 – 533 ( 2014 ). OpenUrl Abstract / FREE Full Text 3. ↵ Hurst , M. R. H. , Glare , T. R. & Jackson , T. A . Cloning Serratia entomophila antifeeding genes--a putative defective prophage active against the grass grub Costelytra zealandica . J. Bacteriol . 186 , 5116 – 5128 ( 2004 ). OpenUrl Abstract / FREE Full Text 4. ↵ Geller , A. M. et al. The extracellular contractile injection system is enriched in environmental microbes and associates with numerous toxins . Nat. Commun . 12 , 3743 ( 2021 ). OpenUrl CrossRef PubMed 5. ↵ Xu , J. et al. Identification and structure of an extracellular contractile injection system from the marine bacterium Algoriphagus machipongonensis . Nat Microbiol 7 , 397 – 410 ( 2022 ). OpenUrl CrossRef PubMed 6. ↵ Kreitz , J. et al. Programmable protein delivery with a bacterial contractile injection system . Nature 616 , 357 – 364 ( 2023 ). OpenUrl CrossRef PubMed 7. ↵ Jiang , F. et al. N-terminal signal peptides facilitate the engineering of PVC complex as a potent protein delivery system . Sci Adv 8 , eabm2343 ( 2022 ). 8. ↵ Hurst , M. R. H. , Beard , S. S. , Jackson , T. A. & Jones , S. M . Isolation and characterization of the Serratia entomophila antifeeding prophage . FEMS Microbiol. Lett . 270 , 42 – 48 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 9. ↵ Vlisidou , I. et al. The virulence cassettes deliver protein effectors directly into target eukaryotic cells . Elife 8 , ( 2019 ). 10. ↵ Ericson , C. F. et al. A contractile injection system stimulates tubeworm metamorphosis by translocating a proteinaceous effector . Elife 8 , ( 2019 ). 11. ↵ Casu , B. , Sallmen , J. W. , Schlimpert , S. & Pilhofer , M . Cytoplasmic contractile injection systems mediate cell death in Streptomyces . Nat Microbiol 8 , 711 – 726 ( 2023 ). OpenUrl CrossRef PubMed 12. ↵ Vladimirov , M. 
, Zhang , R. X. , Mak , S. , Nodwell , J. R. & Davidson , A. R . A contractile injection system is required for developmentally regulated cell death in Streptomyces coelicolor . Nat. Commun . 14 , 1469 ( 2023 ). OpenUrl CrossRef PubMed 13. ↵ Weiss , G. L. et al. Structure of a thylakoid-anchored contractile injection system in multicellular cyanobacteria . Nat Microbiol 7 , 386 – 396 ( 2022 ). OpenUrl CrossRef PubMed 14. ↵ Genome-wide Identification and Characterization of a Superfamily of Bacterial Extracellular Contractile Injection Systems . Cell Rep . 29 , 511 – 521.e2 ( 2019 ). OpenUrl CrossRef PubMed 15. ↵ Roux , S. et al. Ecology and molecular targets of hypermutation in the global microbiome . Nat Commun 12 , 3076 ( 2021 ). OpenUrl CrossRef PubMed 16. ↵ Baltrus , D. A. , Clark , M. , Smith , C. & Hockett , K. L . Localized recombination drives diversification of killing spectra for phage-derived syringacins . ISME J 13 , 237 – 249 ( 2019 ). OpenUrl CrossRef PubMed 17. ↵ Evans , R ., et al. Protein complex prediction with AlphaFold-Multimer . bioRxiv 2021.10.04.463034 ( 2022 ) doi: 10.1101/2021.10.04.463034 . OpenUrl Abstract / FREE Full Text 18. ↵ Choi , S. et al. Improved prediction of protein-protein interactions by a modified strategy using three conventional docking software in combination . Int J Biol Macromol 252 , 126526 ( 2023 ). 19. ↵ Marín-Arraiza , L. et al. Structural characterization of an extracellular contractile injection system from Photorhabdus luminescens in extended and contracted states . bioRxiv 2025.04.20.649488 ( 2025 ) doi: 10.1101/2025.04.20.649488 . OpenUrl Abstract / FREE Full Text 20. ↵ Basic local alignment search tool. Journal of Molecular Biology 215 , 403 – 410 ( 1990 ). OpenUrl CrossRef PubMed Web of Science 21. ↵ Steinegger , M. & Söding , J . MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets . Nature Biotechnology 35 , 1026 – 1028 ( 2017 ). OpenUrl CrossRef PubMed 22. 
↵ Johnson , L. S. , Eddy , S. R. & Portugaly , E . Hidden Markov model speed heuristic and iterative HMM search procedure . BMC Bioinformatics 11 , 431 ( 2010 ). 23. ↵ Farenc , C. et al. Molecular insights on the recognition of a Lactococcus lactis cell wall pellicle by the phage 1358 receptor binding protein . J Virol 88 , 7005 – 7015 ( 2014 ). OpenUrl Abstract / FREE Full Text 24. ↵ Cryo-EM Structure and Assembly of an Extracellular Contractile Injection System . Cell 177 , 370 – 383.e15 ( 2019 ). OpenUrl CrossRef PubMed 25. ↵ Vladimirov , M . Extracellular Contractile Injection Systems (eCIS) - Characterization of Conserved Phage Tail-like Structures in Streptomyces . ( 2021 ). 26. ↵ Ge , P. et al. Action of a minimal contractile bactericidal nanomachine . Nature 580 , 658 – 662 ( 2020 ). OpenUrl CrossRef PubMed 27. ↵ Potter , S. C. et al. HMMER web server: 2018 update . Nucleic Acids Res 46 , W200 – W204 ( 2018 ). OpenUrl CrossRef PubMed 28. ↵ Son , M. , Diamond , B. & Santiago-Schwarz , F . Fundamental role of C1q in autoimmunity and inflammation . Immunol Res 63 , 101 – 106 ( 2015 ). OpenUrl CrossRef PubMed 29. ↵ Guo , S. et al. Role of Ca 2+ in folding the tandem β-sandwich extender domains of a bacterial ice-binding adhesin . FEBS J 280 , 5919 – 5932 ( 2013 ). OpenUrl CrossRef PubMed 30. ↵ Gonzalez-Serrano , R. et al. Distantly related Alteromonas bacteriophages share tail fibers exhibiting properties of transient chaperone caps . Nature Communications 14 , 1 – 14 ( 2023 ). OpenUrl CrossRef PubMed 31. Goulet , A. & Cambillau , C . Structure and Topology Prediction of Phage Adhesion Devices Using AlphaFold2: The Case of Two Phages . Microorganisms 9 , ( 2021 ). 32. ↵ Cambillau , C. & Goulet , A . Exploring Host-Binding Machineries of Mycobacteriophages with AlphaFold2 . J Virol 97 , e0179322 ( 2023 ). OpenUrl CrossRef PubMed 33. ↵ Fraser , D. A. , Laust , A. K. , Nelson , E. L. & Tenner , A. J . 
C1q differentially modulates phagocytosis and cytokine responses during ingestion of apoptotic cells by human monocytes, macrophages, and dendritic cells . J Immunol 183 , 6175 – 6185 ( 2009 ). OpenUrl Abstract / FREE Full Text 34. ↵ Wu , N. C. & Wilson , I. A . Influenza Hemagglutinin Structures and Antibody Recognition . Cold Spring Harb Perspect Med 10 , ( 2020 ). 35. ↵ Hashiguchi , T. et al. Crystal structure of measles virus hemagglutinin provides insight into effective vaccines . Proceedings of the National Academy of Sciences 104 , 19535 – 19540 ( 2007 ). OpenUrl Abstract / FREE Full Text 36. ↵ Hurst , M. R. H. , Glare , T. R. & Jackson , T. A . Cloning Serratia entomophila Antifeeding Genes—a Putative Defective Prophage Active against the Grass Grub Costelytra zealandica . Journal of Bacteriology ( 2004 ) doi: 10.1128/jb.186.15.5116-5128.2004 . OpenUrl CrossRef 37. ↵ Fu , L. , Niu , B. , Zhu , Z. , Wu , S. & Li , W . CD-HIT: accelerated for clustering the next-generation sequencing data . Bioinformatics 28 , 3150 ( 2012 ). OpenUrl CrossRef PubMed Web of Science 38. ↵ Eddy , S. R . Accelerated Profile HMM Searches . PLoS Comput. Biol . 7 , e1002195 ( 2011 ). OpenUrl CrossRef PubMed 39. ↵ Finn , R. D. et al. Pfam: the protein families database . Nucleic Acids Res . 42 , D222 – 30 ( 2014 ). OpenUrl CrossRef PubMed Web of Science 40. ↵ Waman , V. P. et al. CATH v4.4: major expansion of CATH by experimental and predicted structural data . Nucleic Acids Research 53 , D348 ( 2024 ). 41. ↵ Sievers , F. et al. Fast, scalable generation of high-quality protein multiple sequence alignments using Clustal Omega . Molecular systems biology 7 , ( 2011 ). 42. ↵ Nguyen , L.-T. , Schmidt , H. A. , von Haeseler , A. & Minh , B. Q . IQ-TREE: a fast and effective stochastic algorithm for estimating maximum-likelihood phylogenies . Mol Biol Evol 32 , 268 – 274 ( 2015 ). OpenUrl CrossRef PubMed 43. ↵ Letunic , I. & Bork , P . 
Interactive Tree Of Life (iTOL) v4: recent updates and new developments. Nucleic Acids Res. 47, W256–W259 (2019). OpenUrl CrossRef PubMed 44. ↵ Finn, R. D., Clements, J. & Eddy, S. R. HMMER web server: interactive sequence similarity searching. Nucleic Acids Research 39, W29 (2011). 45. ↵ Reference — NetworkX 3.4.2 documentation. https://networkx.org/documentation/stable/reference/index.html. 46. ↵ Kamada, T. & Kawai, S. An algorithm for drawing general undirected graphs. Information Processing Letters 31, 7–15 (1989). OpenUrl CrossRef Web of Science 47. ↵ Blum, M. et al. InterPro: the protein sequence classification resource in 2025. Nucleic Acids Res 53, D444–D456 (2025). OpenUrl CrossRef PubMed 48. ↵ Gorelov, S., Titov, A., Tolicheva, O., Konevega, A. & Shvetsov, A. DSSP in GROMACS: Tool for Defining Secondary Structures of Proteins in Trajectories. J Chem Inf Model 64, 3593–3598 (2024). OpenUrl CrossRef PubMed 49. ↵ Jumper, J. et al. Highly accurate protein structure prediction with AlphaFold. Nature 596, 583–589 (2021). OpenUrl CrossRef PubMed 50. ↵ Cock, P. J. A. et al. Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics 25, 1422–1423 (2009). OpenUrl CrossRef PubMed Web of Science 51. ↵ Berman, H. M. et al. The Protein Data Bank. Nucleic Acids Res 28, 235–242 (2000). doi:10.1093/nar/28.1.235. OpenUrl CrossRef PubMed Web of Science 52. Burley, S. K. et al. Updated resources for exploring experimentally-determined PDB structures and Computed Structure Models at the RCSB Protein Data Bank. Nucleic Acids Research 53, D564–D574 (2025). OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted May 13, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article.
Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following How do bacterial extracellular Contractile Injection Systems bind target cells? A remarkable diversity of receptor binding domains Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share How do bacterial extracellular Contractile Injection Systems bind target cells? A remarkable diversity of receptor binding domains Nimrod Nachmias , Zhiren Wang , Xiao Feng , Feng Jiang , Asaf Levy bioRxiv 2025.05.13.653841; doi: https://doi.org/10.1101/2025.05.13.653841 Share This Article: Copy Citation Tools How do bacterial extracellular Contractile Injection Systems bind target cells? A remarkable diversity of receptor binding domains Nimrod Nachmias , Zhiren Wang , Xiao Feng , Feng Jiang , Asaf Levy bioRxiv 2025.05.13.653841; doi: https://doi.org/10.1101/2025.05.13.653841 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Microbiology Subject Areas All Articles Animal Behavior and Cognition (7640) Biochemistry (17706) Bioengineering (13902) Bioinformatics (41978) Biophysics (21465) Cancer Biology (18611) Cell Biology (25528) Clinical Trials (138) Developmental Biology (13387) Ecology (19920) Epidemiology (2067) Evolutionary Biology (24332) Genetics (15615) Genomics (22519) Immunology (17747) Microbiology (40424) Molecular Biology (17194) Neuroscience (88662) Paleontology (667) Pathology (2839) Pharmacology and Toxicology (4827) Physiology (7650) Plant Biology (15160) Scientific Communication and Education (2046) Synthetic Biology (4302) 
Systems Biology (9826) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.