Unlocking the biosynthetic potential of Paenibacilli through a genus-wide exploration of gene clusters for secondary metabolite production

doi:10.1101/2025.01.22.634348

Unlocking the biosynthetic potential of Paenibacilli through a genus-wide exploration of gene clusters for secondary metabolite production

2025 · doi:10.1101/2025.01.22.634348

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 53,801 characters · extracted from preprint-html · click to expand

Unlocking the biosynthetic potential of Paenibacilli through a genus-wide exploration of gene clusters for secondary metabolite production | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Unlocking the biosynthetic potential of Paenibacilli through a genus-wide exploration of gene clusters for secondary metabolite production Lijie Song , View ORCID Profile Matin Nuhamunada , View ORCID Profile Tilmann Weber , View ORCID Profile Ákos T. 
Kovács doi: https://doi.org/10.1101/2025.01.22.634348 Lijie Song 1 DTU Bioengineering, Technical University of Denmark , 2800 Kgs Lyngby, Denmark Find this author on Google Scholar Find this author on PubMed Search for this author on this site Matin Nuhamunada 2 The Novo Nordisk Foundation Center for Biosustainability, Technical University of Denmark , 2800 Kgs Lyngby, Denmark 4 Faculty of Biology, Universitas Gadjah Mada , 55281 Yogyakarta, Indonesia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Matin Nuhamunada Tilmann Weber 2 The Novo Nordisk Foundation Center for Biosustainability, Technical University of Denmark , 2800 Kgs Lyngby, Denmark Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tilmann Weber For correspondence: a.t.kovacs{at}biology.leidenuniv.nl tiwe{at}biosustain.dtu.dk Ákos T. Kovács 1 DTU Bioengineering, Technical University of Denmark , 2800 Kgs Lyngby, Denmark 3 Institute of Biology, Leiden University , 2333BE Leiden, Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ákos T. Kovács For correspondence: a.t.kovacs{at}biology.leidenuniv.nl tiwe{at}biosustain.dtu.dk Abstract Full Text Info/History Metrics Supplementary material Preview PDF ABSTRACT The genus Paenibacillus is a prolific producer of secondary metabolites with diverse ecological and industrial applications. However, a comprehensive overview of the biosynthetic gene cluster (BGC) diversity and distribution throughout the genus has been limited. Here, we performed large-scale genome mining on 284 high-quality genomes and generated a non-redundant dataset of 126 representative genomes to explore the biosynthetic potential of this genus. 
A total of 3,273 BGCs were identified from the 284 genomes that clustered into 1,013 gene cluster families (GCFs), with 98.7% classified as unknown, indicating vast potential for novel secondary metabolite discovery in the Paenibacillus genus. Comparative analysis revealed significant phylogenetic and clade-specific distribution patterns of GCFs, with certain clades enriched in unique biosynthetic pathways while others exhibited low similarity to known BGCs, suggesting evolutionary adaptation to diverse ecological niches. This study uncovers the rich and largely untapped biosynthetic potential of the genus Paenibacillus , providing a foundation for future exploration of its natural products and their applications in biotechnology and medicine. IMPORTANCE Bacterial secondary metabolites have been instrumental in the development of antibiotics, antifungals, and other bioactive compounds. The genus Paenibacillus is an underexplored source of such metabolites, with significant potential for novel discoveries. By integrating genome mining and phylogenetic analysis, this study systematically characterizes the diversity, distribution, and novelty of biosynthetic gene cluster across the genus. The identification of clade-specific biosynthetic patterns and numerous unknown gene cluster families highlights Paenibacillus as a promising target for uncovering novel compounds with ecological and therapeutic relevance. These findings not only expand our understanding of bacterial secondary metabolite biosynthesis but also offer new opportunities for the development of sustainable biotechnological applications. INTRODUCTION Secondary metabolites (SMs) are classically defined as groups of organic molecules that are not directly involved in the growth, development, or reproduction of producer organism, but that may confer ecological or physiological advantages unique to their producers ( 1 ). SMs have a wide range of applications in medicine, agriculture, and industry ( 2 , 3 ). 
Bacteria are prominent SM producers offering a valuable source of bioactive compounds with potential therapeutic properties. These include antibiotics, antifungals, anticancer agents, and immunosuppressants ( 4 ). Since the discovery of streptomycin, chlortetracycline, nystatin, and numerous other antimicrobial compounds derived from Streptomyces in the 1940s to the present day, bacteria have contributed an enormous number of secondary metabolite compounds to the medical and pharmaceutical fields, as well as to scientific research ( 3 ). The synthesis and production of secondary metabolites (SMs) in bacteria is frequently regulated by various genetic and environmental factors. In the case of microbes, the enzymes required for the formation of secondary metabolite compounds are typically encoded by a set of co-localized biosynthetic gene clusters (BGCs). These gene clusters contain all the genes necessary for the synthesis of specific secondary metabolites, including structural, regulatory, and transport-related genes ( 5 ). To better understand and utilize these secondary metabolites, scientists employ various bioinformatic tools and methods to identify and analyze these BGCs. One of the most widely employed bioinformatics tools in this field is antiSMASH, which has been specifically designed for the prediction and analysis of BGCs in bacterial and fungal genomes ( 6 ). BGCFlow is a powerful workflow that integrates antiSMASH with additional tools for data visualization, clustering, and functional annotation, rendering it particularly effective for large-scale comparative studies ( 7 ). The utilization of these tools and connected databases (e.g. MIBiG, antiSMASH-DB, BiG-FAM) ( 8 – 10 ), particularly in conjunction with the advancement of genome sequencing studies and the availability of large-scale datasets, has enabled scientists to efficiently detect BGCs and infer SM products from a diverse range of bacterial species ( 11 – 14 ).
Many bacteria have been identified that possess a high density of BGCs and secondary metabolites. Among these, the Actinomycetes and Bacilli have attracted particular attention due to the abundance of their secondary metabolites, especially of antibiotics and plant-promoting active compounds ( 15 – 21 ). Paenibacillus , originally classified within the Bacillus genus but later reclassified as a separate genus in 1993 ( 22 ), is known to produce a wide range of biologically active secondary metabolites. According to the LPSN database ( https://lpsn.dsmz.de ), the total number of validly published names (including synonyms) for the Paenibacillus genus is 320. The majority of Paenibacillus are found in soil and frequently associated with plants, where these rhizosphere bacteria have been demonstrated to promote plant growth through a range of metabolic capabilities, including biological nitrogen fixation, phosphate dissolution, and the production of plant hormones ( 23 – 25 ). Another aspect of Paenibacillus that has become more notable in recent years is its ability to produce antimicrobial compounds. This has led to the genus becoming a focus of research connected to its potential as a biocontrol agent and natural antibiotic producer. For example, polymyxin, a known antibiotic produced by Paenibacillus polymyxa , is used to treat infections caused by Gram-negative bacteria. Other antimicrobial compounds produced by Paenibacillus spp. include paenibacillin, polyxin, and lantibiotics, among others ( 26 , 27 ). P. polymyxa E681, the representative and most studied strain of the species, has been extensively analyzed for its BGCs, which are responsible for the production of at least six antibiotics, including polymyxin, fusaricidin, tridecaptin, paenilipoheptin, paenilan, and bacillaene-like antimicrobials ( 28 ). Pranav et al.
identified 104 BGCs from the genome sequences of five Paenibacillus alvei strains, and BGCs encoding paenibactin, fusaricidin, and paenibacterin were found to be present in all five strains ( 29 ). Lebedeva et al. investigated the biosynthetic potential of two Paenibacillus sp . strains isolated from caves and revealed 21 and 19 BGCs in those strains ( 30 ). Kim et al . analyzed 89 genomes of Paenibacillus using antiSMASH, and the result revealed a total of 848 BGCs, with a significant majority (716 or 84.4%) classified as unknown, indicating a vast unexplored potential for new antibiotics within these species ( 31 ). Despite extensive research on the biosynthetic gene clusters (BGCs) and bioactive compounds of various Paenibacillus species, an up-to-date overview of the BGC composition and distribution across this genus remains elusive. Here, we report the analysis of 284 genomes of the Paenibacillus genus and demonstrate conserved and phylogenetically distributed BGCs that further support the potential of this genus for discovery of novel bioactive metabolites. RESULTS AND DISCUSSION Genome mining reveals the biosynthetic potential of the Paenibacillus genus In this study, to establish a systematic insight on the BGC composition of the Paenibacillus genus, we obtained all available accession IDs for Paenibacillus genomes from the NCBI database as of 10 October 2024, and there were 1,969 available genome sequences in total. Since the accuracy and completeness of BGC identification are dependent on genome quality ( 12 ), the genomes were filtered according to their assembly level, completeness, and contamination, following the strategy described in the method section. This process yielded 284 genomes that were annotated by GenBank and deemed to be of high quality, and which were subsequently included in the complete dataset for this study (see Table S1). 
The genome size of the 284 genomes ranged from 3.97 Mb to 9.08 Mb, with 89.08% of them falling within the 5-8 Mb range. The GC % varied from 40% to 63.5%, with 96.48% of them falling within the 40-55% range (see Fig. S1 and Table S1). The GTDB-Tk (version 2.4.0) was employed for taxonomic assignment at the species level using GTDB release 09-RS220 ( 32 , 33 ), resulting in a total of 266 genomes assigned to one of the 122 species identified. Of these, 89 were represented by a single genome, while some species have numerous genomes, including P. polymyxa_B (27 genomes), P. odorifer (22 genomes), and P. polymyxa (19 genomes). Additionally, 18 genomes lacked species assignments and were treated as unclassified species (see Table S1). Utilizing antiSMASH v7.1.0 ( 6 ), a total of 3,273 BGCs were predicted from the 284 genomes, with an average of 11.52 BGCs per genome (3,273/284), ranging from 2 to 22 (see Fig. S2 and Table S2). The number of BGCs identified per genome differed significantly between species. For instance, the 9 Paenibacillus_H larvae genomes in this dataset had an average of 17.33 BGCs per genome, whereas the 22 Paenibacillus odorifer genomes had an average of only 5.73. In a previous study that was based on 89 genomes of Paenibacillus available in February 2020, Kim et al . obtained an average of 9.5 BGCs per strain ( 31 ). The higher number of BGCs observed in our dataset may be attributed to the higher percentage of Paenibacillus polymyxa and related species, which are BGC-rich species. No significant correlation was found between genome size and the number of BGCs per genome in this Paenibacillus dataset (see Fig. S3). This finding differs from those of other researchers in the field of actinomycetes, who observed a positive trend between the number of BGCs and genome size ( 34 ).
It is noteworthy that only 16 of the 3,273 BGCs are situated at contig edges, indicating that 99% (3,257/3,273) of the BGCs are complete, and this observation supports the high quality of the dataset for this study ( 35 ). High diversity and novelty of BGCs of Paenibacillus species The 3,273 BGCs identified in this dataset encompass the majority of all BGC types ( Fig. 1a and Table S2). In summary, Non-ribosomal peptide synthetases (NRPS) and PKS-NRPS hybrids make up the majority of all BGCs with 435 and 413 respectively, representing 13.29% and 12.62% of the total BGCs. This finding is consistent with the results of previous studies on Paenibacillus . The third and fourth most abundant BGC types are cyclic lactone inducers and proteusins, which both belong to ribosomally synthesized and post-translationally modified peptides. There is a paucity of comprehensive data regarding the proportion of these BGC types in other published papers, likely due to the recent incorporation of annotations for these BGC types in the more recent version of antiSMASH (V.7.1.0). All genomes contain a variety of BGCs belonging to different types. The most widely distributed BGC is proteusins, which was identified in 234 out of the 284 genomes (82.39%). Other types of BGCs that are widely distributed across all 284 genomes (e.g. more than 50% of the genomes) include cyclic-lactone-autoinducer (190 genomes, 66.90%), lassopeptide (183 genomes, 64.44%), terpene (180 genomes, 63.38%), PKS-NRPS hybrids (175 genomes, 61.62%), and NRPSs (158 genomes, 55.63%) ( Fig. 1b ). Download figure Open in new tab Fig. 1 Biosynthetic gene clusters (BGCs) in the analyzed 284 genomes of the Paenibacillus genus. (a) Abundance of different BGC types. The x axis shows different BGC types, and the y axis shows the total number of each type of BGCs from the 284 genomes. The pie chart shows the proportion of different types of BGCs. 
(b) The distribution of each BGC type across all genomes is shown by the heatmap. Each row represents a BGC type, and each column represents a genome. The bars on the left represent the total numbers of each BGC type, and the bars on top represent the total numbers of BGCs identified in each genome. (c) The pie chart shows the number and percentage of known (similarity>80%) and unknown (similarity≤80%) BGCs according to antiSMASH KnownClusterBlast. The color-coded bar highlights the number and percentage of different degrees of similarity from unknown BGCs: gray indicates no similarity, purple indicates low similarity (less than 40%), and blue indicates medium similarity (between 40 and 80%). To evaluate the potential of the detected BGCs to encode known secondary metabolites, we employed the “KnownClusterBlast” program within the antiSMASH analysis to estimate the similarity of the BGCs in comparison to the known BGCs of the MIBiG database version 3 (Minimum Information about a Biosynthetic Gene cluster) ( 36 ). From the 3,273 BGCs, 552 (16.87%) gene clusters were classified as known with high similarity (>80%) to MIBiG entries, and 2,721 (83.13%) were unknown. This observation is consistent with what Kim et al . found in their study for 89 Paenibacillus genomes, in which 716 (84.4%) of the 848 BGCs they identified were classified as unknown, and the percentage is approximately 85-90% for the Bacillus genus ( 21 , 37 ), whereas the percentage of unknown BGCs reported in the Streptomyces genus is 56.4% ( 38 ). These observations demonstrate the benefit of performing large genome-wide BGC studies in the Paenibacillus genus, where there is more as-yet untapped potential for encoding and synthesizing novel compounds. For the 2,721 unknown BGCs, KnownClusterBlast hits 205 BGCs with medium similarity (40-80%), and 770 BGCs with low similarity (<40%) to MIBiG, and the other 1,746 BGCs do not have any hits to the MIBiG entries ( Fig. 1c and Table S2).
In order to assess the diverse biosynthetic potential across the Paenibacillus genus, BGCs were clustered into gene cluster families (GCFs) by utilizing BiG-SCAPE (Biosynthetic Gene Similarity Clustering and Prospecting Engine) with 0.3 cutoff. BiG-SCAPE is designed to identify BGCs that are likely to produce similar or related secondary metabolites and to group BGCs into GCFs based on their sequence similarity and shared domain architecture ( 39 ). In this process, BiG-SCAPE also incorporates the MIBiG database, which provides information if a GCF is related to known BGCs. In total, 1,013 GCFs were clustered from the 3,273 BGCs predicted from 284 genomes. There are 697 GCFs consisting of only 1 BGC (singletons). This means that no other BGCs in the dataset or from the MIBiG database share a high enough level of similarity with these particular BGCs to be grouped with them in the same family, suggesting they may produce distinct secondary metabolites or are from underexplored biosynthetic pathways. Additionally, the analysis revealed 266 GCFs with 2-10 BGCs, 29 GCFs with 11-30 BGCs, and a further 21 GCFs consisting of more than 30 BGCs (Fig. S3 and Table S3). With the aid of BiG-SCAPE, we could perform a similar comparison on GCFs with known BGCs in the MIBiG database and classify the GCFs as either known or unknown as well as return the hit BGC ID of MIBiG. In the 316 GCFs clustered from >1 BGC, only 10 were classified as known GCFs as they matched with at least one known BGC in the MIBiG database. The remaining 306 were classified as unknown, which are attributed to unknown or novel compounds. The BGC types of the 10 known GCFs are NRPS (3 GCFs), Lanthipeptide (3 GCFs), PKS-NRPS hybrids (2 GCFs), Lassopeptide (1 GCF), and Opine-like-metallophore (1 GCF) (Fig. S3). To inspect the distribution and enrichment of diverse GCFs across all genomes, we mapped GCFs that contain more than 30 BGCs to the phylogenetic tree of the 284 Paenibacillus spp ( Fig. 2 ).
The majority of these GCFs demonstrate clade-specific distribution patterns within the phylogenetic tree, except for GCF1, GCF4, and GCF7, which form separate clusters at disparate branches of the tree. Notably, no GCFs were widely distributed across all the genomes, which suggests the lack of uniformly conserved BGCs in the Paenibacillus genus. Unexpectedly, we also observed the absence of GCF in numerous clades across the phylogenetic tree, which may be attributed to the low degree of similarity among the BGCs within these clades. The clade-limited spread and absence of shared GCFs in certain clades reflect a broader diversity of BGCs across the whole genus and indicate the potential for the discovery of novel compounds, especially in the less characterized clades. Noteworthy, the frequently detected GCFs, which are only present in few genomes with very close phylogenetic branch distance, are most likely due to the high number of publicly available genomes of repetitively isolated species. Download figure Open in new tab Fig. 2 Phylogenetic tree and frequent GCF (with >30 BGCs detected) distribution of 284 Paenibacillus species Genome dereplication reveals new patterns of abundance and distribution for BGC type within the Paenibacillus genus To explore the true diversity and distribution of BGCs across different species within the Paenibacillus genus, we streamlined the dataset and minimized redundancy of the 284 genomes based on Mash distance, with a threshold of 0.05 ( 40 ). This strategy resulted in 126 representative genomes mostly representing one single species (Fig. S4 and Table S4). The detailed methodology is explained in the Methods section. BGC mining, annotation, and statistical analysis were performed on the focused genome dataset (126 genomes) using the above-described methodologies. In total, 1,236 BGCs were identified in the non-redundant dataset, with the average of 9.81 BGCs per genome (see Table S5 and Fig. S5). 
This corresponds to previous studies on Paenibacillus , indicating that redundancy was adequately reduced in our focused dataset. We compared the abundance and distribution of different types of BGCs in two datasets (the original 284 genomes and the 126 representative genomes after dereplication). In both datasets, the distribution and abundance patterns of major BGC types, such as NRPS, PKS-NRPS hybrids, cyclic-lactone autoinducer, proteusins, and the most abundant terpenes are all similar. This suggests that species of the Paenibacillus genus generally have the potential to synthesize these types of products. However, NRPS and PKS-NRPS hybrids were no longer the dominant two types of BGCs, but rather, cyclic-lactone-autoinducer, proteusins, and terpenes display comparable abundance ( Fig. 3a ). Matrix of BGC types and number against the phylogenetic tree of 126 genomes reveals that BGCs synthesizing terpenes and proteusins were the most widely distributed and were found in 93 genomes (73.81%). Other BGC types were identified in more than half of the genomes were cyclic-lactone-autoinducer (68.25%) and lassopeptide (59.25%) ( Fig. 3b and Table 1 ). Although the total number of NRPS and PKS-NRPS hybrids types of BGCs was still high, they were identified in only 41.27% (52/126) and 46.83% (59/126) of the genomes, respectively, suggesting that they are enriched in some specific species. In the previous studies, NRPS was considered to represent the dominant BGC type ( 29 , 30 ), but our extended study of all high-quality genomes within the Paenibacillus genus (after de-replication) reveals that this is not the case for many specific species or clades, such as species of Paenibacillus_Z clade ( Fig. 3b ). Download figure Open in new tab Fig. 3 Abundances and distribution of BGCs in dataset of the 126 representative genomes. (a) Abundance of different BGC types in the two datasets. (b) Distribution of BGCs and BGC types across 126 representative isolates. 
View this table: View inline View popup Download powerpoint Table 1 The 1,236 BGCs from 126 representative genomes. The focused non-redundant genome dataset displays a greater diversity and novelty of secondary metabolite synthesis potential The non-redundant representative genome dataset eliminates the interference caused by highly similar and redundant BGCs that are present in the same or very closely related species and is therefore expected to provide more unbiased information on BGC diversity. To better explore the synthetic capabilities of these BGCs, we used BiG-SCAPE to cluster BGCs into GCFs based on their similarity. From the 1,236 BGCs predicted from the 126 representative genomes, a total of 831 GCFs were identified, including 695 singletons, which accounted for 83.63% of all GCFs. Furthermore, 541 BGCs were clustered into 136 families, which included only 22 GCFs with more than 5 BGCs, highlighting that this dataset is non-redundant and the potential of BGCs is varied because similar BGCs will be clustered into GCF and are expected to synthesize the same or similar products (Table S6 and Fig. 4 ). Download figure Open in new tab Fig. 4 Depiction of the 831 GCFs clustered from the 1,236 BGCs predicted from 126 Paenibacillus genomes. (a) The color-coded pie chart illustrates the 831 GCFs with different size: green represents 695 singletons, purple represents 72 GCFs with 2 BGCs, blue represents 42 GCFs with 3-5 BGCs, orange represents 12 GCFs with 6-10 BGCs, and red represents 10 GCFs with more than 10 BGCs. Each small compartment represents a GCF, with the size representing the number of BGCs. (b) IDs and types of the 22 GCFs, which are each constituted by more than 5 BGCs. Moreover, BiG-SCAPE incorporates known BGCs from the MIBiG database, thereby facilitating the identification of known and novel GCFs that encode previously uncharacterized products.
Of the 831 GCFs, 11 (1.3%) were classified as known due to the presence of similar BGCs in the MIBiG database, including 5 singleton BGCs and 6 GCFs clustered from 4-11 BGCs (see Table 2 ). View this table: View inline View popup Download powerpoint Table 2 The 11 known GCFs of 126 representative genomes. The top three “known” GCFs (GCF1, GCF2, and GCF 6) containing the highest number of BGCs are all of NRPS type, consisting of 11, 8, and 5 BGCs, respectively, and encode BGCs of the polymyxin, tridecaptin, and fusaricidin B-family, all of which are well-studied antibiotics produced by Paenibacillus spp ( 41 – 43 ). GCF 11, GCF 31, and GCF 34 all code for lanthipeptides, one of the most well-studied families of ribosomally synthesized and post-translationally modified peptides (RiPPs), and they are predicted to encode antimicrobial peptides produced by Paenibacillus spp, which are paenilan, paenicidin B, and paenibacillin ( 42 , 44 , 45 ). In summary, 9 of the 11 GCFs are related to synthesis of 9 known compounds in Paenibacillus , as recorded in the MIBiG database. Notably, although the known cluster most similar to GCF 204 was originally reported in Bacillus circulans SANK 72073, a recent study has suggested that this species should be reclassified to the Paenibacillus genus ( 46 ). The remaining 820 unknown GCFs, which account for 98.7% of all GCFs, are of particular interest as they are likely to encode the biosynthesis apparatus of unknown secondary metabolites, and this promises exciting future discoveries. This includes 690 singletons, 20 relatively large families clustered by more than 5 BGCs, and other 110 GCFs. Phylogenetic distribution of GCFs across 126 representative genomes To explore the biosynthetic potential of species across the Paenibacillus genus, we examined the phylogenetic distribution of large GCFs (comprising >5 BGCs) in 126 representative genomes ( Fig. 5 ). Download figure Open in new tab Fig.
5 Phylogenetic distribution of large GCF (comprising >5 BGCs) in 126 representative genomes. The black bars represent the total number of BGCs in each genome. Each other small square represents a BGC, while squares of the same color represent a gene cluster family (GCFs). Most GCFs demonstrated striking enrichment patterns that align with branching of the phylogenetic tree and correlated with the taxonomy depicted in the Genome Taxonomy Database (GTDB) release 09-RS220 (24th April 2024) ( 33 ), suggesting that these BGCs and their products are phylogenetically conserved and may reflect evolutionary conserved adaptations. The distinctively scattered GCFs and particular BGC products within phylogenetic clades and subclades indicate that these clades have developed unique biosynthetic capabilities during evolution to adapt to the ecological pressures and diversity of environmental ecological niches or these could be potentially horizontally transferred. GCF4 is a representative example: it clusters 25 BGCs of 25 genomes that are localized to a separate clade in the phylogenetic tree. GCF4 is found in all strains of this clade but absent in any other species. Despite the lack of comprehensive study and characterization of these proteusin-BGCs ( 47 ), the clade-specific distribution pattern provides a framework for further targeted exploration for its ecological role in these Paenibacillus species. Such clade-specific GCFs have been previously reported in the Bacillus ( 20 ) and Streptomyces ( 38 ) genera, and are shown here for the Paenibacillus genus. In addition, and more surprisingly, the complete absence of any conserved GCFs in some clades and subclades suggests that these species may have very distinct and diversified BGCs that have such low similarity to each other that they cannot be clustered into any GCFs. The lack of conserved GCFs again promises opportunities for discovering new or rare potential biosynthetic pathways in these taxa.
Some of these clades that lack conserved GCFs are marked with a grey background in Figure 5 , including species from Paenibacillus_G , Paenibacillus_D , Paenibacillus_Z , Paenibacillus_O , and some unclassified species. It is possible that they represent an important source of novel biologically active compounds with potential applications in biotechnology or pharmaceuticals, given the distinctive diversity and novelty of their biosynthetic gene clusters. However, it is also important to consider that the observed absence of conserved GCFs in these clades could be influenced by biases arising from the limited representation of genomes for certain species in the dataset. Inadequate sampling of these clades could lead to an underestimation of shared GCFs, as a larger genomic dataset might reveal conserved GCFs that are currently undetected. CONCLUSIONS We performed a comprehensive gene mining for secondary metabolite biosynthetic gene clusters based on the full extent of available high-quality genomic data (as of October 2024) within the Paenibacillus genus. This has significantly expanded our understanding of the synthetic capacity of the species within this genus, and has led to the discovery of a highly diverse biosynthetic capacity for natural products. A large number of uncharacterized BGCs were identified and the majority of GCFs identified through BGC similarity networks could not be correlated with known compounds, which highlights the potential of Paenibacillus as a promising source for the discovery of novel compounds. We showed that most of identified GCFs are clade-specific and consistent with the phylogenetic tree after de-duplication, revealing that particular biosynthetic capabilities have evolved along specific branches of this genus over evolutionary time. Furthermore, species in other branches lacking GCFs are considered to have more diverse and novel BGCs, which makes them worthy of further attention and in-depth study. 
METHODS Dataset preparation All available genomes of the Paenibacillus genus were queried from NCBI in October 2024, and the genomes with ‘complete’ and ‘chromosome’ assembly levels designated at NCBI were selected. Subsequently, the quality of the genomes was evaluated using CheckM (version 1.2.2) ( 48 ), and high-quality genomes were chosen using a completeness threshold of 90%, finally yielding the original dataset of 284 genomes. The analysis process was carried out with BGCFlow, which integrates numerous software packages ( 7 ). Taxonomic classification and Tree building To provide a more detailed description and exploration of the composition and distribution of secondary metabolite biosynthesis gene clusters across species in the Paenibacillus genus, we utilized GTDB-tk (version 2.4.0) and GTDB (release 09-RS220) ( 32 , 33 ) for taxonomic classification, and the definitions were used consistently throughout the data analysis and data visualization. The phylogenomic tree was constructed using the top 100 genes with the highest pre-calculated dN/dS values through a customized autoMLST wrapper which bypasses additional organism selection ( https://github.com/NBChub/automlst-simplified-wrapper ) ( 20 , 49 ). Tree visualizations were generated using Interactive Tree of Life (iTOL) ( 50 ). Genome mining analysis for BGCs The process of genome mining for BGCs was performed through the utilization of BGCFlow in the following steps ( 7 ). Annotated genomes were fetched from NCBI, after which the secondary metabolite BGCs were analyzed using antiSMASH 7.1.0 ( 6 ). The antiSMASH KnownClusterBlast was employed to compare the similarity between the detected BGC regions and the characterized BGCs from the MIBiG 3.1 database ( 36 ) and to assign a similarity level as follows: a similarity score >80% is defined as high similarity, 40%-80% as medium similarity, <40% as low similarity, and a score of 0 as no similarity. 
GCF clustering We used BiG-SCAPE version 1.1.9 ( 39 ) with a 0.3 cutoff for GCF clustering, which is based on the sequence similarity of all BGCs to identify which BGCs are likely to encode similar or the same products. These are then clustered into a family, while those BGCs that cannot be clustered with any other BGCs are defined as singletons. We further defined a GCF as known if it contains a BGC with similarity above 80% against the MIBiG database as calculated by knownclusterblast . The presence/absence matrix was then visualized in the phylogenetic tree using Interactive Tree of Life (iTOL). Genome de-replication To get a comprehensive and accurate representation of the diversity and distribution of BGCs across the species within the Paenibacillus genus, we obtained representative genomes from the initial dataset of 284 genomes by de-replication. The Mash distance between each pair of genomes was calculated. A threshold of Mash distance < 0.05 was used to identify which genomes are likely to be from the same species and cluster them into groups, then the best quality genome (higher completeness and lower contamination) was selected as the representative genome for each group, ensuring that the retained genomes were representative of their groups and of high quality, and the others were removed as redundant. DECLARATION OF COMPETING INTEREST The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper. Supplementary figures Fig S1. Phylogenetic distribution and basic characters of 284 genomes from genus Paenibacillus . Fig S2. BGCs identified from the 284 genomes. Fig S3. The 1,013 GCFs clustered from the 3,273 BGCs predicted from 284 Paenibacillus genomes. Fig S4. 126 representative genomes. Fig S5. Biosynthetic gene clusters (BGC) in 126 representative genomes of genus Paenibacillus . Supplementary tables Table S1. 284 genomes of Paenibacillus . Table S2. 
The 3,273 BGCs identified from 284 genomes. Table S3. The 1,013 GCFs clustered from the 3,273 BGCs predicted in 284 genomes. Table S4. 126 representative genomes of genus Paenibacillus . Table S5. The 1,236 BGCs identified from 126 representative genomes. Table S6. The 831 GCFs clustered from the 1,236 BGCs predicted in 126 genomes. ACKNOWLEDGEMENTS This project was supported by the Danish National Research Foundation (DNRF137) for the Center for Microbial Secondary Metabolites, and Novo Nordisk Foundation within the INTERACT project of the Collaborative Crop Resiliency Program (NNF19SA0059360). TW acknowledges funding from the Novo Nordisk Foundation Center for Biosustainability (NNF20CC0035580) and the Novo Nordisk Copenhagen Bioscience PhD program (NNF20SA0035588). REFERENCE 1. ↵ Reddy S , Sinha A , Osborne WJ . 2021 . Microbial secondary metabolites: recent developments and technological challenges . Volatiles and metabolites of microbes : 1 – 22 . doi: 10.1016/B978-0-12-824523-1.00007-9 OpenUrl CrossRef 2. ↵ Kiesewalter HT , Lozano-Andrade CN , Strube ML , Kovács ÁT . 2020 . Secondary metabolites of Bacillus subtilis impact the assembly of soil-derived semisynthetic bacterial communities . Beilstein J Org Chem 16 : 2983 – 2998 . doi: 10.3762/bjoc.16.248 OpenUrl CrossRef PubMed 3. ↵ Pham JV , Yilma MA , Feliz A , Majid MT , Maffetone N , Walker JR , Kim E , Cho HJ , Reynolds JM , Song MC , et al. 2019 . A Review of the Microbial Production of Bioactive Natural Products and Biologics . Front Microbiol 10 : 1404 . doi: 10.3389/fmicb.2019.01404 OpenUrl CrossRef PubMed 4. ↵ Olishevska S , Nickzad A , Deziel E . 2019 . Bacillus and Paenibacillus secreted polyketides and peptides involved in controlling human and plant pathogens . Appl Microbiol Biotechnol 103 : 1189 – 1215 . doi: 10.1007/s00253-018-9541-0 OpenUrl CrossRef 5. ↵ Medema MH , Kottmann R , Yilmaz P , Cummings M , Biggins JB , Blin K , de Bruijn I , Chooi YH , Claesen J , Coates RC , et al. 2015 . 
Minimum Information about a Biosynthetic Gene cluster . Nat Chem Biol 11 : 625 – 31 . doi: 10.1038/nchembio.1890 OpenUrl CrossRef PubMed 6. ↵ Blin K , Shaw S , Augustijn HE , Reitz ZL , Biermann F , Alanjary M , Fetter A , Terlouw BR , Metcalf WW , Helfrich EJN , et al. 2023 . antiSMASH 7.0: new and improved predictions for detection, regulation, chemical structures and visualisation . Nucleic Acids Res 51 : W46 – W50 . doi: 10.1093/nar/gkad344 OpenUrl CrossRef PubMed 7. ↵ Nuhamunada M , Mohite OS , Phaneuf PV , Palsson BO , Weber T . 2024 . BGCFlow: systematic pangenome workflow for the analysis of biosynthetic gene clusters across large genomic datasets . Nucleic Acids Res 52 : 5478 – 5495 . doi: 10.1093/nar/gkae314 OpenUrl CrossRef PubMed 8. ↵ Blin K , Shaw S , Medema MH , Weber T . 2024 . The antiSMASH database version 4: additional genomes and BGCs, new sequence-based searches and more . Nucleic Acids Res 52 : D586 – D589 . doi: 10.1093/nar/gkad984 OpenUrl CrossRef 9. Kautsar SA , Blin K , Shaw S , Weber T , Medema MH . 2021 . BiG-FAM: the biosynthetic gene cluster families database . Nucleic Acids Res 49 : D490 – D497 . doi: 10.1093/nar/gkaa812 OpenUrl CrossRef 10. ↵ Zdouc MM , Blin K , Louwen NLL , Navarro J , Loureiro C , Bader CD , Bailey CB , Barra L , Booth TJ , Bozhüyük KAJ , et al. 2025 . MIBiG 4.0: advancing biosynthetic gene cluster curation through global collaboration . Nucleic Acids Res 53 : D678 – D690 . doi: 10.1093/nar/gkae1115 OpenUrl CrossRef PubMed 11. ↵ Kiesewalter HT , Lozano-Andrade CN , Maróti G , Snyder D , Cooper VS , Jørgensen TS , Weber T , Kovács ÁT . 2020 . Complete Genome Sequences of 13 Bacillus subtilis Soil Isolates for Studying Secondary Metabolite Diversity . Microbiol Resour Announc 9 : e01406 – 19 . doi: 10.1128/MRA.01406-19 OpenUrl CrossRef PubMed 12. ↵ Sánchez-Navarro R , Nuhamunada M , Mohite OS , Wasmund K , Albertsen M , Gram L , Nielsen PH , Weber T , Singleton CM . 2022 . 
Long-Read Metagenome-Assembled Genomes Improve Identification of Novel Complete Biosynthetic Gene Clusters in a Complex Microbial Activated Sludge Ecosystem . mSystems 7 : e0063222 . doi: 10.1128/msystems.00632-22 OpenUrl CrossRef PubMed 13. Mohite OS , Lloyd CJ , Monk JM , Weber T , Palsson BO . 2022 . Pangenome analysis of Enterobacteria reveals richness of secondary metabolite gene clusters and their associated gene sets . Synth Syst Biotechnol 7 : 900 – 910 . doi: 10.1016/j.synbio.2022.04.011 OpenUrl CrossRef 14. ↵ Zhang Z , Zhang L , Zhang L , Chu H , Zhou J , Ju F . 2024 . Diversity and distribution of biosynthetic gene clusters in agricultural soil microbiomes . mSystems 9 : e0126323 . doi: 10.1128/msystems.01263-23 OpenUrl CrossRef PubMed 15. ↵ Kiesewalter HT , Lozano-Andrade CN , Wibowo M , Strube ML , Maróti G , Snyder D , Jørgensen TS , Larsen TO , Cooper VS , Weber T , et al. 2021 . Genomic and Chemical Diversity of Bacillus subtilis Secondary Metabolites against Plant Pathogenic Fungi . mSystems 6 : 00770 – 20 . doi: 10.1128/mSystems.00770-20 OpenUrl CrossRef 16. Lee N , Kim W , Hwang S , Lee Y , Cho S , Palsson B , Cho BK . 2020 . Thirty complete Streptomyces genome sequences for mining novel secondary metabolite biosynthetic gene clusters . Sci Data 7 : 55 . doi: 10.1038/s41597-020-0395-9 OpenUrl CrossRef PubMed 17. Gavriilidou A , Kautsar SA , Zaburannyi N , Krug D , Muller R , Medema MH , Ziemert N . 2022 . Compendium of specialized metabolite biosynthetic diversity encoded in bacterial genomes . Nat Microbiol 7 : 726 – 735 . doi: 10.1038/s41564-022-01110-2 OpenUrl CrossRef PubMed 18. Jørgensen TS , Mohite OS , Sterndorff EB , Alvarez-Arevalo M , Blin K , Booth TJ , Charusanti P , Faurdal D , Hansen TØ , Nuhamunada M , et al. 2024 . A treasure trove of 1034 actinomycete genomes . Nucleic Acids Res 52 : 7487 – 7503 . doi: 10.1093/nar/gkae523 OpenUrl CrossRef PubMed 19. 
Grubbs KJ , Bleich RM , Santa Maria KC , Allen SE , Farag S , AgBiome T , Shank EA , Bowers AA . 2017 . Large-Scale Bioinformatics Analysis of Bacillus Genomes Uncovers Conserved Roles of Natural Products in Bacterial Physiology . mSystems 2 : e00040 – 17 . doi: 10.1128/mSystems.00040-17 OpenUrl CrossRef PubMed 20. ↵ Steinke K , Mohite OS , Weber T , Kovács ÁT . 2021 . Phylogenetic Distribution of Secondary Metabolites in the Bacillus subtilis Species Complex . mSystems 6 : e00057 – 21 . doi: 10.1128/mSystems.00057-21 OpenUrl CrossRef PubMed 21. ↵ Song L , Nielsen LJD , Xu X , Mohite OS , Nuhamunada M , Xu Z , Murphy R , Bodawatta K , Poulsen M , Abdulla MH , et al. 2024 . Expanding the genome information on Bacillales for biosynthetic gene cluster discovery . Sci Data 11 : 1267 . doi: 10.1038/s41597-024-04118-x OpenUrl CrossRef PubMed 22. ↵ Ash C , Priest FG , Collins MD . 1993 . Molecular identification of rRNA group 3 bacilli (Ash, Farrow, Wallbanks and Collins) using a PCR probe test. Proposal for the creation of a new genus Paenibacillus . Antonie Van Leeuwenhoek 64 : 253 – 60 . doi: 10.1007/BF00873085 OpenUrl CrossRef PubMed Web of Science 23. ↵ Xie J , Shi H , Du Z , Wang T , Liu X , Chen S . 2016 . Comparative genomic and functional analysis reveal conservation of plant growth promoting traits in Paenibacillus polymyxa and its closely related species . Sci Rep 6 : 21329 . doi: 10.1038/srep21329 OpenUrl CrossRef PubMed 24. Xie JB , Du Z , Bai L , Tian C , Zhang Y , Xie JY , Wang T , Liu X , Chen X , Cheng Q , et al. 2014 . Comparative genomic analysis of N2-fixing and non-N2-fixing Paenibacillus spp.: organization, evolution and expression of the nitrogen fixation genes . PLoS Genet 10 : e1004231 . doi: 10.1371/journal.pgen.1004231 OpenUrl CrossRef PubMed 25. ↵ Patten CL , Blakney AJ , Coulson TJ . 2013 . Activity, distribution and function of indole-3-acetic acid biosynthetic pathways in bacteria . Crit Rev Microbiol 39 : 395 – 415 . 
doi: 10.3109/1040841X.2012.716819 OpenUrl CrossRef PubMed 26. ↵ Lal S , Tabacchioni S . 2009 . Ecology and biotechnological potential of Paenibacillus polymyxa : a minireview . Indian J Microbiol 49 : 2 – 10 . doi: 10.1007/s12088-009-0008-y OpenUrl CrossRef PubMed 27. ↵ Li E , Liu K , Yang S , Li L , Ran K , Sun X , Qu J , Zhao L , Xin Y , Zhu F , et al. 2024 . Analysis of the complete genome sequence of Paenibacillus sp. lzh-N1 reveals its antagonistic ability . BMC Genomics 25 : 276 . doi: 10.1186/s12864-024-10206-4 OpenUrl CrossRef PubMed 28. ↵ Jeong H , Choi SK , Ryu CM , Park SH . 2019 . Chronicle of a Soil Bacterium: Paenibacillus polymyxa E681 as a Tiny Guardian of Plant and Human Health . Front Microbiol 10 : 467 . doi: 10.3389/fmicb.2019.00467 OpenUrl CrossRef PubMed 29. ↵ Pranav PS , Mahalakshmi B , Sivakumar R , Karthikeyan R , Rajendhran J . 2021 . Whole-Genome Sequence Analysis of Paenibacillus alvei JR949 Revealed Biosynthetic Gene Clusters Coding for Novel Antimicrobials . Curr Microbiol 78 : 1168 – 1176 . doi: 10.1007/s00284-021-02393-0 OpenUrl CrossRef PubMed 30. ↵ Lebedeva J , Jukneviciute G , Čepaitė R , Vickackaite V , Pranckutė R , Kuisiene N . 2020 . Genome Mining and Characterization of Biosynthetic Gene Clusters in Two Cave Strains of Paenibacillus sp . Front Microbiol 11 : 612483 . doi: 10.3389/fmicb.2020.612483 OpenUrl CrossRef PubMed 31. ↵ Kim MS , Jeong DE , Jang JP , Jang JH , Choi SK . 2024 . Mining biosynthetic gene clusters in Paenibacillus genomes to discover novel antibiotics . BMC Microbiol 24 : 226 . doi: 10.1186/s12866-024-03375-5 OpenUrl CrossRef PubMed 32. ↵ Chaumeil PA , Mussig AJ , Hugenholtz P , Parks DH . 2022 . GTDB-Tk v2: memory friendly classification with the genome taxonomy database . Bioinformatics 38 : 5315 – 5316 . doi: 10.1093/bioinformatics/btac672 OpenUrl CrossRef PubMed 33. ↵ Parks DH , Chuvochina M , Rinke C , Mussig AJ , Chaumeil PA , Hugenholtz P . 2022 . 
GTDB: an ongoing census of bacterial and archaeal diversity through a phylogenetically consistent, rank normalized and complete genome-based taxonomy . Nucleic Acids Res 50 : D785 – D794 . doi: 10.1093/nar/gkab776 OpenUrl CrossRef PubMed 34. ↵ Seshadri R , Roux S , Huber KJ , Wu D , Yu S , Udwary D , Call L , Nayfach S , Hahnke RL , Pukall R , et al. 2022 . Expanding the genomic encyclopedia of Actinobacteria with 824 isolate reference genomes . Cell Genom 2 : 100213 . doi: 10.1016/j.xgen.2022.100213 OpenUrl CrossRef 35. ↵ Tizabi D , Bachvaroff T , Hill RT . 2022 . Comparative analysis of assembly algorithms to optimize biosynthetic gene cluster identification in novel marine actinomycete genomes . Frontiers in Marine Science 9 : 914197 . doi: 10.3389/fmars.2022.914197 OpenUrl CrossRef 36. ↵ Terlouw BR , Blin K , Navarro-Munoz JC , Avalon NE , Chevrette MG , Egbert S , Lee S , Meijer D , Recchia MJJ , Reitz ZL , et al. 2023 . MIBiG 3.0: a community-driven effort to annotate experimentally validated biosynthetic gene clusters . Nucleic Acids Res 51 : D603 – D610 . doi: 10.1093/nar/gkac1049 OpenUrl CrossRef PubMed 37. ↵ Yin QJ , Ying TT , Zhou ZY , Hu GA , Yang CL , Hua Y , Wang H , Wei B . 2023 . Species-specificity of the secondary biosynthetic potential in Bacillus . Front Microbiol 14 : 1271418 . doi: 10.3389/fmicb.2023.1271418 OpenUrl CrossRef PubMed 38. ↵ Mohite OS , Jørgensen TS , Booth TJ , Charusanti P , Phaneuf PV , Weber T , Palsson BO . 2025 . Pangenome mining of the Streptomyces genus redefines species’ biosynthetic potential . Genome Biol 26 : 9 . doi: 10.1186/s13059-024-03471-9 OpenUrl CrossRef PubMed 39. ↵ Navarro-Muñoz JC , Selem-Mojica N , Mullowney MW , Kautsar SA , Tryon JH , Parkinson EI , De Los Santos ELC , Yeong M , Cruz-Morales P , Abubucker S , et al. 2020 . A computational framework to explore large-scale biosynthetic diversity . Nat Chem Biol 16 : 60 – 68 . doi: 10.1038/s41589-019-0400-9 OpenUrl CrossRef PubMed 40. 
↵ Ondov BD , Treangen TJ , Melsted P , Mallonee AB , Bergman NH , Koren S , Phillippy AM . 2016 . Mash: fast genome and metagenome distance estimation using MinHash . Genome Biol 17 : 132 . doi: 10.1186/s13059-016-0997-x OpenUrl CrossRef PubMed 41. ↵ Choi SK , Park SY , Kim R , Kim SB , Lee CH , Kim JF , Park SH . 2009 . Identification of a polymyxin synthetase gene cluster of Paenibacillus polymyxa and heterologous expression of the gene in Bacillus subtilis . J Bacteriol 191 : 3350 – 8 . doi: 10.1128/JB.01728-08 OpenUrl Abstract / FREE Full Text 42. ↵ Lohans CT , van Belkum MJ , Cochrane SA , Huang Z , Sit CS , McMullen LM , Vederas JC . 2014 . Biochemical, structural, and genetic characterization of tridecaptin A 1 , an antagonist of Campylobacter jejuni . Chembiochem 15 : 243 – 9 . doi: 10.1002/cbic.201300595 OpenUrl CrossRef PubMed 43. ↵ Vater J , Niu B , Dietel K , Borriss R . 2015 . Characterization of Novel Fusaricidins Produced by Paenibacillus polymyxa -M1 Using MALDI-TOF Mass Spectrometry . J Am Soc Mass Spectrom 26 : 1548 – 58 . doi: 10.1007/s13361-015-1130-1 OpenUrl CrossRef PubMed 44. ↵ Park JE , Kim HR , Park SY , Choi SK , Park SH . 2017 . Identification of the biosynthesis gene cluster for the novel lantibiotic paenilan from Paenibacillus polymyxa E681 and characterization of its product . J Appl Microbiol 123 : 1133 – 1147 . doi: 10.1111/jam.13580 OpenUrl CrossRef 45. ↵ He Z , Kisla D , Zhang L , Yuan C , Green-Church KB , Yousef AE . 2007 . Isolation and identification of a Paenibacillus polymyxa strain that coproduces a novel lantibiotic and polymyxin . Appl Environ Microbiol 73 : 168 – 78 . doi: 10.1128/AEM.02023-06 OpenUrl Abstract / FREE Full Text 46. ↵ Hyun K-A , Kim S-Y , Boo K-H , Chi W-J , Hyun C-G . 2024 . Complete Genome Sequence of the Butirosin-Producing Bacillus vitellinus NBRC 13296 and Its Reclassification to Paenibacillus chitinolyticus . Microbiology Research 15 : 1747 – 1757 . OpenUrl CrossRef 47. 
↵ Loureiro C , Galani A , Gavriilidou A , Chaib de Mares M , van der Oost J , Medema MH , Sipkema D . 2022 . Comparative Metagenomic Analysis of Biosynthetic Diversity across Sponge Microbiomes Highlights Metabolic Novelty , Conservation, and Diversification. mSystems 7 : e0035722 . doi: 10.1128/msystems.00357-22 OpenUrl CrossRef PubMed 48. ↵ Parks DH , Imelfort M , Skennerton CT , Hugenholtz P , Tyson GW . 2015 . CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes . Genome Res 25 : 1043 – 55 . doi: 10.1101/gr.186072.114 OpenUrl Abstract / FREE Full Text 49. ↵ Alanjary M , Steinke K , Ziemert N . 2019 . AutoMLST: an automated web server for generating multi-locus species trees highlighting natural product potential . Nucleic Acids Res 47 : W276 – W282 . doi: 10.1093/nar/gkz282 OpenUrl CrossRef PubMed 50. ↵ Letunic I , Bork P . 2024 . Interactive Tree of Life (iTOL) v6: recent updates to the phylogenetic tree display and annotation tool . Nucleic Acids Res 52 : W78 – W82 . doi: 10.1093/nar/gkae268 OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted January 24, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Unlocking the biosynthetic potential of Paenibacilli through a genus-wide exploration of gene clusters for secondary metabolite production Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Unlocking the biosynthetic potential of Paenibacilli through a genus-wide exploration of gene clusters for secondary metabolite production Lijie Song , Matin Nuhamunada , Tilmann Weber , Ákos T. Kovács bioRxiv 2025.01.22.634348; doi: https://doi.org/10.1101/2025.01.22.634348 Share This Article: Copy Citation Tools Unlocking the biosynthetic potential of Paenibacilli through a genus-wide exploration of gene clusters for secondary metabolite production Lijie Song , Matin Nuhamunada , Tilmann Weber , Ákos T. Kovács bioRxiv 2025.01.22.634348; doi: https://doi.org/10.1101/2025.01.22.634348 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Microbiology Subject Areas All Articles Animal Behavior and Cognition (7622) Biochemistry (17648) Bioengineering (13871) Bioinformatics (41880) Biophysics (21423) Cancer Biology (18561) Cell Biology (25461) Clinical Trials (138) Developmental Biology (13364) Ecology (19866) Epidemiology (2067) Evolutionary Biology (24290) Genetics (15590) Genomics (22475) Immunology (17713) Microbiology (40328) Molecular Biology (17148) Neuroscience (88473) Paleontology (666) Pathology (2827) Pharmacology and Toxicology (4816) Physiology (7635) Plant Biology (15114) Scientific Communication and Education (2044) Synthetic Biology (4286) Systems Biology (9815) Zoology (2268)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00