Whole-genome sequencing of diverse 351 cultured prokaryotes including yet-unsequenced fastidious type strains

doi:10.1101/2025.05.25.655001

Whole-genome sequencing of diverse 351 cultured prokaryotes including yet-unsequenced fastidious type strains

2025 · doi:10.1101/2025.05.25.655001

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 52,879 characters · extracted from preprint-html · click to expand

Whole-genome sequencing of diverse 351 cultured prokaryotes including yet-unsequenced fastidious type strains | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Whole-genome sequencing of diverse 351 cultured prokaryotes including yet-unsequenced fastidious type strains View ORCID Profile Shingo Kato , View ORCID Profile Sachiko Masuda , View ORCID Profile Arisa Shibata , Takashi Itoh , View ORCID Profile Mitsuo Sakamoto , View ORCID Profile Ken Shirasu , Moriya Ohkuma doi: https://doi.org/10.1101/2025.05.25.655001 Shingo Kato 1 Japan Collection of Microorganisms, RIKEN BioResource Research Center , Tsukuba, Japan Find this author 
on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Shingo Kato For correspondence: skato{at}riken.jp mohkuma{at}riken.jp Sachiko Masuda 2 Plant Immunity Research Group, RIKEN Center for Sustainable Resource Science , Yokohama, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sachiko Masuda Arisa Shibata 2 Plant Immunity Research Group, RIKEN Center for Sustainable Resource Science , Yokohama, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Arisa Shibata Takashi Itoh 1 Japan Collection of Microorganisms, RIKEN BioResource Research Center , Tsukuba, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mitsuo Sakamoto 1 Japan Collection of Microorganisms, RIKEN BioResource Research Center , Tsukuba, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Mitsuo Sakamoto Ken Shirasu 2 Plant Immunity Research Group, RIKEN Center for Sustainable Resource Science , Yokohama, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ken Shirasu Moriya Ohkuma 1 Japan Collection of Microorganisms, RIKEN BioResource Research Center , Tsukuba, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: skato{at}riken.jp mohkuma{at}riken.jp Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF ABSTRACT Genome sequences provide fundamental information for both basic and applied life sciences. Whole-genome sequencing is now requested for describing novel prokaryotic species and designating their type strains, which serve as representative and well-characterized strains of the species. 
Indeed, the number of sequenced prokaryotic genomes has been rapidly increasing. However, a considerable number of isolated strains, particularly “fastidious” type strains such as strict anaerobes and slow growers, remain without genome sequence information. Here we report the whole-genome sequencing of 290 bacterial and 61 archaeal strains, including fastidious type strains, obtained from Japan Collection of Microorganisms (JCM) using a combination of short- and long-read sequencing technologies. The dataset includes 284 type strain genomes and 235 complete genomes. Notably, in the dataset, genomes of over 200 strains, including over 150 type strains, had not been made publicly available. Comparative genomic analysis suggests that some strains need to be assigned to novel taxa or reclassified. Functional gene survey indicates that some strains possess previously unrecognized potential for carbon fixation or bioactive secondary metabolite production. Our dataset will contribute to more accurate taxonomic classification, fill gaps in the phylogeny of prokaryotes, and provide insights into their physiology and ecology. INTRODUCTION Whole-genome sequences serve as a fundamental blueprint for life, providing the foundation for a broad range of life sciences research. The genomes of prokaryotes, i.e., archaea and bacteria, are typically smaller (less than 10 Mbp) and simpler in structure, consisting of a single circular chromosome in most cases, than those of eukaryotes. The rapid development of DNA sequencing technologies and the increase in computational power have driven a significant expansion in the number of sequenced prokaryotic genomes over the years. In particular, type strains, which are representatives of species with well-characterized phenotypic information, have been a primary focus of large-scale whole-genome sequencing projects ( 1 , 2 ). 
In addition, since 2018, the determination of genome sequence of the type strain and comparison with those of related species have been required when describing novel prokaryotic species ( 3 , 4 ), resulting in a substantial accumulation of type strain genomes in public databases. Furthermore, recent advances in long-read sequencing technologies have enabled the re-sequencing and completion of type strain genomes ( 5 , 6 ). However, a considerable number of genomes from “fastidious” strains, such as slow-growing strict anaerobes with low maximum cell density, remain unsequenced due to the challenges associated with their cultivation and DNA extraction. Therefore, gaps in the prokaryotic phylogeny remain to be filled, and the full extent of their metabolic potentials remains unclear. Here, we report the whole-genome sequencing of 351 publicly available prokaryotic strains, including fastidious and/or previously unsequenced strains, obtained from the Japan Collection of Microorganisms (JCM), one of the international culture collections. MATERIAL AND METHODS Microbial strains and DNA extraction We obtained 203 microbial strains from the Japan Collection of Microorganisms (JCM) at the RIKEN BioResource Research Center (BRC) in Tsukuba, Japan. These strains were cultivated under the growth conditions summarized in Table S1. Detailed information on the media used to culture each strain is available on the JCM website ( https://jcm.brc.riken.jp/en/ ) and also deposited on the figshare website ( https://doi.org/10.6084/m9.figshare.28756649.v1 ). Cells were harvested by centrifugation at 10,000 × g and stored at -80°C until DNA extraction. Genomic DNAs were extracted from the harvested cells using a DNeasy PowerLyzer Microbial Kit (Qiagen). Additionally, we obtained pre-extracted genomic DNAs for other 148 microbial strains from the DNA Bank of RIKEN BRC (Table S1). 
The purity and quantity of the DNA samples were assessed using a Qubit 3.0 Fluorometer (Thermo Fisher Scientific) with dsDNA quantification assay kits, as well as a Multiskan SkyHigh Microplate Spectrophotometer (Thermo Fisher Scientific) with a μDrop Plate (Thermo Fisher Scientific). Genome sequencing We performed short-read sequencing on MiSeq or NextSeq 1000 instruments (Illumina). DNA libraries were constructed from aliquots of the DNA samples using the QIAseq FX DNA Library Kit (Qiagen) for sequencing on MiSeq and NextSeq instruments, and sequenced with the MiSeq Reagent Kit version 3 (600 cycles, 300 bp paired-end reads; Illumina) and the NextSeq 1000/2000 P1 XLEAP-SBS Reagent Kit (600 cycles, 300 bp paired-end reads; Illumina). For long-read sequencing, we used a MinION device (Oxford Nanopore Technologies, ONT) and a PacBio Revio system (Pacific Biosciences). For ONT sequencing, DNA libraries were constructed using the Rapid Barcoding Kit 24 V14 (SQK-RBK114.24) and sequenced with R10.4.1 flow cells (FLO-MIN114). The raw data were basecalled using Dorado version 0.7.1 ( https://github.com/nanoporetech/dorado ) with the “sup” model. For PacBio sequencing, we first assessed the quality of the DNA samples using a Femto Pulse system (Agilent Technologies; Santa Clara, CA, United States). Subsequently, SMRTbell libraries were prepared using SMRTbell Express Template Prep Kit v2.0. On several libraries where necessary, size-selection was performed on the BluePippin system using a 0.75% agarose cassette (Sage Science; Beverly, MA, United States) with a 5-50 kb high-pass cutoff. The SMRTbell libraries were then bound to the sequencing polymerase enzyme using a Revio Polymerase kit. Shotgun genomic DNA sequence data were collected on SMRT Cells using HiFi sequencing protocols and Revio sequencing plate (PacBio). HiFi reads were generated using SMRTLink v13.1 with default parameters and extracted with a quality score >Q30. 
Genome assembly The short reads were filtered using fastp version 0.23.4 ( 7 ) with the option “-l 200”. The long reads were filtered using Chopper version 0.7.0, implemented in NanoPack2 ( 8 ), with the options “-q 8 -l 1000”. In some cases, long read data exceeding the desired size were downsized to approximately 100× coverage using Filtlong ( https://github.com/rrwick/Filtlong ). The filtered long reads were assembled using Flye version 2.9.3 ( 9 ) or the Improved Phased Assembler version 1.8.0 ( https://github.com/PacificBiosciences/pbipa ) equipped with the Pacbio Revio system. If closed contigs for putative primary chromosomes were not obtained, hybrid assembly was performed using Unicycler version 0.5.0 ( 10 ) (short-read-first hybrid assembling) or Hybracter version 0.7.3 ( 11 ) (long-read-first hybrid assembling) with the short reads. For cases where only short reads were available, Unicycler was used for short-read-only assembly. The assembly software used for each strain is listed in Table S2. In this study, when the longest contig for the primary chromosome in the assembly was circularized, the genome was treated as a “complete” genome, with the exception of Streptomyces species, which may have linear chromosomes. For Streptomyces assemblies, the presence of terminal inverted repeats detected by BLASTn version 2.14.0 ( 12 ) was used to assess whether the linear genomes were complete. Genome analysis The genome assemblies were then annotated using DFAST version 1.3.2 ( 13 ) with the option “--minimum_length 500 --use_prodigal --use_trnascan”. For additional annotation, we used METABOLIC version 4.0 ( 14 ) equipped with KEGG ( 15 ), HydDB ( 16 ), dbCAN2 ( 17 ) and MEROPS ( 18 ), eggNOG-mapper version 2.1.12 ( 19 ), and gapseq version 1.3.1 ( 20 ). Biosynthetic gene clusters (BGCs) were detected using antiSMASH version 7.1.0 ( 21 ). 
Plasmids and viruses/phages were identified using geNomad version 1.8.1 ( 22 ), mob-suite version 3.1.9 ( 23 ), and VirSorter version 2.2.4 ( 24 ). The quality of the determined genomes was evaluated using CheckM version 1.2.3 ( 25 ) and CheckM2 version 1.0.2 ( 26 ). Taxonomic classification of strains based on the determined genomes was performed using GTDB-tk version 2.4.0 ( 27 ) with the reference database R226 ( 28 ). Average nucleotide identity (ANI) values among the determined genomes were calculated using FastANI version 1.34 ( 29 ). Average amino acid identity (AAI) values among the determined genomes were calculated using EzAAI version 1.2.3 ( 30 ). Values of the percentage of conserved proteins (POCP) were calculated using POCP-nf version 2.3.4 ( 31 ). Digital DNA-DNA hybridization (dDDH) values were determined using the Genome-to-Genome Distance Calculator (GGDC) version 3.0 ( 32 ). We used Type Strain Genome Server ( 32 ) and GTDB-tk to detect the closest publicly available genomes to determined ones, based on dDDH and ANI values, respectively. Alluvial diagrams were generated using RAWGraphs ( https://www.rawgraphs.io ). Plots and bar charts were generated using ggplot2 ( https://ggplot2.tidyverse.org ). RESULTS AND DISCUSSION Overview of the sequenced genomes We determined the whole-genome sequences of 351 microbial strains, including 284 type strains ( Fig. 1A ; Table S2). Of the 351 genomes, 235 genomes (including 189 type strain genomes) were completed. The size of the determined genomes ranged from 1,265,941 bp for Methanothermus sociabilis JCM 10723 T to 10,631,713 bp for Streptomyces phaeofaciens JCM 4814 T (Fig. S1). The genome size of M. sociabilis JCM 10723 T is comparable to that of other small genomes of free-living organisms, such as ‘ Nitrosopelagicus brevis ’ CN25 (1.232 Mbp) and Methanothermus fervidus V24S T (1.243 Mbp). 
The G+C content of the genomes varied widely, ranging from 24% for Exilispira thermophila JCM 14728 T to 76% for Cellulomonas pakistanensis JCM 18755 T . Download figure Open in new tab Fig. 1. Overview of the determined genomes of the 351 strains. ( A ) Alluvial diagram showing the distribution of type and non-type strains, genome completeness, and the presence of extra-chromosomal DNA. ( B ) Alluvial diagram showing the taxonomic classification and oxygen requirement of the 351 strains. ( C ) Bar chart showing the number of strains harboring one or more secondary circular contigs. The number of secondary circular contigs is indicated by different colors as shown in the inset box. ( D ) Box plot showing the size distribution of the secondary circular contigs. The dataset encompassed phylogenetically diverse prokaryotes, comprising 61 archaeal and 290 bacterial strains ( Fig. 1B ; Table S3). The taxonomic classification, based on the International Code of Nomenclature of Prokaryotes (ICNP), showed that our dataset spanned 4 archaeal phyla with 7 classes, and 18 bacterial phyla with 36 classes. In addition, these genomes represented physiologically diverse microorganisms, including anaerobes and microaerophiles. Overall, our dataset covers a broad range of prokaryotic taxa, despite being much smaller in number than the total of publicly available genomes. Although some genomes showed high values of contamination level, many of these genomes were completed (Table S2). For instance, the determined genome of “Desulfosporomusa polytropa” JCM 32836 T was fully completed, comprising two circular contigs of 5,636,488 bp and 261,751 bp, but showed contamination values of up to 12.1%. Notably, the strain heterogeneity value was zero, indicating no actual contamination. 
Therefore, we considered that the high values of contamination level observed in this strain and also others were not due to actual contamination, but rather to a limitation of the checking ability for diverse taxa in the software used in this study. Indeed, no 16S rRNA gene sequences from potential contaminants were detected in any of the assemblies. Besides, some complete genomes showed low values of completeness level, e.g., <95% for Oligosphaera ethanolica JCM 17152 T , which can also be attributed to the inadequacy of the software. The high-quality genome data obtained from pure cultures in this study provide a robust basis for more accurate assessments of genome completeness and contamination levels. The genome sequencing of the isolated strains indicated the presence of extra-chromosomal DNA fragments, such as secondary replicons ( 33 ) including plasmids and chromids, or phages/viruses, in addition to primary chromosomes. Metagenomics of complex microbial communities in natural environments can detect such extra-chromosomal DNA fragments, but often fails to identify their host organisms. In this study, we found that 76 out of 235 complete genomes contained two or more contigs (Table S2), suggesting the presence of extra-chromosomal DNA fragments. In particular, the secondary contigs in 68 of the 76 genomes included circular contigs. Furthermore, 40 draft genomes contained circular contigs smaller than their longest contig, which could also represent extra-chromosomal DNA fragments. The circular contigs (up to 5 per genome) of extra-chromosomal DNAs were found in genomes from 12 of the 22 phyla ( Fig. 1C ). The size of these circular contigs ranged from 1,847 to 1,436,886 bp ( Fig. 1D ), and they were likely to include megaplasmids, chromids, or secondary chromosomes. 
Remarkably, 35 of the circular contigs were not detected as plasmids or phages/viruses using existing tools, suggesting that these were novel types of secondary replicons or phages/viruses. The results will contribute to expanding the knowledge of extra-chromosomal DNAs of prokaryotes. Potential proposal of novel taxa and reclassification Notably, 209 (38 archaeal and 171 bacterial) of the 351 genomes, including 166 type strain genomes, were released to public databases for the first time ( Fig. 2 ; Table S2). Of the 166 type strain genomes, the genomes of 78 strains, e.g., Hydrogenobaculum acidophilum JCM 8795 T , showed low similarity (<95% ANI or <70% dDDH, commonly used thresholds for species-level definition ( 34 , 35 )) to any publicly available genomes derived from isolates or metagenome-assembled genomes (MAGs) (Table S3). This indicates that these genomes are the first to be publicly released at the species level. Another 17 genomes showed high similarity (>95% ANI) to previously-reported MAGs or genomes of yet-unnamed isolates. For instance, the genome of Methanothermobacter crinale JCM 17393 T showed high similarity (99.6% ANI) to the MAG 41_258 (accession no. GCA_001507955.1) recovered from an oil reservoir ( 36 ). It should be noted that some of the counts of genomes reported in the present study may be subject to change due to ongoing updates of public databases. Download figure Open in new tab Fig. 2. Genomic and taxonomic novelty. Alluvial diagram showing the genomic and taxonomic novelty of the 209 strains with newly released genomes in this study. Details are provided in the main text and in Table S3. Of the 166 newly released type strain genomes, another 21 genomes were the first type strain genomes to be publicly released at the genus level, based on the GTDB-based taxonomic classification. 
For instance, the genome of Sulfophobococcus zilligii JCM 10309 T did not match any genomes of isolates, but showed high similarity (98.7% ANI) to a MAG (UBA285, accession no. GCA_002495025.1), which was assigned to the genus-level clade “g_UBA285” within the family-level clade “f_Desulfurococcaceae”. This result indicates that the genus-level clade “g_UBA285” corresponds to the genus Sulfophobococcus . Similarly, we released the first type strain genomes for other 20 validly named genera, including Stetteria and Thermodiscus in the domain Archaea , as well as Vulcanithermus and Thioreductor in the domain Bacteria . Moreover, some genomes of the 21 type strains had a potential for the assignment to novel taxa at the family or higher level. According to the GTDB-based classification (Table S3), our genome dataset potentially encompassed 1 novel class, 3 novel orders, and 4 novel families. For instance, Exilispira thermophila JCM 14728 T , a member of the phylum Spirochaetota , could represent a novel family, order, or even class. Similarly, Endothiovibrio diazotrophicus JCM 17961 T belonging to the class Gammaproteobacteria could represent a novel family and order. Thioprofundum lithotrophicum JCM 14586 T , belonging to the family Thioprofundaceae , could represent a novel order. Both Brassicibacter thermophilus JCM 30480 T and Thiofractor thiocaminus JCM 15747 T could represent a novel family. Further phylogenetic analysis is needed to support the assignment to these novel taxa. The GTDB-based classification suggested that other 8 type strain genomes, currently assigned to already known genera, could represent novel genera ( Fig. 2 ). For example, Methanosarcina baltica JCM 11281 T was classified as “g_JAQVBP01”, which is distinct from “g_Methanosarcina” and the corresponding genus Methanosarcina . This result is consistent with a previous study reporting that M. 
baltica can be physiologically and phylogenetically distinguished from other species in Methanosarcina ( 37 ). Among species in Methanosarcina , the most similar type strain to M. baltica JCM 11281 T was Methanosarcina subterranea JCM 15540 T , whose genome was determined in this study and classified as “g_Methanosarcina”. The comparisons between these two strains showed the values of 94.9% (16S rRNA gene similarity), 72.9% (AAI), and 61.9% (POCP). Given the reported values for genus boundary (90-99% for 16S rRNA gene similarity, 65–72% for AAI, and 50–60% for POCP) ( 38 - 41 ), M. baltica JCM 11281 T is standing on the boundary of the genus threshold. These results imply that M. baltica JCM 11281 T belongs to a distinct genus from Methanosarcina , although further careful analysis is needed to conclude this notion. Other 17 type strain genomes showed high similarity (≥95% ANI or ≥70% dDDH) to those of different species ( Fig. 2 ), suggesting that taxonomic reclassification should be considered. For example, the determined genome of Pyrococcus woesei JCM 8421 T showed high similarity to that of Pyrococcus furiosus DSM 3638 T (accession no. GCA_000007305.1), with ANI and dDDH values of 99.6% and 95.2%, respectively. This result supports the notion that these two strains belong to the same species, i.e., Pyrococcus furiosus , which has been previously noted due to the high similarities in their gene sequences and physiological characteristics ( 42 ). Similarly, the genome of ‘Thermococcus marinus’ JCM 11825 T , a proposed novel species with an as-yet unvalidated name ( 43 ), showed high similarity to that of Thermococcus eurythermalis A501 T (accession no. GCA_000769655.1), with ANI and dDDH values of 99.2% and 90.5%, respectively. Our dataset includes 14 additional strains that have been proposed to represent novel species, but whose scientific names have not yet been validated (Table S3). 
Given the strong recommendation for whole-genome sequences in the field of prokaryotic taxonomy since 2018, our dataset is expected to facilitate taxonomic reclassification and assessment of the validity of proposed scientific names. Among the newly released 43 genomes of non-type strains, 10 genomes showed low similarity to any publicly available or determined genomes of type strains, indicating that they represented probable novel species. For instance, the genome of Clostridium sp. JCM 10519 (=NkU-1) showed a low similarity to its closest relative, Lacrimispora saccharolytica (formerly Clostridium saccharolytica ) strain WM1 T (accession no. GCA_000144625.1), with ANI and dDDH values of 85.0% and 29.8%, respectively. Indeed, despite a high 16S rRNA gene similarity of 98.9% between the two strains, their physiological characteristics are distinct as reported previously ( 44 ). Further analyses of Clostridium sp. JCM 10519 and the other 9 strains are expected to provide additional supporting evidence for describing novel species. Another 13 genomes of unidentified species showed high similarities (≥95% ANI or ≥70% dDDH) to those of known species. For instance, the genome of Streptomyces sp. JCM 18897 was highly similar to that of Streptomyces albidoflavus NRRL B-1271 T with 99.0% ANI and 95.9% dDDH, and thus Streptomyces sp. JCM 18897 could be assigned to the species Streptomyces albidoflavus . The remaining 25 genomes of type strains showed high similarities to the genomes of the same species of non-type strains. For instance, the determined genome of Hydrogenovibrio thermophilus JCM 12397 T showed a high similarity (97.1% ANI) to the genome of H. thermophilus JR-2 (a non-type strain; accession no. GCF_004028275.1) recovered from a deep-sea hydrothermal vent field ( 45 ). 
The remaining 20 genomes of non-type strains, e.g., Paenibacillus thiaminolyticus JCM 7540, showed high similarities to the previously reported genomes of the same species of type- or non-type strains. Potential for previously unrecognized metabolism Genome analysis of isolates not only enables the prediction of previously unrecognized metabolic processes by identifying known genes but also facilitates the discovery of novel pathways underlying experimentally validated metabolisms. In the determined genomes, we identified genes associated with a variety of known metabolic functions (Table S4). Overall, the gene context is largely consistent with reported metabolic activities characterized through cultivation. For example, key genes for methanogenesis ( mcr for methyl-coenzyme M reductase) and methane oxidation ( pmo for particulate methane monooxygenase) were detected in the genomes of methanogenic archaea belonging to Methanobacteriota , such as Methanobacterium movens JCM 15415 T , and methane-oxidizing bacteria belonging to Pseudomonadota , such as Methylosoma difficile JCM 14076 T , respectively (Table S4). However, several predicted metabolic capabilities remain unverified. In this study, we further focused on the potential for carbon fixation and secondary metabolite production, both of which are critical for applications in carbon neutrality and medical and agricultural sciences, directly relevant to human activities. Potential for carbon fixation To date, seven pathways are known to be involved in autotrophic carbon fixation ( 46 , 47 ), including the Calvin-Benson-Bassham (CBB) cycle, the reverse tricarboxylic acid (rTCA) cycle, and the Wood–Ljungdahl (WL) pathway. 
Additionally, some autotrophs can fix CO₂ via the reversed oxidative TCA (roTCA) cycle, which does not require key enzymes such as citryl-CoA synthetase (CCS) or ATP-citrate lyase (ACL) in the rTCA cycle, but is driven only by the enzymes used in the “normal” oxidative TCA cycle ( 48 , 49 ) with a key enzyme of ferredoxin-dependent 2-oxoglutarate synthase ( 50 ). We showed that 67 genomes possessed the complete or nearly complete gene set with the key genes for the CBB cycle, rTCA cycle, WL pathway, and roTCA cycle ( Fig. 3A ; Table S5). As expected, most of the 67 strains have been reported to be autotrophs. However, we also found the key genes in the genomes of some strains that have not been reported as autotrophs. For example, Mycolicibacterium crocinum JCM 16369 T and Mycolicibacterium pallens JCM 16370 T have not been tested for the capability of carbon fixation ( 51 ), whereas their complete genomes possessed the complete gene set for the CBB cycle. Indeed, some autotrophic species in the genus Mycolicibacterium have been reported ( 52 ), suggesting that the two strains may also be capable of autotrophic growth. Download figure Open in new tab Fig. 3. Carbon fixation pathways encoded in the genomes. ( A ) Alluvial diagram showing the phylum-level classification of the 67 strains possessing key genes for each carbon fixation pathway. WL, the Wood–Ljungdahl pathway; roTCA, the reversed oxidative tricarboxylic acid cycle; rTCA, the reverse tricarboxylic acid cycle; CBB, the Calvin-Benson-Bassham cycle. ( B ) Presence of key genes for each carbon fixation pathway in the genomes of the four autotrophic strains highlighted in the box. *A gene for RuBisCO form III, but not forms I or II, was detected in the genome of Pyrodictium brockii JCM 9392 T (see the main text for details). 
CCS, citryl-CoA synthetase; CCL, citryl-CoA lyase; ACL, ATP-citrate lyase; CODH, carbon-monoxide dehydrogenase; ACS, acetyl-CoA synthase; PRK, phosphoribulokinase; Rbc, ribulose-bisphosphate carboxylase; CS, citrate synthase; OFOR, 2-oxoacid:ferredoxin oxidoreductases; MDH, malate dehydrogenase. Notably, as shown in Fig. 3B , the key genes for the seven carbon fixation pathways, and even for the roTCA cycle, were only incompletely found in the genomes of the four strains, i.e., Pyrodictium brockii JCM 9392 T ( 53 ), Desulfonauticus autotrophicus JCM 13028 T ( 54 ), Desulfonatronum parangueonense JCM 31598 T ( 55 ), and Salinisphaera hydrothermalis JCM 15514 T ( 56 ), all of which have been reported as autotrophs. Except for JCM 31598 T , complete genomes of the three strains were determined in this study. Therefore, they potentially fix CO₂ via unknown carbon fixation pathways. In the case of S. hydrothermalis JCM 15514 T , the gene for ribulose-1,5-bisphosphate carboxylase/oxygenase (RuBisCO), a key enzyme of the CBB cycle, has been detected by PCR cloning-sequencing ( 56 ). However, neither the RuBisCO gene nor other key genes for carbon fixation were found in either the determined complete genome sequence or the previously reported draft genome sequence (accession no. APNE00000000). In the complete genome of P. brockii JCM 9392 T , we found a gene for RuBisCO form III, which may not be involved in carbon fixation ( 57 ), and no gene for phosphoribulokinase (PRK), another key enzyme of the CBB cycle. For the above four strains, further analyses including re-evaluation of their autotrophy will be needed to reveal the presence of novel carbon fixation pathways. Potential for secondary metabolite production Bioactive secondary metabolites, including antibiotics, are crucial targets for applications in medicine and agriculture, as well as for understanding interactions among organisms in ecology. 
The genes responsible for producing secondary metabolites are often encoded in biosynthetic gene clusters (BGCs), which have been found in the genomes of diverse bacteria and archaea ( 58 - 60 ). We identified 1,696 BGCs in 281 of the 351 determined genomes ( Fig. 4 ; Table S6), not only from aerobic bacteria including well-studied Streptomyces species of the phylum Actinomycetota , but also from archaea and anaerobic bacteria, which have been less studied ( 61 ). Of the 235 complete genomes, 222 (94.9%) possessed one or more BGCs. The BGC counts were roughly correlated with the genome sizes in our dataset ($r^2 = 0.524$) ( Fig. 4A ), a trend that has been previously reported for Actinomycetota genomes ( 62 ). The types of BGCs, such as non-ribosomal peptide synthetases (NRPSs), polyketide synthases (PKSs), and ribosomally synthesized and post-translationally modified peptides (RiPPs), varied among taxa ( Fig. 4B ). For instance, PKS-categorized BGCs were only found in the bacterial genomes. Terpene-categorized BGCs were rare in the genomes of Methanobacteriota and Thermodesulfobacteriota , both of which include obligate anaerobes. In contrast, RiPP-categorized BGCs were widely detected among the bacterial and archaeal phyla. Download figure Open in new tab Fig. 4. Biosynthetic gene clusters (BGCs) encoded in the genomes. ( A ) Plot showing the relationship between genome size and the number of BGCs. ( B ) Bar chart showing the number of BGCs for each category. RiPP, ribosomally synthesized and post-translationally modified peptide; PKS, polyketide synthase; NRPS, non-ribosomal peptide synthetase. Over 20 BGCs per genome were exclusively detected from the phylum Actinomycetota , with the highest number (36 BGCs) found in the genome of Streptomyces phaeofaciens JCM 4814 T , although some Streptomyces species have been reported to harbor over 70 BGCs per genome ( 62 , 63 ). 
Notably, 10 or more BGCs per genome were detected from the phyla Bacillota, Pseudomonadota , and Thermodesulfobacteriota , in addition to Actinomycetota . In obligate anaerobic bacteria, up to 11 BGCs were found in the genomes of Desulfoconvexum algidum JCM 16085 T ( Thermodesulfobacteriota ) and Clostridium nitrophenolicum JCM 14030 T ( Bacillota ), whose genomes were first determined and completed in this study. In contrast, archaeal genomes had fewer BGCs, which correlated with their smaller genome sizes ( Fig. 4A ). The highest number (8 BGCs) was found in the aerobic halophile Halorubellus litoreus JCM 17117 T , followed by 7 BGCs in the obligate anaerobic methanogen Methanobacterium movens JCM 15415 T . Although the functions of most secondary metabolites produced by BGCs remain unknown, the determined genomes of the isolated strains, especially those of not-well-studied archaea and anaerobic bacteria, will serve as a valuable foundation for genome-based mining of novel bioactive compounds. Conclusion In this study, we determined the whole-genome sequences of 351 prokaryotic strains, spanning a broad range of archaeal and bacterial taxa, including previously unsequenced “fastidious” type strains. The dataset of complete or near-complete genomes provides a robust basis for more accurate assessments of genome completeness and contamination, for extending our knowledge of prokaryotic extra-chromosomal DNAs, and for describing novel taxa and reclassifications. Furthermore, it also enables the prediction of previously unrecognized metabolic processes and the discovery of novel pathways underlying experimentally validated metabolisms. Importantly, the genome dataset was constructed from isolated strains publicly available from culture collections, and therefore, genome-driven hypotheses can be verified by cultivation experiments with the isolates. 
AUTHOR CONTRIBUTIONS Shingo Kato: Conceptualization, Formal analysis, Methodology, Validation, Writing—original draft, review & editing. Sachiko Masuda, Arisa Shibata, Takashi Itoh, and Mitsuo Sakamoto: Formal analysis, Methodology, Writing—review & editing. Ken Shirasu and Moriya Ohkuma: Conceptualization, Validation, Writing—review & editing. SUPPLEMENTARY DATA Supplementary Data are available at journal online. CONFLICT OF INTEREST The authors have no conflict of interest to declare. FUNDING This work was supported by the value addition subprogram of National BioResource Project (NBRP) of the Ministry of Education, Culture, Sports, Science and Technology (MEXT), Japan, and partially by Japan Science and Technology Agency (JST) GteX Biomanufacturing Area (JPMJGX23B0, JPMJGX23B2), RIKEN TRIP initiative fieldomics, and JSPS KAKENHI Grant Number JP24K00747. The microbial strains and genomic DNAs used in this study were provided by the RIKEN BRC through NBRP. DATA AVAILABILITY Sequence data have been deposited in GenBank/DDBJ/EMBL: raw reads under BioProject numbers PRJDB20344 and PRJDB20346, and genome assemblies under BioProject numbers PRJDB20693 and PRJDB20694. Supplementary data are available on the figshare website ( https://figshare.com/s/b650f6ad54b2e15244fd ). ACKNOWLEDGEMENTS We thank Hiromi Omokawa, Nahomi Noda, Kai Zhang, Naomi Sakurai, Nagisa Sato, Michiru Shimizu, and Koji Suzu for their technical assistance. Footnotes https://figshare.com/s/b650f6ad54b2e15244fd REFERENCES 1. ↵ Mukherjee S , Seshadri R , Varghese NJ et al. 1,003 reference genomes of bacterial and archaeal isolates expand coverage of the tree of life . Nat Biotechnol , 2017 ; 35 : 676 – 683 . OpenUrl CrossRef PubMed 2. ↵ Wu L , Mccluskey K , Desmeth P et al. The global catalogue of microorganisms 10K type strain sequencing project: closing the genomic gaps for the validly published prokaryotic and fungi species . Gigascience , 2018 ; 7 : giy026 – giy026 . OpenUrl PubMed 3. 
↵ Chun J , Oren A , Ventosa A et al. Proposed minimal standards for the use of genome data for the taxonomy of prokaryotes . Int J Syst Evol Microbiol , 2018 ; 68 : 461 – 466 . OpenUrl CrossRef PubMed 4. ↵ Riesco R and Trujillo ME . Update on the proposed minimal standards for the use of genome data for the taxonomy of prokaryotes . Int J Syst Evol Microbiol , 2024 ; 74 . 5. ↵ Nguyen SV , Puthuveetil NP , Petrone JR et al. The ATCC genome portal: 3,938 authenticated microbial reference genomes . Microbiol Resour Announc , 2024 ; 13 : e0104523 . OpenUrl CrossRef PubMed 6. ↵ Dicks J , Fazal MA , Oliver K et al. NCTC3000: a century of bacterial strain collecting leads to a rich genomic data resource . Microb Genom , 2023 ; 9 . 7. ↵ Chen S. Ultrafast one-pass FASTQ data preprocessing, quality control, and deduplication using fastp . iMeta , 2023 ; 2 : e107 . OpenUrl CrossRef 8. ↵ De Coster W and Rademakers R. NanoPack2: population-scale evaluation of long-read sequencing data . Bioinformatics , 2023 ; 39 : btad311 . OpenUrl CrossRef PubMed 9. ↵ Kolmogorov M , Yuan J , Lin Y et al. Assembly of long, error-prone reads using repeat graphs . Nat Biotechnol , 2019 ; 37 : 540 – 546 . OpenUrl CrossRef PubMed 10. ↵ Wick RR , Judd LM , Gorrie CL et al. Unicycler: resolving bacterial genome assemblies from short and long sequencing reads . PLoS Comput Biol , 2017 ; 13 : e1005595 . OpenUrl CrossRef PubMed 11. ↵ Bouras G , Houtak G , Wick RR et al. Hybracter: enabling scalable, automated, complete and accurate bacterial genome assemblies . Microb Genom , 2024 ; 10 . 12. ↵ Altschul SF , Madden TL , Schaffer AA et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs . Nucleic Acids Res , 1997 ; 25 : 3389 – 3402 . OpenUrl CrossRef PubMed Web of Science 13. ↵ Tanizawa Y , Fujisawa T and Nakamura Y. DFAST: a flexible prokaryotic genome annotation pipeline for faster genome publication . Bioinformatics , 2018 ; 34 : 1037 – 1039 . 
OpenUrl CrossRef PubMed 14. ↵ Zhou Z , Tran PQ , Breister AM et al. METABOLIC: high-throughput profiling of microbial genomes for functional traits, metabolism, biogeochemistry, and community-scale functional networks . Microbiome , 2022 ; 10 : 33 . OpenUrl CrossRef PubMed 15. ↵ Ogata H , Goto S , Sato K et al. KEGG: Kyoto Encyclopedia of Genes and Genomes . Nucleic Acids Res , 1999 ; 27 : 29 – 34 . OpenUrl CrossRef PubMed Web of Science 16. ↵ Sondergaard D , Pedersen CN and Greening C. HydDB: a web tool for hydrogenase classification and analysis . Sci Rep , 2016 ; 6 : 34212 . OpenUrl CrossRef PubMed 17. ↵ Zhang H , Yohe T , Huang L et al. dbCAN2: a meta server for automated carbohydrate-active enzyme annotation . Nucleic Acids Res , 2018 ; 46 : W95 – W101 . OpenUrl CrossRef PubMed 18. ↵ Rawlings ND , Barrett AJ , Thomas PD et al. The MEROPS database of proteolytic enzymes, their substrates and inhibitors in 2017 and a comparison with peptidases in the PANTHER database . Nucleic Acids Res , 2018 ; 46 : D624 – D632 . OpenUrl CrossRef PubMed 19. ↵ Cantalapiedra CP , Hernandez-Plaza A , Letunic I et al. eggNOG-mapper v2: functional annotation, orthology assignments, and domain prediction at the metagenomic scale . Mol Biol Evol , 2021 ; 38 : 5825 – 5829 . OpenUrl CrossRef PubMed 20. ↵ Zimmermann J , Kaleta C and Waschina S. gapseq: informed prediction of bacterial metabolic pathways and reconstruction of accurate metabolic models . Genome Biol , 2021 ; 22 : 81 . OpenUrl CrossRef PubMed 21. ↵ Blin K , Shaw S , Augustijn HE et al. antiSMASH 7.0: new and improved predictions for detection, regulation, chemical structures and visualisation . Nucleic Acids Res , 2023 ; 51 : W46 – W50 . OpenUrl CrossRef PubMed 22. ↵ Camargo AP , Roux S , Schulz F et al. Identification of mobile genetic elements with geNomad . Nat Biotechnol , 2024 ; 42 : 1303 – 1312 . OpenUrl CrossRef PubMed 23. ↵ Robertson J and Nash JHE . 
MOB-suite: software tools for clustering, reconstruction and typing of plasmids from draft assemblies . Microb Genom , 2018 ; 4 . 24. ↵ Guo J , Bolduc B , Zayed AA et al. VirSorter2: a multi-classifier, expert-guided approach to detect diverse DNA and RNA viruses . Microbiome , 2021 ; 9 : 37 . OpenUrl CrossRef PubMed 25. ↵ Parks DH , Imelfort M , Skennerton CT et al. CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes . Genome Res , 2015 ; 25 : 1043 – 1055 . OpenUrl Abstract / FREE Full Text 26. ↵ Chklovski A , Parks DH , Woodcroft BJ et al. CheckM2: a rapid, scalable and accurate tool for assessing microbial genome quality using machine learning . Nat Methods , 2023 ; 20 : 1203 – 1212 . OpenUrl CrossRef PubMed 27. ↵ Chaumeil PA , Mussig AJ , Hugenholtz P et al. GTDB-Tk v2: memory friendly classification with the genome taxonomy database . Bioinformatics , 2022 ; 38 : 5315 – 5316 . OpenUrl CrossRef PubMed 28. ↵ Parks DH , Chuvochina M , Rinke C et al. GTDB: an ongoing census of bacterial and archaeal diversity through a phylogenetically consistent, rank normalized and complete genome-based taxonomy . Nucleic Acids Res , 2022 ; 50 : D785 – D794 . OpenUrl CrossRef PubMed 29. ↵ Jain C , Rodriguez RL , Phillippy AM et al. High throughput ANI analysis of 90K prokaryotic genomes reveals clear species boundaries . Nat Commun , 2018 ; 9 : 5114 . OpenUrl CrossRef PubMed 30. ↵ Kim D , Park S and Chun J. Introducing EzAAI: a pipeline for high throughput calculations of prokaryotic average amino acid identity . J Microbiol , 2021 ; 59 : 476 – 480 . OpenUrl CrossRef PubMed 31. ↵ Holzer M. POCP-nf: an automatic Nextflow pipeline for calculating the percentage of conserved proteins in bacterial taxonomy . Bioinformatics , 2024 ; 40 : btae175 . OpenUrl CrossRef PubMed 32. ↵ Meier-Kolthoff JP , Carbasse JS , Peinado-Olarte RL et al. 
TYGS and LPSN: a database tandem for fast and reliable genome-based classification and nomenclature of prokaryotes . Nucleic Acids Res , 2022 ; 50 : D801 – D807 . OpenUrl CrossRef PubMed 33. ↵ Dicenzo GC and Finan TM . The divided bacterial genome: structure, function, and evolution . Microbiol Mol Biol Rev , 2017 ; 81 . 34. ↵ Goris J , Konstantinidis KT , Klappenbach JA et al. DNA-DNA hybridization values and their relationship to whole-genome sequence similarities . Int J Syst Evol Microbiol , 2007 ; 57 : 81 – 91 . OpenUrl CrossRef PubMed Web of Science 35. ↵ Konstantinidis KT and Tiedje JM . Genomic insights that advance the species definition for prokaryotes . Proc Natl Acad Sci U S A , 2005 ; 102 : 2567 – 2572 . OpenUrl Abstract / FREE Full Text 36. ↵ Hu P , Tom L , Singh A et al. Genome-resolved metagenomic analysis reveals roles for candidate phyla and other microbial community members in biogeochemical transformations in oil reservoirs . mBio , 2016 ; 7 : e01669 – 01615 . OpenUrl CrossRef PubMed 37. ↵ Von Klein D , Arab H , Volker H et al. Methanosarcina baltica, sp. nov., a novel methanogen isolated from the Gotland Deep of the Baltic Sea . Extremophiles , 2002 ; 6 : 103 – 110 . OpenUrl CrossRef PubMed 38. ↵ Konstantinidis KT and Tiedje JM . Prokaryotic taxonomy and phylogeny in the genomic era: advancements and challenges ahead . Curr Opin Microbiol , 2007 ; 10 : 504 – 509 . OpenUrl CrossRef PubMed Web of Science 39. Hackmann TJ . Setting new boundaries of 16S rRNA gene identity for prokaryotic taxonomy . Int J Syst Evol Microbiol , 2025 ; 75 . 40. Yarza P , Yilmaz P , Pruesse E et al. Uniting the classification of cultured and uncultured bacteria and archaea using 16S rRNA gene sequences . Nat Rev Microbiol , 2014 ; 12 : 635 – 645 . OpenUrl CrossRef PubMed 41. ↵ Qin QL , Xie BB , Zhang XY et al. A proposed genus boundary for the prokaryotes based on genomic insights . J Bacteriol , 2014 ; 196 : 2210 – 2215 . OpenUrl Abstract / FREE Full Text 42. 
↵ Kanoksilapatham W , Gonzalez JM , Maeder DL et al. A proposal to rename the hyperthermophile Pyrococcus woesei as Pyrococcus furiosus subsp. woesei . Archaea , 2004 ; 1 : 277 – 283 . OpenUrl CrossRef PubMed 43. ↵ Jolivet E , Corre E , L’haridon S et al. Thermococcus marinus sp. nov. and Thermococcus radiotolerans sp. nov., two hyperthermophilic archaea from deep-sea hydrothermal vents that resist ionizing radiation . Extremophiles , 2004 ; 8 : 219 – 227 . OpenUrl CrossRef PubMed 44. ↵ Thong-On A , Suzuki K , Noda S et al. Isolation and characterization of anaerobic bacteria for symbiotic recycling of uric acid nitrogen in the gut of various termites . Microbes Environ , 2012 ; 27 : 186 – 192 . OpenUrl CrossRef PubMed Web of Science 45. ↵ Scott KM , Williams J , Porter CMB et al. Genomes of ubiquitous marine and hypersaline Hydrogenovibrio, Thiomicrorhabdus and Thiomicrospira spp. encode a diversity of mechanisms to sustain chemolithoautotrophy in heterogeneous environments . Environ Microbiol , 2018 ; 20 : 2686 – 2708 . OpenUrl CrossRef 46. ↵ Hugler M and Sievert SM . Beyond the Calvin cycle: autotrophic carbon fixation in the ocean . Ann Rev Mar Sci , 2011 ; 3 : 261 – 289 . OpenUrl CrossRef PubMed 47. ↵ Sanchez-Andrea I , Guedes IA , Hornung B et al. The reductive glycine pathway allows autotrophic growth of Desulfovibrio desulfuricans . Nat Commun , 2020 ; 11 : 5090 . OpenUrl CrossRef PubMed 48. ↵ Nunoura T , Chikaraishi Y , Izaki R et al. A primordial and reversible TCA cycle in a facultatively chemolithoautotrophic thermophile . Science , 2018 ; 359 : 559 – 563 . OpenUrl Abstract / FREE Full Text 49. ↵ Mall A , Sobotta J , Huber C et al. Reversibility of citrate synthase allows autotrophic growth of a thermophilic bacterium . Science , 2018 ; 359 : 563 – 567 . OpenUrl Abstract / FREE Full Text 50. ↵ Steffens L , Pettinato E , Steiner TM et al. High CO 2 levels drive the TCA cycle backwards towards autotrophy . Nature , 2021 ; 592 : 784 – 788 . 
OpenUrl CrossRef PubMed 51. ↵ Hennessee CT , Seo JS , Alvarez AM et al. Polycyclic aromatic hydrocarbon-degrading species isolated from Hawaiian soils: Mycobacterium crocinum sp. nov., Mycobacterium pallens sp. nov., Mycobacterium rutilum sp. nov., Mycobacterium rufum sp. nov. and Mycobacterium aromaticivorans sp. nov . Int J Syst Evol Microbiol , 2009 ; 59 : 378 – 387 . OpenUrl CrossRef PubMed 52. ↵ Gomila M , Ramirez A , Gasco J et al. Mycobacterium llatzerense sp. nov., a facultatively autotrophic, hydrogen-oxidizing bacterium isolated from haemodialysis water . Int J Syst Evol Microbiol , 2008 ; 58 : 2769 – 2773 . OpenUrl CrossRef PubMed 53. ↵ Stetter KO , Konig H and Stackebrandt E. Pyrodictium gen. nov., a new genus of submarine disc-shaped sulphur reducing archaebacteria growing optimally at 105ºC . Syst Appl Microbiol , 1983 ; 4 : 535 – 551 . OpenUrl CrossRef PubMed Web of Science 54. ↵ Mayilraj S , Kaksonen AH , Cord-Ruwisch R et al. Desulfonauticus autotrophicus sp. nov., a novel thermophilic sulfate-reducing bacterium isolated from oil-production water and emended description of the genus Desulfonauticus . Extremophiles , 2009 ; 13 : 247 – 255 . OpenUrl CrossRef PubMed 55. ↵ Perez Bernal MF , Souza Brito EM , Bartoli M et al. Desulfonatronum parangueonense sp. nov., a sulfate-reducing bacterium isolated from sediment of an alkaline crater lake . Int J Syst Evol Microbiol , 2017 ; 67 : 4999 – 5005 . OpenUrl CrossRef PubMed 56. ↵ Crespo-Medina M , Chatziefthimiou A , Cruz-Matos R et al. Salinisphaera hydrothermalis sp. nov., a mesophilic, halotolerant, facultatively autotrophic, thiosulfate-oxidizing gammaproteobacterium from deep-sea hydrothermal vents, and emended description of the genus Salinisphaera . Int J Syst Evol Microbiol , 2009 ; 59 : 1497 – 1503 . OpenUrl CrossRef PubMed Web of Science 57. ↵ Sato T , Atomi H and Imanaka T. Archaeal type III RuBisCOs function in a pathway for AMP metabolism . Science , 2007 ; 315 : 1003 – 1006 . 
OpenUrl Abstract / FREE Full Text 58. ↵ Dinglasan JLN , Otani H , Doering DT et al. Microbial secondary metabolites: advancements to accelerate discovery towards application . Nat Rev Microbiol , 2025 . 59. Zdouc MM , Blin K , Louwen NLL et al. MIBiG 4.0: advancing biosynthetic gene cluster curation through global collaboration . Nucleic Acids Res , 2025 ; 53 : D678 – D690 . OpenUrl CrossRef PubMed 60. ↵ Udwary DW , Doering DT , Foster B et al. The secondary metabolism collaboratory: a database and web discussion portal for secondary metabolite biosynthetic gene clusters . Nucleic Acids Res , 2025 ; 53 : D717 – D723 . OpenUrl CrossRef PubMed 61. ↵ Scherlach K and Hertweck C. Mining and unearthing hidden biosynthetic potential . Nat Commun , 2021 ; 12 : 3864 . OpenUrl CrossRef PubMed 62. ↵ Seshadri R , Roux S , Huber KJ et al. Expanding the genomic encyclopedia of Actinobacteria with 824 isolate reference genomes . Cell Genom , 2022 ; 2 : 100213 . OpenUrl CrossRef PubMed 63. ↵ Belknap KC , Park CJ , Barth BM et al. Genome mining of biosynthetic and chemotherapeutic gene clusters in Streptomyces bacteria . Sci Rep , 2020 ; 10 : 2003 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted May 27, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Whole-genome sequencing of diverse 351 cultured prokaryotes including yet-unsequenced fastidious type strains Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. 
Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Whole-genome sequencing of diverse 351 cultured prokaryotes including yet-unsequenced fastidious type strains Shingo Kato , Sachiko Masuda , Arisa Shibata , Takashi Itoh , Mitsuo Sakamoto , Ken Shirasu , Moriya Ohkuma bioRxiv 2025.05.25.655001; doi: https://doi.org/10.1101/2025.05.25.655001 Share This Article: Copy Citation Tools Whole-genome sequencing of diverse 351 cultured prokaryotes including yet-unsequenced fastidious type strains Shingo Kato , Sachiko Masuda , Arisa Shibata , Takashi Itoh , Mitsuo Sakamoto , Ken Shirasu , Moriya Ohkuma bioRxiv 2025.05.25.655001; doi: https://doi.org/10.1101/2025.05.25.655001 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Microbiology Subject Areas All Articles Animal Behavior and Cognition (7618) Biochemistry (17636) Bioengineering (13859) Bioinformatics (41847) Biophysics (21401) Cancer Biology (18535) Cell Biology (25423) Clinical Trials (138) Developmental Biology (13353) Ecology (19860) Epidemiology (2067) Evolutionary Biology (24287) Genetics (15582) Genomics (22463) Immunology (17701) Microbiology (40300) Molecular Biology (17141) Neuroscience (88432) Paleontology (666) Pathology (2825) Pharmacology and Toxicology (4813) Physiology (7633) Plant Biology (15107) Scientific Communication and Education (2042) Synthetic Biology (4285) Systems Biology (9808) Zoology (2267)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00