Full text
41,599 characters
· extracted from
preprint-html
· click to expand
A Practical Resource for Multi-Omics Data Integration in Microbial Systems | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results A Practical Resource for Multi-Omics Data Integration in Microbial Systems View ORCID Profile Warasinee Mujchariyakul , View ORCID Profile Abderrahman Hachani , View ORCID Profile Timothy P. Stinear , View ORCID Profile Kim-Anh LêCao , View ORCID Profile Benjamin P. Howden , View ORCID Profile Calum J. 
Walsh , View ORCID Profile Romain Guérillot doi: https://doi.org/10.1101/2025.11.19.689359 Warasinee Mujchariyakul 1 Department of Microbiology and Immunology, The University of Melbourne at the Peter Doherty Institute for Infection and Immunity , Melbourne, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Warasinee Mujchariyakul Abderrahman Hachani 1 Department of Microbiology and Immunology, The University of Melbourne at the Peter Doherty Institute for Infection and Immunity , Melbourne, Victoria, Australia 2 Centre for Pathogen Genomics, The University of Melbourne , Melbourne, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Abderrahman Hachani Timothy P. Stinear 1 Department of Microbiology and Immunology, The University of Melbourne at the Peter Doherty Institute for Infection and Immunity , Melbourne, Victoria, Australia 2 Centre for Pathogen Genomics, The University of Melbourne , Melbourne, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Timothy P. Stinear Kim-Anh LêCao 3 Melbourne Integrative Genomics, School of Mathematics and Statistics, University of Melbourne , Melbourne, Victoria, 3000, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kim-Anh LêCao Benjamin P. 
Howden 1 Department of Microbiology and Immunology, The University of Melbourne at the Peter Doherty Institute for Infection and Immunity , Melbourne, Victoria, Australia 2 Centre for Pathogen Genomics, The University of Melbourne , Melbourne, Victoria, Australia 4 Microbiological Diagnostic Unit Public Health Laboratory, Department of Microbiology and Immunology, Doherty Institute, University of Melbourne , Melbourne, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Benjamin P. Howden Calum J. Walsh 1 Department of Microbiology and Immunology, The University of Melbourne at the Peter Doherty Institute for Infection and Immunity , Melbourne, Victoria, Australia 2 Centre for Pathogen Genomics, The University of Melbourne , Melbourne, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Calum J. Walsh Romain Guérillot 1 Department of Microbiology and Immunology, The University of Melbourne at the Peter Doherty Institute for Infection and Immunity , Melbourne, Victoria, Australia 2 Centre for Pathogen Genomics, The University of Melbourne , Melbourne, Victoria, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Romain Guérillot For correspondence: romain.guerillot{at}unimelb.edu.au Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract The increasing availability of microbial multi-omics datasets has created new opportunities to explore complex biological systems. However, exploration remains limited by the lack of accessible, reproducible workflows that integrate multiple omics layers and deliver easily interpretable visualisations of functional and pathway-level insights. 
Here, we present an R-based workflow for integrated analysis and network-based pathway visualisation of microbial multi-omic data. The workflow enables microbiologists to analyse transcriptomic, proteomic, and metabolomic datasets either individually or in combination, apply univariate and multivariate approaches for biomarker discovery, and generate easily interpretable visualisations of functional and pathway-level signatures. Implemented as multi-step R Markdown, it leverages widely-used open-source tools, including mixOmics for biomarker identification and omics integration and clusterProfiler for pathway and functional enrichment analyses, with a new network-based integration and visualisation. Its flexible design supports a range of experimental structures and facilitates comparisons across strains, omics layers, and conditions, making it suitable for researchers with limited computational expertise. We demonstrate its utility using a publicly available Streptococcus pyogenes dataset, revealing both shared and strain-specific functional responses to human serum. This workflow provides a comprehensive and adaptable framework for systematic multi-omics analysis, improving accessibility and reproducibility and facilitating functional interpretation of microbial responses to diverse environments. Data summary The code for this workflow is available on GitHub ( https://github.com/warasinee/Multiomics_Case_Study ). Datasets from our previously published study ( 1 ) were used to showcase the functionality and practical utility of the workflow. The multi-omics Streptococcus pyogenes dataset used in this study is available in the following public repositories: Gene Expression Omnibus ( GSE152821 ; GSE152822 ; GSE152823 ; GSE152824 , GSE152826 ), Proteomics Identifications Database ( PXD020863 ), and MetaboLights ( MTBLS2324 ) ( 1 ). 
Impact Statement High-throughput omics technologies are transforming our understanding of how microbes adapt to diverse environments and cause disease. The integration of diverse omics layers at a systems level, combining transcriptomics, proteomics, and metabolomics data to identify signature molecules, pathways, and their interactions, remains challenging. Here, we present an R-based bioinformatic workflow designed for microbiology research, which connects existing tools and customised functions to streamline data integration and interpretation. The workflow links biomarkers to functional pathways, visualises results in an interactive network context, and is designed for flexibility and reproducibility. This practical resource lowers technical barriers to microbial multi-omics analysis, providing user-friendly access for dataset exploration and integration and supporting interpretation of system-level microbial adaptation in environmental, clinical, and industrial contexts. Introduction Advances in high-throughput technologies have enabled microbiologists to generate diverse omics datasets, capturing gene expression, protein abundance and metabolite levels, within the same system. Combined, these data offer a multi-layered view of microbial physiology and adaptation, providing opportunities to uncover coordinated responses that single-omics analysis cannot capture. This systems-level perspective contrasts with the traditional reductionist approach often used in molecular microbiology ( 2 ), which dissects complex systems into individual components and examines their interactions ( 3 ). While reductionism remains essential for elucidating molecular mechanisms, it overlooks the emergent behaviours that arise only when the system is considered as a whole. 
Consequently, multi-omics integration is becoming a cornerstone of systems microbiology, with applications spanning from bacterial metabolism and stress responses to host–microbe interactions and microbiome ecology ( 1 , 4 , 5 ). Despite the promise of multi-omics, analysing these datasets remains challenging. Current workflows usually take one of two approaches. In early integration, different omics layers (i.e. genes, proteins, and metabolites) are analysed together from the outset using multivariate methods. A widely used example is DIABLO (Data Integration Analysis for Biomarker discovery using Latent variable approaches for Omics studies), which implements supervised multiblock partial least squares discriminant analysis (PLS-DA) to uncover correlated features across datasets in relation to experimental conditions ( 5 ). This strategy helps reveal coordinated biological signals across omics layers. In contrast, late integration involves analysing each dataset separately and then combining the results afterwards, often through pathway enrichment approaches. Two common methods are Over-Representation Analysis (ORA), which tests whether certain pathways contain more differentially expressed genes than expected by chance, and Gene Set Enrichment Analysis (GSEA), which evaluates whether predefined sets of genes are enriched at the top or bottom of a ranked list of all genes ( 6 ). Both strategies are valuable, but they are rarely implemented side-by-side in a single, reproducible framework. Moreover, many existing pipelines require users to be proficient in multiple programming languages, and results are often scattered across different tools, which complicates reproducibility and makes biological interpretation less straightforward. To help address these issues, we present an R Markdown-based workflow that integrates established packages ( mixOmics , clusterProfiler ) with custom code and functions to connect outputs across stages. 
The workflow supports both univariate and multivariate analyses, implements early and late integration strategies, and links results to pathway-level network visualisation. By situating biomarkers within functional networks, the workflow enables microbiologists to explore conserved and condition-specific responses across strains, omics layers, and experimental conditions. Rather than introducing new algorithms, our contribution is to integrate established methods into a coherent, R-based modular workflow. This resource is designed to lower technical barriers, enhance reproducibility, and support the functional interpretation of multi-omics data, helping microbiologists translate complex datasets into biological insight across diverse microbial systems. Existing Methods and Their Integration in the Workflow Traditional differential expression (DE) analysis remains a cornerstone of omics research, identifying individual genes or molecules that change between conditions. While powerful, this univariate approach considers each feature in isolation and may overlook coordinated patterns. Multivariate methods, such as those implemented in the mixOmics framework ( 7 , 8 ), address this limitation by analysing many features simultaneously. Techniques like principal component analysis (PCA) allow researchers to explore overall data structure, while (sparse) partial least squares (PLS) and PLS-discriminant analysis (PLS-DA) highlight sets of molecules that best explain or discriminate between biological conditions. For multi-omics integration, multiblock PLS-DA (i.e. DIABLO) extends these methods to uncover signatures that are consistent across different omic layers, providing a system-level view of bacterial responses ( 7 , 8 ). Importantly, univariate and multivariate analyses often yield distinct yet complementary insights that together help explain complex biological systems. 
The signature molecules identified through these statistical approaches can then be combined with prior knowledge of underlying molecular pathways and functional annotations using pathway enrichment analysis (PEA) such as ORA and GSEA. To further aid interpretation, we developed a pathway integration visualisation that reveals conserved functional responses and supports application to a wide range of multi-omics study designs and biological questions. This network-based integration of functional enrichment can be used to compare responses across bacterial strains, or to combine results from different omics layers and multiple tested conditions when focusing on a single strain ( Figure 1 ). The workflow facilitates the identification of biomarkers and signature molecules, contextualising them within pathways and functional interactions to support hypothesis generation and guide functional validation. Download figure Open in new tab Figure 1. Workflow for Multi-Omics Data Integration and Functional Network Interpretation. Signature molecular features identified from multivariate (i.e., (s)PLS-DA, DIABLO) and univariate (i.e., differential expression analysis) approaches are used as inputs for pathway/functional enrichment analysis (PEA). The resulting pathway-level functional relationships are visualised as networks, with nodes representing pathways or other significantly enriched functional annotations and edges connecting functionally related pathways and functions. Network-based integration of functional enrichment enables the identification of shared functional responses across different strains, omics layers, or multiple tested conditions. The core elements of the workflow are highlighted in purple. Comprehensive Overview of the Workflow This workflow leverages publicly available R packages and commonly used software, supplemented with custom R functions to connect different analyses’ outputs. 
The workflow is implemented in R Markdown documents that interleave executable code with explanatory text, creating an annotated, step-by-step guide for visualising and interpreting omics results. It supports two major analyses: (i) multi-omics data integration using mixOmics , and (ii) pathway enrichment and network analysis ( Figure 2 ). Key steps include: Dimensionality reduction and correlation analysis using PCA and PLS methods in mixOmics , enabling exploration of global data structure and relationships across omics layers. Supervised integration and classification with DIABLO and sPLS-DA from mixOmics , which uncover correlated features across datasets and highlight discriminative signatures linked to experimental conditions. Pathway enrichment analysis (PEA) through clusterProfiler , supporting both ORA and GSEA ( 6 ), and allowing flexible use of inputs from univariate DE results or multivariate feature rankings. Network-based integration of functional enrichment of enriched pathways using Cytoscape via the RCy3 interface, with additional interactive visualisation through a dedicated simple shiny app. Download figure Open in new tab Figure 2. Overview of the integration of multi-omics data, pathway-level network analysis, and interpretation. The workflow combines dimensionality reduction, supervised integration, and network visualisation to identify key biomarkers and functional relationships across omics layers, ultimately leading to network-based analysis of enriched pathways to reveal connected biological processes. Together, these components situate biomarkers within functional networks, revealing conserved and condition-specific responses across strains, omics layers, and experimental conditions. The workflow thus lowers technical barriers, enhances reproducibility, and supports hypothesis generation by linking statistical outputs directly to biological interpretation. 
Preprocessing, Functional Annotation, and Data Inputs for the Workflow Preprocessing and quality control are essential prerequisites that lie outside the workflow but ensure reliable downstream analysis. For transcriptomic data, this typically involves read quality assessment, trimming, alignment, and normalization of counts to correct sequencing biases. Proteomic datasets require spectral QC, peptide identification with false discovery rate control, and normalization of quantification values to reduce technical variability. Metabolomic data undergo peak detection and alignment, filtering of outliers, imputation of missing values, and normalization against internal standards to account for instrument drift. These steps collectively minimize noise, remove low-quality signals, and enhance the biological relevance of the inputs carried forward into downstream multi-omics integration and functional analysis. For RNA-seq data, DESeq2 ( 9 ), edgeR ( 10 , 11 ), and limma-voom ( 12 ) are widely used statistical R packages for normalisation and the identification of differentially expressed genes from sequence read counts, with the latter two implemented in the easy-to-use web tool degust ( 13 ). MetaboAnalyst ( 14 ), MZmine 3 ( 15 ), and XCMS ( 16 ) are popular processing pipelines from raw mass spectrometry (MS) data. Functional gene annotation is essential for linking features to biological pathways and categories. For functional mapping, dedicated microbial annotation pipelines such as Prokka ( 17 ), Bakta ( 18 ), BASys2 ( 19 ), MicrobeAnnotator ( 20 ) and eggNOG-mapper ( 21 ) are recommended, as they provide per-feature assignments to KO, GO, KEGG, eggNOG, or COG terms, enabling consistent functional annotation across datasets ( 22 ). 
The workflow requires three main inputs: (i) Normalised abundance matrices (features × samples) for each omics layer; (ii) Differential expression results table containing identifiers, experimental metadata, and statistical outputs (e.g., logFC, FDR); and (iii) Functional annotation mappings that link features to pathways or functional terms. Detailed specifications of input file formats, recommended preprocessing steps, and example datasets are provided in the GitHub repository to ensure reproducibility and compatibility with the workflow. Case study: Multi-omic characterisation of Streptococcus pyogenes response to human serum The workflow was originally developed to characterise the Staphylococcus aureus dataset described in ( 23 ). Here, we demonstrate its broader applicability using a multi-omic dataset for Streptococcus pyogenes , which includes transcriptomic, proteomic, and metabolomic data ( 1 ). In this dataset, samples were collected from five clinically relevant strains grown under two conditions - RPMI or human serum - with six biological replicates per condition, yielding a total of 60 samples. Unsupervised, single-omic analysis To assess the similarity of bacterial responses to human serum, we performed Principal Component Analysis (PCA) across all omics datasets. The primary source of variation corresponded to the growth condition, indicating distinct global profiles between serum- and RPMI-grown bacteria ( Figure 3A ). Each dataset showed clear separation between conditions, suggesting that S. pyogenes undergoes a substantial adaptation to human serum. Shared (conserved) responses across strains accounted for most of the variation, whereas strain-specific effects were comparatively minor ( Figure 3B ). Notably, the transcriptomic and metabolomic profiles of strain SP444 differed from those of other strains, while the proteomic data showed little strain-related separation along the first two principal components ( Figure 3B-C ). 
To further explore relationships between omics layers, we applied Projection to Latent Structures (PLS), also known as Partial Least Squares. This approach models the covariance between datasets to assess whether shared information exists across layers. Pairwise PLS analyses revealed strong correlations (r > 0.9) between the first latent components of the omics datasets, highlighting a high degree of co-variation and, therefore, a coordinated biological response among the omic layers. Download figure Open in new tab Figure 3. Unsupervised, single-omic analysis of Streptococcus pyogenes response to human serum using unsupervised Principal Component Analysis. Sample plot from PCA illustrating the overall similarity between samples across transcriptomic, proteomic, and metabolomic (GC-MS) datasets. Samples are coloured by growth condition (A) or strain (B) or a combination of strain and condition (C). Supervised Single-Omic Analysis Next, we applied a supervised classification approach using the sPLS-DA framework to identify features that best discriminate between serum and culture media conditions. This analysis demonstrated the enhanced separation power of the supervised method compared to the unsupervised PCA ( Figure 3A and B ), particularly for the proteomic dataset ( Figure 4A and B ), where clustering by condition and strain becomes more distinct. Samples separated clearly by media condition along the first component, while strain-specific variation was primarily captured by the second component. Download figure Open in new tab Figure 4. Supervised single- and multi-omic analysis of Streptococcus pyogenes response to human serum. (A-B) Supervised classification using the sPLS-DA framework. Sample plots from sPLS-DA on the proteomic dataset show discrimination of samples by media condition (A) and strain (B). (C-D) Supervised multiblock sPLS-DA (DIABLO) integrating transcriptomic, proteomic, and metabolomic datasets. 
(C) Circos plot showing pairwise correlations (|r| > 0.9) between features across omics layers. (D) Loading plots illustrating the most discriminatory features along the first component for each dataset. (E) Network visualisation of correlated features, adapted from (C). Edges represent correlations, with edge colours indicating positive (orange) or negative (black) correlations. Node sizes indicate the number of interactions, and node colours represent data type: purple, transcripts; pink, proteins; green, metabolites. Supervised Multi-Omic Analysis We then employed multiblock (s)PLS-DA (DIABLO) to integrate multiple omics datasets in a supervised framework. The model was trained using labelled data corresponding to the two conditions (RPMI and human serum) and identified highly correlated features across omics layers that maximally discriminate between these conditions ( Table S1 ; Figure 4C–E ). The loading values derived from the DIABLO model ( Table S1; Figure 4D ) reflect the importance of each feature in driving this separation along the first component. Features with larger absolute loading values are the key contributors to the observed differences between conditions. The direction (sign) of these loadings further indicates condition-specific responses – positive loadings correspond to serum-enriched responses, while negative loadings correspond to RPMI-associated features. Pathway Enrichment Analysis Building on the discriminant and highly correlated variables identified by the DIABLO model, we performed pathway enrichment analysis to explore the biological context of these cross-omic associations. This analysis revealed that carbon metabolism, lipid metabolism, nucleotide metabolism, and defence mechanisms were the pathways most strongly associated with S. pyogenes response to serum. 
To explore the functional relationships among enriched pathways, we used network-based integration of pathway enrichment results using Jaccard similarity scores to quantify gene overlap between pathways. This approach moves beyond static pathway lists by visualising functional connectivity, revealing both shared (conserved) and strain- or condition-specific responses. Pathway-level networks were generated from three complementary analyses: i) enrichment of discriminant features identified by multi-block PLS-DA, highlighting shared serum responses across strains ( Figure 5A ); ii) enrichment from single-omics differential expression analyses showing both shared and strain-specific responses ( Figure 5B ); and iii) integration of responses across multiple omics layers within a single strain ( Figure 5C ). These networks integrate diverse biological signals into a coherent systems-level view, allowing researchers to identify and prioritize key functions and molecules while providing a foundation for generating testable hypotheses. They revealed major pathways involved in S. pyogenes adaptation to serum, including carbon metabolism, ribosomal function, and one-carbon metabolism by folate, as well as pyruvate and butanoate metabolism, which were particularly prominent in strain SP444 based on both transcriptomic and proteomic data. Download figure Open in new tab Figure 5. Networks of enriched pathways in Streptococcus pyogenes exposed to human serum. The networks were constructed from the outputs of multi-block PLS-DA (A) and DE analysis (B-C). Nodes represent pathways, and connected edges represent significant pairwise similarity between enriched functional annotations or pathways calculated using Jaccard’s similarity score. The edge length reflects pathway similarity. Node sizes and colours represent the number of strains (A-B) or omics (C) sharing the same enriched pathways. Edge widths indicate the number of strains or omics with interaction between pathways. 
Together, this case study uncovered substantial reprogramming of carbon metabolism in S. pyogenes when exposed to human serum. Several interconnected pathways showed coordinated shifts, reflecting the bacterium’s metabolic adaptation to the host environment. Many studies have emphasised the importance of specific carbon flow patterns in central carbon metabolism for enhancing pathogen fitness during infection ( 24 , 25 ) and reprogramming carbon flow has been suggested as a potential therapeutic target ( 25 ). Overall, this analysis indicates that S. pyogenes likely prioritises host nutrient scavenging over biosynthesis and upregulates fermentation for energy and defence mechanisms. Discussion Rapid advances in high-throughput omics technologies have enabled researchers to capture multiple layers of biological information from a single sample collection. Yet, analyses confined to a single omics layer often fail to fully resolve the complexity of microbial systems, highlighting the need for integrative multi-omics approaches ( 26 ). Previous studies have emphasised that the key challenge in multi-omics research is increasingly shifting from data generation to effective downstream analysis, such as data integration, functional interpretation, and biological contextualisation ( 27 – 30 ). Building on established tools, the analysis framework described here delivers a flexible and well-documented approach for microbial multi-omics data integration and visualisation, thereby facilitating biological interpretation. While this workflow is demonstrated using an existing Streptococcus pyogenes multi-omics dataset ( 1 ), its structure is generalisable to a wide range of experimental designs and microbial systems. 
It focuses on downstream data integration, supporting both knowledge-based and data-driven hypothesis formulation, simplifying the use of powerful multivariate and network-based methods for biomarker discovery and pathway-level interpretation within microbial datasets. In particular, the use of a multi-block (s)PLS-DA framework and network integration of pathway enrichment results allows for systematic consideration of multiple biological layers, revealing key molecular targets and pathways relevant to experimental conditions. The network-based analyses applied in our case study capture both shared and strain-specific responses, demonstrating the approach’s capacity to disentangle conserved processes from context-dependent adaptations. A scale-free network structure was employed because it remains stable even when nodes are removed, reflecting that not all features or pathways are biologically essential under every condition. This design enables the filtering of overly broad categories — for example, general GO terms such as “metabolic process (GO:0008152)” — which can obscure meaningful insights. Focusing instead on more specific terms, such as “glycolytic process (GO:0006096)” and “pentose-phosphate pathway (GO:0006098),” yields more precise and interpretable results in microbial systems. Looking forward, we anticipate that multi-omics technologies will become routinely used in the scientific community, enhancing our understanding of microbial cellular metabolism and their complex biological systems. The workflow presented here provides an accessible framework to support researchers in exploring, integrating, and interpreting multi-omics data, helping to translate complex datasets into biological insight and testable hypotheses within the expanding landscape of systems microbiology. 
Funding information This work was supported by the Development and Promotion of Science and Technology Talents Project (DPST) from the Thai Government (WM), the National Health and Medical Research Council (NHMRC) Investigator Grants to KALC (GNT2025648), TPS (GNT1105525) and BPH (GNT1196103), and the National Health and Medical Research Council (NHMRC) Ideas Grant to AH and RG (GNT2018880). Author contributions Conceptualisation, data analysis and visualisation, and writing the original draft and editing (WM); Writing – review and editing (AH, TPS, BPH, KALC). Conceptualisation, methodology, and writing – review and editing (CJW, RG); All authors read and approved the final version of the manuscript. Conflicts of interest The authors declare that there are no conflicts of interest. Acknowledgements We thank colleagues from Howden/Stinear and LêCao labs, particularly Max Bladen, for their technical support and stimulating discussions. Funder Information Declared National Health and Medical Research Council , GNT2025648 , GNT1105525 , GNT1196103 , GNT2018880 Development and Promotion of Science and Technology Talents Project (DPST) from the Thai Government Footnotes ↵ * Joint senior authors https://github.com/warasinee/Multiomics_Case_Study References 1. ↵ Mu A , Klare WP , Baines SL , Ignatius Pang CN , Guérillot R , Harbison-Price N , et al. Integrative omics identifies conserved and pathogen-specific responses of sepsis-causing bacteria . Nat Commun . 2023 ; 14 ( 1 ): 1530 . OpenUrl CrossRef PubMed 2. ↵ O’Donnell ST , Ross RP , Stanton C . The Progress of Multi-Omics Technologies: Determining Function in Lactic Acid Bacteria Using a Systems Level Approach . Front Microbiol . 2019 ; 10 : 3084 . OpenUrl PubMed 3. ↵ Van Regenmortel MH . Reductionism and complexity in molecular biology. Scientists now have the tools to unravel biological and overcome the limitations of reductionism . EMBO Rep . 2004 ; 5 ( 11 ): 1016 – 20 . OpenUrl FREE Full Text 4. 
↵ Arini GS , Borelli TC , Ferreira EG , de Felício R , Rezende-Teixeira P , Pedrino M , et al. A multi-omics reciprocal analysis for characterization of bacterial metabolism . Front Mol Biosci . 2025 ; 12 : 1515276 . 5. ↵ Wang Q , Wang K , Wu W , Giannoulatou E , Ho JWK , Li L . Host and microbiome multi-omics integration: applications and methodologies . Biophys Rev . 2019 ; 11 ( 1 ): 55 – 65 . OpenUrl CrossRef PubMed 6. ↵ Yu G , Wang LG , Han Y , He QY . clusterProfiler: an R package for comparing biological themes among gene clusters . Omics . 2012 ; 16 ( 5 ): 284 – 7 . OpenUrl CrossRef PubMed Web of Science 7. ↵ Rohart F , Gautier B , Singh A , KA LC . mixOmics: An R package for ’omics feature selection and multiple data integration . PLoS Comput Biol . 2017 ; 13 ( 11 ): e1005752 . OpenUrl CrossRef PubMed 8. ↵ Singh A , Shannon CP , Gautier B , Rohart F , Vacher M , Tebbutt SJ , et al. DIABLO: an integrative approach for identifying key molecular drivers from multi-omics assays . Bioinformatics . 2019 ; 35 ( 17 ): 3055 – 62 . OpenUrl CrossRef PubMed 9. ↵ Love MI , Huber W , Anders S . Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2 . Genome Biol . 2014 ; 15 ( 12 ): 550 . OpenUrl CrossRef PubMed 10. ↵ Chen Y , Chen L , Lun ATL , Baldoni PL , Smyth GK . edgeR v4: powerful differential analysis of sequencing data with expanded functionality and improved support for small counts and larger datasets . Nucleic Acids Res . 2025 ; 53 ( 2 ). 11. ↵ McCarthy DJ , Chen Y , Smyth GK . Differential expression analysis of multifactor RNA-Seq experiments with respect to biological variation . Nucleic Acids Res . 2012 ; 40 ( 10 ): 4288 – 97 . OpenUrl CrossRef PubMed Web of Science 12. ↵ Ritchie ME , Phipson B , Wu D , Hu Y , Law CW , Shi W , et al. limma powers differential expression analyses for RNA-sequencing and microarray studies . Nucleic Acids Res . 2015 ; 43 ( 7 ): e47 . OpenUrl CrossRef PubMed 13. ↵ Powell DR . 
Degust: Interactive RNA-seq analysis . Zenodo ; 2019 . 14. ↵ Xia J , Psychogios N , Young N , Wishart DS . MetaboAnalyst: a web server for metabolomic data analysis and interpretation . Nucleic Acids Res . 2009 ; 37 (Web Server issue): W652 – 60 . OpenUrl CrossRef PubMed Web of Science 15. ↵ Schmid R , Heuckeroth S , Korf A , Smirnov A , Myers O , Dyrlund TS , et al. Integrative analysis of multimodal mass spectrometry data in MZmine 3 . Nat Biotechnol . 2023 ; 41 ( 4 ): 447 – 9 . OpenUrl CrossRef PubMed 16. ↵ Smith CA , Want EJ , O’Maille G , Abagyan R , Siuzdak G . XCMS: processing mass spectrometry data for metabolite profiling using nonlinear peak alignment, matching, and identification . Anal Chem . 2006 ; 78 ( 3 ): 779 – 87 . OpenUrl CrossRef PubMed 17. ↵ Seemann T . Prokka: rapid prokaryotic genome annotation . Bioinformatics . 2014 ; 30 ( 14 ): 2068 – 9 . OpenUrl CrossRef PubMed Web of Science 18. ↵ Schwengers O , Jelonek L , Dieckmann MA , Beyvers S , Blom J , Goesmann A . Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification . Microb Genom . 2021 ; 7 ( 11 ). 19. ↵ Poelzer J , Han S , Saha S , Oler E , Kruger R , Berjanskii M , et al. BASys2: a next-generation bacterial genome annotation system . Nucleic Acids Res . 2025 ; 53 ( W1 ): W57 – W67 . OpenUrl PubMed 20. ↵ Ruiz-Perez CA , Conrad RE , Konstantinidis KT . MicrobeAnnotator: a user-friendly, comprehensive functional annotation pipeline for microbial genomes . BMC Bioinformatics . 2021 ; 22 ( 1 ): 11 . OpenUrl CrossRef PubMed 21. ↵ Cantalapiedra CP , Hernández-Plaza A , Letunic I , Bork P , Huerta-Cepas J . eggNOG-mapper v2: Functional Annotation, Orthology Assignments, and Domain Prediction at the Metagenomic Scale . Mol Biol Evol . 2021 ; 38 ( 12 ): 5825 – 9 . OpenUrl CrossRef PubMed 22. ↵ Fuchs S , Mehlan H , Bernhardt J , Hennig A , Michalik S , Surmann K , et al. 
AureoWiki – The repository of the Staphylococcus aureus research and annotation community . Int J Med Microbiol . 2018 ; 308 ( 6 ): 558 – 68 . OpenUrl CrossRef PubMed 23. ↵ Mujchariyakul W , Walsh CJ , Giulieri S , LêCao K , Stinear TP , Howden BP , et al. Integrated multi-omics reveals coordinated Staphylococcus aureus metabolic, iron transport, and stress responses to human serum . bioRxiv . 2025 . 24. ↵ Merriman JA , Xu W , Caparon MG . Central carbon flux controls growth/damage balance for Streptococcus pyogenes . PLoS Pathog . 2023 ; 19 ( 6 ): e1011481 . OpenUrl CrossRef PubMed 25. ↵ Xu W , Bradstreet TR , Zou Z , Hickerson S , Zhou Y , He H , et al. Reprogramming aerobic metabolism mitigates Streptococcus pyogenes tissue damage in a mouse necrotizing skin infection model . Nat Commun . 2025 ; 16 ( 1 ): 2559 . OpenUrl PubMed 26. ↵ Zhang W , Li F , Nie L . Integrating multiple ’omics’ analysis for microbial biology: application and methodologies . Microbiology (Reading ). 2010 ; 156 (Pt 2 ): 287 – 301 . OpenUrl CrossRef PubMed Web of Science 27. ↵ Hu ZZ , Huang H , Wu CH , Jung M , Dritschilo A , Riegel AT , et al. Omics-based molecular target and biomarker identification . Methods Mol Biol . 2011 ; 719 : 547 – 71 . OpenUrl CrossRef PubMed 28. Wanichthanarak K , Fahrmann JF , Grapov D . Genomic, Proteomic, and Metabolomic Data Integration Strategies . Biomark Insights . 2015 ; 10 ( Suppl 4 ): 1 – 6 . OpenUrl CrossRef PubMed 29. Subramanian I , Verma S , Kumar S , Jere A , Anamika K . Multi-omics Data Integration, Interpretation, and Its Application . Bioinform Biol Insights . 2020 ; 14 : 1177932219899051 . 30. ↵ Wanichthanarak K , Fan S , Grapov D , Barupal DK , Fiehn O . Metabox: A Toolbox for Metabolomic Data Analysis, Interpretation and Integrative Exploration . PLoS One . 2017 ; 12 ( 1 ): e0171046 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted November 19, 2025. 
Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following A Practical Resource for Multi-Omics Data Integration in Microbial Systems Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share A Practical Resource for Multi-Omics Data Integration in Microbial Systems Warasinee Mujchariyakul , Abderrahman Hachani , Timothy P. Stinear , Kim-Anh LêCao , Benjamin P. Howden , Calum J. Walsh , Romain Guérillot bioRxiv 2025.11.19.689359; doi: https://doi.org/10.1101/2025.11.19.689359 Share This Article: Copy Citation Tools A Practical Resource for Multi-Omics Data Integration in Microbial Systems Warasinee Mujchariyakul , Abderrahman Hachani , Timothy P. Stinear , Kim-Anh LêCao , Benjamin P. Howden , Calum J. 
Walsh , Romain Guérillot bioRxiv 2025.11.19.689359; doi: https://doi.org/10.1101/2025.11.19.689359 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Systems Biology Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41910) Biophysics (21436) Cancer Biology (18576) Cell Biology (25480) Clinical Trials (138) Developmental Biology (13368) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15598) Genomics (22482) Immunology (17726) Microbiology (40360) Molecular Biology (17163) Neuroscience (88534) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15129) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.