Full text
41,408 characters
· extracted from
preprint-html
· click to expand
Confidence: A Web App for Cross-Platform Differential Gene Expression Analysis, Gene Scoring, and Enrichment Analysis | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Confidence: A Web App for Cross-Platform Differential Gene Expression Analysis, Gene Scoring, and Enrichment Analysis Abhishek Shastry , Benjamin P. Ott , Alex Paterson , Matt Simpson , View ORCID Profile Kimberly J. Dunham-Snary , Charles C.T. 
Hindmarch doi: https://doi.org/10.1101/2025.06.27.661997 Abhishek Shastry 1 Department of Medicine, Queen’s University ; Kingston, K7L 3N6, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Benjamin P. Ott 2 Queen’s CardioPulmonary Unit, Queen’s University ; Kingston, K7L 3N6, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alex Paterson 3 Dorothy Hodgkin Building, University of Bristol ; Bristol, United Kingdom Find this author on Google Scholar Find this author on PubMed Search for this author on this site Matt Simpson 4 Queen’s Health Sciences, Queen’s University , Kingston, K7L 3N6, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kimberly J. Dunham-Snary 1 Department of Medicine, Queen’s University ; Kingston, K7L 3N6, Canada 5 Department of Biomedical and Molecular Sciences, Queen’s University ; Kingston, K7L 3N6, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kimberly J. Dunham-Snary Charles C.T. Hindmarch 1 Department of Medicine, Queen’s University ; Kingston, K7L 3N6, Canada 2 Queen’s CardioPulmonary Unit, Queen’s University ; Kingston, K7L 3N6, Canada 5 Department of Biomedical and Molecular Sciences, Queen’s University ; Kingston, K7L 3N6, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: c.hindmarch{at}queensu.ca Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract RNA sequencing (RNA-seq) is used to quantify transcript levels through measurement of nucleotide sequences. To evaluate statistically significant changes in gene expression, transcript counts between samples are compared using differential expression analysis methods. 
However, three of the most pressing challenges in transcriptomics analyses are: 1) analytical packages produce a distinct number of differentially expressed genes with varied P-value and fold-change values; 2) the effective use of these analytical packages requires substantial knowledge of programming and bioinformatics; and 3) there is a lack of intuitive methods to select target genes for further investigation in an unbiased manner. To address these challenges, we developed Confidence, a web-based application to perform simultaneous statistical analysis of RNA-seq count data. Confidence incorporates the Confidence Score (CS), ranging from 1 to 4, to aid in gene prioritization, where 1 represents low confidence and 4 represents high confidence. The Confidence web-based application was designed for rapid and intuitive analysis of standard experimental metadata and gene count inputs. Confidence provides a web-based, ‘wide-net’ approach to differential gene expression analysis. Gene scoring allows for unbiased gene selection and identification of novel genes strongly associated with disease and treatment models across multiple species. Additionally, pathway analysis tools have been integrated so that highly confident genes can be placed into biological context in terms of functions and pathways. Confidence provides a new strategy for target prioritization in RNA-seq analysis and the generation of publication-quality figures. Background The draft of the human genome in 2001 introduced a post-genomic era that promised global resolution of the molecular landscape in both preclinical and clinical research environments ( 1 ) . To capitalize on this new information, new technologies were developed; among them, the microarray, capable of profiling the simultaneous expression of tens of thousands of genes ( 2 - 4 ) . The endpoint of a microarray experiment is the identification of differentially expressed genes between two conditions ( 5 - 7 ) . 
The emergence of various RNA sequencing (RNA-seq) technologies substantially improved genome-wide transcriptomic analyses. Rather than relying on the hybridization of mRNA fragments to a library of prescribed probes, as with microarrays ( 3 , 4 , 8 ) , RNA-seq turned the sample into a library, facilitating novel transcript discovery ( 9 - 15 ) . This high-throughput sequencing method provides several benefits compared to older transcriptomic analysis methods, including the ability to differentiate between isoforms of the same gene, identify coding and non-coding genes, and detect low-abundance transcripts ( 9 , 16 - 18 ) . The products of RNA-seq are often short, unaligned reads from fragments of the template material. These sequence data are stored in .FASTA or .FASTQ files. To characterize gene expression, reads must be aligned to a previously compiled, annotated, and species-specific reference genome. As the number of defined genomes increased after the assembly of the human genome, several genome alignment tools also emerged to assess the probability that sequenced reads match known genomic sequences ( 19 - 23 ) . After genome alignment, gene counts can be approximated using predictive models assessing the probability of alignment to exon sequences ( 24 - 28 ) . A schematic of the Confidence workflow is found in Figure 1 . Download figure Open in new tab Figure 1. As with microarrays, differential expression analysis in RNA-seq studies compares gene counts between experimental samples through various statistical tests that were adopted from older experimental pipelines, which proved to be insufficient for this new data. In addition, variances in inter-sample library size, sequencing variability, and batch effects between biological replicates were often left unaccounted for in simple statistical analyses ( 29 ) . 
To address these issues, numerous bespoke analytical packages were developed that incorporated varying statistical assumptions about read count distributions, data normalization, and methods for hypothesis testing between samples ( 30 ) . In isolation, these analytical packages provide appropriate differential expression analysis based on experimental metadata and gene count input by the user. These packages can all be implemented using the R statistical programming language. Challenges in Differential Expression Analysis Broadly, there are three key challenges to the use of analytical packages for differential expression analysis: Modelling variability: Transcriptomic studies using deep RNA-seq often incorporate multiple biological replicates to power the experiment. Therefore, there may be variability associated with sample replicates (e.g., heterogeneous disease expression between replicates), library sizes, and read quality, among other factors. Additionally, the presence of genes with abnormally skewed count distributions may cause true Differentially Expressed Genes (DEGs) to be misclassified or overlooked. Analytical packages incorporate data preprocessing measures such as batch correction, normalization, multiple test correction, and transformation to account for these factors ( 31 ) ; however, because the statistical assumptions and hypothesis testing procedures differ drastically between analytical packages, the number of genes identified as differentially expressed differs between analysis packages, as do the expression level (e.g., log₂FC, log-ratio) and significance value reported for each gene (e.g., P-value, P-adj) ( 30 , 32 , 33 ) . Expert analysis: Appropriate use of analytical packages for transcriptomics requires a substantial bioinformatics and programming background. 
Indeed, qualitative studies have demonstrated that the complexity of software, lack of formal training, and decreased perceived ease-of-use of bioinformatics tools reduce non-bioinformaticians’ ability to perform bioinformatics analyses such as functional enrichment and/or pathways analysis ( 34 - 36 ) . Target prioritization: A primary goal of transcriptomic analyses is the identification of genes whose differential regulation contributes to the experimental condition under investigation. However, the list of DEGs can be substantial. Genes of interest from these lists have traditionally been chosen arbitrarily through P -value, P -adj, and/or expression level parameters; alternatively, genes are often selected through the familiarity of the gene or its related protein to the investigator. Without a systematic, unbiased method to rank and delineate genes of interest, genes or gene sets that are reflective of real biological changes and play novel roles in the disease pathology, treatment intervention, or biological model may be overlooked. As transcriptomics analyses incorporating next-generation RNA-seq technology have become increasingly prominent in almost every domain of biological study, it is important to provide non-bioinformaticians an avenue in which to analyze large gene expression datasets and best use the insights from these experiments. Here, we built an intuitive web-based application using Shiny, called Confidence. Confidence is easy to use and answers each of the three challenges laid out above; 1. Confidence provides a way for users to model experimental metadata and visualize the clustering of samples used in RNA-seq experiments; 2. Confidence has a simple interface that allows count.csv files, and a metadata table to be uploaded; 3. Confidence provides sequential analysis of RNA-seq derived gene counts through multiple analytical packages, providing publication-ready figures and tables that can be directly downloaded to a local machine. 
Methods Application Development We built the Confidence web application using the Shiny package v1.10.0 ( 37 ) for R v4.4.3 ( http://www.r-project.org ), bslib v0.9.0, bsicons v0.1.2, DESeq2 v1.44.0 ( 38 ) , NOISeq v2.48.0 ( 39 , 40 ) , limma v3.60.6 ( 41 ) , edgeR v4.2.2 ( 42 ) , shinycssloaders v1.1.0 shinyjs v2.1.0, plotly v4.10.4 ( 43 ) , thematic v0.1.6, ggvenn v0.1.10 ( 44 ) , dplyr v1.1.4, enrichplot v1.24.4 ( 45 ) , shinyWidgets v0.9.0, spsComps v0.3.3.0, gprofiler2 v0.2.3 ( 46 ) , tidyr v1.3.1, eply v0.1.2, DT v0.33, ggrepel v0.9.6, ggplot2 v3.5.2 ( 47 ) , Matrix v1.7-3, SummarizedExperiment v1.34.0, Biobase v2.64.0, MatrixGenerics v1.16.0, matrixStats v1.5.0, GenomicRanges v1.56.2, GenomeInfoDb v1.40.1, IRanges v2.38.1, S4Vectors v0.42.1, BiocGenerics v0.50.0. Data Input Confidence requires users to input two files: one containing gene counts and the other containing experimental metadata. Gene counts must be formatted from count estimation computational tools in a tabular format, with column values representing sample names and rows representing genes; experimental metadata can be constructed as a comma-separated values file in programs such as Microsoft Excel ® . Data Modelling Confidence provides extensive options to model experiment-associated variables found in the metadata, including selection of the main effect (the primary factor being investigated; e.g., treatment), reference class (the reference level for comparison within the main effect; e.g., untreated), and comparison class (the level to be compared to the reference class within the main effect; e.g., penicillin-treated). This is typically sufficient for two-factor comparisons (e.g., untreated vs. penicillin-treated). 
As a feature to guide the design of experimental questions, Confidence also allows users to build complex experimental designs controlling for multiple continuous or discrete factors (e.g., sex, age, diet) to isolate the impact of the primary variable of interest on gene expression while controlling for other factors. Analytical Packages Confidence also requires the user to define a P-adj threshold and provides options for low count filtration based on the smallest comparison group size before initiation. The Confidence pipeline was developed to perform gene count normalization, filtration, and hypothesis testing sequentially across DESeq2 v1.44.0 ( 38 ) , limma v3.60.6 ( 48 , 49 ) , edgeR v4.2.2 ( 42 ) , and NOISeq v2.48.0 ( 39 , 40 ) , which are the most popular analytical packages used in published transcriptomic research. The default normalization settings and hypothesis tests were maintained in each analytical package (see Table 1 ) as per their respective vignettes ( 38 - 42 , 48 ) . Annotation of Ensembl gene IDs was facilitated using the biomaRt database v2.60.1 ( 50 - 52 ) . A dropdown list of Human, Rat, and Mouse genome annotations is provided to characterize ENSEMBL gene IDs. This list will be expanded in the future to include more species. Due to the presence of uncharacterized mRNA reads in RNA-seq datasets, unannotated results that do not have gene names are also included in the Confidence results table, relying on their ENSEMBL gene ID annotation instead. An overview of the Confidence application methodology is presented in Figure 1 . View this table: View inline View popup Download powerpoint Table 1. Differential Gene Expression Analysis After contrasting samples based on selected main effects, each analytical package outputs a table of DEGs along with each gene’s ID, log₂FC, P-value, and P-adj, across conditions. 
NOISeq is an exception, however, as it produces an a posteriori probability p that a gene is differentially expressed between two conditions ( 39 , 40 ) . Here, we specify that the P-adj for NOISeq is equivalent to 1 − p, as instructed in the NOISeq vignette. Confidence Score After sequential tests of differential expression are performed by each analytical package, Confidence Scores are calculated for each DEG. The Confidence Score is a tally of the number of packages where a significant DEG is demonstrated to be up- or down-regulated. This ‘consensus’ among analytical packages allows for the filtering of genes that are likely to be both biologically and statistically significant. As an example, if three of the four packages implemented in Confidence report a DEG, the gene’s associated Confidence Score is 3. Herein, we qualitatively describe genes with a Confidence Score of 0 as ‘not confident’, Confidence Score of 1 as ‘least confident’, a Confidence Score of 2 as ‘moderately confident’, a Confidence Score of 3 as ‘highly confident’, and a Confidence Score of 4 as ‘most confident’. To elucidate overlaps between DEGs reported by each analytical package, a four-way Venn diagram is automatically generated by Confidence on the named list using the ggvenn package v0.1.10 ( 53 ) . Outputs and Visualizations Confidence allows for the generation of standard visualizations found in transcriptomic literature. Principal component analysis (PCA) was integrated into Confidence using the DESeq2 plotPCA function and formatted using ggplot2 ( 55 ) . PCA was performed using the prcomp function on variance stabilizing transformed gene counts; a PCA biplot is immediately available when experimental metadata and gene count files are submitted, so that the effect of including/excluding experimental variables and metadata factors (e.g., sex, diet, outliers, and low gene counts) can be visualized through sample clustering. 
To investigate the spread of DEGs according to their log₂FC and P-adj values, volcano plots are generated on DEGs from each analytical package using ggplot2 v3.5.2 and plotly v4.10.4. Additionally, box plots can be generated for each DEG to visualize normalized gene counts between experimental conditions; for box plots only, normalization is performed using log₂-transformed, DESeq size-factors. These box plots are embedded into the final Confidence results table and are immediately accessible through a clickable button. Pathway Analysis Pathway enrichment analysis (PEA) tools were additionally programmed into Confidence using the GOSt function from the gprofiler2 package v0.2.3 ( 56 ) . GOSt analysis is provided to perform PEA using numerous databases including gene ontology (GO), Kyoto Encyclopedia of Genes and Genomes (KEGG), Reactome (REAC), WikiPathways (WP), TRANScription FACtor database (TF), MicroRNA-Target database (miRTarBase), Human Protein Atlas (HPA), Comprehensive Resource of Mammalian protein complexes (CORUM), and Human Phenotype Ontology (HP), simultaneously. Results Confidence can be found here: https://confidence.apps.meds.queensu.ca/ To showcase Confidence, we have included sample figures that were generated in Confidence using toy data ( Figure 2 ). Confidence allows for modelling of metadata (by treatment, animal ID, sample number, and sample ID; Figure 2A-D ), an understanding of how many DEGs each package identified using a Volcano plot ( Figure 2E ) and how many intersected ( Figure 2F ), and the expression profiles of different, user-defined genes ( Figure 2G/H ). Confidence also visualizes the functional analysis of the gene list as an interactive dot plot of all enriched terms organized by domain ( Figure 3A ), as well as user-defined dot plots of the top functions/processes/targets in each domain ( Figure 3B ). Download figure Open in new tab Figure 2. Download figure Open in new tab Figure 3. 
Discussion Differential expression analysis is often the penultimate step of transcriptomic studies, which statistically evaluates the differences in gene counts between experimental groups. Since the publication of the first RNA-seq study in 2006 ( 57 ) , several analytical packages and methodologies have been developed to normalize, model, and test differential expression effectively ( 58 ) . However, large variations in the number of discovered genes, false discovery rate, and expression changes across these analytical packages may confound the interpretation of results ( 30 , 32 , 33 , 59 - 62 ) . Among these transcriptomics analytical packages, DESeq2, limma, edgeR, and NOISeq are commonly used in deep RNA-seq studies ( 62 - 64 ) . Additionally, DESeq2 accounted for 26.36%, edgeR for 20.84%, limma-voom for 3.13%, and NOISeq for 1.38% of all citations for RNA-seq studies ( 58 ) . NOISeq, the most popular non-parametric method, was implemented into Confidence as it offers a unique computational and theoretical approach relative to parametric methods such as DESeq2, edgeR, and limma. Here, we introduced the Confidence application, which embeds each of these analytical packages into a single analysis pipeline to leverage their unique features in a user-friendly application. The use of multiple analytical packages to simultaneously assess gene expression has previously been implemented in other tools, including consensusDE and consexpression ( 65 , 66 ) ; however, these tools both require the use of programming languages. In untargeted omics analyses more broadly, the process to select features to characterize, validate, or manipulate must take several considerations into account, including the cost of reagents and the considerable amount of time needed to investigate findings. We propose that Confidence can provide an intuitive means to filter large gene lists and streamline gene selection. 
Additionally, although many analytical package-consensus tools have been developed ( 65 , 66 ) , they are implemented as computational packages within coding languages such as Python or R. In a survey conducted by Alomair et al. (2023) of scientists, only one-third used programming languages to address biological problems, and half reported hiring a bioinformatician to conduct the analysis ( 34 ) . To address this issue, Confidence was developed as a web-based application, which included options to produce the visualizations presented in this study, as well as clear descriptions of analytical methods, plot interpretations, and plot customization tools. Confidence addresses three major challenges to identify and prioritize DEGs from transcriptomic studies: 1. Confidence allows the easy modelling of data using different sample attributes reported in the user-defined metadata; PCAs will allow for the rapid review of sample variability using condition, as well as other information such as sex, batch, animal ID, and time. 2. Confidence integrates standard pipelines for differential expression analysis into a simple web-based platform, which may ensure that differential expression analysis is more accessible for investigators without programming experience. 3. Using the Confidence Score, Confidence provides a consensus tool to identify common DEGs across analytical packages employing diverse statistical methodologies, enhancing target prioritization. Conclusions We have developed the Confidence application to perform multi-package differential expression analysis, provide intuitive scoring systems to streamline gene prioritization and selection, and provide a web-based tool for non-bioinformaticians to analyze transcriptomics data. Further developing Confidence to integrate the vast array of analytical packages available in other omics fields, such as proteomics and metabolomics, and to gauge their performance, can help identify novel markers implicated in disease processes. 
Finally, we foresee that the ability for Confidence to streamline marker selection without the influence of knowledge bias by the investigator will reduce costs associated with unproductive validation and experimentation in transcriptomics and beyond. Funding This work was supported by a Canadian Institutes of Health Research Project Grant (202303PJT-495854), a Tier II Canada Research Chair in Mitochondrial and Metabolic Regulation in Health and Disease (CRC-2020-00192), the Canada Foundation for Innovation - John R. Evans Leaders Fund (41511), the Banting Research Foundation and Mitacs (6035577), the Faculty of Health Sciences (6032495) and Department of Medicine (6034430) at Queen’s University (K. Dunham-Snary), and an Ontario Graduate Scholarship (A. Shastry). Author contributions Conceptualization: A.S., B.P.O, K.DS., AP., C.C.T.H. Methodology: A.S., B.P.O., K.DS., C.C.T.H. Investigation: A.S. Visualization: B.P.O., A.S. Formal Analysis: A.S., B.P.O Funding acquisition: K.DS., C.C.T.H. Project administration: A.S., B.P.O., K.DS., C.C.T.H. Resources: B.P.O., M.S., K.DS., C.C.T.H. Software: B.P.O., A.S., C.C.T.H. Supervision: K.DS., C.C.T.H. Writing – original draft: A.S., K.DS., C.C.T.H. Writing – review & editing: A.S., B.P.O, K.DS., C.C.T.H. Competing interest The authors declare no competing interests. Data and materials availability The Confidence application is available at https://confidence.apps.meds.queensu.ca/ . Any questions regarding code availability and the conclusions of this paper should be directed to the corresponding author. Acknowledgments We would like to thank the MitoMetabLab at Queen’s University for their suggestions and tests of the Confidence application. We would also like to thank the Queen’s CardioPulmonary Unit (QCPU), and the Translational Institute of Medicine (TIME) at Queen’s University. 
Funder Information Declared Canadian Institutes of Health Research , 202303PJT-495854 Canada Research Chairs, https://ror.org/0517h6h17 , CRC-2020-00192 Canada Foundation for Innovation, https://ror.org/000az4664 , 41511 Banting Research Foundation, https://ror.org/05xe9t676 , 6035577 Ontario Graduate Scholarship Footnotes https://confidence.apps.meds.queensu.ca/ References 1. ↵ E. S. Lander et al. , Initial sequencing and analysis of the human genome . Nature 409 , 860 – 921 ( 2001 ). OpenUrl CrossRef PubMed Web of Science 2. ↵ A. E. Pozhitkov , D. Tautz , P. A. Noble , Oligonucleotide microarrays: widely applied--poorly understood . Brief Funct Genomic Proteomic 6 , 141 – 148 ( 2007 ). OpenUrl CrossRef PubMed 3. ↵ M. Schena , D. Shalon , R. W. Davis , P. O. Brown , Quantitative Monitoring of Gene Expression Patterns with a Complementary DNA Microarray . Science 270 , 467 – 470 ( 1995 ). OpenUrl Abstract / FREE Full Text 4. ↵ M. T. Ghorbel et al. , Microarray screening of suppression subtractive hybridization-PCR cDNA libraries identifies novel RNAs regulated by dehydration in the rat supraoptic nucleus . Physiol Genomics 24 , 163 – 172 ( 2006 ). OpenUrl CrossRef PubMed Web of Science 5. ↵ C. C. Hindmarch et al. , The transcriptome of the medullary area postrema: the thirsty rat, the hungry rat and the hypertensive rat . Exp Physiol 96 , 495 – 504 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 6. L. Stewart et al. , Hypothalamic transcriptome plasticity in two rodent species reveals divergent differential gene expression but conserved pathways . J Neuroendocrinol 23 , 177 – 185 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 7. ↵ C. C. T. Hindmarch , D. Murphy , The transcriptome and the hypothalamo-neurohypophyseal system . Endocr Dev 17 , 1 – 10 ( 2010 ). OpenUrl CrossRef PubMed 8. ↵ H. Liu , I. Bebu , X. Li , Microarray probes and probe sets . Front Biosci (Elite Ed) 2 , 325 – 338 ( 2010 ). OpenUrl PubMed 9. ↵ Z. Wang , M. Gerstein , M. 
Snyder , RNA-Seq: a revolutionary tool for transcriptomics . Nature Reviews Genetics 10 , 57 – 63 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 10. R. E. T. Bentley , C. C. T. Hindmarch , S. L. Archer , Using omics to breathe new life into our understanding of the ductus arteriosus oxygen response . Semin Perinatol 47 , 151715 ( 2023 ). OpenUrl CrossRef PubMed 11. R. Kloosterman et al. , A transcriptome analysis of basal and stimulated VWF release from endothelial cells derived from patients with type 1 VWD . Blood Adv 7 , 1477 – 1487 ( 2023 ). OpenUrl CrossRef PubMed 12. A.G. Pauža et al. , Osmoregulation of the transcriptome of the hypothalamic supraoptic nucleus: A resource for the community . J Neuroendocrinol 33 , e13007 ( 2021 ). OpenUrl CrossRef PubMed 13. F. Potus , C. C. T. Hindmarch , K. J. Dunham-Snary , J. Stafford , S. L. Archer , Transcriptomic Signature of Right Ventricular Failure in Experimental Pulmonary Arterial Hypertension: Deep Sequencing Demonstrates Mitochondrial, Fibrotic, Inflammatory and Angiogenic Abnormalities . Int J Mol Sci 19 , ( 2018 ). 14. L. Jiang et al. , RNA sequencing analysis of human podocytes reveals glucocorticoid regulated gene networks targeting non-immune pathways . Sci Rep 6 , 35671 ( 2016 ). OpenUrl CrossRef PubMed 15. ↵ K. R. Johnson et al. , A RNA-Seq Analysis of the Rat Supraoptic Nucleus Transcriptome: Effects of Salt Loading on Gene Expression . PLoS One 10 , e0124523 ( 2015 ). OpenUrl CrossRef PubMed 16. ↵ S. Zhao , W. P. Fung-Leung , A. Bittner , K. Ngo , X. Liu , Comparison of RNA-Seq and microarray in transcriptome profiling of activated T cells . PLoS One 9 , e78644 ( 2014 ). OpenUrl CrossRef PubMed 17. A. Mortazavi , B. A. Williams , K. McCue , L. Schaeffer , B. Wold , Mapping and quantifying mammalian transcriptomes by RNA-Seq . Nat Methods 5 , 621 – 628 ( 2008 ). OpenUrl CrossRef PubMed Web of Science 18. ↵ U. Nagalakshmi et al. 
, The transcriptional landscape of the yeast genome defined by RNA sequencing . Science 320 , 1344 – 1349 ( 2008 ). OpenUrl Abstract / FREE Full Text 19. ↵ H. Li , R. Durbin , Fast and accurate short read alignment with Burrows-Wheeler transform . Bioinformatics 25 , 1754 – 1760 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 20. B. Langmead , S. L. Salzberg , Fast gapped-read alignment with Bowtie 2 . Nat Methods 9 , 357 – 359 ( 2012 ). OpenUrl CrossRef PubMed Web of Science 21. D. Kim et al. , TopHat2: accurate alignment of transcriptomes in the presence of insertions, deletions and gene fusions . Genome Biol 14 , R36 ( 2013 ). OpenUrl CrossRef PubMed 22. D. Kim , B. Langmead , S. L. Salzberg , HISAT: a fast spliced aligner with low memory requirements . Nat Methods 12 , 357 – 360 ( 2015 ). OpenUrl CrossRef PubMed 23. ↵ A. Dobin et al. , STAR: ultrafast universal RNA-seq aligner . Bioinformatics 29 , 15 – 21 ( 2013 ). OpenUrl CrossRef PubMed Web of Science 24. ↵ N. L. Bray , H. Pimentel , P. Melsted , L. Pachter , Near-optimal probabilistic RNA-seq quantification . Nat Biotechnol 34 , 525 – 527 ( 2016 ). OpenUrl CrossRef PubMed 25. R. Patro , G. Duggal , M. I. Love , R. A. Irizarry , C. Kingsford , Salmon provides fast and bias-aware quantification of transcript expression . Nat Methods 14 , 417 – 419 ( 2017 ). OpenUrl CrossRef PubMed 26. C. Trapnell et al. , Transcript assembly and quantification by RNA-Seq reveals unannotated transcripts and isoform switching during cell differentiation . Nat Biotechnol 28 , 511 – 515 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 27. Y. Liao , G. K. Smyth , W. Shi , featureCounts: an efficient general purpose program for assigning sequence reads to genomic features . Bioinformatics 30 , 923 – 930 ( 2014 ). OpenUrl CrossRef PubMed Web of Science 28. ↵ S. Anders , P. T. Pyl , W. Huber , HTSeq--a Python framework to work with high-throughput sequencing data . Bioinformatics 31 , 166 – 169 ( 2015 ). 
OpenUrl CrossRef PubMed Web of Science 29. ↵ M. Jeanmougin et al. , Should we abandon the t-test in the analysis of gene expression microarray data: a comparison of variance modeling strategies . PLoS One 5 , e12336 ( 2010 ). OpenUrl CrossRef PubMed 30. ↵ F. Seyednasrollah , A. Laiho , L. L. Elo , Comparison of software packages for detecting differential expression in RNA-seq studies . Briefings in Bioinformatics 16 , 59 – 70 ( 2015 ). OpenUrl CrossRef PubMed 31. ↵ Z. B. Abrams , T. S. Johnson , K. Huang , P. R. O. Payne , K. Coombes , A protocol to evaluate RNA sequencing normalization methods . BMC Bioinformatics 20 , 679 ( 2019 ). OpenUrl CrossRef PubMed 32. ↵ C. Soneson , M. Delorenzi , A comparison of methods for differential expression analysis of RNA-seq data . BMC Bioinformatics 14 , 91 ( 2013 ). OpenUrl CrossRef PubMed 33. ↵ V. M. Kvam , P. Liu , Y. Si , A comparison of statistical methods for detecting differentially expressed genes from RNA-seq data . American Journal of Botany 99 , 248 – 256 ( 2012 ). OpenUrl Abstract / FREE Full Text 34. ↵ L. Alomair , M. A. Abolfotouh , Awareness and Predictors of the Use of Bioinformatics in Genome Research in Saudi Arabia . Int J Gen Med 16 , 3413 – 3425 ( 2023 ). OpenUrl CrossRef PubMed 35. A. Shachak , K. Shuval , S. Fine , Barriers and enablers to the acceptance of bioinformatics tools: a qualitative study . J Med Libr Assoc 95 , 454 – 458 ( 2007 ). OpenUrl CrossRef PubMed Web of Science 36. ↵ K. Wijesooriya , S. A. Jadaan , K. L. Perera , T. Kaur , M. Ziemann , Urgent need for consistent standards in functional enrichment analysis . PLoS Comput Biol 18 , e1009935 ( 2022 ). OpenUrl CrossRef PubMed 37. ↵ W. Chang et al. , in R package version 1.9.1.9000 . ( 2024 ). 38. ↵ M. I. Love , W. Huber , S. Anders , Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2 . Genome Biology 15 , 550 ( 2014 ). OpenUrl CrossRef PubMed 39. ↵ S. Tarazona et al. 
, Data quality aware analysis of differential expression in RNA-seq with NOISeq R/Bioc package . Nucleic Acids Res 43 , e140 ( 2015 ). OpenUrl CrossRef PubMed 40. ↵ S. Tarazona , F. García-Alcalde , J. Dopazo , A. Ferrer , A. Conesa , Differential expression in RNA-seq: a matter of depth . Genome Res 21 , 2213 – 2223 ( 2011 ). OpenUrl Abstract / FREE Full Text 41. ↵ M. E. Ritchie et al. , limma powers differential expression analyses for RNA-sequencing and microarray studies . Nucleic Acids Res 43 , e47 ( 2015 ). OpenUrl CrossRef PubMed 42. ↵ M. D. Robinson , D. J. McCarthy , G. K. Smyth , edgeR: a Bioconductor package for differential expression analysis of digital gene expression data . Bioinformatics 26 , 139 – 140 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 43. ↵ C. Sievert et al. , plotly: Create Interactive Web Graphics via plotly.js . ( 2024 ). 44. ↵ L. Yan , ggvenn: Draw Venn Diagram by ggplot2 . ( 2023 ). 45. ↵ G. Yu , enrichplot: Visualization of Functional Enrichment Result . ( 2024 ). 46. ↵ L. Kolberg , U. Raudvere , gprofiler2: Interface to the g:Profiler Toolset . ( 2024 ). 47. ↵ H. Wickham et al. , ggplot2: Create Elegant Data Visualisations Using the Grammar of Graphics . ( 2024 ). 48. ↵ G. K. Smyth , Linear models and empirical bayes methods for assessing differential expression in microarray experiments . Stat Appl Genet Mol Biol 3 , Article3 ( 2004 ). OpenUrl CrossRef PubMed 49. ↵ C. W. Law , Y. Chen , W. Shi , G. K. Smyth , voom: precision weights unlock linear model analysis tools for RNA-seq read counts . Genome Biology 15 , R29 ( 2014 ). OpenUrl CrossRef PubMed 50. ↵ R. J. Kinsella et al. , Ensembl BioMarts: a hub for data retrieval across taxonomic space . Database (Oxford) 2011 , bar030 ( 2011 ). OpenUrl CrossRef PubMed 51. D. Smedley et al. , BioMart--biological queries made easy . BMC Genomics 10 , 22 ( 2009 ). OpenUrl CrossRef PubMed 52. ↵ J. Zhang et al. , BioMart: a data federation framework for large collaborative projects .
Database (Oxford) 2011 , bar038 ( 2011 ). OpenUrl CrossRef PubMed 53. ↵ C.-H. Gao , G. Yu , P. Cai , ggVennDiagram: An Intuitive, Easy-to-Use, and Highly Customizable R Package to Generate Venn Diagram . Frontiers in Genetics 12 , ( 2021 ). 54. J. I. Fuxman Bass et al. , Using networks to measure similarity between genes: association index selection . Nat Methods 10 , 1169 – 1176 ( 2013 ). OpenUrl CrossRef PubMed Web of Science 55. ↵ K. Blighe , A. Lun , in R package version 2.16.0 . ( 2024 ). 56. ↵ L. Kolberg , U. Raudvere , I. Kuzmin , J. Vilo , H. Peterson , gprofiler2 -- an R package for gene list functional enrichment analysis and namespace conversion toolset g:Profiler . F1000Res 9 , ( 2020 ). 57. ↵ M. N. Bainbridge et al. , Analysis of the prostate cancer cell line LNCaP transcriptome using a sequencing-by-synthesis approach . BMC Genomics 7 , 246 ( 2006 ). OpenUrl CrossRef PubMed 58. ↵ J. Costa-Silva , D. S. Domingues , D. Menotti , M. Hungria , F. M. Lopes , Temporal progress of gene expression analysis with RNA-Seq data: A review on the relationship between computational methods . Comput Struct Biotechnol J 21 , 86 – 98 ( 2023 ). OpenUrl CrossRef PubMed 59. ↵ L. A. Corchete et al. , Systematic comparison and assessment of RNA-seq procedures for gene expression quantitative analysis . Sci Rep 10 , 19737 ( 2020 ). OpenUrl CrossRef PubMed 60. I. Nookaew et al. , A comprehensive comparison of RNA-Seq-based transcriptome analysis from reads to differential gene expression and cross-comparison with microarrays: a case study in Saccharomyces cerevisiae . Nucleic Acids Res 40 , 10084 – 10097 ( 2012 ). OpenUrl CrossRef PubMed Web of Science 61. D. Li et al. , An evaluation of RNA-seq differential analysis methods . PLoS One 17 , e0264246 ( 2022 ). OpenUrl CrossRef PubMed 62. ↵ S. Das , A. Rai , M. L. Merchant , M. C. Cave , S. N. Rai , A Comprehensive Survey of Statistical Approaches for Differential Expression Analysis in Single-Cell RNA Sequencing Studies . 
Genes (Basel) 12 , ( 2021 ). 63. D. Li et al. , An evaluation of RNA-seq differential analysis methods . PloS one . 2022 ( doi: 10.1371/journal.pone.0264246 ). OpenUrl CrossRef PubMed 64. ↵ H. C. Huang , Y. Niu , L. X. Qin , Differential Expression Analysis for RNA-Seq: An Overview of Statistical Methods and Computational Software . Cancer Inform 14 , 57 – 67 ( 2015 ). OpenUrl 65. ↵ A. J. Waardenberg , M. A. Field , consensusDE: an R package for assessing consensus of multiple RNA-seq algorithms with RUV correction . PeerJ 7 , e8206 ( 2019 ). OpenUrl CrossRef PubMed 66. ↵ J. Costa-Silva , D. Domingues , F. M. Lopes , RNA-Seq differential expression analysis: An extended review and a software tool . PLoS One 12 , e0190152 ( 2017 ). OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted July 02, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Confidence: A Web App for Cross-Platform Differential Gene Expression Analysis, Gene Scoring, and Enrichment Analysis Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Confidence: A Web App for Cross-Platform Differential Gene Expression Analysis, Gene Scoring, and Enrichment Analysis Abhishek Shastry , Benjamin P. Ott , Alex Paterson , Matt Simpson , Kimberly J. Dunham-Snary , Charles C.T. 
Hindmarch bioRxiv 2025.06.27.661997; doi: https://doi.org/10.1101/2025.06.27.661997 Share This Article: Copy Citation Tools Confidence: A Web App for Cross-Platform Differential Gene Expression Analysis, Gene Scoring, and Enrichment Analysis Abhishek Shastry , Benjamin P. Ott , Alex Paterson , Matt Simpson , Kimberly J. Dunham-Snary , Charles C.T. Hindmarch bioRxiv 2025.06.27.661997; doi: https://doi.org/10.1101/2025.06.27.661997 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17697) Bioengineering (13894) Bioinformatics (41951) Biophysics (21456) Cancer Biology (18594) Cell Biology (25515) Clinical Trials (138) Developmental Biology (13380) Ecology (19903) Epidemiology (2067) Evolutionary Biology (24322) Genetics (15612) Genomics (22510) Immunology (17737) Microbiology (40400) Molecular Biology (17183) Neuroscience (88619) Paleontology (667) Pathology (2833) Pharmacology and Toxicology (4825) Physiology (7644) Plant Biology (15158) Scientific Communication and Education (2046) Synthetic Biology (4296) Systems Biology (9825) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.