Full text
62,183 characters
· extracted from
preprint-html
· click to expand
AniMarkerDB: a comprehensive database for exploring cell types and marker genes in livestock and poultry at single-cell resolution | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results AniMarkerDB: a comprehensive database for exploring cell types and marker genes in livestock and poultry at single-cell resolution Zhuohang Li , Tao Zhang , Xueqing Li , Jiangwu Huang , Zimin Xie , Fei Gao , Haiming Cai , Mingfei Sun , Manman Dai , Ming Liao doi: https://doi.org/10.1101/2025.10.14.682327 Zhuohang Li 1 Guangdong Laboratory for Lingnan Modern Agriculture, National and Regional Joint Engineering Laboratory for Medicament of Zoonosis Prevention and Control, 
College of Veterinary Medicine, South China Agricultural University , Guangzhou 510642, China ; Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tao Zhang 1 Guangdong Laboratory for Lingnan Modern Agriculture, National and Regional Joint Engineering Laboratory for Medicament of Zoonosis Prevention and Control, College of Veterinary Medicine, South China Agricultural University , Guangzhou 510642, China ; Find this author on Google Scholar Find this author on PubMed Search for this author on this site Xueqing Li 1 Guangdong Laboratory for Lingnan Modern Agriculture, National and Regional Joint Engineering Laboratory for Medicament of Zoonosis Prevention and Control, College of Veterinary Medicine, South China Agricultural University , Guangzhou 510642, China ; Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jiangwu Huang 1 Guangdong Laboratory for Lingnan Modern Agriculture, National and Regional Joint Engineering Laboratory for Medicament of Zoonosis Prevention and Control, College of Veterinary Medicine, South China Agricultural University , Guangzhou 510642, China ; Find this author on Google Scholar Find this author on PubMed Search for this author on this site Zimin Xie 1 Guangdong Laboratory for Lingnan Modern Agriculture, National and Regional Joint Engineering Laboratory for Medicament of Zoonosis Prevention and Control, College of Veterinary Medicine, South China Agricultural University , Guangzhou 510642, China ; Find this author on Google Scholar Find this author on PubMed Search for this author on this site Fei Gao 1 Guangdong Laboratory for Lingnan Modern Agriculture, National and Regional Joint Engineering Laboratory for Medicament of Zoonosis Prevention and Control, College of Veterinary Medicine, South China Agricultural University , Guangzhou 510642, China ; Find this author on Google Scholar Find this author on PubMed Search for this author on this 
site Haiming Cai 3 State Key Laboratory of Swine and Poultry Breeding Industry, Key Laboratory of Livestock Disease Prevention of Guangdong Province, Key Laboratory for Prevention and Control of Avian Influenza and Other Major Poultry Diseases of Agriculture and Rural Affairs Ministry, Institute of Animal Health, Guangdong Academy of Agricultural Sciences , Guangzhou, 510640, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mingfei Sun 3 State Key Laboratory of Swine and Poultry Breeding Industry, Key Laboratory of Livestock Disease Prevention of Guangdong Province, Key Laboratory for Prevention and Control of Avian Influenza and Other Major Poultry Diseases of Agriculture and Rural Affairs Ministry, Institute of Animal Health, Guangdong Academy of Agricultural Sciences , Guangzhou, 510640, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: mliao{at}scau.edu.cn daimanman1229{at}scau.edu.cn smf7810{at}126.com Manman Dai 1 Guangdong Laboratory for Lingnan Modern Agriculture, National and Regional Joint Engineering Laboratory for Medicament of Zoonosis Prevention and Control, College of Veterinary Medicine, South China Agricultural University , Guangzhou 510642, China ; 2 UK-China Centre of Excellence for Research on Avian Diseases , Guangzhou 510642, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: mliao{at}scau.edu.cn daimanman1229{at}scau.edu.cn smf7810{at}126.com Ming Liao 1 Guangdong Laboratory for Lingnan Modern Agriculture, National and Regional Joint Engineering Laboratory for Medicament of Zoonosis Prevention and Control, College of Veterinary Medicine, South China Agricultural University , Guangzhou 510642, China ; 2 UK-China Centre of Excellence for Research on Avian Diseases , Guangzhou 510642, China 3 State Key Laboratory of Swine and Poultry Breeding 
Industry, Key Laboratory of Livestock Disease Prevention of Guangdong Province, Key Laboratory for Prevention and Control of Avian Influenza and Other Major Poultry Diseases of Agriculture and Rural Affairs Ministry, Institute of Animal Health, Guangdong Academy of Agricultural Sciences , Guangzhou, 510640, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: mliao{at}scau.edu.cn daimanman1229{at}scau.edu.cn smf7810{at}126.com Abstract Full Text Info/History Metrics Data/Code Preview PDF ABSTRACT Single-cell RNA sequencing (scRNA-seq) has dramatically advanced the understanding of cellular heterogeneity. While numerous marker gene databases are available for humans and mice, a lack of systematic resources for livestock and poultry species remains, limiting progress in functional genomics, immunology, and breeding. To address this challenge, we developed AniMarkerDB ( https://animarkerdb.bio ), a comprehensive and curated database dedicated to marker genes and immune-related epitopes in economically important animals, including chicken, pig, and duck. AniMarkerDB integrates 7,010 marker gene entries across 37 tissues and 846 cell types, together with 71,442 immune epitope records from IEDB. All entries undergo rigorous literature curation, manual validation, and multi-level quality control, with standardized nomenclature and annotation to ensure data consistency and reusability. The platform supports flexible queries by species, tissue, cell type, or gene. It offers analytical tools for cross-species comparison with model organisms such as human and mouse, interactive single-cell atlas visualization, and user-defined cell type annotation. Additionally, AniMarkerDB provides dynamic visualizations and export options, enabling researchers to efficiently obtain large-scale marker and epitope data for downstream applications such as infectious disease research, vaccine target design, and comparative immunology. 
Looking ahead, AniMarkerDB will expand species coverage and incorporate additional modalities, including single-cell atlases from healthy and disease models, establishing itself as a comprehensive and authoritative platform for animal cell biology, disease modeling, and translational research. Download figure Open in new tab INTRODUCTION Single-cell transcriptomics has transformed biological research by enabling high-resolution profiling of cellular diversity across various tissues, species, and developmental stages( 1 , 2 ). While the earliest applications were primarily confined to human and model organisms, scRNA-seq has been increasingly adopted in livestock and poultry research ( 3 ). In particular, studies in species such as chicken, pig, and duck have yielded unprecedented insights into cellular heterogeneity, tissue architecture, and immune system function( 3 – 5 ). Mohammadinejad et al . employed transcriptomic profiling to identify regulatory genes and pathways crucial for skeletal muscle growth in cattle, sheep, and pigs, thereby highlighting genetic markers that are valuable for livestock breeding and production improvement ( 6 ). These economically essential animals not only serve as vital sources of food and agricultural products but have also emerged as valuable models for studying genetics, immunology, and disease mechanisms. For instance, single-cell immune profiling in chickens and ducks has identified novel immune cell types and deepened our understanding of avian resistance mechanisms( 7 ). In the context of xenotransplantation, Saad-Bay et al . employed single-cell RNA and TCR sequencing to elucidate the dynamics of donor-reactive T cells and innate immune cells during pig-to-human organ transplantation, identifying new targets for immunosuppressive therapy ( 8 ). 
Additionally, spatial transcriptomic analysis has been applied to study liver pathology following pig-to-primate islet transplantation, uncovering pathways related to fatty liver disease and adipogenesis ( 9 ). Recent advances demonstrate how emerging sequencing technologies and computational approaches now enable researchers to systematically dissect gene expression at single-cell resolution across a wide range of tissues and cell types ( 4 ). Together, these advances have laid the groundwork for precise cell type definition, lineage tracing, and in-depth exploration of immune mechanisms in livestock species. Despite these advances, significant challenges remain in the field of single-cell research for livestock and poultry. First, information on marker genes and cell type annotation is highly dispersed across the literature, often buried in supplementary files with inconsistent data formats and nomenclature standards( 10 , 11 ). This fragmentation hinders cross-tissue and cross-species comparisons, reproducible cell-type annotation, and broader functional analysis. While databases such as CellMarker( 12 ), CellMarker 2.0( 13 ), PanglaoDB( 14 ), and scPlantDB( 15 ) have provided structured resources for human, mouse, and plant cell markers, available resources for livestock and poultry are still limited in terms of species and tissue coverage, annotation consistency, and functional depth. Furthermore, the limited availability of commercial antibodies and the incomplete functional annotation of many marker genes in these species pose additional challenges for experimental validation and immune studies( 16 – 18 ). To address these limitations, we developed AniMarkerDB ( https://animarkerdb.bio ), a manually curated and standardized database of marker genes and immune epitopes for chicken, pig, and duck. The database integrates high-quality data from 132 single-cell studies, along with 71,442 epitope records retrieved from the IEDB. 
AniMarkerDB offers flexible query capabilities, cross-species marker exploration, structure-based similarity prediction, pathway enrichment tools, and interactive single-cell atlas visualization. This platform aims to provide a centralized, authoritative resource to accelerate research in livestock and poultry cell biology, immunity, and disease modeling. MATERIAL AND METHODS Data collection and literature curation To systematically build AniMarkerDB — a specialized marker gene database for chicken ( Gallus gallus ), pig ( Sus scrofa ), and duck ( Anas platyrhynchos ) — we curated single-cell transcriptomic studies published since 2019 from the PubMed database. Search strategies incorporated keywords such as “single-cell RNA sequencing,” “cell type identification,” and “marker gene discovery”. Studies were screened based on explicit cell type definitions and reported marker genes. Review articles, studies lacking cell annotation information, or those with incomplete metadata were excluded. Accession numbers from eligible studies ( e.g ., GEO, SRA, CRA) were collected to facilitate subsequent data integration. From each publication, we extracted the species, tissue origin, annotated cell types, marker genes, sequencing platform information, accession IDs, and bibliographic details. All marker genes were standardized using the NCBI Entrez E-utilities to retrieve official gene symbols, Entrez Gene IDs, protein IDs, and functional descriptions( 19 ). Synonymous gene names were unified following official gene symbols. As of July 31, 2025, AniMarkerDB includes 98 chicken studies, 113 pig studies, and 7 duck studies, encompassing 7,010 marker records across 37 tissues and 846 cell types. Data processing and matrix standardization To construct comprehensive single-cell transcriptomic atlases for livestock species, we applied a unified data processing and matrix standardization pipeline to all curated datasets. 
In total, AniMarkerDB integrates 2,224,866 chicken cells and 3,861,087 pig cells, originating from 17 chicken tissues and 24 pig tissues, and covering major biological systems such as the immune, digestive, reproductive, and muscular systems. Datasets were primarily obtained from public repositories, including GEO (Gene Expression Omnibus), SRA (Sequence Read Archive), CRA (Genome Sequence Archive), CNGBdb (China National GeneBank DataBase), and USDA (United States Department of Agriculture). For datasets providing ready-to-use sparse matrices ( e.g ., .mtx or .h5 formats), we directly incorporated them into downstream analyses without additional preprocessing. For projects offering only raw sequencing data ( e.g ., SRA format or FASTQ files), a standardized preprocessing pipeline was applied. Raw reads were aligned to the appropriate reference genomes ( GRCg7b for chicken and Sscrofa11.1 for pig), followed by feature-barcode matrix generation. Different sequencing platforms were processed using dedicated pipelines: CellRanger (v9.0.0) was applied for 10x Genomics datasets, while Celescope2 (v2.4.0) was used for BGI GEXSCOPE and DNBSEQ C4/C5 datasets. All resulting expression matrices were standardized into .h5ad format to facilitate uniform downstream integration and analysis. Datasets originating from the same tissue type and species were subsequently integrated using Scanpy (v1.10.3)( 20 ). To ensure consistent gene annotation across datasets, gene identifiers were unified based on Ensembl IDs and corresponding official gene symbols( 21 ). Quality control and batch effect correction All single-cell datasets were preprocessed using a standardized Scanpy (v1.10.3)( 20 ) pipeline to ensure data quality and cross-sample consistency. Cells with 10% were excluded to remove low-quality cells and technical artifacts. To further eliminate potential doublets, Scrublet (v0.2.3)( 22 ) was applied to compute doublet scores, and cells with scores >0.3 were filtered out. 
Following quality control, batch effect correction was performed using the scVI model (scvi-tools, v1.2.0) ( 23 ), which models cellular transcriptomic features in a latent space to harmonize expression distributions while preserving biological heterogeneity. The effectiveness of batch correction was evaluated by calculating Silhouette coefficients ( 24 ) based on latent embeddings using scikit-learn (v1.5.2) ( 23 ), thereby quantifying the extent of batch mixing. Subsequently, principal component analysis (PCA) was conducted on the latent representations to extract the top 30 principal components, followed by the construction of a k-nearest neighbor (kNN) graph for mapping cell relationships. Cell clusters were then identified using the Leiden( 24 ) community detection algorithm, starting with an initial resolution of 1.0 and dynamically adjusting the resolution to ensure that the number of clusters fell within the range of 10 to 50 and that each cluster contained at least 50 cells. Finally, Uniform Manifold Approximation and Projection (UMAP) was employed for the two-dimensional visualization of the global distribution of cell types ( 25 ). Cell-type annotation and functional enrichment analysis Following clustering, differential expression analysis was performed for each cell cluster using the Wilcoxon rank-sum test, as implemented in Scanpy (v1.10.3) ( 20 ). Filtering criteria included a gene detection frequency >25% within the cluster, log2 fold change >1, and an adjusted P-value <0.001 using the Benjamini-Hochberg method. The top 200 upregulated genes per cluster were selected to construct cluster-specific expression profiles. Cell type annotation was conducted by comparing these cluster-specific profiles against reference profiles curated in AniMarkerDB. Only reference expression profiles with an average expression level greater than 1 were considered for annotation to ensure reliability. 
Assignments were made based on the highest correlation scores, following a strategy inspired by expression profile-matching approaches used in SingleR ( 28 ), scmap ( 29 ), and CellTypist ( 26 ), ensuring robust cross-dataset annotation consistency. Functional enrichment analysis was performed on signature genes for each identified cell type. Gene Ontology (GO) enrichment across biological process (BP), molecular function (MF), and cellular component (CC) categories, along with Kyoto Encyclopedia of Genes and Genomes (KEGG) pathway analysis, was conducted using g:Profiler ( 27 ) and clusterProfiler (v4.0.0)( 28 ), with an FDR threshold of <0.05. Gene set enrichment analysis (GSEA) was performed using clusterProfiler (v4.0.0)( 28 ), with reference gene sets obtained from MSigDB (v7.2)( 29 ) to assess activation trends. Protein–protein interaction (PPI) networks were constructed using STRING( 30 ) with a minimum confidence score threshold of 0.4. Epitope data integration To supplement immune-related resources, experimentally validated epitope data for livestock and poultry species (primarily chicken, pig, and duck) were collected from the Immune Epitope Database (IEDB)( 31 ). The records included antigen information, epitope amino acid sequences, associated MHC molecule classes, and origin. All epitope data were standardized and integrated into the database’s retrieval module, allowing users to filter results based on species, pathogen, or epitope sequence. To enhance the structural context, external links to corresponding MHC molecule entries were incorporated from the IPD-MHC database, enabling users to explore the spatial conformations of MHC-epitope binding interfaces ( 32 ). User-defined cell-type annotation To enhance interactivity and flexibility, we implemented a user-defined annotation module that accepts custom marker gene sets uploaded by users. 
Cell type predictions are generated based on an internally curated marker database using two complementary strategies. First, a hypergeometric enrichment analysis evaluates the significance between user-submitted genes and reference marker sets, with composite scores calculated by weighting P-values and the number of matched genes. The top 10 predicted cell types are ranked and presented in a bar plot. Second, direct overlaps between user-provided markers and reference cell types are quantified to construct a matching matrix, which is visualized as a heatmap for intuitive interpretation. To ensure accurate matching, user-uploaded genes must conform to standardized gene symbol formats, and internal normalization is applied to correct case sensitivity and formatting inconsistencies. Search and cross-species comparison AniMarkerDB provides an integrated search system to facilitate the efficient retrieval of cell marker information. Users can search by cell type name or gene symbol through a unified search interface on the homepage. Search results are displayed in sortable and filterable tables, including species, tissue, cell type, marker gene, and supporting references. The platform also promotes reverse lookup by marker gene, enabling users to query associated cell types based on specific gene inputs. Search outputs are structured with tabular presentations and intuitive visualizations, including summary tables of matched markers and word cloud figures highlighting the most representative marker genes based on the number of supporting occurrences. Building upon the basic search functionality, AniMarkerDB incorporates a cross-species marker gene comparison module to enhance the interpretability and comparative utility of cell type annotation. 
Upon querying any cell type from chicken, pig, or duck, the platform simultaneously displays the Top 5 most frequently reported marker genes for the corresponding cell type across five species: chicken, pig, duck, human, and mouse. Marker gene information for humans and mice is sourced from CellMarker 2.0 ( 13 ), with cross-species matching based on standardized cell type nomenclature and unified gene symbol annotation. This module enables users to rapidly assess conserved marker expression patterns across species, providing valuable references for translational research, evolutionary studies, and functional validation. For marker genes with the same name across multiple species, AniMarkerDB allows users to assess structural similarities and differences across species. Protein structure data are retrieved from AlphaFoldDB( 33 ), and root mean square deviation (RMSD) values are computed to quantify structural divergence. This module enables users to rapidly assess conserved marker expression and structural patterns across species, providing valuable references for translational research, evolutionary studies, and functional validation. Web interface and statistical visualization AniMarkerDB utilizes its integrated search system, allowing for intuitive data visualization and exploration. The homepage provides an overview of species, tissues, and cell types covered in the database. For organ-level single-cell atlases, the platform presents tissue-specific UMAP projections, enabling users to explore the distribution of cell types within each tissue. Hovering over a specific cell cluster reveals real-time information including cell type annotation, source publication, and sample attributes. Additionally, a statistical module provides dynamic summaries of database contents, such as sample counts, cell type distributions, and marker gene frequencies, with interactive filtering by species and tissue origin. 
A detailed user guide is available to assist users in navigating search, visualization, and enrichment analysis workflows. Data availability AniMarkerDB is freely available through the official website (URL to be provided upon release), offering unrestricted access to all integrated marker gene data and organ-level single-cell atlases. Users can browse, search, and download standardized datasets without the need for registration. All resources have been curated and organized in accordance with the FAIR principles (Findable, Accessible, Interoperable, and Reusable) to support data reuse and secondary development ( 34 ). Results Overview of AniMarkerDB AniMarkerDB is the first comprehensive database dedicated to integrating multi-dimensional information on marker genes and immune epitopes in poultry and livestock. Focusing on three major economic animals—chicken, pig, and duck—AniMarkerDB systematically curates and integrates high-quality data from primary literature indexed in major databases such as PubMed, establishing a standardized data management and annotation framework. As of July 2025, AniMarkerDB contains 218 original publications, covering over 37 tissue types and 846 cell types, with thousands of marker gene entries ( Figure 1 ), including 2,555, 4,250, and 205 entries for chicken, pig, and duck, respectively. All data are rigorously manually curated, consistently named, and subjected to multi-level quality control, with standardized gene symbols, protein IDs, and comprehensive metadata to ensure high accuracy and reusability across datasets and species. Download figure Open in new tab Figure 1 The architecture of AniMarkerDB The annotation framework of AniMarkerDB is compatible with mainstream resources, such as CellMarker 2.0 ( 13 ). 
Each marker gene entry includes detailed metadata, such as the gene’s full name, Gene ID, Protein ID, PMID, tissue, cell type, and sequencing platform ( Figure 1 ), providing a solid foundation for downstream comparative and functional analyses. In parallel, the database systematically integrates immune epitope data from IEDB and compiles statistics on epitope distribution across species, cell types, and pathogens ( 31 ). A core feature of AniMarkerDB is the standardized integration of primary scRNA-seq datasets from repositories such as GEO, GSA, EMBL, and others, enabling unified normalization and clustering across diverse tissue types. Automated cell-type annotation and hierarchical clustering facilitate the construction of comprehensive single-cell atlases for various tissues and cell populations. In total, we collected 2,224,866 chicken cells and 3,861,087 pig cells ( Table 1 ), with most tissues contributing tens to hundreds of thousands of cells. Batch-correction performance was quantified using Silhouette Scores across major tissues; values were close to zero for most tissues, indicating effective removal of batch effects and consistent intra-tissue clustering. This extensive cell coverage, combined with robust quality control, establishes a solid foundation for constructing downstream single-cell atlases and identifying marker genes. Building upon these atlases, AniMarkerDB integrates functional enrichment analyses including GSEA, GO, KEGG, and PPI network reconstruction, thereby enabling in-depth investigation of cellular heterogeneity and biological function across species and tissue contexts. 
View this table: View inline View popup Download powerpoint Table 1 Data summary of Atlas Module In terms of functional expansion, AniMarkerDB supports multi-dimensional search, interactive visualization (including word clouds, bar charts, and heatmaps), as well as tools for cross-species marker gene comparison, protein structural similarity assessment, and user-defined cell annotation ( Figure 1 ). The homepage summarizes coverage by species ( Figure 2A ) and visualizes usage trends via a Sankey plot ( Figure 2B ), together with panels that display the most-searched cell types and the top pathogens by epitope counts for each host species ( Figure 2C ). The resource is continuously updated to track new single-cell datasets and epitope records. The platform is continuously updated with newly published single-cell transcriptomic datasets and epitope information, ensuring alignment with the latest research developments. Download figure Open in new tab Figure 2. Distribution of marker and epitope data in AniMarkerDB. ( A ) Summary statistics of chicken, duck, and pig marker entries, including numbers of tissue types, cell types, markers, and literature sources. ( B ) Sankey diagram showing the relationships among species, tissues, cell types, and the immune/somatic classification of marker-associated cells. ( C ) Left: Top 10 most frequently searched cell types in the marker module; Right: The top 10 pathogens with the highest number of epitopes recorded for the selected host species. AniMarkerDB makes extensive use of public resources by integrating not only epitope data from IEDB but also marker gene information for mouse and human from CellMarker 2.0. Epitope statistics showed that most entries were derived from pig, followed by chicken and duck, with peptide lengths predominantly distributed between 9 and 11 amino acids ( Figure 3A ), consistent with typical MHC-I studies. 
At the pathogen level, African swine fever virus (ASFV) and porcine reproductive and respiratory syndrome virus (PRRSV) accounted for the largest number of epitopes, while avian pathogens were mainly represented by avian paramyxovirus type 1 and influenza A virus ( Figure 3B ). Cross-species comparison of marker genes revealed that endothelial cells, macrophages, T cells, and fibroblasts exhibited the highest conservation ( Figure 3C ), with detailed shared markers illustrated in the heatmap ( Figure 3D , showing genes present in ≥4 species). Additional statistics indicated that human and mouse shared the largest number of common cell types (656), followed by human and pig (224) ( Figure 3E ). Overall, these results demonstrate the broad coverage and systematic integration of epitopes and marker genes across species within the platform. Download figure Open in new tab Figure 3. Epitope composition, pathogen distribution, and cross-species marker gene comparison in AniMarkerDB. ( A ) Histogram of epitope length distribution and pie chart of species composition. ( B ) Bar chart of epitope counts for different pathogens. ( C ) Bar chart of cell types ranked by the number of marker genes shared across three or more species. ( D ) Heatmap of cross-species marker genes across cell types. The heatmap shows conserved marker genes present in at least four species, with red indicating presence and blue indicating absence. ( E ) UpSet plot showing intersections of cell types among species. Taken together, the establishment of AniMarkerDB substantially enriches the landscape of systematic resources dedicated to single-cell marker genes and immune epitopes in poultry and livestock. Through rigorous standardization and a diverse array of functionalities, the platform enables high-resolution exploration of cellular heterogeneity, immune mechanisms, molecular breeding, and disease resistance in economically important animal species. 
User Interface and Application Scenarios of AniMarkerDB AniMarkerDB features an intuitive and hierarchical web interface that supports rapid information retrieval and in-depth exploration of marker genes and immune epitopes for livestock and poultry research ( Figure 4 ). Upon visiting the homepage, users can quickly search by entering keywords such as gene names, tissues, or cell types in the top search bar or by using the species selector. Below, an anatomical overview presents the major organ systems of species like chicken and pig as clickable nodes. Users can click different organ regions to quickly navigate to the summary page of marker genes for corresponding tissues and cell types, enabling direct visualization of the distribution of main tissue cells ( Figure 4A ). For example, when “chicken” is selected, clicking its icon displays a visual overview of key organs such as the bursa of Fabricius, thymus, and liver. By clicking on the relevant organ area, users can view statistics for all cell types within the tissue, the number of main marker genes, and a collection of related literature for that tissue ( Figure 4B ). In addition to anatomical navigation, AniMarkerDB offers robust custom cell annotation functionality directly on the homepage. In the “Cell Annotation” module, users can upload their own gene sets. The platform automatically performs enrichment analysis against all cell-type markers in the database. It outputs the top 10 most enriched cell types (displayed as bar or heat maps), while showing the marker overlap for each cell type. For example, when uploading T cell-associated genes such as CD3D and CD8A, the system highlights “CD8 + T cell” as the most probable match and simultaneously displays which input genes match the markers for this cell type ( Figure 4C ). Download figure Open in new tab Figure 4. Overview of the AniMarkerDB interface and core functionalities. 
( A ) The homepage interface integrates a quick search bar, species selector, and an anatomical overview of major tissues, each linked to corresponding cell marker entries, enabling quick access to the associated cell types. ( B ) The advanced search page allows queries by species, tissue, cell type, and gene, with results visualized as a marker word cloud and table. ( C ) The marker annotation module supports user-uploaded gene sets, returning predicted cell types ranked by enrichment scores and marker overlap, displayed via bar chart and matrix heatmap. The “Advanced Search” page enables users to flexibly combine criteria such as species, tissue, cell type, and marker gene for refined queries. The search results are displayed as word clouds and tables, both sorted by the number of supporting publications. For instance, to systematically query “porcine lung T cells and their marker genes,” users can sequentially select “Species: Pig,” “Tissue: Lung,” and “Cell Type: T cell.” The platform will return word clouds and heatmaps of all T cell-related markers ( Figure 5A ). Users can further pinpoint specific cell types ( e.g ., “T cell”) in the list above the word cloud, with the platform then displaying the corresponding marker gene word cloud and ranking table ( Figure 5B ). By clicking any marker gene entry (such as CD3E), users are directed to the detailed gene information page, which includes the gene symbol, Gene ID, Protein ID, supporting PMID, and annotation of tissue/cell type, among other metadata ( Figure 5C ). Moreover, the immune epitope query function is a highlight of AniMarkerDB. Users can quickly switch to the “Epitope Module” from the homepage, which integrates IEDB immune epitope data. 
For immune cell-related marker genes, users can directly access the “Epitope Module” from the detailed gene information page to perform multidimensional interactive queries based on host species, pathogen, epitope type (MHC-I/II, etc.), and experimental category ( Figure 5D ). Download figure Open in new tab Figure 5. Search result pages of AniMarkerDB. ( A ) Marker gene search interface allowing queries by species, tissue, cell type, or gene symbol. Searches return multiple candidate cell types along with associated markers. ( B ) Refined search results for a selected cell type, highlighting relevant marker genes. ( C ) Detailed view of a specific marker gene, including gene symbol, protein IDs, and reference sources. ( D ) Epitope records associated with immune cell markers, with filtering options for host, pathogen, and assay type. ( E ) Cross-species comparison of the selected cell type, showing shared and species-specific marker genes among chicken, pig, duck, human, and mouse. ( F ) Structural comparison of marker gene proteins across species. A heatmap of RMSD values summarizes 3D protein similarity, with structure download links. All results support batch download, greatly facilitating applications such as infectious disease research and vaccine target design. The platform also supports systematic horizontal comparisons of cell types and their marker genes across species. The “Cross-species Comparison Module” can be found under the result panel of a particular cell type search; for example, selecting “T cell” will automatically list the top 5 markers in chicken, pig, duck, human, and mouse (the latter two derived from CellMarker 2.0), and identify common markers such as CD3D and CD3E across species ( Figure 5E ). For any given marker gene, users can directly access the “Cross-species Structure Comparison” on its detailed page and visualize 3D protein structure similarities via RMSD heatmaps ( Figure 5F ). 
For example, the details page of CD3E reveals that the greatest structural difference is between duck and human, with an RMSD value of 5.397. Such results provide evidence for studies of functional conservation and antibody cross-reactivity. Additionally, the “Atlas Module” in AniMarkerDB enables the visualization of single-cell atlases and marker gene expression for specific tissues/cell types. Users can select target species, tissues, and cell types, and view the distribution of cell subpopulations in UMAP/t-SNE dimensionality reduction plots. For example, if researchers wish to examine the “spatial distribution and marker expression of CD4 + T cells in chicken lung,” they can simply select the target tissue and cell type in the Atlas interface ( Figure 6A ); the platform will automatically present the distribution of cell subgroups and the top expressed marker word clouds ( Figure 6B and C ), with the ability to hover for detailed metadata for each cell. Users can also click to examine the heatmap of a particular marker gene’s expression across cell types ( Figure 6D ). The Atlas module further integrates GSEA, GO, and KEGG enrichment analyses, as well as PPI networks ( Figure 6 E, F and G ), enabling users to analyze the function and regulatory networks of different cell types across various species and tissues in one step. Download figure Open in new tab Figure 6. Functional characterization and visualization of cell types in tissue-level atlases. ( A ) Interface for selecting the species, tissue, and annotated cell types of interest for downstream analysis. ( B ) Word cloud visualization displaying the top marker genes for the selected cell type, which are subsequently used for downstream enrichment analyses. ( C ) UMAP visualization of the selected tissue, with cells colored by annotated cell types. Hovering reveals metadata for individual cells. 
( D ) Ranked marker gene list for the selected cell type, alongside a feature plot showing the expression of a selected gene across all cells. ( E ) GSEA (Gene Set Enrichment Analysis) results for the selected cell type. ( F ) GO and KEGG enrichment results with toggle support; pathway descriptions and matching marker genes are shown interactively. ( G ) Protein–protein interaction (PPI) network constructed from top differentially expressed genes using STRING, with the 10 most connected proteins based on interaction degree. Database Management and Continuous Updates AniMarkerDB is supported by a dedicated backend management system that enables real-time curation and maintenance of marker genes, immune epitopes, and single-cell atlases. The system provides secure administrator access ( Figure 7A ) and organizes all records in a standardized tabular format containing key metadata such as PMID, species, tissue type, cell type, sequencing technology, gene symbol, Gene ID, and Protein ID. Administrators can efficiently filter records, conduct batch imports or exports, and upload standardized datasets via pre-defined templates, ensuring consistency and accuracy. In addition to ad hoc updates ( Figure 7B ), AniMarkerDB follows a scheduled quarterly update cycle, during which the latest literature is systematically reviewed, newly released single-cell transcriptomic datasets are integrated, and relevant entries from external resources such as IEDB, CellMarker 2.0, and public repositories including GEO and GSA are incorporated. This continuous management and update mechanism ensures that AniMarkerDB remains comprehensive, current, and aligned with the latest advances in single-cell biology and immunological research in livestock and poultry. Download figure Open in new tab Figure 7. AniMarkerDB management system for database maintenance and updates. (A) Secure login interface for the AniMarkerDB backend management system. 
(B) Data management panel for browsing, editing, and batch import/export of marker genes, immune epitopes, and single-cell atlas records. In summary, AniMarkerDB, through multi-level data retrieval, visualization, and functional expansion, enables users to explore from species and tissues down to cells, genes, and epitopes in a full pipeline manner. Whether for basic biological research, functional genomics, vaccine or antibody development, or rapid annotation and validation of single-cell data, AniMarkerDB provides researchers with an intuitive, systematic, and authoritative all-in-one resource and support. Discussion The rapid proliferation of single-cell transcriptomic studies in livestock and poultry has produced a large volume of valuable data. However, the lack of standardized, high-coverage resources has significantly hindered downstream comparative and functional analyses ( 10 , 35 ). AniMarkerDB addresses this critical gap by systematically curating and integrating marker genes and immune epitopes from economically significant species—such as chicken, pig, and duck— into a unified, queryable framework. First, AniMarkerDB currently provides the most comprehensive marker gene resource for livestock and poultry, systematically integrating 6352 high-quality marker gene records across more than 26 tissues and 791 cell types. This extensive data foundation enables robust horizontal and vertical analyses across multiple tissues, cell types, and species, significantly broadening the scope and depth of animal cell atlas research. These features allow researchers to perform reliable comparative studies while minimizing issues caused by inconsistent nomenclature or fragmented annotations—challenges that are still prevalent in livestock scRNA-seq literature. 
Beyond its comprehensive data coverage, a key strength of AniMarkerDB lies in its multi-layered standardization pipeline, which includes harmonized gene symbols, cell ontology alignment, and rigorous metadata curation. This ensures consistency and interoperability across datasets, enabling users to conduct reliable comparative studies without being hindered by inconsistent annotations or fragmented records—issues that remain prevalent in livestock scRNA-seq literature ( 10 , 11 ). Furthermore, integrative analyses of epitope composition and marker gene conservation underscore the database’s ability to capture both species-specific molecular features and cross-species commonalities, thereby validating its utility for comparative immunogenomics. In addition, AniMarkerDB offers a flexible and powerful suite of analytical tools designed to support the diverse research needs of the livestock and poultry research communities. These tools include cross-species marker gene comparison, protein structural similarity analysis, immune epitope retrieval, visualization of single-cell atlases, functional enrichment, and user-defined annotation. Users can easily retrieve marker genes for specific tissues or cell types, perform multi-species functional and evolutionary analyses, and support applications in basic research, molecular breeding, and disease prevention. These capabilities not only facilitate basic and translational research, but also enhance molecular breeding and disease prevention strategies, particularly in non-model species. Moreover, the platform supports interactive data visualization and export, reducing technical barriers and promoting reproducibility in single-cell workflows. Despite its current strengths, AniMarkerDB remains limited in species coverage, focusing primarily on chicken, pig, and duck. 
However, the critical importance of these economically valuable animals in agriculture, food security, and as models for zoonotic disease research underscores the urgent need for systematic resources in this field ( 36 ). With the rapid advancement of single-cell sequencing and spatial multi-omics technologies, the capacity to unravel cellular complexity and disease mechanisms in non-model and emerging animal species is expanding, making comprehensive marker gene databases increasingly indispensable ( 37 ). To ensure sustainability and data freshness, AniMarkerDB is supported by a linked management system that coordinates literature tracking, data ingestion, quality control, and version updates. This system not only facilitates timely incorporation of new single-cell and epitope datasets but also provides a framework for user contributions under expert curation, thereby maintaining long-term reliability and community engagement. Accordingly, future development of AniMarkerDB will focus on broadening its species scope to encompass additional livestock, poultry, and wild animals of agricultural and biomedical significance, while enriching the diversity and resolution of data modalities and annotations. In particular, integrating single-cell datasets from both healthy and pathogen-infected animals will provide valuable resources for the study of animal disease models and host-pathogen interactions, ultimately contributing to the prevention and control of zoonotic diseases. The platform’s adherence to FAIR principles and collaboration with international consortia will further enhance interoperability, sustainability, and global impact. Taken together, AniMarkerDB is positioned as a key resource to support single-cell research in non-model and economically important animals, and will continue to play a pivotal role in advancing functional genomics, disease resistance, and translational research in agricultural and veterinary sciences. 
In conclusion, AniMarkerDB represents a foundational advance in livestock and poultry single-cell research. By integrating high-confidence molecular signatures and immune features into a unified, user-friendly platform, it enables the scientific community to explore cellular complexity, functional divergence, and immunological mechanisms with unprecedented resolution. We anticipate that AniMarkerDB will serve as a pivotal enabler for both fundamental discovery and applied innovations in animal health, breeding, and disease resilience. DATA AVAILABILITY AniMarkerDB can be accessed at https://animarkerdb.bio AUTHOR CONTRIBUTIONS Zhuohang Li: Writing—original draft, Formal analysis, Data curation, Conceptualization. Tao Zhang: Data curation, Formal analysis. Xueqing Li: Writing—review & editing. Jiangwu Huang: Data curation. Zimin Xie: Data curation. Fei Gao: Data curation. Haiming Cai: Funding acquisition. Mingfei Sun: Funding acquisition. Manman Dai: Conceptualization, Writing—review & editing, Funding acquisition. Ming Liao: Funding acquisition. FUNDING This work was supported by the National Natural Science Foundation of China ( 32473060 to MD, 32172868 to MD and 32461120064 to ML); the National Key R&D Program of China (2022YFD1801000 to ML); the National Natural Science Foundation of Guangdong Province (2024A1515013151 to MD); Guangzhou Basic and Applied Basic Research Project (2025A04J5445 to MD); Laboratory of Lingnan Modern Agriculture Project (NT2025005 to MD); Young Scholars of Yangtze River Scholar Professor Program (2024, Manman Dai); Young Pearl River Scholar of “Guangdong Special Support Plan” (2024, Manman Dai); the Opening Project of State Key Laboratory of Swine and Poultry Breeding Industry (2023QZ-NK14 and 2023QZ-NK05 to MS). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript. Conflict of interest statement No potential conflict of interest was reported by the authors. 
ACKNOWLEDGEMENTS We acknowledge the performance computing resources support from Guangzhou Minglead Gene Technology Co., Ltd. in the development and maintenance of the platform. We are grateful to all the data contributors whose invaluable contributions have made this project possible. Funder Information Declared National Natural Science Foundation of China , 32473060 , 32172868 , 32461120064 the National Key R&D Program of China , 2022YFD1801000 the National Natural Science Foundation of Guangdong Province , 2024A1515013151 Guangzhou Basic and Applied Basic Research Project , 2025A04J5445 Laboratory of Lingnan Modern Agriculture Project , NT2025005 Young Scholars of Yangtze River Scholar Professor Program , 2024, Manman Dai Young Pearl River Scholar of “Guangdong Special Support Plan” , 2024, Manman Dai the Opening Project of State Key Laboratory of Swine and Poultry Breeding Industry , 2023QZ-NK14 , 2023QZ-NK05 Footnotes https://animarkerdb.bio References 1. ↵ Gulati , G.S. , D’Silva , J.P. , Liu , Y. , Wang , L. and Newman , A.M . ( 2025 ) Profiling cell identity and tissue architecture with single-cell and spatial transcriptomics . NAT REV MOL CELL BIO , 26 , 11 – 31 . OpenUrl CrossRef PubMed 2. ↵ Stuart , T. and Satija , R . ( 2019 ) Integrative single-cell analysis . NAT REV GENET , 20 , 257 – 272 . OpenUrl CrossRef PubMed 3. ↵ Lu , Y. , Li , M. , Gao , Z. , Ma , H. , Chong , Y. , Hong , J. , Wu , J. , Wu , D. , Xi , D. and Deng , W . ( 2024 ) Innovative Insights into Single-Cell Technologies and Multi-Omics Integration in Livestock and Poultry . INT J MOL SCI , 25 . 4. ↵ Yan , Y. , Zhu , S. , Jia , M. , Chen , X. , Qi , W. , Gu , F. , Valencak , T.G. , Liu , J. and Sun , H . ( 2024 ) Advances in single-cell transcriptomics in animal research . J ANIM SCI BIOTECHNO , 15 , 102 . OpenUrl 5. ↵ Maxwell , M. , Soderlund , R. , Hartle , S. and Wattrang , E . ( 2024 ) Single-cell RNA-seq mapping of chicken peripheral blood leukocytes . BMC GENOMICS , 25 , 124 . 
OpenUrl CrossRef PubMed 6. ↵ Mohammadinejad , F. , Mohammadabadi , M. , Roudbari , Z. and Sadkowski , T . ( 2022 ) Identification of Key Genes and Biological Pathways Associated with Skeletal Muscle Maturation and Hypertrophy in Bos taurus , Ovis aries, and Sus scrofa. ANIMALS-BASEL , 12 . 7. ↵ Liang , Y. , Ma , Y. , Zhang , Y. , Chen , Z. , Wang , Z. , Li , X. , Cui , L. , Xu , L. , Liu , S. and Li , H . ( 2021 ) Single-Cell Analysis of the In Vivo Dynamics of Host Circulating Immune Cells Highlights the Importance of Myeloid Cells in Avian Flaviviral Infection . J IMMUNOL , 207 , 2878 – 2891 . OpenUrl Abstract / FREE Full Text 8. ↵ Fathi , F. , Suek , N. , Vermette , B. , Breen , K. , Saad , Y.S. , Bay , C. , Parks , C.A. , Stern , J. , Khalil , K. and Kim , J. , et al. ( 2025 ) Donor-reactive T cells and innate immune cells promote pig-to-human decedent xenograft rejection . Res Sq . 9. ↵ Bang , Y.J. , Chung , H. , Kim , J. , Gong , J. , Min , B. , Shin , J. , Kim , Y. , Kim , H. and Park , C . ( 2025 ) Spatial Transcriptomic Analysis Reveals Increased Adipogenesis and Triggering of the Non-Alcoholic Fatty Liver Disease Pathway in Pig-to-NHP Islet Recipients’ livers During the Early Post-xenotransplant Period . XENOTRANSPLANTATION , 32 , e70062 . OpenUrl PubMed 10. ↵ Lyons , A. , Brown , J. and Davenport , K.M . ( 2024 ) Single-Cell Sequencing Technology in Ruminant Livestock: Challenges and Opportunities . CURR ISSUES MOL BIOL , 46 , 5291 – 5306 . OpenUrl PubMed 11. ↵ Meng , F. , Huang , X. , Qin , W. , Liu , K. , Wang , Y. , Li , M. , Ren , Y. , Li , Y. and Sun , Y . ( 2023 ) singleCellBase: a high-quality manually curated database of cell markers for single cell annotation across multiple species . BIOMARK RES , 11 , 83 . OpenUrl PubMed 12. ↵ Zhang , X. , Lan , Y. , Xu , J. , Quan , F. , Zhao , E. , Deng , C. , Luo , T. , Xu , L. , Liao , G. and Yan , M. et al. ( 2019 ) CellMarker: a manually curated resource of cell markers in human and mouse . 
NUCLEIC ACIDS RES , 47 , D721 – D728 . OpenUrl CrossRef PubMed 13. ↵ Hu , C. , Li , T. , Xu , Y. , Zhang , X. , Li , F. , Bai , J. , Chen , J. , Jiang , W. , Yang , K. and Ou , Q. et al. ( 2023 ) CellMarker 2.0: an updated database of manually curated cell markers in human/mouse and web tools based on scRNA-seq data . NUCLEIC ACIDS RES , 51 , D870 – D876 . OpenUrl CrossRef PubMed 14. ↵ Franzen , O. , Gan , L. and Bjorkegren , J.L.M . ( 2019 ) PanglaoDB: a web server for exploration of mouse and human single-cell RNA sequencing data . DATABASE-OXFORD , 2019 . 15. ↵ He , Z. , Luo , Y. , Zhou , X. , Zhu , T. , Lan , Y. and Chen , D . ( 2024 ) scPlantDB: a comprehensive database for exploring cell types and markers of plant cell atlases . NUCLEIC ACIDS RES , 52 , D1629 – D1638 . OpenUrl CrossRef PubMed 16. ↵ Deeg , C.A. , Degroote , R.L. , Giese , I.M. , Hirmer , S. , Amann , B. , Weigand , M. , Wiedemann , C. and Hauck , S.M . ( 2020 ) CD11d is a novel antigen on chicken leukocytes . J PROTEOMICS , 225 , 103876 . OpenUrl PubMed 17. Yuan , C. , Gillon , A. , Gualdron Duarte , J.L. , Takeda , H. , Coppieters , W. , Georges , M. and Druet , T . ( 2025 ) Evaluation of genomic selection models using whole genome sequence data and functional annotation in Belgian Blue cattle . GENET SEL EVOL , 57 , 10 . OpenUrl PubMed 18. ↵ Tian , K. , Zhang , C. , Gao , C. , Shi , J. , Xu , C. , Xie , W. , Yan , S. , Xiao , C. , Jia , X. and Tian , Y. et al. ( 2025 ) Full-length transcriptome sequencing of seven tissues of GuShi chickens . POULTRY SCI , 104 , 104697 . OpenUrl PubMed 19. ↵ Sayers , E.W. , Beck , J. , Bolton , E.E. , Brister , J.R. , Chan , J. , Comeau , D.C. , Connor , R. , DiCuccio , M. , Farrell , C.M. and Feldgarden , M. et al. ( 2024 ) Database resources of the National Center for Biotechnology Information . NUCLEIC ACIDS RES , 52 , D33 – D43 . OpenUrl CrossRef PubMed 20. ↵ Wolf , F.A. , Angerer , P. and Theis , F.J . 
( 2018 ) SCANPY: large-scale single-cell gene expression data analysis . GENOME BIOL , 19 , 15 . OpenUrl CrossRef PubMed 21. ↵ Dyer , S.C. , Austine-Orimoloye , O. , Azov , A.G. , Barba , M. , Barnes , I. , Barrera-Enriquez , V.P. , Becker , A. , Bennett , R. , Beracochea , M. and Berry , A. et al. ( 2025 ) Ensembl 2025 . NUCLEIC ACIDS RES , 53 , D948 – D957 . OpenUrl CrossRef PubMed 22. ↵ Wolock , S.L. , Lopez , R. and Klein , A.M . ( 2019 ) Scrublet: Computational Identification of Cell Doublets in Single-Cell Transcriptomic Data . CELL SYST , 8 , 281 – 291 . OpenUrl PubMed 23. ↵ Swami , A. and Jain , R . ( 2013 ) Scikit-learn: Machine Learning in Python . J MACH LEARN RES , 12 , 2825 – 2830 . OpenUrl 24. ↵ Traag , V.A. , Waltman , L. and van Eck , N.J. ( 2019 ) From Louvain to Leiden: guaranteeing well-connected communities . SCI REP-UK , 9 , 5233 . OpenUrl 25. ↵ Mcinnes , L. and Healy , J . ( 2018 ) UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction . The Journal of Open Source Software , 3 , 861 . OpenUrl CrossRef 26. ↵ Dominguez Conde , C. , Xu , C. , Jarvis , L.B. , Rainbow , D.B. , Wells , S.B. , Gomes , T. , Howlett , S.K. , Suchanek , O. , Polanski , K. and King , H.W. , et al. ( 2022 ) Cross-tissue immune cell analysis reveals tissue-specific features in humans . SCIENCE , 376 , eabl5197 . OpenUrl CrossRef PubMed 27. ↵ Raudvere , U. , Kolberg , L. , Kuzmin , I. , Arak , T. , Adler , P. , Peterson , H. and Vilo , J . ( 2019 ) g:Profiler: a web server for functional enrichment analysis and conversions of gene lists (2019 update) . NUCLEIC ACIDS RES , 47 , W191 – W198 . OpenUrl CrossRef PubMed 28. ↵ Wu , T. , Hu , E. , Xu , S. , Chen , M. , Guo , P. , Dai , Z. , Feng , T. , Zhou , L. , Tang , W. and Zhan , L. et al. ( 2021 ) clusterProfiler 4.0: A universal enrichment tool for interpreting omics data . Innovation (Camb ) , 2 , 100141 . OpenUrl PubMed 29. ↵ Castanza , A.S. , Recla , J.M. , Eby , D. , Thorvaldsdottir , H. 
, Bult , C.J. and Mesirov , J.P . ( 2023 ) Extending support for mouse data in the Molecular Signatures Database (MSigDB) . NAT METHODS , 20 , 1619 – 1620 . OpenUrl CrossRef PubMed 30. ↵ von Mering , C. , Jensen , L.J. , Snel , B. , Hooper , S.D. , Krupp , M. , Foglierini , M. , Jouffre , N. , Huynen , M.A. and Bork , P. ( 2005 ) STRING: known and predicted protein-protein associations, integrated and transferred across organisms . NUCLEIC ACIDS RES , 33 , D433 – D437 . OpenUrl CrossRef PubMed Web of Science 31. ↵ Vita , R. , Blazeska , N. , Marrama , D. , Duesing , S. , Bennett , J. , Greenbaum , J. , De Almeida Mendes , M. , Mahita , J. , Wheeler , D.K. and Cantrell , J.R. , et al. ( 2025 ) The Immune Epitope Database (IEDB): 2024 update . NUCLEIC ACIDS RES , 53 , D436 – D443 . OpenUrl CrossRef PubMed 32. ↵ Maccari , G. , Robinson , J. , Barker , D.J. , Yates , A.D. , Hammond , J.A. and Marsh , S.G.E . ( 2025 ) The 2024 IPD-MHC database update: a comprehensive resource for major histocompatibility complex studies . NUCLEIC ACIDS RES , 53 , D457 – D461 . OpenUrl PubMed 33. ↵ Varadi , M. , Bertoni , D. , Magana , P. , Paramval , U. , Pidruchna , I. , Radhakrishnan , M. , Tsenkov , M. , Nair , S. , Mirdita , M. and Yeo , J. et al. ( 2024 ) AlphaFold Protein Structure Database in 2024: providing structure coverage for over 214 million protein sequences . NUCLEIC ACIDS RES , 52 , D368 – D375 . OpenUrl CrossRef PubMed 34. ↵ Wilkinson , M.D. , Dumontier , M. , Aalbersberg , I.J.J. , Appleton , G. , Axton , M. , Baak , A. , Blomberg , N. , Boiten , J. , Da Silva Santos , L.B. and Bourne , P.E. , et al. ( 2016 ) The FAIR Guiding Principles for scientific data management and stewardship . SCI DATA , 3 , 160018 . OpenUrl PubMed 35. ↵ Paisley , B.M. and Liu , Y . ( 2021 ) GeneMarkeR: A Database and User Interface for scRNA-seq Marker Genes . FRONT GENET , 12 , 763431 . OpenUrl PubMed 36. ↵ Lu , Y. , Li , M. , Gao , Z. , Ma , H. , Chong , Y. , Hong , J. , Wu , J. , Wu , D. 
, Xi , D. and Deng , W . ( 2024 ) Innovative Insights into Single-Cell Technologies and Multi-Omics Integration in Livestock and Poultry . INT J MOL SCI , 25 . 37. ↵ Chen , D. , Sun , J. , Zhu , J. , Ding , X. , Lan , T. , Wang , X. , Wu , W. , Ou , Z. , Zhu , L. and Ding , P. et al. ( 2021 ) Single cell atlas for 11 non-model mammals, reptiles and birds . NAT COMMUN , 12 , 7083 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted October 15, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following AniMarkerDB: a comprehensive database for exploring cell types and marker genes in livestock and poultry at single-cell resolution Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share AniMarkerDB: a comprehensive database for exploring cell types and marker genes in livestock and poultry at single-cell resolution Zhuohang Li , Tao Zhang , Xueqing Li , Jiangwu Huang , Zimin Xie , Fei Gao , Haiming Cai , Mingfei Sun , Manman Dai , Ming Liao bioRxiv 2025.10.14.682327; doi: https://doi.org/10.1101/2025.10.14.682327 Share This Article: Copy Citation Tools AniMarkerDB: a comprehensive database for exploring cell types and marker genes in livestock and poultry at single-cell resolution Zhuohang Li , Tao Zhang , Xueqing Li , Jiangwu Huang , Zimin Xie , Fei Gao , Haiming Cai , Mingfei Sun , Manman Dai , Ming Liao bioRxiv 2025.10.14.682327; doi: https://doi.org/10.1101/2025.10.14.682327 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7633) Biochemistry (17680) Bioengineering (13889) Bioinformatics (41927) Biophysics (21445) Cancer Biology (18585) Cell Biology (25491) Clinical Trials (138) Developmental Biology (13373) Ecology (19897) Epidemiology (2067) Evolutionary Biology (24308) Genetics (15606) Genomics (22496) Immunology (17736) Microbiology (40385) Molecular Biology (17175) Neuroscience (88583) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4822) Physiology (7641) Plant Biology (15149) Scientific Communication and Education (2045) Synthetic Biology (4293) Systems Biology (9822) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.