scMarkerAgent: An LLM Evidence Agent-based Cell Marker Atlas | Research Square window.SnipcartSettings = { analytics: { enabled: false } }; (function() { var accessVector = localStorage.getItem('access_vector') || ''; window.dataLayer = window.dataLayer || []; if (accessVector) { window.dataLayer.push({ user: { profile: { profileInfo: { snid: accessVector } } } }); } })(); (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-K279D39R'); Browse Preprints In Review Journals COVID-19 Preprints AJE Video Bytes Research Tools Research Promotion AJE Professional Editing AJE Rubriq About Preprint Platform In Review Editorial Policies Our Team Advisory Board Help Center Sign In Submit a Preprint Cite Share Download PDF Article scMarkerAgent: An LLM Evidence Agent-based Cell Marker Atlas Chen Cao, HaoRan Chen, Yanqun Sun, Hangchen Zhang, Yin Liu, Hanzi Zhao, and 3 more This is a preprint; it has not been peer reviewed by a journal. https://doi.org/ 10.21203/rs.3.rs-9224570/v1 This work is licensed under a CC BY 4.0 License Status: Under Review Version 1 posted You are reading this latest preprint version Abstract Evidence-augmented and reliable cell-type annotation remains a major bottleneck in single-cell RNA-seq analysis, particularly for rare, transitional, and disease-associated populations. To address this, we introduce scMarkerAgent, an evidence-grounded cell marker resource developed using an LLM-assisted literature-curation framework. It integrates 294,692 full-text publications to provide 890,296 high-quality cell type–marker annotations from 50,233 cell types across human, mouse, and rat. 
scMarkerAgent integrates 82,165 curated negative-marker annotations and 417,812 disease-context annotations, improving disambiguation of homologous cell types and delineation of malignant cells. Every cell type–marker annotation is directly supported by sentence-level literature evidence. In the cell annotation workflow, candidate labels are further refined through an LLM-based reasoning step that jointly evaluates positive and negative markers. Compared with existing resources, scMarkerAgent offers broader coverage of markers, tissues, cell types, and diseases. It is released as a FAIR-compliant database together with a code-free web platform that supports marker retrieval, automated cell annotation, and customizable cell scoring (available at https://www.markeragent.net). Biological sciences/Computational biology and bioinformatics/Databases Biological sciences/Computational biology and bioinformatics/Data mining Biological sciences/Computational biology and bioinformatics/Data integration Full Text Additional Declarations There is NO Competing Interest. Supplementary Files SupplementaryTables.xlsx Supplementary Tables for scMarkerAgent: An LLM Evidence Agent-based Cell Marker Atlas SupplementaryInformation.pdf Supplementary Information for scMarkerAgent: An LLM Evidence Agent-based Cell Marker Atlas Cite Share Download PDF Status: Under Review Version 1 posted You are reading this latest preprint version Research Square lets you share your work early, gain feedback from the community, and start making changes to your manuscript prior to peer review in a journal. As a division of Research Square Company, we’re committed to making research communication faster, fairer, and more useful. We do this by developing innovative software and high quality services for the global research community. Our growing team is made up of researchers and industry professionals working together to solve the most critical problems facing scientific publishing. 
Also discoverable on Platform About Our Team In Review Editorial Policies Advisory Board Help Center Resources Author Services Accessibility API Access RSS feed Manage Cookie Preferences © Research Square 2026 | ISSN 2693-5015 (online) Privacy Policy Terms of Service Do Not Sell My Personal Information {"props":{"pageProps":{"initialData":{"identity":"rs-9224570","acceptedTermsAndConditions":true,"allowDirectSubmit":false,"archivedVersions":[],"articleType":"Article","associatedPublications":[],"authors":[{"id":612827405,"identity":"be5c4123-be50-4321-98c5-3abee779cd82","order_by":0,"name":"Chen Cao","email":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAAyAQMAAABI0h/eAAAABlBMVEX///8AAABVwtN+AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAA0ElEQVRIiWNgGAWjYJACZiCWY2A4AGMTqcWYdC2JDQg2AcDffvjg54KKO+nbGU+nSTBUWCc2sJ89gFeLxJm0ZOkZZ57l7mw4u02C4Ux6YgNPXgJeLQYMOWbMvG2HczccAGphbDuc2CDBY4BfC/8boJZ/h9MNwFr+EaNFAmRLw+EEiJYGIrRI3HiWLM1z7LAh0GGbLRKOpRu38eTg18Lfn3zwM0/NYXmDG2c33vhQYy3bz34GvxYk+w4wMCQAaTYi1YPsayBe7SgYBaNgFIwsAADFMkcV8gUN0gAAAABJRU5ErkJggg==","orcid":"","institution":"Nanjing Medical University","correspondingAuthor":true,"prefix":"","firstName":"Chen","middleName":"","lastName":"Cao","suffix":""},{"id":612827406,"identity":"dd163a0e-ba51-405e-a469-cf96b800024f","order_by":1,"name":"HaoRan Chen","email":"","orcid":"https://orcid.org/0000-0002-1666-3226","institution":"Nanjing Medical University","correspondingAuthor":false,"prefix":"","firstName":"HaoRan","middleName":"","lastName":"Chen","suffix":""},{"id":612827407,"identity":"57c45433-f954-4376-a379-6b681d866d74","order_by":2,"name":"Yanqun Sun","email":"","orcid":"","institution":"Nanjing Medical University","correspondingAuthor":false,"prefix":"","firstName":"Yanqun","middleName":"","lastName":"Sun","suffix":""},{"id":612827408,"identity":"378f36ff-88ec-4d3a-ad7d-1d361ac74c58","order_by":3,"name":"Hangchen Zhang","email":"","orcid":"","institution":"Nanjing Medical 
University","correspondingAuthor":false,"prefix":"","firstName":"Hangchen","middleName":"","lastName":"Zhang","suffix":""},{"id":612827409,"identity":"71290057-b958-4fb5-8175-5b7851bb79c1","order_by":4,"name":"Yin Liu","email":"","orcid":"","institution":"Nanjing Medical University","correspondingAuthor":false,"prefix":"","firstName":"Yin","middleName":"","lastName":"Liu","suffix":""},{"id":612827410,"identity":"8040085c-f0d0-44c6-8da4-e46632fcb2fb","order_by":5,"name":"Hanzi Zhao","email":"","orcid":"","institution":"Nanjing Medical University","correspondingAuthor":false,"prefix":"","firstName":"Hanzi","middleName":"","lastName":"Zhao","suffix":""},{"id":612827411,"identity":"9a3d2623-67bd-4646-8d8b-c4e8a8580f6a","order_by":6,"name":"Xu Wang","email":"","orcid":"","institution":"Nanjing Medical University","correspondingAuthor":false,"prefix":"","firstName":"Xu","middleName":"","lastName":"Wang","suffix":""},{"id":612827412,"identity":"1cd82f43-71e9-4149-a891-48f60cdb3022","order_by":7,"name":"Guishen Wang","email":"","orcid":"","institution":"Changchun University of Technology","correspondingAuthor":false,"prefix":"","firstName":"Guishen","middleName":"","lastName":"Wang","suffix":""},{"id":612827413,"identity":"c3ce670c-bd99-4a83-ba70-963b48623040","order_by":8,"name":"Gaoyang Li","email":"","orcid":"","institution":"Nanjing Medical University","correspondingAuthor":false,"prefix":"","firstName":"Gaoyang","middleName":"","lastName":"Li","suffix":""}],"badges":[],"createdAt":"2026-03-25 14:45:12","currentVersionCode":1,"declarations":"","doi":"10.21203/rs.3.rs-9224570/v1","doiUrl":"https://doi.org/10.21203/rs.3.rs-9224570/v1","draftVersion":[],"editorialEvents":[],"editorialNote":"","failedWorkflow":false,"files":[{"id":107707830,"identity":"1a74bcbc-a38c-4820-b92a-4b8a472068de","added_by":"auto","created_at":"2026-04-24 
09:21:14","extension":"pdf","order_by":1,"title":"","display":"","copyAsset":false,"role":"manuscript-pdf","size":6037062,"visible":true,"origin":"","legend":"Article File","description":"","filename":"manuscript.pdf","url":"https://assets-eu.researchsquare.com/files/rs-9224570/v1_covered_383c1b69-9170-4a1b-bae9-e6a3fcd26798.pdf"},{"id":106276969,"identity":"7da6e247-735e-47b0-a907-0629bac08dfc","added_by":"auto","created_at":"2026-04-07 04:35:04","extension":"xlsx","order_by":1,"title":"","display":"","copyAsset":false,"role":"supplement","size":106122,"visible":true,"origin":"","legend":"Supplementary Tables for scMarkerAgent: An LLM Evidence Agent-based Cell Marker Atlas","description":"","filename":"SupplementaryTables.xlsx","url":"https://assets-eu.researchsquare.com/files/rs-9224570/v1/7013e9e5a9df5c691d34f9e1.xlsx"},{"id":106276968,"identity":"9e081399-1561-4acf-af41-27728f7c6bb6","added_by":"auto","created_at":"2026-04-07 04:35:04","extension":"pdf","order_by":2,"title":"","display":"","copyAsset":false,"role":"supplement","size":5689437,"visible":true,"origin":"","legend":"Supplementary Information for scMarkerAgent: An LLM Evidence Agent-based Cell Marker Atlas","description":"","filename":"SupplementaryInformation.pdf","url":"https://assets-eu.researchsquare.com/files/rs-9224570/v1/4a278eb9e4bbd9ebf154180e.pdf"}],"financialInterests":"There is \u003cb\u003eNO\u003c/b\u003e Competing Interest.","formattedTitle":"scMarkerAgent: An LLM Evidence Agent-based Cell Marker Atlas","fulltext":[],"fulltextSource":"","fullText":"","funders":[],"hasAdminPriorityOnWorkflow":false,"hasManuscriptDocX":false,"hasOptedInToPreprint":true,"hasPassedJournalQc":"","hasAnyPriority":true,"hideJournal":false,"highlight":"","institution":"","isAcceptedByJournal":false,"isAuthorSuppliedPdf":true,"isDeskRejected":"","isHiddenFromSearch":false,"isInQc":false,"isInWorkflow":false,"isPdf":true,"isPdfUpToDate":true,"isWithdrawnOrRetracted":false,"journal":{"display":true,"email":"
[email protected]","identity":"nature-portfolio","isNatureJournal":true,"hasQc":false,"allowDirectSubmit":false,"externalIdentity":"","sideBox":"","snPcode":"","submissionUrl":"","title":"Nature Portfolio","twitterHandle":"","acdcEnabled":false,"dfaEnabled":false,"editorialSystem":"ejp","reportingPortfolio":"","inReviewEnabled":true,"inReviewRevisionsEnabled":false},"keywords":"","lastPublishedDoi":"10.21203/rs.3.rs-9224570/v1","lastPublishedDoiUrl":"https://doi.org/10.21203/rs.3.rs-9224570/v1","license":{"name":"CC BY 4.0","url":"https://creativecommons.org/licenses/by/4.0/"},"manuscriptAbstract":"Evidence-augmented and reliable cell-type annotation remains a major bottleneck in single-cell RNA-seq analysis, particularly for rare, transitional, and disease-associated populations. To address this, we introduce scMarkerAgent, an evidence-grounded cell marker resource developed using an LLM-assisted literature-curation framework. It integrates 294,692 full-text publications to provide 890,296 high-quality cell type–marker annotations from 50,233 cell types across human, mouse, and rat. scMarkerAgent integrates 82,165 curated negative-marker annotations and 417,812 disease-context annotations, improving disambiguation of homologous cell types and delineation of malignant cells. Every cell type–marker annotation is directly supported by sentence-level literature evidence. In the cell annotation workflow, candidate labels are further refined through an LLM-based reasoning step that jointly evaluates positive and negative markers. Compared with existing resources, scMarkerAgent offers broader coverage of markers, tissues, cell types, and diseases. 
It is released as a FAIR-compliant database together with a code-free web platform that supports marker retrieval, automated cell annotation, and customizable cell scoring (available at https://www.markeragent.net).","manuscriptTitle":"scMarkerAgent: An LLM Evidence Agent-based Cell Marker Atlas","msid":"","msnumber":"","nonDraftVersions":[{"code":1,"date":"2026-04-07 04:35:00","doi":"10.21203/rs.3.rs-9224570/v1","editorialEvents":[],"status":"published","journal":{"display":true,"email":"
[email protected]","identity":"nature-communications","isNatureJournal":true,"hasQc":false,"allowDirectSubmit":false,"externalIdentity":"NCOMMS","sideBox":"Learn more about [Nature Communications](http://www.nature.com/ncomms/)","snPcode":"","submissionUrl":"https://mts-ncomms.nature.com/","title":"Nature Communications","twitterHandle":"","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"ejp","reportingPortfolio":"Nature Communications","inReviewEnabled":true,"inReviewRevisionsEnabled":false}}],"origin":"","ownerIdentity":"162ee595-8c2c-4178-a6fc-3e04ecdaff76","owner":[],"postedDate":"April 7th, 2026","published":true,"recentEditorialEvents":[],"rejectedJournal":[],"revision":"","amendment":"","status":"under-review","subjectAreas":[{"id":65210934,"name":"Biological sciences/Computational biology and bioinformatics/Databases"},{"id":65210935,"name":"Biological sciences/Computational biology and bioinformatics/Data mining"},{"id":65210936,"name":"Biological sciences/Computational biology and bioinformatics/Data integration"}],"tags":[],"updatedAt":"2026-05-08T17:50:31+00:00","versionOfRecord":[],"versionCreatedAt":"2026-04-07 04:35:00","video":"","vorDoi":"","vorDoiUrl":"","workflowStages":[]},"version":"v1","identity":"rs-9224570","journalConfig":"researchsquare"},"__N_SSP":true},"page":"/article/[identity]/[[...version]]","query":{"redirect":"/article/rs-9224570","identity":"rs-9224570","version":["v1"]},"buildId":"XKTyCvWXoU3ODBz1xrDgd","isFallback":false,"isExperimentalCompile":false,"dynamicIds":[84888],"gssp":true,"scriptLoader":[]}
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.