Classification Accuracy Estimation Without Labels via Architecture-Agnostic Model Agreement | Research Square window.SnipcartSettings = { analytics: { enabled: false } }; (function() { var accessVector = localStorage.getItem('access_vector') || ''; window.dataLayer = window.dataLayer || []; if (accessVector) { window.dataLayer.push({ user: { profile: { profileInfo: { snid: accessVector } } } }); } })(); (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-K279D39R'); Browse Preprints In Review Journals COVID-19 Preprints AJE Video Bytes Research Tools Research Promotion AJE Professional Editing AJE Rubriq About Preprint Platform In Review Editorial Policies Our Team Advisory Board Help Center Sign In Submit a Preprint Cite Share Download PDF Research Article Classification Accuracy Estimation Without Labels via Architecture-Agnostic Model Agreement Erin Woo, Hyungkook Jun, Sangyeop Yeo, YoungIk Eom, YuSeung Ma This is a preprint; it has not been peer reviewed by a journal. https://doi.org/ 10.21203/rs.3.rs-7331038/v1 This work is licensed under a CC BY 4.0 License Status: Under Review Version 1 posted 6 You are reading this latest preprint version Abstract We propose a method to estimate the classification accuracy of a machine learning model without requiring any ground-truth labels, by leveraging the agreement between two models on unlabeled data. Unlike prior work that interprets model agreement as an upper bound or average of model accuracies, we are the first to demonstrate that in heterogeneous, label-free settings, agreement reliably approximates the performance of the weaker model in the pair. 
Our method is architecture-agnostic and does not require any labeled data, assumptions on model calibration, or prior performance information. We introduce a principled estimator that combines hard-label agreement, probability-level consistency, and a correction term for class imbalance and calibration bias. This estimator remains robust across diverse model types—including convolutional networks and Transformers—and performs reliably on standard image and text classification benchmarks. Experimental results confirm that our estimator closely approximates the accuracy of the weaker model in the pair, often within 1–2% of the ground-truth. Our approach operates entirely on in-distribution unlabeled data, offering a practical and reliable solution for model evaluation in real-world scenarios where labeled validation sets are unavailable. Quality Assessment Accuracy Estimation Classification Machine Learning Model Evaluation Full Text Additional Declarations No competing interests reported. Cite Share Download PDF Status: Under Review Version 1 posted Reviews received at journal 23 Feb, 2026 Reviewers agreed at journal 26 Jan, 2026 Reviewers invited by journal 21 Aug, 2025 Editor assigned by journal 11 Aug, 2025 Submission checks completed at journal 11 Aug, 2025 First submitted to journal 08 Aug, 2025 You are reading this latest preprint version Research Square lets you share your work early, gain feedback from the community, and start making changes to your manuscript prior to peer review in a journal. As a division of Research Square Company, we’re committed to making research communication faster, fairer, and more useful. We do this by developing innovative software and high quality services for the global research community. Our growing team is made up of researchers and industry professionals working together to solve the most critical problems facing scientific publishing. 
Also discoverable on Platform About Our Team In Review Editorial Policies Advisory Board Help Center Resources Author Services Accessibility API Access RSS feed Manage Cookie Preferences © Research Square 2026 | ISSN 2693-5015 (online) Privacy Policy Terms of Service Do Not Sell My Personal Information {"props":{"pageProps":{"initialData":{"identity":"rs-7331038","acceptedTermsAndConditions":true,"allowDirectSubmit":false,"archivedVersions":[],"articleType":"Research Article","associatedPublications":[],"authors":[{"id":508105021,"identity":"87a430bc-fa5f-4259-b266-373ff97f4d89","order_by":0,"name":"Erin Woo","email":"","orcid":"","institution":"Korea National University of Science and Technology","correspondingAuthor":false,"prefix":"","firstName":"Erin","middleName":"","lastName":"Woo","suffix":""},{"id":508105022,"identity":"fbfad32e-ac64-4087-81cd-e1d41511ed5b","order_by":1,"name":"Hyungkook Jun","email":"","orcid":"","institution":"Sungkyunkwan University","correspondingAuthor":false,"prefix":"","firstName":"Hyungkook","middleName":"","lastName":"Jun","suffix":""},{"id":508105023,"identity":"252f26c3-e635-4ada-8c39-d92e179a7273","order_by":2,"name":"Sangyeop Yeo","email":"","orcid":"","institution":"Korea National University of Science and Technology","correspondingAuthor":false,"prefix":"","firstName":"Sangyeop","middleName":"","lastName":"Yeo","suffix":""},{"id":508105024,"identity":"2b10c855-e23c-460b-a83b-c47564bbb0fe","order_by":3,"name":"YoungIk Eom","email":"","orcid":"","institution":"Sungkyunkwan University","correspondingAuthor":false,"prefix":"","firstName":"YoungIk","middleName":"","lastName":"Eom","suffix":""},{"id":508105025,"identity":"16110ef0-e087-44cc-aab5-5143dbdf6ec9","order_by":4,"name":"YuSeung 
Ma","email":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAAyAQMAAABI0h/eAAAABlBMVEX///8AAABVwtN+AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAAt0lEQVRIiWNgGAWjYLCCD0DMBmYdIFIH4wwGAxK1MPMAtTAQrcXg/OnEzzYVf/L5GJiPfWA4c48ILTdyN0vnnDGwbGNgS57BcKOYGC28G6Rz2wwM2Bh4jIEBkUCMw85u/m35D6SF/zORWg7kbpNmbADbwszAcIMILZI3crdZ9hwzNmBjZjNmSDhDhBY+oMNu/KiRM5Bvb37M8OEYEVoUDsBYQGcxEKGBgUG+gRhVo2AUjIJRMLIBAGouNQHiUXiJAAAAAElFTkSuQmCC","orcid":"","institution":"Electronics and Telecommunications Research Institute","correspondingAuthor":true,"prefix":"","firstName":"YuSeung","middleName":"","lastName":"Ma","suffix":""}],"badges":[],"createdAt":"2025-08-09 03:38:15","currentVersionCode":1,"declarations":"","doi":"10.21203/rs.3.rs-7331038/v1","doiUrl":"https://doi.org/10.21203/rs.3.rs-7331038/v1","draftVersion":[],"editorialEvents":[],"editorialNote":"","failedWorkflow":false,"files":[{"id":90285316,"identity":"a2d21fac-31b1-4112-addf-413b2de47520","added_by":"auto","created_at":"2025-09-01 06:00:18","extension":"pdf","order_by":1,"title":"","display":"","copyAsset":false,"role":"manuscript-pdf","size":822659,"visible":true,"origin":"","legend":"","description":"","filename":"ClassificationAccuracyEstimationWithoutLabelsviaArchitectureAgnosticModelAgreement.pdf","url":"https://assets-eu.researchsquare.com/files/rs-7331038/v1_covered_9457c097-b3f9-4b3d-9ab7-6a28e297dd96.pdf"}],"financialInterests":"No competing interests reported.","formattedTitle":"Classification Accuracy Estimation Without Labels via Architecture-Agnostic Model 
Agreement","fulltext":[],"fulltextSource":"","fullText":"","funders":[],"hasAdminPriorityOnWorkflow":false,"hasManuscriptDocX":false,"hasOptedInToPreprint":true,"hasPassedJournalQc":"","hasAnyPriority":false,"hideJournal":false,"highlight":"","institution":"","isAcceptedByJournal":false,"isAuthorSuppliedPdf":true,"isDeskRejected":"","isHiddenFromSearch":false,"isInQc":false,"isInWorkflow":false,"isPdf":true,"isPdfUpToDate":true,"isWithdrawnOrRetracted":false,"journal":{"display":true,"email":"
[email protected]","identity":"software-quality-journal","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":false,"externalIdentity":"sqjo","sideBox":"Learn more about [Software Quality Journal](http://link.springer.com/journal/11219)","snPcode":"11219","submissionUrl":"https://submission.nature.com/new-submission/11219/3","title":"Software Quality Journal","twitterHandle":"","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"em","reportingPortfolio":"Springer Hybrid","inReviewEnabled":true,"inReviewRevisionsEnabled":false},"keywords":"Quality Assessment, Accuracy Estimation, Classification, Machine Learning, Model Evaluation","lastPublishedDoi":"10.21203/rs.3.rs-7331038/v1","lastPublishedDoiUrl":"https://doi.org/10.21203/rs.3.rs-7331038/v1","license":{"name":"CC BY 4.0","url":"https://creativecommons.org/licenses/by/4.0/"},"manuscriptAbstract":"We propose a method to estimate the classification accuracy of a machine learning model without requiring any ground-truth labels, by leveraging the agreement between two models on unlabeled data. Unlike prior work that interprets model agreement as an upper bound or average of model accuracies, we are the first to demonstrate that in heterogeneous, label-free settings, agreement reliably approximates the performance of the weaker model in the pair. Our method is architecture-agnostic and does not require any labeled data, assumptions on model calibration, or prior performance information. We introduce a principled estimator that combines hard-label agreement, probability-level consistency, and a correction term for class imbalance and calibration bias. This estimator remains robust across diverse model types—including convolutional networks and Transformers—and performs reliably on standard image and text classification benchmarks. Experimental results confirm that our estimator closely approximates the accuracy of the weaker model in the pair, often within 1–2% of the ground-truth. 
Our approach operates entirely on in-distribution unlabeled data, offering a practical and reliable solution for model evaluation in real-world scenarios where labeled validation sets are unavailable.","manuscriptTitle":"Classification Accuracy Estimation Without Labels via Architecture-Agnostic Model Agreement","msid":"","msnumber":"","nonDraftVersions":[{"code":1,"date":"2025-09-01 05:52:12","doi":"10.21203/rs.3.rs-7331038/v1","editorialEvents":[{"type":"communityComments","content":0},{"type":"editorInvitedReview","content":"","date":"2026-02-23T18:49:38+00:00","index":"hide","fulltext":""},{"type":"reviewerAgreed","content":"74499425754390271158484439358046270271","date":"2026-01-27T03:41:18+00:00","index":"hide","fulltext":""},{"type":"reviewersInvited","content":"","date":"2025-08-21T17:23:08+00:00","index":"","fulltext":""},{"type":"editorAssigned","content":"","date":"2025-08-11T05:18:18+00:00","index":"","fulltext":""},{"type":"checksComplete","content":"","date":"2025-08-11T05:18:06+00:00","index":"","fulltext":""},{"type":"submitted","content":"Software Quality Journal","date":"2025-08-09T03:35:31+00:00","index":"","fulltext":""}],"status":"published","journal":{"display":true,"email":"
[email protected]","identity":"software-quality-journal","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":false,"externalIdentity":"sqjo","sideBox":"Learn more about [Software Quality Journal](http://link.springer.com/journal/11219)","snPcode":"11219","submissionUrl":"https://submission.nature.com/new-submission/11219/3","title":"Software Quality Journal","twitterHandle":"","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"em","reportingPortfolio":"Springer Hybrid","inReviewEnabled":true,"inReviewRevisionsEnabled":false}}],"origin":"","ownerIdentity":"835c3b21-97e7-46f6-8360-ad71fbf95b0b","owner":[],"postedDate":"September 1st, 2025","published":true,"recentEditorialEvents":[],"rejectedJournal":[],"revision":"","amendment":"","status":"under-review","subjectAreas":[],"tags":[],"updatedAt":"2025-09-01T05:52:12+00:00","versionOfRecord":[],"versionCreatedAt":"2025-09-01 05:52:12","video":"","vorDoi":"","vorDoiUrl":"","workflowStages":[]},"version":"v1","identity":"rs-7331038","journalConfig":"researchsquare"},"__N_SSP":true},"page":"/article/[identity]/[[...version]]","query":{"redirect":"/article/rs-7331038","identity":"rs-7331038","version":["v1"]},"buildId":"8U1c8b4HqxoKbykW_rLl7","isFallback":false,"isExperimentalCompile":false,"dynamicIds":[84888],"gssp":true,"scriptLoader":[]}
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.