MCC-GCN: An Interpretable Graph Learning Framework for Multicomponent Crystal Classification and Discovery | Research Square window.SnipcartSettings = { analytics: { enabled: false } }; (function() { var accessVector = localStorage.getItem('access_vector') || ''; window.dataLayer = window.dataLayer || []; if (accessVector) { window.dataLayer.push({ user: { profile: { profileInfo: { snid: accessVector } } } }); } })(); (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-K279D39R'); Browse Preprints In Review Journals COVID-19 Preprints AJE Video Bytes Research Tools Research Promotion AJE Professional Editing AJE Rubriq About Preprint Platform In Review Editorial Policies Our Team Advisory Board Help Center Sign In Submit a Preprint Cite Share Download PDF Article MCC-GCN: An Interpretable Graph Learning Framework for Multicomponent Crystal Classification and Discovery Shing Fung Chow, Yuehua Deng, Fanyu Zhao, Xinliang Zhou, Minqi Fu, and 3 more This is a preprint; it has not been peer reviewed by a journal. https://doi.org/10.21203/rs.3.rs-9086306/v1 This work is licensed under a CC BY 4.0 License Status: Under Review Version 1 posted You are reading this latest preprint version Abstract Multicomponent crystals (MCCs), including cocrystals, salts, and solvates, are of interest in organic molecule design, yet their discovery remains largely empirical and inefficient. Existing computational approaches are limited to binary predictions and lack interpretability, restricting their ability to guide discovery and provide mechanistic insight. 
Here, we present MCC-GCN, an interpretable graph-based learning framework that reformulates MCC prediction as a multi-class problem and enables unified prediction and mechanistic interpretation of MCC formation. Trained on over 34,000 entries and refined through strategic fine-tuning, MCC-GCN demonstrates robust generalization to chemical domains beyond the Cambridge Structural Database. Benchmarking against five classical methods and three machine-learning baselines shows that MCC-GCN outperforms existing approaches in predictive accuracy and interpretability. Validation through an experimental campaign involving 64 prospective cases successfully identified 47 new MCCs. MCC-GCN provides a scalable, generalizable methodological framework for studying MCCs and offers a foundation for data-driven discovery in pharmacy, chemistry, and materials science. Physical sciences/Engineering/Chemical engineering Physical sciences/Chemistry/Supramolecular chemistry/Crystal engineering Physical sciences/Chemistry/Chemical engineering Multicomponent Crystal Transfer Learning Graph Convolutional Network Crystal Engineering Cocrystal Prediction Full Text Additional Declarations There is NO Competing Interest. Supplementary Files SingleCrystals.zip Single Crystal Structures SupplementaryInformation.pdf Supplementary Information Cite Share Download PDF Status: Under Review Version 1 posted You are reading this latest preprint version Research Square lets you share your work early, gain feedback from the community, and start making changes to your manuscript prior to peer review in a journal. As a division of Research Square Company, we’re committed to making research communication faster, fairer, and more useful. We do this by developing innovative software and high quality services for the global research community. Our growing team is made up of researchers and industry professionals working together to solve the most critical problems facing scientific publishing. 
Also discoverable on Platform About Our Team In Review Editorial Policies Advisory Board Help Center Resources Author Services Accessibility API Access RSS feed Manage Cookie Preferences © Research Square 2026 | ISSN 2693-5015 (online) Privacy Policy Terms of Service Do Not Sell My Personal Information {"props":{"pageProps":{"initialData":{"identity":"rs-9086306","acceptedTermsAndConditions":true,"allowDirectSubmit":false,"archivedVersions":[],"articleType":"Article","associatedPublications":[],"authors":[{"id":604527640,"identity":"4f35571b-04e1-4c2a-b79f-6f9ef83306d1","order_by":0,"name":"Shing Fung Chow","email":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAAyAQMAAABI0h/eAAAABlBMVEX///8AAABVwtN+AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAArklEQVRIiWNgGAWjYFACxgfMQFLOgIeBgZnhAFFamA1AWoxJ15K4gWgt/P2HGT8XVBxO385zxoC54AwRWiRuJDNLzzhzOHdnb48B84wbxLjrBv8xZt6227kbzvMYMPN8IEKH/PnDbCAt6QZEazE4kAzWkmBwFugwHmIcZgjyC8+Z/4YbzhwrOMxDjPflzgNDjKciTd7gTPLGxzzHiNCCAg6QqmEUjIJRMApGAQ4AAPGRNkgyuUVxAAAAAElFTkSuQmCC","orcid":"","institution":"The University of Hong Kong","correspondingAuthor":true,"prefix":"","firstName":"Shing","middleName":"Fung","lastName":"Chow","suffix":""},{"id":604527641,"identity":"5becb752-917a-4c18-819d-a2e0109a602b","order_by":1,"name":"Yuehua Deng","email":"","orcid":"","institution":"The University of Hong Kong","correspondingAuthor":false,"prefix":"","firstName":"Yuehua","middleName":"","lastName":"Deng","suffix":""},{"id":604527642,"identity":"95ef01c7-f756-4cd9-a420-e450f3f3abca","order_by":2,"name":"Fanyu Zhao","email":"","orcid":"","institution":"College of Computer Science and Artificial Intelligence, Fudan University","correspondingAuthor":false,"prefix":"","firstName":"Fanyu","middleName":"","lastName":"Zhao","suffix":""},{"id":604527643,"identity":"27958496-cc3d-49fa-b399-4b31f30ee0d7","order_by":3,"name":"Xinliang Zhou","email":"","orcid":"","institution":"College of Computing and Data Science, Nanyang Technological 
University","correspondingAuthor":false,"prefix":"","firstName":"Xinliang","middleName":"","lastName":"Zhou","suffix":""},{"id":604527644,"identity":"830f6a77-9657-422a-b243-13dbcb6ce4d3","order_by":4,"name":"Minqi Fu","email":"","orcid":"","institution":"Department of Pharmacology and Pharmacy, Li Ka Shing Faculty of Medicine, The University of Hong Kong","correspondingAuthor":false,"prefix":"","firstName":"Minqi","middleName":"","lastName":"Fu","suffix":""},{"id":604527645,"identity":"c3e56f6a-350e-43ea-ab75-24bad136be25","order_by":5,"name":"Stephanie Chow","email":"","orcid":"","institution":"Department of Pharmacology and Pharmacy, Li Ka Shing Faculty of Medicine, The University of Hong Kong","correspondingAuthor":false,"prefix":"","firstName":"Stephanie","middleName":"","lastName":"Chow","suffix":""},{"id":604527646,"identity":"b148b0ac-0409-4f7f-8e3d-0a52db7d28f4","order_by":6,"name":"Zhi Wei","email":"","orcid":"","institution":"Department of Computer Science, Ying Wu College of Computing, New Jersey, Institute of Technology","correspondingAuthor":false,"prefix":"","firstName":"Zhi","middleName":"","lastName":"Wei","suffix":""},{"id":604527647,"identity":"8ce40c14-a559-49a4-82d3-90198196e15d","order_by":7,"name":"Qingsong Wen","email":"","orcid":"","institution":"Squirrel Ai Learning","correspondingAuthor":false,"prefix":"","firstName":"Qingsong","middleName":"","lastName":"Wen","suffix":""}],"badges":[],"createdAt":"2026-03-10 16:35:53","currentVersionCode":1,"declarations":"","doi":"10.21203/rs.3.rs-9086306/v1","doiUrl":"https://doi.org/10.21203/rs.3.rs-9086306/v1","draftVersion":[],"editorialEvents":[],"editorialNote":"","failedWorkflow":false,"files":[{"id":107483659,"identity":"d04d19ee-dda2-441f-ae25-ec9b98bf5aae","added_by":"auto","created_at":"2026-04-22 02:28:37","extension":"pdf","order_by":1,"title":"","display":"","copyAsset":false,"role":"manuscript-pdf","size":9694964,"visible":true,"origin":"","legend":"Article 
File","description":"","filename":"Manuscript.pdf","url":"https://assets-eu.researchsquare.com/files/rs-9086306/v1_covered_c36da09e-3ea2-4959-8211-2fb99bc9343d.pdf"},{"id":107481173,"identity":"33b27d4f-dddf-4580-ba8a-327f335abb2d","added_by":"auto","created_at":"2026-04-22 02:16:23","extension":"zip","order_by":1,"title":"","display":"","copyAsset":false,"role":"supplement","size":4574658,"visible":true,"origin":"","legend":"Single Crystal Structures","description":"","filename":"SingleCrystals.zip","url":"https://assets-eu.researchsquare.com/files/rs-9086306/v1/3f5034cd9ef21db8500ff0b7.zip"},{"id":107120187,"identity":"828b7284-8f54-4c43-9c4a-4f75a10696e9","added_by":"auto","created_at":"2026-04-17 04:22:00","extension":"pdf","order_by":2,"title":"","display":"","copyAsset":false,"role":"supplement","size":25221850,"visible":true,"origin":"","legend":"Supplementary Information","description":"","filename":"SupplementaryInformation.pdf","url":"https://assets-eu.researchsquare.com/files/rs-9086306/v1/107aae8cac1ac817f34e47c9.pdf"}],"financialInterests":"There is \u003cb\u003eNO\u003c/b\u003e Competing Interest.","formattedTitle":"MCC-GCN: An Interpretable Graph Learning Framework for Multicomponent Crystal Classification and Discovery","fulltext":[],"fulltextSource":"","fullText":"","funders":[],"hasAdminPriorityOnWorkflow":false,"hasManuscriptDocX":false,"hasOptedInToPreprint":true,"hasPassedJournalQc":"","hasAnyPriority":true,"hideJournal":false,"highlight":"","institution":"","isAcceptedByJournal":false,"isAuthorSuppliedPdf":true,"isDeskRejected":"","isHiddenFromSearch":false,"isInQc":false,"isInWorkflow":false,"isPdf":true,"isPdfUpToDate":true,"isWithdrawnOrRetracted":false,"journal":{"display":true,"email":"
[email protected]","identity":"nature-portfolio","isNatureJournal":true,"hasQc":false,"allowDirectSubmit":false,"externalIdentity":"","sideBox":"","snPcode":"","submissionUrl":"","title":"Nature Portfolio","twitterHandle":"","acdcEnabled":false,"dfaEnabled":false,"editorialSystem":"ejp","reportingPortfolio":"","inReviewEnabled":true,"inReviewRevisionsEnabled":false},"keywords":"Multicomponent Crystal, Transfer Learning, Graph Convolutional Network, Crystal Engineering, Cocrystal Prediction","lastPublishedDoi":"10.21203/rs.3.rs-9086306/v1","lastPublishedDoiUrl":"https://doi.org/10.21203/rs.3.rs-9086306/v1","license":{"name":"CC BY 4.0","url":"https://creativecommons.org/licenses/by/4.0/"},"manuscriptAbstract":"Multicomponent crystals (MCCs), including cocrystals, salts, and solvates, are of interest in organic molecule design, yet their discovery remains largely empirical and inefficient. Existing computational approaches are limited to binary predictions and lack interpretability, restricting their ability to guide discovery and provide mechanistic insight. Here, we present MCC-GCN, an interpretable graph-based learning framework that reformulates MCC prediction as a multi-class problem and enables unified prediction and mechanistic interpretation of MCC formation. Trained on over 34,000 entries and refined through strategic fine-tuning, MCC-GCN demonstrates robust generalization to chemical domains beyond the Cambridge Structural Database. Benchmarking against five classical methods and three machine-learning baselines shows that MCC-GCN outperforms existing approaches in predictive accuracy and interpretability. Validation through an experimental campaign involving 64 prospective cases successfully identified 47 new MCCs. 
MCC-GCN provides a scalable, generalizable methodological framework for studying MCCs and offers a foundation for data-driven discovery in pharmacy, chemistry, and materials science.","manuscriptTitle":"MCC-GCN: An Interpretable Graph Learning Framework for Multicomponent Crystal Classification and Discovery","msid":"","msnumber":"","nonDraftVersions":[{"code":1,"date":"2026-04-17 04:21:56","doi":"10.21203/rs.3.rs-9086306/v1","editorialEvents":[],"status":"published","journal":{"display":true,"email":"
[email protected]","identity":"nature-communications","isNatureJournal":true,"hasQc":false,"allowDirectSubmit":false,"externalIdentity":"NCOMMS","sideBox":"Learn more about [Nature Communications](http://www.nature.com/ncomms/)","snPcode":"","submissionUrl":"https://mts-ncomms.nature.com/","title":"Nature Communications","twitterHandle":"","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"ejp","reportingPortfolio":"Nature Communications","inReviewEnabled":true,"inReviewRevisionsEnabled":false}}],"origin":"","ownerIdentity":"3d88c132-e796-4c8f-bd6c-7eae2c3ca1a5","owner":[],"postedDate":"April 17th, 2026","published":true,"recentEditorialEvents":[],"rejectedJournal":[],"revision":"","amendment":"","status":"under-review","subjectAreas":[{"id":64333963,"name":"Physical sciences/Engineering/Chemical engineering"},{"id":64333964,"name":"Physical sciences/Chemistry/Supramolecular chemistry/Crystal engineering"},{"id":64333965,"name":"Physical sciences/Chemistry/Chemical engineering"}],"tags":[],"updatedAt":"2026-04-17T04:21:56+00:00","versionOfRecord":[],"versionCreatedAt":"2026-04-17 04:21:56","video":"","vorDoi":"","vorDoiUrl":"","workflowStages":[]},"version":"v1","identity":"rs-9086306","journalConfig":"researchsquare"},"__N_SSP":true},"page":"/article/[identity]/[[...version]]","query":{"redirect":"/article/rs-9086306","identity":"rs-9086306","version":["v1"]},"buildId":"XKTyCvWXoU3ODBz1xrDgd","isFallback":false,"isExperimentalCompile":false,"dynamicIds":[84888],"gssp":true,"scriptLoader":[]}
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.