Reinforcement Learning-Based Generation of EGFR-Targeted Anticancer Small Molecules | Research Square window.SnipcartSettings = { analytics: { enabled: false } }; (function() { var accessVector = localStorage.getItem('access_vector') || ''; window.dataLayer = window.dataLayer || []; if (accessVector) { window.dataLayer.push({ user: { profile: { profileInfo: { snid: accessVector } } } }); } })(); (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-K279D39R'); Browse Preprints In Review Journals COVID-19 Preprints AJE Video Bytes Research Tools Research Promotion AJE Professional Editing AJE Rubriq About Preprint Platform In Review Editorial Policies Our Team Advisory Board Help Center Sign In Submit a Preprint Cite Share Download PDF Research Article Reinforcement Learning-Based Generation of EGFR-Targeted Anticancer Small Molecules Yuran Chai, Xiao Huang This is a preprint; it has not been peer reviewed by a journal. https://doi.org/10.21203/rs.3.rs-7587466/v1 This work is licensed under a CC BY 4.0 License Status: Published Journal Publication published 17 Jan, 2026 Read the published version in Journal of Computer-Aided Molecular Design → Version 1 posted 11 You are reading this latest preprint version Abstract We report a reinforcement-learning-enhanced generative chemistry pipeline for the de novo design of small-molecule inhibitors targeting Epidermal Growth Factor Receptor (EGFR). Starting from a pretrained ChemBERTa language model fine-tuned on high-affinity EGFR inhibitors, we introduce a multicomponent reward function combining predicted potency (40%), drug-likeness (QED, 25%), synthetic accessibility (SA, 15%), and novelty relative to the training library (20%). 
Through policy-gradient optimization over 500 iterations, the model learns to produce chemically valid, diverse, and novel scaffolds enriched for high composite rewards. Compared with the prior policy, the RL-tuned generator achieves a 20% increase in mean reward and yields a threefold expansion in unique Bemis-Murcko cores. High-throughput docking against the EGFR kinase domain (PDB ID: 1M17) demonstrates that the newly generated library attains a median predicted binding affinity of –9.2 kcal/mol, significantly surpassing the –8.5 kcal/mol baseline of known inhibitors. An exemplar generated ligand recapitulates key hinge-binding interactions while presenting a novel solvent-exposed substituent for further optimization. This study illustrates the power of integrating language-model pretraining with reinforcement learning and composite reward engineering to accelerate target-focused drug discovery. Full Text Additional Declarations No competing interests reported. Cite Share Download PDF Status: Published Journal Publication published 17 Jan, 2026 Read the published version in Journal of Computer-Aided Molecular Design → Version 1 posted Editorial decision: Revision requested 14 Nov, 2025 Reviews received at journal 14 Nov, 2025 Reviews received at journal 02 Nov, 2025 Reviewers agreed at journal 23 Oct, 2025 Reviewers agreed at journal 19 Oct, 2025 Reviews received at journal 06 Oct, 2025 Reviewers agreed at journal 24 Sep, 2025 Reviewers invited by journal 24 Sep, 2025 Editor assigned by journal 23 Sep, 2025 Submission checks completed at journal 16 Sep, 2025 First submitted to journal 10 Sep, 2025 You are reading this latest preprint version Research Square lets you share your work early, gain feedback from the community, and start making changes to your manuscript prior to peer review in a journal. As a division of Research Square Company, we’re committed to making research communication faster, fairer, and more useful. 
We do this by developing innovative software and high quality services for the global research community. Our growing team is made up of researchers and industry professionals working together to solve the most critical problems facing scientific publishing. Also discoverable on Platform About Our Team In Review Editorial Policies Advisory Board Help Center Resources Author Services Accessibility API Access RSS feed Manage Cookie Preferences © Research Square 2026 | ISSN 2693-5015 (online) Privacy Policy Terms of Service Do Not Sell My Personal Information {"props":{"pageProps":{"initialData":{"identity":"rs-7587466","acceptedTermsAndConditions":true,"allowDirectSubmit":false,"archivedVersions":[],"articleType":"Research Article","associatedPublications":[],"authors":[{"id":525494042,"identity":"f189c01f-8bbc-42fb-a269-92f23674d05f","order_by":0,"name":"Yuran Chai","email":"","orcid":"","institution":"Wake Forest University","correspondingAuthor":false,"prefix":"","firstName":"Yuran","middleName":"","lastName":"Chai","suffix":""},{"id":525494043,"identity":"093ccc33-01e5-46a9-9da4-4034b56a0a63","order_by":1,"name":"Xiao Huang","email":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAAyAQMAAABI0h/eAAAABlBMVEX///8AAABVwtN+AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAA5UlEQVRIiWNgGAWjYDACZiBmbACzDkgwHAAxEojWwpZApBYGuBYeA+K0GBxnfviAccdheXP+NR9v85yxYeBnzzHAq0Wymc3YgPHMYcOdM95utua5kcYg2fMGvxZ+ZgYzCca2w4wbbpzdJs3z4TCDwQ0CtrAxs38DabHfcOPMM6CW/wz2hLTwM/OAbUnccL6HTZrnxgEGAwmCfuEpNkhsS0/ecIPN2HLOmWQeiTPPCvBqMTh/fOODj23WthvOH354480xOzn+9uQNeLWAQQKIkACTDDyElcMB/wESFI+CUTAKRsGIAgDadEjJGPwYZAAAAABJRU5ErkJggg==","orcid":"","institution":"Duke University","correspondingAuthor":true,"prefix":"","firstName":"Xiao","middleName":"","lastName":"Huang","suffix":""}],"badges":[],"createdAt":"2025-09-11 
03:53:19","currentVersionCode":1,"declarations":"","doi":"10.21203/rs.3.rs-7587466/v1","doiUrl":"https://doi.org/10.21203/rs.3.rs-7587466/v1","draftVersion":[],"editorialEvents":[{"content":"https://doi.org/10.1007/s10822-025-00753-7","type":"published","date":"2026-01-17T16:28:44+00:00"}],"editorialNote":"","failedWorkflow":false,"files":[{"id":92979150,"identity":"7fec4101-2d52-43e4-944c-ad0c20189dc0","added_by":"auto","created_at":"2025-10-07 18:57:05","extension":"json","order_by":0,"title":"","display":"","copyAsset":false,"role":"acdc-reference","size":4907,"visible":true,"origin":"","legend":"","description":"","filename":"791d806d19d6448dbe305ba62190ca71.json","url":"https://assets-eu.researchsquare.com/files/rs-7587466/v1/dea844716185db4e1bbad0a8.json"},{"id":100615895,"identity":"46c52558-bda5-44d6-8d9b-7957304a4fe0","added_by":"auto","created_at":"2026-01-19 17:37:58","extension":"pdf","order_by":1,"title":"","display":"","copyAsset":false,"role":"manuscript-pdf","size":584131,"visible":true,"origin":"","legend":"","description":"","filename":"AIDrug4.pdf","url":"https://assets-eu.researchsquare.com/files/rs-7587466/v1_covered_1b638994-0f5e-402b-a275-d24fc051e365.pdf"}],"financialInterests":"No competing interests reported.","formattedTitle":"Reinforcement Learning-Based Generation of EGFR-Targeted Anticancer Small Molecules","fulltext":[],"fulltextSource":"","fullText":"","funders":[],"hasAdminPriorityOnWorkflow":false,"hasManuscriptDocX":false,"hasOptedInToPreprint":true,"hasPassedJournalQc":"","hasAnyPriority":false,"hideJournal":false,"highlight":"","institution":"","isAcceptedByJournal":true,"isAuthorSuppliedPdf":true,"isDeskRejected":"","isHiddenFromSearch":false,"isInQc":false,"isInWorkflow":false,"isPdf":true,"isPdfUpToDate":true,"isWithdrawnOrRetracted":false,"journal":{"display":true,"email":"
[email protected]","identity":"journal-of-computer-aided-molecular-design","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":false,"externalIdentity":"jcam","sideBox":"Learn more about [Journal of Computer-Aided Molecular Design](http://link.springer.com/journal/10822)","snPcode":"10822","submissionUrl":"https://submission.nature.com/new-submission/10822/3","title":"Journal of Computer-Aided Molecular Design","twitterHandle":"","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"em","reportingPortfolio":"Springer Hybrid","inReviewEnabled":true,"inReviewRevisionsEnabled":false},"keywords":"","lastPublishedDoi":"10.21203/rs.3.rs-7587466/v1","lastPublishedDoiUrl":"https://doi.org/10.21203/rs.3.rs-7587466/v1","license":{"name":"CC BY 4.0","url":"https://creativecommons.org/licenses/by/4.0/"},"manuscriptAbstract":"We report a reinforcement-learning-enhanced generative chemistry pipeline for the de novo design of small-molecule inhibitors targeting Epidermal Growth Factor Receptor (EGFR). Starting from a pretrained ChemBERTa language model fine-tuned on high-affinity EGFR in- hibitors, we introduce a multicomponent reward function combining predicted potency (40%), drug-likeness (QED, 25%), synthetic accessibility (SA, 15%), and novelty relative to the train- ing library (20%). Through policy-gradient optimization over 500 iterations, the model learns to produce chemically valid, diverse, and novel scaffolds enriched for high composite rewards. Compared with the prior policy, the RL-tuned generator achieves a 20 % increase in mean re- ward and yields a threefold expansion in unique Bemis-Murcko cores. High-throughput dock- ing against the EGFR kinase domain (PDB ID: 1M17) demonstrates that the newly generated library attains a median predicted binding affinity of –9.2 kcal/mol, significantly surpassing the –8.5 kcal/mol baseline of known inhibitors. 
An exemplar generated ligand recapitulates key hinge-binding interactions while presenting a novel solvent-exposed substituent for further optimization. This study illustrates the power of integrating language-model pretraining with reinforcement learning and composite reward engineering to accelerate target-focused drug discovery.","manuscriptTitle":"Reinforcement Learning-Based Generation of EGFR-Targeted Anticancer Small Molecules","msid":"","msnumber":"","nonDraftVersions":[{"code":1,"date":"2025-10-07 18:57:00","doi":"10.21203/rs.3.rs-7587466/v1","editorialEvents":[{"type":"communityComments","content":0},{"type":"decision","content":"Revision requested","date":"2025-11-14T20:13:31+00:00","index":"","fulltext":""},{"type":"editorInvitedReview","content":"","date":"2025-11-14T17:57:40+00:00","index":"hide","fulltext":""},{"type":"editorInvitedReview","content":"","date":"2025-11-02T10:01:16+00:00","index":"hide","fulltext":""},{"type":"reviewerAgreed","content":"22215728966311918485439917189590243395","date":"2025-10-23T20:16:50+00:00","index":"hide","fulltext":""},{"type":"reviewerAgreed","content":"208401189745272350377135492491809175445","date":"2025-10-19T09:18:17+00:00","index":"hide","fulltext":""},{"type":"editorInvitedReview","content":"","date":"2025-10-06T15:26:40+00:00","index":"hide","fulltext":""},{"type":"reviewerAgreed","content":"339831579769327270165700094864995829884","date":"2025-09-24T07:40:28+00:00","index":"hide","fulltext":""},{"type":"reviewersInvited","content":"","date":"2025-09-24T04:25:36+00:00","index":"","fulltext":""},{"type":"editorAssigned","content":"","date":"2025-09-23T23:48:22+00:00","index":"","fulltext":""},{"type":"checksComplete","content":"","date":"2025-09-16T11:59:25+00:00","index":"","fulltext":""},{"type":"submitted","content":"Journal of Computer-Aided Molecular Design","date":"2025-09-11T03:50:03+00:00","index":"","fulltext":""}],"status":"published","journal":{"display":true,"email":"
[email protected]","identity":"journal-of-computer-aided-molecular-design","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":false,"externalIdentity":"jcam","sideBox":"Learn more about [Journal of Computer-Aided Molecular Design](http://link.springer.com/journal/10822)","snPcode":"10822","submissionUrl":"https://submission.nature.com/new-submission/10822/3","title":"Journal of Computer-Aided Molecular Design","twitterHandle":"","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"em","reportingPortfolio":"Springer Hybrid","inReviewEnabled":true,"inReviewRevisionsEnabled":false}}],"origin":"","ownerIdentity":"53f3e41e-6f39-4b1e-bf5b-f837e8168f18","owner":[],"postedDate":"October 7th, 2025","published":true,"recentEditorialEvents":[],"rejectedJournal":[],"revision":"","amendment":"","status":"published-in-journal","subjectAreas":[],"tags":[],"updatedAt":"2026-01-19T17:02:39+00:00","versionOfRecord":{"articleIdentity":"rs-7587466","link":"https://doi.org/10.1007/s10822-025-00753-7","journal":{"identity":"journal-of-computer-aided-molecular-design","isVorOnly":false,"title":"Journal of Computer-Aided Molecular Design"},"publishedOn":"2026-01-17 16:28:44","publishedOnDateReadable":"January 17th, 2026"},"versionCreatedAt":"2025-10-07 18:57:00","video":"","vorDoi":"10.1007/s10822-025-00753-7","vorDoiUrl":"https://doi.org/10.1007/s10822-025-00753-7","workflowStages":[]},"version":"v1","identity":"rs-7587466","journalConfig":"researchsquare"},"__N_SSP":true},"page":"/article/[identity]/[[...version]]","query":{"redirect":"/article/rs-7587466","identity":"rs-7587466","version":["v1"]},"buildId":"8U1c8b4HqxoKbykW_rLl7","isFallback":false,"isExperimentalCompile":false,"dynamicIds":[84888],"gssp":true,"scriptLoader":[]}
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.