Enhancing Cellular Clustering in Malaria Drug Discovery via Unsupervised Learning | Research Square window.SnipcartSettings = { analytics: { enabled: false } }; (function() { var accessVector = localStorage.getItem('access_vector') || ''; window.dataLayer = window.dataLayer || []; if (accessVector) { window.dataLayer.push({ user: { profile: { profileInfo: { snid: accessVector } } } }); } })(); (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-K279D39R'); Browse Preprints In Review Journals COVID-19 Preprints AJE Video Bytes Research Tools Research Promotion AJE Professional Editing AJE Rubriq About Preprint Platform In Review Editorial Policies Our Team Advisory Board Help Center Sign In Submit a Preprint Cite Share Download PDF Research Article Enhancing Cellular Clustering in Malaria Drug Discovery via Unsupervised Learning Mani Mandepudi This is a preprint; it has not been peer reviewed by a journal. https://doi.org/ 10.21203/rs.3.rs-6999524/v1 This work is licensed under a CC BY 4.0 License Status: Posted Version 1 posted You are reading this latest preprint version Abstract Malaria remains a major global health threat, driving the need for novel antimalarial drug discovery techniques. High-content fluorescence microscopy, combined with artificial intelligence (AI), enables large-scale phenotypic screening of Plasmodium falciparum gametocytes. The PHIDDLI (Phenotype-based High-content Imaging for Drug Discovery using Live Imaging) pipeline is one such AI-powered system that automates the analysis of cellular responses to chemical compounds through feature extraction and clustering. 
However, PHIDDLI currently uses Principal Component Analysis (PCA) and KMeans clustering, which may limit the interpretability of complex phenotypic patterns. In this study, we enhance the pipeline by introducing Kernel PCA and t-distributed Stochastic Neighbor Embedding (t-SNE) for dimensionality reduction, and Hierarchical clustering for phenotypic grouping. Using feature embeddings from fluorescence microscopy images, we evaluate each combination based on biological relevance and visual clarity. Our results demonstrate that t-SNE with Hierarchical clustering produces more distinct and interpretable phenotype groupings, aligning closely with expert biological interpretation. We also present interactive visualizations to support exploratory analysis. These enhancements contribute to more explainable AI in antimalarial research, facilitating faster and more reliable identification of promising drug candidates. Full Text Additional Declarations No competing interests reported. Cite Share Download PDF Status: Posted Version 1 posted You are reading this latest preprint version Research Square lets you share your work early, gain feedback from the community, and start making changes to your manuscript prior to peer review in a journal. As a division of Research Square Company, we’re committed to making research communication faster, fairer, and more useful. We do this by developing innovative software and high quality services for the global research community. Our growing team is made up of researchers and industry professionals working together to solve the most critical problems facing scientific publishing. 
Also discoverable on Platform About Our Team In Review Editorial Policies Advisory Board Help Center Resources Author Services Accessibility API Access RSS feed Manage Cookie Preferences © Research Square 2026 | ISSN 2693-5015 (online) Privacy Policy Terms of Service Do Not Sell My Personal Information {"props":{"pageProps":{"initialData":{"identity":"rs-6999524","acceptedTermsAndConditions":true,"allowDirectSubmit":true,"archivedVersions":[],"articleType":"Research Article","associatedPublications":[],"authors":[{"id":482578881,"identity":"20e3723d-e7cb-412a-9e68-08c0e4083464","order_by":0,"name":"Mani Mandepudi","email":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAAyAQMAAABI0h/eAAAABlBMVEX///8AAABVwtN+AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAA50lEQVRIiWNgGAWjYJACCSBmbGNgYGb4AGSxsZOihXEGSAszsVoagFqYeUBcQlp0288evPFxx2HZPgbuZGObX9vk+ZgZGD98zMGtxexMXrLlzDOHjdsYeDcn5/bdNmxjZmCWnLkNj5YDOWbSvG2HE0FaDuf23GYEamFj5sWn5fwbM+m/MC2WPbftCWu5AbSFEaolmeHH7UQitLwxtuw9k27cxsy72bC34XZyGzNjM36/nM8xvPFzh7Xs/PbezRI//ty2nd/efPDDRzxawAAUKeDoAEUolEuMFjD4Q1jxKBgFo2AUjDwAADkgUO4XXOe4AAAAAElFTkSuQmCC","orcid":"","institution":"University of Leeds","correspondingAuthor":true,"prefix":"","firstName":"Mani","middleName":"","lastName":"Mandepudi","suffix":""}],"badges":[],"createdAt":"2025-06-28 19:23:08","currentVersionCode":1,"declarations":"","doi":"10.21203/rs.3.rs-6999524/v1","doiUrl":"https://doi.org/10.21203/rs.3.rs-6999524/v1","draftVersion":[],"editorialEvents":[],"editorialNote":"","failedWorkflow":false,"files":[{"id":92738622,"identity":"8ac64862-2c1f-4801-b865-e7af0fc968b9","added_by":"auto","created_at":"2025-10-03 16:53:45","extension":"pdf","order_by":1,"title":"","display":"","copyAsset":false,"role":"manuscript-pdf","size":410690,"visible":true,"origin":"","legend":"","description":"","filename":"ResearchPaperClean.pdf","url":"https://assets-eu.researchsquare.com/files/rs-6999524/v1_covered_dacf8745-858a-4072-bbce-5142619544f7.pdf"}],"financialInterests":"No competing interests 
reported.","formattedTitle":"Enhancing Cellular Clustering in Malaria Drug Discovery via Unsupervised Learning","fulltext":[],"fulltextSource":"","fullText":"","funders":[],"hasAdminPriorityOnWorkflow":false,"hasManuscriptDocX":false,"hasOptedInToPreprint":true,"hasPassedJournalQc":"","hasAnyPriority":false,"hideJournal":true,"highlight":"","institution":"","isAcceptedByJournal":false,"isAuthorSuppliedPdf":true,"isDeskRejected":"","isHiddenFromSearch":false,"isInQc":false,"isInWorkflow":false,"isPdf":true,"isPdfUpToDate":true,"isWithdrawnOrRetracted":false,"journal":{"display":true,"email":"
[email protected]","identity":"researchsquare","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":true,"externalIdentity":"","sideBox":"","snPcode":"","submissionUrl":"/submission","title":"Research Square","twitterHandle":"researchsquare","acdcEnabled":true,"dfaEnabled":false,"editorialSystem":"","reportingPortfolio":"","inReviewEnabled":false,"inReviewRevisionsEnabled":true},"keywords":"","lastPublishedDoi":"10.21203/rs.3.rs-6999524/v1","lastPublishedDoiUrl":"https://doi.org/10.21203/rs.3.rs-6999524/v1","license":{"name":"CC BY 4.0","url":"https://creativecommons.org/licenses/by/4.0/"},"manuscriptAbstract":"\u003cp\u003eMalaria remains a major global health threat, driving the need for novel antimalarial drug discovery techniques. High-content fluorescence microscopy, combined with artificial intelligence (AI), enables large-scale phenotypic screening of \u003cem\u003ePlasmodium falciparum\u003c/em\u003e gametocytes. The PHIDDLI (Phenotype-based High-content Imaging for Drug Discovery using Live Imaging) pipeline is one such AI-powered system that automates the analysis of cellular responses to chemical compounds through feature extraction and clustering.\u0026nbsp;\u003c/p\u003e\n\u003cp\u003eHowever, PHIDDLI currently uses Principal Component Analysis (PCA) and KMeans clustering, which may limit the interpretability of complex phenotypic patterns. In this study, we enhance the pipeline by introducing Kernel PCA and t-distributed Stochastic Neighbor Embedding (t-SNE) for dimensionality reduction, and Hierarchical clustering for phenotypic grouping. Using feature embeddings from fluorescence microscopy images, we evaluate each combination based on biological relevance and visual clarity.\u0026nbsp;\u003c/p\u003e\n\u003cp\u003eOur results demonstrate that t-SNE with Hierarchical clustering produces more distinct and interpretable phenotype groupings, aligning closely with expert biological interpretation. 
We also present interactive visualizations to support exploratory analysis. These enhancements contribute to more explainable AI in antimalarial research, facilitating faster and more reliable identification of promising drug candidates.\u003c/p\u003e","manuscriptTitle":"Enhancing Cellular Clustering in Malaria Drug Discovery via Unsupervised Learning","msid":"","msnumber":"","nonDraftVersions":[{"code":1,"date":"2025-07-10 04:19:48","doi":"10.21203/rs.3.rs-6999524/v1","editorialEvents":[{"type":"communityComments","content":0}],"status":"published","journal":{"display":true,"email":"
[email protected]","identity":"researchsquare","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":true,"externalIdentity":"","sideBox":"","snPcode":"","submissionUrl":"/submission","title":"Research Square","twitterHandle":"researchsquare","acdcEnabled":true,"dfaEnabled":false,"editorialSystem":"","reportingPortfolio":"","inReviewEnabled":false,"inReviewRevisionsEnabled":true}}],"origin":"","ownerIdentity":"13218dfd-50d9-4f2b-9134-7c266ab0cb6a","owner":[],"postedDate":"July 10th, 2025","published":true,"recentEditorialEvents":[],"rejectedJournal":[],"revision":"","amendment":"","status":"posted","subjectAreas":[],"tags":[],"updatedAt":"2025-10-03T16:53:21+00:00","versionOfRecord":[],"versionCreatedAt":"2025-07-10 04:19:48","video":"","vorDoi":"","vorDoiUrl":"","workflowStages":[]},"version":"v1","identity":"rs-6999524","journalConfig":"researchsquare"},"__N_SSP":true},"page":"/article/[identity]/[[...version]]","query":{"redirect":"/article/rs-6999524","identity":"rs-6999524","version":["v1"]},"buildId":"8U1c8b4HqxoKbykW_rLl7","isFallback":false,"isExperimentalCompile":false,"dynamicIds":[84888],"gssp":true,"scriptLoader":[]}
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.