Visual-Language Transformer-Based Tomato Leaf Disease Detection for Portable Greenhouse Monitoring Device | Research Square window.SnipcartSettings = { analytics: { enabled: false } }; (function() { var accessVector = localStorage.getItem('access_vector') || ''; window.dataLayer = window.dataLayer || []; if (accessVector) { window.dataLayer.push({ user: { profile: { profileInfo: { snid: accessVector } } } }); } })(); (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-K279D39R'); Browse Preprints In Review Journals COVID-19 Preprints AJE Video Bytes Research Tools Research Promotion AJE Professional Editing AJE Rubriq About Preprint Platform In Review Editorial Policies Our Team Advisory Board Help Center Sign In Submit a Preprint Cite Share Download PDF Research Article Visual-Language Transformer-Based Tomato Leaf Disease Detection for Portable Greenhouse Monitoring Device Manveen Kaur, Rajmeet Singh, Shahpour Alirezaee, Irfan Hussain This is a preprint; it has not been peer reviewed by a journal. https://doi.org/ 10.21203/rs.3.rs-7077052/v1 This work is licensed under a CC BY 4.0 License Status: Published Journal Publication published 28 Oct, 2025 Read the published version in Plant Methods → Version 1 posted 13 You are reading this latest preprint version Abstract Tomato leaf diseases pose a significant threat to global food security, necessitating accurate and efficient detection methods. This paper introduces the Tomato Leaf Disease Visual Language Model (TLDVLM), a novel approach based on the BLIP-2 architecture enhanced with Low-Rank Adaptation (LoRA), for precise classification of 10 distinct tomato leaf diseases. 
Our methodology integrates a sophisticated image preprocessing pipeline, utilizing GroundingDINO for robust leaf detection and SAM-2 for pixel-level segmentation, ensuring that the model focuses solely on relevant plant tissue. The TLDVLM leverages the powerful multimodal understanding of BLIP-2, with LoRA applied to its Q-Former module, enabling parameter-efficient fine-tuning without compromising performance. Comparative experiments demonstrate that the TLDVLM significantly outperforms baseline models, including CLIP-LoRA and ConvNeXT-tiny, achieving an accuracy of 97.27%, a precision of 0.9587, a recall of 0.9789, and an F1-score of 0.9681. Beyond classification, the fine-tuned TLDVLM checkpoints are integrated into a practical application for new image inference. This application displays the raw and segmented images and the predicted disease, and offers functionality to fetch comprehensive information on disease causes and remedies using external APIs (e.g., OpenAI), with an option to download a PDF summary for offline access on a portable device. This research highlights the potential of LoRA-adapted Vision-Language Models in developing highly accurate, efficient, and user-friendly agricultural diagnostic tools. Keywords: Tomato leaf disease, BLIP-2, LoRA, VLM, LLM. Full Text Additional Declarations No competing interests reported. 
Cite Share Download PDF Status: Published Journal Publication published 28 Oct, 2025 Read the published version in Plant Methods → Version 1 posted Editorial decision: Revision requested 11 Aug, 2025 Reviews received at journal 06 Aug, 2025 Reviews received at journal 25 Jul, 2025 Reviewers agreed at journal 22 Jul, 2025 Reviewers agreed at journal 21 Jul, 2025 Reviewers agreed at journal 21 Jul, 2025 Reviews received at journal 20 Jul, 2025 Reviewers agreed at journal 17 Jul, 2025 Reviewers agreed at journal 17 Jul, 2025 Reviewers invited by journal 17 Jul, 2025 Editor assigned by journal 10 Jul, 2025 Submission checks completed at journal 10 Jul, 2025 First submitted to journal 08 Jul, 2025 You are reading this latest preprint version Research Square lets you share your work early, gain feedback from the community, and start making changes to your manuscript prior to peer review in a journal. As a division of Research Square Company, we’re committed to making research communication faster, fairer, and more useful. We do this by developing innovative software and high quality services for the global research community. Our growing team is made up of researchers and industry professionals working together to solve the most critical problems facing scientific publishing. 
Also discoverable on Platform About Our Team In Review Editorial Policies Advisory Board Help Center Resources Author Services Accessibility API Access RSS feed Manage Cookie Preferences © Research Square 2026 | ISSN 2693-5015 (online) Privacy Policy Terms of Service Do Not Sell My Personal Information {"props":{"pageProps":{"initialData":{"identity":"rs-7077052","acceptedTermsAndConditions":true,"allowDirectSubmit":false,"archivedVersions":[],"articleType":"Research Article","associatedPublications":[],"authors":[{"id":488153052,"identity":"8af3ef60-3a6e-4047-bdc6-cf674f74fe11","order_by":0,"name":"Manveen Kaur","email":"","orcid":"","institution":"University of Windsor","correspondingAuthor":false,"prefix":"","firstName":"Manveen","middleName":"","lastName":"Kaur","suffix":""},{"id":488153053,"identity":"d85cb46d-4d6d-45b9-8517-1ef1671b4819","order_by":1,"name":"Rajmeet Singh","email":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAAyAQMAAABI0h/eAAAABlBMVEX///8AAABVwtN+AAAACXBIWXMAAA7EAAAOxAGVKw4bAAABBUlEQVRIiWNgGAWjYHCCBDBpIAEieSTkJIjUYgDXYkyMFgYkLQwMiTMIqTVvP/B0w4eKPwzm0s3HpCtkLNJnzkhg/PCDwU4OlxaZMwlpN2ecMWCwnHMsTfIMj0TubIkEZskehmRjXFokGBLSbvO2AR12I8dMsgGoZZ5EAoM0AwNzYgMuLfwP0m7//YfQki4HtOU3A0N9PU4tEkBbGBsQWhKkJRLYgLYcTsDpMIkHaTd7jhnzWM5IS7YEajGc2fOwzbLH4LghboflpN34USMnZy6RfPBmY0+dvMTx5MM3flRUy+OyBRh7YBfwgNmMPWCyARRReAD7ASTOD3wqR8EoGAWjYKQCAEY/TgAoBAuIAAAAAElFTkSuQmCC","orcid":"","institution":"Khalifa University of Science and Technology","correspondingAuthor":true,"prefix":"","firstName":"Rajmeet","middleName":"","lastName":"Singh","suffix":""},{"id":488153054,"identity":"1a4cc8e9-e659-461d-907e-59d80e5b0db2","order_by":2,"name":"Shahpour Alirezaee","email":"","orcid":"","institution":"University of Windsor","correspondingAuthor":false,"prefix":"","firstName":"Shahpour","middleName":"","lastName":"Alirezaee","suffix":""},{"id":488153055,"identity":"fd8ccfd6-3fa8-4044-bc47-33286984beb6","order_by":3,"name":"Irfan 
Hussain","email":"","orcid":"","institution":"Khalifa University of Science and Technology","correspondingAuthor":false,"prefix":"","firstName":"Irfan","middleName":"","lastName":"Hussain","suffix":""}],"badges":[],"createdAt":"2025-07-08 17:08:26","currentVersionCode":1,"declarations":"","doi":"10.21203/rs.3.rs-7077052/v1","doiUrl":"https://doi.org/10.21203/rs.3.rs-7077052/v1","draftVersion":[],"editorialEvents":[{"content":"https://doi.org/10.1186/s13007-025-01456-8","type":"published","date":"2025-10-28T15:58:51+00:00"}],"editorialNote":"","failedWorkflow":false,"files":[{"id":95040425,"identity":"4964043f-8f57-4976-a47e-44d74fdab0f9","added_by":"auto","created_at":"2025-11-03 16:08:33","extension":"pdf","order_by":1,"title":"","display":"","copyAsset":false,"role":"manuscript-pdf","size":2245333,"visible":true,"origin":"","legend":"","description":"","filename":"ManveenPaper.pdf","url":"https://assets-eu.researchsquare.com/files/rs-7077052/v1_covered_3503db4e-81e3-4590-a26b-03e9f069de7a.pdf"}],"financialInterests":"No competing interests reported.","formattedTitle":"\u003cp\u003eVisual-Language Transformer-Based Tomato Leaf Disease Detection for Portable Greenhouse Monitoring Device\u003c/p\u003e","fulltext":[],"fulltextSource":"","fullText":"","funders":[],"hasAdminPriorityOnWorkflow":false,"hasManuscriptDocX":false,"hasOptedInToPreprint":true,"hasPassedJournalQc":"","hasAnyPriority":false,"hideJournal":false,"highlight":"","institution":"","isAcceptedByJournal":true,"isAuthorSuppliedPdf":true,"isDeskRejected":"","isHiddenFromSearch":false,"isInQc":false,"isInWorkflow":false,"isPdf":true,"isPdfUpToDate":true,"isWithdrawnOrRetracted":false,"journal":{"display":true,"email":"
[email protected]","identity":"plant-methods","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":false,"externalIdentity":"plme","sideBox":"Learn more about [Plant Methods](http://plantmethods.biomedcentral.com/)","snPcode":"13007","submissionUrl":"https://submission.nature.com/new-submission/13007/3","title":"Plant Methods","twitterHandle":"@PlantMethods","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"em","reportingPortfolio":"BMC/SO AJ","inReviewEnabled":true,"inReviewRevisionsEnabled":true},"keywords":"Tomato leave, disease, BLIP-2, LoRA, VLM, LLM","lastPublishedDoi":"10.21203/rs.3.rs-7077052/v1","lastPublishedDoiUrl":"https://doi.org/10.21203/rs.3.rs-7077052/v1","license":{"name":"CC BY 4.0","url":"https://creativecommons.org/licenses/by/4.0/"},"manuscriptAbstract":"\u003cp\u003eTomato leaf diseases pose a significant threat to global food security, necessitating accurate and efficient detection methods. This paper introduces the Tomato Leaf Disease Visual Language Model (TLDVLM), a novel approach based on the BLIP-2 architecture enhanced with Low-Rank Adaptation (LoRA), for precise classification of 10 distinct tomato leaf diseases. Our methodology integrates a sophisticated image preprocessing pipeline, utilizing GroundingDINO for robust leaf detection and SAM-2 for pixel-level segmentation, ensuring that the model focuses solely on relevant plant tissue. The TLDVLM leverages the powerful multimodal understanding of BLIP-2, with LoRA applied to its Q-Former module, enabling parameter-efficient fine-tuning without compromising performance. Comparative experiments demonstrate that the TLDVLM significantly outperforms baseline models, including CLIP-LoRA and ConvNeXT-tiny, achieving an accuracy of 97.27%, a precision of 0.9587, a recall of 0.9789, and an F1-score of 0.9681. Beyond classification, the finetuned TLDVLM checkpoints are integrated into a practical application for new image inference. 
This application displays the raw and segmented images, the predicted disease, and offers functionalities to fetch comprehensive information on disease causes and remedies using external APIs (e.g., OpenAI), with an option to download a PDF summary for offline access on a portable device. This research highlights the potential of LoRA-adapted Vision-Language Models in developing highly accurate, efficient, and user-friendly agricultural diagnostic tools.\u003c/p\u003e","manuscriptTitle":"Visual-Language Transformer-Based Tomato Leaf Disease Detection for Portable Greenhouse Monitoring Device","msid":"","msnumber":"","nonDraftVersions":[{"code":1,"date":"2025-07-21 12:47:09","doi":"10.21203/rs.3.rs-7077052/v1","editorialEvents":[{"type":"communityComments","content":0},{"type":"decision","content":"Revision requested","date":"2025-08-12T03:22:53+00:00","index":"","fulltext":""},{"type":"editorInvitedReview","content":"","date":"2025-08-06T08:02:20+00:00","index":"hide","fulltext":""},{"type":"editorInvitedReview","content":"","date":"2025-07-25T07:02:48+00:00","index":"hide","fulltext":""},{"type":"reviewerAgreed","content":"283198603878754627467846980341840255406","date":"2025-07-22T06:40:07+00:00","index":"hide","fulltext":""},{"type":"reviewerAgreed","content":"90438900192739731463733909006464815063","date":"2025-07-22T02:08:13+00:00","index":"hide","fulltext":""},{"type":"reviewerAgreed","content":"83398619554812528427989051879108225446","date":"2025-07-21T09:00:45+00:00","index":"hide","fulltext":""},{"type":"editorInvitedReview","content":"","date":"2025-07-20T21:03:03+00:00","index":"hide","fulltext":""},{"type":"reviewerAgreed","content":"264890164111031296372079822974467149665","date":"2025-07-17T09:06:30+00:00","index":"hide","fulltext":""},{"type":"reviewerAgreed","content":"207587930921330471755886695687228547388","date":"2025-07-17T08:17:50+00:00","index":"hide","fulltext":""},{"type":"reviewersInvited","content":"","date":"2025-07-17T05:40:49+00:00","in
dex":"","fulltext":""},{"type":"editorAssigned","content":"","date":"2025-07-10T21:40:50+00:00","index":"","fulltext":""},{"type":"checksComplete","content":"","date":"2025-07-10T14:57:41+00:00","index":"","fulltext":""},{"type":"submitted","content":"Plant Methods","date":"2025-07-08T17:00:53+00:00","index":"","fulltext":""}],"status":"published","journal":{"display":true,"email":"
[email protected]","identity":"plant-methods","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":false,"externalIdentity":"plme","sideBox":"Learn more about [Plant Methods](http://plantmethods.biomedcentral.com/)","snPcode":"13007","submissionUrl":"https://submission.nature.com/new-submission/13007/3","title":"Plant Methods","twitterHandle":"@PlantMethods","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"em","reportingPortfolio":"BMC/SO AJ","inReviewEnabled":true,"inReviewRevisionsEnabled":true}}],"origin":"","ownerIdentity":"7b197c29-9db2-4bf2-8f28-07d338c04645","owner":[],"postedDate":"July 21st, 2025","published":true,"recentEditorialEvents":[],"rejectedJournal":[],"revision":"","amendment":"","status":"published-in-journal","subjectAreas":[],"tags":[],"updatedAt":"2025-11-03T16:04:04+00:00","versionOfRecord":{"articleIdentity":"rs-7077052","link":"https://doi.org/10.1186/s13007-025-01456-8","journal":{"identity":"plant-methods","isVorOnly":false,"title":"Plant Methods"},"publishedOn":"2025-10-28 15:58:51","publishedOnDateReadable":"October 28th, 2025"},"versionCreatedAt":"2025-07-21 12:47:09","video":"","vorDoi":"10.1186/s13007-025-01456-8","vorDoiUrl":"https://doi.org/10.1186/s13007-025-01456-8","workflowStages":[]},"version":"v1","identity":"rs-7077052","journalConfig":"researchsquare"},"__N_SSP":true},"page":"/article/[identity]/[[...version]]","query":{"redirect":"/article/rs-7077052","identity":"rs-7077052","version":["v1"]},"buildId":"8U1c8b4HqxoKbykW_rLl7","isFallback":false,"isExperimentalCompile":false,"dynamicIds":[84888],"gssp":true,"scriptLoader":[]}
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.