Large Language Model-based Topic-Level Sentiment Analysis for E-Grocery Consumer Reviews | Research Square window.SnipcartSettings = { analytics: { enabled: false } }; (function() { var accessVector = localStorage.getItem('access_vector') || ''; window.dataLayer = window.dataLayer || []; if (accessVector) { window.dataLayer.push({ user: { profile: { profileInfo: { snid: accessVector } } } }); } })(); (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-K279D39R'); Browse Preprints In Review Journals COVID-19 Preprints AJE Video Bytes Research Tools Research Promotion AJE Professional Editing AJE Rubriq About Preprint Platform In Review Editorial Policies Our Team Advisory Board Help Center Sign In Submit a Preprint Cite Share Download PDF Research Article Large Language Model-based Topic-Level Sentiment Analysis for E-Grocery Consumer Reviews Julizar Isya Pandu Wangsa, Safira Raissa Rahmi, Yudhistira Jinawi Agung, and 5 more This is a preprint; it has not been peer reviewed by a journal. https://doi.org/ 10.21203/rs.3.rs-3637575/v3 This work is licensed under a CC BY 4.0 License Status: Published Journal Publication published 22 Jul, 2025 Read the published version in Big Data and Cognitive Computing → Version 3 posted You are reading this latest preprint version Show more versions Abstract Customer sentiment analysis plays a pivotal role in the digital economy by offering comprehensive insights that inform strategic business decisions, optimize digital marketing initiatives, and improve overall customer satisfaction. 
We propose a large language model-based topic-level sentiment analysis framework: a BERT model is used to obtain vector representations of documents, and then clustering algorithms are automatically applied to group documents into topics. Once the topics are formed, a GPT model is used to perform sentiment classification on the content related to each topic. The simulations show the effectiveness of this approach, where choosing the proper clustering technique can produce more semantically coherent topics. From a practical perspective on the Indonesian e-grocery customer reviews, the framework identifies unique customer concerns that are critical for e-grocery customer satisfaction. Furthermore, topic-level sentiment polarization uncovers that 31.7% of all negative sentiment concentrates around the shopping experience topic despite an overall positive sentiment trend. Topic-Level Sentiment Analysis large language models Clustering E-grocery Consumer Analytics Full Text Additional Declarations The authors declare no competing interests. Cite Share Download PDF Status: Published Journal Publication published 22 Jul, 2025 Read the published version in Big Data and Cognitive Computing → Version 3 posted You are reading this latest preprint version Show more versions Research Square lets you share your work early, gain feedback from the community, and start making changes to your manuscript prior to peer review in a journal. As a division of Research Square Company, we’re committed to making research communication faster, fairer, and more useful. We do this by developing innovative software and high quality services for the global research community. Our growing team is made up of researchers and industry professionals working together to solve the most critical problems facing scientific publishing. 
Also discoverable on Platform About Our Team In Review Editorial Policies Advisory Board Help Center Resources Author Services Accessibility API Access RSS feed Manage Cookie Preferences © Research Square 2026 | ISSN 2693-5015 (online) Privacy Policy Terms of Service Do Not Sell My Personal Information {"props":{"pageProps":{"initialData":{"identity":"rs-3637575","acceptedTermsAndConditions":true,"allowDirectSubmit":true,"archivedVersions":[],"articleType":"Research Article","associatedPublications":[],"authors":[{"id":436565354,"identity":"03d85293-76b4-4e28-bb48-aea0ee44c04d","order_by":0,"name":"Julizar Isya Pandu Wangsa","email":"","orcid":"","institution":"Universitas Indonesia","correspondingAuthor":false,"prefix":"","firstName":"Julizar","middleName":"Isya Pandu","lastName":"Wangsa","suffix":""},{"id":436565355,"identity":"222f735b-c33f-41f9-b6ab-75c0c145c9ee","order_by":1,"name":"Safira Raissa Rahmi","email":"","orcid":"","institution":"Universitas Indonesia","correspondingAuthor":false,"prefix":"","firstName":"Safira","middleName":"Raissa","lastName":"Rahmi","suffix":""},{"id":436565356,"identity":"6d60e947-075f-4544-b0cd-f4f49c007cb6","order_by":2,"name":"Yudhistira Jinawi Agung","email":"","orcid":"","institution":"Universitas Indonesia","correspondingAuthor":false,"prefix":"","firstName":"Yudhistira","middleName":"Jinawi","lastName":"Agung","suffix":""},{"id":436565353,"identity":"e52644ff-f4e5-4dbe-a0c2-826223c4d0bc","order_by":3,"name":"Hendri Murfi","email":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAAyAQMAAABI0h/eAAAABlBMVEX///8AAABVwtN+AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAAp0lEQVRIiWNgGAWjYBACAyCWYDCwYWCDcJmJ1pJGshaGwzA+EVrM2XsP3q4oOJ/Yx8D88ANDgTVhLZY955ItzxjcTmxjYDMGujCdCIfdyDGTbABrYTADcg8ToeX+G5CWc0At7N+I1HKDB6TlAFALD7G2nMkxtmwwSDZuY+Yplkggyi/HzxjebPhjJzu/vX3jhw9/iAgxBADFSAIpGkbBKBgFo2AU4AYAvhkxDR4RhKUAAAAASUVORK5CYII=","orcid":"","institution":"Universitas 
Indonesia","correspondingAuthor":true,"prefix":"","firstName":"Hendri","middleName":"","lastName":"Murfi","suffix":""},{"id":436565357,"identity":"f2da2d26-b5b4-4679-997c-972201869f1f","order_by":4,"name":"Nora Hariadi","email":"","orcid":"","institution":"Universitas Indonesia","correspondingAuthor":false,"prefix":"","firstName":"Nora","middleName":"","lastName":"Hariadi","suffix":""},{"id":436565358,"identity":"c2046816-34f0-4de2-b153-777460d337f6","order_by":5,"name":"Siti Nurrohmah","email":"","orcid":"","institution":"Universitas Indonesia","correspondingAuthor":false,"prefix":"","firstName":"Siti","middleName":"","lastName":"Nurrohmah","suffix":""},{"id":436565359,"identity":"e72ff1d0-fc58-467d-8a4b-7f05af9cf153","order_by":6,"name":"Yudi Satria","email":"","orcid":"","institution":"Universitas Indonesia","correspondingAuthor":false,"prefix":"","firstName":"Yudi","middleName":"","lastName":"Satria","suffix":""},{"id":436565360,"identity":"23479e79-4e8a-4bea-8b00-ad8c789ab6a0","order_by":7,"name":"Choiru Za’in","email":"","orcid":"","institution":"La Trobe University","correspondingAuthor":false,"prefix":"","firstName":"Choiru","middleName":"","lastName":"Za’in","suffix":""}],"badges":[],"createdAt":"2023-11-20 05:14:11","currentVersionCode":3,"declarations":{"humanSubjects":false,"vertebrateSubjects":false,"conflictsOfInterestStatement":false,"humanSubjectEthicalGuidelines":false,"humanSubjectConsent":false,"humanSubjectClinicalTrial":false,"humanSubjectCaseReport":false,"vertebrateSubjectEthicalGuidelines":false},"doi":"10.21203/rs.3.rs-3637575/v3","doiUrl":"https://doi.org/10.21203/rs.3.rs-3637575/v3","draftVersion":[],"editorialEvents":[{"content":"https://doi.org/10.3390/bdcc9080194","type":"published","date":"2025-07-23T00:00:00+00:00"}],"editorialNote":"","failedWorkflow":false,"files":[{"id":87777821,"identity":"82b0bc53-5c96-4f03-823a-84b7701f7edf","added_by":"auto","created_at":"2025-07-29 
00:55:41","extension":"pdf","order_by":1,"title":"","display":"","copyAsset":false,"role":"manuscript-pdf","size":681788,"visible":true,"origin":"","legend":"","description":"","filename":"manuskript20250523Preprint.pdf","url":"https://assets-eu.researchsquare.com/files/rs-3637575/v3_covered_bbd4cff8-086a-43be-aacd-be9a4256cc33.pdf"}],"financialInterests":"The authors declare no competing interests.","formattedTitle":"Large Language Model-based Topic-Level Sentiment Analysis for E-Grocery Consumer Reviews","fulltext":[],"fulltextSource":"","fullText":"","funders":[],"hasAdminPriorityOnWorkflow":false,"hasManuscriptDocX":false,"hasOptedInToPreprint":true,"hasPassedJournalQc":"","hasAnyPriority":false,"hideJournal":false,"highlight":"","institution":"","isAcceptedByJournal":true,"isAuthorSuppliedPdf":true,"isDeskRejected":"","isHiddenFromSearch":false,"isInQc":false,"isInWorkflow":false,"isPdf":true,"isPdfUpToDate":true,"isWithdrawnOrRetracted":false,"journal":{"display":true,"email":"
[email protected]","identity":"researchsquare","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":true,"externalIdentity":"","sideBox":"","snPcode":"","submissionUrl":"/submission","title":"Research Square","twitterHandle":"researchsquare","acdcEnabled":true,"dfaEnabled":false,"editorialSystem":"","reportingPortfolio":"","inReviewEnabled":false,"inReviewRevisionsEnabled":true},"keywords":"Topic-Level Sentiment Analysis, large language models, Clustering, E-grocery, Consumer Analytics","lastPublishedDoi":"10.21203/rs.3.rs-3637575/v3","lastPublishedDoiUrl":"https://doi.org/10.21203/rs.3.rs-3637575/v3","license":{"name":"CC BY 4.0","url":"https://creativecommons.org/licenses/by/4.0/"},"manuscriptAbstract":"\u003cp\u003eCustomer sentiment analysis plays a pivotal role in the digital economy by offering comprehensive insights that inform strategic business decisions, optimize digital marketing initiatives, and improve overall customer satisfaction. We propose a large language model-based topic-level sentiment analysis framework: a BERT model is used to obtain vector representations of documents, and then clustering algorithms are automatically applied to group documents into topics. Once the topics are formed, a GPT model is used to perform sentiment classification on the content related to each topic. The simulations show the effectiveness of this approach, where choosing the proper clustering technique can produce more semantically coherent topics. From a practical perspective on the Indonesian e-grocery customer reviews, the framework identifies unique customer concerns that critical for e-grocery customer satisfaction. 
Furthermore, topic-level sentiment polarization uncovers that 31.7% of all negative sentiment concentrates around the shopping experience topic despite an overall positive sentiment trend.\u003c/p\u003e","manuscriptTitle":"Large Language Model-based Topic-Level Sentiment Analysis for E-Grocery Consumer Reviews","msid":"","msnumber":"","nonDraftVersions":[{"code":3,"date":"2025-06-11 20:01:18","doi":"10.21203/rs.3.rs-3637575/v3","editorialEvents":[{"type":"communityComments","content":0}],"status":"published","journal":{"display":true,"email":"
[email protected]","identity":"researchsquare","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":true,"externalIdentity":"","sideBox":"","snPcode":"","submissionUrl":"/submission","title":"Research Square","twitterHandle":"researchsquare","acdcEnabled":true,"dfaEnabled":false,"editorialSystem":"","reportingPortfolio":"","inReviewEnabled":false,"inReviewRevisionsEnabled":true}},{"code":2,"date":"2025-04-01 00:00:47","doi":"10.21203/rs.3.rs-3637575/v2","editorialEvents":[{"type":"communityComments","content":0}],"status":"published","journal":{"display":true,"email":"
[email protected]","identity":"researchsquare","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":true,"externalIdentity":"","sideBox":"","snPcode":"","submissionUrl":"/submission","title":"Research Square","twitterHandle":"researchsquare","acdcEnabled":true,"dfaEnabled":false,"editorialSystem":"","reportingPortfolio":"","inReviewEnabled":false,"inReviewRevisionsEnabled":true}},{"code":1,"date":"2024-01-10 10:21:32","doi":"10.21203/rs.3.rs-3637575/v1","editorialEvents":[{"type":"communityComments","content":0}],"status":"published","journal":{"display":true,"email":"
[email protected]","identity":"researchsquare","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":true,"externalIdentity":"","sideBox":"","snPcode":"","submissionUrl":"/submission","title":"Research Square","twitterHandle":"researchsquare","acdcEnabled":true,"dfaEnabled":false,"editorialSystem":"","reportingPortfolio":"","inReviewEnabled":false,"inReviewRevisionsEnabled":true}}],"origin":"","ownerIdentity":"d840291c-e5b9-40b5-bec7-14bdac3a6591","owner":[],"postedDate":"June 11th, 2025","published":true,"recentEditorialEvents":[],"rejectedJournal":[],"revision":"","amendment":"","status":"published-in-journal","subjectAreas":[],"tags":[],"updatedAt":"2025-07-29T00:55:35+00:00","versionOfRecord":{"articleIdentity":"rs-3637575","link":"https://doi.org/10.3390/bdcc9080194","journal":{"identity":"big-data-and-cognitive-computing","isVorOnly":true,"title":"Big Data and Cognitive Computing"},"publishedOn":"2025-07-23 00:00:00","publishedOnDateReadable":"July 23rd, 2025"},"versionCreatedAt":"2025-06-11 20:01:18","video":"","vorDoi":"10.3390/bdcc9080194","vorDoiUrl":"https://doi.org/10.3390/bdcc9080194","workflowStages":[]},"version":"v3","identity":"rs-3637575","journalConfig":"researchsquare"},"__N_SSP":true},"page":"/article/[identity]/[[...version]]","query":{"redirect":"/article/rs-3637575","identity":"rs-3637575","version":["v3"]},"buildId":"qtupq5eGEP_6zYnWcrvyt","isFallback":false,"isExperimentalCompile":false,"dynamicIds":[84888],"gssp":true,"scriptLoader":[]}
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.