Decoupled Multi-Dimensional Reinforcement Learning with Temporal Communication for Vision-Based UAV Control in Partially Observable Environments | Research Square window.SnipcartSettings = { analytics: { enabled: false } }; (function() { var accessVector = localStorage.getItem('access_vector') || ''; window.dataLayer = window.dataLayer || []; if (accessVector) { window.dataLayer.push({ user: { profile: { profileInfo: { snid: accessVector } } } }); } })(); (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-K279D39R'); Browse Preprints In Review Journals COVID-19 Preprints AJE Video Bytes Research Tools Research Promotion AJE Professional Editing AJE Rubriq About Preprint Platform In Review Editorial Policies Our Team Advisory Board Help Center Sign In Submit a Preprint Cite Share Download PDF Research Article Decoupled Multi-Dimensional Reinforcement Learning with Temporal Communication for Vision-Based UAV Control in Partially Observable Environments Dapeng Ji, Shike Yang, Weidong Liu, Li Jingchen This is a preprint; it has not been peer reviewed by a journal. https://doi.org/10.21203/rs.3.rs-7322417/v1 This work is licensed under a CC BY 4.0 License Status: Under Review Version 1 posted 4 You are reading this latest preprint version Abstract This paper proposes a novel reinforcement learning approach for controlling vision-based unmanned aerial vehicles (UAVs) in partially observable and dynamic environments. Traditional reinforcement learning methods typically employ a single, monolithic policy to simultaneously manage multiple control dimensions—throttle, roll rate, pitch rate, and yaw rate—leading to complex feature representations and suboptimal performance under partial observability. 
To address this limitation, we introduce a decoupled policy framework that decomposes the UAV's action space into separate control dimensions, effectively transforming the original partially observable Markov decision process (POMDP) into a specialized multi-agent setting. Each action dimension is managed by an independent policy sharing a common feature extraction backbone and communicating through a customized Bidirectional Long Short-Term Memory (Bi-LSTM) network. This architecture allows specialized representation learning while preserving necessary coupling between control dimensions. Experiments conducted in the photorealistic Flightmare simulator on three challenging tasks—obstacle avoidance, target tracking, and object search—demonstrate significant performance improvements compared to several state-of-the-art baselines. Our approach notably reduces collision rates, enhances navigation efficiency, and achieves higher task success rates, thereby validating the efficacy of policy decoupling and temporal communication mechanisms in vision-based UAV reinforcement learning. Unmanned aerial vehicles Reinforcement learning Partially observable environment Full Text Cite Share Download PDF Status: Under Review Version 1 posted Reviewers agreed at journal 10 Feb, 2026 Reviewers invited by journal 10 Feb, 2026 Editor invited by journal 24 Oct, 2025 First submitted to journal 08 Aug, 2025 You are reading this latest preprint version Research Square lets you share your work early, gain feedback from the community, and start making changes to your manuscript prior to peer review in a journal. As a division of Research Square Company, we’re committed to making research communication faster, fairer, and more useful. We do this by developing innovative software and high quality services for the global research community. Our growing team is made up of researchers and industry professionals working together to solve the most critical problems facing scientific publishing. 
Also discoverable on Platform About Our Team In Review Editorial Policies Advisory Board Help Center Resources Author Services Accessibility API Access RSS feed Manage Cookie Preferences © Research Square 2026 | ISSN 2693-5015 (online) Privacy Policy Terms of Service Do Not Sell My Personal Information {"props":{"pageProps":{"initialData":{"identity":"rs-7322417","acceptedTermsAndConditions":true,"allowDirectSubmit":false,"archivedVersions":[],"articleType":"Research Article","associatedPublications":[],"authors":[{"id":588899863,"identity":"c6a9ab13-581f-4511-a166-ceaa0856ca07","order_by":0,"name":"Dapeng Ji","email":"","orcid":"","institution":"Northwestern Polytechnical University","correspondingAuthor":false,"prefix":"","firstName":"Dapeng","middleName":"","lastName":"Ji","suffix":""},{"id":588899864,"identity":"9a88a2cc-902b-4329-891e-68773205ddc9","order_by":1,"name":"Shike Yang","email":"","orcid":"","institution":"Northwestern Polytechnical University","correspondingAuthor":false,"prefix":"","firstName":"Shike","middleName":"","lastName":"Yang","suffix":""},{"id":588899865,"identity":"02158799-d434-4001-b789-72ce7ff478a5","order_by":2,"name":"Weidong Liu","email":"","orcid":"","institution":"Northwestern Polytechnical University","correspondingAuthor":false,"prefix":"","firstName":"Weidong","middleName":"","lastName":"Liu","suffix":""},{"id":588899866,"identity":"724150bf-cb12-4c89-88b3-977ba9cf4603","order_by":3,"name":"Li 
Jingchen","email":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAAyAQMAAABI0h/eAAAABlBMVEX///8AAABVwtN+AAAACXBIWXMAAA7EAAAOxAGVKw4bAAABF0lEQVRIie2QsUrEQBCG5whMmtF6IHq+wsJBjMSHSRC8Jh4BmytEA4FUiq025hW0ufpEWJukv5DCQkglYqNYBHG94jxkAykt9mOb3Z2P/98FMBj+KRYAD9FO5wDCWzvsApe33miTZKAU7q1MwxuOhNr2UHbytHHilgcZRO+3nzHDLh9JhqkfJnY51ylCoutcZ2whlLPqXBXbu5ocMhTjMKFJoFUQXGcjYcTBxWxBShF15KrQhzBhEtpimf3hUMuEFjVVu1K+uhWQ5DqErGII69+UpFsRMjr21VuEMtx6S6ixp9cDL5DjUUaRvlj6eFfH7elZnj831Uu7PxRFeb94O/G3L+1CX+wvtFw/X4W95leWwWAwGNb4Bkh3VDwD6q60AAAAAElFTkSuQmCC","orcid":"https://orcid.org/0000-0003-0905-0816","institution":"Beijing Academy of Agriculture and Forestry Science","correspondingAuthor":true,"prefix":"","firstName":"Li","middleName":"","lastName":"Jingchen","suffix":""}],"badges":[],"createdAt":"2025-08-08 01:19:29","currentVersionCode":1,"declarations":"","doi":"10.21203/rs.3.rs-7322417/v1","doiUrl":"https://doi.org/10.21203/rs.3.rs-7322417/v1","draftVersion":[],"editorialEvents":[],"editorialNote":"","failedWorkflow":false,"files":[{"id":102622315,"identity":"f16efcc5-c01b-4403-a26d-4db7cc7ee603","added_by":"auto","created_at":"2026-02-13 16:55:52","extension":"pdf","order_by":1,"title":"","display":"","copyAsset":false,"role":"manuscript-pdf","size":378534,"visible":true,"origin":"","legend":"","description":"","filename":"JDP5.pdf","url":"https://assets-eu.researchsquare.com/files/rs-7322417/v1_covered_b327c936-4bea-4feb-9329-1ec9cf73bc75.pdf"}],"financialInterests":"","formattedTitle":"Decoupled Multi-Dimensional Reinforcement Learning with Temporal Communication for Vision-Based UAV Control in Partially Observable 
Environments","fulltext":[],"fulltextSource":"","fullText":"","funders":[],"hasAdminPriorityOnWorkflow":false,"hasManuscriptDocX":false,"hasOptedInToPreprint":true,"hasPassedJournalQc":"","hasAnyPriority":false,"hideJournal":false,"highlight":"","institution":"","isAcceptedByJournal":false,"isAuthorSuppliedPdf":true,"isDeskRejected":"","isHiddenFromSearch":false,"isInQc":false,"isInWorkflow":true,"isPdf":true,"isPdfUpToDate":true,"isWithdrawnOrRetracted":false,"journal":{"display":true,"email":"
[email protected]","identity":"soft-computing","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":false,"externalIdentity":"soco","sideBox":"Learn more about [Soft Computing](https://www.springer.com/journal/500)","snPcode":"500","submissionUrl":"https://submission.nature.com/new-submission/500/3","title":"Soft Computing","twitterHandle":"","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"em","reportingPortfolio":"Springer Hybrid","inReviewEnabled":true,"inReviewRevisionsEnabled":false},"keywords":"Unmanned aerial vehicles, Reinforcement learning, Partially observable environment","lastPublishedDoi":"10.21203/rs.3.rs-7322417/v1","lastPublishedDoiUrl":"https://doi.org/10.21203/rs.3.rs-7322417/v1","license":{"name":"CC BY 4.0","url":"https://creativecommons.org/licenses/by/4.0/"},"manuscriptAbstract":"This paper proposes a novel reinforcement learning approach for controlling vision-based unmanned aerial vehicles (UAVs) in partially observable and dynamic environments. Traditional reinforcement learning methods typically employ a single, monolithic policy to simultaneously manage multiple control dimensions—throttle, roll rate, pitch rate, and yaw rate—leading to complex feature representations and suboptimal performance under partial observability. To address this limitation, we introduce a decoupled policy framework that decomposes the UAV's action space into separate control dimensions, effectively transforming the original partially observable Markov decision process (POMDP) into a specialized multi-agent setting. Each action dimension is managed by an independent policy sharing a common feature extraction backbone and communicating through a customized Bidirectional Long Short-Term Memory (Bi-LSTM) network. This architecture allows specialized representation learning while preserving necessary coupling between control dimensions. 
Experiments conducted in the photorealistic Flightmare simulator on three challenging tasks—obstacle avoidance, target tracking, and object search—demonstrate significant performance improvements compared to several state-of-the-art baselines. Our approach notably reduces collision rates, enhances navigation efficiency, and achieves higher task success rates, thereby validating the efficacy of policy decoupling and temporal communication mechanisms in vision-based UAV reinforcement learning.","manuscriptTitle":"Decoupled Multi-Dimensional Reinforcement Learning with Temporal Communication for Vision-Based UAV Control in Partially Observable Environments","msid":"","msnumber":"","nonDraftVersions":[{"code":1,"date":"2026-02-13 16:53:26","doi":"10.21203/rs.3.rs-7322417/v1","editorialEvents":[{"type":"communityComments","content":0},{"type":"reviewerAgreed","content":"","date":"2026-02-10T20:42:05+00:00","index":0,"fulltext":""},{"type":"reviewersInvited","content":"","date":"2026-02-10T10:52:48+00:00","index":"","fulltext":""},{"type":"editorInvited","content":"Soft Computing","date":"2025-10-24T06:41:13+00:00","index":"","fulltext":""},{"type":"submitted","content":"Soft Computing","date":"2025-08-08T05:14:10+00:00","index":"","fulltext":""}],"status":"published","journal":{"display":true,"email":"
[email protected]","identity":"soft-computing","isNatureJournal":false,"hasQc":true,"allowDirectSubmit":false,"externalIdentity":"soco","sideBox":"Learn more about [Soft Computing](https://www.springer.com/journal/500)","snPcode":"500","submissionUrl":"https://submission.nature.com/new-submission/500/3","title":"Soft Computing","twitterHandle":"","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"em","reportingPortfolio":"Springer Hybrid","inReviewEnabled":true,"inReviewRevisionsEnabled":false}}],"origin":"","ownerIdentity":"7214ccb3-56da-4b80-b783-8c9947b34aa6","owner":[],"postedDate":"February 13th, 2026","published":true,"recentEditorialEvents":[],"rejectedJournal":[],"revision":"","amendment":"","status":"under-review","subjectAreas":[],"tags":[],"updatedAt":"2026-05-18T08:16:47+00:00","versionOfRecord":[],"versionCreatedAt":"2026-02-13 16:53:26","video":"","vorDoi":"","vorDoiUrl":"","workflowStages":[]},"version":"v1","identity":"rs-7322417","journalConfig":"researchsquare"},"__N_SSP":true},"page":"/article/[identity]/[[...version]]","query":{"redirect":"/article/rs-7322417","identity":"rs-7322417","version":["v1"]},"buildId":"XKTyCvWXoU3ODBz1xrDgd","isFallback":false,"isExperimentalCompile":false,"dynamicIds":[84888],"gssp":true,"scriptLoader":[]}
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.