Deep learning four decades of human migration | Research Square window.SnipcartSettings = { analytics: { enabled: false } }; (function() { var accessVector = localStorage.getItem('access_vector') || ''; window.dataLayer = window.dataLayer || []; if (accessVector) { window.dataLayer.push({ user: { profile: { profileInfo: { snid: accessVector } } } }); } })(); (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-K279D39R'); Browse Preprints In Review Journals COVID-19 Preprints AJE Video Bytes Research Tools Research Promotion AJE Professional Editing AJE Rubriq About Preprint Platform In Review Editorial Policies Our Team Advisory Board Help Center Sign In Submit a Preprint Cite Share Download PDF Social Sciences - Article Deep learning four decades of human migration Thomas Gaskin, Guy Abel This is a preprint; it has not been peer reviewed by a journal. https://doi.org/ 10.21203/rs.3.rs-7065170/v1 This work is licensed under a CC BY 4.0 License Status: Under Review Version 1 posted You are reading this latest preprint version Abstract We present a novel and detailed dataset on origin-destination annual migration flows and stocks between 230 countries and regions, spanning the period from 1990 to the present. Our flow estimates are further disaggregated by country of birth, providing a comprehensive picture of migration over the last 35 years. The estimates are obtained by training a deep recurrent neural network to learn flow patterns from 18 covariates for all countries, including geographic, economic, cultural, societal, and political information. 
The recurrent architecture of the neural network means that the entire past can influence current migration patterns, allowing us to learn long-range temporal correlations. By training an ensemble of neural networks and additionally pushing uncertainty on the covariates through the trained network, we obtain confidence bounds for all our estimates, allowing researchers to pinpoint the geographic regions most in need of additional data collection. We validate our approach on various test sets of unseen data, demonstrating that it significantly outperforms traditional methods estimating five-year flows while delivering a significant increase in temporal resolution. The model is fully open source: all training data, neural network weights, and training code are made public alongside the migration estimates, providing a valuable resource for future studies of human migration. Scientific community and society/Social sciences/Sociology Physical sciences/Mathematics and computing/Computational science International Migration Deep learning Human Migration Neural networks Full Text Additional Declarations There is NO Competing Interest. Supplementary Files SI.pdf Supplementary Information Cite Share Download PDF Status: Under Review Version 1 posted You are reading this latest preprint version Research Square lets you share your work early, gain feedback from the community, and start making changes to your manuscript prior to peer review in a journal. As a division of Research Square Company, we’re committed to making research communication faster, fairer, and more useful. We do this by developing innovative software and high quality services for the global research community. Our growing team is made up of researchers and industry professionals working together to solve the most critical problems facing scientific publishing. 
Also discoverable on Platform About Our Team In Review Editorial Policies Advisory Board Help Center Resources Author Services Accessibility API Access RSS feed Manage Cookie Preferences © Research Square 2026 | ISSN 2693-5015 (online) Privacy Policy Terms of Service Do Not Sell My Personal Information {"props":{"pageProps":{"initialData":{"identity":"rs-7065170","acceptedTermsAndConditions":true,"allowDirectSubmit":false,"archivedVersions":[],"articleType":"Social Sciences - Article","associatedPublications":[],"authors":[{"id":489924937,"identity":"6fef6329-acbb-4d9b-9e81-b90a54396c3e","order_by":0,"name":"Thomas Gaskin","email":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAZAAAAAyAQMAAABI0h/eAAAABlBMVEX///8AAABVwtN+AAAACXBIWXMAAA7EAAAOxAGVKw4bAAAAvElEQVRIiWNgGAWjYHAC5gcSFXBOAmH1PAwMbAYWZ0jUwiBR2UaKFnvp4xcMbs6zk9dtYH74gbEtjQhb+HIKHs7clmy47QCbsQRjWw4RWnh4Eowltx1g3HaAwYyBsa2COC3Sf+ccsN92gP0bsVrYD0hINhxI3HaAB2QLMQ47w8NmIHEsOXnbYZ5iiYRzRHifvYf98QOJGjvbbcfbN374UJZMWAvQHgMIzcxAVKyA7XlAnLpRMApGwSgYuQAAMyQ0jbuoPgYAAAAASUVORK5CYII=","orcid":"https://orcid.org/0000-0002-5644-4431","institution":"University of Cambridge","correspondingAuthor":true,"prefix":"","firstName":"Thomas","middleName":"","lastName":"Gaskin","suffix":""},{"id":489924938,"identity":"ace7c273-146e-4106-851f-78f33eb23dff","order_by":1,"name":"Guy Abel","email":"","orcid":"https://orcid.org/0000-0002-4893-5687","institution":"University of Hong kong","correspondingAuthor":false,"prefix":"","firstName":"Guy","middleName":"","lastName":"Abel","suffix":""}],"badges":[],"createdAt":"2025-07-07 11:55:24","currentVersionCode":1,"declarations":"","doi":"10.21203/rs.3.rs-7065170/v1","doiUrl":"https://doi.org/10.21203/rs.3.rs-7065170/v1","draftVersion":[],"editorialEvents":[],"editorialNote":"","failedWorkflow":false,"files":[{"id":87472547,"identity":"848970de-9bca-4c86-9c7f-9055412268d6","added_by":"auto","created_at":"2025-07-24 
08:32:04","extension":"pdf","order_by":1,"title":"","display":"","copyAsset":false,"role":"manuscript-pdf","size":3931645,"visible":true,"origin":"","legend":"Article File","description":"","filename":"manuscript.pdf","url":"https://assets-eu.researchsquare.com/files/rs-7065170/v1_covered_25c8fedd-d8ff-4ebc-a74e-ebb29c93718d.pdf"},{"id":87471545,"identity":"09c8e841-5bca-458e-8fc1-894fd715c19f","added_by":"auto","created_at":"2025-07-24 08:29:37","extension":"pdf","order_by":1,"title":"","display":"","copyAsset":false,"role":"supplement","size":1894067,"visible":true,"origin":"","legend":"Supplementary Information","description":"","filename":"SI.pdf","url":"https://assets-eu.researchsquare.com/files/rs-7065170/v1/e42dac87f7f48544640f151c.pdf"}],"financialInterests":"There is \u003cb\u003eNO\u003c/b\u003e Competing Interest.","formattedTitle":"Deep learning four decades of human migration","fulltext":[],"fulltextSource":"","fullText":"","funders":[],"hasAdminPriorityOnWorkflow":false,"hasManuscriptDocX":false,"hasOptedInToPreprint":true,"hasPassedJournalQc":"","hasAnyPriority":true,"hideJournal":false,"highlight":"","institution":"","isAcceptedByJournal":true,"isAuthorSuppliedPdf":true,"isDeskRejected":"","isHiddenFromSearch":false,"isInQc":false,"isInWorkflow":false,"isPdf":true,"isPdfUpToDate":true,"isWithdrawnOrRetracted":false,"journal":{"display":true,"email":"
[email protected]","identity":"nature-portfolio","isNatureJournal":true,"hasQc":false,"allowDirectSubmit":false,"externalIdentity":"","sideBox":"","snPcode":"","submissionUrl":"","title":"Nature Portfolio","twitterHandle":"","acdcEnabled":false,"dfaEnabled":false,"editorialSystem":"ejp","reportingPortfolio":"","inReviewEnabled":true,"inReviewRevisionsEnabled":false},"keywords":"International Migration, Deep learning, Human Migration, Neural networks","lastPublishedDoi":"10.21203/rs.3.rs-7065170/v1","lastPublishedDoiUrl":"https://doi.org/10.21203/rs.3.rs-7065170/v1","license":{"name":"CC BY 4.0","url":"https://creativecommons.org/licenses/by/4.0/"},"manuscriptAbstract":"We present a novel and detailed dataset on origin-destination annual migration flows and stocks between 230 countries and regions, spanning the period from 1990 to the present. Our flow estimates are further disaggregated by country of birth, providing a comprehensive picture of migration over the last 35 years. The estimates are obtained by training a deep recurrent neural network to learn flow patterns from 18 covariates for all countries, including geographic, economic, cultural, societal, and political information. The recurrent architecture of the neural network means that the entire past can influence current migration patterns, allowing us to learn long-range temporal correlations. By training an ensemble of neural networks and additionally pushing uncertainty on the covariates through the trained network, we obtain confidence bounds for all our estimates, allowing researchers to pinpoint the geographic regions most in need of additional data collection. We validate our approach on various test sets of unseen data, demonstrating that it significantly outperforms traditional methods estimating five-year flows while delivering a significant increase in temporal resolution. 
The model is fully open source: all training data, neural network weights, and training code are made public alongside the migration estimates, providing a valuable resource for future studies of human migration.","manuscriptTitle":"Deep learning four decades of human migration","msid":"","msnumber":"","nonDraftVersions":[{"code":1,"date":"2025-07-24 08:12:02","doi":"10.21203/rs.3.rs-7065170/v1","editorialEvents":[],"status":"published","journal":{"display":false,"email":"
[email protected]","identity":"nature","isNatureJournal":true,"hasQc":false,"allowDirectSubmit":false,"externalIdentity":"nature","sideBox":"Learn more about [Nature](http://www.nature.com/nature/)","snPcode":"","submissionUrl":"","title":"Nature","twitterHandle":"","acdcEnabled":true,"dfaEnabled":true,"editorialSystem":"ejp","reportingPortfolio":"Nature","inReviewEnabled":true,"inReviewRevisionsEnabled":false}}],"origin":"","ownerIdentity":"7d82b2a7-d133-4e66-b8c3-aa9f6ef55527","owner":[],"postedDate":"July 24th, 2025","published":true,"recentEditorialEvents":[],"rejectedJournal":[],"revision":"","amendment":"","status":"under-review","subjectAreas":[{"id":52015941,"name":"Scientific community and society/Social sciences/Sociology"},{"id":52015942,"name":"Physical sciences/Mathematics and computing/Computational science"}],"tags":[],"updatedAt":"2026-04-28T20:25:49+00:00","versionOfRecord":[],"versionCreatedAt":"2025-07-24 08:12:02","video":"","vorDoi":"","vorDoiUrl":"","workflowStages":[]},"version":"v1","identity":"rs-7065170","journalConfig":"researchsquare"},"__N_SSP":true},"page":"/article/[identity]/[[...version]]","query":{"redirect":"/article/rs-7065170","identity":"rs-7065170","version":["v1"]},"buildId":"8U1c8b4HqxoKbykW_rLl7","isFallback":false,"isExperimentalCompile":false,"dynamicIds":[84888],"gssp":true,"scriptLoader":[]}
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.