Population-level migration modeling of North America’s birds through data integration with BirdFlow

doi:10.1101/2025.09.30.679621

Population-level migration modeling of North America’s birds through data integration with BirdFlow

2025 · doi:10.1101/2025.09.30.679621

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 79,363 characters · extracted from preprint-html · click to expand

Population-level migration modeling of North America’s birds through data integration with BirdFlow | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Population-level migration modeling of North America’s birds through data integration with BirdFlow View ORCID Profile Yangkang Chen , View ORCID Profile David L. Slager , View ORCID Profile Ethan Plunkett , Miguel Fuentes , View ORCID Profile Yuting Deng , View ORCID Profile Stuart A. Mackenzie , Lucas E. Berrigan , Daniel Fink , View ORCID Profile Daniel Sheldon , View ORCID Profile Benjamin M. Van Doren , View ORCID Profile Adriaan M. Dokter doi: https://doi.org/10.1101/2025.09.30.679621 Yangkang Chen 1 Department of Natural Resources and Environmental Sciences, University of Illinois Urbana-Champaign , Champaign, Illinois, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yangkang Chen David L. Slager 2 Cornell Lab of Ornithology, Cornell University , Ithaca, New York, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for David L. Slager Ethan Plunkett 3 College of Information and Computer Sciences, University of Massachusetts , Amherst, Massach us etts, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ethan Plunkett Miguel Fuentes 3 College of Information and Computer Sciences, University of Massachusetts , Amherst, Massach us etts, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yuting Deng 2 Cornell Lab of Ornithology, Cornell University , Ithaca, New York, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yuting Deng Stuart A. Mackenzie 4 Birds Canada , Port Rowan, Ontario, CANADA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Stuart A. Mackenzie Lucas E. Berrigan 4 Birds Canada , Port Rowan, Ontario, CANADA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Daniel Fink 2 Cornell Lab of Ornithology, Cornell University , Ithaca, New York, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Daniel Sheldon 3 College of Information and Computer Sciences, University of Massachusetts , Amherst, Massach us etts, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Daniel Sheldon Benjamin M. Van Doren 1 Department of Natural Resources and Environmental Sciences, University of Illinois Urbana-Champaign , Champaign, Illinois, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Benjamin M. Van Doren For correspondence: amd427{at}cornell.edu vandoren{at}illinois.edu Adriaan M. Dokter 2 Cornell Lab of Ornithology, Cornell University , Ithaca, New York, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Adriaan M. Dokter For correspondence: amd427{at}cornell.edu vandoren{at}illinois.edu Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Background Accurate information on population-level movements of migratory animals is essential for understanding migration and for designing effective conservation strategies in a changing world. Yet such information remains scarce for most migratory species due to the effort and expense needed to collect data across their full distribution ranges. BirdFlow is a probabilistic modeling framework that infers population-level movements from weekly species distribution maps produced by the participatory science project eBird. However, BirdFlow models have only been tuned for a handful of species using high-resolution individual tracking data, which is not available for most migratory species. Methods Here, we introduce a general tuning and evaluation framework for BirdFlow that enables the first large-scale integration of distributional and individual-level data to infer animal movement across continents and hundreds of migratory species, eliminating reliance on any single individual-tracking data source. By generalizing the BirdFlow model parametrization, we enable tuning and validation using multiple complementary data sources, including GPS tracks, banding recoveries, and radio telemetry data from the Motus Wildlife Tracking System. We investigate the efficacy of this approach by (1) investigating predictive performance compared to null models; (2) validating the biological plausibility of BirdFlow models by comparing movement properties such as route straightness, number of stopovers, and migration speed between model-generated routes and real movement tracks; and (3) comparing the performance of models tuned on species-specific movement data to models tuned using hyperparameters transferred from other species. Results Our results show that BirdFlow models produced by the new tuning framework achieve biologically realistic performance, even for prediction horizons of thousands of kilometers and several months. When species-specific data are unavailable, models can still be tuned using data from other phylogenetically adjacent species to achieve improved performance. Conclusions By integrating eBird Status & Trends abundance surfaces with data from banding recaptures, radio telemetry, and GPS tracking, we scale BirdFlow model to 153 North American migratory species, representing the first collection of continental-scale population-level movement and forecasting models. Species-specific tuning improves population-level movement forecasts, while taxonomically informed hyperparameter transfer supports the modeling of data-limited species. Overall, our work offers a foundation for more accurate predictions across hundreds of species for research in ecology and conservation, disease surveillance, aviation, and public outreach. Background Our understanding of continent-scale bird migration has been greatly fostered by modern tracking technologies and the rapid accumulation of participatory science data [ 1 , 2 ]. Despite these advances, population-level variation in movement behavior remains poorly understood for most bird species, particularly across continental extents and throughout the annual cycle [ 3 ]. This gap poses a critical challenge because species’ ranges often span diverse ecoregions and habitats, exposing different populations of the same species to distinct environmental barriers and threats (e.g., [ 4 ]). Spatial heterogeneity in threats, which include habitat degradation, artificial light at night (ALAN), climate change, and hunting risk, likely drives corresponding variation in bird population trends [ 5 – 10 ]. Therefore, understanding population-level movements within a species’ range is essential for quantifying migratory connectivity and for targeting conservation actions in a changing world [ 11 ]. However, such information remains largely unavailable due to the difficulties of tracking multiple moving populations across species’ range. Several modeling frameworks have been proposed to infer large-scale avian movement by integrating multiple data types, including methods that combine individual tracking and species distribution information within integrated statistical frameworks [ 16 , 37 ]. These approaches represent important advances and highlight the growing range of tools available for migration inference at broad spatial scales. However, these approaches remain constrained by the limited availability of individual-level movement data, which limits their application in regions and for species for which tracking data remain sparse. BirdFlow is a probabilistic modeling framework developed to address this need by inferring the population-level movements across a species’ range from spatiotemporal abundance distributions [ 12 ]. The model builds on relative abundance estimates generated by the eBird Status & Trends project (hereafter, eBird S&T) [ 13 , 14 ] and uses mechanistic optimization that accounts for energy costs and inter-population movement patterns. Performance of the BirdFlow model can be improved when hyperparameters are tuned using individual movement data from other sources, such as GPS and satellite tracking [ 12 ]. However, generalizing BirdFlow to hundreds of species has remained challenging because GPS tracking data are unavailable for most migratory birds [ 15 ]. To overcome the reliance on any single individual-tracking data source and produce BirdFlow models for many species, we introduce a unified model tuning framework to train BirdFlow models based on general mark-resight data. We also introduce a set of intuitive validation metrics for quantifying the quality and predictive performance of BirdFlow models. Our approach standardizes observations across multiple data sources, including GPS tracking data, banding recovery data, and Motus Wildlife Tracking System automated radio telemetry data (Motus; https://motus.rog ; [ 17 ]). We address three primary questions: (1) Can integrating multiple data sources enable BirdFlow modeling across hundreds of species? (2) How consistent are optimal model parameterizations among species with similar ecological or morphological traits? and (3) Can such cross-species similarities be leveraged to model species lacking individual-level tracking data? Finally, we discussed how species-specific models tuned using this framework open new opportunities for research and outreach across ecology, evolution, and conservation biology. Methods We describe the tuning and validation framework in the sections below. A schematic summary is presented in Fig. 1 . Download figure Open in new tab Figure 1. Schematic workflow for the BirdFlow species-specific model tuning and validation framework. The diagram illustrates the main steps of data preparation, model tuning, and validation methods used in this study. The BirdFlow model BirdFlow model structure and training methodology are described in detail in [ 12 ]. Briefly, the current version of BirdFlow is a Markovian probabilistic graphical model where each state (location and time) is projected to the next state with a certain transition probability. BirdFlow is trained by optimizing an objective function, with three terms controlled by three hyperparameters: The first term is the “observation” term, which is the same as the “location loss” term in [ 12 ]. The observation term minimizes the differences between the modeled probabilistic surface 𝜇(𝜃), given Markov chain parameter set 𝜃 and a species’ spatiotemporal relative abundance surface 𝜇^. The second term in Eq. 1 is the “distance” term, computed as a function of distance that penalizes longer movements. This requires simulated routes to minimize the movement cost for the population. This term is regulated by hyperparameter 𝛼 (the distance weight) and hyperparameter 𝜖 (the distance exponent). The distance term for a single designated transition is formulated as Where 𝜇(𝜃) xt,xt +1 is the pairwise marginal that specify the probability of moving from cell x t at time t to cell x t +1 at time t + 1. d ( x t , x t+1 ) denotes the great circle distance between x t and x t +1 , with 𝜖 regulating the trade-off of cost between a few long-distance jumps versus many short distance hops. The exponentiated distance d is divided by 100 + to make the magnitude of L mov less dependent on 𝜖, thereby reducing correlations in the optimized values of 𝛼 and 𝜖. The third term in Eq. 1 is the “Shannon entropy” term and is regulated by hyperparameter 𝛽 (the entropy weight), which controls the stochasticity of the routes ( Fig. 1 ). Species relative abundance surfaces We acquired eBird Status & Trends weekly relative abundance distributions (data version 2023; hereafter, S&T abundance surfaces) using the ebirdst package version 3.2023.1 [ 18 ]. These surfaces are model-based products derived from eBird checklist data. In the present study, we used these S&T abundance surfaces as inputs to the BirdFlow framework but did not explicitly propagate their uncertainty through the tuning procedure, largely because doing so across many species would be computationally prohibitive. We therefore interpret them as model inputs rather than components of a fully joint uncertainty-propagating inferential pipeline. All species are assigned quality scores by experts during the S&T modeling process; we only included species assigned the highest score (3) across all seasons. We preprocessed S&T surfaces to restrict geographic scope to the Western Hemisphere. In addition, to avoid the influence of high-abundance outliers, we truncated abundance distributions by replacing all values above the 0.99 quantile with the 0.99 quantile value. The rationale behind this trimming was to mitigate the influence of imperfect S&T abundance surfaces on model predictions, where excessive counts in aggregation areas (e.g., due to roosting behavior) could bias abundance estimates. We justified the choice of quantile using a sensitivity analysis (Additional file 1: Fig. S1 ). Individual mark-resight data (transitions) We used empirical movement data to select the best hyperparameters from a set of 225 candidate models (see Hyperparameter tuning section) for each species. We acquired tracking data from Movebank ( movebank.org ) and published literature (Additional file 1: Table S1 ). We acquired bird banding data from the United States Geological Survey (USGS) Bird Banding Program (BBL) database [ 19 ]. Motus data were acquired following the Motus Collaboration Policy ( https://motus.org/resources/policy ) and preprocessed to reduce errors (Additional file 1: Supplementary methods). We converted all data into species-specific BirdFlow spatiotemporal coordinates using the BirdFlowR package, an R-language interface that implements and operationalizes the BirdFlow model (version 0.1.0.9075) [ 20 ]. We refer to a set of detections for a single individual bird movement within a migratory season as a “track”. After compiling empirical movement data, we sampled mark-resight pairs (segments of movement) from the tracks, which we call “transitions”. We sampled up to 10000 transitions for each species, considering the tradeoff of computational cost and model performance, using an iterative sampling procedure (Additional file 1: Supplementary methods). For species to be included, we required availability of at least 20 transitions (justified using a sensitivity analysis; Additional file 1: Fig. S2 ) and high-quality abundance maps based on a sensitivity analysis, resulting in 153 migratory species in North America spanning 14 orders and 39 families (Additional file 1: Table S2 ). Across species, the number of training transitions varied from 29 to 7000 (mean = 1673). Banding data accounted for 69.2% of transitions per species on average, and Motus data made up 27.6%. GPS tracking data were available for only seven species, served as the primary data source (> 50% of transitions) for four, and comprised 11.8% of all transitions (Additional file 1: Fig. S3 ). Hyperparameter tuning For each species, we trained BirdFlow models with different combinations of hyperparameters (𝛼, 𝛽, and 𝜖 introduced in the previous sections) in an exhaustive parameter grid (Additional file 1: Supplementary methods). In total, the training generated 225 models per species. We trained models with a spatial resolution of 150 km and temporal resolution of 1 week. We chose this spatial resolution to balance computation cost while maximizing resolution. The 1-week temporal resolution is inherited from the temporal resolution of the S&T abundance surfaces. For each species, we used 70% of transitions as the training set for model selection in a hyperparameter grid search. We used two criteria when selecting the best model for a species: (1) First, we selected models with a high correlation between the modeled probability surface and the eBird S&T relative abundance surface, indicating that the BirdFlow model reproduces an accurate abundance surface. We defined the mean correlation score as the average of the correlation score for pre-breeding migration and post-breeding migration seasons: Where 𝑐𝑜𝑟 - and 𝑐𝑜𝑟 $ represent the Pearson correlation coefficient calculated across grid cells between BirdFlow marginal distributions and S&T abundance surfaces at week 𝑛 of pre-breeding migration and week 𝑚 of post-breeding migration, with 𝑁 and 𝑀 being the total number of weeks of pre-breeding migration and post-breeding migration. The mean correlation score thus reflects how well the model projection matches observations during migration, with value 1 representing a perfect match. We only considered models with mean correlation score >0.98, which is an empirical value we chose after investigating the models for tens of species. This threshold ensures any candidate BirdFlow model accurately reproduces a species’ abundance surface across areas of both high and low density. (2) Second, we selected the best model from the remaining set of candidate models based on how well the model predicted the true resighting location of each transition, quantified by the weighted log-likelihood improvement (𝛥𝐿𝐿). To calculate this metric, we first introduce the log-likelihood improvement (𝛥𝐿𝐿 0 ) for a single transition 𝑖. The 𝛥𝐿𝐿 0 tells us how much more accurate a BirdFlow model prediction is compared to a random sample from the S&T abundance surface for transition 𝑖. Here, 𝑝 12 denotes the probability assigned by the BirdFlow model to a transition from a starting location and time to a destination location and time. Specifically, 𝑙𝑜𝑔 𝑝 12 (𝑡′ 0 , 𝑥′ 0 | 𝑡 0 , 𝑥 0 ) represents the conditional log probability for the BirdFlow model to correctly predict the resighting location 𝑥′ at time 𝑡′, given that the bird is recorded at location 𝑥 at time 𝑡, where [(𝑡 0 , 𝑥 0 ), (𝑡′ 0 , 𝑥′ 0 )] defines empirical transition 𝑖. In simple terms, it describes how probable the observed data are, given the model. 𝑙𝑜𝑔 𝐴(𝑡′ 0 , 𝑥′ 0 ) is the normalized relative abundance at (𝑡′ 0 , 𝑥′ 0 ) calculated from the S&T abundance surface, representing the probability of randomly sampling the exact resighting location from the abundance surface. The weighted average log-likelihood improvement (𝛥𝐿𝐿) is calculated as Where 𝑁 represents the total number of transitions and 𝐴 0 is a weighting factor equal to the normalized weekly relative abundance of the S&T abundance surfaces at the starting location for transition 𝑖. The purpose of the weight is to mitigate potential spatiotemporal bias introduced by rare transitions that are hardly representative of the population. For each species, we selected the best set of hyperparameters based on the two steps described above. We refer to the models using the selected set of hyperparameters as “species-specifically” tuned models. Quantifying hyperparameter transferability We conducted leave-one-out (LOO) analysis to determine the feasibility of transferring hyperparameters from other species in the scenario that tracking data are not available for a target species. In this process, we sought to determine whether taxonomic information could improve the accuracy of hyperparameter transfer. When applying LOO, we omitted transition data of the target species and selected hyperparameters based on the transition data of the remaining species. We only considered models with a mean correlation score >0.98, consistent with species-specific tuning. We then selected the hyperparameter configuration that had the highest average log-likelihood improvement score within the same taxonomic group as the target species. We compared four model tuning methods, including taxonomically informed (1) family-level LOO (Family-LOO), (2) order-level LOO (Order-LOO), and (3) LOO with all the other species except for the target species (All-LOO). We consider All-LOO as a baseline method since it incorporates no taxonomic information. We also included (4) species-specifically tuned models (no LOO) for comparison. Model validation After tuning, we evaluated tuned models using the remaining 30% of the transition data (test set). We again used log-likelihood improvement to evaluate tuned models as described in the hyperparameter tuning section. However, while log likelihood reflects the quality of prediction, it only validates the model at the exact resighting locations and therefore has limited biologically interpretability. Ideally, an evaluation of a spatiotemporal movement model should also consider how close the predictions are to the resighting locations. To address this need, we introduce a spatially explicit metric called “distance gain” (𝛥𝐷) and a variant, relative distance gain (𝛥𝐷 89: ), as the primary validation metric. It measures how close the BirdFlow model prediction is to the empirical resighting location and compares this distance to a random sample of distances drawn from the abundance surface: Where 𝐷 12,0 is the weighted mean distance from the BirdFlow predicted locations to the empirical resighting location for transition 𝑖: Where 𝑗 is a location index running over all spatial pixels 𝐾. 𝑑(𝑗, 𝑥′ 0 ) represents the great circle distance between location 𝑗 and the resighting location 𝑥′ 0 of the transition 𝑖. The weight for location 𝑗 is the BirdFlow predicted probability (𝑝) at location 𝑗 time 𝑡′ given the starting location 𝑥 0 at time 𝑡. Similarly, 𝐷 BC,0 is defined as the weighted mean distance from the normalized abundance surface (S&T) to the empirical resighting location for transition 𝑖, representing a “null distance” of drawing a random sample from all potential locations. To facilitate multi-species comparison, we developed the relative distance gain (𝛥𝐷 89: ) to account for the variation of distance gain among species with different migration distances: With value 1 representing a perfect prediction of the resighting location, value 0 representing predictive performance equivalent to a random sample from the S&T abundance surfaces, and negative values representing worse performance than a random sample. Model decay analysis Next, we quantified the pattern of decay in model performance across time and distance. We fitted exponential regression functions using the test set transitions and relative distance gain metric. We chose exponential functions because the visual patterns generally match well with exponential decay as transition interval increases, a pattern also seen in [ 12 ] (Additional file 1: Supplementary methods). The regression line is asymptotic to y=0, so we define the maximum functional distance horizon as the distance value where the regression line intersects a threshold of 0.05. Thus, the maximum functional distance represents the distance at which BirdFlow model predictions are <5% closer to true resighting locations than a random sample from the S&T distribution of the resighting week (Additional file 1: Supplementary methods, Fig. S4 ). We do not define a maximum functional time horizon since the models often performed well even beyond the temporal threshold (see results). We also evaluated performance decay using log-likelihood improvement metrics and validated prediction horizons using the same approach. Empirical validation of biological metrics To assess the biological plausibility of BirdFlow-generated trajectories, we compared the route statistics of model-simulated and empirical routes. For all seven species for which GPS tracking data were available at the time of analysis, we extracted track sections during pre-breeding and post-breeding migration phases. Only tracks recorded for more than two weeks and with ≥1 observation per week were included to match the temporal resolution of BirdFlow models. For each track, we sampled 100 trajectory predictions conditional on the starting time and location and the ending time of the track. We then calculated three biological metrics frequently used in migration research [ 21 ] for both simulated and observed routes: migration speed, route straightness, and number of stopovers. We defined migration speed as the total distance traveled by the simulated individual within the trajectory divided by the total length of the time. Route straightness was the beeline distance of the routes divided by the total distance traveled. We defined the number of stopovers as the number of 150-km resolution grid cells in which the individual stayed >7 days; this temporal resolution is predefined and aligned with the eBird S&T product. The route features were calculated using trajr package version 1.5.1 [ 22 ] in R version 4.5. Modeling hyperparameters using species traits We modeled the selected hyperparameters for each species using species-specific morphological traits and distribution range statistics to understand hyperparameter variation across species. We acquired species morphological traits from the AVONET dataset [ 23 ]. We further calculated range-wide summaries as explanatory variables, including range size, spatial variation of abundance, and the minimum, maximum, and centroid of longitude and latitude for different seasons (Additional file 1: Supplementary methods). Some of these explanatory variables were highly correlated (Pearson’s r >0.8); for a group of correlated variables, we only included one representative variable in the model (Additional file 1: Supplementary methods, Fig. S5 ). To account for potential overfitting, we tuned Random Forest regression models for each hyperparameter separately using 5-fold cross-validation on the species-level dataset and calculated the coefficient of determination (𝑅 G ) based on pooled predictions on out-of-fold datasets. We calculated impurity-based feature importance metrics for the best model and linearly regressed BirdFlow hyperparameters against the variable with the highest feature importance in the Random Forest model to understand how a single trait is associated with the hyperparameter values. The feature calculations are performed in R version 4.5 and the Random Forest model is fitted using the scikit-learn package version 1.6.1 [ 24 ] in Python version 3.11.11. Results Multi-source transition data improves model performance BirdFlow models tuned using species-specific mark-resight data consistently predicted resight locations better than a random sample from the S&T abundance surface corresponding to the week of resighting (the null model) for all species ( Fig. 2 ). This was reflected in both relative distance gain and log-likelihood (LL) improvement metrics. Averaged across species and transitions, tuned models were 27 times better in predicting the exact resighting location (average log-likelihood improvement 3.30, range: 0.27–7.13 across species; 95% percentile interval 0.67–5.17) and 1058 km closer to the empirical resighting locations compared to the null model (range: 55–6793 km; 95% percentile interval 225–2352 km). Download figure Open in new tab Figure 2. BirdFlow models for all 153 species perform better compared to a random sample from eBird Status & Trends (indicated by the dotted red line). Performance is evaluated by both weighted average log-likelihood improvement (x axis) and distance gain (y axis). Metrics are calculated across all hold-out transitions with different forecasting horizons. The log-likelihood improvement is additionally labeled using straightforward explanations (e.g., 7.4x means on average 7.4 times better in predicting the actual recovery location of the transitions compared to a random sample). BirdFlow’s predictive performance decreased with increasing forecasting distance and time, but even at long temporal and spatial horizons, performance was better than a random sample from the S&T abundance surface ( Fig. 3 , a-b). Across time, most BirdFlow models continued to achieve better performance compared to S&T after 270 days ( Fig. 3a ) based on the relative distance gain metric. Across space, the averaged maximum functional distance beyond which the BirdFlow model was no longer favored over the null S&T abundance surfaces was 2449 km (95% percentile interval: 242–6395 km; Fig. 3 b-c). Similar results for the log-likelihood improvement metric can be found in Additional file 1: Fig. S6 . Download figure Open in new tab Figure 3. Model performance decays as time and distance horizon increase. Model performance decay is measured as the decrease of relative distance gain as the time horizon (a) and distance horizon (b) extends. Gray lines represent the fitted exponential regression function for each species, and blue lines represent the functions using cross-species-averaged parameters. (c) measures the maximum distance horizon that BirdFlow models still have advantages against a random sample from the abundance surface, defined as the horizon where the exponential decay curve (asymptotic to y=0) in (b) intersects with the y=0.05 line (5% closer to true resighting locations than S&T). Red dashed lines in (c) indicate zero distance horizon. See Additional file 1: Fig. S4 for schematic diagram of calculation in (c). By comparing simulated trajectory statistics with empirical observations, we show that the biological characteristics of BirdFlow-generated routes are generally consistent with data from tracked individual birds ( Fig. 4 , Additional file 1: Fig. S7 , Table S3 ). A biologically plausible model should expect around 95% of the empirically observed biological metrics to occur within the 95% simulated intervals and around 50% to occur within the 50% simulated intervals. For migration speed, on average 86% of the empirical observations fell within the 95% simulated intervals (range: 56%–100% across species), and 44% fell within the 50% simulated intervals (range: 22–58%) ( Fig. 4 ). For route straightness, the averages are 90% (63%–100%) and 49% (11%–64%), respectively (Additional file 1: Fig. S7 ). For the number of stopovers, these values are 99.59% (97.14%–100%) and 98.44% (94.29%–100%), respectively (Additional file 1: Fig. S7 ). The results for the number of stopovers are less informative because the coarse weekly temporal resolution reduces the precision of stopover calculations. Download figure Open in new tab Figure 4. BirdFlow models simulate biologically plausible migration speeds. For each species, we randomly sampled at most 30 tracks from pre-breeding or post-breeding migration seasons. Red points indicate the empirical migration speeds calculated from GPS tracks and gray bars indicate 95% and 50% percentile intervals of the simulated migration speed. A quantitative summary of the figure can be found in Additional file 1: Table S3 . Hyperparameter transfer among related species enhances model performance The taxonomically informed model tuning methods showed better performance than the All-LOO method, both measured by relative distance gain and log-likelihood ( Table 1 ). Models tuned using information from more closely related species performed better relative to the baseline All-LOO: Species-specific tuning performed the best (89.5% species showed log-likelihood improvement; 82.9% species show positive distance gain improvement), followed by Family-LOO (37.3% species showed log-likelihood improvement; 45.5% species show positive distance gain improvement) and Order-LOO (20.4% species showed log-likelihood improvement; 18.4% species show positive distance gain improvement). View this table: View inline View popup Table 1. Taxonomically informed model tuning shows improvements over the all-leave-one-out method (ALL-LOO) We further investigated the influence of transition data abundance on model tuning. The result shows that a small amount of training data during model tuning is sufficient for achieving a performance improvement compared to All-LOO baseline, and that more transition data for model tuning is not correlated with higher performance improvement across species (Additional file 1: Figure S8 ; Pearson’s r: 0.0012, Spearman’s r: 0.0942). Species traits explain variation in hyperparameters While we did not observe statistically meaningful clustering patterns of hyperparameters by taxonomic group ( Fig. 5 ), the regression results show that species’ morphological traits and distribution range summaries are predictive of their tuned hyperparameters (Additional file 1: Fig. S9a-c ). A Random Forest regression model trained by 5-fold cross validation had moderate predictive power for the tuned values of hyperparameter 𝛽 (𝑅 G = 0.27), 𝜖 (𝑅 G = 0.20), and 𝛼 (𝑅 G = 0.27) across species. Body mass was the most important predictor of entropy weight and distance weight, with higher body mass correlated with higher entropy weights (Pearson’s r = 55; Additional file 1: Fig. S9d ) and lower distance weights (Pearson’s r = -0.33; Additional file 1: Fig. S9f ). Higher spatial variation of abundance and narrower range at breeding season were also correlated with higher distance weights (Pearson’s r = 0.45 and -0.47, respectively; Additional file 1: Fig. S9e , Fig. S5 ). Download figure Open in new tab Figure 5. Associations between selected hyperparameters and species traits. Principal component analysis results for the hyperparameter patterns across species. Color denotes the order of the species. Gray text indicates the relative PC loadings of entropy weight 𝛽, distance weight 𝛼, and distance exponent 𝜖, respectively. Higher values in PC1 are correlated with higher entropy weight, lower distance weight, and higher distance exponent. Higher values in PC2 are correlated with higher distance weight and higher distance exponent. Small jitters in x and y axes were added for better visualization, since the best models for certain species share the same hyperparameter combination. Discussion Our results show that BirdFlow models tuned with individual-based movement data can simulate routes that closely match actual individual tracks. By integrating the movements of a small sample of tracked individuals with eBird distribution data, we inferred range-wide population-level movement patterns for 153 species. These models provide a first quantitative yardstick for delineating population-specific migration routes, even when individuals are tracked or banded in only part of that range. Specifically, BirdFlow predicted movement transitions from week i to week j outperformed a null baseline of randomly sampling from a species’ abundance distributions corresponding to the resighting week. This result holds for >100 species ( Fig. 2 ) across forecast horizons of a few thousand kilometers (on average 2449 km) and over 270 days ( Fig. 3 ). By integrating individual-based movement data from multiple sources into the hyperparameter tuning of BirdFlow, we greatly expand the number of species for which species-specific models can be tuned. Banding data alone enable tuning for 88.2% of the study species (defined as at least 20 transitions per species), but individual-level movement data remain scarce for many small-bodied passerines. Because of their low body mass, these species are difficult to track with current GPS technology [ 15 , 25 ], and band-recovery data accumulate slowly due to low recovery rates [ 26 ]. In contrast, the Motus Wildlife Tracking System supports tuning for 49.7% of the study species, a smaller but rapidly growing fraction that is particularly valuable for small passerines [ 17 ]. As a result of their complementarity, combining banding and Motus data makes it possible to tune models for 98.7% of the studied species. Our results demonstrate that improvements in model performance can be achieved with limited mark-resight data. Notably, simply increasing the number of individual transitions does not consistently lead to higher predictive performance across species (Additional file 1: Fig. S8 ). This pattern is encouraging because it indicates that a small amount of empirical data can meaningfully calibrate and improve BirdFlow predictions for a focal species. This result also suggests it may be possible to increase the flexibility (i.e. number of hyperparameters) of the current BirdFlow model structure to learn more detailed patterns from the individual data, which could lead to increasing performance with higher amounts of tuning data. To our knowledge, this study provides the first large-scale integration of distribution and individual-level data to infer animal movement across continents and among hundreds of species. As part of this study, we release the batch of BirdFlow models for 153 North American migratory species (see data statement), representing a comprehensive collection of population-level movement models spanning the full spatial and temporal ranges of these species. Previous research on bird migration has developed quantitative and mechanistic models of movement and connectivity, but such approaches have typically been applied at the species level or to a limited number of intensively tracked populations. In contrast, BirdFlow resolves range-wide migration patterns at the population level across hundreds of species, addressing a key scaling gap given growing evidence that population-level variation in migratory strategies and population trends can be comparable to, or exceed, differences among species [ 10 , 27 , 28 ]. As expected, we see a decay of model performance along spatial and temporal forecasting horizons. A main explanation for this performance decay is the biological reality of populations mixing across space and time. For example, for species with low migratory connectivity, we expect the spatial connection patterns among populations to dissipate within a certain horizon [ 29 ]. For these species, BirdFlow models are not expected to be able to distinguish the movement of individuals in the same spatiotemporal unit. Alternatively, the decayed model performance could stem from the accumulation of prediction errors. Disentangling these two different factors requires further investigation into the migration connectivity estimation of the modeled species. While most species’ models recovered the population-level characteristics of tracked individuals, some models exhibited consistent deviations. For example, the Ovenbird model consistently predicted lower migration speed and higher route straightness relative to empirical data, while the Turkey Vulture model consistently predicted higher migration speed and lower route straightness ( Fig. 4 , Additional file 1: Fig. S7 ). The biases indicate potential space to improve the parameterization and/or structure for those models. Collectively, our results indicate that the three hyperparameters reflect species’ functional traits and distributional properties. For example, small passerine birds generally showed low entropy weight, high distance weight, and low distance exponent ( Fig. 5 ; lower PC1 values; less spread and higher movement cost), while large birds in Anseriformes (ducks and geese) showed the opposite (higher PC1 values; more spread and lower movement cost). This result aligns with our understanding that small songbirds often depend on frequent flights and refueling, whereas large waterfowl accumulate substantial reserves and undertake long-distance flights [ 30 – 32 ]. We also find that species with narrow ranges (low spatial coverage) and high variation in spatial abundance show higher distance weight and higher distance power weight (higher PC2 values), resulting in a migration pattern with higher movement cost and favoring long jumps over small hops. On the other hand, species with broad ranges and low abundance variation show the opposite. This may occur because narrow-ranged species, being restricted to small geographic areas or specialized habitats, may face stronger migration barriers and have fewer suitable stopover habitats to choose from, which elevates the distance cost and forces long jumps rather than small hops (e.g., [ 33 ]). The evidence of hyperparameter similarity among taxonomic groups opens the door for tuning species lacking any individual-level movement data. Hyperparameters could be transferred from close taxonomic relatives using a taxonomically informed leave-one-out approach, providing a practical fallback when species-specific movement data are unavailable. We show that BirdFlow models using hyperparameters transferred from phylogenetically adjacent species groups performed better than models using parameters transferred from distant or unrelated groups. In BirdFlow models, the three hyperparameters 𝛽, 𝛼 and 𝜖 encode different aspects of the migration pattern and strategies: the diversity of the migration routes, the cost of energy expenditure, and the trade-off between long-distance jumps versus short-distance hops. These diverse migration patterns and strategies are driven by species-specific trade-offs that ideally maximize the survival and reproduction of the species [ 34 , 35 ]. Species that are closer in taxonomic relationship or functional traits might be more similar in migration patterns and strategies due to ancestral traits inheritance or convergent evolution [ 36 ] and consequently may also be closer in BirdFlow hyperparameter space. Despite this evidence, we did not find statistically significant clustering patterns of hyperparameters across orders or families, which indicates that within-group hyperparameter variation is comparable to among-group variation, further emphasizing that species-specific tuning is still important when available. Additionally, because the validation framework is insensitive to both model structure and data type, it opens opportunities for additional refinement of the BirdFlow modeling framework, from adjusting hyperparameters to exploring alternative model formulations. For example, we acknowledge that log-likelihood may not be the optimal metric for hyperparameter tuning in all cases. Future work could therefore explore additional tuning criteria, such as distance gain or connectivity-based metrics. This tuning framework also provides a way to quantitatively evaluate different structures as more data become available, ensuring that results grow increasingly robust over time. The probabilistic Markovian structure of BirdFlow models does have limitations in estimating the routes of individuals, in particular across long time scales and long distances. BirdFlow intrinsically models the synthetic movements of all individuals presented within a spatiotemporal unit, making it a model of space-time tracks along which the population’s biomass redistribution occurs. The Markovian nature of the current BirdFlow model algorithm precludes “memory” for individual movement histories. Accordingly, BirdFlow-simulated tracks are not expected to resolve all individual-level variation in the observed tracks. Rather, they represent the average movement patterns across all individuals at a given location and time. While the generalization of movement patterns to species full populations and ranges is one of the key contributions of BirdFlow model output, we should be aware that the individual transition data on which models are tuned still have spatiotemporal biases. Tracking data often concentrate in a few locations rather than spanning the full species distribution [ 15 ], and banding or Motus records are largely restricted to sites where stations are established. Moreover, relatively few transition data extend into Central and South America, which may bias the model to prioritize movement tuning for North America. While overfitting is unlikely with the current three-hyperparameter model, and models are heavily constrained by the fine-scale eBird distributional data, such biases warrant attention. If this tuning framework were applied to a more complex model structure, bias-control methods or balanced sampling strategies could be applied to quantify such potential bias. Another limitation of this work, and of BirdFlow more generally, is that it does not fully propagate uncertainty in eBird S&T relative abundance surfaces and tracking observation inputs (e.g. cf. Buderman et al. [ 37 ]). Rather than jointly estimating movement processes and observation models from multiple data streams within a single hierarchical model, BirdFlow uses abundance surfaces as a ground truth, while additional data sources such as tracking and mark-resight data are used primarily to inform model tuning and evaluation. As a result, uncertainty is not explicitly propagated throughout the full inference process. In this study, we did not incorporate uncertainty in the eBird S&T abundance estimates because doing so at this scale would have been computationally prohibitive. Although the criterion requiring mean correlation score > 0.98 may provide some tolerance to uncertainty and help reduce overfitting during model tuning, it does not substitute for formal uncertainty propagation. Future extensions could more formally account not only for uncertainty in the S&T abundance surfaces, but also for observation-process biases, such as spatiotemporal bias in banding and Motus data. Such approaches may provide stronger uncertainty quantification and a clearer representation of heterogeneity among individuals or populations. As illustrated in Fig. 6 , species-specifically tuned BirdFlow models provide an approach for addressing a broad range of ecological, evolutionary, and conservation questions. These models provide a population-resolved, continent-scale representation of migratory movement across the annual cycle, making it possible to link movement dynamics to environmental context and to quantify population differences without collapsing them into a single species-level pattern. This population-level resolution also allows direct, comparable tests of how migration strategies diverge or converge across populations and taxa. Finally, the same framework supports conservation-relevant attribution by connecting seasonal ranges, enabling researchers to evaluate which times of year and which regions are most strongly associated with among-population differences in trends and therefore most informative for explanation, prediction, and targeted intervention ( Fig. 6 ). These questions often require range-wide coverage as provided by BirdFlow and are difficult to address with distributional data or sparse individual-tracking data alone. Together, these advances position BirdFlow as a general framework for integrating multi-source data to estimate range-wide bird movements at continental scales, opening new opportunities to link population ecology with applied management across hundreds of species. Download figure Open in new tab Figure 6. Applications of using BirdFlow for ecological, evolutionary, and conservation research. In ecological research, BirdFlow enables (i) quantification of population-level migration dynamics, such as regional influxes and effluxes, (ii) integration of migration patterns with environmental conditions to assess environmental drivers and barriers of movement, and (iii) linkage with surveillance data to study the spread, control, and prevention of wildlife-borne diseases. In evolutionary research, BirdFlow-simulated trajectories support analyses of (i) convergence and divergence in migration routes within and among species and (ii) variation in migratory connectivity, providing insights into the mechanisms shaping differences in migration strategies. In conservation applications, BirdFlow models facilitate (i) assessment of population-specific anthropogenic and environmental risks accumulated along migration routes and (ii) identification of the seasons and locations most influential for population trends by connecting breeding and wintering grounds. Conclusions By integrating eBird Status & Trends abundance surfaces with multi-source mark–resight observations, including banding recoveries, Motus telemetry, and GPS tracking, we parametrized BirdFlow models for 153 North American migratory bird species. This continent-wide inference of population movements over the full annual cycle provides a quantitative basis for delineating population-specific routes across species’ full ranges, complementing existing individual-based tracking efforts and helping fill a key gap in ecological research and applied conservation. Tuned models consistently outperform abundance-based null expectations, remain informative across forecasting horizons of thousands of kilometers and many months, and generate biologically realistic migration routes that align with individual movement data. When species-specific movement data are unavailable, taxonomically informed hyperparameter transfer enables improved model, demonstrating that migration strategies share partial predictability across related taxa. Together, these advances establish BirdFlow as a general, extensible framework for estimating the comprehensive migratory movements of entire populations for hundreds of species, offering a foundation for more accurate predictions for ecological understanding, conservation planning, disease surveillance, aviation risk assessment, and public outreach as new data continue to accumulate. Declarations Ethics approval and consent to participate Not applicable Consent for publication Not applicable Availability of data and materials Code is available at https://github.com/birdflow-science/BirdFlowTuning . Data and the species-specifically tuned models are available at https://doi.org/10.5281/zenodo.17545007 . Models can be loaded using the BirdFlowR package ( https://github.com/birdflow-science/BirdFlowR ). Tuned models that passed manual quality control will be released along with the BirdFlowR package in the future. Competing interests The authors declare that they have no competing interests Funding This material is based upon work supported by the National Science Foundation under Grant No. 2210979 and 2210980, and Illinois Distinguished Fellowship (to YC). Computation was supported by the Unity computing platform at the Massachusetts Green High Performance Computing Center. Authors’ contributions Conceptualization: DS, YC, DLS, BVD, AMD, EP, MF; Data curation: LEB, SAM, DLS, BVD, YC, EP, YD, AMD; Formal analysis and investigation: YC, DLS, EP, YD; Methodology: DS, YC, DLS, EP, BVD, AMD, MF, DF; Software: DLS, EP, YC; Validation: YC, DLS, EP, YD. Visualization: YC. Project administration and funding: DS, AMD, BVD; DF; Supervision: BVD, AMD, DS; Writing: All authors. Supplementary Figures Download figure Open in new tab Figure S1. Sensitivity analysis of trimming quantile choices. To avoid the influence of high-abundance outliers, we truncated abundance distributions by replacing all values above the 0.99 quantile with the 0.99 quantile value. We showed that this outlier removal improved the average log-likelihood across the 225 models for 4 out of 5 species tested, for which individual tracking data were available at the time of analysis. The multi-species averaged log-likelihood improvement was the highest with 0.99 quantile trimming (𝛥𝐿𝐿 = 0.074), followed by 0.98 (𝛥𝐿𝐿 = 0.012), 1 (no trimming; 𝛥𝐿𝐿 = 0), and 0.95 (𝛥𝐿𝐿 = -0.009). Download figure Open in new tab Figure S2. Sensitivity analysis of number of training transitions. To determine the minimum number of transitions required to train BirdFlow models, we applied a learning-curve analysis for five species for which individual tracking data were available at the time of analysis, each trained with increasing number of transitions. Model performance was evaluated on a hold-out test dataset as described in the Method section. We found that models trained with approximately 20–50 transitions achieved substantially improved performance (log-likelihood) and began to approach a performance plateau. We therefore chose a threshold of 20 transitions to train the models for maximum number of species. Download figure Open in new tab Figure S3. The transition data composition for each species, colored by data sources. The height of each block is proportional to the square root of total transition count available for the species. The width of the block represents the proportion of transitions from each data source for each species. The top 50 species with most abundant transition data are labeled with their eBird English common name. Download figure Open in new tab Figure S4. A schematic diagram of how the maximum functional forecasting horizon is calculated. We set a threshold of 0.05 for both relative distance gain and log-likelihood improvement, with values below the threshold being considered as BirdFlow models having no advantages over the baseline. We calculated the intersection of each species’ model performance decay line with the threshold, and averaged the distance horizon at the intersection as a multispecies averaged maximum functional horizon. Download figure Open in new tab Figure S5. Pairwise Pearson’s correlation coefficients among variables included in the Random Forest regression. The variables include the hyperparameters, principal components of the hyperparameters, species range summaries, and species morphological traits. Each datapoint is a species. Species range summaries and morphological traits with Pearson’s correlation coefficients over 0.8 were considered highly collinear, and in such cases only one variable was retained while the other was removed before the analysis. Download figure Open in new tab Figure S6. Model performance decays as time and distance horizon increase. Model performance decay is measured as the decrease of log-likelihood as the time horizon (a) and distance horizon (b) extends. Gray lines represent the fitted exponential regression function for each species, and blue lines represent the functions using cross-species-averaged parameters. (c) measures the maximum distance horizon that BirdFlow models still have advantages against a random sample from the abundance surface in terms of log-likelihood across species, defined as the horizon where the exponential decay curve in (b) intersects the y=0.05 horizontal line (1.05 times better than S&T). Red dashed lines in (c) indicate zero distance horizon. See Supplementary Fig. 4 for schematic diagram of calculation in (c). On average, the maximum functional distance horizon is 593.49 km for log-likelihood improvement (95% percentile interval: 31 km - 2145 km). The fact that the maximum functional distance horizon for log-likelihood improvement (593 km) is lower than that for relative distance gain ( Fig. 3 ) demonstrates that log-likelihood is less tolerant to error accumulation along the prediction horizon, since it only measures the performance at the exact resighting location and does not consider geographic closeness. Download figure Open in new tab Figure S7. BirdFlow models simulate biologically plausible route straightness and number of stopovers. For each species, we randomly sampled at most 30 tracks from pre-breeding or post-breeding migration seasons. Red points indicate the empirical metrics calculated from the GPS tracks and the gray bars indicate 95% and 50% percentile intervals of the simulated metrics. A quantitative summary of the figure can be found in Additional file 1: Table S3 . Download figure Open in new tab Figure S8. Lack of correlation between the improvements of species-specifically tuned models and the amount of training transitions. Although species-specific tuning improves model performance compared to All-LOO baselines, more training data does not necessarily lead to higher log-likelihood improvement across species. Each data point represents a best model for each species tuned on species-specific data. Download figure Open in new tab Figure 9. Explaining hyperparameters using species traits. We trained Random Forest models using species morphological traits and range summary statistics to explain the hyperparameter patterns among species. The Random Forest model was trained using 5-fold cross validation, and the out-of-fold prediction results for all species are shown for hyperparameter 𝛽 (a), 𝜖 (b) and 𝛼 (c), respectively. The hyperparameters are also linearly regressed against the explanatory variables with the highest feature importance score in the Random Forest models for 𝛽 (d), 𝜖 (e) and 𝛼 (f). The values on the horizontal axes are log-transformed to calculate the correlation coefficients in (d-f). Small jitters in x and y axes were added in (d-f) to disperse overlapping points. Download figure Open in new tab Figure S10. The BirdFlow-simulated spring migration routes for 9 migratory species. Stay length indicates the inferred total length of stay at the stopover sites in weekly temporal resolution, with zero stay length indicating non-stopping or stopover shorter than one week. 10 routes were simulated for each species. Supplementary Tables View this table: View inline View popup Download powerpoint Table S1. The sources of tracking data for each species used in this research. View this table: View inline View popup Table S2. Species list for the 153 modeled species. View this table: View inline View popup Download powerpoint Table S3. Summarizing the biological plausibility of BirdFlow-simulated routes. A biologically plausible model should expect around 95% of the empirically observed biological metrics to occur within the 95% simulated intervals by BirdFlow models, and roughly 50% of the empirically observed biological metrics to occur within the 50% simulated intervals. This result indicates that BirdFlow-generated routes are biologically realistic and reflect population-level variation. More than 50% of observations occurred within the 50% simulated intervals for the number of stopovers, likely because the coarse weekly temporal resolution increases variability and reduces informativeness of stopover calculations. Stationary tracks were not included for straightness validation. Supplementary methods Motus data preprocessing Raw Motus data were summarized into “visits” for each time a bird visited a Motus station, with arrival and departure times for that event. These data were then validated using multiple criteria so that tracks generated from these summaries had reasonable estimated flight speeds, fell within expected species ranges, and there were at least 3 consecutive detections for any given visit. In addition, Motus stations which were deemed ‘too noisy’ were eliminated from the results to reduce the risk of introducing false positives. Transition sampling We applied an iterative transition sampling procedure that considers the trade-off of computational cost and model performance improvement. First, we iteratively sampled a single one-week transition from each of the tracks without replacement, until the maximum transition count was met or the target tracks run out of transitions to sample (or without any one-week transition to sample, as often seen in banding data), e.g., (𝑡 * , 𝑥 * ) to (𝑡 G , 𝑥 G ), and (𝑡 G , 𝑥 G ) to (𝑡 H , 𝑥 H ), where the 𝑡 is the weekly timestep and 𝑥 is the location corresponding to the timestep. Second, if the size of the one-week transition set did not exceed the maximum transition count, we iteratively randomly sampled one 𝑛-timestep transition (𝑛 can be any positive integer from 2 to maximum week duration for the track) from each track without replacement, until the maximum transition count was met or the target tracks run out of transitions to sample, e.g., (𝑡 * , 𝑥 * ) to (𝑡 I , 𝑥 I ). The rationale behind this sampling strategy is to prioritize the information-rich one-week transitions, while also integrating the information in longer transitions to tune and evaluate the prediction performance of BirdFlow models across seasons. To constrain the transitions in a reasonable spatiotemporal range, only transitions that span less than 270 days with at least one end falling into migration seasons were sampled. We did not sample transitions with both ends falling into breeding or non-breeding seasons as we focused on the migratory periods, and since the species detectability can change vastly in stationary periods (especially during breeding and molting periods), which may cause spatiotemporal variation in S&T abundance surfaces that do not reflect movement (e.g., Socolar et al. 2025) Hyperparameter grid search settings We reparametrized the hyperparameters for easier interpretation, defining the relative proportion of the observation term 𝜌 = 1/(1 + + 𝛽) and the relative proportion of the distance and entropy terms 𝜙 = 𝛽/𝛼, which allows us to rewrite Eq. 1 as Since different combinations of parameters have different emphases on matching S&T abundance surfaces and regulating the route behavior patterns, this reparameterization allowed us to directly vary the overall emphasis of the observation weight with 𝜌 and vary the degree of stochasticity with 𝜙. In our grid search, we varied 𝜌 with values (0.95, 0.975, 0.99, 0.999, 0.9999) to emphasize the high importance of matching S&T abundance surface, 𝜙 across values (2, 4, 6, 8, 10), and 𝜖 with values (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9). We chose these grid search values based on the empirical investigation in Fuentes et al. 2023 where none of the models with these parameter combinations perform the best with the extremal grid search values and the performance appears to vary smoothly as the hyperparameters change, which indicates a potentially optimal model enclosed in this hyperparameter surface. We configured parameters and passed them to the BirdFlowPy package (Fuentes 2022, Fuentes et al. 2023) for model fitting. Model decay analysis The exponential regression can be formulated as: Where 𝑎, 𝑏, and 𝑐 are parameters and 𝑥 is the independent variable (time or distance horizon). The multi-species median regression line is calculated by taking the median value of each parameter across species, respectively. The maximum functional distance horizon is calculated as the value on the x-axis where the regression line intersects with a threshold, which was set to 0.05 for both the relative distance gain and the log-likelihood improvement (supplementary Fig. 4). For relative distance gain, a 0.05 value represents a horizon where the BirdFlow prediction is only 5% closer to the empirical resighting locations than the S&T abundance surfaces, and for log-likelihood improvement, 0.05 means the BirdFlow prediction is 1.05 times more likely to recover the empirical observation than a random sample from the S&T abundance surfaces. While these thresholds are arbitrary, we believe they generally reflect a functional horizon, beyond which the BirdFlow model is no longer meaningfully better than the original S&T prediction. Distributional summaries for each species We calculated distributional summaries as explanatory variables for the best hyperparameters, including range size, spatial variation of abundance, the minimum, maximum, and centroid of longitude and latitude for different seasons. First, the eBird S&T abundance map was aggregated to 150km resolution and split into pre-breeding season, breeding season, post-breeding season, and non-breeding seasons. The season partitioning is queried from the eBird S&T dataset. Second, we averaged the abundance estimation across weeks for each spatial pixel by each season. Next, for each season, we calculated: Range size, as the maximum number of spatial pixels with abundance>0. Range abundance variation, as the standard deviation of the average-abundance variation across spatial pixels. Maximum and minimum range longitude and latitude: The maximum and minimum of the longitude and latitude across spatial pixels with abundance>0. Centroid of longitude and latitude: The abundance-weighted average longitude and latitude. Acknowledgements We thank the USGS bird banding lab for the bird banding data collection and Birds Canada for preparing and sharing the data of the Motus Wildlife Tracking System and the individual contributors to these projects. We are also grateful to the many thousands of participants, reviewers, and partner organizations around the world who support and contribute to eBird. Funder Information Declared U.S. National Science Foundation , 2210979 , 2210980 Illinois Distinguished Fellowship (to YC) Footnotes Update on format; Additional figures added; Rephrased paragraphs; Additional background and discussion paragraphs. https://github.com/birdflow-science/BirdFlowTuning https://zenodo.org/records/16985511 List of abbreviations 𝐿𝐿 log-likelihood 𝛥𝐿𝐿 weighted log-likelihood eBird S&T eBird Status & Trends relative abundance surfaces LOO leave-one-out BF BirdFlow 𝑝 12 the probability assigned by the BirdFlow model to a possible destination in space and time, conditional on a specified starting location and time. References 1. ↵ Sullivan BL , Aycrigg JL , Barry JH , Bonney R , Bruns N , Cooper CB , et al. The eBird enterprise: an integrated approach to development and application of citizen science . Biol Conserv . 2014 ; 169 : 31 – 40 . OpenUrl 2. ↵ Flack A , Aikens EO , Kölzsch A , Nourani E , Snell KRS , Fiedler W , et al. New frontiers in bird migration research . Curr Biol . 2022 ; 32 ( 20 ): R1187 – R99 . OpenUrl CrossRef PubMed 3. ↵ Shaw AK . Causes and consequences of individual variation in animal movement . Mov Ecol . 2020 ; 8 ( 1 ): 12 . OpenUrl PubMed 4. ↵ Kramer GR , Andersen DE , Buehler DA , Wood PB , Peterson SM , Lehman JA , et al. Population trends in Vermivora warblers are linked to strong migratory connectivity . Proc Natl Acad Sci USA . 2018 ; 115 ( 14 ): E3192 – E200 . OpenUrl Abstract / FREE Full Text 5. ↵ Emmenegger T , Hahn S , Arlettaz R , Amrhein V , Zehtindjiev P , Bauer S . Shifts in vegetation phenology along flyways entail varying risks of mistiming in a migratory songbird . Ecosphere . 2016 ; 7 ( 6 ): e01385 . OpenUrl 6. Zurell D , Graham CH , Gallien L , Thuiller W , Zimmermann NE . Long-distance migratory birds threatened by multiple independent risks from global change . Nat Clim Chang . 2018 ; 8 ( 11 ): 992 – 6 . OpenUrl PubMed 7. Rosenberg KV , Dokter AM , Blancher PJ , Sauer JR , Smith AC , Smith PA , et al. Decline of the North American avifauna . Science . 2019 ; 366 ( 6461 ): 120 – 4 . OpenUrl Abstract / FREE Full Text 8. Buchan C , Franco AMA , Catry I , Gamero A , Klvaňová A , Gilroy JJ , et al. Spatially explicit risk mapping reveals direct anthropogenic impacts on migratory birds . Glob Ecol Biogeogr . 2022 ; 31 ( 9 ): 1707 – 25 . OpenUrl 9. Horton KG , Buler JJ , Anderson SJ , Burt CS , Collins AC , Dokter AM , et al. Artificial light at night is a top predictor of bird migration stopover density . Nat Commun . 2023 ; 14 ( 1 ): 7446 . OpenUrl CrossRef PubMed 10. ↵ Johnston A , Rodewald AD , Strimas-Mackey M , Auer T , Hochachka WM , Stillman AN , et al. North American bird declines are greatest where species are most abundant . Science . 2025 ; 388 ( 6746 ): 532 – 37 . OpenUrl CrossRef PubMed 11. ↵ Rushing CS , Ryder TB , Scarpignato AL , Saracco JF , Marra PP . Using demographic attributes from long-term monitoring data to delineate natural population structure . J Appl Ecol . 2016 ; 53 ( 2 ): 491 – 500 . OpenUrl 12. ↵ Fuentes M , Van Doren BM , Fink D , Sheldon D . BirdFlow: learning seasonal bird movements from eBird data . Methods Ecol Evol . 2023 ; 14 ( 3 ): 923 – 38 . OpenUrl 13. ↵ Fink D , Auer T , Johnston A , Ruiz-Gutierrez V , Hochachka WM , Kelling S . Modeling avian full annual cycle distribution and population trends with citizen science data . Ecol Appl . 2020 ; 30 ( 3 ): e02056 . OpenUrl 14. ↵ Fink D , Auer T , Johnston A , Strimas-Mackey M , Ligocki S , Robinson O , et al. eBird status and trends: data version 2023 . Ithaca (NY): Cornell Lab of Ornithology ; 2025 . 15. ↵ Scarpignato AL , Huysman AE , Jimenez MF , Witko CJ , Harrison AL , Seavy NE , et al. Shortfalls in tracking data available to inform North American migratory bird conservation . Biol Conserv . 2023 ; 286 : 110224 . OpenUrl 16. ↵ Meehan TD , Saunders SP , DeLuca WV , Michel NL , Grand J , Deppe JL , et al. Integrating data types to estimate spatial patterns of avian migration across the Western Hemisphere . Ecol Appl . 2022 ; 32 ( 7 ): e2679 . OpenUrl PubMed 17. ↵ Taylor PD , Crewe TL , Mackenzie SA , Lepage D , Aubry Y , Crysler Z , et al. The Motus Wildlife Tracking System: a collaborative research network to enhance the understanding of wildlife movement . Avian Conserv Ecol . 2017 ; 12 ( 1 ): art8 . OpenUrl 18. ↵ Strimas-Mackey M , Ligocki S , Auer T , Fink D. ebirdst: access and analyze eBird status and trends data products . 2025 . https://CRAN.R-project.org/package=ebirdst . Accessed 16 Dec 2025. 19. ↵ Celis-Murillo A , Malorodova M , Nakash E. North American Bird Banding Program dataset 1960–2022 . U.S. Geological Survey data release. 2022 . doi: 10.5066/P9BSM38F . OpenUrl CrossRef 20. ↵ Plunkett E. BirdFlowR: predict and visualize bird movement . 2025 . https://github.com/birdflow-science/BirdFlowR . Accessed 16 Dec 2025. 21. ↵ Alerstam T . The geographical scale factor in orientation of migrating birds . J Exp Biol . 1996 ; 199 ( 1 ): 9 – 19 . OpenUrl Abstract / FREE Full Text 22. ↵ McLean DJ , Skowron Volponi MA. trajr: an R package for characterisation of animal trajectories . Ethology . 2018 ; 124 ( 6 ): 440 – 8 . OpenUrl CrossRef 23. ↵ Tobias JA , Sheard C , Pigot AL , Devenish AJM , Schleuning M , et al. AVONET: morphological, ecological and geographical data for all birds . Ecol Lett . 2022 ; 25 ( 3 ): 581 – 97 . OpenUrl CrossRef PubMed 24. ↵ Pedregosa F , Varoquaux G , Gramfort A , Michel V , Thirion B , Grisel O , et al. Scikit-learn: machine learning in Python . J Mach Learn Res . 2011 ; 12 : 2825 – 30 . OpenUrl CrossRef PubMed 25. ↵ Bridge ES , Thorup K , Bowlin MS , Chilson PB , Diehl RH , Fléron RW , et al. Technology on the move: recent and forthcoming innovations for tracking migratory birds . BioScience . 2011 ; 61 ( 9 ): 689 – 98 . OpenUrl CrossRef Web of Science 26. ↵ Arnold TW , de Sobrino CN , Specht HM . Estimating annual survival rates from sparse band-recovery data: examples from migratory shore and upland gamebirds . Washington (DC) : U.S. Fish and Wildlife Service ; 2013 . 27. ↵ Bonnet-Lebrun AS , Somveille M , Rodrigues AS , Manica A . Exploring intraspecific variation in migratory destinations to investigate the drivers of migration . Oikos . 2021 ; 130 ( 2 ): 187 – 96 . OpenUrl 28. ↵ Delmore KE , Fox JW , Irwin DE . Dramatic intraspecific differences in migratory routes, stopover sites and wintering areas revealed using light-level geolocators . Proc R Soc B . 2012 ; 279 ( 1747 ): 4582 – 9 . OpenUrl CrossRef PubMed 29. ↵ Finch T , Butler SJ , Franco AM , Cresswell W . Low migratory connectivity is common in long-distance migrant birds . J Anim Ecol . 2017 ; 86 ( 3 ): 662 – 73 . OpenUrl CrossRef PubMed 30. ↵ Catry P , Encarnação V , Araújo A , Fearon P , Fearon A , Armelin M , et al. Are long-distance migrant passerines faithful to their stopover sites? J Avian Biol . 2004 ; 35 ( 2 ): 170 – 81 . OpenUrl 31. O’Neal BJ , Stafford JD , Larkin RP , Michel ES . The effect of weather on the decision to migrate from stopover sites by autumn-migrating ducks . Mov Ecol . 2018 ; 6 ( 1 ): 23 . OpenUrl PubMed 32. ↵ Horton KG , Deng Y . Stopover strategies drive potential adaptability under changing environments . Proc Natl Acad Sci USA . 2024 ; 121 ( 20 ): e2406694121 . OpenUrl PubMed 33. ↵ Studds CE , Kendall BE , Murray NJ , Wilson HB , Rogers DI , Clemens RS , et al. Rapid population decline in migratory shorebirds relying on Yellow Sea tidal mudflats as stopover sites . Nat Commun . 2017 ; 8 ( 1 ): 14895 . OpenUrl CrossRef PubMed 34. ↵ Hedenström A . Adaptations to migration in birds: behavioural strategies, morphology and scaling effects . Philos Trans R Soc B Biol Sci . 2008 ; 363 ( 1490 ): 287 – 99 . OpenUrl CrossRef PubMed 35. ↵ Linek N , Yanco SW , Volkmer T , Zuñiga D , Wikelski M , Partecke J . Migratory lifestyle carries no added overall energy cost in a partial migratory songbird . Nat Ecol Evol . 2024 ; 8 ( 12 ): 2286 – 96 . OpenUrl PubMed 36. ↵ Dufour P , Descamps S , Chantepie S , Renaud J , Guéguen M , Schiffers K , et al. Reconstructing the geographic and climatic origins of long-distance bird migrations . J Biogeogr . 2020 ; 47 ( 1 ): 155 – 66 . OpenUrl 37. ↵ Buderman FE , Hanks EM , Ruiz-Gutierrez V , Shull M , Murphy RK , Miller DA . Integrated movement models for individual tracking and species distribution data . Methods Ecol Evol . 2025 ; 16 ( 2 ): 345 – 61 . OpenUrl References Stanley CQ , McKinnon EA , Fraser KC , Macpherson MP , Casbourn G , Friesen L , et al. Connectivity of wood thrush breeding, wintering, and migration sites based on range-wide tracking . Conserv Biol . 2015 ; 29 ( 1 ): 164 – 174 . OpenUrl PubMed Socolar , J. , Galtbalt , B. , Johnston , A. , La Sorte , F. A. , Robinson , O. J. , Rosenberg , K. V. and Dokter , A. M. 2025 . Seasonal macro-demography of North American bird populations revealed through participatory science . – Ecography 2025 : e07349 . OpenUrl Fuentes , M . 2022 . Miguel-Fuentes/birdflow: Birdflow (zenodo_release) – Zenodo . OpenUrl CrossRef Fuentes , M. , Van Doren , B. M. , Fink , D. and Sheldon , D. 2023 . BirdFlow: Learning seasonal bird movements from eBird data . – Methods Ecol. Evol . 14 : 923 – 938 . OpenUrl View the discussion thread. Back to top Previous Next Posted April 16, 2026. Download PDF Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Population-level migration modeling of North America’s birds through data integration with BirdFlow Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Population-level migration modeling of North America’s birds through data integration with BirdFlow Yangkang Chen , David L. Slager , Ethan Plunkett , Miguel Fuentes , Yuting Deng , Stuart A. Mackenzie , Lucas E. Berrigan , Daniel Fink , Daniel Sheldon , Benjamin M. Van Doren , Adriaan M. Dokter bioRxiv 2025.09.30.679621; doi: https://doi.org/10.1101/2025.09.30.679621 Share This Article: Copy Citation Tools Population-level migration modeling of North America’s birds through data integration with BirdFlow Yangkang Chen , David L. Slager , Ethan Plunkett , Miguel Fuentes , Yuting Deng , Stuart A. Mackenzie , Lucas E. Berrigan , Daniel Fink , Daniel Sheldon , Benjamin M. Van Doren , Adriaan M. Dokter bioRxiv 2025.09.30.679621; doi: https://doi.org/10.1101/2025.09.30.679621 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Ecology Subject Areas All Articles Animal Behavior and Cognition (7642) Biochemistry (17708) Bioengineering (13904) Bioinformatics (41992) Biophysics (21466) Cancer Biology (18618) Cell Biology (25531) Clinical Trials (138) Developmental Biology (13387) Ecology (19924) Epidemiology (2067) Evolutionary Biology (24337) Genetics (15615) Genomics (22521) Immunology (17749) Microbiology (40424) Molecular Biology (17194) Neuroscience (88673) Paleontology (667) Pathology (2839) Pharmacology and Toxicology (4827) Physiology (7650) Plant Biology (15160) Scientific Communication and Education (2046) Synthetic Biology (4302) Systems Biology (9826) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00