Demographic Drivers of Epidemic Outcomes: Sensitivity Analysis of Multidimensional Parameters in the Covasim Model

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 46,461 characters · extracted from preprint-html · click to expand
Demographic Drivers of Epidemic Outcomes: Sensitivity Analysis of Multidimensional Parameters in the Covasim Model | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Demographic Drivers of Epidemic Outcomes: Sensitivity Analysis of Multidimensional Parameters in the Covasim Model View ORCID Profile Vera Tsurkis , Ivan Kozlov , View ORCID Profile Andrei Samoilov , Irina Maslova , Elena Ilina , Alexander Lukashev , Alexander Manolov doi: https://doi.org/10.1101/2025.10.14.25328629 Vera Tsurkis 1 Ludwig-Maximilian University of Munich , Muenchen, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site 
ORCID record for Vera Tsurkis For correspondence: veratsurkis21{at}gmail.com Ivan Kozlov 2 Research Institute of Systems Biology and Medicine , Moscow, Russia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Andrei Samoilov 2 Research Institute of Systems Biology and Medicine , Moscow, Russia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Andrei Samoilov Irina Maslova 2 Research Institute of Systems Biology and Medicine , Moscow, Russia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Elena Ilina 2 Research Institute of Systems Biology and Medicine , Moscow, Russia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alexander Lukashev 2 Research Institute of Systems Biology and Medicine , Moscow, Russia 3 Martsinovsky Institute of Medical Parasitology, Tropical and Vector Borne Diseases, First Moscow State Medical University (Sechenov University) , 119435 Moscow, Russia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alexander Manolov 2 Research Institute of Systems Biology and Medicine , Moscow, Russia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Background Sensitivity analysis is a key tool for identifying which model inputs most strongly influence model outputs thereby informing data collection priorities. In agent-based models, these inputs include demographic parameters used to construct synthetic populations. Such parameters — age distributions, household size distributions, and contact matrices — are critical in shaping transmission dynamics. 
However, most established sensitivity analysis methods are designed for scalar inputs and cannot readily accommodate multidimensional demographic data. We introduce a novel sampling approach to assess the influence of such parameters on model outputs, addressing a key gap in sensitivity analysis of epidemiological agent-based models. Methods The Covasim model of COVID-19 transmission was used as the case study in this work. An autoencoder neural network was trained on country-level demographic datasets to generate realistic samples of high-dimensional inputs of the model. These sampled inputs were coupled with Sobol’ sensitivity indices to quantify their influence on cumulative infections and deaths. Results Non-scalar parameters were found to exert a major influence on model outputs. Household size distribution was the most important parameter for cumulative number of infectious cases, while age distribution had the largest effect on cumulative deaths. These findings were consistent across experimental settings, and parameter rankings were stable despite stochastic variation. Conclusions Our autoencoder-based sampling approach extends methods of global sensitivity analysis to high-dimensional demographic parameters in agent-based epidemiological models. Our results highlight that only a subset of demographic inputs exerts a dominant influence on Covasim outputs, and similar behavior can be expected in other epidemiological modeling platforms. Introduction The coronavirus pandemic highlighted the need for detailed models which could be used for reliable forecasts and evaluation of interventions. To obtain a realistic model of an outbreak, one has to use an appropriate synthetic population. The synthetic populations of the epidemiological agent-based models are often constructed using national population censuses as a major source of information. 
For example, an agent-based model was used to simulate the spread of measles in an Irish town in 2012; the census data from Ireland’s Central Statistics Office was the main source of information [ 1 ]. Additionally, transportation surveys may be included to produce a realistic structure of social contact networks: e.g. the USA National Household Transportation Survey was used in EpiSimdemics [ 2 ] and the Framework for Reconstructing Epidemic Dynamics (FRED) [ 3 ]. Cell phone tracking can also be used to obtain realistic individual movement patterns [ 4 ]. A major challenge for modelers is to obtain data that is both sufficiently precise and up-to-date: unfortunately, censuses are not frequent enough to provide recent information about the current population, and transportation or mobile tracking data are not always available. This raises the question of whether all model parameters require such detailed data. Indeed, if a given parameter does not significantly influence the model outcome, it can be roughly estimated or excluded from the model for simplification. Usually, the importance of the parameters is determined through sensitivity analysis [ 6 ]. Sensitivity analysis is a procedure which matches uncertainty of the model’s output with uncertainties in input parameters; therefore, it shows to what extent each parameter influences the outcome [ 6 ]. There are two types of sensitivity analysis: local and global [ 7 ]. Local sensitivity analysis refers to the analysis of the model’s behavior in close proximity of a particular point in the parametric space (e.g. default values) [ 8 ]. The most straightforward method of local analysis is the ‘one-at-a-time’ method, when all but one parameter are fixed and this one parameter is varied around its initial value [ 9 ]. It should be noted that the results of local analysis depend on the choice of the point around which the analysis is performed [ 10 ]. 
In contrast, global analysis explores a large part of the parametric space of the model [ 11 ]. Among others, the variance-based methods of global sensitivity analysis are widely used, such as Sobol’ sensitivity analysis [ 12 ] and the Fourier amplitude sensitivity test (FAST) [ 13 ]. Due to the ease of interpretation of their results, these methods are often favored over alternative approaches, such as ensemble learning models (random forests), which can also be applied to sensitivity analysis in similar contexts [ 14 ]. Sobol’ indices in particular are frequently used for global sensitivity analysis of agent-based models and epidemiological analytical models. Their application is straightforward and doesn’t rely on analytical calculations which are impossible to apply to agent-based models (e.g., partial derivatives). For example, the Sobol’ method of sensitivity analysis was applied to the Soil and Water Assessment Tool (SWAT) model, to determine the most important parameters and pairs of parameters [ 15 ]. Similarly, Sobol’ indices were used to identify the most important parameters of the agricultural land conservation agent-based model and to subsequently reduce input space by setting non-influential parameters to default values [ 16 ]. In epidemiological studies Sobol’ indices were applied to the generalized SIR model of the COVID-19 spread [ 17 ]. In the majority of studies the Sobol’ method is applied to scalar parameters. However, many population-level parameters are represented as vectors or matrices that possess intrinsic structure. The elements of these vectors and matrices are not independent and therefore cannot be sampled independently. One way to assess sensitivity with respect to non-scalar parameters is to use a low-dimensional space generated by an autoencoder. Autoencoders are neural network architectures designed for unsupervised learning. 
They were theoretically introduced in 1986 [ 18 ], and with the growth of computing power have become a robust tool for many scientific areas. Autoencoders are used in many fields of biology, especially in bioinformatics: in genomics [ 19 ], transcriptomics [ 20 ], proteomics [ 21 ]. They are trained by minimizing the difference (parameterized by some metric) between the initial and reproduced data. In this process an autoencoder learns compressed representations of data by encoding inputs into a lower-dimensional latent space and then decoding them back to reconstruct the original input. Once trained, the latent space of an autoencoder constitutes an effective low-dimensional representation of the input data and can be used for sampling of synthetic data through the sampling of points in the latent space and further decoding of these points. In this work we apply the Sobol’ method coupled with sampling of vector and matrix parameters via autoencoders to the Covasim epidemiological model [ 22 ]. Since its release in 2021 the Covasim model has become one of the most widely used agent-based models for studying the spread of SARS-CoV-2 and other respiratory viruses [ 22 ] [ 23 ] [ 24 ] [ 25 ]. Here, we assess sensitivity of the Covasim model to demographic parameters of a synthetic population. Materials and data The Covasim model is considered in this paper. It is described in detail in [ 22 ]. Briefly, the virus is transmitted between agents via their contacts. All contacts are divided into 4 layers: household, school, work, and community contacts. Probability of transmission depends on the agent’s age. The core part of the experiments consisted of multiple runs of Covasim with different parameter sets. There were two steps in each run: first, a synthetic population was made (using Synthpops [ 26 ]); then, the spread of the virus in this population was modeled. 
For all experiments the Covasim version 3.1.6 (cloned from github at https://github.com/InstituteforDiseaseModeling/covasim ) and Synthpops version 1.10.5 (cloned from github at https://github.com/InstituteforDiseaseModeling/synthpops ) were used. Sampling method 17 demographic parameters were included in the analysis: all demographic parameters in Synthpops and the average number of community contacts in Covasim, as this layer is not constructed in Synthpops. All parameters are listed in Table 1 . View this table: View inline View popup Table 1. Information about parameters under consideration To obtain comprehensive coverage of the parameter space using the Sobol’ sequence, it is necessary to define the bounds within which the parameters vary. Whenever possible, real-world statistical data from different countries were utilized to constrain parameters and avoid unrealistic values. The set of countries used for such evaluations varied by parameter, depending on data availability. In general, data from Organisation for Economic Co-operation and Development (OECD) countries were used and supplemented with data from other regions when available. African countries were excluded due to variation in population parameters, which often rendered them incompatible with non-African countries and led to failures in constructing synthetic populations using SynthPops. The overall logic of the experimental workflow is illustrated in Fig. 1A . When sufficient statistical data were available, most parameters represented as matrices or distributions were sampled using autoencoders. This neural network architecture was selected due to the convenience of using its latent space to generate new, realistic parameter values. Download figure Open in new tab Fig 1. The overall pipeline of the experiments. 
A – sampling of parameters depending on their dimensionality and the availability of statistical data; B – special case of sampling using autoencoders for matrix- or distribution-based parameters; C, D – examples of matrices initially provided to the autoencoder and the recovered ones. Details of the autoencoder training procedure are presented in Fig. 1B . All autoencoders trained in this study were based on the same architecture: a small (1-3) number of linear layers connecting the input to the latent space, and a symmetric set of layers mapping from the latent space to the output. While all autoencoders were initially designed with a one-dimensional latent space, this configuration did not yield satisfactory reconstruction results for the age distribution of the population, and a two-dimensional latent space had to be used instead. It should be noted that all autoencoders were initially trained on the full dataset, including African countries where applicable. However, the parameter variation ranges were inferred solely from non-African countries. The performance of the autoencoders is demonstrated in Fig. 1C and Fig. 1D , using the case of household contact matrix sampling as an example. Plots of learning curves for all autoencoders can be found in Supplementary file 1. Although the reconstruction ability of trained autoencoders varied depending on the demographic parameter, each model enabled realistic sampling of the respective parameter from the latent space. For scalar parameters, realistic values were estimated from statistical data where available, and the interval between the 5th and 95th percentiles was selected to define the range of variation. For workplace size distribution and enrollment rate this procedure was applied to every bin of the distribution, since real data were available, but insufficient for reliable autoencoder training. 
The upper limit for the average number of community contacts was set to Covasim’s default value, which exceeds the 95th percentile of real data. For some parameters, no publicly available statistical data could be found. For such parameters, the default value was multiplied by a coefficient sampled from the range [0.5, 2]. For school size distribution the same technique was used within every bin of the distribution. A detailed description of the data sources and the ranges chosen can be found in Table 1 . Other simulation parameters were fixed as follows: the population size was fixed at 100,000 individuals and the initial number of infected individuals was set at 30. To account for stochastic effects, each parameter set was simulated using a different random seed. Sensitivity analysis method Sensitivity analysis was conducted using Sobol’ indices [ 12 ], a method based on functional ANOVA decomposition of the model variance [ 27 ]. Let us consider a model $Y = f(X_1, \ldots, X_k)$, where $X_1, \ldots, X_k$ are parameters of the model and $Y$ is a scalar output of the model. Then the first order Sobol’ indices are defined as $$S_i = \frac{\mathrm{Var}_{X_i}\left(E_{X_{\sim i}}(Y \mid X_i)\right)}{\mathrm{Var}(Y)},$$ where $E_{X_{\sim i}}(Y \mid X_i)$ is the mathematical expectation of the model’s output when parameter $X_i$ is fixed, and the numerator is the variance of this expectation, calculated over all values of $X_i$. The denominator is the variance of the model calculated over the whole parametric space. Similarly, the total Sobol’ indices are defined as $$S_{Ti} = \frac{E_{X_{\sim i}}\left(\mathrm{Var}_{X_i}(Y \mid X_{\sim i})\right)}{\mathrm{Var}(Y)} = 1 - \frac{\mathrm{Var}_{X_{\sim i}}\left(E_{X_i}(Y \mid X_{\sim i})\right)}{\mathrm{Var}(Y)}.$$ Conceptually, the first-order index $S_i$ represents the fraction of the model’s variance that would be eliminated if the parameter $X_i$ were fixed; the total-effect index $S_{Ti}$ reflects the overall contribution of $X_i$ to the model’s variance, including its interactions with other parameters. Numerically, these indices are calculated using the Monte Carlo technique. To ensure effective and uniform coverage of the parameter space, Sobol’ sequences are typically used. 
A detailed description of the numerical procedure can be found in [ 6 ]. In our analysis, the Python package SALib was used [ 28 ], [ 29 ], where the Sobol’ indices were implemented based on the paper [ 6 ]. The number of parameter sets needed for the indices evaluation equals A · N · ( K + 2), where A — number of runs per each scenario (1 in most of the experiments), K — number of input parameters of the model (17 or 9 in our experiments for full and truncated sets of parameters respectively), N — number of points in parametric space (should be chosen so that there are enough points for Monte-Carlo integration to converge). The sensitivity analysis was performed in several steps. First, parameters were sampled and synthetic populations were generated. Then, the spread of the virus was simulated using Covasim. Sobol’ sensitivity analysis was then applied to the model outputs. Two major outcomes were considered: the cumulative number of cases and the cumulative number of deaths. We also present here results for other outcomes: the maximum number of new cases per day, and the maximum number of severe and critical cases in the population, which characterize the peak workload of the healthcare system. Results Accounting for stochasticity By definition, Sobol’ sensitivity analysis assumes that the system under investigation is deterministic. However, Covasim, like many other agent-based models, is a stochastic model: identical input parameters may produce different outputs depending on the random seed. Investigation of the stochasticity effects is presented in Supplementary file 2. Based on the results of the experiment, it was concluded that further analysis should be conducted with a population size of at least 100,000 and 30 initially infected individuals. We chose to use a single run per scenario and to justify this simplification in a separate experiment by explicitly comparing two setups — with and without averaging. 
Sensitivity analysis with full and reduced parameter sets Sensitivity analysis was initially performed with respect to all 17 input parameters (Supplementary Figure 1). Many parameters had only a minor impact on the variation of the model outputs. To simplify the analysis, the eight most ‘important’ parameters were selected, while the remaining parameters were grouped together. The least important parameters were: matrices of community and school contacts, the average number of teacher-to-teacher contacts per person per day, the average number of students per staff member at school, the average number of contacts for non-teaching staff per person per day, average community contacts per person per day, enrollment and employment rate distributions, and school size distribution. From this point on, these parameters were treated as a single combined group in the sensitivity analysis ( Fig. 2 A, B ; Table 2 ). View this table: View inline View popup Download powerpoint Table 2. Sobol’ indices for experiment with truncated set of parameters Download figure Open in new tab Fig 2. Results of the sensitivity analysis for the truncated set of parameters. (A, B) Sobol’ indices for the cumulative number of cases and deaths, respectively. Red indicates first order Sobol’ indices (S1), while green represents total indices (ST). As shown in Table 2 , the key demographic parameters are the household contact matrix, the distribution of household sizes, the distribution of household head age by household size, the age distribution, and the distribution of workplace sizes. To ensure that the number of samples in the parameter space was sufficient for the convergence of the Monte Carlo integration, the dependence of Sobol’ indices on the number of samples was examined ( Fig. 3 A, B, C, D ). Since the indices showed little variation between 1024 and 8192 points, it was concluded that the analysis had converged and was therefore reliable. Download figure Open in new tab Fig 3. 
Convergence of Sobol’ indices. Sensitivity indices for the most important parameters with respect to cumulative number of cases (A, B) and deaths (C, D) respectively, shown as a function of the number of sampling points in the parameter space. To further validate the methodology, we compared the results of the sensitivity analysis with and without averaging over runs. Both analyses were conducted using 1,024 sampling points in the parameter space. The values of the Sobol’ indices did not differ significantly between the two approaches (Supplementary Figure 2 and Supplementary Figure 3), supporting the validity of using a single run per scenario in the main analysis. As the household size distribution exhibited a major impact on the cumulative number of cases, its role was studied in detail, including the changes caused by altering the algorithm used for household construction in Synthpops. The details can be found in Supplementary file 3. Sensitivity analysis for different viruses The default transmissibility in Covasim ( β = 0.016) is calibrated to approximate the spread of the original Wuhan strain. To broaden the applicability of our sensitivity analysis to other coronavirus variants and potentially to different pathogens, we repeated the analysis using two additional transmissibility values: β = 0.008 and β = 0.024 ( Fig. 4C and Fig. 4D ). Download figure Open in new tab Fig 4. Results of the sensitivity analysis with different virus transmissibility levels. A, B – first order Sobol’ indices for the cumulative number of cases and deaths, respectively, for different values of virus transmissibility; red – 0.024, orange – 0.016, green – 0.008. The results for the cumulative number of cases were largely independent of the virus transmissibility. For the cumulative number of deaths, the results for β = 0.016 and β = 0.024 were closely aligned, while those for β = 0.008 differ significantly. 
This distinct behavior at low transmissibility can be explained by epidemic saturation in the other two scenarios: for β = 0.024 (and in almost all cases for β = 0.016), the number of infections exceeded the total population size ($10^5$), indicating that nearly all individuals were infected at least once (Supplementary Figure 4 A). Consequently, the cumulative number of deaths was similarly distributed for these two values (Supplementary Figure 4 B). In contrast, in the case of low transmissibility ( β = 0.008), the number of cases remained below the population size in all simulations, and the number of deaths was substantially lower. As a result, the sensitivity of the outcomes to individual parameters changed under this scenario. Discussion Identifying the parameters that significantly influence model outcomes is an important step in computational epidemiological simulations. Parameters of low importance can be inferred with less precision, whereas parameters with high impact must be accurately estimated and regularly updated to ensure model reliability. By using autoencoders, we were able to analyze the Covasim model with Sobol’ indices and to investigate the importance of non-scalar population parameters, which inform parameter prioritization. Our results highlight that only a subset of demographic parameters exerts a significant influence on the model output. In particular, the household size distribution, age distribution, household contact matrix, matrix of household head age by household size, and workplace size distribution influenced both the total number of infections and the number of deaths. These findings are consistent across multiple experimental settings, including variations in virus transmissibility and different synthetic population generation methods. Notably, all of the most important parameters were vectors or matrices. This underscores the importance of the approach proposed in this study for analyzing non-scalar parameters. 
Many of the important parameters were related to household characteristics. This can be explained by the higher transmission efficiency of contacts within households compared to other contact layers [ 37 ]. Moreover, household features—such as age composition and size distribution—vary substantially across countries, regions, and cities, reflecting differences in socioeconomic and cultural contexts. Since these parameters exhibit substantial variability in real-world data, sampling from the latent space of autoencoders trained on such data resulted in a wide range of realistic values, thereby increasing the overall importance of these parameters in the model. The Sobol’ method of sensitivity analysis, which we used to rank the parameters by their importance, is convenient due to its straightforward implementation and the ease of interpreting the results. However, it does not account for stochasticity. To reduce stochastic effects, we used a relatively large initial number of infected individuals (30). In this setup, the difference between runs with identical inputs was a few percent in both the number of cases and deaths. When thousands of runs with shuffled parameter values were used to evaluate the Sobol’ indices, the effect of model stochasticity was negligible (Supplementary Figure 2, Supplementary Figure 3). There are some limitations to the methods and results of our study. Our sampling method assumed the input parameters to be independent, as it is a necessary condition for clear interpretation of a variance-based sensitivity analysis [ 6 ]. For real populations, this is not the case: for example, the dependence of the household head age on the household size cannot be independent of household size distribution and age structure of the population. As a result, in some cases, SynthPops was unable to construct a valid synthetic population from certain parameter combinations, and these samples had to be excluded from the analysis. 
In future work, this limitation can be addressed by training a single autoencoder to jointly reconstruct all demographic parameters, allowing for the generation of coherent and realistic parameter sets. The theoretical derivation and practical application of Sobol’ indices for models with correlated inputs can be found in [ 38 ]. Sampling via autoencoders benefits from the simplicity of the approach and the models’ ability to capture structural patterns in demographic parameters. Nevertheless, some information is inevitably lost in their low-dimensional representations. This study employed relatively simple architectures, leaving room for future improvements through the use of more sophisticated models. Inevitably, the results of the sensitivity analysis depend on the method of parameter sampling, including the bounds in which parameters vary. In our study, we determined these bounds based on real-life data where enough data was available. In other cases, we used the default parameter values from Covasim and applied a scaling factor selected from the [0.5, 2] range. Broader ranges led to frequent failures in the construction of synthetic populations. Lastly, we didn’t vary some parameters which interact with demographic parameters and thus influence the results of the sensitivity analysis. For example, in the default settings of Covasim, contacts within households are ten times more likely to result in infection than contacts within workplaces (relative transmissibility of the virus is multiplied by 3 for household contacts and by 0.3 for workplace contacts). This leads to the increased sensitivity of the outputs to the household parameters in comparison with sensitivity to the workplace parameters, but this disposition could have changed if we had included these Covasim coefficients in the analysis. This constrains our estimates to a specific epidemiological profile of the virus. 
Conclusion In this study, we presented a novel autoencoder-based sampling method for multidimensional parameters and conducted a global sensitivity analysis of the agent-based epidemiological model Covasim, focusing on the demographic parameters used to construct synthetic populations. By applying Sobol’ sensitivity indices we evaluated the relative importance of scalar, vector, and matrix parameters on key epidemiological outcomes such as cumulative cases and deaths. Our results highlight that only a subset of demographic inputs — particularly the household size distribution, age distribution, household contact matrix, matrix of household head age by household size, and workplace size distribution — exerts a dominant influence on the chosen model outputs. These findings are consistent across multiple experimental settings, including variations in virus transmissibility and different synthetic population generation methods. Due to the dominant influence of non-scalar parameters on the outputs of the model, our method can be useful for sampling realistic, data-driven parameters and is therefore particularly well-suited for researchers employing agent-based models. 
Data Availability All code used for this study is available online at https://github.com/tsurkisvera/sensitivity-analysis-covasim https://desapublications.un.org/publications/world-population-prospects-2024-summary-results https://zenodo.org/records/8142652 https://data.un.org/Data.aspx?d=POP&f=tableCode%3A50 https://www.oecd.org/en/publications/education-at-a-glance-2024_c00cad36-en.html https://rshiny.ilo.org/dataexplorer18/?lang=en&id=EMP_TEMP_SEX_AGE_NB_A https://ec.europa.eu/eurostat/databrowser/view/SBS_SC_SCA_R2__custom_2928090/bookmark/table?lang=en&bookmarkId=4bdbd2d1-3236-4d2f-be66-c77585a6619e Supplementary data Supplementary data are available at MedRxiv online Funding This work was supported by a subsidy from Rospotrebnadzor (The Federal Service for Surveillance on Consumer Rights Protection and Human Wellbeing) [141-02-2023-208]. Data availability The code used for the experiments is publicly available at https://github.com/tsurkisvera/sensitivity-analysis-covasim . References 1. ↵ Hunter E , Mac Namee B , Kelleher J. An open-data-driven agent-based model to simulate infectious disease outbreaks . PLOS ONE . 2018 ; 13 ( 12 ): 1 – 35 . doi: 10.1371/journal.pone.0208775 . OpenUrl CrossRef PubMed 2. ↵ Barrett CL , Bisset KR , Eubank S , Feng X , Marathe MV . EpiSimdemics: An efficient algorithm for simulating the spread of infectious disease over large realistic social networks . 2008 SC - International Conference for High Performance Computing, Networking, Storage and Analysis . 2008 ; p. 1 – 12 . 3. ↵ Grefenstette JJ , Brown ST , Rosenfeld R , DePasse J , Stone NTB , Cooley P , et al. FRED (A Framework for Reconstructing Epidemic Dynamics): an open-source software system for modeling infectious diseases and control strategies using census-based populations . BMC Public Health . 2013 ; 13 : 940 – 940 . OpenUrl PubMed 4. ↵ Frias-Martinez E , Williamson G , Frias-Martinez V. 
An Agent-Based Model of Epidemic Spread Using Human Mobility and Social Network Information . In: 2011 IEEE Third International Conference on Privacy, Security, Risk and Trust and 2011 IEEE Third International Conference on Social Computing ; 2011 . p. 57 – 64 . 5. Davidsson P , Logan B , Takadama K Edmonds B , Moss S. From KISS to KIDS – An ‘Anti-simplistic’ Modelling Approach . In: Davidsson P , Logan B , Takadama K , editors. Multi-Agent and Multi-Agent-Based Simulation . Berlin, Heidelberg : Springer Berlin Heidelberg ; 2005 . p. 130 – 144 . 6. ↵ Saltelli A , Annoni P , Azzini I , Campolongo F , Ratto M , Tarantola S. Variance based sensitivity analysis of model output. Design and estimator for the total sensitivity index . Comput Phys Commun . 2010 ; 181 : 259 – 270 . OpenUrl CrossRef PubMed Web of Science 7. ↵ Reed PM , Hadjimichael A , Malek K , Karimi T , Vernon CR , Srikrishnan V , et al. Addressing Uncertainty in Multisector Dynamics Research . Zenodo ; 2022 . Available from: https://uc-ebook.org . 8. ↵ Perzan Z , Babey T , Caers J , Bargar JR , Maher K. Local and Global Sensitivity Analysis of a Reactive Transport Model Simulating Floodplain Redox Cycling . Water Resources Research . 2021 ; 57 ( 12 ). doi: 10.1029/2021WR029723 . OpenUrl CrossRef 9. ↵ Morris MD . Factorial Sampling Plans for Preliminary Computational Experiments . Technometrics . 1991 ; 33 ( 2 ): 161 – 174 . doi: 10.1080/00401706.1991.10484804 . OpenUrl CrossRef Web of Science 10. ↵ Saltelli A , Annoni P. How to Avoid a Perfunctory Sensitivity Analysis . Environmental modelling and software . 2010 ; 25 ( 12 ): 1508 – 1517 . doi: 10.1016/j.envsoft.2010.04.012 . OpenUrl CrossRef 11. ↵ Sobol IM . Sensitivity Estimates for Nonlinear Mathematical Models . Mathematical Modelling and Computational Experiments . 1993 ; 1 ( 4 ): 407 – 414 . OpenUrl 12. ↵ Sobol IM . Global sensitivity indices for nonlinear mathematical models and their Monte Carlo estimates . 
Mathematics and Computers in Simulation . 2001 ; 55 ( 1-3 ): 271 – 280 . OpenUrl CrossRef Web of Science 13. ↵ Cukier RI , Fortuin CM , Shuler KE , Petschek AG , Schaibly JH . Study of the sensitivity of coupled reaction systems to uncertainties in rate coefficients . I Theory. Journal of Chemical Physics . 1973 ; 59 : 3873 – 3878 . OpenUrl 14. ↵ Garg A , Yuen S , Seekhao N , Yu G , Karwowski J , Powell M , et al. Towards a Physiological Scale of Vocal Fold Agent-Based Models of Surgical Injury and Repair: Sensitivity Analysis, Calibration and Verification . Applied Sciences . 2019 ; 9 . doi: 10.3390/app9152974 . OpenUrl CrossRef 15. ↵ Nossent J , Elsen P , Bauwens W. Sobol’ sensitivity analysis of a complex environmental model . Environmental Modelling and Software . 2011 ; 26 : 1515 – 1525 . OpenUrl 16. ↵ Thill JC , Dragicevic S Ligmann-Zielinska A. GeoComputational Analysis and Modeling of Regional Systems . In: Thill JC , Dragicevic S , editors. ‘Can You Fix It?’ Using Variance-Based Sensitivity Analysis to Reduce the Input Space of an Agent-Based Model of Land Use Change . Cham : Springer International Publishing ; 2018 . p. 77 – 99 . Available from : doi: 10.1007/978-3-319-59511-5_6 . OpenUrl CrossRef 17. ↵ Lu X , Borgonovo E. Global Sensitivity Analysis in Epidemiological Modeling . European Journal of Operational Research . 2021 ; 304 . doi: 10.1016/j.ejor.2021.11.018 . OpenUrl CrossRef PubMed 18. ↵ Rumelhart DE , Hinton GE , Williams RJ , University of California SDIfCS . Learning Internal Representations by Error Propagation . ICS report. Institute for Cognitive Science, University of California, San Diego ; 1985 . Available from: https://books.google.ru/books?id=Ff9iHAAACAAJ . 19. ↵ Lu X , Meng J , Wang H , Zhou Y , Zhou J , Ruan X , et al. DNA replication stress stratifies prognosis and enables exploitable therapeutic vulnerabilities of HBV-associated hepatocellular carcinoma: An in-silico precision oncology strategy . The Innovation Medicine . 
2023 ; 1 ( 1 ): 100014 . doi: 10.59717/j.xinn-med.2023.100014 . OpenUrl CrossRef 20. ↵ Abdolhosseini F , Azarkhalili B , Maazallahi A , Kamal A , Motahari SA , Sharifi-Zarchi A , et al. Cell Identity Codes: Understanding Cell Identity from Gene Expression Profiles using Deep Neural Networks . Scientific Reports . 2018 ; 9 . 21. ↵ Gao W , Mahajan S , Gray J. Deep Learning in Protein Structural Modeling and Design . Patterns . 2020 ; 1 : 100142 . doi: 10.1016/j.patter.2020.100142 . OpenUrl CrossRef PubMed 22. ↵ Kerr CC , Stuart RM , Mistry D , Abeysuriya RG , Rosenfeld K , Hart GR , et al. Covasim: An agent-based model of COVID-19 dynamics and interventions . PLOS Computational Biology . 2021 ; 17 ( 7 ): 1 – 32 . doi: 10.1371/journal.pcbi.1009149 . OpenUrl CrossRef 23. ↵ Stuart R , Abeysuriya R , Kerr C , Mistry D , Klein D , Gray R , et al. Role of masks, testing and contact tracing in preventing COVID-19 resurgences: a case study from New South Wales, Australia . BMJ Open . 2021 ; 11 : e045941 . doi: 10.1136/bmjopen-2020-045941 . OpenUrl CrossRef PubMed 24. ↵ Panovska-Griffiths J , Kerr C , Stuart R , Mistry D , Klein D , Viner R , et al. Determining the optimal strategy for reopening schools, the impact of test and trace interventions, and the risk of occurrence of a second COVID-19 epidemic wave in the UK: a modelling study . The Lancet Child and Adolescent Health . 2020 ; 4 . doi: 10.1016/S2352-4642(20)30250-9 . OpenUrl CrossRef PubMed 25. ↵ Maslova II , Manolov AI , Glushchenko OE , Kozlov IE , Tsurkis VI , Popov NS , et al. Limitations in creating artificial populations in agent-based epidemic modeling: a systematic review . Journal of microbiology, epidemiology and immunobiology . 2024 ; 101 ( 4 ): 530 – 545 . OpenUrl 26. ↵ Mistry D , Kerr CC , Abeysuriya R , Wu M , Fisher M , Thompson A , et al. SynthPops: a generative model of human contact networks; 2021 - 2025 . Available from: https://github.com/synthpops/synthpops . 27. ↵ Efron B , Stein CM . 
The Jackknife Estimate of Variance . Annals of Statistics . 1981 ; 9 : 586 – 596 . OpenUrl CrossRef 28. ↵ Herman J , Usher W. SALib: An open-source Python library for Sensitivity Analysis . The Journal of Open Source Software . 2017 ; 2 ( 9 ). doi: 10.21105/joss.00097 . OpenUrl CrossRef 29. ↵ Iwanaga T , Usher W , Herman J. Toward SALib 2.0: Advancing the accessibility and interpretability of global sensitivity analyses . Socio-Environmental Systems Modelling . 2022 ; 4 : 18155 . doi: 10.18174/sesmo.18155 . OpenUrl CrossRef 30. Prem K , Cook AR , Jit M. Projecting social contact matrices in 152 countries using contact surveys and demographic data . PLOS Computational Biology . 2017 ; 13 ( 9 ): 1 – 21 . doi: 10.1371/journal.pcbi.1005697 . OpenUrl CrossRef 31. United Nations Department of Economic and Social Affairs, Population Division . World Population Prospects 2024: Summary of Results . United Nations ; 2024 . Available from: https://desapublications.un.org/publications/world-population-prospects-2024-summary-results . 32. Galeano J , Esteve A , Turu A , Joan G , Becca F , Fang H , et al. CORESIDENCE: National and subnational data on household size and composition around the world, 1964–2021 . Scientific Data . 2024 ; 11 . doi: 10.1038/s41597-024-02964-3 . OpenUrl CrossRef 33. United Nations Statistics Division . Households by Age and Sex of Reference Person and by Size of Household ; 2024 . Demographic Statistics Database . Available from: https://data.un.org/Data.aspx?d=POP&f=tableCode%3A50 . 34. OECD . Education at a Glance 2024: OECD Indicators . Paris : OECD Publishing ; 2024 . Available from : doi: 10.1787/c00cad36-en . OpenUrl CrossRef 35. International Labour Organization . ILOSTAT: Employment by Sex and Age (Annual) ; 2024 . ILOSTAT Database . Available from: https://rshiny.ilo.org/dataexplorer18/?lang=en&id=EMP_TEMP_SEX_AGE_NB_A . 36. Eurostat . 
Annual Enterprise Statistics by Size Class for Special Aggregates of NACE Rev.2 Activities (2005-2020) ; 2024 . Eurostat Database . Available from: https://ec.europa.eu/eurostat/databrowser/view/SBS_SC_SCA_R2__custom_2928090/bookmark/table?lang=en&bookmarkId=4bdbd2d1-3236-4d2f-be66-c77585a6619e . 37. ↵ Bi Q , Lessler J , Eckerle I , Lauer S , Kaiser L , Vuilleumier N , et al. Insights into household transmission of SARS-CoV-2 from a population-based serological survey . Nature Communications . 2021 ; 12 . doi: 10.1038/s41467-021-23733-5 . OpenUrl CrossRef PubMed 38. ↵ Mara T , Tarantola S , Annoni P. Non-parametric methods for global sensitivity analysis of model output with dependent inputs . Environmental modelling and software . 2015 ; 72 : 173 – 183 . doi: 10.1016/j.envsoft.2015.07.010 . OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted October 17, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Demographic Drivers of Epidemic Outcomes: Sensitivity Analysis of Multidimensional Parameters in the Covasim Model Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Demographic Drivers of Epidemic Outcomes: Sensitivity Analysis of Multidimensional Parameters in the Covasim Model Vera Tsurkis , Ivan Kozlov , Andrei Samoilov , Irina Maslova , Elena Ilina , Alexander Lukashev , Alexander Manolov medRxiv 2025.10.14.25328629; doi: https://doi.org/10.1101/2025.10.14.25328629 Share This Article: Copy Citation Tools Demographic Drivers of Epidemic Outcomes: Sensitivity Analysis of Multidimensional Parameters in the Covasim Model Vera Tsurkis , Ivan Kozlov , Andrei Samoilov , Irina Maslova , Elena Ilina , Alexander Lukashev , Alexander Manolov medRxiv 2025.10.14.25328629; doi: https://doi.org/10.1101/2025.10.14.25328629 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Epidemiology Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4538) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (541) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3333) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global 
Health (9232) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00d72b5ada952ad',t:'MTc3OTYzNzc0Mg=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00