A multicentre study to predict COVID-19 outbreaks in long-term care homes using wastewater surveillance and environmental surface sampling for SARS-CoV-2

doi:10.1101/2025.10.20.25337094

A multicentre study to predict COVID-19 outbreaks in long-term care homes using wastewater surveillance and environmental surface sampling for SARS-CoV-2

2025 · doi:10.1101/2025.10.20.25337094

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 37,033 characters · extracted from preprint-html · click to expand

A multicentre study to predict COVID-19 outbreaks in long-term care homes using wastewater surveillance and environmental surface sampling for SARS-CoV-2 | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search A multicentre study to predict COVID-19 outbreaks in long-term care homes using wastewater surveillance and environmental surface sampling for SARS-CoV-2 Jason A. Moggridge , Kritisha Acharya , Derek R. MacFadden , Alex Wong , Rees Kassen , Caroline Nott , Gustavo Ybazeta , David S. Guttman , Lucas Castellani , Michael Fralick doi: https://doi.org/10.1101/2025.10.20.25337094 Jason A. 
Moggridge 1 Sinai Health System , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kritisha Acharya 1 Sinai Health System , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Derek R. MacFadden 2 Department of Medicine, University of Ottawa , Ottawa, Ontario, Canada 3 Ottawa Hospital Research Institute , Ottawa, Ontario, Canada 4 The Ottawa Hospital , Ottawa, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alex Wong 5 Department of Biology, Carleton University , Ottawa, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Rees Kassen 6 Department of Biology, University of Ottawa , Ottawa, Ontario, Canada 7 Department of Biology, McGill University , Montreal, Quebec, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Caroline Nott 2 Department of Medicine, University of Ottawa , Ottawa, Ontario, Canada 3 Ottawa Hospital Research Institute , Ottawa, Ontario, Canada 4 The Ottawa Hospital , Ottawa, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Gustavo Ybazeta 8 Health Sciences North Research Institute , Sudbury, Ontario, Canada 9 NOSM University , Sudbury, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site David S. Guttman 10 Centre for the Analysis of Genome Evolution and Function, University of Toronto , Toronto, Ontario, Canada 11 Department of Cell & Systems Biology, University of Toronto , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Lucas Castellani 9 NOSM University , Sudbury, Ontario, Canada 12 Sault Area Hospital , Sault Ste. 
Marie, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Michael Fralick 1 Sinai Health System , Toronto, Ontario, Canada 13 Department of Medicine, University of Toronto , Toronto, Ontario, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: mike.fralick{at}mail.utoronto.ca Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF ABSTRACT Background Floor swabs can be an effective environmental sampling method for proactive SARS-CoV-2 surveillance in congregate settings like long-term care homes (LTCHs). Concurrent assessment of additional variables such as wastewater surveillance data and weather data has the potential to improve the predictive performance of this approach. Methods We analyzed existing data from 5,095 floor swabs collected between August 2021 and January 2023 from 10 LTCHs across three cities in Ontario, Canada: Ottawa, Toronto, and Sault Ste. Marie. Floors were swabbed weekly at each LTCH. Swabs were analyzed using RT-qPCR. Wastewater data were obtained from the Ontario Wastewater Surveillance Consortium’s repository; we included one treatment plant for each city. Weather data were sourced from Environment Canada, with one station selected from each city. Logistic regression, LASSO-penalized logistic regression, Random Forest, and XGBoost were used for COVID-19 outbreak predictions using different subsets of predictors with leave-one-LTCH-out cross-validation. SHAP values were computed for model explainability. Our outcome of interest was a COVID-19 outbreak within an LTCH. Results Over the study period, 25 COVID-19 outbreaks occurred in the participating LTCHs, with a median duration of 30 days and a median of 39 cases per outbreak (range 2 to 196). LASSO generally outperformed logistic regression, Random Forest, and XGBoost.
The two variables with the highest SHAP values were log-transformed 7-day mean wastewater and log viral copies from floor swabs. Conclusions Incorporating wastewater data and weather data enhanced the ability of floor swab results to predict an outbreak of COVID-19 in an LTCH. Future studies are needed to evaluate how well the model performs when implemented into practice. INTRODUCTION Residents of long-term care homes (LTCHs) were disproportionately affected by COVID-19, and experienced high rates of morbidity and mortality. 1 , 2 At the beginning of the pandemic, the mortality rate from COVID-19 outbreaks in Canadian LTCHs exceeded 30%, with LTC residents comprising 81% of all COVID-19 deaths in Canada. 3 , 4 The outbreaks also had other adverse effects. For example, there was limited access to outpatient services or visiting healthcare professionals for residents, restrictions on community groups and day programs, and social isolation. To mitigate the impact of COVID-19 outbreaks on LTCHs, consistent and proactive disease surveillance is crucial in predicting and preventing future outbreaks. While active clinical testing is a commonly used approach, it is costly and hard to sustain. 5 Passive surveillance, like environmental detection, offers a non-invasive option for congregate settings. Our team has validated a method of built environment sampling for SARS-CoV-2 in which swab samples are taken from floors and processed using reverse transcription quantitative polymerase chain reaction (RT-qPCR). 6 Our largest study was a 14-month, multicentre prospective study at 10 LTCHs in Ontario, which yielded three main findings. 7 First, the percentage of floor swabs detecting SARS-CoV-2 can effectively rule out an active outbreak. Second, the percentage of floor swabs positive for SARS-CoV-2 rises days, and sometimes weeks, before an outbreak is identified. Lastly, floor swabs provide spatial resolution to identify the areas in the LTCH where COVID-19 cases occur.
Our data suggest that floor swabs can serve as a proactive surveillance method. However, this analytic approach is primarily descriptive, and while viral load is a strong predictor of COVID-19 burden, each LTCH also has specific factors that can either contribute to or hinder the transmission of SARS-CoV-2. In addition, prior studies have shown that weather variables (e.g., temperature, wind speed) are associated with the spread of COVID-19. 8 , 9 For example, a recent review article highlighted that multiple studies have shown a negative correlation between ambient temperature and number of COVID-19 cases. 9 The objective of our current study was to include additional predictors such as local wastewater data and weather to identify whether these variables improved model performance. METHODS Study Design To enhance the predictive accuracy of detecting COVID-19 outbreaks in LTCHs, we utilized our existing floor swab RT-qPCR results with supplementary data from regional wastewater monitoring and weather stations. Our study was reviewed by the research ethics board at the University of Ottawa and received a waiver because the samples collected are not human (as discussed in Article 2.1 of the Tri-Council Policy Statement: Ethical Conduct for Research Involving Humans [TCPS 2]), and any demographic data are either: (1) publicly available through a mechanism set out by legislation or regulation and that is protected by law; or (2) they are in the public domain and the individuals to whom the information refers have no reasonable expectation of privacy (see Article 2.2 of the TCPS 2). Data Sources Between August 2021 and January 2023, our team collected and assayed by RT-qPCR 5,095 floor swabs from 10 LTCHs across three cities in Ontario, Canada: Ottawa, Toronto, and Sault Ste. Marie. Floors were swabbed weekly in the same interior locations during each visit. Each LTCH was blinded to the results.
All other methods related to floor swab collection, RNA extraction, and RT-qPCR testing have been described previously. 7 Results for each collection date were aggregated as the rate of positive tests (positivity) and the geometric mean number of SARS-CoV-2 RNA copies detected plus one. The pseudocount of one copy was added to each value of viral copies to enable logarithmic transformation of these data (as the logarithm of zero is undefined) for summary statistics, visualization, and modelling. Confirmed COVID-19 outbreaks in LTCHs were defined as “two or more lab-confirmed COVID-19 cases in residents, staff or other visitors in a home, with an epidemiological link, within a 14-day period, where at least one case could have reasonably acquired their infection in the LTC home” as per provincial guidance in Ontario set before the start of the sample collection period. 10 The start date, end date, and number of COVID-19 cases for residents and staff were recorded for each outbreak. Over the study period, 25 outbreaks occurred, with a median duration of 30 days and a median of 39 cases per outbreak (range 2 to 196). We accessed historical SARS-CoV-2 wastewater data from the areas surrounding the participating LTCHs. The wastewater sampling frequency (both over time and among sites) ranged from biweekly to daily, with each sampling instance reporting wastewater viral concentrations. All wastewater data was obtained from the Ontario Wastewater Surveillance Consortium’s repository. 11 We selected results from the relevant treatment plant (TP) sites for each city: “Ottawa WWTP” (ROPEC TP facility) in Ottawa, “AB” (Ashbridges Bay TP) in Toronto, and “TPSSM” in Sault Ste. Marie. All wastewater results were from qPCR assays of solid fraction samples. The mean concentration of N1 and N2 gene targets was normalized against the concentration of pepper mild mottle virus (PMMoV) to compute daily relative SARS-CoV-2 proportions for each city. 
These daily relative proportions were used to calculate 7-day endpoint averages (with missing values ignored) for modelling. We used the last-observation-carried-forward method to replace missing values for a small number of dates with an undefined rolling average. Weather data were downloaded from Environment Canada. 12 We selected a single, representative weather station for each city. Station names and IDs for these were OTTAWA CDA RCS (#30578), TORONTO INTL A (#51459), and SAULT STE MARIE A (#50092). We collected data for the following variables (units): daily mean temperature (°C), total precipitation (mm), average relative humidity (%), speed (kph) and direction (in degrees) of the maximum wind gust, and barometric pressure (mbar). We computed each variable’s 7-day endpoint rolling averages and 7-day spreads (i.e., max. value - min. value); however, spread was not computed for wind direction. These data were used as predictors for modelling COVID-19 outbreaks in LTCHs. Study Outcome and Covariates The primary outcome variable was the presence of a COVID-19 outbreak within an LTCH as determined by local public health guidelines. Model covariates include floor swab results (i.e., test positivity and viral copies recovered), viral detection in wastewater, and weather data. Statistical Analysis All statistical analyses were performed using the R programming language (v4.4.1). We computed descriptive statistics, as well as Pearson correlation coefficients between each covariate, and between covariates and COVID-19 outbreaks. Confidence intervals for continuous variables were calculated using the Wald method, and the Agresti-Coull method was used for confidence intervals of binary variables. Finally, we evaluated various supervised machine learning classifiers for outbreak discrimination, with outbreak status modelled as a binary outcome and concurrent results from LTCH floor swabs, regional wastewater testing, and weather records used as predictors. 
Supervised Learning Methods We evaluated classifiers created using several different supervised learning methods, including logistic regression using the glm package, LASSO regression using the glmnet package (abbreviated as LR in this work), Random Forest using the ranger package (RF), and boosted trees with the xgboost package (XGB). Classifiers were evaluated by leave-one-LTCH-out cross-validation (k=10 LTCHs), where each fold corresponded to the set of observations from a single LTCH, and each of the ten iterations had a different set of observations from a single LTCH held out for testing. For each iteration in cross-validation, we applied a data pre-processing step in which categorical predictors were converted to dummy variables, centring and scaling were performed for all numeric predictors, and any zero-variance variables were dropped from the dataset. To prevent data leakage, learned pre-processor settings (specifically, means and standard deviations for centring and scaling) were determined in each iteration from the training examples only. Optimal hyperparameters for each method were determined in cross-validation. For LR, we tuned the LASSO regularization penalty λ; for RF, we searched for the optimal combination of mtry , the number of predictors sampled at each split, and min_n , the minimum number of observations required per node for further splitting; for XGB, we tuned mtry, min_n , tree-depth, learning-rate (shrinkage), loss-reduction required for further splits, sample-size used for fitting, and the number of iterations with improvement before stopping. For LR and RF, we used a grid-search with 250 and 25 different hyperparameter settings, respectively. For XGB, we used a Latin hypercube search with 100 iterations, due to the larger number of hyperparameters requiring tuning. To assess the performance of our predictive model, we chose key metrics such as the area under the receiver operating characteristic curve (AUROC), accuracy, and Brier score.
Model calibration was assessed by plotting the predicted probability of an outbreak against the observed probability of an outbreak. Receiver operator curves were plotted by varying the alarm threshold for the predicted probability from 0.1% to 99.9%. Additionally, we have provided SHAP (SHapley Additive exPlanations) values to estimate the relative importance of each included variable to aid in model interpretability. 13 , 14 These were calculated using the kernelshap package. RESULTS We collected and assayed 5,095 floor samples from 10 LTCHs across Ottawa, Toronto, and Sault Ste. Marie in Ontario, Canada between August 2021 and January 2023 ( Table 1 ). The number of samples collected per facility ranged from 148 to 1,157. This variability is attributed to the size of the LTCH and the duration of the monitoring period. The monitoring periods at different LTCHs ranged from 112 days (with 15 sample collection visits) to 428 days (with 56 visits). Overall, SARS-CoV-2 RNA was detected by RT-qPCR in 1,808 of 5,095 samples (35.5%, 95% CI: 34.0, 37.0%), and the geometric mean number of genomic copies recovered per assay (plus one) was 2.57 (95% CI: 2.5, 2.7 copies). Aggregate SARS-CoV-2 RNA detection rates varied among LTCHs from 10.8% (CI: 6.7, 17.0%) for the LTCH with the least test positivity to 57.5% (CI: 54.0, 61.0%) for the LTCH with the greatest positivity. The aggregate geometric mean number of viral copies (plus one) detected in all samples varied from 1.14 (CI: 1.07, 1.2) to 6.3 (CI: 5.5, 7.1) among LTCHs. Characteristics of the LTCH buildings have been previously described. 7 View this table: View inline View popup Download powerpoint Table 1: Summary statistics for floor swabs at each long-term care home. Description of wastewater SARS-CoV-2 monitoring and weather records Daily regional wastewater signals relative to PMMoV were smoothed for modelling using a 7-day endpoint rolling average. Daily wastewater detection of SARS-CoV-2 was greater at Sault Ste. 
Marie (median: 0.0038 copies / PMMoV; IQR: 0.0015, 0.0077) than Toronto ($3.1 \times 10^{-4}$; IQR: $6 \times 10^{-5}$, $8.5 \times 10^{-4}$) or Ottawa ($1.3 \times 10^{-4}$; IQR: $6.12 \times 10^{-5}$, $2.8 \times 10^{-4}$), though the sampling periods were substantially different for each city ( Table 2 ). View this table: View inline View popup Download powerpoint Table 2. City-level characteristics relevant to multiple long-term care homes, including SARS-CoV-2 concentrations in wastewater and weather variables. We collected weather data from three Environment Canada stations near the 10 LTCHs ( Table 2 ). We used temperature, precipitation, and wind statistics from the daily records and calculated daily statistics for barometric pressure and relative humidity using hourly observations. Toronto had the highest average daily temperatures (min = 1.2°C, mean = 5.6°C, and max = 10.1°C), followed by Sault Ste. Marie (min = -1.7°C, mean = 3.6°C, max = 9.0°C) and Ottawa (min = -10.8°C, mean = -5.8°C, max = -0.8°C). Toronto was also the windiest, with an average daily maximum wind gust speed of 43.9 km/h, followed by Sault Ste. Marie (42.1 km/h) and Ottawa (37.5 km/h). Sault Ste. Marie had the largest average daily precipitation with 2.6 mm, followed by Toronto (2.4 mm) and Ottawa (1.8 mm). Ottawa had the highest average hourly barometric pressure (100.7 kPa), followed by Toronto (99.5 kPa) and Sault Ste. Marie (99.2 kPa). Sault Ste. Marie had the greatest average hourly relative humidity (79.4%), followed by Ottawa (72.8%) and Toronto (71.2%). Pairwise Correlations We computed Pearson correlations for all pairs of variables used in modelling (Suppl. Figure S2). Significant correlations were found between floor swab positivity and viral copies (r = 0.91), floor swab positivity and wastewater signal (7-day rolling endpoint average; r = 0.45), and floor swab positivity and daily temperature range (7-day rolling endpoint average; r = 0.3).
Floor swab viral copies were also significantly correlated with wastewater signal (r = 0.52), as well as temperature range (r = 0.31). Wastewater signal was positively correlated with daily temperature range (r = 0.23), daily wind speed range (7-day endpoint average; r = 0.15), but negatively correlated with barometric pressure (r = -0.51). ML Model performance We evaluated outbreak discrimination by single logistic regression with three different predictor variables to establish benchmarks for machine-learning models ( Figure 1 ). Floor swab qPCR results provided the best predictors of outbreaks in leave-one-LTCH-out cross-validation: floor swab viral copies had an AUROC of 0.82 ± 0.05, accuracy of 0.72 ± 0.05, and Brier score of 0.19 ± 0.03, while floor swab test positivity had similar performance (AUROC: 0.81 ± 0.04; accuracy: 0.73 ± 0.04; Brier: 0.19 ± 0.03). Regional wastewater (as 7-day rolling endpoint average) had comparable AUROC (0.82 ± 0.05) but worse accuracy (0.62 ± 0.05) and Brier score (0.23 ± 0.03) than the floor-swab–derived measures. Download figure Open in new tab Figure 1. AUROC results for different combinations of machine learning methods and subsets of predictors, evaluated by leave-one-LTCH-out cross-validation (k=10 sites). SC2 = SARS-CoV-2, PMMoV = pepper mild mottle virus, 7d = 7-day, LTCH = long-term care home, AUROC = area under the receiver operating curve We evaluated more complex predictive models using three different machine learning methods (LASSO logistic regression, Random Forest, and XGBoost) and various subsets of available predictors, including aggregate floor swab test results with and without dummy variables for different LTCH sites, regional wastewater signals, and weather variables ( Figure 1 ; Suppl. Table S2). A LASSO regression using only floor swab positivity and viral copies with dummy variables for different LTCHs performed similarly to the logistic regression with either predictor alone, with AUROC of 0.81 ± 0.05. 
Random Forest and XGBoost with the same predictors performed slightly worse (AUROC 0.79 ± 0.04 and 0.80 ± 0.04, respectively). Including wastewater as an additional predictor significantly improved the performance of LASSO (0.85 ± 0.04) and XGBoost (0.87 ± 0.03) models. Maximal performance in terms of AUROC occurred when swab results, weather, and wastewater variables were used with LTCH dummy variables, with LASSO performing slightly better than the other two methods (LASSO: 0.88 ± 0.03; RF: 0.86 ± 0.03; XGB: 0.87 ± 0.03). Overall, including additional weather and wastewater predictors yielded a slight improvement in outbreak prediction compared to simple logistic regression with either floor swab test positivity or viral copies as a single predictor (Suppl. Table S2). We selected the LASSO model that maximized AUROC in cross-validation as the “best” model and performed analysis of SHAP values. For this model, the mean absolute SHAP values were (from largest to smallest) wastewater 7-day rolling average (0.165), LTCH dummy variables (0.134), floor swab viral copies (0.127), floor swab positivity (0.048), wind direction (0.040), wind speed (0.023), mean temperature (0.020), relative humidity (0.010), mean barometric pressure (0.007), and finally weather variables that appear to be low utility predictors having the same 0.003 mean SHAP value: wind speed range, temperature range, precipitation, relative humidity range, and barometric pressure range ( Figure 2 ). Download figure Open in new tab Figure 2. SHAP values for each predictor of outbreak status from the selected LASSO model. DISCUSSION In this data analysis from a multicentre prospective study across 10 LTCHs, 7 the combination of floor swabs, weather data, and wastewater data could accurately predict a COVID-19 outbreak. The predictions were generated with three of the most commonly used supervised machine learning techniques.
The results highlight how the combination of advanced analytic approaches and unique data sources can be leveraged to predict outbreaks of COVID-19. Over the course of the pandemic, wastewater has received significant attention and funding for its role in estimating disease burden in a given region. Its primary use case has been for surveillance, as opposed to real-time prediction and prevention, for three main reasons. First, in general there is a lag of days (and sometimes weeks) between collection of wastewater samples and availability of results. Second, most often wastewater results are reported on a regional level, and it is rare for results to be available for an individual LTCH or hospital. As a result, wastewater generally lacks spatial resolution; although we acknowledge it is technically possible for an individual building to have its own dedicated wastewater surveillance. In contrast, floor swabbing has lower resource demands and can provide spatial resolution to the level of a single room within a building. A recent pilot demonstrated that floor swab results can be available within 48 hours of a building being swabbed. 15 However, a wider implementation study of floor sampling for SARS-CoV-2 outbreak prediction and prevention has not yet been conducted. The results of this current study suggest that the addition of weather and wastewater data can improve the predictive performance of floor swabs. Our study has four main strengths. First, it was multicentre and conducted in both urban and community settings. Second, it incorporated diverse datasets and provided estimates for the relative importance of each of the included variables. Third, it applied supervised machine learning techniques which are designed for prediction. Fourth, it represents one of the few studies that directly compared wastewater sampling to environmental surface sampling (i.e., floor swabbing). Our study also has important limitations. 
First, while our model was built with both prospective data (e.g., floor sampling) and retrospective data (e.g., wastewater data), a future prospective study is needed to identify whether acting on these predictions can mitigate the size and scope of an outbreak. Second, our study included both urban and community settings, but it is unknown how our results might generalize to rural settings or regions with less climate variability. Third, our study was restricted to LTCHs, and thus it is unknown how results might generalize to other congregate settings such as retirement homes or acute care hospitals. Fourth, we did not have access to data on other respiratory viruses such as influenza or respiratory syncytial virus (RSV). These viruses commonly cause outbreaks and have high mortality rates in LTCHs, which underscores the importance of surveillance methods that consider multiple viruses in addition to SARS-CoV-2. Fifth, we did not conduct a cost-effectiveness study to help assess whether this approach is affordable or sustainable for a LTCH. In closing, the results of our study suggest that incorporating wastewater data and weather data enhances the ability of floor swab results to predict a COVID-19 outbreak in an LTCH. Future work is needed to evaluate whether acting on such results can mitigate the size and scope of COVID-19 outbreaks in LTCHs. Data Availability Results of environmental surveillance by swabbing location cannot be shared due to data and privacy agreements. All wastewater data were obtained from the Ontario Wastewater Surveillance Consortium's publicly available repository at https://github.com/OntarioWastewaterSurveillanceConsortium/sars-cov-2-data . Weather data were downloaded from Environment Canada's publicly available Historical Climate Data database at https://climate.weather.gc.ca/ . 
https://github.com/OntarioWastewaterSurveillanceConsortium/sars-cov-2-data https://climate.weather.gc.ca/ Footnotes Study concept and design: All authors Analysis/interpretation of data: All authors Drafting of the manuscript: Moggridge JA, Fralick M, Acharya K Critical revision of the manuscript: All authors Statistical analysis: Moggridge JA Obtained funding: This study was supported by grant number DSI-CGY3R1P35 from the Data Sciences Institute at the University of Toronto. References 1. ↵ Clarke J. Impacts of the COVID-19 Pandemic in Nursing and Residential Care Facilities in Canada . Statistics Canada ; 2021 . Accessed October 31, 2023 . https://www150.statcan.gc.ca/n1/pub/45-28-0001/2021001/article/00025-eng.htm 2. ↵ Stall NM , Brown KA , Maltsev A. COVID-19 and Ontario’s long-term care homes . Journal of Elder Policy . 2021 ; 1 ( 3 ). https://www.journalofelderpolicy.org/covid-19-and-ontario.html 3. ↵ Canadian Institute for Health Information . Pandemic experience in the long-term care sector: how does Canada compare with other countries? Ottawa, ON . Published online 2020. 4. ↵ Fisman DN , Bogoch I , Lapointe-Shaw L , McCready J , Tuite AR . Risk Factors Associated With Mortality Among Residents With Coronavirus Disease 2019 (COVID-19) in Long-term Care Facilities in Ontario, Canada . JAMA Netw Open . 2020 ; 3 ( 7 ): e2015957 . OpenUrl 5. ↵ Li Y , Lu SM , Wang JL , Yao HP , Liang LG . Progress in SARS-CoV-2, diagnostic and clinical treatment of COVID-19 . Heliyon . 2024 ; 10 ( 12 ): e33179 . OpenUrl 6. ↵ Hinz A , Xing L , Doukhanine E , et al. SARS-CoV-2 detection from the built environment and wastewater and its use for hospital surveillance . Facets (Ott) . 2022 ; 7 : 82 – 97 . OpenUrl 7. ↵ Fralick Michael , Nott Caroline , Moggridge Jason , et al. Detection of Covid-19 Outbreaks Using Built Environment Testing for SARS-CoV-2 . NEJM Evidence . 2023 ; 2 ( 3 ): EVIDoa2200203 . OpenUrl 8. ↵ Ganslmeier M , Furceri D , Ostry JD . 
The impact of weather on COVID-19 pandemic . Sci Rep . 2021 ; 11 ( 1 ): 22027 . OpenUrl PubMed 9. ↵ Paraskevis D , Kostaki EG , Alygizakis N , et al. A review of the impact of weather and climate variables to COVID-19: In the absence of public health measures high temperatures cannot probably mitigate outbreaks . Sci Total Environ . 2021 ; 768 ( 144578 ): 144578 . OpenUrl PubMed 10. ↵ Ontario Ministry of Long-Term Care . Long-Term Care Home COVID-19 Data . March 30, 2023 . Accessed January 7, 2025 . https://data.ontario.ca/en/dataset/long-term-care-home-covid-19-data 11. ↵ SARS-CoV-2 Data Repository for the Ontario Wastewater Surveillance Consortium . Ontario Wastewater Surveillance Consortium (OWSC) . Accessed October 1, 2024 . https://github.com/OntarioWastewaterSurveillanceConsortium/sars-cov-2-data 12. ↵ Historical Climate Data . Government of Canada . Accessed October 31, 2023 . https://climate.weather.gc.ca/ 13. ↵ El Shawi R , Sherif Y , Al-Mallah M , Sakr S. Interpretability in HealthCare A Comparative Study of Local Machine Learning Interpretability Techniques . In: 2019 IEEE 32nd International Symposium on Computer-Based Medical Systems (CBMS) . IEEE ; 2019 : 275 – 280 . 14. ↵ Lundberg S , Lee SI . A unified approach to interpreting model predictions . arXiv [csAI] . Published online May 22, 2017. http://arxiv.org/abs/1705.07874 15. ↵ Siddiqui H , Hicks AMA , Hinz A , et al. Environmental surveillance of SARS-CoV-2 for outbreak detection in hospital: A single centre prospective study . J Clin Virol Plus . 2025 ; 5 ( 1 ): 100199 . OpenUrl View the discussion thread. Back to top Previous Next Posted October 28, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following A multicentre study to predict COVID-19 outbreaks in long-term care homes using wastewater surveillance and environmental surface sampling for SARS-CoV-2 Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share A multicentre study to predict COVID-19 outbreaks in long-term care homes using wastewater surveillance and environmental surface sampling for SARS-CoV-2 Jason A. Moggridge , Kritisha Acharya , Derek R. MacFadden , Alex Wong , Rees Kassen , Caroline Nott , Gustavo Ybazeta , David S. Guttman , Lucas Castellani , Michael Fralick medRxiv 2025.10.20.25337094; doi: https://doi.org/10.1101/2025.10.20.25337094 Share This Article: Copy Citation Tools A multicentre study to predict COVID-19 outbreaks in long-term care homes using wastewater surveillance and environmental surface sampling for SARS-CoV-2 Jason A. Moggridge , Kritisha Acharya , Derek R. MacFadden , Alex Wong , Rees Kassen , Caroline Nott , Gustavo Ybazeta , David S. 
Guttman , Lucas Castellani , Michael Fralick medRxiv 2025.10.20.25337094; doi: https://doi.org/10.1101/2025.10.20.25337094 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Public and Global Health Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15228) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6598) Geriatric Medicine (668) Health Economics (997) Health Informatics (4536) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9230) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a004eaedc97b2858',t:'MTc3OTU0ODI5NQ=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper — AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citing papers typically take a year or two to appear, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00