‘Not finding causal effect’ is not... | F1000Research "use strict";function _typeof(t){return(_typeof="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t})(t)}!function(){var t=function(){var t,e,o=[],n=window,r=n;for(;r;){try{if(r.frames.__tcfapiLocator){t=r;break}}catch(t){}if(r===n.top)break;r=r.parent}t||(!function t(){var e=n.document,o=!!n.frames.__tcfapiLocator;if(!o)if(e.body){var r=e.createElement("iframe");r.style.cssText="display:none",r.name="__tcfapiLocator",e.body.appendChild(r)}else setTimeout(t,5);return!o}(),n.__tcfapi=function(){for(var t=arguments.length,n=new Array(t),r=0;r 3&&2===parseInt(n[1],10)&&"boolean"==typeof n[3]&&(e=n[3],"function"==typeof n[2]&&n[2]("set",!0)):"ping"===n[0]?"function"==typeof n[2]&&n[2]({gdprApplies:e,cmpLoaded:!1,cmpStatus:"stub"}):o.push(n)},n.addEventListener("message",(function(t){var e="string"==typeof t.data,o={};if(e)try{o=JSON.parse(t.data)}catch(t){}else o=t.data;var n="object"===_typeof(o)&&null!==o?o.__tcfapiCall:null;n&&window.__tcfapi(n.command,n.version,(function(o,r){var a={__tcfapiReturn:{returnValue:o,success:r,callId:n.callId}};t&&t.source&&t.source.postMessage&&t.source.postMessage(e?JSON.stringify(a):a,"*")}),n.parameter)}),!1))};"undefined"!=typeof module?module.exports=t:t()}(); dataLayer = dataLayer || []; // Standard GTM initialization - Google Consent Mode handles consent automatically (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl+ '>m_auth=hzk0Vc3qFsQYhCrIoHz68A>m_preview=env-1>m_cookies_win=x';f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-MWFK8L5J'); 
;window.NREUM||(NREUM={});NREUM.init={distributed_tracing:{enabled:true},privacy:{cookies_enabled:true},ajax:{deny_list:["bam.nr-data.net"]}}; ;NREUM.loader_config={accountID:"438030",trustKey:"438030",agentID:"772317073",licenseKey:"97f8f67f26",applicationID:"772317073"} ;NREUM.info={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net",licenseKey:"97f8f67f26",applicationID:"772317073",sa:1} ;/*! For license information please see nr-loader-spa-1.236.0.min.js.LICENSE.txt */ (()=>{"use strict";var e,t,r={5763:(e,t,r)=>{r.d(t,{P_:()=>l,Mt:()=>g,C5:()=>s,DL:()=>v,OP:()=>T,lF:()=>D,Yu:()=>y,Dg:()=>h,CX:()=>c,GE:()=>b,sU:()=>_});var n=r(8632),i=r(9567);const o={beacon:n.ce.beacon,errorBeacon:n.ce.errorBeacon,licenseKey:void 0,applicationID:void 0,sa:void 0,queueTime:void 0,applicationTime:void 0,ttGuid:void 0,user:void 0,account:void 0,product:void 0,extra:void 0,jsAttributes:{},userAttributes:void 0,atts:void 0,transactionName:void 0,tNamePlain:void 0},a={};function s(e){if(!e)throw new Error("All info objects require an agent identifier!");if(!a[e])throw new Error("Info for ".concat(e," was never set"));return a[e]}function c(e,t){if(!e)throw new Error("All info objects require an agent identifier!");a[e]=(0,i.D)(t,o),(0,n.Qy)(e,a[e],"info")}var u=r(7056);const d=()=>{const e={blockSelector:"[data-nr-block]",maskInputOptions:{password:!0}};return{allow_bfcache:!0,privacy:{cookies_enabled:!0},ajax:{deny_list:void 0,enabled:!0,harvestTimeSeconds:10},distributed_tracing:{enabled:void 0,exclude_newrelic_header:void 0,cors_use_newrelic_header:void 0,cors_use_tracecontext_headers:void 0,allowed_origins:void 0},session:{domain:void 0,expiresMs:u.oD,inactiveMs:u.Hb},ssl:void 0,obfuscate:void 
0,jserrors:{enabled:!0,harvestTimeSeconds:10},metrics:{enabled:!0},page_action:{enabled:!0,harvestTimeSeconds:30},page_view_event:{enabled:!0},page_view_timing:{enabled:!0,harvestTimeSeconds:30,long_task:!1},session_trace:{enabled:!0,harvestTimeSeconds:10},harvest:{tooManyRequestsDelay:60},session_replay:{enabled:!1,harvestTimeSeconds:60,sampleRate:.1,errorSampleRate:.1,maskTextSelector:"*",maskAllInputs:!0,get blockClass(){return"nr-block"},get ignoreClass(){return"nr-ignore"},get maskTextClass(){return"nr-mask"},get blockSelector(){return e.blockSelector},set blockSelector(t){e.blockSelector+=",".concat(t)},get maskInputOptions(){return e.maskInputOptions},set maskInputOptions(t){e.maskInputOptions={...t,password:!0}}},spa:{enabled:!0,harvestTimeSeconds:10}}},f={};function l(e){if(!e)throw new Error("All configuration objects require an agent identifier!");if(!f[e])throw new Error("Configuration for ".concat(e," was never set"));return f[e]}function h(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");f[e]=(0,i.D)(t,d()),(0,n.Qy)(e,f[e],"config")}function g(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");var r=l(e);if(r){for(var n=t.split("."),i=0;i {r.d(t,{D:()=>i});var n=r(50);function i(e,t){try{if(!e||"object"!=typeof e)return(0,n.Z)("Setting a Configurable requires an object as input");if(!t||"object"!=typeof t)return(0,n.Z)("Setting a Configurable requires a model to set its initial properties");const r=Object.create(Object.getPrototypeOf(t),Object.getOwnPropertyDescriptors(t)),o=0===Object.keys(r).length?e:r;for(let a in o)if(void 0!==e[a])try{"object"==typeof e[a]&&"object"==typeof t[a]?r[a]=i(e[a],t[a]):r[a]=e[a]}catch(e){(0,n.Z)("An error occurred while setting a property of a Configurable",e)}return r}catch(e){(0,n.Z)("An error occured while setting a Configurable",e)}}},6818:(e,t,r)=>{r.d(t,{Re:()=>i,gF:()=>o,q4:()=>n});const 
n="1.236.0",i="PROD",o="CDN"},385:(e,t,r)=>{r.d(t,{FN:()=>a,IF:()=>u,Nk:()=>f,Tt:()=>s,_A:()=>o,il:()=>n,pL:()=>c,v6:()=>i,w1:()=>d});const n="undefined"!=typeof window&&!!window.document,i="undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self.navigator instanceof WorkerNavigator||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis.navigator instanceof WorkerNavigator),o=n?window:"undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis),a=""+o?.location,s=/iPad|iPhone|iPod/.test(navigator.userAgent),c=s&&"undefined"==typeof SharedWorker,u=(()=>{const e=navigator.userAgent.match(/Firefox[/\s](\d+\.\d+)/);return Array.isArray(e)&&e.length>=2?+e[1]:0})(),d=Boolean(n&&window.document.documentMode),f=!!navigator.sendBeacon},1117:(e,t,r)=>{r.d(t,{w:()=>o});var n=r(50);const i={agentIdentifier:"",ee:void 0};class o{constructor(e){try{if("object"!=typeof e)return(0,n.Z)("shared context requires an object as input");this.sharedContext={},Object.assign(this.sharedContext,i),Object.entries(e).forEach((e=>{let[t,r]=e;Object.keys(i).includes(t)&&(this.sharedContext[t]=r)}))}catch(e){(0,n.Z)("An error occured while setting SharedContext",e)}}}},8e3:(e,t,r)=>{r.d(t,{L:()=>d,R:()=>c});var n=r(2177),i=r(1284),o=r(4322),a=r(3325);const s={};function c(e,t){const r={staged:!1,priority:a.p[t]||0};u(e),s[e].get(t)||s[e].set(t,r)}function u(e){e&&(s[e]||(s[e]=new Map))}function d(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"",t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:"feature";if(u(e),!e||!s[e].get(t))return a(t);s[e].get(t).staged=!0;const r=[...s[e]];function a(t){const r=e?n.ee.get(e):n.ee,a=o.X.handlers;if(r.backlog&&a){var s=r.backlog[t],c=a[t];if(c){for(var u=0;s&&u {let[t,r]=e;return 
r.staged}))&&(r.sort(((e,t)=>e[1].priority-t[1].priority)),r.forEach((e=>{let[t]=e;a(t)})))}function f(e,t){var r=e[1];(0,i.D)(t[r],(function(t,r){var n=e[0];if(r[0]===n){var i=r[1],o=e[3],a=e[2];i.apply(o,a)}}))}},2177:(e,t,r)=>{r.d(t,{c:()=>f,ee:()=>u});var n=r(8632),i=r(2210),o=r(1284),a=r(5763),s="nr@context";let c=(0,n.fP)();var u;function d(){}function f(e){return(0,i.X)(e,s,l)}function l(){return new d}function h(){u.aborted=!0,u.backlog={}}c.ee?u=c.ee:(u=function e(t,r){var n={},c={},f={},g=!1;try{g=16===r.length&&(0,a.OP)(r).isolatedBacklog}catch(e){}var p={on:b,addEventListener:b,removeEventListener:y,emit:v,get:x,listeners:w,context:m,buffer:A,abort:h,aborted:!1,isBuffering:E,debugId:r,backlog:g?{}:t&&"object"==typeof t.backlog?t.backlog:{}};return p;function m(e){return e&&e instanceof d?e:e?(0,i.X)(e,s,l):l()}function v(e,r,n,i,o){if(!1!==o&&(o=!0),!u.aborted||i){t&&o&&t.emit(e,r,n);for(var a=m(n),s=w(e),d=s.length,f=0;fn,p:()=>i});var n=r(2177).ee.get("handle");function i(e,t,r,i,o){o?(o.buffer([e],i),o.emit(e,t,r)):(n.buffer([e],i),n.emit(e,t,r))}},4322:(e,t,r)=>{r.d(t,{X:()=>o});var n=r(5546);o.on=a;var i=o.handlers={};function o(e,t,r,o){a(o||n.E,i,e,t,r)}function a(e,t,r,i,o){o||(o="feature"),e||(e=n.E);var a=t[o]=t[o]||{};(a[r]=a[r]||[]).push([e,i])}},3239:(e,t,r)=>{r.d(t,{bP:()=>s,iz:()=>c,m$:()=>a});var n=r(385);let i=!1,o=!1;try{const e={get passive(){return i=!0,!1},get signal(){return o=!0,!1}};n._A.addEventListener("test",null,e),n._A.removeEventListener("test",null,e)}catch(e){}function a(e,t){return i||o?{capture:!!e,passive:i,signal:t}:!!e}function s(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;window.addEventListener(e,t,a(r,n))}function c(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;document.addEventListener(e,t,a(r,n))}},4402:(e,t,r)=>{r.d(t,{Ht:()=>u,M:()=>c,Rl:()=>a,ky:()=>s});var n=r(385);const 
i="xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";function o(e,t){return e?15&e[t]:16*Math.random()|0}function a(){const e=n._A?.crypto||n._A?.msCrypto;let t,r=0;return e&&e.getRandomValues&&(t=e.getRandomValues(new Uint8Array(31))),i.split("").map((e=>"x"===e?o(t,++r).toString(16):"y"===e?(3&o()|8).toString(16):e)).join("")}function s(e){const t=n._A?.crypto||n._A?.msCrypto;let r,i=0;t&&t.getRandomValues&&(r=t.getRandomValues(new Uint8Array(31)));const a=[];for(var s=0;s {r.d(t,{Bq:()=>n,Hb:()=>o,oD:()=>i});const n="NRBA",i=144e5,o=18e5},7894:(e,t,r)=>{function n(){return Math.round(performance.now())}r.d(t,{z:()=>n})},7243:(e,t,r)=>{r.d(t,{e:()=>o});var n=r(385),i={};function o(e){if(e in i)return i[e];if(0===(e||"").indexOf("data:"))return{protocol:"data"};let t;var r=n._A?.location,o={};if(n.il)t=document.createElement("a"),t.href=e;else try{t=new URL(e,r.href)}catch(e){return o}o.port=t.port;var a=t.href.split("://");!o.port&&a[1]&&(o.port=a[1].split("/")[0].split("@").pop().split(":")[1]),o.port&&"0"!==o.port||(o.port="https"===a[0]?"443":"80"),o.hostname=t.hostname||r.hostname,o.pathname=t.pathname,o.protocol=a[0],"/"!==o.pathname.charAt(0)&&(o.pathname="/"+o.pathname);var s=!t.protocol||":"===t.protocol||t.protocol===r.protocol,c=t.hostname===r.hostname&&t.port===r.port;return o.sameOrigin=s&&(!t.hostname||c),"/"===o.pathname&&(i[e]=o),o}},50:(e,t,r)=>{function n(e,t){"function"==typeof console.warn&&(console.warn("New Relic: ".concat(e)),t&&console.warn(t))}r.d(t,{Z:()=>n})},2587:(e,t,r)=>{r.d(t,{N:()=>c,T:()=>u});var n=r(2177),i=r(5546),o=r(8e3),a=r(3325);const s={stn:[a.D.sessionTrace],err:[a.D.jserrors,a.D.metrics],ins:[a.D.pageAction],spa:[a.D.spa],sr:[a.D.sessionReplay,a.D.sessionTrace]};function c(e,t){const r=n.ee.get(t);e&&"object"==typeof e&&(Object.entries(e).forEach((e=>{let[t,n]=e;void 0===u[t]&&(s[t]?s[t].forEach((e=>{n?(0,i.p)("feat-"+t,[],void 0,e,r):(0,i.p)("block-"+t,[],void 0,e,r),(0,i.p)("rumresp-"+t,[Boolean(n)],void 
0,e,r)})):n&&(0,i.p)("feat-"+t,[],void 0,void 0,r),u[t]=Boolean(n))})),Object.keys(s).forEach((e=>{void 0===u[e]&&(s[e]?.forEach((t=>(0,i.p)("rumresp-"+e,[!1],void 0,t,r))),u[e]=!1)})),(0,o.L)(t,a.D.pageViewEvent))}const u={}},2210:(e,t,r)=>{r.d(t,{X:()=>i});var n=Object.prototype.hasOwnProperty;function i(e,t,r){if(n.call(e,t))return e[t];var i=r();if(Object.defineProperty&&Object.keys)try{return Object.defineProperty(e,t,{value:i,writable:!0,enumerable:!1}),i}catch(e){}return e[t]=i,i}},1284:(e,t,r)=>{r.d(t,{D:()=>n});const n=(e,t)=>Object.entries(e||{}).map((e=>{let[r,n]=e;return t(r,n)}))},4351:(e,t,r)=>{r.d(t,{P:()=>o});var n=r(2177);const i=()=>{const e=new WeakSet;return(t,r)=>{if("object"==typeof r&&null!==r){if(e.has(r))return;e.add(r)}return r}};function o(e){try{return JSON.stringify(e,i())}catch(e){try{n.ee.emit("internal-error",[e])}catch(e){}}}},3960:(e,t,r)=>{r.d(t,{K:()=>a,b:()=>o});var n=r(3239);function i(){return"undefined"==typeof document||"complete"===document.readyState}function o(e,t){if(i())return e();(0,n.bP)("load",e,t)}function a(e){if(i())return e();(0,n.iz)("DOMContentLoaded",e)}},8632:(e,t,r)=>{r.d(t,{EZ:()=>u,Qy:()=>c,ce:()=>o,fP:()=>a,gG:()=>d,mF:()=>s});var n=r(7894),i=r(385);const o={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net"};function a(){return i._A.NREUM||(i._A.NREUM={}),void 0===i._A.newrelic&&(i._A.newrelic=i._A.NREUM),i._A.NREUM}function s(){let e=a();return e.o||(e.o={ST:i._A.setTimeout,SI:i._A.setImmediate,CT:i._A.clearTimeout,XHR:i._A.XMLHttpRequest,REQ:i._A.Request,EV:i._A.Event,PR:i._A.Promise,MO:i._A.MutationObserver,FETCH:i._A.fetch}),e}function c(e,t,r){let i=a();const o=i.initializedAgents||{},s=o[e]||{};return Object.keys(s).length||(s.initializedAt={ms:(0,n.z)(),date:new Date}),i.initializedAgents={...o,[e]:{...s,[r]:t}},i}function u(e,t){a()[e]=t}function d(){return function(){let e=a();const t=e.info||{};e.info={beacon:o.beacon,errorBeacon:o.errorBeacon,...t}}(),function(){let e=a();const 
t=e.init||{};e.init={...t}}(),s(),function(){let e=a();const t=e.loader_config||{};e.loader_config={...t}}(),a()}},7956:(e,t,r)=>{r.d(t,{N:()=>i});var n=r(3239);function i(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],r=arguments.length>2?arguments[2]:void 0,i=arguments.length>3?arguments[3]:void 0;return void(0,n.iz)("visibilitychange",(function(){if(t)return void("hidden"==document.visibilityState&&e());e(document.visibilityState)}),r,i)}},1214:(e,t,r)=>{r.d(t,{em:()=>v,u5:()=>N,QU:()=>S,_L:()=>I,Gm:()=>L,Lg:()=>M,gy:()=>U,BV:()=>Q,Kf:()=>ee});var n=r(2177);const i="nr@original";var o=Object.prototype.hasOwnProperty,a=!1;function s(e,t){return e||(e=n.ee),r.inPlace=function(e,t,n,i,o){n||(n="");var a,s,c,u="-"===n.charAt(0);for(c=0;c 2?n-2:0),o=2;o {r(A[T],e,w),r(E[T],e,w)})),r(l._A,"fetch",y),t.on(y+"end",(function(e,r){var n=this;if(r){var i=r.headers.get("content-length");null!==i&&(n.rxSize=i),t.emit(y+"done",[null,r],n)}else t.emit(y+"done",[e],n)})),t}const O={},j=["pushState","replaceState"];function S(e){const t=function(e){return(e||n.ee).get("history")}(e);return!l.il||O[t.debugId]++||(O[t.debugId]=1,s(t).inPlace(window.history,j,"-")),t}var P=r(3239);const C={},R=["appendChild","insertBefore","replaceChild"];function I(e){const t=function(e){return(e||n.ee).get("jsonp")}(e);if(!l.il||C[t.debugId])return t;C[t.debugId]=!0;var r=s(t),i=/[?&](?:callback|cb)=([^&#]+)/,o=/(.*)\.([^.]+)/,a=/^(\w+)(\.|$)(.*)$/;function c(e,t){var r=e.match(a),n=r[1],i=r[3];return i?c(i,t[n]):t[n]}return r.inPlace(Node.prototype,R,"dom-"),t.on("dom-start",(function(e){!function(e){if(!e||"string"!=typeof e.nodeName||"script"!==e.nodeName.toLowerCase())return;if("function"!=typeof e.addEventListener)return;var n=(a=e.src,s=a.match(i),s?s[1]:null);var a,s;if(!n)return;var u=function(e){var t=e.match(o);if(t&&t.length>=3)return{key:t[2],parent:c(t[1],window)};return{key:e,parent:window}}(n);if("function"!=typeof u.parent[u.key])return;var d={};function 
f(){t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}function l(){t.emit("jsonp-error",[],d),t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}r.inPlace(u.parent,[u.key],"cb-",d),e.addEventListener("load",f,(0,P.m$)(!1)),e.addEventListener("error",l,(0,P.m$)(!1)),t.emit("new-jsonp",[e.src],d)}(e[0])})),t}var k=r(5763);const H={};function L(e){const t=function(e){return(e||n.ee).get("mutation")}(e);if(!l.il||H[t.debugId])return t;H[t.debugId]=!0;var r=s(t),i=k.Yu.MO;return i&&(window.MutationObserver=function(e){return this instanceof i?new i(r(e,"fn-")):i.apply(this,arguments)},MutationObserver.prototype=i.prototype),t}const z={};function M(e){const t=function(e){return(e||n.ee).get("promise")}(e);if(z[t.debugId])return t;z[t.debugId]=!0;var r=n.c,o=s(t),a=k.Yu.PR;return a&&function(){function e(r){var n=t.context(),i=o(r,"executor-",n,null,!1);const s=Reflect.construct(a,[i],e);return t.context(s).getCtx=function(){return n},s}l._A.Promise=e,Object.defineProperty(e,"name",{value:"Promise"}),e.toString=function(){return a.toString()},Object.setPrototypeOf(e,a),["all","race"].forEach((function(r){const n=a[r];e[r]=function(e){let i=!1;[...e||[]].forEach((e=>{this.resolve(e).then(a("all"===r),a(!1))}));const o=n.apply(this,arguments);return o;function a(e){return function(){t.emit("propagate",[null,!i],o,!1,!1),i=i||!e}}}})),["resolve","reject"].forEach((function(r){const n=a[r];e[r]=function(e){const r=n.apply(this,arguments);return e!==r&&t.emit("propagate",[e,!0],r,!1,!1),r}})),e.prototype=a.prototype;const n=a.prototype.then;a.prototype.then=function(){var e=this,i=r(e);i.promise=e;for(var a=arguments.length,s=new Array(a),c=0;c e())),t};function m(e,t){i.inPlace(t,["onreadystatechange"],"fn-",E)}function b(){var 
e=this,t=r.context(e);e.readyState>3&&!t.resolved&&(t.resolved=!0,r.emit("xhr-resolved",[],e)),i.inPlace(e,f,"fn-",E)}if(function(e,t){for(var r in e)t[r]=e[r]}(o,p),p.prototype=o.prototype,i.inPlace(p.prototype,J,"-xhr-",E),r.on("send-xhr-start",(function(e,t){m(e,t),function(e){h.push(e),a&&(y?y.then(A):u?u(A):(w=-w,x.data=w))}(t)})),r.on("open-xhr-start",m),a){var y=c&&c.resolve();if(!u&&!c){var w=1,x=document.createTextNode(w);new a(A).observe(x,{characterData:!0})}}else t.on("fn-end",(function(e){e[0]&&e[0].type===d||A()}));function A(){for(var e=0;e {r.d(t,{t:()=>n});const n=r(3325).D.ajax},6660:(e,t,r)=>{r.d(t,{A:()=>i,t:()=>n});const n=r(3325).D.jserrors,i="nr@seenError"},3081:(e,t,r)=>{r.d(t,{gF:()=>o,mY:()=>i,t9:()=>n,vz:()=>s,xS:()=>a});const n=r(3325).D.metrics,i="sm",o="cm",a="storeSupportabilityMetrics",s="storeEventMetrics"},4649:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageAction},7633:(e,t,r)=>{r.d(t,{Dz:()=>i,OJ:()=>a,qw:()=>o,t9:()=>n});const n=r(3325).D.pageViewEvent,i="firstbyte",o="domcontent",a="windowload"},9251:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageViewTiming},3614:(e,t,r)=>{r.d(t,{BST_RESOURCE:()=>i,END:()=>s,FEATURE_NAME:()=>n,FN_END:()=>u,FN_START:()=>c,PUSH_STATE:()=>d,RESOURCE:()=>o,START:()=>a});const n=r(3325).D.sessionTrace,i="bstResource",o="resource",a="-start",s="-end",c="fn"+a,u="fn"+s,d="pushState"},7836:(e,t,r)=>{r.d(t,{BODY:()=>A,CB_END:()=>E,CB_START:()=>u,END:()=>x,FEATURE_NAME:()=>i,FETCH:()=>_,FETCH_BODY:()=>v,FETCH_DONE:()=>m,FETCH_START:()=>p,FN_END:()=>c,FN_START:()=>s,INTERACTION:()=>l,INTERACTION_API:()=>d,INTERACTION_EVENTS:()=>o,JSONP_END:()=>b,JSONP_NODE:()=>g,JS_TIME:()=>T,MAX_TIMER_BUDGET:()=>a,REMAINING:()=>f,SPA_NODE:()=>h,START:()=>w,originalSetTimeout:()=>y});var n=r(5763);const 
i=r(3325).D.spa,o=["click","submit","keypress","keydown","keyup","change"],a=999,s="fn-start",c="fn-end",u="cb-start",d="api-ixn-",f="remaining",l="interaction",h="spaNode",g="jsonpNode",p="fetch-start",m="fetch-done",v="fetch-body-",b="jsonp-end",y=n.Yu.ST,w="-start",x="-end",A="-body",E="cb"+x,T="jsTime",_="fetch"},5938:(e,t,r)=>{r.d(t,{W:()=>o});var n=r(5763),i=r(2177);class o{constructor(e,t,r){this.agentIdentifier=e,this.aggregator=t,this.ee=i.ee.get(e,(0,n.OP)(this.agentIdentifier).isolatedBacklog),this.featureName=r,this.blocked=!1}}},9144:(e,t,r)=>{r.d(t,{j:()=>m});var n=r(3325),i=r(5763),o=r(5546),a=r(2177),s=r(7894),c=r(8e3),u=r(3960),d=r(385),f=r(50),l=r(3081),h=r(8632);function g(){const e=(0,h.gG)();["setErrorHandler","finished","addToTrace","inlineHit","addRelease","addPageAction","setCurrentRouteName","setPageViewName","setCustomAttribute","interaction","noticeError","setUserId"].forEach((t=>{e[t]=function(){for(var r=arguments.length,n=new Array(r),i=0;i 1?r-1:0),i=1;i {e.exposed&&e.api[t]&&o.push(e.api[t](...n))})),o.length>1?o:o[0]}(t,...n)}}))}var p=r(2587);function m(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},m=arguments.length>2?arguments[2]:void 0,v=arguments.length>3?arguments[3]:void 0,{init:b,info:y,loader_config:w,runtime:x={loaderType:m},exposed:A=!0}=t;const E=(0,h.gG)();y||(b=E.init,y=E.info,w=E.loader_config),(0,i.Dg)(e,b||{}),(0,i.GE)(e,w||{}),(0,i.sU)(e,x),y.jsAttributes??={},d.v6&&(y.jsAttributes.isWorker=!0),(0,i.CX)(e,y),g();const T=function(e,t){t||(0,c.R)(e,"api");const h={};var g=a.ee.get(e),p=g.get("tracer"),m="api-",v=m+"ixn-";function b(t,r,n,o){const a=(0,i.C5)(e);return null===r?delete a.jsAttributes[t]:(0,i.CX)(e,{...a,jsAttributes:{...a.jsAttributes,[t]:r}}),x(m,n,!0,o||null===r?"session":void 0)(t,r)}function 
y(){}["setErrorHandler","finished","addToTrace","inlineHit","addRelease"].forEach((e=>h[e]=x(m,e,!0,"api"))),h.addPageAction=x(m,"addPageAction",!0,n.D.pageAction),h.setCurrentRouteName=x(m,"routeName",!0,n.D.spa),h.setPageViewName=function(t,r){if("string"==typeof t)return"/"!==t.charAt(0)&&(t="/"+t),(0,i.OP)(e).customTransaction=(r||"http://custom.transaction")+t,x(m,"setPageViewName",!0)()},h.setCustomAttribute=function(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2];if("string"==typeof e){if(["string","number"].includes(typeof t)||null===t)return b(e,t,"setCustomAttribute",r);(0,f.Z)("Failed to execute setCustomAttribute.\nNon-null value must be a string or number type, but a type of was provided."))}else(0,f.Z)("Failed to execute setCustomAttribute.\nName must be a string type, but a type of was provided."))},h.setUserId=function(e){if("string"==typeof e||null===e)return b("enduser.id",e,"setUserId",!0);(0,f.Z)("Failed to execute setUserId.\nNon-null value must be a string type, but a type of was provided."))},h.interaction=function(){return(new y).get()};var w=y.prototype={createTracer:function(e,t){var r={},i=this,a="function"==typeof t;return(0,o.p)(v+"tracer",[(0,s.z)(),e,r],i,n.D.spa,g),function(){if(p.emit((a?"":"no-")+"fn-start",[(0,s.z)(),i,a],r),a)try{return t.apply(this,arguments)}catch(e){throw p.emit("fn-err",[arguments,this,"string"==typeof e?new Error(e):e],r),e}finally{p.emit("fn-end",[(0,s.z)()],r)}}}};function x(e,t,r,i){return function(){return(0,o.p)(l.xS,["API/"+t+"/called"],void 0,n.D.metrics,g),i&&(0,o.p)(e+t,[(0,s.z)(),...arguments],r?null:this,i,g),r?void 0:this}}function A(){r.e(439).then(r.bind(r,7438)).then((t=>{let{setAPI:r}=t;r(e),(0,c.L)(e,"api")})).catch((()=>(0,f.Z)("Downloading runtime APIs failed...")))}return["actionText","setName","setAttribute","save","ignore","onEnd","getContext","end","get"].forEach((e=>{w[e]=x(v,e,void 0,n.D.spa)})),h.noticeError=function(e,t){"string"==typeof e&&(e=new 
Error(e)),(0,o.p)(l.xS,["API/noticeError/called"],void 0,n.D.metrics,g),(0,o.p)("err",[e,(0,s.z)(),!1,t],void 0,n.D.jserrors,g)},d.il?(0,u.b)((()=>A()),!0):A(),h}(e,v);return(0,h.Qy)(e,T,"api"),(0,h.Qy)(e,A,"exposed"),(0,h.EZ)("activatedFeatures",p.T),T}},3325:(e,t,r)=>{r.d(t,{D:()=>n,p:()=>i});const n={ajax:"ajax",jserrors:"jserrors",metrics:"metrics",pageAction:"page_action",pageViewEvent:"page_view_event",pageViewTiming:"page_view_timing",sessionReplay:"session_replay",sessionTrace:"session_trace",spa:"spa"},i={[n.pageViewEvent]:1,[n.pageViewTiming]:2,[n.metrics]:3,[n.jserrors]:4,[n.ajax]:5,[n.sessionTrace]:6,[n.pageAction]:7,[n.spa]:8,[n.sessionReplay]:9}}},n={};function i(e){var t=n[e];if(void 0!==t)return t.exports;var o=n[e]={exports:{}};return r[e](o,o.exports,i),o.exports}i.m=r,i.d=(e,t)=>{for(var r in t)i.o(t,r)&&!i.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},i.f={},i.e=e=>Promise.all(Object.keys(i.f).reduce(((t,r)=>(i.f[r](e,t),t)),[])),i.u=e=>(({78:"page_action-aggregate",147:"metrics-aggregate",242:"session-manager",317:"jserrors-aggregate",348:"page_view_timing-aggregate",412:"lazy-feature-loader",439:"async-api",538:"recorder",590:"session_replay-aggregate",675:"compressor",733:"session_trace-aggregate",786:"page_view_event-aggregate",873:"spa-aggregate",898:"ajax-aggregate"}[e]||e)+"."+{78:"ac76d497",147:"3dc53903",148:"1a20d5fe",242:"2a64278a",317:"49e41428",348:"bd6de33a",412:"2f55ce66",439:"30bd804e",538:"1b18459f",590:"cf0efb30",675:"ae9f91a8",733:"83105561",786:"06482edd",860:"03a8b7a5",873:"e6b09d52",898:"998ef92b"}[e]+"-1.236.0.min.js"),i.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),e={},t="NRBA:",i.l=(r,n,o,a)=>{if(e[r])e[r].push(n);else{var s,c;if(void 0!==o)for(var u=document.getElementsByTagName("script"),d=0;d {s.onerror=s.onload=null,clearTimeout(h);var i=e[r];if(delete e[r],s.parentNode&&s.parentNode.removeChild(s),i&&i.forEach((e=>e(n))),t)return t(n)},h=setTimeout(l.bind(null,void 
0,{type:"timeout",target:s}),12e4);s.onerror=l.bind(null,s.onerror),s.onload=l.bind(null,s.onload),c&&document.head.appendChild(s)}},i.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.j=364,i.p="https://js-agent.newrelic.com/",(()=>{var e={364:0,953:0};i.f.j=(t,r)=>{var n=i.o(e,t)?e[t]:void 0;if(0!==n)if(n)r.push(n[2]);else{var o=new Promise(((r,i)=>n=e[t]=[r,i]));r.push(n[2]=o);var a=i.p+i.u(t),s=new Error;i.l(a,(r=>{if(i.o(e,t)&&(0!==(n=e[t])&&(e[t]=void 0),n)){var o=r&&("load"===r.type?"missing":r.type),a=r&&r.target&&r.target.src;s.message="Loading chunk "+t+" failed.\n("+o+": "+a+")",s.name="ChunkLoadError",s.type=o,s.request=a,n[1](s)}}),"chunk-"+t,t)}};var t=(t,r)=>{var n,o,[a,s,c]=r,u=0;if(a.some((t=>0!==e[t]))){for(n in s)i.o(s,n)&&(i.m[n]=s[n]);if(c)c(i)}for(t&&t(r);u {i.r(o);var e=i(3325),t=i(5763);const r=Object.values(e.D);function n(e){const n={};return r.forEach((r=>{n[r]=function(e,r){return!1!==(0,t.Mt)(r,"".concat(e,".enabled"))}(r,e)})),n}var a=i(9144);var s=i(5546),c=i(385),u=i(8e3),d=i(5938),f=i(3960),l=i(50);class h extends d.W{constructor(e,t,r){let n=!(arguments.length>3&&void 0!==arguments[3])||arguments[3];super(e,t,r),this.auto=n,this.abortHandler,this.featAggregate,this.onAggregateImported,n&&(0,u.R)(e,r)}importAggregator(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};if(this.featAggregate||!this.auto)return;const r=c.il&&!0===(0,t.Mt)(this.agentIdentifier,"privacy.cookies_enabled");let n;this.onAggregateImported=new Promise((e=>{n=e}));const o=async()=>{let t;try{if(r){const{setupAgentSession:e}=await Promise.all([i.e(860),i.e(242)]).then(i.bind(i,3228));t=e(this.agentIdentifier)}}catch(e){(0,l.Z)("A problem occurred when starting up session manager. 
This page will not start or extend any session.",e)}try{if(!this.shouldImportAgg(this.featureName,t))return void(0,u.L)(this.agentIdentifier,this.featureName);const{lazyFeatureLoader:r}=await i.e(412).then(i.bind(i,8582)),{Aggregate:o}=await r(this.featureName,"aggregate");this.featAggregate=new o(this.agentIdentifier,this.aggregator,e),n(!0)}catch(e){(0,l.Z)("Downloading and initializing ".concat(this.featureName," failed..."),e),this.abortHandler?.(),n(!1)}};c.il?(0,f.b)((()=>o()),!0):o()}shouldImportAgg(r,n){return r!==e.D.sessionReplay||!1!==(0,t.Mt)(this.agentIdentifier,"session_trace.enabled")&&(!!n?.isNew||!!n?.state.sessionReplay)}}var g=i(7633),p=i(7894);class m extends h{static featureName=g.t9;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];if(super(r,n,g.t9,i),("undefined"==typeof PerformanceNavigationTiming||c.Tt)&&"undefined"!=typeof PerformanceTiming){const n=(0,t.OP)(r);n[g.Dz]=Math.max(Date.now()-n.offset,0),(0,f.K)((()=>n[g.qw]=Math.max((0,p.z)()-n[g.Dz],0))),(0,f.b)((()=>{const t=(0,p.z)();n[g.OJ]=Math.max(t-n[g.Dz],0),(0,s.p)("timing",["load",t],void 0,e.D.pageViewTiming,this.ee)}))}this.importAggregator()}}var v=i(1117),b=i(1284);class y extends v.w{constructor(e){super(e),this.aggregatedData={}}store(e,t,r,n,i){var o=this.getBucket(e,t,r,i);return o.metrics=function(e,t){t||(t={count:0});return t.count+=1,(0,b.D)(e,(function(e,r){t[e]=w(r,t[e])})),t}(n,o.metrics),o}merge(e,t,r,n,i){var o=this.getBucket(e,t,n,i);if(o.metrics){var a=o.metrics;a.count+=r.count,(0,b.D)(r,(function(e,t){if("count"!==e){var n=a[e],i=r[e];i&&!i.c?a[e]=w(i.t,n):a[e]=function(e,t){if(!t)return e;t.c||(t=x(t.t));return t.min=Math.min(e.min,t.min),t.max=Math.max(e.max,t.max),t.t+=e.t,t.sos+=e.sos,t.c+=e.c,t}(i,a[e])}}))}else o.metrics=r}storeMetric(e,t,r,n){var i=this.getBucket(e,t,r);return i.stats=w(n,i.stats),i}getBucket(e,t,r,n){this.aggregatedData[e]||(this.aggregatedData[e]={});var i=this.aggregatedData[e][t];return 
i||(i=this.aggregatedData[e][t]={params:r||{}},n&&(i.custom=n)),i}get(e,t){return t?this.aggregatedData[e]&&this.aggregatedData[e][t]:this.aggregatedData[e]}take(e){for(var t={},r="",n=!1,i=0;i t.max&&(t.max=e),e 2&&void 0!==arguments[2])||arguments[2];super(e,r,j.t,n),c.il&&((0,t.OP)(e).initHidden=Boolean("hidden"===document.visibilityState),(0,N.N)((()=>(0,s.p)("docHidden",[(0,p.z)()],void 0,j.t,this.ee)),!0),(0,O.bP)("pagehide",(()=>(0,s.p)("winPagehide",[(0,p.z)()],void 0,j.t,this.ee))),this.importAggregator())}}var P=i(3081);class C extends h{static featureName=P.t9;constructor(e,t){let r=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(e,t,P.t9,r),this.importAggregator()}}var R,I=i(2210),k=i(1214),H=i(2177),L={};try{R=localStorage.getItem("__nr_flags").split(","),console&&"function"==typeof console.log&&(L.console=!0,-1!==R.indexOf("dev")&&(L.dev=!0),-1!==R.indexOf("nr_dev")&&(L.nrDev=!0))}catch(e){}function z(e){try{L.console&&z(e)}catch(e){}}L.nrDev&&H.ee.on("internal-error",(function(e){z(e.stack)})),L.dev&&H.ee.on("fn-err",(function(e,t,r){z(r.stack)})),L.dev&&(z("NR AGENT IN DEVELOPMENT MODE"),z("flags: "+(0,b.D)(L,(function(e,t){return e})).join(", ")));var M=i(6660);class B extends h{static featureName=M.t;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(r,n,M.t,i),this.skipNext=0;try{this.removeOnAbort=new AbortController}catch(e){}const o=this;o.ee.on("fn-start",(function(e,t,r){o.abortHandler&&(o.skipNext+=1)})),o.ee.on("fn-err",(function(t,r,n){o.abortHandler&&!n[M.A]&&((0,I.X)(n,M.A,(function(){return!0})),this.thrown=!0,(0,s.p)("err",[n,(0,p.z)()],void 0,e.D.jserrors,o.ee))})),o.ee.on("fn-end",(function(){o.abortHandler&&!this.thrown&&o.skipNext>0&&(o.skipNext-=1)})),o.ee.on("internal-error",(function(t){(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,o.ee)})),this.origOnerror=c._A.onerror,c._A.onerror=this.onerrorHandler.bind(this),c._A.addEventListener("unhandledrejection",(t=>{const 
r=function(e){let t="Unhandled Promise Rejection: ";if(e instanceof Error)try{return e.message=t+e.message,e}catch(t){return e}if(void 0===e)return new Error(t);try{return new Error(t+(0,D.P)(e))}catch(e){return new Error(t)}}(t.reason);(0,s.p)("err",[r,(0,p.z)(),!1,{unhandledPromiseRejection:1}],void 0,e.D.jserrors,this.ee)}),(0,O.m$)(!1,this.removeOnAbort?.signal)),(0,k.gy)(this.ee),(0,k.BV)(this.ee),(0,k.em)(this.ee),(0,t.OP)(r).xhrWrappable&&(0,k.Kf)(this.ee),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}onerrorHandler(t,r,n,i,o){"function"==typeof this.origOnerror&&this.origOnerror(...arguments);try{this.skipNext?this.skipNext-=1:(0,s.p)("err",[o||new F(t,r,n),(0,p.z)()],void 0,e.D.jserrors,this.ee)}catch(t){try{(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,this.ee)}catch(e){}}return!1}}function F(e,t,r){this.message=e||"Uncaught error with no additional information",this.sourceURL=t,this.line=r}let U=1;const q="nr@id";function G(e){const t=typeof e;return!e||"object"!==t&&"function"!==t?-1:e===c._A?0:(0,I.X)(e,q,(function(){return U++}))}function V(e){if("string"==typeof e&&e.length)return e.length;if("object"==typeof e){if("undefined"!=typeof ArrayBuffer&&e instanceof ArrayBuffer&&e.byteLength)return e.byteLength;if("undefined"!=typeof Blob&&e instanceof Blob&&e.size)return e.size;if(!("undefined"!=typeof FormData&&e instanceof FormData))try{return(0,D.P)(e).length}catch(e){return}}}var X=i(7243);class W{constructor(e){this.agentIdentifier=e,this.generateTracePayload=this.generateTracePayload.bind(this),this.shouldGenerateTrace=this.shouldGenerateTrace.bind(this)}generateTracePayload(e){if(!this.shouldGenerateTrace(e))return null;var r=(0,t.DL)(this.agentIdentifier);if(!r)return null;var n=(r.accountID||"").toString()||null,i=(r.agentID||"").toString()||null,o=(r.trustKey||"").toString()||null;if(!n||!i)return null;var 
a=(0,_.M)(),s=(0,_.Ht)(),c=Date.now(),u={spanId:a,traceId:s,timestamp:c};return(e.sameOrigin||this.isAllowedOrigin(e)&&this.useTraceContextHeadersForCors())&&(u.traceContextParentHeader=this.generateTraceContextParentHeader(a,s),u.traceContextStateHeader=this.generateTraceContextStateHeader(a,c,n,i,o)),(e.sameOrigin&&!this.excludeNewrelicHeader()||!e.sameOrigin&&this.isAllowedOrigin(e)&&this.useNewrelicHeaderForCors())&&(u.newrelicHeader=this.generateTraceHeader(a,s,c,n,i,o)),u}generateTraceContextParentHeader(e,t){return"00-"+t+"-"+e+"-01"}generateTraceContextStateHeader(e,t,r,n,i){return i+"@nr=0-1-"+r+"-"+n+"-"+e+"----"+t}generateTraceHeader(e,t,r,n,i,o){if(!("function"==typeof c._A?.btoa))return null;var a={v:[0,1],d:{ty:"Browser",ac:n,ap:i,id:e,tr:t,ti:r}};return o&&n!==o&&(a.d.tk=o),btoa((0,D.P)(a))}shouldGenerateTrace(e){return this.isDtEnabled()&&this.isAllowedOrigin(e)}isAllowedOrigin(e){var r=!1,n={};if((0,t.Mt)(this.agentIdentifier,"distributed_tracing")&&(n=(0,t.P_)(this.agentIdentifier).distributed_tracing),e.sameOrigin)r=!0;else if(n.allowed_origins instanceof Array)for(var i=0;i 2&&void 0!==arguments[2])||arguments[2];super(r,n,Z.t,i),(0,t.OP)(r).xhrWrappable&&(this.dt=new W(r),this.handler=(e,t,r,n)=>(0,s.p)(e,t,r,n,this.ee),(0,k.u5)(this.ee),(0,k.Kf)(this.ee),function(r,n,i,o){function a(e){var t=this;t.totalCbs=0,t.called=0,t.cbTime=0,t.end=E,t.ended=!1,t.xhrGuids={},t.lastSize=null,t.loadCaptureCalled=!1,t.params=this.params||{},t.metrics=this.metrics||{},e.addEventListener("load",(function(r){_(t,e)}),(0,O.m$)(!1)),c.IF||e.addEventListener("progress",(function(e){t.lastSize=e.loaded}),(0,O.m$)(!1))}function s(e){this.params={method:e[0]},T(this,e[1]),this.metrics={}}function u(e,n){var i=(0,t.DL)(r);i.xpid&&this.sameOrigin&&n.setRequestHeader("X-NewRelic-ID",i.xpid);var a=o.generateTracePayload(this.parsedOrigin);if(a){var 
s=!1;a.newrelicHeader&&(n.setRequestHeader("newrelic",a.newrelicHeader),s=!0),a.traceContextParentHeader&&(n.setRequestHeader("traceparent",a.traceContextParentHeader),a.traceContextStateHeader&&n.setRequestHeader("tracestate",a.traceContextStateHeader),s=!0),s&&(this.dt=a)}}function d(e,t){var r=this.metrics,i=e[0],o=this;if(r&&i){var a=V(i);a&&(r.txSize=a)}this.startTime=(0,p.z)(),this.listener=function(e){try{"abort"!==e.type||o.loadCaptureCalled||(o.params.aborted=!0),("load"!==e.type||o.called===o.totalCbs&&(o.onloadCalled||"function"!=typeof t.onload)&&"function"==typeof o.end)&&o.end(t)}catch(e){try{n.emit("internal-error",[e])}catch(e){}}};for(var s=0;s 1?e[1]=i:e.push(i)}else e[0]&&e[0].headers&&s(e[0].headers,n)&&(this.dt=n);function s(e,t){var r=!1;return t.newrelicHeader&&(e.set("newrelic",t.newrelicHeader),r=!0),t.traceContextParentHeader&&(e.set("traceparent",t.traceContextParentHeader),t.traceContextStateHeader&&e.set("tracestate",t.traceContextStateHeader),r=!0),r}}function x(e,t){this.params={},this.metrics={},this.startTime=(0,p.z)(),this.dt=t,e.length>=1&&(this.target=e[0]),e.length>=2&&(this.opts=e[1]);var r,n=this.opts||{},i=this.target;"string"==typeof i?r=i:"object"==typeof i&&i instanceof Y?r=i.url:c._A?.URL&&"object"==typeof i&&i instanceof URL&&(r=i.href),T(this,r);var o=(""+(i&&i instanceof Y&&i.method||n.method||"GET")).toUpperCase();this.params.method=o,this.txSize=V(n.body)||0}function A(t,r){var n;this.endTime=(0,p.z)(),this.params||(this.params={}),this.params.status=r?r.status:0,"string"==typeof this.rxSize&&this.rxSize.length>0&&(n=+this.rxSize);var o={txSize:this.txSize,rxSize:n,duration:(0,p.z)()-this.startTime};i("xhr",[this.params,o,this.startTime,this.endTime,"fetch"],this,e.D.ajax)}function E(t){var r=this.params,n=this.metrics;if(!this.ended){this.ended=!0;for(var o=0;o 2&&void 0!==arguments[2])||arguments[2];super(e,t,we.t,r),this.importAggregator()}}new class{constructor(e){let t=arguments.length>1&&void 
0!==arguments[1]?arguments[1]:(0,_.ky)(16);c._A?(this.agentIdentifier=t,this.sharedAggregator=new y({agentIdentifier:this.agentIdentifier}),this.features={},this.desiredFeatures=new Set(e.features||[]),this.desiredFeatures.add(m),Object.assign(this,(0,a.j)(this.agentIdentifier,e,e.loaderType||"agent")),this.start()):(0,l.Z)("Failed to initial the agent. Could not determine the runtime environment.")}get config(){return{info:(0,t.C5)(this.agentIdentifier),init:(0,t.P_)(this.agentIdentifier),loader_config:(0,t.DL)(this.agentIdentifier),runtime:(0,t.OP)(this.agentIdentifier)}}start(){const t="features";try{const r=n(this.agentIdentifier),i=[...this.desiredFeatures];i.sort(((t,r)=>e.p[t.featureName]-e.p[r.featureName])),i.forEach((t=>{if(r[t.featureName]||t.featureName===e.D.pageViewEvent){const n=function(t){switch(t){case e.D.ajax:return[e.D.jserrors];case e.D.sessionTrace:return[e.D.ajax,e.D.pageViewEvent];case e.D.sessionReplay:return[e.D.sessionTrace];case e.D.pageViewTiming:return[e.D.pageViewEvent];default:return[]}}(t.featureName);n.every((e=>r[e]))||(0,l.Z)("".concat(t.featureName," is enabled but one or more dependent features has been disabled (").concat((0,D.P)(n),"). 
This may cause unintended consequences or missing data...")),this.features[t.featureName]=new t(this.agentIdentifier,this.sharedAggregator)}})),(0,T.Qy)(this.agentIdentifier,this.features,t)}catch(e){(0,l.Z)("Failed to initialize all enabled instrument classes (agent aborted) -",e);for(const e in this.features)this.features[e].abortHandler?.();const r=(0,T.fP)();return delete r.initializedAgents[this.agentIdentifier]?.api,delete r.initializedAgents[this.agentIdentifier]?.[t],delete this.sharedAggregator,r.ee?.abort(),delete r.ee?.get(this.agentIdentifier),!1}}}({features:[J,m,S,class extends h{static featureName=oe;constructor(t,r){if(super(t,r,oe,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;const n=this.ee;let i;(0,k.QU)(n),this.eventsEE=(0,k.em)(n),this.eventsEE.on(se,(function(e,t){this.bstStart=(0,p.z)()})),this.eventsEE.on(ae,(function(t,r){(0,s.p)("bst",[t[0],r,this.bstStart,(0,p.z)()],void 0,e.D.sessionTrace,n)})),n.on(ce+ne,(function(e){this.time=(0,p.z)(),this.startPath=location.pathname+location.hash})),n.on(ce+ie,(function(t){(0,s.p)("bstHist",[location.pathname+location.hash,this.startPath,this.time],void 0,e.D.sessionTrace,n)}));try{i=new PerformanceObserver((t=>{const r=t.getEntries();(0,s.p)(te,[r],void 0,e.D.sessionTrace,n)})),i.observe({type:re,buffered:!0})}catch(e){}this.importAggregator({resourceObserver:i})}},C,xe,B,class extends h{static featureName=de;constructor(e,r){if(super(e,r,de,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;if(!(0,t.OP)(e).xhrWrappable)return;try{this.removeOnAbort=new AbortController}catch(e){}let n,i=0;const o=this.ee.get("tracer"),a=(0,k._L)(this.ee),s=(0,k.Lg)(this.ee),u=(0,k.BV)(this.ee),d=(0,k.Kf)(this.ee),f=this.ee.get("events"),l=(0,k.u5)(this.ee),h=(0,k.QU)(this.ee),g=(0,k.Gm)(this.ee);function m(e,t){h.emit("newURL",[""+window.location,t])}function v(){i++,n=window.location.hash,this[ve]=(0,p.z)()}function b(){i--,window.location.hash!==n&&m(0,!0);var 
e=(0,p.z)();this[pe]=~~this[pe]+e-this[ve],this[ye]=e}function y(e,t){e.on(t,(function(){this[t]=(0,p.z)()}))}this.ee.on(ve,v),s.on(be,v),a.on(be,v),this.ee.on(ye,b),s.on(ge,b),a.on(ge,b),this.ee.buffer([ve,ye,"xhr-resolved"],this.featureName),f.buffer([ve],this.featureName),u.buffer(["setTimeout"+le,"clearTimeout"+fe,ve],this.featureName),d.buffer([ve,"new-xhr","send-xhr"+fe],this.featureName),l.buffer([me+fe,me+"-done",me+he+fe,me+he+le],this.featureName),h.buffer(["newURL"],this.featureName),g.buffer([ve],this.featureName),s.buffer(["propagate",be,ge,"executor-err","resolve"+fe],this.featureName),o.buffer([ve,"no-"+ve],this.featureName),a.buffer(["new-jsonp","cb-start","jsonp-error","jsonp-end"],this.featureName),y(l,me+fe),y(l,me+"-done"),y(a,"new-jsonp"),y(a,"jsonp-end"),y(a,"cb-start"),h.on("pushState-end",m),h.on("replaceState-end",m),window.addEventListener("hashchange",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("load",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("popstate",(function(){m(0,i>1)}),(0,O.m$)(!0,this.removeOnAbort?.signal)),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}}],loaderType:"spa"})})(),window.NRBA=o})(); window.jQuery || document.write(' ') CKEDITOR_BASEPATH='https://f1000research.com/js/vendor/ckeditor/' window.reactTheme = 'research'; window.MathJax = { CommonHTML: { linebreaks: { automatic: true } }, 'HTML-CSS': { linebreaks: { automatic: true } }, SVG: { linebreaks: { automatic: true } }, AuthorInit: function() { MathJax.Hub.Register.MessageHook('End Process', function () { let timeout = false; // holder for timeout id const delay = 250; // delay after event is "complete" to run callback const reflowMath = function() { const dispFormulas = document.querySelectorAll('.disp-formula.panel'); if (!dispFormulas) { return; } for (const dispFormula of dispFormulas) { const child = 
dispFormula.querySelector('.MathJax_Preview').nextSibling.firstChild; const isMultiline = MathJax.Hub.getAllJax(dispFormula)[0].root.isMultiline; if (dispFormula.offsetWidth < child.offsetWidth || isMultiline) { MathJax.Hub.Queue(['Rerender', MathJax.Hub, dispFormula]); } } }; window.addEventListener('resize', function() { clearTimeout(timeout); // clear the timeout timeout = setTimeout(reflowMath, delay); // start timing for event "completion" }); }); }, }; if (window.location.hash == '#_=_'){ window.location = window.location.href.split('#')[0] } !function(f,b,e,v,n,t,s){if(f.fbq)return;n=f.fbq=function() {n.callMethod? n.callMethod.apply(n,arguments):n.queue.push(arguments)} ;if(!f._fbq)f._fbq=n; n.push=n;n.loaded=!0;n.version='2.0';n.queue=[];t=b.createElement(e);t.async=!0; t.src=v;s=b.getElementsByTagName(e)[0];s.parentNode.insertBefore(t,s)}(window, document,'script','https://connect.facebook.net/en_US/fbevents.js'); fbq('init', '1641728616063202'); fbq('track', "PixelInitialized", {}); (function(h,o,t,j,a,r){ h.hj=h.hj||function(){(h.hj.q=h.hj.q||[]).push(arguments)}; h._hjSettings={hjid:2318163,hjsv:6}; a=o.getElementsByTagName('head')[0]; r=o.createElement('script');r.async=1; r.src=t+h._hjSettings.hjid+j+h._hjSettings.hjsv; a.appendChild(r); })(window,document,'https://static.hotjar.com/c/hotjar-','.js?sv='); search file_upload Submit your research search menu close search Browse Gateways & Collections How to Publish Submit your Research My Submissions Article Guidelines Article Guidelines (New Versions) Open Data, Software and Code Guidelines Open Data and Accessible Source Materials Guidelines (HSS) Open Data, Software and Code Guidelines (PSE) Prepublication Checks Production Process Posters and Slides Guidelines Document Guidelines Article Processing Charges Peer Review Finding Article Reviewers About How it Works For Reviewers Our Advisors Policies Glossary FAQs For Developers Newsroom Contact My Research Submissions Content and Tracking Alerts My 
Details Sign In file_upload Submit your research { "@context": "https://schema.org", "@type": "ScholarlyArticle", "mainEntityOfPage": { "@type": "WebPage", "@id": "https://f1000research.com/articles/11-456" }, "headline": "‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on...", "datePublished": "2022-04-25T15:26:22", "dateModified": "2024-04-16T15:56:17", "author": [ { "@type": "Person", "name": "Akira Endo" } ], "publisher": { "@type": "Organization", "name": "F1000Research", "logo": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 480, "width": 60 } }, "image": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 1200, "width": 150 }, "description": "In a paper recently published in Nature Medicine, Fukumoto et al. tried to assess the government-led school closure policy during the early phase of the COVID-19 pandemic in Japan. They compared the reported incidence rates between municipalities that had and had not implemented school closure in selected periods from March–May 2020, where they matched for various potential confounders, and claimed that there was no causal effect on the incidence rates of COVID-19. However, the effective sample size (ESS) of their dataset had been substantially reduced in the process of matching due to imbalanced covariates between the treatment (i.e. with closure) and control (without closure) municipalities, which led to the wide uncertainty in the estimates. Despite the study title starting with “No causal effect of school closures”, their results are insufficient to exclude the possibility of a strong mitigating effect of school closure on incidence of COVID-19. In this replication/reanalysis study, we showed that the confidence intervals of the effect estimates from Fukumoto et al. included a 100% relative reduction in COVID-19 incidence. 
Simulations of a hypothetical 50% or 80% mitigating effect hardly yielded statistical significance with the same study design and sample size. We also showed that matching of variables that had large influence on propensity scores (e.g. prefecture dummy variables) may have been incomplete." } { "@context": "http://schema.org", "@type": "BreadcrumbList", "itemListElement": [ { "@type": "ListItem", "position": "1", "item": { "@id": "https://f1000research.com/", "name": "Home" } }, { "@type": "ListItem", "position": "2", "item": { "@id": "https://f1000research.com/browse/articles", "name": "Browse" } }, { "@type": "ListItem", "position": "3", "item": { "@id": "https://f1000research.com/articles/11-456/v2", "name": "‘Not finding causal effect’ is not ‘finding no causal effect’ of school..." } } ] } Home Browse ‘Not finding causal effect’ is not ‘finding no causal effect’ of school... ALL Metrics - Views Downloads Get PDF Get XML Cite How to cite this article Endo A. ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2024, 11 :456 ( https://doi.org/10.12688/f1000research.111915.2 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. Close Copy Citation Details Export Export Citation Sciwheel EndNote Ref. 
Manager Bibtex ProCite Sente EXPORT Select a format first Track Share ▬ ✚ Correspondence Revised ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] Akira Endo https://orcid.org/0000-0001-6377-7296 1-4 Akira Endo https://orcid.org/0000-0001-6377-7296 1-4 PUBLISHED 16 Apr 2024 Author details Author details 1 The Centre for Mathematical Modelling of Infectious Diseases, London School of Hygiene & Tropical Medicine, London, WC1E 7HT, UK 2 Department of Infectious Disease Epidemiology, London School of Hygiene & Tropical Medicine, London, WC1E 7HT, UK 3 School of Tropical Medicine and Global Health, Nagasaki University, Nagasaki, 852-8521, Japan 4 Saw Swee Hock School of Public Health, National University of Singapore, Singapore, 117549, Singapore Akira Endo Roles: Conceptualization, Data Curation, Formal Analysis, Investigation, Methodology, Writing – Original Draft Preparation OPEN PEER REVIEW DETAILS REVIEWER STATUS This article is included in the Japan Institutional Gateway gateway. Abstract In a paper recently published in Nature Medicine , Fukumoto et al. tried to assess the government-led school closure policy during the early phase of the COVID-19 pandemic in Japan. They compared the reported incidence rates between municipalities that had and had not implemented school closure in selected periods from March–May 2020, where they matched for various potential confounders, and claimed that there was no causal effect on the incidence rates of COVID-19. However, the effective sample size (ESS) of their dataset had been substantially reduced in the process of matching due to imbalanced covariates between the treatment (i.e. with closure) and control (without closure) municipalities, which led to the wide uncertainty in the estimates. 
Despite the study title starting with “No causal effect of school closures”, their results are insufficient to exclude the possibility of a strong mitigating effect of school closure on incidence of COVID-19. In this replication/reanalysis study, we showed that the confidence intervals of the effect estimates from Fukumoto et al. included a 100% relative reduction in COVID-19 incidence. Simulations of a hypothetical 50% or 80% mitigating effect hardly yielded statistical significance with the same study design and sample size. We also showed that matching of variables that had large influence on propensity scores (e.g. prefecture dummy variables) may have been incomplete. READ ALL READ LESS Keywords COVID-19, school closure, Japan, causal inference, reanalysis Corresponding Author(s) Akira Endo ( [email protected] ) Close Corresponding author: Akira Endo Competing interests: AE received a grant from Taisho Pharmaceutical Co., Ltd. for research outside of this study. Grant information: AE was supported by JSPS KAKENHI (JP22K17329), JSPS Overseas Research Fellowships and Japan Science and Technology Agency (JPMJPR22R3). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript. Copyright: © 2024 Endo A. This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. How to cite: Endo A. ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . 
F1000Research 2024, 11 :456 ( https://doi.org/10.12688/f1000research.111915.2 ) First published: 25 Apr 2022, 11 :456 ( https://doi.org/10.12688/f1000research.111915.1 ) Latest published: 16 Apr 2024, 11 :456 ( https://doi.org/10.12688/f1000research.111915.2 ) Revised Amendments from Version 1 I have included more discussions in response to the peer review comments: notably the two additional issues in Fukumoto et al. raised by the reviewers; the choice of estimand for the main analysis (ATC instead of ATT) and potential residual confounding. The overall conclusion of the article remains the same. I have included more discussions in response to the peer review comments: notably the two additional issues in Fukumoto et al. raised by the reviewers; the choice of estimand for the main analysis (ATC instead of ATT) and potential residual confounding. The overall conclusion of the article remains the same. See the author's detailed response to the review by Takehiko I. Hayashi See the author's detailed response to the review by Koichiro Shiba READ REVIEWER RESPONSES Introduction A paper recently published in Nature Medicine , Fukumoto et al. , tried to assess the government-led school closure policy during the early phase of the COVID-19 pandemic in Japan. They compared the reported incidence rates between municipalities that had and had not implemented school closure in selected periods from March–May 2020, where they matched for various potential confounders, and claimed that they found no causal effect on the incidence rates of COVID-19. School closure as a means to control outbreaks has been studied mostly for influenza prior to the emergence of COVID-19, which generally suggested low-to-moderate effects, but the evidence on other respiratory infections including coronavirus diseases has been limited ( Viner et al. , 2020 ). 
Sometimes decisions need to be made in the lack of sufficient evidence in the earliest phase of the pandemic; nonetheless, such decisions should undergo retrospective policy assessment to provide insights and refinement for future pandemic responses. One of the challenges in this type of analysis of the early COVID-19 epidemic in Japan is the limited statistical power due to low case counts. During the first wave of the epidemic from February to June 2020 that overlapped with the study period of Fukumoto et al. , Japan never observed more than 1,000 COVID-19 cases per day. As a result, out of the total 79,989 municipality-level daily counts from the 847 municipalities included, 99.9% were less than 10 cases per day (Figure S2 of the original study). Moreover, the matching technique used to minimise confounding has a known side effect of limiting statistical power, especially when there is little overlap in the covariates between arms ( King et al. , 2017 ). Unfortunately, the analysis in Fukumoto et al. appears to suffer from these issues. The study title says “No causal effect”, which is a rather strong statement given the substantial uncertainty in their estimates. As the saying goes, “absence of evidence is not evidence of absence”—when the uncertainty range covers practically meaningful values, it should not be prematurely concluded that there is “no effect” just because the effect estimates are statistically insignificant. Here I highlight limitations of the analysis and discuss possible factors that may have rendered the study underpowered. Relative ATC and ATT estimates The original study measures the effect of school closures as the absolute difference in incidence rates between the treatment and control municipalities. However, the theoretical ground is unclear for assuming a fixed additive effect of school closures on the incidence rate per capita. 
Infectious disease risks are inherently dynamic; more current infections in a population would result in a greater risk of infection among susceptible individuals through increased encounters with infectious others. This means that the effect of school closures, which are intended to reduce contacts at schools, should also depend on the baseline incidence in the population because the risk of infection averted would be the reduction in contacts multiplied by the probability that the contacts were otherwise with infectious individuals. The effect estimates relative to the baseline incidence would therefore be a more relevant and interpretable measure for assessment of its practical use. It should also be noted that since incidence rates can only take non-negative values, the absolute mitigating effect of school closure can only be as high as the average incidence rate in the control group. I rescaled the reported average treatment effects (average treatment effect on the control: ATC; and average treatment effect on the treatment: ATT) and their confidence intervals relative to the average outcome (incidence rate per capita) in the control group ( Figure 1 ). The confidence intervals of the relative ATC and ATT cover most of the regions from 100% reduction to 100% elevation, suggesting the underpowered nature of the original study. An effect of 50% reduction (i.e. -50% relative effect), which most experts would agree is of practical significance, or even complete reduction (i.e. -100%) was within the confidence intervals over a substantial part of the period of interest. 
The effective sample size (ESS; a proxy measure for the amount of information contained in weighted samples ( Shook-Sa and Hudgens )) of the matched arms of around 40–50 ( Figure 1d ) was likely insufficient to find statistical significance because incidence of infectious diseases typically exhibits higher dispersion than independent- and identically-distributed settings due to its self-exciting nature (i.e. an increase in cases induces a further increase via transmission). Figure 1. Relative average treatment effect on the control (ATC) and average treatment effect on the treatment (ATT). The turquoise vertical lines represent the date of treatment (school closure). The black lines and shaded areas represent the mean effect and 95% confidence intervals, respectively. (a) Relative ATC for the closure as of April 6, 2020. (b) Relative ATC for the closure as of April 10, 2020. (c) Relative ATT for the closure as of April 6, 2020. (d) Comparison of sample sizes. The number of all samples included for matching, the number of unique samples matched to at least one other sample and the effective sample size (ESS) of the matched samples are shown. Statistical power demonstration with assumed causal mitigating effect of 50%/80% To further examine the statistical power of the study, I artificially modified the dataset such that school closure has a 50% or 80% mitigating effect on the incidence rate per capita. On the treatment reference date (April 6) and onward, the expected incidence rate of each municipality in the treatment group was assumed to be 50%/20% that of the matched control municipality plus Poisson noise (see Extended data: Supplementary document for details). The results suggested that, even with as much as 50%/80% mitigating effect, the approach in the original study might not have reached statistical significance ( Figure 2 ). 
The absolute ATT for the 50% mitigating effect ( Figure 2b ) appears similar to what were referred to as “no effect” in the original study. ATT for the 80% mitigating effect was also statistically insignificant ( Figure 2c and 2d ), suggesting that the study was underpowered to find even moderate to high mitigating effects, if any. ATC estimates also yielded similarly insignificant/barely significant patterns ( Figure 3 ). Figure 2. Simulated average treatment effect on the treatment (ATT) estimates assuming 50%/80% mitigating effects. (a) The average outcome (incidence per capita) of the matched treatment (black) and control (red) groups for closure as of April 6, 2020. (b) Absolute ATT estimates (black line) and 95% confidence intervals (shaded area) for closure as of April 6. (c) Relative ATT estimates and 95% confidence intervals for closure as of April 6. (d)–(f) Those for closure as of April 10. Figure 3. Simulated average treatment effect on the control (ATC) estimates assuming 50%/80% mitigating effects. (a) The average outcome (incidence per capita) of the unmatched treatment (dashed), matched treatment (black) and control (red) groups for closure as of April 6, 2020. (b) Absolute ATC estimates (black line) and 95% confidence intervals (shaded area) for closure as of April 6. (c) Relative ATC estimates and 95% confidence intervals for closure as of April 6. (d)–(f) Those for closure as of April 10. Separation of propensity scores I also noticed that propensity scores computed for one of the subanalyses included, inverse-probability weighting, exhibited substantial/complete “separation” ( Heinze & Schemper, 2002 ) and most samples were essentially lost due to the substantial imbalance in the assigned weights ( Figure 4 ). Although separation of propensity scores can arise from overfitting, in this case it remained (while slightly ameliorated) even after addressing overfitting by Lasso regularisation ( Figure 5 ). 
This indicates that the treatment assignments may have been nearly deterministic in the dataset, which can compromise the performance of quasi-experimental causal inference via “positivity violation” ( Petersen et al. , 2020 ). Figure 4. Propensity scores and effective sample sizes for the inverse probability weighting analysis in the original study. (a) Balance of propensity scores before and after matching for school closure as of April 6, 2020. (b) Balance of propensity scores before and after matching for school closure as of April 10, 2020. (c) All and effective sample sizes and the maximum weight among the samples. The effective sample size of NaN indicates that all the samples received zero weights. Figure 5. Inverse probability weighting with Lasso regularisation. (a) The average outcome (incidence per capita) of the unmatched treatment (dashed), matched treatment (black) and control (red) groups for closure as of April 6, 2020. (b) Absolute ATC estimates (black line) and 95% confidence intervals (shaded area) for closure as of April 6. (c) Result of 10-fold cross validation. The x-axis represents the logarithm of the regularisation coefficient λ for each model; the number of included variables is also displayed above the panel. The left dotted vertical line denotes the selected model with the best cross validation performance and the right dotted line the most parsimonious within the 1 standard error range of the performance from the best model (for reference purpose). (d) Balance of propensity scores before and after matching. (e)–(h) Those for closure as of April 10. (i) All and effective sample sizes and the maximum weight among the samples. 
The authors did not use propensity scores in the Mahalanobis distance-based genetic matching for the main analysis as opposed to the general recommendation ( Diamond & Sekhon, 2013 ) (the authors cite King & Nielsen, 2019 as a reason not to use propensity scores, the authors of which however clarify that their criticism does not apply to genetic matching). This means that the covariates that strongly determined the treatment assignment may not have received large weights (and therefore were not prioritised) in the matching process, which could leave unadjusted bias arising from these potential confounders. For example, many regression coefficients for prefecture dummy variables had large values (~5 or larger) in the Lasso-regularised model, whereas 236 out of 483 matched pairs of municipalities in the original analysis for April 6 were from different prefectures. The robustness to the above concerns could be assessed by computing ESS from another genetic matching including propensity scores and a calliper (to ensure the matched pairs have sufficiently similar features), which I report in the next section. Reanalysis with genetic matching with propensity scores and a calliper I reanalysed the original dataset with the genetic matching algorithm incorporating propensity scores and a calliper and estimated ATCs for school closures as of April 6 and 10, 2020. Propensity scores were estimated by a Lasso-regularised linear regression model and included in genetic matching with a calliper of 0.25 ( Rosenbaum & Rubin, 1985 ). The results remained statistically insignificant and the confidence intervals for the relative effects covered most of the region from -100% to 100%, although the direction of the weak trend reversed for closure as of April 6 from the original study ( Figure 6 ). ESS of the matched treatment group was only 7 and 3.8 for April 6 and 10, respectively, indicating that the results relied on only a small set of samples that were repeatedly used in matching. 
Genetic matching is a generalisation of propensity score and Mahalanobis distance matching that searches for optimal covariate balance and thus should achieve no worse balance than matching using only Mahalanobis distance ( Diamond & Sekhon, 2013 ). The substantial loss of ESS in the updated genetic matching with propensity scores suggests that improved matching required more samples to be discarded and that both the original and current results are likely unreliable. Figure 6. Re-estimated average treatment effect on the control (ATC) using a genetic matching with propensity scores and a calliper of 0.25. (a) The average outcome (incidence per capita) of the unmatched treatment (dashed black), matched treatment (solid black) and control (red) groups for closure as of April 6, 2020. (b) Absolute ATC estimates (black line) and 95% confidence intervals (shaded area) for closure as of April 6. (c) Relative ATC estimates and 95% confidence intervals for closure as of April 6. (d)–(f) Those for closure as of April 10. Discussion and Conclusion The reanalysis of Fukumoto et al. suggested that the study was inherently underpowered to identify the presence of causal effects of school closure on COVID-19. While I recognise the importance of their attempt to assess the school closure policy given its collateral effect imposed onto students and their families, I argue that their conclusion of “no causal effect” was not well supported by data due to the limited statistical power. Finding no mitigating effect itself would not be surprising as children were not the centre of the outbreak especially in the earliest phase ( Davies et al. 2020 ); nonetheless, evidence claiming “no effect” would need to show that effects were at least below the level of practical significance. In addition to this issue of insufficient statistical power, which I demonstrated in the present reanalysis, two additional issues have been raised during the peer review process of this article. 
For one: the authors’ choice of ATC as the main estimand may have been suboptimal as Shiba has pointed out in his comment ( Shiba, 2022 ). The control group in the original study may have consisted of municipalities that did not need school closures because of low incidence. ATC in this context would represent the effect in settings where the policy was not needed, which is of limited political implication. To counterargue against school closures as a control policy, the authors should have aimed to robustly show insufficient effect of such a policy even in municipalities in which school closures had been a selectable option (possibly because of higher incidence rate, where an effective policy could be more impactful). For the other: residual confounding may have remained among the matched samples. Both ( Shiba, 2022 ) and ( Hayashi, 2022 ) expressed concern on the immediate positive effect on incidence rate (e.g. increased incidence) immediately after the implementation of school closures in the treated group, which Fukumoto et al. left unexplained. Unless a plausible causal mechanism in which school closures could increase COVID-19 incidence is provided, this gap between the treated and control group may indicate residual bias, which is unsurprising given my reanalysis results suggesting matching failure. Hayashi additionally suggested that the trend in incidence (e.g. increasing/decreasing) may be one of the potential confounding variables that had not been adjusted for in the original study ( Hayashi, 2022 ). Altogether, these limitations represent difficulties in post-hoc causal analysis of mass interventions implemented without a built-in evaluation design such as randomisation. The fact that even the reasonably designed approach of Fukumoto et al. suffers insufficient power emphasises the importance of the “evidence-generating” philosophy in policy planning as has been promoted for medicine ( Embi & Payne, 2013 ). 
Data availability Underlying data This study did not generate original data. The underlying dataset is available from the repository associated with the original study: Harvard Dataverse. Replication Data for: No causal effect of school closures in Japan on the spread of COVID-19 in spring 2020. DOI: https://doi.org/10.7910/DVN/N803UQ ( Fukumoto et al. 2021a ). Data are available under the terms of the Creative Commons Zero “No rights reserved” data waiver . Extended data Replication code along with the full analysis report (Extended data: Supplementary document) is available from a GitHub repository: https://github.com/akira-endo/reanalysis_Fukumoto2021 . Archived version of the above repository at time of publication is available from: Zenodo. akira-endo/reanalysis_Fukumoto2021: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19. DOI: https://doi.org/10.5281/zenodo.6457916 ( Endo, 2022 ). This project contains the following data: - main.html/main.ipynb (Extended data: Supplementary document). - replication codes and data from the original study ( Fukumoto et al. 2021a ) which are partially modified and reused. - replication codes for the analysis conducted in this study. Data are available under the terms of the Creative Commons Attribution 4.0 International license (CC-BY 4.0). Acknowledgement I thank Takahiro Kinoshita for insightful discussion. References Davies NG, Klepac P, Liu Y, et al. : Age-dependent effects in the transmission and control of COVID-19 epidemics. Nat. Med. 2020; 26 (26): 1205–1211. PubMed Abstract | Publisher Full Text Diamond A, Sekhon JS: Genetic Matching for Estimating Causal Effects: A General Multivariate Matching Method for Achieving Balance in Observational Studies. Rev. Econ. Stat. 2013; 95 (3): 932–945. Publisher Full Text Embi PJ, Payne PRO: Evidence Generating Medicine. Med. Care. 2013; 51 (8 Suppl 3): S87–S91. 
Publisher Full Text Endo A: akira-endo/reanalysis_Fukumoto2021: ’Not finding causal effect’ is not ’finding no causal effect’ of school closure on COVID-19 (v.1.0.0). Zenodo. 2022. Publisher Full Text Fukumoto K, McClean CT, Nakagawa K: No causal effect of school closures in Japan on the spread of COVID-19 in spring 2020. Nat. Med. 2021a; 27 : 2111–2119. PubMed Abstract | Publisher Full Text Fukumoto K, McClean CT, Nakagawa K: Replication data for: no causal effect of school closures in Japan on the spread of COVID-19 in spring 2020. Harvard Dataverse. 2021b; 27 : 2111–2119. (Accessed: 8 November 2021). Publisher Full Text Hayashi TI: Peer Review Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 1; peer review: 1 approved, 1 approved with reservations]. F1000Research. 2022; 11 : 456. Publisher Full Text Heinze G, Schemper M: A solution to the problem of separation in logistic regression. Stat. Med. 2002; 21 : 2409–2419. PubMed Abstract | Publisher Full Text King G, Lucas C, Nielsen R: The Balance-Sample Size Frontier in Matching Methods for Causal Inference. Am. J. Political Sci. 2017; 61 (2): 473–489. Publisher Full Text King G, Nielsen R: Why propensity scores should not be used for matching. Polit. Anal. 2019; 27 (4): 435–454. Publisher Full Text Petersen ML, Porter KE, Gruber S, et al. : Diagnosing and responding to violations in the positivity assumption. Stat. Methods Med. Res. 2020; 21 (1): 31–54. PubMed Abstract | Publisher Full Text Rosenbaum PR, Rubin DB: Constructing a Control Group Using Multivariate Matched Sampling Methods That Incorporate the Propensity Score. Am. Stat. 1985; 39 (1): 33–38. Publisher Full Text Shook-Sa BE, Hudgens MG: Power and sample size for observational studies of point exposure effects. Biometrics. 2022 Mar; 78 (1): 388–398. 
PubMed Abstract | Publisher Full Text | Free Full Text Shiba K: Peer Review Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 1; peer review: 1 approved, 1 approved with reservations]. F1000Research. 2022; 11 : 456. Publisher Full Text Viner RM, Russell SJ, Croker H, et al. : School closure and management practices during coronavirus outbreaks including COVID-19: a rapid systematic review. Lancet Child Adolesc. Health. 2020; 4 (5): 397–404. PubMed Abstract | Publisher Full Text Comments on this article Comments (0) Version 2 VERSION 2 PUBLISHED 25 Apr 2022 ADD YOUR COMMENT Comment Author details Author details 1 The Centre for Mathematical Modelling of Infectious Diseases, London School of Hygiene & Tropical Medicine, London, WC1E 7HT, UK 2 Department of Infectious Disease Epidemiology, London School of Hygiene & Tropical Medicine, London, WC1E 7HT, UK 3 School of Tropical Medicine and Global Health, Nagasaki University, Nagasaki, 852-8521, Japan 4 Saw Swee Hock School of Public Health, National University of Singapore, Singapore, 117549, Singapore Akira Endo Roles: Conceptualization, Data Curation, Formal Analysis, Investigation, Methodology, Writing – Original Draft Preparation Competing interests AE received a grant from Taisho Pharmaceutical Co., Ltd. for research outside of this study. Grant information AE was supported by JSPS KAKENHI (JP22K17329), JSPS Overseas Research Fellowships and Japan Science and Technology Agency (JPMJPR22R3). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript. Article Versions (2) version 2 Revised Published: 16 Apr 2024, 11:456 https://doi.org/10.12688/f1000research.111915.2 version 1 Published: 25 Apr 2022, 11:456 https://doi.org/10.12688/f1000research.111915.1 Copyright © 2024 Endo A. 
This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. Download Export To Sciwheel Bibtex EndNote ProCite Ref. Manager (RIS) Sente metrics Views Downloads F1000Research - - PubMed Central info_outline Data from PMC are received and updated monthly. - - Citations open_in_new 0 open_in_new 0 open_in_new SEE MORE DETAILS CITE how to cite this article Endo A. ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2024, 11 :456 ( https://doi.org/10.12688/f1000research.111915.2 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS track receive updates on this article Track an article to receive email alerts on any updates to this article. TRACK THIS ARTICLE Share Open Peer Review Current Reviewer Status: ? Key to Reviewer Statuses VIEW HIDE Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Version 2 VERSION 2 PUBLISHED 16 Apr 2024 Revised Views 0 Cite How to cite this report: Yamamura E. Reviewer Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . 
F1000Research 2024, 11 :456 ( https://doi.org/10.5256/f1000research.140839.r301839 ) The direct URL for this report is: https://f1000research.com/articles/11-456/v2#referee-response-301839 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 14 Sep 2024 Eiji Yamamura , Seinan Gakuin University, Fukuoka, Japan Approved with Reservations VIEWS 0 https://doi.org/10.5256/f1000research.140839.r301839 Report on ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 The aim of the study is to examine whether PSM (Propensity Score Matching) method of Fukumoto, McClean, Nakagawa (FMN ... Continue reading READ ALL Report on ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 The aim of the study is to examine whether PSM (Propensity Score Matching) method of Fukumoto, McClean, Nakagawa (FMN 2021) is appropriately conducted. Main finding of this study is that ESS has substantially reduced due to imbalanced covariates between samples. FMN did not find causal effect of school closure on incidence rates in the early phase of COVID-19 pandemic. However, actually, their estimation results are not reliable because of insufficient ESS. In my view, validity of set of covariates to calculate propensity score and its results in the “first” stage have not been scrutinized and discussed profoundly. However, other reports have already pointed out. The author has already replied to it. The focus of this study seems to be on balance issue and ESS. I agree with it because this study is correspondence rather than full paper. Here, I raise only several issues that remain to be addressed. Major issues: 1. For obtaining results of Fig.6, the author conducted estimation to set the caliper to be 0.25 by following the classical study of Rosenbaum & Rubin (1985) as below. 
"Propensity scores were estimated by a Lasso-regularised linear regression model and included in genetic matching with a calliper of 0.25 (Rosenbaum & Rubin, 1985)". 2. A caliper of 0.25 is widely used for matching in empirical studies. However, more recent works scrutinize the optimal value of calipers. For instance, Austin (2011) recommends that researchers match the propensity score using calipers being equal to 0.2. For illustrating Fig 6, author should use 0.2 rather than 0.25. Otherwise, author should justify his choosing 0.25 as caliper based on recent literature. References. Austin PC, 2011 (Ref 1) Minor issues: 1. This study rigorously scrutinized the validity of PSM. In conclusion, the author derived more general argument than what has been done in this study. PSM has been widely used in empirical studies. However, in my view, most studies using PSM mainly reported the main results, while not having sufficiently examined its validity. Actually, I believe that PSM results reported in many studies published in peer review journals would not meet the criteria to justify them (not valid) if researchers rigorously test it. FMN is one of them. Therefore, it seems better to narrow down the points to limitation of PSM. 2. I found inconsistency between Figs and its explanation in main body of text. Further, there seem to be several errors in Figs. As I read the text, referring to Figs, I became confused. Careless mistakes in basic information should be corrected to avoid reader’s misunderstanding. (1) In caption of Fig 1, (d) appeared two times. “(d) Relative ATT for the closure as of April 6, 2020. (d) Comparison of sample sizes.” This should be “(c) Relative ATT for the closure as of April 6, 2020. (d) Comparison of sample sizes.” (2) Concerning Fig 1, I cannot find “Relative ATT (April 10)” although “Relative ATT (April 06)” was presented. It is strange because readers can compare results between different settings in Figures 2-6. 
Author should present “Relative ATT (April 10)” as Fig 1 d (The current Fig 1 d should be “Fig 1 e”), otherwise explain the reason not to indicate it. (3) In caption of Figs 2. Title of Fig 2a is "Outcome (April 6): 50% mitigating effect" while that of Fig 2 d is “Outcome (April 6): 80% mitigating effect". From these titles and contents of Figs 2, I believe that Fig 2a, b and c are results of "Outcome (April 6): 50% mitigating effect" while Fig 2d, e and f "Outcome (April 6): 80% mitigating effect". "ATT for the 80% mitigating effect was also statistically insignificant (Figure 2c and 2d)," Probably, the second sentence should be “ATT for the 80% mitigating effect was also statistically insignificant (Figure 2e and 2f),” (4) In the end of Caption of Fig 2, I found “(d)–(f) Those for closure as of April 10”. The sentence should be “In (d)–(f), 80% mitigating effects”. Probably, the author copied the caption of Fig 6 although caption of Fig 6 is correct. (5) Comment (4) also applies to Figs 3. Is the rationale for commenting on the previous publication clearly described? Yes Are any opinions stated well-argued, clear and cogent? Yes Are arguments sufficiently supported by evidence from the published literature or by new data and results? Yes Is the conclusion balanced and justified on the basis of the presented arguments? Yes References 1. Austin PC: Optimal caliper widths for propensity-score matching when estimating differences in means and differences in proportions in observational studies. Pharm Stat . 2011; 10 (2): 150-61 PubMed Abstract | Publisher Full Text Competing Interests: No competing interests were disclosed. Reviewer Expertise: Behavioral Economics, Applied Economics I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Yamamura E. 
Reviewer Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2024, 11 :456 ( https://doi.org/10.5256/f1000research.140839.r301839 ) The direct URL for this report is: https://f1000research.com/articles/11-456/v2#referee-response-301839 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Respond or Comment COMMENT ON THIS REPORT Version 1 VERSION 1 PUBLISHED 25 Apr 2022 Views 0 Cite How to cite this report: Hayashi TI. Reviewer Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2024, 11 :456 ( https://doi.org/10.5256/f1000research.123641.r136221 ) The direct URL for this report is: https://f1000research.com/articles/11-456/v1#referee-response-136221 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 04 Jul 2022 Takehiko I. Hayashi , Social Systems Division, National Institute for Environmental Studies, Tsukuba, Japan Approved VIEWS 0 https://doi.org/10.5256/f1000research.123641.r136221 This article reanalyzed Fukumoto et al. (2021), which concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 in Japan. The author first examined the robustness of the conclusion of Fukumoto et al. (2021) ... Continue reading READ ALL This article reanalyzed Fukumoto et al. (2021), which concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 in Japan. The author first examined the robustness of the conclusion of Fukumoto et al. (2021) to the way the indicator is set up. 
The author then conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). As I state below (see Major points), I agree that these results (Figs. 1 and 2) support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude there was “no causal effect.” The author also pointed out the risk of positivity violation and the resulting small effective sample size in propensity score analysis (Figs. 4 and 5). I believe this issue should be further examined because there was a possibly nonnegligible level of residual confounding (see Minor points), but the additional examination may be beyond the scope of this article. Overall, I agree that the authors' arguments are adequately supported by the results presented. In general, the issue of statistical power tends to receive less focus than the issue of identification in the practice of causal inference. However, statistical power is always important when discussing the effects of policies in real-world contexts. This paper is a good practice example to remind us of the fundamentals of statistical inference. Major points: The primary purpose of this study is to examine the statistical power of the analysis of Fukumoto et al. (2021). Although Fukumoto et al. (2021) is an elaborate study that examined most possible considerations, it lacks an examination of statistical power, and statistical power is a logically essential issue if one is to conclude “no causal effect” based on the lack of statistical significance. The author first addressed the issue of outcome measures. I agree with the author that effect estimates relative to the baseline incidence may be superior to per-capita incidence rates as an outcome measure. 
As the author stated, taking only non-negative values for the incidence rate can be a large problem when the incidence rate of the control population is very low, as it was in this case. I think that the spikes in the red line of the control population in Figures 1c and 1i of Fukumoto et al. (2021) (which diverge from the black line of the matched treatment population, indicating a failure to construct an adequate counterfactual) also suggest a disadvantage of using per capita rates, given the explanation that these spikes were caused by the small sizes of the focal municipalities (see the Supplementary Information of Fukumoto et al. 2021, P. 11, Lines 3–11). Although I do not immediately conclude that the advantage of using effect estimates relative to the baseline incidence as the indicator is absolute, using this indicator is one of the possible reasonable choices. The results using this indicator (Fig. 1) showed an inherent lack of power in the analysis, illustrating that the conclusions of Fukumoto et al. (2021) (implicitly assuming a degree of statistical power of the analyses) are not robust to the way the indicator is set up. The author's next approach is more direct. The author conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). The simulation showed that statistical significance was hardly detected even for substantial effects (Fig. 2). I believe these results convincingly demonstrate that the design and data of Fukumoto et al. (2021) did not provide sufficient statistical power to conclude “no causal effect.” The above results (Figs. 1 and 2) logically support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude "no causal effect." Minor points: Although the following comments may address issues that are beyond the scope of this study, the issues themselves are essential. 
These comments are intended as suggestions and not mandatory revisions of this article. First, as the other reviewer (Dr. Shiba) stated, the validity of the estimand is essential. I agree that ATT should have been the main estimand if Fukumoto et al. (2021) mention the efficacy of the policy in the real-world context. I recommend that the author discuss this point further. Second, I think more consideration needs to be given to the possibility of insufficient adjustment (i.e., residual confounding). The author's mention of the risk of positivity violation and the resulting small effective sample size in propensity score analysis is a good point (Figs. 4 and 5). The separation of the propensity score distribution implies the inherent difficulties in matching important factors, especially those having a large effect on both treatment and outcome, which can introduce confounding. This is a real concern because many major covariates were not sufficiently adjusted. A well-known recommendation for an acceptable degree of ASMD after matching is ≤0.1 (Nguyen et al. 2017). However, the ASMDs of many covariates were actually ≥0.2 in this case (Table S3 in the Supplementary Information of Fukumoto et al. 2021). In general, I think it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” when the absolute value of ASMD was ≥0.2 in many covariates. I recommend the author check the loveplot of the matching of April 6, 10, and some cases for ATTs with the reference dashed line at 0.1 ASMD. I also recommend presenting the importance of covariates (e.g., specifying covariates having high standardized coefficient values in the propensity score estimation with red symbols) in the loveplots (see the Note below concerning the need for a loveplot). Third, discussing the possibility of missing important covariates may also be worthwhile. Some unexpected behavior of the Fukumoto et al. 
(2021) data suggests the possibility of residual confounding (due to the lack of incorporation of important covariates). For example, in Figure 1g of Fukumoto et al. (2021), large (see the absolute values) spikes appear only in the matching treated municipalities (and no spikes appear in all treated municipalities). In general, matching is expected to reduce the difference between treated and untreated baselines. Thus, it seems difficult to naturally explain the occurrence of large spikes only in the matched municipalities (unless the treatment actually increased outcomes or the effective sample size is very small). It is possible that important covariates were missed. For example, the trends (not sum) of the incidence rate before treatment were not included as covariates, but they might cause such spikes as follows. In the matching process, matched municipalities tend to have a similar value of the sum of the 7-day incidence rate. Here, the same value of this sum (e.g., 100 incidents per unit) can arise from municipalities that have different time trends (i.e., both increasing and decreasing trends are possible). In this situation, if the treatment (school closure) decision-making was affected by the increasing/decreasing trend, the treated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is an increasing trend. Similarly, the untreated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is a decreasing trend. In this case, the spikes (i.e., the difference in post-treatment outcomes) only in matched treated municipalities (as in Fig. 1g of Fukumoto et al ., 2021) could occur as an artifact of the inertia of the temporal trend (not sum) from the preceding 7 days. Note on the need for a loveplot: The following description in Fukumoto et al. (2021, P. 
2114) is not sufficiently correct in two points: “Moreover, the differences in other covariates between the treated and control groups were also much smaller after matching than before (Supplementary Fig. 1 and Supplementary Table 3). Therefore, differences between the matched groups cannot be attributed to previous levels of infection or any other covariates.” First, whether confounding was removed or not does not depend on the relative ratio before/after ASMD; rather, it depends on the absolute magnitude of ASMD. Even if ASMD becomes relatively much smaller after matching, if the absolute magnitude of ASMD was over 0.2 in many covariates, it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” in a general sense. Second, a smaller average ASMD of many covariates does not assure the removal of confounding. The removal of confounding needs to balance important covariates satisfying a backdoor criterion (not an average of all covariates). Practically, we can speculate about the importance of covariates from the effects of these covariates on treatments and outcomes (c.f., VanderWeele 2019). Figure S1 in the Supplementary Information of Fukumoto et al. (2021) did not provide good information with which to judge these two essential points in terms of the reduction of confounding. To make such a judgment, a loveplot with a reference line at 0.1 is suitable and is also a standard practice. Is the rationale for commenting on the previous publication clearly described? Yes Are any opinions stated well-argued, clear and cogent? Yes Are arguments sufficiently supported by evidence from the published literature or by new data and results? Yes Is the conclusion balanced and justified on the basis of the presented arguments? Yes References 1. 
Nguyen TL, Collins GS, Spence J, Daurès JP, et al.: Double-adjustment in propensity score matching analysis: choosing a threshold for considering residual imbalance. BMC Med Res Methodol . 2017; 17 (1): 78 PubMed Abstract | Publisher Full Text 2. VanderWeele TJ: Principles of confounder selection. Eur J Epidemiol . 2019; 34 (3): 211-219 PubMed Abstract | Publisher Full Text Competing Interests: No competing interests were disclosed. Reviewer Expertise: Risk analysis, statistical causal inference, and environmental data analysis I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Hayashi TI. Reviewer Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2024, 11 :456 ( https://doi.org/10.5256/f1000research.123641.r136221 ) The direct URL for this report is: https://f1000research.com/articles/11-456/v1#referee-response-136221 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 27 Jun 2024 Akira Endo , School of Tropical Medicine and Global Health, Nagasaki University, Nagasaki, 852-8521, Japan 27 Jun 2024 Author Response > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised ... Continue reading > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised version and the following responses will address the reviewers' concerns. 
Specifically, I added two key discussion points that both reviewers agreed should be included: (i) choice of the estimand (ATC vs ATT) and (ii) potential residual confounding. This article reanalyzed Fukumoto et al. (2021), which concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 in Japan. The author first examined the robustness of the conclusion of Fukumoto et al. (2021) to the way the indicator is set up. The author then conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). As I state below (see Major points), I agree that these results (Figs. 1 and 2) support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude there was “no causal effect.” The author also pointed out the risk of positivity violation and the resulting small effective sample size in propensity score analysis (Figs. 4 and 5). I believe this issue should be further examined because there was a possibly nonnegligible level of residual confounding (see Minor points), but the additional examination may be beyond the scope of this article. Overall, I agree that the authors' arguments are adequately supported by the results presented. In general, the issue of statistical power tends to receive less focus than the issue of identification in the practice of causal inference. However, statistical power is always important when discussing the effects of policies in real-world contexts. This paper is a good practice example to remind us of the fundamentals of statistical inference. > I thank the reviewer for supporting the points discussed in the manuscript and for offering additional comments on its content. Major points: The primary purpose of this study is to examine the statistical power of the analysis of Fukumoto et al. (2021). Although Fukumoto et al. 
(2021) is an elaborate study that examined most possible considerations, it lacks an examination of statistical power, and statistical power is a logically essential issue if one is to conclude “no causal effect” based on the lack of statistical significance. The author first addressed the issue of outcome measures. I agree with the author that effect estimates relative to the baseline incidence may be superior to per-capita incidence rates as an outcome measure. As the author stated, taking only non-negative values for the incidence rate can be a large problem when the incidence rate of the control population is very low, as it was in this case. I think that the spikes in the red line of the control population in Figures 1c and 1i of Fukumoto et al. (2021) (which diverge from the black line of the matched treatment population, indicating a failure to construct an adequate counterfactual) also suggest a disadvantage of using per capita rates, given the explanation that these spikes were caused by the small sizes of the focal municipalities (see the Supplementary Information of Fukumoto et al. 2021, P. 11, Lines 3–11). Although I do not immediately conclude that the advantage of using effect estimates relative to the baseline incidence as the indicator is absolute, using this indicator is one of the possible reasonable choices. The results using this indicator (Fig. 1) showed an inherent lack of power in the analysis, illustrating that the conclusions of Fukumoto et al. (2021) (implicitly assuming a degree of statistical power of the analyses) are not robust to the way the indicator is set up. The author's next approach is more direct. The author conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). The simulation showed that statistical significance was hardly detected even for substantial effects (Fig. 2). 
I believe these results convincingly demonstrate that the design and data of Fukumoto et al. (2021) did not provide sufficient statistical power to conclude “no causal effect.” The above results (Figs. 1 and 2) logically support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude "no causal effect." > I thank the reviewer for appreciating and further discussing in detail the main points of my criticism. Regarding the choice of indicator, in response to Reviewer 1 (Dr Shiba)’s comment, I have included additional reasoning why relative effect is more relevant to this infectious disease context. Minor points: Although the following comments may address issues that are beyond the scope of this study, the issues themselves are essential. These comments are intended as suggestions and not mandatory revisions of this article. First, as the other reviewer (Dr. Shiba) stated, the validity of the estimand is essential. I agree that ATT should have been the main estimand if Fukumoto et al. (2021) mention the efficacy of the policy in the real-world context. I recommend that the author discuss this point further. > Discussion on the choice of estimand (along with that on residual confounding) has been included in the Discussion and Conclusion section. Please also see the response to Reviewer 1. Second, I think more consideration needs to be given to the possibility of insufficient adjustment (i.e., residual confounding). The author's mention of the risk of positivity violation and the resulting small effective sample size in propensity score analysis is a good point (Figs. 4 and 5). The separation of the propensity score distribution implies the inherent difficulties in matching important factors, especially those having a large effect on both treatment and outcome, which can introduce confounding. This is a real concern because many major covariates were not sufficiently adjusted. 
A well-known recommendation for an acceptable degree of ASMD after matching is ≤0.1 (Nguyen et al. 2017). However, the ASMDs of many covariates were actually ≥0.2 in this case (Table S3 in the Supplementary Information of Fukumoto et al. 2021). In general, I think it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” when the absolute value of ASMD was ≥0.2 in many covariates. I recommend the author check the loveplot of the matching of April 6, 10, and some cases for ATTs with the reference dashed line at 0.1 ASMD. I also recommend presenting the importance of covariates (e.g., specifying covariates having high standardized coefficient values in the propensity score estimation with red symbols) in the loveplots (see the Note below concerning the need for a loveplot). > I thank the reviewer for the constructive suggestion to further investigate the appropriateness of matching in the original study. While I agree that a loveplot would provide a more in-depth understanding of what might have gone wrong with the original analysis, the aim of my article is to highlight the existence of the issue (not necessarily revealing every detail of the individual issues), which I believe has already been demonstrated. Once the potential issue is identified as such, in principle the original authors should be responsible for conducting robust analysis to defend their findings. Third, discussing the possibility of missing important covariates may also be worthwhile. Some unexpected behavior of the Fukumoto et al. (2021) data suggests the possibility of residual confounding (due to the lack of incorporation of important covariates). For example, in Figure 1g of Fukumoto et al. (2021), large (see the absolute values) spikes appear only in the matched treated municipalities (and no spikes appear in all treated municipalities). 
In general, matching is expected to reduce the difference between treated and untreated baselines. Thus, it seems difficult to naturally explain the occurrence of large spikes only in the matched municipalities (unless the treatment actually increased outcomes or the effective sample size is very small). It is possible that important covariates were missed. For example, the trends (not sum) of the incidence rate before treatment were not included as covariates, but they might cause such spikes as follows. In the matching process, matched municipalities tend to have a similar value of the sum of the 7-day incidence rate. Here, the same value of this sum (e.g., 100 incidents per unit) can arise from municipalities that have different time trends (i.e., both increasing and decreasing trends are possible). In this situation, if the treatment (school closure) decision-making was affected by the increasing/decreasing trend, the treated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is an increasing trend. Similarly, the untreated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is a decreasing trend. In this case, the spikes (i.e., the difference in post-treatment outcomes) only in matched treated municipalities (as in Fig. 1g of Fukumoto et al., 2021) could occur as an artifact of the inertia of the temporal trend (not sum) from the preceding 7 days. > Discussion on possible residual confounders has been included in the Discussion and Conclusion section. Please also see the response to Reviewer 1. Note on the need for a loveplot: The following description in Fukumoto et al. (2021, P. 2114) is not sufficiently correct in two respects: “Moreover, the differences in other covariates between the treated and control groups were also much smaller after matching than before (Supplementary Fig. 1 and Supplementary Table 3). 
Therefore, differences between the matched groups cannot be attributed to previous levels of infection or any other covariates.” First, whether confounding was removed or not does not depend on the relative reduction in ASMD from before to after matching; rather, it depends on the absolute magnitude of ASMD. Even if ASMD becomes relatively much smaller after matching, if the absolute magnitude of ASMD was over 0.2 in many covariates, it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” in a general sense. Second, a smaller average ASMD of many covariates does not assure the removal of confounding. The removal of confounding requires balancing important covariates satisfying a backdoor criterion (not an average of all covariates). Practically, we can speculate about the importance of covariates from the effects of these covariates on treatments and outcomes (cf. VanderWeele 2019). Figure S1 in the Supplementary Information of Fukumoto et al. (2021) did not provide good information with which to judge these two essential points in terms of the reduction of confounding. To make such a judgment, a loveplot with a reference line at 0.1 is suitable and is also a standard practice. > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised version and the following responses will address the reviewers' concerns. Specifically, I added two key discussion points that both reviewers agreed should be included: (i) choice of the estimand (ATC vs ATT) and (ii) potential residual confounding. This article reanalyzed Fukumoto et al. (2021), which concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 in Japan. The author first examined the robustness of the conclusion of Fukumoto et al. (2021) to the way the indicator is set up. 
The author then conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). As I state below (see Major points), I agree that these results (Figs. 1 and 2) support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude there was “no causal effect.” The author also pointed out the risk of positivity violation and the resulting small effective sample size in propensity score analysis (Figs. 4 and 5). I believe this issue should be further examined because there was a possibly nonnegligible level of residual confounding (see Minor points), but the additional examination may be beyond the scope of this article. Overall, I agree that the authors' arguments are adequately supported by the results presented. In general, the issue of statistical power tends to receive less focus than the issue of identification in the practice of causal inference. However, statistical power is always important when discussing the effects of policies in real-world contexts. This paper is a good practice example to remind us of the fundamentals of statistical inference. > I thank the reviewer for supporting the points discussed in the manuscript and for offering additional comments on its content. Major points: The primary purpose of this study is to examine the statistical power of the analysis of Fukumoto et al. (2021). Although Fukumoto et al. (2021) is an elaborate study that examined most possible considerations, it lacks an examination of statistical power, and statistical power is a logically essential issue if one is to conclude “no causal effect” based on the lack of statistical significance. The author first addressed the issue of outcome measures. I agree with the author that effect estimates relative to the baseline incidence may be superior to per-capita incidence rates as an outcome measure. 
As the author stated, taking only non-negative values for the incidence rate can be a large problem when the incidence rate of the control population is very low, as it was in this case. I think that the spikes in the red line of the control population in Figures 1c and 1i of Fukumoto et al. (2021) (which diverge from the black line of the matched treatment population, indicating a failure to construct an adequate counterfactual) also suggest a disadvantage of using per capita rates, given the explanation that these spikes were caused by the small sizes of the focal municipalities (see the Supplementary Information of Fukumoto et al. 2021, P. 11, Lines 3–11). Although I do not immediately conclude that the advantage of using effect estimates relative to the baseline incidence as the indicator is absolute, using this indicator is one of the possible reasonable choices. The results using this indicator (Fig. 1) showed an inherent lack of power in the analysis, illustrating that the conclusions of Fukumoto et al. (2021) (implicitly assuming a degree of statistical power of the analyses) are not robust to the way the indicator is set up. The author's next approach is more direct. The author conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). The simulation showed that statistical significance was hardly detected even for substantial effects (Fig. 2). I believe these results convincingly demonstrate that the design and data of Fukumoto et al. (2021) did not provide sufficient statistical power to conclude “no causal effect.” The above results (Figs. 1 and 2) logically support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude "no causal effect." > I thank the reviewer for appreciating and further discussing in detail the main points of my criticism. 
Regarding the choice of indicator, in response to Reviewer 1 (Dr Shiba)’s comment, I have included additional reasoning why relative effect is more relevant to this infectious disease context. Minor points: Although the following comments may address issues that are beyond the scope of this study, the issues themselves are essential. These comments are intended as suggestions and not mandatory revisions of this article. First, as the other reviewer (Dr. Shiba) stated, the validity of the estimand is essential. I agree that ATT should have been the main estimand if Fukumoto et al. (2021) mention the efficacy of the policy in the real-world context. I recommend that the author discuss this point further. > Discussion on the choice of estimand (along with that on residual confounding) has been included in the Discussion and Conclusion section. Please also see the response to Reviewer 1. Second, I think more consideration needs to be given to the possibility of insufficient adjustment (i.e., residual confounding). The author's mention of the risk of positivity violation and the resulting small effective sample size in propensity score analysis is a good point (Figs. 4 and 5). The separation of the propensity score distribution implies the inherent difficulties in matching important factors, especially those having a large effect on both treatment and outcome, which can introduce confounding. This is a real concern because many major covariates were not sufficiently adjusted. A well-known recommendation for an acceptable degree of ASMD after matching is ≤0.1 (Nguyen et al. 2017). However, the ASMDs of many covariates were actually ≥0.2 in this case (Table S3 in the Supplementary Information of Fukumoto et al. 2021). In general, I think it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” when the absolute value of ASMD was ≥0.2 in many covariates. 
I recommend the author check the loveplot of the matching of April 6, 10, and some cases for ATTs with the reference dashed line at 0.1 ASMD. I also recommend presenting the importance of covariates (e.g., specifying covariates having high standardized coefficient values in the propensity score estimation with red symbols) in the loveplots (see the Note below concerning the need for a loveplot). > I thank the reviewer for the constructive suggestion to further investigate the appropriateness of matching in the original study. While I agree that a loveplot would provide a more in-depth understanding of what might have gone wrong with the original analysis, the aim of my article is to highlight the existence of the issue (not necessarily revealing every detail of the individual issues), which I believe has already been demonstrated. Once the potential issue is identified as such, in principle the original authors should be responsible for conducting robust analysis to defend their findings. Third, discussing the possibility of missing important covariates may also be worthwhile. Some unexpected behavior of the Fukumoto et al. (2021) data suggests the possibility of residual confounding (due to the lack of incorporation of important covariates). For example, in Figure 1g of Fukumoto et al. (2021), large (see the absolute values) spikes appear only in the matched treated municipalities (and no spikes appear in all treated municipalities). In general, matching is expected to reduce the difference between treated and untreated baselines. Thus, it seems difficult to naturally explain the occurrence of large spikes only in the matched municipalities (unless the treatment actually increased outcomes or the effective sample size is very small). It is possible that important covariates were missed. For example, the trends (not sum) of the incidence rate before treatment were not included as covariates, but they might cause such spikes as follows. 
In the matching process, matched municipalities tend to have a similar value of the sum of the 7-day incidence rate. Here, the same value of this sum (e.g., 100 incidents per unit) can arise from municipalities that have different time trends (i.e., both increasing and decreasing trends are possible). In this situation, if the treatment (school closure) decision-making was affected by the increasing/decreasing trend, the treated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is an increasing trend. Similarly, the untreated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is a decreasing trend. In this case, the spikes (i.e., the difference in post-treatment outcomes) only in matched treated municipalities (as in Fig. 1g of Fukumoto et al., 2021) could occur as an artifact of the inertia of the temporal trend (not sum) from the preceding 7 days. > Discussion on possible residual confounders has been included in the Discussion and Conclusion section. Please also see the response to Reviewer 1. Note on the need for a loveplot: The following description in Fukumoto et al. (2021, P. 2114) is not sufficiently correct in two respects: “Moreover, the differences in other covariates between the treated and control groups were also much smaller after matching than before (Supplementary Fig. 1 and Supplementary Table 3). Therefore, differences between the matched groups cannot be attributed to previous levels of infection or any other covariates.” First, whether confounding was removed or not does not depend on the relative reduction in ASMD from before to after matching; rather, it depends on the absolute magnitude of ASMD. 
Even if ASMD becomes relatively much smaller after matching, if the absolute magnitude of ASMD was over 0.2 in many covariates, it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” in a general sense. Second, a smaller average ASMD of many covariates does not assure the removal of confounding. The removal of confounding needs to balance important covariates satisfying a backdoor criterion (not an average of all covariates). Practically, we can speculate about the importance of covariates from the effects of these covariates on treatments and outcomes (c.f., VanderWeele 2019). Figure S1 in the Supplementary Information of Fukumoto et al. (2021) did not provide good information with which to judge these two essential points in terms of the reduction of confounding. To make such a judgment, a loveplot with a reference line at 0.1 is suitable and is also a standard practice. Competing Interests: I received a grant from Taisho Pharmaceutical Co., Ltd. for research outside of this study. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 27 Jun 2024 Akira Endo , School of Tropical Medicine and Global Health, Nagasaki University, Nagasaki, 852-8521, Japan 27 Jun 2024 Author Response > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised ... Continue reading > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised version and the following responses will address the reviewers' concerns. Specifically, I added two key discussion points that both reviewers agreed should be included: (i) choice of the estimand (ATC vs ATT) and (ii) potential residual confounding. This article reanalyzed Fukumoto et al. 
(2021), which concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 in Japan. The author first examined the robustness of the conclusion of Fukumoto et al. (2021) to the way the indicator is set up. The author then conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). As I state below (see Major points), I agree that these results (Figs. 1 and 2) support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude there was “no causal effect.” The author also pointed out the risk of positivity violation and the resulting small effective sample size in propensity score analysis (Figs. 4 and 5). I believe this issue should be further examined because there was a possibly nonnegligible level of residual confounding (see Minor points), but the additional examination may be beyond the scope of this article. Overall, I agree that the authors' arguments are adequately supported by the results presented. In general, the issue of statistical power tends to receive less focus than the issue of identification in the practice of causal inference. However, statistical power is always important when discussing the effects of policies in real-world contexts. This paper is a good practice example to remind us of the fundamentals of statistical inference. > I thank the reviewer for supporting the points discussed in the manuscript and for offering additional comments on its content. Major points: The primary purpose of this study is to examine the statistical power of the analysis of Fukumoto et al. (2021). Although Fukumoto et al. (2021) is an elaborate study that examined most possible considerations, it lacks an examination of statistical power, and statistical power is a logically essential issue if one is to conclude “no causal effect” based on the lack of statistical significance. 
The author first addressed the issue of outcome measures. I agree with the author that effect estimates relative to the baseline incidence may be superior to per-capita incidence rates as an outcome measure. As the author stated, taking only non-negative values for the incidence rate can be a large problem when the incidence rate of the control population is very low, as it was in this case. I think that the spikes in the red line of the control population in Figures 1c and 1i of Fukumoto et al. (2021) (which diverge from the black line of the matched treatment population, indicating a failure to construct an adequate counterfactual) also suggest a disadvantage of using per capita rates, given the explanation that these spikes were caused by the small sizes of the focal municipalities (see the Supplementary Information of Fukumoto et al. 2021, P. 11, Lines 3–11). Although I do not immediately conclude that the advantage of using effect estimates relative to the baseline incidence as the indicator is absolute, using this indicator is one of the possible reasonable choices. The results using this indicator (Fig. 1) showed an inherent lack of power in the analysis, illustrating that the conclusions of Fukumoto et al. (2021) (implicitly assuming a degree of statistical power of the analyses) are not robust to the way the indicator is set up. The author's next approach is more direct. The author conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). The simulation showed that statistical significance was hardly detected even for substantial effects (Fig. 2). I believe these results convincingly demonstrate that the design and data of Fukumoto et al. (2021) did not provide sufficient statistical power to conclude “no causal effect.” The above results (Figs. 1 and 2) logically support the author's main argument that Fukumoto et al. 
(2021) did not provide sufficient statistical power to conclude "no causal effect." > I thank the reviewer for appreciating and further discussing in detail the main points of my criticism. Regarding the choice of indicator, in response to Reviewer 1 (Dr Shiba)’s comment, I have included additional reasoning why relative effect is more relevant to this infectious disease context. Minor points: Although the following comments may address issues that are beyond the scope of this study, the issues themselves are essential. These comments are intended as suggestions and not mandatory revisions of this article. First, as the other reviewer (Dr. Shiba) stated, the validity of the estimand is essential. I agree that ATT should have been the main estimand if Fukumoto et al. (2021) mention the efficacy of the policy in the real-world context. I recommend that the author discuss this point further. > Discussion on the choice of estimand (along with that on residual confounding) has been included in the Discussion and Conclusion section. Please also see the response to Reviewer 1. Second, I think more consideration needs to be given to the possibility of insufficient adjustment (i.e., residual confounding). The author's mention of the risk of positivity violation and the resulting small effective sample size in propensity score analysis is a good point (Figs. 4 and 5). The separation of the propensity score distribution implies the inherent difficulties in matching important factors, especially those having a large effect on both treatment and outcome, which can introduce confounding. This is a real concern because many major covariates were not sufficiently adjusted. A well-known recommendation for an acceptable degree of ASMD after matching is ≤0.1 (Nguyen et al. 2017). However, the ASMDs of many covariates were actually ≥0.2 in this case (Table S3 in the Supplementary Information of Fukumoto et al. 2021). 
In general, I think it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” when the absolute value of ASMD was ≥0.2 in many covariates. I recommend the author check the loveplot of the matching of April 6, 10, and some cases for ATTs with the reference dashed line at 0.1 ASMD. I also recommend presenting the importance of covariates (e.g., specifying covariates having high standardized coefficient values in the propensity score estimation with red symbols) in the loveplots (see the Note below concerning the need for a loveplot). > I thank the reviewer for constructive suggestion to further investigate the appropriateness of matching in the original study. While I agree that a loveplot would provide more in-depth understanding of what might have gone wrong with the original analysis, the aim of my article is to highlight the existence of the issue (not necessarily revealing every detail of the individual issues), which I believe has already been demonstrated. Once the potential issue is identified as such, in principle the original authors should be responsible for conducting robust analysis to defend their findings. Third, discussing the possibility of missing important covariates may also be worthwhile. Some unexpected behavior of the Fukumoto et al. (2021) data suggests the possibility of residual confounding (due to the lack of incorporation of important covariates). For example, in Figure 1g of Fukumoto et al. (2021), large (see the absolute values) spikes appear only in the matching treated municipalities (and no spikes appear in all treated municipalities). In general, matching is expected to reduce the difference between treated and untreated baselines. Thus, it seems difficult to naturally explain the occurrence of large spikes only in the matched municipalities (unless the treatment actually increased outcomes or the effective sample size is very small). 
It is possible that important covariates were missed. For example, the trends (not sum) of the incidence rate before treatment were not included as covariates, but they might cause such spikes as follows. In the matching process, matched municipalities tend to have a similar value of the sum of the 7-day incidence rate. Here, the same value of this sum (e.g., 100 incidents per unit) can arise from municipalities that have different time trends (i.e., both increasing and decreasing trends are possible). In this situation, if the treatment (school closure) decision-making was affected by the increasing/decreasing trend, the treated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is an increasing trend. Similarly, the untreated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is a decreasing trend. In this case, the spikes (i.e., the difference in post-treatment outcomes) only in matched treated municipalities (as in Fig. 1g of Fukumoto et al., 2021) could occur as an artifact of the inertia of the temporal trend (not sum) from the preceding 7 days. > Discussion on possible residual confounders has been included in the Discussion and Conclusion section. Please also see the response to Reviewer 1. Note on the need for a loveplot: The following description in Fukumoto et al. (2021, P. 2114) is not sufficiently correct in two respects: “Moreover, the differences in other covariates between the treated and control groups were also much smaller after matching than before (Supplementary Fig. 1 and Supplementary Table 3). Therefore, differences between the matched groups cannot be attributed to previous levels of infection or any other covariates.” First, whether confounding was removed or not does not depend on the relative reduction in ASMD from before to after matching; rather, it depends on the absolute magnitude of ASMD. 
Even if ASMD becomes relatively much smaller after matching, if the absolute magnitude of ASMD was over 0.2 in many covariates, it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” in a general sense. Second, a smaller average ASMD of many covariates does not assure the removal of confounding. The removal of confounding needs to balance important covariates satisfying a backdoor criterion (not an average of all covariates). Practically, we can speculate about the importance of covariates from the effects of these covariates on treatments and outcomes (c.f., VanderWeele 2019). Figure S1 in the Supplementary Information of Fukumoto et al. (2021) did not provide good information with which to judge these two essential points in terms of the reduction of confounding. To make such a judgment, a loveplot with a reference line at 0.1 is suitable and is also a standard practice. > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised version and the following responses will address the reviewers' concerns. Specifically, I added two key discussion points that both reviewers agreed should be included: (i) choice of the estimand (ATC vs ATT) and (ii) potential residual confounding. This article reanalyzed Fukumoto et al. (2021), which concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 in Japan. The author first examined the robustness of the conclusion of Fukumoto et al. (2021) to the way the indicator is set up. The author then conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). As I state below (see Major points), I agree that these results (Figs. 1 and 2) support the author's main argument that Fukumoto et al. 
(2021) did not provide sufficient statistical power to conclude there was “no causal effect.” The author also pointed out the risk of positivity violation and the resulting small effective sample size in propensity score analysis (Figs. 4 and 5). I believe this issue should be further examined because there was a possibly nonnegligible level of residual confounding (see Minor points), but the additional examination may be beyond the scope of this article. Overall, I agree that the authors' arguments are adequately supported by the results presented. In general, the issue of statistical power tends to receive less focus than the issue of identification in the practice of causal inference. However, statistical power is always important when discussing the effects of policies in real-world contexts. This paper is a good practice example to remind us of the fundamentals of statistical inference. > I thank the reviewer for supporting the points discussed in the manuscript and for offering additional comments on its content. Major points: The primary purpose of this study is to examine the statistical power of the analysis of Fukumoto et al. (2021). Although Fukumoto et al. (2021) is an elaborate study that examined most possible considerations, it lacks an examination of statistical power, and statistical power is a logically essential issue if one is to conclude “no causal effect” based on the lack of statistical significance. The author first addressed the issue of outcome measures. I agree with the author that effect estimates relative to the baseline incidence may be superior to per-capita incidence rates as an outcome measure. As the author stated, taking only non-negative values for the incidence rate can be a large problem when the incidence rate of the control population is very low, as it was in this case. I think that the spikes in the red line of the control population in Figures 1c and 1i of Fukumoto et al. 
(2021) (which diverge from the black line of the matched treatment population, indicating a failure to construct an adequate counterfactual) also suggest a disadvantage of using per capita rates, given the explanation that these spikes were caused by the small sizes of the focal municipalities (see the Supplementary Information of Fukumoto et al. 2021, P. 11, Lines 3–11). Although I do not immediately conclude that the advantage of using effect estimates relative to the baseline incidence as the indicator is absolute, using this indicator is one of the possible reasonable choices. The results using this indicator (Fig. 1) showed an inherent lack of power in the analysis, illustrating that the conclusions of Fukumoto et al. (2021) (implicitly assuming a degree of statistical power of the analyses) are not robust to the way the indicator is set up. The author's next approach is more direct. The author conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). The simulation showed that statistical significance was hardly detected even for substantial effects (Fig. 2). I believe these results convincingly demonstrate that the design and data of Fukumoto et al. (2021) did not provide sufficient statistical power to conclude “no causal effect.” The above results (Figs. 1 and 2) logically support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude "no causal effect." > I thank the reviewer for appreciating and further discussing in detail the main points of my criticism. Regarding the choice of indicator, in response to Reviewer 1 (Dr Shiba)’s comment, I have included additional reasoning why relative effect is more relevant to this infectious disease context. Minor points: Although the following comments may address issues that are beyond the scope of this study, the issues themselves are essential. 
These comments are intended as suggestions and not mandatory revisions of this article. First, as the other reviewer (Dr. Shiba) stated, the validity of the estimand is essential. I agree that ATT should have been the main estimand if Fukumoto et al. (2021) mention the efficacy of the policy in the real-world context. I recommend that the author discuss this point further. > Discussion on the choice of estimand (along with that on residual confounding) has been included in the Discussion and Conclusion section. Please also see the response to Reviewer 1. Second, I think more consideration needs to be given to the possibility of insufficient adjustment (i.e., residual confounding). The author's mention of the risk of positivity violation and the resulting small effective sample size in propensity score analysis is a good point (Figs. 4 and 5). The separation of the propensity score distribution implies the inherent difficulties in matching important factors, especially those having a large effect on both treatment and outcome, which can introduce confounding. This is a real concern because many major covariates were not sufficiently adjusted. A well-known recommendation for an acceptable degree of ASMD after matching is ≤0.1 (Nguyen et al. 2017). However, the ASMDs of many covariates were actually ≥0.2 in this case (Table S3 in the Supplementary Information of Fukumoto et al. 2021). In general, I think it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” when the absolute value of ASMD was ≥0.2 in many covariates. I recommend the author check the loveplot of the matching of April 6, 10, and some cases for ATTs with the reference dashed line at 0.1 ASMD. 
I also recommend presenting the importance of covariates (e.g., specifying covariates having high standardized coefficient values in the propensity score estimation with red symbols) in the loveplots (see the Note below concerning the need for a loveplot). > I thank the reviewer for the constructive suggestion to further investigate the appropriateness of matching in the original study. While I agree that a loveplot would provide a more in-depth understanding of what might have gone wrong with the original analysis, the aim of my article is to highlight the existence of the issue (not necessarily revealing every detail of the individual issues), which I believe has already been demonstrated. Once the potential issue is identified as such, in principle the original authors should be responsible for conducting a robust analysis to defend their findings. Third, discussing the possibility of missing important covariates may also be worthwhile. Some unexpected behavior of the Fukumoto et al. (2021) data suggests the possibility of residual confounding (due to the lack of incorporation of important covariates). For example, in Figure 1g of Fukumoto et al. (2021), large (see the absolute values) spikes appear only in the matched treated municipalities (and no spikes appear in all treated municipalities). In general, matching is expected to reduce the difference between treated and untreated baselines. Thus, it seems difficult to naturally explain the occurrence of large spikes only in the matched municipalities (unless the treatment actually increased outcomes or the effective sample size is very small). It is possible that important covariates were missed. For example, the trends (not sum) of the incidence rate before treatment were not included as covariates, but they might cause such spikes as follows. In the matching process, matched municipalities tend to have a similar value of the sum of the 7-day incidence rate. 
Here, the same value of this sum (e.g., 100 incidents per unit) can arise from municipalities that have different time trends (i.e., both increasing and decreasing trends are possible). In this situation, if the treatment (school closure) decision-making was affected by the increasing/decreasing trend, the treated group may tend to include municipalities (with 100 incidents per unit over the preceding 7 days) when there is an increasing trend. Similarly, the untreated group may tend to include municipalities (with 100 incidents per unit over the preceding 7 days) when there is a decreasing trend. In this case, the spikes (i.e., the difference in post-treatment outcomes) only in matched treated municipalities (as in Fig. 1g of Fukumoto et al., 2021) could occur as an artifact of the inertia of the temporal trend (not sum) from the preceding 7 days. > A discussion of possible residual confounders has been included in the Discussion and Conclusion section. Please also see the response to Reviewer 1. Note on the need for a loveplot: The following description in Fukumoto et al. (2021, P. 2114) is not sufficiently correct in two respects: “Moreover, the differences in other covariates between the treated and control groups were also much smaller after matching than before (Supplementary Fig. 1 and Supplementary Table 3). Therefore, differences between the matched groups cannot be attributed to previous levels of infection or any other covariates.” First, whether confounding was removed or not does not depend on the ratio of ASMD before and after matching; rather, it depends on the absolute magnitude of ASMD. Even if ASMD becomes relatively much smaller after matching, if the absolute magnitude of ASMD was over 0.2 in many covariates, it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” in a general sense. Second, a smaller average ASMD of many covariates does not assure the removal of confounding. 
The removal of confounding needs to balance important covariates satisfying a backdoor criterion (not an average of all covariates). Practically, we can speculate about the importance of covariates from the effects of these covariates on treatments and outcomes (c.f., VanderWeele 2019). Figure S1 in the Supplementary Information of Fukumoto et al. (2021) did not provide good information with which to judge these two essential points in terms of the reduction of confounding. To make such a judgment, a loveplot with a reference line at 0.1 is suitable and is also a standard practice. Competing Interests: I received a grant from Taisho Pharmaceutical Co., Ltd. for research outside of this study. Close Report a concern COMMENT ON THIS REPORT Views 0 Cite How to cite this report: Shiba K. Reviewer Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2024, 11 :456 ( https://doi.org/10.5256/f1000research.123641.r136223 ) The direct URL for this report is: https://f1000research.com/articles/11-456/v1#referee-response-136223 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 29 Apr 2022 Koichiro Shiba , Department of Epidemiology, Harvard T.H. Chan School of Public Health, Boston, MA, USA Approved with Reservations VIEWS 0 https://doi.org/10.5256/f1000research.123641.r136223 This article provides a critical re-assessment of the recent paper that concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 (Fukumoto et al. , 2021). The author of this article argued that the original ... Continue reading READ ALL This article provides a critical re-assessment of the recent paper that concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 (Fukumoto et al. , 2021). 
The author of this article argued that the original analysis (and the refined version presented in the current article) was likely underpowered and unreliable. The author raised a great point about the potential violation of positivity, about which the original paper provided little discussion. I appreciate the author for bringing our attention to these important issues and allowing me to engage in carefully reading the original article. I agree with the author that it is vital to be mindful that the absence of “evidence” defined by the lack of statistical significance is not evidence of absence. Hence, I find this type of article assessing alternative explanations for the null findings in the original paper particularly valuable. I provide some major and minor suggestions to strengthen the current manuscript further. Major points: The manuscript is currently written as if the lack of statistical power is the primary (and perhaps only) issue that might explain the original paper’s null findings. The author does discuss the possibility of unadjusted confounding later in the paper, but the way the Introduction and the Conclusion were written made it seem a secondary problem. I suggest the author provide a more in-depth discussion on other potential issues. There are at least two other issues I think are worth discussing: the choice of causal estimand and residual confounding. To me, the most significant limitation of the original article is its focus on identifying and estimating ATC. The controls in the study were the municipalities that did not (need to) enforce school closures in the Spring of 2020; that is, they were most likely the areas that do not benefit from school closures because they were not experiencing the spread of COVID-19 to begin with, which is supported by the extremely low (nearly zero) confirmed cases in the control group shown in Figure 1. 
ATC, in this context, is of little policy relevance because such municipalities would have few cases regardless of the implementation of the school closures (the counterfactual “what would happen had they closed the schools” would not differ much from the reality). The null finding for ATC is somewhat expected. What we want to know instead is if the spread in the treated (i.e., municipalities that were experiencing the rise in confirmed cases and had to decide to close schools) would have been worse without the school closures (i.e., ATT). I know that the original article did investigate ATT as a sensitivity analysis, but it was based on the data stemming from the school closure on only one date (April 6) with a much smaller matched sample size (as the treated was smaller in number). They did not provide key supplementary information for the ATT analysis (e.g., covariate balance after matching) either. I understand that, with the available data, ATC (versus ATT) was easier to estimate as there were more controls; but that does not justify their oversimplified conclusion that there is no effect of school closures in Japan because the effect of a hypothetical intervention can vary substantially depending on the target populations. The original article and the current article took a careful approach to mitigating bias due to confounding; yet, I see some evidence of residual confounding. The two articles indicate that ATC immediately after the school closures was positive point-estimate-wise, at least for some dates. The original article's authors wrote: “The ATC values suggest that municipalities that closed their schools mostly increased the number of cases”. If matching was successful and there was no unadjusted confounding, this statement would be true. Yet, they provide no compelling explanation for why school closures may causally “increase” confirmed cases. I cannot think of any. 
If not causal, the increase in cases among the treated municipalities in the matched sample is likely due to residual confounding—they had a reason to be concerned about the spread and decided to close the school, which was not captured by the observed covariates, including the prior outcome values and school closure status. The author of the current article raised an excellent point regarding residual bias, but some additional discussion on this issue would be appreciated. Minor points: Page 3: “Moreover, matching technique used to minimise confounding has a known side effect of...” needs citation. Page 3: “The effect estimates relative to the baseline incidence would be a more intuitive and interpretable measure for assessment of its practical use.” This needs more justification. The additive effect measure has its own advantage because it can speak directly to the population impacts of the intervention. Page 3: The sentence starting, “However, the theoretical ground is unclear...” seems incomplete. Page 3: “ATC and average treatment effect on the treatment: ATT) and their confidence intervals relative to the average outcome (incidence rate per capita) in the control group (Figure 1).” This sentence seems incomplete. Perhaps delete the part after “relative to...“. Page 3: Spell out ESS and provide a bit more context of what it is. Is the rationale for commenting on the previous publication clearly described? Yes Are any opinions stated well-argued, clear and cogent? Partly Are arguments sufficiently supported by evidence from the published literature or by new data and results? Yes Is the conclusion balanced and justified on the basis of the presented arguments? Partly Competing Interests: No competing interests were disclosed. 
Reviewer Expertise: Public health, epidemiology, causal inference I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Shiba K. Reviewer Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2024, 11 :456 ( https://doi.org/10.5256/f1000research.123641.r136223 ) The direct URL for this report is: https://f1000research.com/articles/11-456/v1#referee-response-136223 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 27 Jun 2024 Akira Endo , School of Tropical Medicine and Global Health, Nagasaki University, Nagasaki, 852-8521, Japan 27 Jun 2024 Author Response > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised ... Continue reading > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised version and the following responses will address the reviewers' concerns. Specifically, I added two key discussion points that both reviewers agreed should be included: (i) choice of the estimand (ATC vs ATT) and (ii) potential residual confounding. This article provides a critical re-assessment of the recent paper that concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 (Fukumoto et al. , 2021). 
The author of this article argued that the original analysis (and the refined version presented in the current article) was likely underpowered and unreliable. The author raised a great point about the potential violation of positivity, about which the original paper provided little discussion. I appreciate the author for bringing our attention to these important issues and allowing me to engage in carefully reading the original article. I agree with the author that it is vital to be mindful that the absence of “evidence” defined by the lack of statistical significance is not evidence of absence. Hence, I find this type of article assessing alternative explanations for the null findings in the original paper particularly valuable. I provide some major and minor suggestions to strengthen the current manuscript further. > I thank the reviewer for appreciating my criticism and providing useful suggestions. Major points: The manuscript is currently written as if the lack of statistical power is the primary (and perhaps only) issue that might explain the original paper’s null findings. The author does discuss the possibility of unadjusted confounding later in the paper, but the way the Introduction and the Conclusion were written made it seem a secondary problem. I suggest the author provide a more in-depth discussion on other potential issues. There are at least two other issues I think are worth discussing: the choice of causal estimand and residual confounding. To me, the most significant limitation of the original article is its focus on identifying and estimating ATC. The controls in the study were the municipalities that did not (need to) enforce school closures in the Spring of 2020; that is, they were most likely the areas that do not benefit from school closures because they were not experiencing the spread of COVID-19 to begin with, which is supported by the extremely low (nearly zero) confirmed cases in the control group shown in Figure 1. 
ATC, in this context, is of little policy relevance because such municipalities would have few cases regardless of the implementation of the school closures (the counterfactual “what would happen had they closed the schools” would not differ much from the reality). The null finding for ATC is somewhat expected. What we want to know instead is if the spread in the treated (i.e., municipalities that were experiencing the rise in confirmed cases and had to decide to close schools) would have been worse without the school closures (i.e., ATT). I know that the original article did investigate ATT as a sensitivity analysis, but it was based on the data stemming from the school closure on only one date (April 6) with a much smaller matched sample size (as the treated was smaller in number). They did not provide key supplementary information for the ATT analysis (e.g., covariate balance after matching) either. I understand that, with the available data, ATC (versus ATT) was easier to estimate as there were more controls; but that does not justify their oversimplified conclusion that there is no effect of school closures in Japan because the effect of a hypothetical intervention can vary substantially depending on the target populations. The original article and the current article took a careful approach to mitigating bias due to confounding; yet, I see some evidence of residual confounding. The two articles indicate that ATC immediately after the school closures was positive point-estimate-wise, at least for some dates. The original article's authors wrote: “The ATC values suggest that municipalities that closed their schools mostly increased the number of cases”. If matching was successful and there was no unadjusted confounding, this statement would be true. Yet, they provide no compelling explanation for why school closures may causally “increase” confirmed cases. I cannot think of any. 
If not causal, the increase in cases among the treated municipalities in the matched sample is likely due to residual confounding—they had a reason to be concerned about the spread and decided to close the school, which was not captured by the observed covariates, including the prior outcome values and school closure status. The author of the current article raised an excellent point regarding residual bias, but some additional discussion on this issue would be appreciated. > I thank the reviewer for pointing out the existence of potential issues that I did not emphasise in the paper. I believe these additional issues are indeed worth mentioning in the manuscript. Meanwhile, I would prefer to retain the lack of statistical power as the primary issue because the present manuscript leverages the results of re-analysis focusing on the statistical power. Moreover, the two additional points suggested would also eventually come down to the problem of limited sample size and reporting results without considering the statistical power / effective size. For example, as the reviewer suggests, the choice of ATC was probably not ideal in the original study’s context because of low incidence levels in the control group in the first place. However, the same study design focusing on ATC could still have found an effect (if there is a true effect and) if the sample size (in this case both the number of included municipalities and the number of cases reported in these municipalities) was sufficient. Instead, I would like to propose changing the previous Conclusion section to the “Discussion and Conclusion” section and citing reviewers’ reports to discuss the suggested two points there. This allows me to separate the criticisms derived from my own analysis from those that were not, and also to appropriately acknowledge that the ideas came from the reviewers’ suggestions. 
I am aware that it may be a rather unusual practice in academic publications; however, given the nature of the publishing model of F1000 with citable open reviews and the fact that the reviewers provided new discussion points that were absent from the original version, I would like to opt for offering credit to the reviewers who contributed their time for the scholarly discussion. (Added to Discussion and Conclusion section): In addition to this issue of insufficient statistical power, which I demonstrated in the present reanalysis, two additional issues have been raised during the peer review process of this article. For one: the authors’ choice of ATC as the main estimand may have been suboptimal as Shiba has pointed out in his comment (Shiba, 2022). The control group in the original study may have consisted of municipalities that did not need school closures because of low incidence. ATC in this context would represent the effect in settings where the policy was not needed, which is of limited political implication. To counterargue against school closures as a control policy, the authors should have aimed to robustly show insufficient effect of such a policy even in municipalities in which school closures had been a selectable option (possibly because of higher incidence rate, where an effective policy could be more impactful). For the other: residual confounding may have remained among the matched samples. Both (Shiba, 2022) and (Hayashi, 2022) expressed concern on the immediate positive effect on incidence rate (e.g. increased incidence) immediately after the implementation of school closures in the treated group, which Fukumoto et al. left unexplained. Unless a plausible causal mechanism in which school closures could increase COVID-19 incidence is provided, this gap between the treated and control group may indicate residual bias, which is unsurprising given my reanalysis results suggesting matching failure. 
Hayashi additionally suggested that the trend in incidence (e.g. increasing/decreasing) may be one of the potential confounding variables that had not been adjusted for in the original study (Hayashi, 2022). Minor points: Page 3: “Moreover, matching technique used to minimise confounding has a known side effect of...” needs citation. > We have newly cited King et al. (2017). Page 3: “The effect estimates relative to the baseline incidence would be a more intuitive and interpretable measure for assessment of its practical use.” This needs more justification. The additive effect measure has its own advantage because it can speak directly to the population impacts of the intervention. > We have added an explanation that the relative risk reduction is particularly relevant because of the dynamic nature of infectious disease transmission: infectious disease risks are inherently dynamic; more current infections in a population would result in a greater risk of infection among susceptible individuals through increased encounters with infectious others. This means that the effect of school closures, which are intended to reduce contacts at schools, should also depend on the baseline incidence in the population because the risk of infection averted would be the reduction in contacts multiplied by the probability that the contacts were otherwise with infectious individuals. Page 3: The sentence starting, “However, the theoretical ground is unclear...” seems incomplete. > I have replaced “to” with “on” but believe the sentence itself is complete (“theoretical ground is unclear”). Page 3: “ATC and average treatment effect on the treatment: ATT) and their confidence intervals relative to the average outcome (incidence rate per capita) in the control group (Figure 1).” This sentence seems incomplete. Perhaps delete the part after “relative to...“. > I have added a semicolon and a comma to clarify the structure of the sentence. 
This sentence is meant to indicate that both the ATC (or ATT) and their confidence intervals were rescaled to a relative value, where the incidence rate per capita in the control group is the reference. Page 3: Spell out ESS and provide a bit more context of what it is. > I have spelled it out with a brief explanation and citation: “The effective sample size (ESS; a proxy measure for the amount of information contained in weighted samples (Shook-Sa and Hudgens))…” > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised version and the following responses will address the reviewers' concerns. Specifically, I added two key discussion points that both reviewers agreed should be included: (i) choice of the estimand (ATC vs ATT) and (ii) potential residual confounding. This article provides a critical re-assessment of the recent paper that concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 (Fukumoto et al. , 2021). The author of this article argued that the original analysis (and the refined version presented in the current article) was likely underpowered and unreliable. The author raised a great point about the potential violation of positivity, about which the original paper provided little discussion. I appreciate the author for bringing our attention to these important issues and allowing me to engage in carefully reading the original article. I agree with the author that it is vital to be mindful that the absence of “evidence” defined by the lack of statistical significance is not evidence of absence. Hence, I find this type of article assessing alternative explanations for the null findings in the original paper particularly valuable. I provide some major and minor suggestions to strengthen the current manuscript further. 
> I thank the reviewer for appreciating my criticism and providing useful suggestions. Major points: The manuscript is currently written as if the lack of statistical power is the primary (and perhaps only) issue that might explain the original paper’s null findings. The author does discuss the possibility of unadjusted confounding later in the paper, but the way the Introduction and the Conclusion were written made it seem a secondary problem. I suggest the author provide a more in-depth discussion on other potential issues. There are at least two other issues I think are worth discussing: the choice of causal estimand and residual confounding. To me, the most significant limitation of the original article is its focus on identifying and estimating ATC. The controls in the study were the municipalities that did not (need to) enforce school closures in the Spring of 2020; that is, they were most likely the areas that do not benefit from school closures because they were not experiencing the spread of COVID-19 to begin with, which is supported by the extremely low (nearly zero) confirmed cases in the control group shown in Figure 1. ATC, in this context, is of little policy relevance because such municipalities would have few cases regardless of the implementation of the school closures (the counterfactual “what would happen had they closed the schools” would not differ much from the reality). The null finding for ATC is somewhat expected. What we want to know instead is if the spread in the treated (i.e., municipalities that were experiencing the rise in confirmed cases and had to decide to close schools) would have been worse without the school closures (i.e., ATT). I know that the original article did investigate ATT as a sensitivity analysis, but it was based on the data stemming from the school closure on only one date (April 6) with a much smaller matched sample size (as the treated was smaller in number). 
They did not provide key supplementary information for the ATT analysis (e.g., covariate balance after matching) either. I understand that, with the available data, ATC (versus ATT) was easier to estimate as there were more controls; but that does not justify their oversimplified conclusion that there is no effect of school closures in Japan because the effect of a hypothetical intervention can vary substantially depending on the target populations. The original article and the current article took a careful approach to mitigating bias due to confounding; yet, I see some evidence of residual confounding. The two articles indicate that ATC immediately after the school closures was positive point-estimate-wise, at least for some dates. The original article's authors wrote: “The ATC values suggest that municipalities that closed their schools mostly increased the number of cases”. If matching was successful and there was no unadjusted confounding, this statement would be true. Yet, they provide no compelling explanation for why school closures may causally “increase” confirmed cases. I cannot think of any. If not causal, the increase in cases among the treated municipalities in the matched sample is likely due to residual confounding—they had a reason to be concerned about the spread and decided to close the school, which was not captured by the observed covariates, including the prior outcome values and school closure status. The author of the current article raised an excellent point regarding residual bias, but some additional discussion on this issue would be appreciated. > I thank the reviewer for pointing out the existence of potential issues that I did not emphasise in the paper. I believe these additional issues are indeed worth mentioning in the manuscript. Meanwhile, I would prefer to retain the lack of statistical power as the primary issue because the present manuscript leverages the results of re-analysis focusing on the statistical power. 
Moreover, the two additional points suggested would also eventually come down to the problem of limited sample size and reporting results without considering the statistical power / effective size. For example, as the reviewer suggests, the choice of ATC was probably not ideal in the original study’s context because of low incidence levels in the control group in the first place. However, the same study design focusing on ATC could still have found an effect (if there is a true effect and) if the sample size (in this case both the number of included municipalities and the number of cases reported in these municipalities) was sufficient. Instead, I would like to propose changing the previous Conclusion section to the “Discussion and Conclusion” section and citing reviewers’ reports to discuss the suggested two points there. This allows me to separate the criticisms derived from my own analysis from those that were not, and also to appropriately acknowledge that the ideas came from the reviewers’ suggestions. I am aware that it may be a rather unusual practice in academic publications; however, given the nature of the publishing model of F1000 with citable open reviews and the fact that the reviewers provided new discussion points that were absent from the original version, I would like to opt for offering credit to the reviewers who contributed their time for the scholarly discussion. (Added to Discussion and Conclusion section): In addition to this issue of insufficient statistical power, which I demonstrated in the present reanalysis, two additional issues have been raised during the peer review process of this article. For one: the authors’ choice of ATC as the main estimand may have been suboptimal as Shiba has pointed out in his comment (Shiba, 2022). The control group in the original study may have consisted of municipalities that did not need school closures because of low incidence. 
ATC in this context would represent the effect in settings where the policy was not needed, which is of limited political implication. To counterargue against school closures as a control policy, the authors should have aimed to robustly show insufficient effect of such a policy even in municipalities in which school closures had been a selectable option (possibly because of higher incidence rate, where an effective policy could be more impactful). For the other: residual confounding may have remained among the matched samples. Both (Shiba, 2022) and (Hayashi, 2022) expressed concern on the immediate positive effect on incidence rate (e.g. increased incidence) immediately after the implementation of school closures in the treated group, which Fukumoto et al. left unexplained. Unless a plausible causal mechanism in which school closures could increase COVID-19 incidence is provided, this gap between the treated and control group may indicate residual bias, which is unsurprising given my reanalysis results suggesting matching failure. Hayashi additionally suggested that the trend in incidence (e.g. increasing/decreasing) may be one of the potential confounding variables that had not been adjusted for in the original study (Hayashi, 2022). Minor points: Page 3: “Moreover, matching technique used to minimise confounding has a known side effect of...” needs citation. > We have newly cited King et al. (2017). Page 3: “The effect estimates relative to the baseline incidence would be a more intuitive and interpretable measure for assessment of its practical use.” This needs more justification. The additive effect measure has its own advantage because it can speak directly to the population impacts of the intervention. 
> We have added an explanation that the relative risk reduction is particularly relevant because of the dynamic nature of the infectious disease transmission: Infectious disease risks are inherently dynamic; more current infections in a population would result in a greater risk of infection among susceptible individuals through increased encounters with infectious others. This means that the effect of school closures, which intended to reduce contacts at schools, should also depend on the baseline incidence in the population because the risk of infection averted would be the reduction in contacts multiplied by the probability that the contacts were otherwise with infectious individuals. Page 3: The sentence starting, “However, the theoretical ground is unclear...” seems incomplete. > I have replaced “to” with “on” but believe the sentence itself is complete (“theoretical ground is unclear”). Page 3: “ATC and average treatment effect on the treatment: ATT) and their confidence intervals relative to the average outcome (incidence rate per capita) in the control group (Figure 1).” This sentence seems incomplete. Perhaps delete the part after “relative to...“. > I have added a semicolon and a comma to clarify the structure of the sentence. This sentence is meant to indicate that both the ATC (or ATT) and their confidence intervals were rescaled to a relative value, where the incidence rate per capita in the control group is the reference. Page 3: Spell out ESS and provide a bit more context of what it is. > I have spelled it out with a brief explanation and citation: “The effective sample size (ESS; a proxy measure for the amount of information contained in weighted samples (Shook-Sa and Hudgens))…” Competing Interests: I received a grant from Taisho Pharmaceutical Co., Ltd. for research outside of this study. 
Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 27 Jun 2024 Akira Endo , School of Tropical Medicine and Global Health, Nagasaki University, Nagasaki, 852-8521, Japan 27 Jun 2024 Author Response > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised ... Continue reading > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised version and the following responses will address the reviewers' concerns. Specifically, I added two key discussion points that both reviewers agreed should be included: (i) choice of the estimand (ATC vs ATT) and (ii) potential residual confounding. This article provides a critical re-assessment of the recent paper that concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 (Fukumoto et al. , 2021). The author of this article argued that the original analysis (and the refined version presented in the current article) was likely underpowered and unreliable. The author raised a great point about the potential violation of positivity, about which the original paper provided little discussion. I appreciate the author for bringing our attention to these important issues and allowing me to engage in carefully reading the original article. I agree with the author that it is vital to be mindful that the absence of “evidence” defined by the lack of statistical significance is not evidence of absence. Hence, I find this type of article assessing alternative explanations for the null findings in the original paper particularly valuable. I provide some major and minor suggestions to strengthen the current manuscript further. > I thank the reviewer for appreciating my criticism and providing useful suggestions. 
Major points: The manuscript is currently written as if the lack of statistical power is the primary (and perhaps only) issue that might explain the original paper’s null findings. The author does discuss the possibility of unadjusted confounding later in the paper, but the way the Introduction and the Conclusion were written made it seem a secondary problem. I suggest the author provide a more in-depth discussion on other potential issues. There are at least two other issues I think are worth discussing: the choice of causal estimand and residual confounding. To me, the most significant limitation of the original article is its focus on identifying and estimating ATC. The controls in the study were the municipalities that did not (need to) enforce school closures in the Spring of 2020; that is, they were most likely the areas that do not benefit from school closures because they were not experiencing the spread of COVID-19 to begin with, which is supported by the extremely low (nearly zero) confirmed cases in the control group shown in Figure 1. ATC, in this context, is of little policy relevance because such municipalities would have few cases regardless of the implementation of the school closures (the counterfactual “what would happen had they closed the schools” would not differ much from the reality). The null finding for ATC is somewhat expected. What we want to know instead is if the spread in the treated (i.e., municipalities that were experiencing the rise in confirmed cases and had to decide to close schools) would have been worse without the school closures (i.e., ATT). I know that the original article did investigate ATT as a sensitivity analysis, but it was based on the data stemming from the school closure on only one date (April 6) with a much smaller matched sample size (as the treated was smaller in number). They did not provide key supplementary information for the ATT analysis (e.g., covariate balance after matching) either. 
I understand that, with the available data, ATC (versus ATT) was easier to estimate as there were more controls; but that does not justify their oversimplified conclusion that there is no effect of school closures in Japan because the effect of a hypothetical intervention can vary substantially depending on the target populations. The original article and the current article took a careful approach to mitigating bias due to confounding; yet, I see some evidence of residual confounding. The two articles indicate that ATC immediately after the school closures was positive point-estimate-wise, at least for some dates. The original article's authors wrote: “The ATC values suggest that municipalities that closed their schools mostly increased the number of cases”. If matching was successful and there was no unadjusted confounding, this statement would be true. Yet, they provide no compelling explanation for why school closures may causally “increase” confirmed cases. I cannot think of any. If not causal, the increase in cases among the treated municipalities in the matched sample is likely due to residual confounding—they had a reason to be concerned about the spread and decided to close the school, which was not captured by the observed covariates, including the prior outcome values and school closure status. The author of the current article raised an excellent point regarding residual bias, but some additional discussion on this issue would be appreciated. > I thank the reviewer for pointing out the existence of potential issues that I did not emphasise in the paper. I believe these additional issues are indeed worth mentioning in the manuscript. Meanwhile, I would prefer to retain the lack of statistical power as the primary issue because the present manuscript leverages the results of re-analysis focusing on the statistical power. 
Moreover, the two additional points suggested would also eventually come down to the problem of limited sample size and reporting results without considering the statistical power / effective size. For example, as the reviewer suggests, the choice of ATC was probably not ideal in the original study’s context because of low incidence levels in the control group in the first place. However, the same study design focusing on ATC could still have found an effect (if there is a true effect and) if the sample size (in this case both the number of included municipalities and the number of cases reported in these municipalities) was sufficient. Instead, I would like to propose changing the previous Conclusion section to the “Discussion and Conclusion” section and citing reviewers’ reports to discuss the suggested two points there. This allows me to separate the criticisms derived from my own analysis from those that were not, and also to appropriately acknowledge that the ideas came from the reviewers’ suggestions. I am aware that it may be a rather unusual practice in academic publications; however, given the nature of the publishing model of F1000 with citable open reviews and the fact that the reviewers provided new discussion points that were absent from the original version, I would like to opt for offering credit to the reviewers who contributed their time for the scholarly discussion. (Added to Discussion and Conclusion section): In addition to this issue of insufficient statistical power, which I demonstrated in the present reanalysis, two additional issues have been raised during the peer review process of this article. For one: the authors’ choice of ATC as the main estimand may have been suboptimal as Shiba has pointed out in his comment (Shiba, 2022). The control group in the original study may have consisted of municipalities that did not need school closures because of low incidence. 
ATC in this context would represent the effect in settings where the policy was not needed, which is of limited political implication. To counterargue against school closures as a control policy, the authors should have aimed to robustly show insufficient effect of such a policy even in municipalities in which school closures had been a selectable option (possibly because of higher incidence rate, where an effective policy could be more impactful). For the other: residual confounding may have remained among the matched samples. Both (Shiba, 2022) and (Hayashi, 2022) expressed concern on the immediate positive effect on incidence rate (e.g. increased incidence) immediately after the implementation of school closures in the treated group, which Fukumoto et al. left unexplained. Unless a plausible causal mechanism in which school closures could increase COVID-19 incidence is provided, this gap between the treated and control group may indicate residual bias, which is unsurprising given my reanalysis results suggesting matching failure. Hayashi additionally suggested that the trend in incidence (e.g. increasing/decreasing) may be one of the potential confounding variables that had not been adjusted for in the original study (Hayashi, 2022). Minor points: Page 3: “Moreover, matching technique used to minimise confounding has a known side effect of...” needs citation. > We have newly cited King et al. (2017). Page 3: “The effect estimates relative to the baseline incidence would be a more intuitive and interpretable measure for assessment of its practical use.” This needs more justification. The additive effect measure has its own advantage because it can speak directly to the population impacts of the intervention. 
> We have added an explanation that the relative risk reduction is particularly relevant because of the dynamic nature of the infectious disease transmission: Infectious disease risks are inherently dynamic; more current infections in a population would result in a greater risk of infection among susceptible individuals through increased encounters with infectious others. This means that the effect of school closures, which intended to reduce contacts at schools, should also depend on the baseline incidence in the population because the risk of infection averted would be the reduction in contacts multiplied by the probability that the contacts were otherwise with infectious individuals. Page 3: The sentence starting, “However, the theoretical ground is unclear...” seems incomplete. > I have replaced “to” with “on” but believe the sentence itself is complete (“theoretical ground is unclear”). Page 3: “ATC and average treatment effect on the treatment: ATT) and their confidence intervals relative to the average outcome (incidence rate per capita) in the control group (Figure 1).” This sentence seems incomplete. Perhaps delete the part after “relative to...“. > I have added a semicolon and a comma to clarify the structure of the sentence. This sentence is meant to indicate that both the ATC (or ATT) and their confidence intervals were rescaled to a relative value, where the incidence rate per capita in the control group is the reference. Page 3: Spell out ESS and provide a bit more context of what it is. > I have spelled it out with a brief explanation and citation: “The effective sample size (ESS; a proxy measure for the amount of information contained in weighted samples (Shook-Sa and Hudgens))…” > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised version and the following responses will address the reviewers' concerns. 
Specifically, I added two key discussion points that both reviewers agreed should be included: (i) choice of the estimand (ATC vs ATT) and (ii) potential residual confounding. This article provides a critical re-assessment of the recent paper that concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 (Fukumoto et al. , 2021). The author of this article argued that the original analysis (and the refined version presented in the current article) was likely underpowered and unreliable. The author raised a great point about the potential violation of positivity, about which the original paper provided little discussion. I appreciate the author for bringing our attention to these important issues and allowing me to engage in carefully reading the original article. I agree with the author that it is vital to be mindful that the absence of “evidence” defined by the lack of statistical significance is not evidence of absence. Hence, I find this type of article assessing alternative explanations for the null findings in the original paper particularly valuable. I provide some major and minor suggestions to strengthen the current manuscript further. > I thank the reviewer for appreciating my criticism and providing useful suggestions. Major points: The manuscript is currently written as if the lack of statistical power is the primary (and perhaps only) issue that might explain the original paper’s null findings. The author does discuss the possibility of unadjusted confounding later in the paper, but the way the Introduction and the Conclusion were written made it seem a secondary problem. I suggest the author provide a more in-depth discussion on other potential issues. There are at least two other issues I think are worth discussing: the choice of causal estimand and residual confounding. To me, the most significant limitation of the original article is its focus on identifying and estimating ATC. 
The controls in the study were the municipalities that did not (need to) enforce school closures in the Spring of 2020; that is, they were most likely the areas that do not benefit from school closures because they were not experiencing the spread of COVID-19 to begin with, which is supported by the extremely low (nearly zero) confirmed cases in the control group shown in Figure 1. ATC, in this context, is of little policy relevance because such municipalities would have few cases regardless of the implementation of the school closures (the counterfactual “what would happen had they closed the schools” would not differ much from the reality). The null finding for ATC is somewhat expected. What we want to know instead is if the spread in the treated (i.e., municipalities that were experiencing the rise in confirmed cases and had to decide to close schools) would have been worse without the school closures (i.e., ATT). I know that the original article did investigate ATT as a sensitivity analysis, but it was based on the data stemming from the school closure on only one date (April 6) with a much smaller matched sample size (as the treated was smaller in number). They did not provide key supplementary information for the ATT analysis (e.g., covariate balance after matching) either. I understand that, with the available data, ATC (versus ATT) was easier to estimate as there were more controls; but that does not justify their oversimplified conclusion that there is no effect of school closures in Japan because the effect of a hypothetical intervention can vary substantially depending on the target populations. The original article and the current article took a careful approach to mitigating bias due to confounding; yet, I see some evidence of residual confounding. The two articles indicate that ATC immediately after the school closures was positive point-estimate-wise, at least for some dates. 
The original article's authors wrote: “The ATC values suggest that municipalities that closed their schools mostly increased the number of cases”. If matching was successful and there was no unadjusted confounding, this statement would be true. Yet, they provide no compelling explanation for why school closures may causally “increase” confirmed cases. I cannot think of any. If not causal, the increase in cases among the treated municipalities in the matched sample is likely due to residual confounding—they had a reason to be concerned about the spread and decided to close the school, which was not captured by the observed covariates, including the prior outcome values and school closure status. The author of the current article raised an excellent point regarding residual bias, but some additional discussion on this issue would be appreciated. > I thank the reviewer for pointing out the existence of potential issues that I did not emphasise in the paper. I believe these additional issues are indeed worth mentioning in the manuscript. Meanwhile, I would prefer to retain the lack of statistical power as the primary issue because the present manuscript leverages the results of re-analysis focusing on the statistical power. Moreover, the two additional points suggested would also eventually come down to the problem of limited sample size and reporting results without considering the statistical power / effective size. For example, as the reviewer suggests, the choice of ATC was probably not ideal in the original study’s context because of low incidence levels in the control group in the first place. However, the same study design focusing on ATC could still have found an effect (if there is a true effect and) if the sample size (in this case both the number of included municipalities and the number of cases reported in these municipalities) was sufficient. 
Instead, I would like to propose changing the previous Conclusion section to the “Discussion and Conclusion” section and citing reviewers’ reports to discuss the suggested two points there. This allows me to separate the criticisms derived from my own analysis from those that were not, and also to appropriately acknowledge that the ideas came from the reviewers’ suggestions. I am aware that it may be a rather unusual practice in academic publications; however, given the nature of the publishing model of F1000 with citable open reviews and the fact that the reviewers provided new discussion points that were absent from the original version, I would like to opt for offering credit to the reviewers who contributed their time for the scholarly discussion. (Added to Discussion and Conclusion section): In addition to this issue of insufficient statistical power, which I demonstrated in the present reanalysis, two additional issues have been raised during the peer review process of this article. For one: the authors’ choice of ATC as the main estimand may have been suboptimal as Shiba has pointed out in his comment (Shiba, 2022). The control group in the original study may have consisted of municipalities that did not need school closures because of low incidence. ATC in this context would represent the effect in settings where the policy was not needed, which is of limited political implication. To counterargue against school closures as a control policy, the authors should have aimed to robustly show insufficient effect of such a policy even in municipalities in which school closures had been a selectable option (possibly because of higher incidence rate, where an effective policy could be more impactful). For the other: residual confounding may have remained among the matched samples. Both (Shiba, 2022) and (Hayashi, 2022) expressed concern on the immediate positive effect on incidence rate (e.g. 
increased incidence) immediately after the implementation of school closures in the treated group, which Fukumoto et al. left unexplained. Unless a plausible causal mechanism in which school closures could increase COVID-19 incidence is provided, this gap between the treated and control group may indicate residual bias, which is unsurprising given my reanalysis results suggesting matching failure. Hayashi additionally suggested that the trend in incidence (e.g. increasing/decreasing) may be one of the potential confounding variables that had not been adjusted for in the original study (Hayashi, 2022). Minor points: Page 3: “Moreover, matching technique used to minimise confounding has a known side effect of...” needs citation. > We have newly cited King et al. (2017). Page 3: “The effect estimates relative to the baseline incidence would be a more intuitive and interpretable measure for assessment of its practical use.” This needs more justification. The additive effect measure has its own advantage because it can speak directly to the population impacts of the intervention. > We have added an explanation that the relative risk reduction is particularly relevant because of the dynamic nature of the infectious disease transmission: Infectious disease risks are inherently dynamic; more current infections in a population would result in a greater risk of infection among susceptible individuals through increased encounters with infectious others. This means that the effect of school closures, which intended to reduce contacts at schools, should also depend on the baseline incidence in the population because the risk of infection averted would be the reduction in contacts multiplied by the probability that the contacts were otherwise with infectious individuals. Page 3: The sentence starting, “However, the theoretical ground is unclear...” seems incomplete. > I have replaced “to” with “on” but believe the sentence itself is complete (“theoretical ground is unclear”). 
Page 3: “ATC and average treatment effect on the treatment: ATT) and their confidence intervals relative to the average outcome (incidence rate per capita) in the control group (Figure 1).” This sentence seems incomplete. Perhaps delete the part after “relative to...“. > I have added a semicolon and a comma to clarify the structure of the sentence. This sentence is meant to indicate that both the ATC (or ATT) and their confidence intervals were rescaled to a relative value, where the incidence rate per capita in the control group is the reference. Page 3: Spell out ESS and provide a bit more context of what it is. > I have spelled it out with a brief explanation and citation: “The effective sample size (ESS; a proxy measure for the amount of information contained in weighted samples (Shook-Sa and Hudgens))…” Competing Interests: I received a grant from Taisho Pharmaceutical Co., Ltd. for research outside of this study. Close Report a concern COMMENT ON THIS REPORT Comments on this article Comments (0) Version 2 VERSION 2 PUBLISHED 25 Apr 2022 ADD YOUR COMMENT Comment keyboard_arrow_left keyboard_arrow_right Open Peer Review Reviewer Status info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Reviewer Reports Invited Reviewers 1 2 3 Version 2 (revision) 16 Apr 24 read Version 1 25 Apr 22 read read Koichiro Shiba , Harvard T.H. Chan School of Public Health, Boston, USA Takehiko I. 
Hayashi , National Institute for Environmental Studies, Tsukuba, Japan Eiji Yamamura , Seinan Gakuin University, Fukuoka, Japan Comments on this article All Comments (0) Add a comment Sign up for content alerts Sign Up You are now signed up to receive this alert Browse by related subjects keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2024 Yamamura E. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 14 Sep 2024 | for Version 2 Eiji Yamamura , Seinan Gakuin University, Fukuoka, Japan 0 Views copyright © 2024 Yamamura E. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (0) Approved With Reservations info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Report on ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 The aim of the study is to examine whether PSM (Propensity Score Matching) method of Fukumoto, McClean, Nakagawa (FMN 2021) is appropriately conducted. Main finding of this study is that ESS has substantially reduced due to imbalanced covariates between samples. 
FMN did not find causal effect of school closure on incidence rates in the early phase of COVID-19 pandemic. However, actually, their estimation results are not reliable because of insufficient ESS. In my view, validity of set of covariates to calculate propensity score and its results in the “first” stage have not been scrutinized and discussed profoundly. However, other reports have already pointed this out. The author has already replied to it. The focus of this study seems to be on the balance issue and ESS. I agree with it because this study is correspondence rather than full paper. Here, I raise only several issues that remain to be addressed. Major issues: 1. For obtaining results of Fig.6, the author conducted estimation to set the caliper to be 0.25 by following the classical study of Rosenbaum & Rubin (1985) as below. "Propensity scores were estimated by a Lasso-regularised linear regression model and included in genetic matching with a calliper of 0.25 (Rosenbaum & Rubin, 1985)". 2. A caliper of 0.25 is widely used for matching in empirical studies. However, more recent works scrutinize the optimal value of calipers. For instance, Austin (2011) recommends that researchers match the propensity score using calipers being equal to 0.2. For illustrating Fig 6, author should use 0.2 rather than 0.25. Otherwise, author should justify his choosing 0.25 as caliper based on recent literature. References. Austin PC, 2011 (Ref 1) Minor issues: 1. This study rigorously scrutinized the validity of PSM. In conclusion, the author derived more general argument than what has been done in this study. PSM has been widely used in empirical studies. However, in my view, most studies using PSM mainly reported the main results, while not having sufficiently examined its validity. Actually, I believe that PSM results reported in many studies published in peer review journals would not meet the criteria to justify them (not valid) if researchers rigorously test it. FMN is one of them. 
Therefore, it seems better to narrow down the points to limitation of PSM. 2. I found inconsistency between Figs and its explanation in main body of text. Further, there seem to be several errors in Figs. As I read the text, referring to Figs, I became confused. Careless mistakes in basic information should be corrected to avoid reader’s misunderstanding. (1) In caption of Fig 1, (d) appeared two times. “(d) Relative ATT for the closure as of April 6, 2020. (d) Comparison of sample sizes.” This should be “(c) Relative ATT for the closure as of April 6, 2020. (d) Comparison of sample sizes.” (2) Concerning Fig 1, I cannot find “Relative ATT (April 10)” although “Relative ATT (April 06)” was presented. It is strange because readers can compare results between different setting in Figures 2-6. Author should present “Relative ATT (April 10)” as Fig 1 d (The current Fig 1 d should be “Fig 1 e”), otherwise explain the reason not to indicate it. (3) In caption of Figs 2. Title of Fig 2a is "Outcome (April 6): 50% mitigating effect" while that of Fig 2 d is “Outcome (April 6): 80% mitigating effect". From these titles and contents of Figs 2, I believe that Fig 2a, b and c are results of "Outcome (April 6): 50% mitigating effect" while Fig 2d, e and f "Outcome (April 6): 80% mitigating effect". "ATT for the 80% mitigating effect was also statistically insignificant (Figure 2c and 2d)," Probably, the second sentence should be “ATT for the 80% mitigating effect was also statistically insignificant (Figure 2e and 2f),” (4) In the end of Caption of Fig 2, I found “(d)–(f) Those for closure as of April 10”. The sentence should be “In (d)–(f), 80% mitigating effects”. Probably, the author copied the caption of Fig 6 although caption of Fig 6 is correct. (5) Comment (4) also applied to Figs 3. Is the rationale for commenting on the previous publication clearly described? Yes Are any opinions stated well-argued, clear and cogent? 
Yes Are arguments sufficiently supported by evidence from the published literature or by new data and results? Yes Is the conclusion balanced and justified on the basis of the presented arguments? Yes References 1. Austin PC: Optimal caliper widths for propensity-score matching when estimating differences in means and differences in proportions in observational studies. Pharm Stat . 2011; 10 (2): 150-61 PubMed Abstract | Publisher Full Text Competing Interests No competing interests were disclosed. Reviewer Expertise Behavioral Economics, Applied Economics I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. reply Respond to this report Responses (0) Yamamura E. Peer Review Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2024, 11 :456 ( https://doi.org/10.5256/f1000research.140839.r301839) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/11-456/v2#referee-response-301839 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2022 Hayashi T. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 04 Jul 2022 | for Version 1 Takehiko I. Hayashi , Social Systems Division, National Institute for Environmental Studies, Tsukuba, Japan 0 Views copyright © 2022 Hayashi T. 
This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions This article reanalyzed Fukumoto et al. (2021), which concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 in Japan. The author first examined the robustness of the conclusion of Fukumoto et al. (2021) to the way the indicator is set up. The author then conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). As I state below (see Major points), I agree that these results (Figs. 1 and 2) support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude there was “no causal effect.” The author also pointed out the risk of positivity violation and the resulting small effective sample size in propensity score analysis (Figs. 4 and 5). I believe this issue should be further examined because there was a possibly nonnegligible level of residual confounding (see Minor points), but the additional examination may be beyond the scope of this article. Overall, I agree that the authors' arguments are adequately supported by the results presented. 
In general, the issue of statistical power tends to receive less focus than the issue of identification in the practice of causal inference. However, statistical power is always important when discussing the effects of policies in real-world contexts. This paper is a good practice example to remind us of the fundamentals of statistical inference. Major points: The primary purpose of this study is to examine the statistical power of the analysis of Fukumoto et al. (2021). Although Fukumoto et al. (2021) is an elaborate study that examined most possible considerations, it lacks an examination of statistical power, and statistical power is a logically essential issue if one is to conclude “no causal effect” based on the lack of statistical significance. The author first addressed the issue of outcome measures. I agree with the author that effect estimates relative to the baseline incidence may be superior to per-capita incidence rates as an outcome measure. As the author stated, taking only non-negative values for the incidence rate can be a large problem when the incidence rate of the control population is very low, as it was in this case. I think that the spikes in the red line of the control population in Figures 1c and 1i of Fukumoto et al. (2021) (which diverge from the black line of the matched treatment population, indicating a failure to construct an adequate counterfactual) also suggest a disadvantage of using per capita rates, given the explanation that these spikes were caused by the small sizes of the focal municipalities (see the Supplementary Information of Fukumoto et al. 2021, P. 11, Lines 3–11). Although I do not immediately conclude that the advantage of using effect estimates relative to the baseline incidence as the indicator is absolute, using this indicator is one of the possible reasonable choices. The results using this indicator (Fig. 1) showed an inherent lack of power in the analysis, illustrating that the conclusions of Fukumoto et al. 
(2021) (implicitly assuming a degree of statistical power of the analyses) are not robust to the way the indicator is set up. The author's next approach is more direct. The author conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). The simulation showed that statistical significance was hardly detected even for substantial effects (Fig. 2). I believe these results convincingly demonstrate that the design and data of Fukumoto et al. (2021) did not provide sufficient statistical power to conclude “no causal effect.” The above results (Figs. 1 and 2) logically support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude "no causal effect." Minor points: Although the following comments may address issues that are beyond the scope of this study, the issues themselves are essential. These comments are intended as suggestions and not mandatory revisions of this article. First, as the other reviewer (Dr. Shiba) stated, the validity of the estimand is essential. I agree that ATT should have been the main estimand if Fukumoto et al. (2021) mention the efficacy of the policy in the real-world context. I recommend that the author discuss this point further. Second, I think more consideration needs to be given to the possibility of insufficient adjustment (i.e., residual confounding). The author's mention of the risk of positivity violation and the resulting small effective sample size in propensity score analysis is a good point (Figs. 4 and 5). The separation of the propensity score distribution implies the inherent difficulties in matching important factors, especially those having a large effect on both treatment and outcome, which can introduce confounding. This is a real concern because many major covariates were not sufficiently adjusted. 
A well-known recommendation for an acceptable degree of ASMD after matching is ≤0.1 (Nguyen et al. 2017). However, the ASMDs of many covariates were actually ≥0.2 in this case (Table S3 in the Supplementary Information of Fukumoto et al. 2021). In general, I think it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” when the absolute value of ASMD was ≥0.2 in many covariates. I recommend the author check the loveplot of the matching of April 6, 10, and some cases for ATTs with the reference dashed line at 0.1 ASMD. I also recommend presenting the importance of covariates (e.g., specifying covariates having high standardized coefficient values in the propensity score estimation with red symbols) in the loveplots (see the Note below concerning the need for a loveplot). Third, discussing the possibility of missing important covariates may also be worthwhile. Some unexpected behavior of the Fukumoto et al. (2021) data suggests the possibility of residual confounding (due to the lack of incorporation of important covariates). For example, in Figure 1g of Fukumoto et al. (2021), large (see the absolute values) spikes appear only in the matching treated municipalities (and no spikes appear in all treated municipalities). In general, matching is expected to reduce the difference between treated and untreated baselines. Thus, it seems difficult to naturally explain the occurrence of large spikes only in the matched municipalities (unless the treatment actually increased outcomes or the effective sample size is very small). It is possible that important covariates were missed. For example, the trends (not sum) of the incidence rate before treatment was not included as covariates, but they might cause such spikes as follows. In the matching process, matched municipalities tend to have a similar value of the sum of the 7-day incidence rate. 
Here, the same value of this sum (e.g., 100 incidents per unit) can arise from municipalities that have different time trends (i.e., both increasing and decreasing trends are possible). In this situation, if the treatment (school closure) decision-making was affected by the increasing/decreasing trend, the treated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is an increasing trend. Similarly, the untreated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is a decreasing trend. In this case, the spikes (i.e., the difference in post-treatment outcomes) only in matched treated municipalities (as in Fig. 1g of Fukumoto et al ., 2021) could occur as an artifact of the inertia of the temporal trend (not sum) from the preceding 7 days. Note on the need for a loveplot: The following description in Fukumoto et al. (2021, P. 2114) is not sufficiently correct in two points: “Moreover, the differences in other covariates between the treated and control groups were also much smaller after matching than before (Supplementary Fig. 1 and Supplementary Table 3). Therefore, differences between the matched groups cannot be attributed to previous levels of infection or any other covariates.” First, whether confounding was removed or not does not depend on the relative ratio before/after ASMD; rather, it depends on the absolute magnitude of ASMD. Even if ASMD becomes relatively much smaller after matching, if the absolute magnitude of ASMD was over 0.2 in many covariates, it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” in a general sense. Second, a smaller average ASMD of many covariates does not assure the removal of confounding. The removal of confounding needs to balance important covariates satisfying a backdoor criterion (not an average of all covariates). 
Practically, we can speculate about the importance of covariates from the effects of these covariates on treatments and outcomes (c.f., VanderWeele 2019). Figure S1 in the Supplementary Information of Fukumoto et al. (2021) did not provide good information with which to judge these two essential points in terms of the reduction of confounding. To make such a judgment, a loveplot with a reference line at 0.1 is suitable and is also a standard practice. Is the rationale for commenting on the previous publication clearly described? Yes Are any opinions stated well-argued, clear and cogent? Yes Are arguments sufficiently supported by evidence from the published literature or by new data and results? Yes Is the conclusion balanced and justified on the basis of the presented arguments? Yes References 1. Nguyen TL, Collins GS, Spence J, Daurès JP, et al.: Double-adjustment in propensity score matching analysis: choosing a threshold for considering residual imbalance. BMC Med Res Methodol . 2017; 17 (1): 78 PubMed Abstract | Publisher Full Text 2. VanderWeele TJ: Principles of confounder selection. Eur J Epidemiol . 2019; 34 (3): 211-219 PubMed Abstract | Publisher Full Text Competing Interests No competing interests were disclosed. Reviewer Expertise Risk analysis, statistical causal inference, and environmental data analysis I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. reply Respond to this report Responses (1) Author Response 27 Jun 2024 Akira Endo, School of Tropical Medicine and Global Health, Nagasaki University, Nagasaki, 852-8521, Japan > I thank the reviewers for their constructive feedback. While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised version and the following responses will address the reviewers' concerns. 
Specifically, I added two key discussion points that both reviewers agreed should be included: (i) choice of the estimand (ATC vs ATT) and (ii) potential residual confounding. This article reanalyzed Fukumoto et al. (2021), which concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 in Japan. The author first examined the robustness of the conclusion of Fukumoto et al. (2021) to the way the indicator is set up. The author then conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). As I state below (see Major points), I agree that these results (Figs. 1 and 2) support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude there was “no causal effect.” The author also pointed out the risk of positivity violation and the resulting small effective sample size in propensity score analysis (Figs. 4 and 5). I believe this issue should be further examined because there was a possibly nonnegligible level of residual confounding (see Minor points), but the additional examination may be beyond the scope of this article. Overall, I agree that the authors' arguments are adequately supported by the results presented. In general, the issue of statistical power tends to receive less focus than the issue of identification in the practice of causal inference. However, statistical power is always important when discussing the effects of policies in real-world contexts. This paper is a good practice example to remind us of the fundamentals of statistical inference. > I thank the reviewer for supporting the points discussed in the manuscript and for offering additional comments on its content. Major points: The primary purpose of this study is to examine the statistical power of the analysis of Fukumoto et al. (2021). Although Fukumoto et al. 
(2021) is an elaborate study that examined most possible considerations, it lacks an examination of statistical power, and statistical power is a logically essential issue if one is to conclude “no causal effect” based on the lack of statistical significance. The author first addressed the issue of outcome measures. I agree with the author that effect estimates relative to the baseline incidence may be superior to per-capita incidence rates as an outcome measure. As the author stated, taking only non-negative values for the incidence rate can be a large problem when the incidence rate of the control population is very low, as it was in this case. I think that the spikes in the red line of the control population in Figures 1c and 1i of Fukumoto et al. (2021) (which diverge from the black line of the matched treatment population, indicating a failure to construct an adequate counterfactual) also suggest a disadvantage of using per capita rates, given the explanation that these spikes were caused by the small sizes of the focal municipalities (see the Supplementary Information of Fukumoto et al. 2021, P. 11, Lines 3–11). Although I do not immediately conclude that the advantage of using effect estimates relative to the baseline incidence as the indicator is absolute, using this indicator is one of the possible reasonable choices. The results using this indicator (Fig. 1) showed an inherent lack of power in the analysis, illustrating that the conclusions of Fukumoto et al. (2021) (implicitly assuming a degree of statistical power of the analyses) are not robust to the way the indicator is set up. The author's next approach is more direct. The author conducted simulations of cases with hypothetical 50% or 80% mitigating effects using the same study design and sample size as Fukumoto et al. (2021). The simulation showed that statistical significance was hardly detected even for substantial effects (Fig. 2). 
I believe these results convincingly demonstrate that the design and data of Fukumoto et al. (2021) did not provide sufficient statistical power to conclude “no causal effect.” The above results (Figs. 1 and 2) logically support the author's main argument that Fukumoto et al. (2021) did not provide sufficient statistical power to conclude "no causal effect." > I thank the reviewer for appreciating and further discussing in detail the main points of my criticism. Regarding the choice of indicator, in response to Reviewer 1 (Dr Shiba)’s comment, I have included additional reasoning why relative effect is more relevant to this infectious disease context. Minor points: Although the following comments may address issues that are beyond the scope of this study, the issues themselves are essential. These comments are intended as suggestions and not mandatory revisions of this article. First, as the other reviewer (Dr. Shiba) stated, the validity of the estimand is essential. I agree that ATT should have been the main estimand if Fukumoto et al. (2021) mention the efficacy of the policy in the real-world context. I recommend that the author discuss this point further. > Discussion on the choice of estimand (along with that on residual confounding) has been included in the Discussion and Conclusion section. Please also see the response to Reviewer 1. Second, I think more consideration needs to be given to the possibility of insufficient adjustment (i.e., residual confounding). The author's mention of the risk of positivity violation and the resulting small effective sample size in propensity score analysis is a good point (Figs. 4 and 5). The separation of the propensity score distribution implies the inherent difficulties in matching important factors, especially those having a large effect on both treatment and outcome, which can introduce confounding. This is a real concern because many major covariates were not sufficiently adjusted. 
A well-known recommendation for an acceptable degree of ASMD after matching is ≤0.1 (Nguyen et al. 2017). However, the ASMDs of many covariates were actually ≥0.2 in this case (Table S3 in the Supplementary Information of Fukumoto et al. 2021). In general, I think it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” when the absolute value of ASMD was ≥0.2 in many covariates. I recommend the author check the loveplot of the matching of April 6, 10, and some cases for ATTs with the reference dashed line at 0.1 ASMD. I also recommend presenting the importance of covariates (e.g., specifying covariates having high standardized coefficient values in the propensity score estimation with red symbols) in the loveplots (see the Note below concerning the need for a loveplot). > I thank the reviewer for constructive suggestion to further investigate the appropriateness of matching in the original study. While I agree that a loveplot would provide more in-depth understanding of what might have gone wrong with the original analysis, the aim of my article is to highlight the existence of the issue (not necessarily revealing every detail of the individual issues), which I believe has already been demonstrated. Once the potential issue is identified as such, in principle the original authors should be responsible for conducting robust analysis to defend their findings. Third, discussing the possibility of missing important covariates may also be worthwhile. Some unexpected behavior of the Fukumoto et al. (2021) data suggests the possibility of residual confounding (due to the lack of incorporation of important covariates). For example, in Figure 1g of Fukumoto et al. (2021), large (see the absolute values) spikes appear only in the matching treated municipalities (and no spikes appear in all treated municipalities). 
In general, matching is expected to reduce the difference between treated and untreated baselines. Thus, it seems difficult to naturally explain the occurrence of large spikes only in the matched municipalities (unless the treatment actually increased outcomes or the effective sample size is very small). It is possible that important covariates were missed. For example, the trends (not sum) of the incidence rate before treatment was not included as covariates, but they might cause such spikes as follows. In the matching process, matched municipalities tend to have a similar value of the sum of the 7-day incidence rate. Here, the same value of this sum (e.g., 100 incidents per unit) can arise from municipalities that have different time trends (i.e., both increasing and decreasing trends are possible). In this situation, if the treatment (school closure) decision-making was affected by the increasing/decreasing trend, the treated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is an increasing trend. Similarly, the untreated group may tend to include municipalities (with 100 incidents per unit before 7 days) when there is a decreasing trend. In this case, the spikes (i.e., the difference in post-treatment outcomes) only in matched treated municipalities (as in Fig. 1g of Fukumoto et al ., 2021) could occur as an artifact of the inertia of the temporal trend (not sum) from the preceding 7 days. > Discussion on possible residual confounder has been included in the Discussion and Conclusion section. Please also see the response to Reviewer 1. Note on the need for a loveplot: The following description in Fukumoto et al. (2021, P. 2114) is not sufficiently correct in two points: “Moreover, the differences in other covariates between the treated and control groups were also much smaller after matching than before (Supplementary Fig. 1 and Supplementary Table 3). 
Therefore, differences between the matched groups cannot be attributed to previous levels of infection or any other covariates.” First, whether confounding was removed or not does not depend on the relative ratio before/after ASMD; rather, it depends on the absolute magnitude of ASMD. Even if ASMD becomes relatively much smaller after matching, if the absolute magnitude of ASMD was over 0.2 in many covariates, it is difficult to state that “differences between the matched groups cannot be attributed to previous levels of infection or any other covariates” in a general sense. Second, a smaller average ASMD of many covariates does not assure the removal of confounding. The removal of confounding needs to balance important covariates satisfying a backdoor criterion (not an average of all covariates). Practically, we can speculate about the importance of covariates from the effects of these covariates on treatments and outcomes (c.f., VanderWeele 2019). Figure S1 in the Supplementary Information of Fukumoto et al. (2021) did not provide good information with which to judge these two essential points in terms of the reduction of confounding. To make such a judgment, a loveplot with a reference line at 0.1 is suitable and is also a standard practice. View more View less Competing Interests I received a grant from Taisho Pharmaceutical Co., Ltd. for research outside of this study. reply Respond Report a concern Hayashi TI. Peer Review Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2024, 11 :456 ( https://doi.org/10.5256/f1000research.123641.r136221) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
The direct URL for this report is: https://f1000research.com/articles/11-456/v1#referee-response-136221 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2022 Shiba K. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 29 Apr 2022 | for Version 1 Koichiro Shiba , Department of Epidemiology, Harvard T.H. Chan School of Public Health, Boston, MA, USA 0 Views copyright © 2022 Shiba K. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Approved With Reservations info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions This article provides a critical re-assessment of the recent paper that concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 (Fukumoto et al. , 2021). The author of this article argued that the original analysis (and the refined version presented in the current article) was likely underpowered and unreliable. The author raised a great point about the potential violation of positivity, about which the original paper provided little discussion. 
I appreciate the author for bringing our attention to these important issues and allowing me to engage in carefully reading the original article. I agree with the author that it is vital to be mindful that the absence of “evidence” defined by the lack of statistical significance is not evidence of absence. Hence, I find this type of article assessing alternative explanations for the null findings in the original paper particularly valuable. I provide some major and minor suggestions to strengthen the current manuscript further. Major points: The manuscript is currently written as if the lack of statistical power is the primary (and perhaps only) issue that might explain the original paper’s null findings. The author does discuss the possibility of unadjusted confounding later in the paper, but the way the Introduction and the Conclusion were written made it seem a secondary problem. I suggest the author provide a more in-depth discussion on other potential issues. There are at least two other issues I think are worth discussing: the choice of causal estimand and residual confounding. To me, the most significant limitation of the original article is its focus on identifying and estimating ATC. The controls in the study were the municipalities that did not (need to) enforce school closures in the Spring of 2020; that is, they were most likely the areas that do not benefit from school closures because they were not experiencing the spread of COVID-19 to begin with, which is supported by the extremely low (nearly zero) confirmed cases in the control group shown in Figure 1. ATC, in this context, is of little policy relevance because such municipalities would have few cases regardless of the implementation of the school closures (the counterfactual “what would happen had they closed the schools” would not differ much from the reality). The null finding for ATC is somewhat expected. 
What we want to know instead is if the spread in the treated (i.e., municipalities that were experiencing the rise in confirmed cases and had to decide to close schools) would have been worse without the school closures (i.e., ATT). I know that the original article did investigate ATT as a sensitivity analysis, but it was based on the data stemming from the school closure on only one date (April 6) with a much smaller matched sample size (as the treated was smaller in number). They did not provide key supplementary information for the ATT analysis (e.g., covariate balance after matching) either. I understand that, with the available data, ATC (versus ATT) was easier to estimate as there were more controls; but that does not justify their oversimplified conclusion that there is no effect of school closures in Japan because the effect of a hypothetical intervention can vary substantially depending on the target populations. The original article and the current article took a careful approach to mitigating bias due to confounding; yet, I see some evidence of residual confounding. The two articles indicate that ATC immediately after the school closures was positive point-estimate-wise, at least for some dates. The original article's authors wrote: “The ATC values suggest that municipalities that closed their schools mostly increased the number of cases”. If matching was successful and there was no unadjusted confounding, this statement would be true. Yet, they provide no compelling explanation for why school closures may causally “increase” confirmed cases. I cannot think of any. If not causal, the increase in cases among the treated municipalities in the matched sample is likely due to residual confounding—they had a reason to be concerned about the spread and decided to close the school, which was not captured by the observed covariates, including the prior outcome values and school closure status. 
The author of the current article raised an excellent point regarding residual bias, but some additional discussion on this issue would be appreciated. Minor points: Page 3: “Moreover, matching technique used to minimise confounding has a known side effect of...” needs citation. Page 3: “The effect estimates relative to the baseline incidence would be a more intuitive and interpretable measure for assessment of its practical use.” This needs more justification. The additive effect measure has its own advantage because it can speak directly to the population impacts of the intervention. Page 3: The sentence starting, “However, the theoretical ground is unclear...” seems incomplete. Page 3: “ATC and average treatment effect on the treatment: ATT) and their confidence intervals relative to the average outcome (incidence rate per capita) in the control group (Figure 1).” This sentence seems incomplete. Perhaps delete the part after “relative to...”. Page 3: Spell out ESS and provide a bit more context of what it is. Is the rationale for commenting on the previous publication clearly described? Yes Are any opinions stated well-argued, clear and cogent? Partly Are arguments sufficiently supported by evidence from the published literature or by new data and results? Yes Is the conclusion balanced and justified on the basis of the presented arguments? Partly Competing Interests No competing interests were disclosed. Reviewer Expertise Public health, epidemiology, causal inference I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. reply Respond to this report Responses (1) Author Response 27 Jun 2024 Akira Endo, School of Tropical Medicine and Global Health, Nagasaki University, Nagasaki, 852-8521, Japan > I thank the reviewers for their constructive feedback. 
While I regret the extended time it took to revise the manuscript in response to their comments, I believe the revised version and the following responses will address the reviewers' concerns. Specifically, I added two key discussion points that both reviewers agreed should be included: (i) choice of the estimand (ATC vs ATT) and (ii) potential residual confounding. This article provides a critical re-assessment of the recent paper that concluded that school closures had no causal effect on the spread of COVID-19 in Spring 2020 (Fukumoto et al., 2021). The author of this article argued that the original analysis (and the refined version presented in the current article) was likely underpowered and unreliable. The author raised a great point about the potential violation of positivity, about which the original paper provided little discussion. I appreciate the author for bringing our attention to these important issues and allowing me to engage in carefully reading the original article. I agree with the author that it is vital to be mindful that the absence of “evidence” defined by the lack of statistical significance is not evidence of absence. Hence, I find this type of article assessing alternative explanations for the null findings in the original paper particularly valuable. I provide some major and minor suggestions to strengthen the current manuscript further. > I thank the reviewer for appreciating my criticism and providing useful suggestions. Major points: The manuscript is currently written as if the lack of statistical power is the primary (and perhaps only) issue that might explain the original paper’s null findings. The author does discuss the possibility of unadjusted confounding later in the paper, but the way the Introduction and the Conclusion were written made it seem a secondary problem. I suggest the author provide a more in-depth discussion on other potential issues. 
There are at least two other issues I think are worth discussing: the choice of causal estimand and residual confounding. To me, the most significant limitation of the original article is its focus on identifying and estimating ATC. The controls in the study were the municipalities that did not (need to) enforce school closures in the Spring of 2020; that is, they were most likely the areas that do not benefit from school closures because they were not experiencing the spread of COVID-19 to begin with, which is supported by the extremely low (nearly zero) confirmed cases in the control group shown in Figure 1. ATC, in this context, is of little policy relevance because such municipalities would have few cases regardless of the implementation of the school closures (the counterfactual “what would happen had they closed the schools” would not differ much from the reality). The null finding for ATC is somewhat expected. What we want to know instead is if the spread in the treated (i.e., municipalities that were experiencing the rise in confirmed cases and had to decide to close schools) would have been worse without the school closures (i.e., ATT). I know that the original article did investigate ATT as a sensitivity analysis, but it was based on the data stemming from the school closure on only one date (April 6) with a much smaller matched sample size (as the treated was smaller in number). They did not provide key supplementary information for the ATT analysis (e.g., covariate balance after matching) either. I understand that, with the available data, ATC (versus ATT) was easier to estimate as there were more controls; but that does not justify their oversimplified conclusion that there is no effect of school closures in Japan because the effect of a hypothetical intervention can vary substantially depending on the target populations. 
The original article and the current article took a careful approach to mitigating bias due to confounding; yet, I see some evidence of residual confounding. The two articles indicate that ATC immediately after the school closures was positive point-estimate-wise, at least for some dates. The original article's authors wrote: “The ATC values suggest that municipalities that closed their schools mostly increased the number of cases”. If matching was successful and there was no unadjusted confounding, this statement would be true. Yet, they provide no compelling explanation for why school closures may causally “increase” confirmed cases. I cannot think of any. If not causal, the increase in cases among the treated municipalities in the matched sample is likely due to residual confounding—they had a reason to be concerned about the spread and decided to close the school, which was not captured by the observed covariates, including the prior outcome values and school closure status. The author of the current article raised an excellent point regarding residual bias, but some additional discussion on this issue would be appreciated. > I thank the reviewer for pointing out the existence of potential issues that I did not emphasise in the paper. I believe these additional issues are indeed worth mentioning in the manuscript. Meanwhile, I would prefer to retain the lack of statistical power as the primary issue because the present manuscript leverages the results of re-analysis focusing on the statistical power. Moreover, the two additional points suggested would also eventually come down to the problem of limited sample size and reporting results without considering the statistical power / effective size. For example, as the reviewer suggests, the choice of ATC was probably not ideal in the original study’s context because of low incidence levels in the control group in the first place. 
However, the same study design focusing on ATC could still have found an effect (if there is a true effect and) if the sample size (in this case both the number of included municipalities and the number of cases reported in these municipalities) was sufficient. Instead, I would like to propose changing the previous Conclusion section to the “Discussion and Conclusion” section and citing reviewers’ reports to discuss the suggested two points there. This allows me to separate the criticisms derived from my own analysis from those that were not, and also to appropriately acknowledge that the ideas came from the reviewers’ suggestions. I am aware that it may be a rather unusual practice in academic publications; however, given the nature of the publishing model of F1000 with citable open reviews and the fact that the reviewers provided new discussion points that were absent from the original version, I would like to opt for offering credit to the reviewers who contributed their time for the scholarly discussion. (Added to Discussion and Conclusion section): In addition to this issue of insufficient statistical power, which I demonstrated in the present reanalysis, two additional issues have been raised during the peer review process of this article. For one: the authors’ choice of ATC as the main estimand may have been suboptimal as Shiba has pointed out in his comment (Shiba, 2022). The control group in the original study may have consisted of municipalities that did not need school closures because of low incidence. ATC in this context would represent the effect in settings where the policy was not needed, which is of limited political implication. To counterargue against school closures as a control policy, the authors should have aimed to robustly show insufficient effect of such a policy even in municipalities in which school closures had been a selectable option (possibly because of higher incidence rate, where an effective policy could be more impactful). 
For the other: residual confounding may have remained among the matched samples. Both (Shiba, 2022) and (Hayashi, 2022) expressed concern about the immediate positive effect on incidence rate (e.g. increased incidence) immediately after the implementation of school closures in the treated group, which Fukumoto et al. left unexplained. Unless a plausible causal mechanism in which school closures could increase COVID-19 incidence is provided, this gap between the treated and control group may indicate residual bias, which is unsurprising given my reanalysis results suggesting matching failure. Hayashi additionally suggested that the trend in incidence (e.g. increasing/decreasing) may be one of the potential confounding variables that had not been adjusted for in the original study (Hayashi, 2022). Minor points: Page 3: “Moreover, matching technique used to minimise confounding has a known side effect of...” needs citation. > We have newly cited King et al. (2017). Page 3: “The effect estimates relative to the baseline incidence would be a more intuitive and interpretable measure for assessment of its practical use.” This needs more justification. The additive effect measure has its own advantage because it can speak directly to the population impacts of the intervention. > We have added an explanation that the relative risk reduction is particularly relevant because of the dynamic nature of the infectious disease transmission: Infectious disease risks are inherently dynamic; more current infections in a population would result in a greater risk of infection among susceptible individuals through increased encounters with infectious others. This means that the effect of school closures, which are intended to reduce contacts at schools, should also depend on the baseline incidence in the population because the risk of infection averted would be the reduction in contacts multiplied by the probability that the contacts were otherwise with infectious individuals. 
Page 3: The sentence starting, “However, the theoretical ground is unclear...” seems incomplete. > I have replaced “to” with “on” but believe the sentence itself is complete (“theoretical ground is unclear”). Page 3: “ATC and average treatment effect on the treatment: ATT) and their confidence intervals relative to the average outcome (incidence rate per capita) in the control group (Figure 1).” This sentence seems incomplete. Perhaps delete the part after “relative to...”. > I have added a semicolon and a comma to clarify the structure of the sentence. This sentence is meant to indicate that both the ATC (or ATT) and their confidence intervals were rescaled to a relative value, where the incidence rate per capita in the control group is the reference. Page 3: Spell out ESS and provide a bit more context of what it is. > I have spelled it out with a brief explanation and citation: “The effective sample size (ESS; a proxy measure for the amount of information contained in weighted samples (Shook-Sa and Hudgens))…” View more View less Competing Interests I received a grant from Taisho Pharmaceutical Co., Ltd. for research outside of this study. reply Respond Report a concern Shiba K. Peer Review Report For: ‘Not finding causal effect’ is not ‘finding no causal effect’ of school closure on COVID-19 [version 2; peer review: 1 approved, 2 approved with reservations]. F1000Research 2024, 11:456 (https://doi.org/10.5256/f1000research.123641.r136223) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
The direct URL for this report is: https://f1000research.com/articles/11-456/v1#referee-response-136223 Alongside their report, reviewers assign a status to the article: Approved - the paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations - A number of small changes, sometimes more significant revisions are required to address specific details and improve the paper's academic merit. Not approved - fundamental flaws in the paper seriously undermine the findings and conclusions Adjust parameters to alter display View on desktop for interactive features Includes Interactive Elements View on desktop for interactive features Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. 
You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Stay Updated Sign up for content alerts and receive a weekly or monthly email with all newly published articles Register with F1000Research Already registered? Sign in Not now, thanks close PLEASE NOTE If you are an AUTHOR of this article, please check that you signed in with the account associated with this article otherwise we cannot automatically identify your role as an author and your comment will be labelled as a “User Comment”. If you are a REVIEWER of this article, please check that you have signed in with the account associated with this article and then go to your account to submit your report, please do not post your review here. If you do not have access to your original account, please contact us . All commenters must hold a formal affiliation as per our Policies . The information that you give us will be displayed next to your comment. User comments must be in English, comprehensible and relevant to the article under discussion. We reserve the right to remove any comments that we consider to be inappropriate, offensive or otherwise in breach of the User Comment Terms and Conditions . Commenters must not use a comment for personal attacks. When criticisms of the article are based on unpublished data, the data should be made available. I accept the User Comment Terms and Conditions Please confirm that you accept the User Comment Terms and Conditions. Affiliation ✕ refresh Please enter your institution. Note: To add your institution or organisation, start typing the name and then select the correct name from the list. Where applicable, the name will appear in both the original language and in English. Do not paste in the name. If the name does not appear in the drop-down list, we will display the information you have entered. 
✕ refresh Country/Region * USA UK Canada China France Germany Afghanistan Aland Islands Albania Algeria American Samoa Andorra Angola Anguilla Antarctica Antigua and Barbuda Argentina Armenia Aruba Australia Austria Azerbaijan Bahamas Bahrain Bangladesh Barbados Belarus Belgium Belize Benin Bermuda Bhutan Bolivia Bosnia and Herzegovina Botswana Bouvet Island Brazil British Indian Ocean Territory British Virgin Islands Brunei Bulgaria Burkina Faso Burundi Cambodia Cameroon Canada Cape Verde Cayman Islands Central African Republic Chad Chile China Christmas Island Cocos (Keeling) Islands Colombia Comoros Congo Cook Islands Costa Rica Cote d'Ivoire Croatia Cuba Cyprus Czech Republic Democratic Republic of the Congo Denmark Djibouti Dominica Dominican Republic Ecuador Egypt El Salvador Equatorial Guinea Eritrea Estonia Ethiopia Falkland Islands Faroe Islands Federated States of Micronesia Fiji Finland France French Guiana French Polynesia French Southern Territories Gabon Georgia Germany Ghana Gibraltar Greece Greenland Grenada Guadeloupe Guam Guatemala Guernsey Guinea Guinea-Bissau Guyana Haiti Heard Island and Mcdonald Islands Holy See (Vatican City State) Honduras Hong Kong Hungary Iceland India Indonesia Iran Iraq Ireland Israel Italy Jamaica Japan Jersey Jordan Kazakhstan Kenya Kiribati Kosovo (Serbia and Montenegro) Kuwait Kyrgyzstan Lao People's Democratic Republic Latvia Lebanon Lesotho Liberia Libya Liechtenstein Lithuania Luxembourg Macao Madagascar Malawi Malaysia Maldives Mali Malta Marshall Islands Martinique Mauritania Mauritius Mayotte Mexico Minor Outlying Islands of the United States Moldova Monaco Mongolia Montenegro Montserrat Morocco Mozambique Myanmar Namibia Nauru Nepal Netherlands Antilles New Caledonia New Zealand Nicaragua Niger Nigeria Niue Norfolk Island North Korea North Macedonia Northern Mariana Islands Norway Oman Pakistan Palau Palestinian Territory Panama Papua New Guinea Paraguay Peru Philippines Pitcairn Poland Portugal Puerto Rico 
Qatar Reunion Romania Russian Federation Rwanda Saint Helena Saint Kitts and Nevis Saint Lucia Saint Pierre and Miquelon Saint Vincent and the Grenadines Samoa San Marino Sao Tome and Principe Saudi Arabia Senegal Serbia Seychelles Sierra Leone Singapore Slovakia Slovenia Solomon Islands Somalia South Africa South Georgia and the South Sandwich Is South Korea South Sudan Spain Sri Lanka Sudan Suriname Svalbard and Jan Mayen Swaziland Sweden Switzerland Syria Taiwan Tajikistan Tanzania Thailand The Gambia The Netherlands Timor-Leste Togo Tokelau Tonga Trinidad and Tobago Tunisia Turkey Turkmenistan Turks and Caicos Islands Tuvalu UK USA Uganda Ukraine United Arab Emirates United States Virgin Islands Uruguay Uzbekistan Vanuatu Venezuela Vietnam Wallis and Futuna West Bank and Gaza Strip Western Sahara Yemen Zambia Zimbabwe Please select your country/region. You must enter a comment. Competing Interests Please disclose any competing interests that might be construed to influence your judgment of the article's or peer review report's validity or importance. Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. 
Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Please state your competing interests The comment has been saved. An error has occurred. Please try again. Cancel Post var lTitle = "\‘Not finding causal effect\’ is...".replace("'", ''); var linkedInUrl = "http://www.linkedin.com/shareArticle?url=https://f1000research.com/articles/11-456/v2" + "&title=" + encodeURIComponent(lTitle) + "&summary=" + encodeURIComponent('Read the article by '); var deliciousUrl = "https://del.icio.us/post?url=https://f1000research.com/articles/11-456/v2&title=" + encodeURIComponent(lTitle); var redditUrl = "http://reddit.com/submit?url=https://f1000research.com/articles/11-456/v2" + "&title=" + encodeURIComponent(lTitle); linkedInUrl += encodeURIComponent('Endo A'); var offsetTop = /chrome/i.test( navigator.userAgent ) ? 4 : -10; var addthis_config = { ui_offset_top: offsetTop, services_compact : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_expanded : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_custom : [ { name: "LinkedIn", url: linkedInUrl, icon:"/img/icon/at_linkedin.svg" }, { name: "Mendeley", url: "http://www.mendeley.com/import/?url=https://f1000research.com/articles/11-456/v2/mendeley", icon:"/img/icon/at_mendeley.svg" }, { name: "Reddit", url: redditUrl, icon:"/img/icon/at_reddit.svg" }, ] }; var addthis_share = { url: "https://f1000research.com/articles/11-456", templates : { twitter : "\‘Not finding causal effect\’ is not \‘finding no.... 
Endo A, published by " + "@F1000Research" + ", https://f1000research.com/articles/11-456/v2" } }; if (typeof(addthis) != "undefined"){ addthis.addEventListener('addthis.ready', checkCount); addthis.addEventListener('addthis.menu.share', checkCount); } $(".f1r-shares-twitter").attr("href", "https://twitter.com/intent/tweet?text=" + addthis_share.templates.twitter); $(".f1r-shares-facebook").attr("href", "https://www.facebook.com/sharer/sharer.php?u=" + addthis_share.url); $(".f1r-shares-linkedin").attr("href", addthis_config.services_custom[0].url); $(".f1r-shares-reddit").attr("href", addthis_config.services_custom[2].url); $(".f1r-shares-mendelay").attr("href", addthis_config.services_custom[1].url); function checkCount(){ setTimeout(function(){ $(".addthis_button_expanded").each(function(){ var count = $(this).text(); if (count !== "" && count != "0") $(this).removeClass("is-hidden"); else $(this).addClass("is-hidden"); }); }, 1000); } close How to cite this report {{reportCitation}} Cancel Copy Citation Details $(function(){R.ui.buttonDropdowns('.dropdown-for-downloads');}); $(function(){R.ui.toolbarDropdowns('.toolbar-dropdown-for-downloads');}); $.get("/articles/acj/111915/140839") new F1000.Clipboard(); new F1000.ThesaurusTermsDisplay("articles", "article", "140839"); $(document).ready(function() { $( "#frame1" ).on('load', function() { var mydiv = $(this).contents().find("div"); var h = mydiv.height(); console.log(h) }); var tooltipLivingFigure = jQuery(".interactive-living-figure-label .icon-more-info"), titleLivingFigure = tooltipLivingFigure.attr("title"); tooltipLivingFigure.simpletip({ fixed: true, position: ["-115", "30"], baseClass: 'small-tooltip', content:titleLivingFigure + " " }); tooltipLivingFigure.removeAttr("title"); $("body").on("click", ".cite-living-figure", function(e) { e.preventDefault(); var ref = $(this).attr("data-ref"); $(this).closest(".living-figure-list-container").find("#" + ref).fadeIn(200); }); $("body").on("click", 
".close-cite-living-figure", function(e) { e.preventDefault(); $(this).closest(".popup-window-wrapper").fadeOut(200); }); $(document).on("mouseup", function(e) { var metricsContainer = $(".article-metrics-popover-wrapper"); if (!metricsContainer.is(e.target) && metricsContainer.has(e.target).length === 0) { $(".article-metrics-close-button").click(); } }); var articleId = $('#articleId').val(); if($("#main-article-count-box").attachArticleMetrics) { $("#main-article-count-box").attachArticleMetrics(articleId, { articleMetricsView: true }); } }); var figshareWidget = $(".new_figshare_widget"); if (figshareWidget.length > 0) { window.figshare.load("f1000", function(Widget) { // Select a tag/tags defined in your page. In this tag we will place the widget. _.map(figshareWidget, function(el){ var widget = new Widget({ articleId: $(el).attr("figshare_articleId") //height:300 // this is the height of the viewer part. [Default: 550] }); widget.initialize(); // initialize the widget widget.mount(el); // mount it in a tag that's on your page // this will save the widget on the global scope for later use from // your JS scripts. This line is optional. 
//window.widget = widget; }); }); } close Error Close Add Reset F1000.MICROSERVICES.AFFILIATION = ''; $(document).ready(function () { $('.js-affiliations-form').each((index, form) => { new AffiliationForm({ formId: form.id, institutionErrorSelector: '.comment-enter-institution', departmentErrorSelector: '.comment-enter-department', placeSelector: '.js-add-comment-place', stateSelector: '.js-add-comment-state', zipCodeSelector: '.js-add-comment-zipcode', countrySelector: '.js-add-comment-country', countryErrorSelector: '.comment-enter-country', }); }); }); $(document).ready(function () { var reportIds = { "301836": 0, "301837": 0, "301838": 0, "301839": 14, "301833": 0, "301834": 0, "301835": 0, "301840": 0, "301841": 0, "301842": 0, "136222": 0, "136223": 38, "136221": 31, "136224": 0, "136225": 0, "320686": 0, "320687": 0, "320692": 0, "320693": 0, "320694": 0, "320695": 0, "320688": 0, "320689": 0, "320690": 0, "320691": 0, "296508": 0, "296509": 0, "296510": 0, "296511": 0, "296505": 0, "296506": 0, "296507": 0, "318149": 0, "296512": 0, "296513": 0, "296514": 0, "318152": 0, "318153": 0, "318154": 0, "318155": 0, "266846": 0, "266847": 0, "305644": 0, "305645": 0, "305646": 0, "305647": 0, "305641": 0, "305642": 0, "305643": 0, "305648": 0, "305649": 0, "305650": 0, }; $(".referee-response-container,.js-referee-report").each(function(index, el) { var reportId = $(el).attr("data-reportid"), reportCount = reportIds[reportId] || 0; $(el).find(".comments-count-container,.js-referee-report-views").html(reportCount); }); var uuidInput = $("#article_uuid"), oldUUId = uuidInput.val(), newUUId = "9b12e4a7-d12f-4cb8-ac67-6d7a5fc383ec"; uuidInput.val(newUUId); $("a[href*='article_uuid=']").each(function(index, el) { var newHref = $(el).attr("href").replace(oldUUId, newUUId); $(el).attr("href", newHref); }); }); An innovative open access publishing platform offering rapid publication and open peer review, whilst supporting data deposition and sharing. 
Browse Gateways Collections How it Works Contact For Developers Cookie Notice Privacy Notice RSS Submit Your Research Follow us © 2012-2026 F1000 Research Ltd. ISSN 2046-1402 | Legal | Partner of Research4Life • CrossRef • ORCID • FAIRSharing R.templateTests.simpleTemplate = R.template(' $text $text $text $text $text '); R.templateTests.runTests(); var F1000platform = new F1000.Platform({ name: "f1000research", displayName: "F1000Research", hostName: "f1000research.com", id: "1", editorialEmail: "
[email protected]", infoEmail: "
[email protected]", usePmcStats: true }); $(function(){R.ui.dropdowns('.dropdown-for-authors, .dropdown-for-about, .dropdown-for-myresearch');}); // $(function(){R.ui.dropdowns('.dropdown-for-referees');}); $(document).ready(function () { if ($(".cookie-warning").is(":visible")) { $(".sticky").css("margin-bottom", "35px"); $(".devices").addClass("devices-and-cookie-warning"); } $(".cookie-warning .close-button").click(function (e) { $(".devices").removeClass("devices-and-cookie-warning"); $(".sticky").css("margin-bottom", "0"); }); $("#tweeter-feed .tweet-message").each(function (i, message) { var self = $(message); self.html(linkify(self.html())); }); $(".partner").on("mouseenter mouseleave", function() { $(this).find(".gray-scale, .colour").toggleClass("is-hidden"); }); }); Sign In Remember me Forgotten your password? Sign In Cancel Email or password not correct. Please try again Please wait... $(function(){ // Note: All the setup needs to run against a name attribute and *not* the id due the clonish // nature of facebox... $("a[id=googleSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("GOOGLE"); $("form[id=oAuthForm]").submit(); }); $("a[id=facebookSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("FACEBOOK"); $("form[id=oAuthForm]").submit(); }); $("a[id=orcidSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("ORCID"); $("form[id=oAuthForm]").submit(); }); }); If you've forgotten your password, please enter your email address below and we'll send you instructions on how to reset your password. The email address should be the one you originally registered with F1000. Email address not valid, please try again You registered with F1000 via Google, so we cannot reset your password. To sign in, please click here . If you still need help with your Google account password, please click here . 
You registered with F1000 via Facebook, so we cannot reset your password. To sign in, please click here . If you still need help with your Facebook account password, please click here . Code not correct, please try again Reset password Cancel Email us for further assistance. Server error, please try again. If your email address is registered with us, we will email you instructions to reset your password. If you think you should have received this email but it has not arrived, please check your spam filters and/or contact for further assistance. Please wait... Register $(document).ready(function () { signIn.createSignInAsRow($("#sign-in-form-gfb-popup")); $(".target-field").each(function () { var uris = $(this).val().split("/"); if (uris.pop() === "login") { $(this).val(uris.toString().replace(",","/")); } }); });
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.