ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses

doi:10.12688/f1000research.74223.2

ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses

2025 · doi:10.12688/f1000research.74223.2

preprint OA: closed

Full text JSON View at publisher

Full text 377,680 characters · extracted from preprint-html · click to expand

ALL-IN meta-analysis: breathing life into living... | F1000Research "use strict";function _typeof(t){return(_typeof="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t})(t)}!function(){var t=function(){var t,e,o=[],n=window,r=n;for(;r;){try{if(r.frames.__tcfapiLocator){t=r;break}}catch(t){}if(r===n.top)break;r=r.parent}t||(!function t(){var e=n.document,o=!!n.frames.__tcfapiLocator;if(!o)if(e.body){var r=e.createElement("iframe");r.style.cssText="display:none",r.name="__tcfapiLocator",e.body.appendChild(r)}else setTimeout(t,5);return!o}(),n.__tcfapi=function(){for(var t=arguments.length,n=new Array(t),r=0;r 3&&2===parseInt(n[1],10)&&"boolean"==typeof n[3]&&(e=n[3],"function"==typeof n[2]&&n[2]("set",!0)):"ping"===n[0]?"function"==typeof n[2]&&n[2]({gdprApplies:e,cmpLoaded:!1,cmpStatus:"stub"}):o.push(n)},n.addEventListener("message",(function(t){var e="string"==typeof t.data,o={};if(e)try{o=JSON.parse(t.data)}catch(t){}else o=t.data;var n="object"===_typeof(o)&&null!==o?o.__tcfapiCall:null;n&&window.__tcfapi(n.command,n.version,(function(o,r){var a={__tcfapiReturn:{returnValue:o,success:r,callId:n.callId}};t&&t.source&&t.source.postMessage&&t.source.postMessage(e?JSON.stringify(a):a,"*")}),n.parameter)}),!1))};"undefined"!=typeof module?module.exports=t:t()}(); dataLayer = dataLayer || []; // Standard GTM initialization - Google Consent Mode handles consent automatically (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl+ '>m_auth=hzk0Vc3qFsQYhCrIoHz68A>m_preview=env-1>m_cookies_win=x';f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-MWFK8L5J'); 
;window.NREUM||(NREUM={});NREUM.init={distributed_tracing:{enabled:true},privacy:{cookies_enabled:true},ajax:{deny_list:["bam.nr-data.net"]}}; ;NREUM.loader_config={accountID:"438030",trustKey:"438030",agentID:"772317073",licenseKey:"97f8f67f26",applicationID:"772317073"} ;NREUM.info={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net",licenseKey:"97f8f67f26",applicationID:"772317073",sa:1} ;/*! For license information please see nr-loader-spa-1.236.0.min.js.LICENSE.txt */ (()=>{"use strict";var e,t,r={5763:(e,t,r)=>{r.d(t,{P_:()=>l,Mt:()=>g,C5:()=>s,DL:()=>v,OP:()=>T,lF:()=>D,Yu:()=>y,Dg:()=>h,CX:()=>c,GE:()=>b,sU:()=>_});var n=r(8632),i=r(9567);const o={beacon:n.ce.beacon,errorBeacon:n.ce.errorBeacon,licenseKey:void 0,applicationID:void 0,sa:void 0,queueTime:void 0,applicationTime:void 0,ttGuid:void 0,user:void 0,account:void 0,product:void 0,extra:void 0,jsAttributes:{},userAttributes:void 0,atts:void 0,transactionName:void 0,tNamePlain:void 0},a={};function s(e){if(!e)throw new Error("All info objects require an agent identifier!");if(!a[e])throw new Error("Info for ".concat(e," was never set"));return a[e]}function c(e,t){if(!e)throw new Error("All info objects require an agent identifier!");a[e]=(0,i.D)(t,o),(0,n.Qy)(e,a[e],"info")}var u=r(7056);const d=()=>{const e={blockSelector:"[data-nr-block]",maskInputOptions:{password:!0}};return{allow_bfcache:!0,privacy:{cookies_enabled:!0},ajax:{deny_list:void 0,enabled:!0,harvestTimeSeconds:10},distributed_tracing:{enabled:void 0,exclude_newrelic_header:void 0,cors_use_newrelic_header:void 0,cors_use_tracecontext_headers:void 0,allowed_origins:void 0},session:{domain:void 0,expiresMs:u.oD,inactiveMs:u.Hb},ssl:void 0,obfuscate:void 
0,jserrors:{enabled:!0,harvestTimeSeconds:10},metrics:{enabled:!0},page_action:{enabled:!0,harvestTimeSeconds:30},page_view_event:{enabled:!0},page_view_timing:{enabled:!0,harvestTimeSeconds:30,long_task:!1},session_trace:{enabled:!0,harvestTimeSeconds:10},harvest:{tooManyRequestsDelay:60},session_replay:{enabled:!1,harvestTimeSeconds:60,sampleRate:.1,errorSampleRate:.1,maskTextSelector:"*",maskAllInputs:!0,get blockClass(){return"nr-block"},get ignoreClass(){return"nr-ignore"},get maskTextClass(){return"nr-mask"},get blockSelector(){return e.blockSelector},set blockSelector(t){e.blockSelector+=",".concat(t)},get maskInputOptions(){return e.maskInputOptions},set maskInputOptions(t){e.maskInputOptions={...t,password:!0}}},spa:{enabled:!0,harvestTimeSeconds:10}}},f={};function l(e){if(!e)throw new Error("All configuration objects require an agent identifier!");if(!f[e])throw new Error("Configuration for ".concat(e," was never set"));return f[e]}function h(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");f[e]=(0,i.D)(t,d()),(0,n.Qy)(e,f[e],"config")}function g(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");var r=l(e);if(r){for(var n=t.split("."),i=0;i {r.d(t,{D:()=>i});var n=r(50);function i(e,t){try{if(!e||"object"!=typeof e)return(0,n.Z)("Setting a Configurable requires an object as input");if(!t||"object"!=typeof t)return(0,n.Z)("Setting a Configurable requires a model to set its initial properties");const r=Object.create(Object.getPrototypeOf(t),Object.getOwnPropertyDescriptors(t)),o=0===Object.keys(r).length?e:r;for(let a in o)if(void 0!==e[a])try{"object"==typeof e[a]&&"object"==typeof t[a]?r[a]=i(e[a],t[a]):r[a]=e[a]}catch(e){(0,n.Z)("An error occurred while setting a property of a Configurable",e)}return r}catch(e){(0,n.Z)("An error occured while setting a Configurable",e)}}},6818:(e,t,r)=>{r.d(t,{Re:()=>i,gF:()=>o,q4:()=>n});const 
n="1.236.0",i="PROD",o="CDN"},385:(e,t,r)=>{r.d(t,{FN:()=>a,IF:()=>u,Nk:()=>f,Tt:()=>s,_A:()=>o,il:()=>n,pL:()=>c,v6:()=>i,w1:()=>d});const n="undefined"!=typeof window&&!!window.document,i="undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self.navigator instanceof WorkerNavigator||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis.navigator instanceof WorkerNavigator),o=n?window:"undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis),a=""+o?.location,s=/iPad|iPhone|iPod/.test(navigator.userAgent),c=s&&"undefined"==typeof SharedWorker,u=(()=>{const e=navigator.userAgent.match(/Firefox[/\s](\d+\.\d+)/);return Array.isArray(e)&&e.length>=2?+e[1]:0})(),d=Boolean(n&&window.document.documentMode),f=!!navigator.sendBeacon},1117:(e,t,r)=>{r.d(t,{w:()=>o});var n=r(50);const i={agentIdentifier:"",ee:void 0};class o{constructor(e){try{if("object"!=typeof e)return(0,n.Z)("shared context requires an object as input");this.sharedContext={},Object.assign(this.sharedContext,i),Object.entries(e).forEach((e=>{let[t,r]=e;Object.keys(i).includes(t)&&(this.sharedContext[t]=r)}))}catch(e){(0,n.Z)("An error occured while setting SharedContext",e)}}}},8e3:(e,t,r)=>{r.d(t,{L:()=>d,R:()=>c});var n=r(2177),i=r(1284),o=r(4322),a=r(3325);const s={};function c(e,t){const r={staged:!1,priority:a.p[t]||0};u(e),s[e].get(t)||s[e].set(t,r)}function u(e){e&&(s[e]||(s[e]=new Map))}function d(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"",t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:"feature";if(u(e),!e||!s[e].get(t))return a(t);s[e].get(t).staged=!0;const r=[...s[e]];function a(t){const r=e?n.ee.get(e):n.ee,a=o.X.handlers;if(r.backlog&&a){var s=r.backlog[t],c=a[t];if(c){for(var u=0;s&&u {let[t,r]=e;return 
r.staged}))&&(r.sort(((e,t)=>e[1].priority-t[1].priority)),r.forEach((e=>{let[t]=e;a(t)})))}function f(e,t){var r=e[1];(0,i.D)(t[r],(function(t,r){var n=e[0];if(r[0]===n){var i=r[1],o=e[3],a=e[2];i.apply(o,a)}}))}},2177:(e,t,r)=>{r.d(t,{c:()=>f,ee:()=>u});var n=r(8632),i=r(2210),o=r(1284),a=r(5763),s="nr@context";let c=(0,n.fP)();var u;function d(){}function f(e){return(0,i.X)(e,s,l)}function l(){return new d}function h(){u.aborted=!0,u.backlog={}}c.ee?u=c.ee:(u=function e(t,r){var n={},c={},f={},g=!1;try{g=16===r.length&&(0,a.OP)(r).isolatedBacklog}catch(e){}var p={on:b,addEventListener:b,removeEventListener:y,emit:v,get:x,listeners:w,context:m,buffer:A,abort:h,aborted:!1,isBuffering:E,debugId:r,backlog:g?{}:t&&"object"==typeof t.backlog?t.backlog:{}};return p;function m(e){return e&&e instanceof d?e:e?(0,i.X)(e,s,l):l()}function v(e,r,n,i,o){if(!1!==o&&(o=!0),!u.aborted||i){t&&o&&t.emit(e,r,n);for(var a=m(n),s=w(e),d=s.length,f=0;fn,p:()=>i});var n=r(2177).ee.get("handle");function i(e,t,r,i,o){o?(o.buffer([e],i),o.emit(e,t,r)):(n.buffer([e],i),n.emit(e,t,r))}},4322:(e,t,r)=>{r.d(t,{X:()=>o});var n=r(5546);o.on=a;var i=o.handlers={};function o(e,t,r,o){a(o||n.E,i,e,t,r)}function a(e,t,r,i,o){o||(o="feature"),e||(e=n.E);var a=t[o]=t[o]||{};(a[r]=a[r]||[]).push([e,i])}},3239:(e,t,r)=>{r.d(t,{bP:()=>s,iz:()=>c,m$:()=>a});var n=r(385);let i=!1,o=!1;try{const e={get passive(){return i=!0,!1},get signal(){return o=!0,!1}};n._A.addEventListener("test",null,e),n._A.removeEventListener("test",null,e)}catch(e){}function a(e,t){return i||o?{capture:!!e,passive:i,signal:t}:!!e}function s(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;window.addEventListener(e,t,a(r,n))}function c(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;document.addEventListener(e,t,a(r,n))}},4402:(e,t,r)=>{r.d(t,{Ht:()=>u,M:()=>c,Rl:()=>a,ky:()=>s});var n=r(385);const 
i="xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";function o(e,t){return e?15&e[t]:16*Math.random()|0}function a(){const e=n._A?.crypto||n._A?.msCrypto;let t,r=0;return e&&e.getRandomValues&&(t=e.getRandomValues(new Uint8Array(31))),i.split("").map((e=>"x"===e?o(t,++r).toString(16):"y"===e?(3&o()|8).toString(16):e)).join("")}function s(e){const t=n._A?.crypto||n._A?.msCrypto;let r,i=0;t&&t.getRandomValues&&(r=t.getRandomValues(new Uint8Array(31)));const a=[];for(var s=0;s {r.d(t,{Bq:()=>n,Hb:()=>o,oD:()=>i});const n="NRBA",i=144e5,o=18e5},7894:(e,t,r)=>{function n(){return Math.round(performance.now())}r.d(t,{z:()=>n})},7243:(e,t,r)=>{r.d(t,{e:()=>o});var n=r(385),i={};function o(e){if(e in i)return i[e];if(0===(e||"").indexOf("data:"))return{protocol:"data"};let t;var r=n._A?.location,o={};if(n.il)t=document.createElement("a"),t.href=e;else try{t=new URL(e,r.href)}catch(e){return o}o.port=t.port;var a=t.href.split("://");!o.port&&a[1]&&(o.port=a[1].split("/")[0].split("@").pop().split(":")[1]),o.port&&"0"!==o.port||(o.port="https"===a[0]?"443":"80"),o.hostname=t.hostname||r.hostname,o.pathname=t.pathname,o.protocol=a[0],"/"!==o.pathname.charAt(0)&&(o.pathname="/"+o.pathname);var s=!t.protocol||":"===t.protocol||t.protocol===r.protocol,c=t.hostname===r.hostname&&t.port===r.port;return o.sameOrigin=s&&(!t.hostname||c),"/"===o.pathname&&(i[e]=o),o}},50:(e,t,r)=>{function n(e,t){"function"==typeof console.warn&&(console.warn("New Relic: ".concat(e)),t&&console.warn(t))}r.d(t,{Z:()=>n})},2587:(e,t,r)=>{r.d(t,{N:()=>c,T:()=>u});var n=r(2177),i=r(5546),o=r(8e3),a=r(3325);const s={stn:[a.D.sessionTrace],err:[a.D.jserrors,a.D.metrics],ins:[a.D.pageAction],spa:[a.D.spa],sr:[a.D.sessionReplay,a.D.sessionTrace]};function c(e,t){const r=n.ee.get(t);e&&"object"==typeof e&&(Object.entries(e).forEach((e=>{let[t,n]=e;void 0===u[t]&&(s[t]?s[t].forEach((e=>{n?(0,i.p)("feat-"+t,[],void 0,e,r):(0,i.p)("block-"+t,[],void 0,e,r),(0,i.p)("rumresp-"+t,[Boolean(n)],void 
0,e,r)})):n&&(0,i.p)("feat-"+t,[],void 0,void 0,r),u[t]=Boolean(n))})),Object.keys(s).forEach((e=>{void 0===u[e]&&(s[e]?.forEach((t=>(0,i.p)("rumresp-"+e,[!1],void 0,t,r))),u[e]=!1)})),(0,o.L)(t,a.D.pageViewEvent))}const u={}},2210:(e,t,r)=>{r.d(t,{X:()=>i});var n=Object.prototype.hasOwnProperty;function i(e,t,r){if(n.call(e,t))return e[t];var i=r();if(Object.defineProperty&&Object.keys)try{return Object.defineProperty(e,t,{value:i,writable:!0,enumerable:!1}),i}catch(e){}return e[t]=i,i}},1284:(e,t,r)=>{r.d(t,{D:()=>n});const n=(e,t)=>Object.entries(e||{}).map((e=>{let[r,n]=e;return t(r,n)}))},4351:(e,t,r)=>{r.d(t,{P:()=>o});var n=r(2177);const i=()=>{const e=new WeakSet;return(t,r)=>{if("object"==typeof r&&null!==r){if(e.has(r))return;e.add(r)}return r}};function o(e){try{return JSON.stringify(e,i())}catch(e){try{n.ee.emit("internal-error",[e])}catch(e){}}}},3960:(e,t,r)=>{r.d(t,{K:()=>a,b:()=>o});var n=r(3239);function i(){return"undefined"==typeof document||"complete"===document.readyState}function o(e,t){if(i())return e();(0,n.bP)("load",e,t)}function a(e){if(i())return e();(0,n.iz)("DOMContentLoaded",e)}},8632:(e,t,r)=>{r.d(t,{EZ:()=>u,Qy:()=>c,ce:()=>o,fP:()=>a,gG:()=>d,mF:()=>s});var n=r(7894),i=r(385);const o={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net"};function a(){return i._A.NREUM||(i._A.NREUM={}),void 0===i._A.newrelic&&(i._A.newrelic=i._A.NREUM),i._A.NREUM}function s(){let e=a();return e.o||(e.o={ST:i._A.setTimeout,SI:i._A.setImmediate,CT:i._A.clearTimeout,XHR:i._A.XMLHttpRequest,REQ:i._A.Request,EV:i._A.Event,PR:i._A.Promise,MO:i._A.MutationObserver,FETCH:i._A.fetch}),e}function c(e,t,r){let i=a();const o=i.initializedAgents||{},s=o[e]||{};return Object.keys(s).length||(s.initializedAt={ms:(0,n.z)(),date:new Date}),i.initializedAgents={...o,[e]:{...s,[r]:t}},i}function u(e,t){a()[e]=t}function d(){return function(){let e=a();const t=e.info||{};e.info={beacon:o.beacon,errorBeacon:o.errorBeacon,...t}}(),function(){let e=a();const 
t=e.init||{};e.init={...t}}(),s(),function(){let e=a();const t=e.loader_config||{};e.loader_config={...t}}(),a()}},7956:(e,t,r)=>{r.d(t,{N:()=>i});var n=r(3239);function i(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],r=arguments.length>2?arguments[2]:void 0,i=arguments.length>3?arguments[3]:void 0;return void(0,n.iz)("visibilitychange",(function(){if(t)return void("hidden"==document.visibilityState&&e());e(document.visibilityState)}),r,i)}},1214:(e,t,r)=>{r.d(t,{em:()=>v,u5:()=>N,QU:()=>S,_L:()=>I,Gm:()=>L,Lg:()=>M,gy:()=>U,BV:()=>Q,Kf:()=>ee});var n=r(2177);const i="nr@original";var o=Object.prototype.hasOwnProperty,a=!1;function s(e,t){return e||(e=n.ee),r.inPlace=function(e,t,n,i,o){n||(n="");var a,s,c,u="-"===n.charAt(0);for(c=0;c 2?n-2:0),o=2;o {r(A[T],e,w),r(E[T],e,w)})),r(l._A,"fetch",y),t.on(y+"end",(function(e,r){var n=this;if(r){var i=r.headers.get("content-length");null!==i&&(n.rxSize=i),t.emit(y+"done",[null,r],n)}else t.emit(y+"done",[e],n)})),t}const O={},j=["pushState","replaceState"];function S(e){const t=function(e){return(e||n.ee).get("history")}(e);return!l.il||O[t.debugId]++||(O[t.debugId]=1,s(t).inPlace(window.history,j,"-")),t}var P=r(3239);const C={},R=["appendChild","insertBefore","replaceChild"];function I(e){const t=function(e){return(e||n.ee).get("jsonp")}(e);if(!l.il||C[t.debugId])return t;C[t.debugId]=!0;var r=s(t),i=/[?&](?:callback|cb)=([^&#]+)/,o=/(.*)\.([^.]+)/,a=/^(\w+)(\.|$)(.*)$/;function c(e,t){var r=e.match(a),n=r[1],i=r[3];return i?c(i,t[n]):t[n]}return r.inPlace(Node.prototype,R,"dom-"),t.on("dom-start",(function(e){!function(e){if(!e||"string"!=typeof e.nodeName||"script"!==e.nodeName.toLowerCase())return;if("function"!=typeof e.addEventListener)return;var n=(a=e.src,s=a.match(i),s?s[1]:null);var a,s;if(!n)return;var u=function(e){var t=e.match(o);if(t&&t.length>=3)return{key:t[2],parent:c(t[1],window)};return{key:e,parent:window}}(n);if("function"!=typeof u.parent[u.key])return;var d={};function 
f(){t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}function l(){t.emit("jsonp-error",[],d),t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}r.inPlace(u.parent,[u.key],"cb-",d),e.addEventListener("load",f,(0,P.m$)(!1)),e.addEventListener("error",l,(0,P.m$)(!1)),t.emit("new-jsonp",[e.src],d)}(e[0])})),t}var k=r(5763);const H={};function L(e){const t=function(e){return(e||n.ee).get("mutation")}(e);if(!l.il||H[t.debugId])return t;H[t.debugId]=!0;var r=s(t),i=k.Yu.MO;return i&&(window.MutationObserver=function(e){return this instanceof i?new i(r(e,"fn-")):i.apply(this,arguments)},MutationObserver.prototype=i.prototype),t}const z={};function M(e){const t=function(e){return(e||n.ee).get("promise")}(e);if(z[t.debugId])return t;z[t.debugId]=!0;var r=n.c,o=s(t),a=k.Yu.PR;return a&&function(){function e(r){var n=t.context(),i=o(r,"executor-",n,null,!1);const s=Reflect.construct(a,[i],e);return t.context(s).getCtx=function(){return n},s}l._A.Promise=e,Object.defineProperty(e,"name",{value:"Promise"}),e.toString=function(){return a.toString()},Object.setPrototypeOf(e,a),["all","race"].forEach((function(r){const n=a[r];e[r]=function(e){let i=!1;[...e||[]].forEach((e=>{this.resolve(e).then(a("all"===r),a(!1))}));const o=n.apply(this,arguments);return o;function a(e){return function(){t.emit("propagate",[null,!i],o,!1,!1),i=i||!e}}}})),["resolve","reject"].forEach((function(r){const n=a[r];e[r]=function(e){const r=n.apply(this,arguments);return e!==r&&t.emit("propagate",[e,!0],r,!1,!1),r}})),e.prototype=a.prototype;const n=a.prototype.then;a.prototype.then=function(){var e=this,i=r(e);i.promise=e;for(var a=arguments.length,s=new Array(a),c=0;c e())),t};function m(e,t){i.inPlace(t,["onreadystatechange"],"fn-",E)}function b(){var 
e=this,t=r.context(e);e.readyState>3&&!t.resolved&&(t.resolved=!0,r.emit("xhr-resolved",[],e)),i.inPlace(e,f,"fn-",E)}if(function(e,t){for(var r in e)t[r]=e[r]}(o,p),p.prototype=o.prototype,i.inPlace(p.prototype,J,"-xhr-",E),r.on("send-xhr-start",(function(e,t){m(e,t),function(e){h.push(e),a&&(y?y.then(A):u?u(A):(w=-w,x.data=w))}(t)})),r.on("open-xhr-start",m),a){var y=c&&c.resolve();if(!u&&!c){var w=1,x=document.createTextNode(w);new a(A).observe(x,{characterData:!0})}}else t.on("fn-end",(function(e){e[0]&&e[0].type===d||A()}));function A(){for(var e=0;e {r.d(t,{t:()=>n});const n=r(3325).D.ajax},6660:(e,t,r)=>{r.d(t,{A:()=>i,t:()=>n});const n=r(3325).D.jserrors,i="nr@seenError"},3081:(e,t,r)=>{r.d(t,{gF:()=>o,mY:()=>i,t9:()=>n,vz:()=>s,xS:()=>a});const n=r(3325).D.metrics,i="sm",o="cm",a="storeSupportabilityMetrics",s="storeEventMetrics"},4649:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageAction},7633:(e,t,r)=>{r.d(t,{Dz:()=>i,OJ:()=>a,qw:()=>o,t9:()=>n});const n=r(3325).D.pageViewEvent,i="firstbyte",o="domcontent",a="windowload"},9251:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageViewTiming},3614:(e,t,r)=>{r.d(t,{BST_RESOURCE:()=>i,END:()=>s,FEATURE_NAME:()=>n,FN_END:()=>u,FN_START:()=>c,PUSH_STATE:()=>d,RESOURCE:()=>o,START:()=>a});const n=r(3325).D.sessionTrace,i="bstResource",o="resource",a="-start",s="-end",c="fn"+a,u="fn"+s,d="pushState"},7836:(e,t,r)=>{r.d(t,{BODY:()=>A,CB_END:()=>E,CB_START:()=>u,END:()=>x,FEATURE_NAME:()=>i,FETCH:()=>_,FETCH_BODY:()=>v,FETCH_DONE:()=>m,FETCH_START:()=>p,FN_END:()=>c,FN_START:()=>s,INTERACTION:()=>l,INTERACTION_API:()=>d,INTERACTION_EVENTS:()=>o,JSONP_END:()=>b,JSONP_NODE:()=>g,JS_TIME:()=>T,MAX_TIMER_BUDGET:()=>a,REMAINING:()=>f,SPA_NODE:()=>h,START:()=>w,originalSetTimeout:()=>y});var n=r(5763);const 
i=r(3325).D.spa,o=["click","submit","keypress","keydown","keyup","change"],a=999,s="fn-start",c="fn-end",u="cb-start",d="api-ixn-",f="remaining",l="interaction",h="spaNode",g="jsonpNode",p="fetch-start",m="fetch-done",v="fetch-body-",b="jsonp-end",y=n.Yu.ST,w="-start",x="-end",A="-body",E="cb"+x,T="jsTime",_="fetch"},5938:(e,t,r)=>{r.d(t,{W:()=>o});var n=r(5763),i=r(2177);class o{constructor(e,t,r){this.agentIdentifier=e,this.aggregator=t,this.ee=i.ee.get(e,(0,n.OP)(this.agentIdentifier).isolatedBacklog),this.featureName=r,this.blocked=!1}}},9144:(e,t,r)=>{r.d(t,{j:()=>m});var n=r(3325),i=r(5763),o=r(5546),a=r(2177),s=r(7894),c=r(8e3),u=r(3960),d=r(385),f=r(50),l=r(3081),h=r(8632);function g(){const e=(0,h.gG)();["setErrorHandler","finished","addToTrace","inlineHit","addRelease","addPageAction","setCurrentRouteName","setPageViewName","setCustomAttribute","interaction","noticeError","setUserId"].forEach((t=>{e[t]=function(){for(var r=arguments.length,n=new Array(r),i=0;i 1?r-1:0),i=1;i {e.exposed&&e.api[t]&&o.push(e.api[t](...n))})),o.length>1?o:o[0]}(t,...n)}}))}var p=r(2587);function m(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},m=arguments.length>2?arguments[2]:void 0,v=arguments.length>3?arguments[3]:void 0,{init:b,info:y,loader_config:w,runtime:x={loaderType:m},exposed:A=!0}=t;const E=(0,h.gG)();y||(b=E.init,y=E.info,w=E.loader_config),(0,i.Dg)(e,b||{}),(0,i.GE)(e,w||{}),(0,i.sU)(e,x),y.jsAttributes??={},d.v6&&(y.jsAttributes.isWorker=!0),(0,i.CX)(e,y),g();const T=function(e,t){t||(0,c.R)(e,"api");const h={};var g=a.ee.get(e),p=g.get("tracer"),m="api-",v=m+"ixn-";function b(t,r,n,o){const a=(0,i.C5)(e);return null===r?delete a.jsAttributes[t]:(0,i.CX)(e,{...a,jsAttributes:{...a.jsAttributes,[t]:r}}),x(m,n,!0,o||null===r?"session":void 0)(t,r)}function 
y(){}["setErrorHandler","finished","addToTrace","inlineHit","addRelease"].forEach((e=>h[e]=x(m,e,!0,"api"))),h.addPageAction=x(m,"addPageAction",!0,n.D.pageAction),h.setCurrentRouteName=x(m,"routeName",!0,n.D.spa),h.setPageViewName=function(t,r){if("string"==typeof t)return"/"!==t.charAt(0)&&(t="/"+t),(0,i.OP)(e).customTransaction=(r||"http://custom.transaction")+t,x(m,"setPageViewName",!0)()},h.setCustomAttribute=function(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2];if("string"==typeof e){if(["string","number"].includes(typeof t)||null===t)return b(e,t,"setCustomAttribute",r);(0,f.Z)("Failed to execute setCustomAttribute.\nNon-null value must be a string or number type, but a type of was provided."))}else(0,f.Z)("Failed to execute setCustomAttribute.\nName must be a string type, but a type of was provided."))},h.setUserId=function(e){if("string"==typeof e||null===e)return b("enduser.id",e,"setUserId",!0);(0,f.Z)("Failed to execute setUserId.\nNon-null value must be a string type, but a type of was provided."))},h.interaction=function(){return(new y).get()};var w=y.prototype={createTracer:function(e,t){var r={},i=this,a="function"==typeof t;return(0,o.p)(v+"tracer",[(0,s.z)(),e,r],i,n.D.spa,g),function(){if(p.emit((a?"":"no-")+"fn-start",[(0,s.z)(),i,a],r),a)try{return t.apply(this,arguments)}catch(e){throw p.emit("fn-err",[arguments,this,"string"==typeof e?new Error(e):e],r),e}finally{p.emit("fn-end",[(0,s.z)()],r)}}}};function x(e,t,r,i){return function(){return(0,o.p)(l.xS,["API/"+t+"/called"],void 0,n.D.metrics,g),i&&(0,o.p)(e+t,[(0,s.z)(),...arguments],r?null:this,i,g),r?void 0:this}}function A(){r.e(439).then(r.bind(r,7438)).then((t=>{let{setAPI:r}=t;r(e),(0,c.L)(e,"api")})).catch((()=>(0,f.Z)("Downloading runtime APIs failed...")))}return["actionText","setName","setAttribute","save","ignore","onEnd","getContext","end","get"].forEach((e=>{w[e]=x(v,e,void 0,n.D.spa)})),h.noticeError=function(e,t){"string"==typeof e&&(e=new 
Error(e)),(0,o.p)(l.xS,["API/noticeError/called"],void 0,n.D.metrics,g),(0,o.p)("err",[e,(0,s.z)(),!1,t],void 0,n.D.jserrors,g)},d.il?(0,u.b)((()=>A()),!0):A(),h}(e,v);return(0,h.Qy)(e,T,"api"),(0,h.Qy)(e,A,"exposed"),(0,h.EZ)("activatedFeatures",p.T),T}},3325:(e,t,r)=>{r.d(t,{D:()=>n,p:()=>i});const n={ajax:"ajax",jserrors:"jserrors",metrics:"metrics",pageAction:"page_action",pageViewEvent:"page_view_event",pageViewTiming:"page_view_timing",sessionReplay:"session_replay",sessionTrace:"session_trace",spa:"spa"},i={[n.pageViewEvent]:1,[n.pageViewTiming]:2,[n.metrics]:3,[n.jserrors]:4,[n.ajax]:5,[n.sessionTrace]:6,[n.pageAction]:7,[n.spa]:8,[n.sessionReplay]:9}}},n={};function i(e){var t=n[e];if(void 0!==t)return t.exports;var o=n[e]={exports:{}};return r[e](o,o.exports,i),o.exports}i.m=r,i.d=(e,t)=>{for(var r in t)i.o(t,r)&&!i.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},i.f={},i.e=e=>Promise.all(Object.keys(i.f).reduce(((t,r)=>(i.f[r](e,t),t)),[])),i.u=e=>(({78:"page_action-aggregate",147:"metrics-aggregate",242:"session-manager",317:"jserrors-aggregate",348:"page_view_timing-aggregate",412:"lazy-feature-loader",439:"async-api",538:"recorder",590:"session_replay-aggregate",675:"compressor",733:"session_trace-aggregate",786:"page_view_event-aggregate",873:"spa-aggregate",898:"ajax-aggregate"}[e]||e)+"."+{78:"ac76d497",147:"3dc53903",148:"1a20d5fe",242:"2a64278a",317:"49e41428",348:"bd6de33a",412:"2f55ce66",439:"30bd804e",538:"1b18459f",590:"cf0efb30",675:"ae9f91a8",733:"83105561",786:"06482edd",860:"03a8b7a5",873:"e6b09d52",898:"998ef92b"}[e]+"-1.236.0.min.js"),i.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),e={},t="NRBA:",i.l=(r,n,o,a)=>{if(e[r])e[r].push(n);else{var s,c;if(void 0!==o)for(var u=document.getElementsByTagName("script"),d=0;d {s.onerror=s.onload=null,clearTimeout(h);var i=e[r];if(delete e[r],s.parentNode&&s.parentNode.removeChild(s),i&&i.forEach((e=>e(n))),t)return t(n)},h=setTimeout(l.bind(null,void 
0,{type:"timeout",target:s}),12e4);s.onerror=l.bind(null,s.onerror),s.onload=l.bind(null,s.onload),c&&document.head.appendChild(s)}},i.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.j=364,i.p="https://js-agent.newrelic.com/",(()=>{var e={364:0,953:0};i.f.j=(t,r)=>{var n=i.o(e,t)?e[t]:void 0;if(0!==n)if(n)r.push(n[2]);else{var o=new Promise(((r,i)=>n=e[t]=[r,i]));r.push(n[2]=o);var a=i.p+i.u(t),s=new Error;i.l(a,(r=>{if(i.o(e,t)&&(0!==(n=e[t])&&(e[t]=void 0),n)){var o=r&&("load"===r.type?"missing":r.type),a=r&&r.target&&r.target.src;s.message="Loading chunk "+t+" failed.\n("+o+": "+a+")",s.name="ChunkLoadError",s.type=o,s.request=a,n[1](s)}}),"chunk-"+t,t)}};var t=(t,r)=>{var n,o,[a,s,c]=r,u=0;if(a.some((t=>0!==e[t]))){for(n in s)i.o(s,n)&&(i.m[n]=s[n]);if(c)c(i)}for(t&&t(r);u {i.r(o);var e=i(3325),t=i(5763);const r=Object.values(e.D);function n(e){const n={};return r.forEach((r=>{n[r]=function(e,r){return!1!==(0,t.Mt)(r,"".concat(e,".enabled"))}(r,e)})),n}var a=i(9144);var s=i(5546),c=i(385),u=i(8e3),d=i(5938),f=i(3960),l=i(50);class h extends d.W{constructor(e,t,r){let n=!(arguments.length>3&&void 0!==arguments[3])||arguments[3];super(e,t,r),this.auto=n,this.abortHandler,this.featAggregate,this.onAggregateImported,n&&(0,u.R)(e,r)}importAggregator(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};if(this.featAggregate||!this.auto)return;const r=c.il&&!0===(0,t.Mt)(this.agentIdentifier,"privacy.cookies_enabled");let n;this.onAggregateImported=new Promise((e=>{n=e}));const o=async()=>{let t;try{if(r){const{setupAgentSession:e}=await Promise.all([i.e(860),i.e(242)]).then(i.bind(i,3228));t=e(this.agentIdentifier)}}catch(e){(0,l.Z)("A problem occurred when starting up session manager. 
This page will not start or extend any session.",e)}try{if(!this.shouldImportAgg(this.featureName,t))return void(0,u.L)(this.agentIdentifier,this.featureName);const{lazyFeatureLoader:r}=await i.e(412).then(i.bind(i,8582)),{Aggregate:o}=await r(this.featureName,"aggregate");this.featAggregate=new o(this.agentIdentifier,this.aggregator,e),n(!0)}catch(e){(0,l.Z)("Downloading and initializing ".concat(this.featureName," failed..."),e),this.abortHandler?.(),n(!1)}};c.il?(0,f.b)((()=>o()),!0):o()}shouldImportAgg(r,n){return r!==e.D.sessionReplay||!1!==(0,t.Mt)(this.agentIdentifier,"session_trace.enabled")&&(!!n?.isNew||!!n?.state.sessionReplay)}}var g=i(7633),p=i(7894);class m extends h{static featureName=g.t9;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];if(super(r,n,g.t9,i),("undefined"==typeof PerformanceNavigationTiming||c.Tt)&&"undefined"!=typeof PerformanceTiming){const n=(0,t.OP)(r);n[g.Dz]=Math.max(Date.now()-n.offset,0),(0,f.K)((()=>n[g.qw]=Math.max((0,p.z)()-n[g.Dz],0))),(0,f.b)((()=>{const t=(0,p.z)();n[g.OJ]=Math.max(t-n[g.Dz],0),(0,s.p)("timing",["load",t],void 0,e.D.pageViewTiming,this.ee)}))}this.importAggregator()}}var v=i(1117),b=i(1284);class y extends v.w{constructor(e){super(e),this.aggregatedData={}}store(e,t,r,n,i){var o=this.getBucket(e,t,r,i);return o.metrics=function(e,t){t||(t={count:0});return t.count+=1,(0,b.D)(e,(function(e,r){t[e]=w(r,t[e])})),t}(n,o.metrics),o}merge(e,t,r,n,i){var o=this.getBucket(e,t,n,i);if(o.metrics){var a=o.metrics;a.count+=r.count,(0,b.D)(r,(function(e,t){if("count"!==e){var n=a[e],i=r[e];i&&!i.c?a[e]=w(i.t,n):a[e]=function(e,t){if(!t)return e;t.c||(t=x(t.t));return t.min=Math.min(e.min,t.min),t.max=Math.max(e.max,t.max),t.t+=e.t,t.sos+=e.sos,t.c+=e.c,t}(i,a[e])}}))}else o.metrics=r}storeMetric(e,t,r,n){var i=this.getBucket(e,t,r);return i.stats=w(n,i.stats),i}getBucket(e,t,r,n){this.aggregatedData[e]||(this.aggregatedData[e]={});var i=this.aggregatedData[e][t];return 
i||(i=this.aggregatedData[e][t]={params:r||{}},n&&(i.custom=n)),i}get(e,t){return t?this.aggregatedData[e]&&this.aggregatedData[e][t]:this.aggregatedData[e]}take(e){for(var t={},r="",n=!1,i=0;i t.max&&(t.max=e),e 2&&void 0!==arguments[2])||arguments[2];super(e,r,j.t,n),c.il&&((0,t.OP)(e).initHidden=Boolean("hidden"===document.visibilityState),(0,N.N)((()=>(0,s.p)("docHidden",[(0,p.z)()],void 0,j.t,this.ee)),!0),(0,O.bP)("pagehide",(()=>(0,s.p)("winPagehide",[(0,p.z)()],void 0,j.t,this.ee))),this.importAggregator())}}var P=i(3081);class C extends h{static featureName=P.t9;constructor(e,t){let r=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(e,t,P.t9,r),this.importAggregator()}}var R,I=i(2210),k=i(1214),H=i(2177),L={};try{R=localStorage.getItem("__nr_flags").split(","),console&&"function"==typeof console.log&&(L.console=!0,-1!==R.indexOf("dev")&&(L.dev=!0),-1!==R.indexOf("nr_dev")&&(L.nrDev=!0))}catch(e){}function z(e){try{L.console&&z(e)}catch(e){}}L.nrDev&&H.ee.on("internal-error",(function(e){z(e.stack)})),L.dev&&H.ee.on("fn-err",(function(e,t,r){z(r.stack)})),L.dev&&(z("NR AGENT IN DEVELOPMENT MODE"),z("flags: "+(0,b.D)(L,(function(e,t){return e})).join(", ")));var M=i(6660);class B extends h{static featureName=M.t;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(r,n,M.t,i),this.skipNext=0;try{this.removeOnAbort=new AbortController}catch(e){}const o=this;o.ee.on("fn-start",(function(e,t,r){o.abortHandler&&(o.skipNext+=1)})),o.ee.on("fn-err",(function(t,r,n){o.abortHandler&&!n[M.A]&&((0,I.X)(n,M.A,(function(){return!0})),this.thrown=!0,(0,s.p)("err",[n,(0,p.z)()],void 0,e.D.jserrors,o.ee))})),o.ee.on("fn-end",(function(){o.abortHandler&&!this.thrown&&o.skipNext>0&&(o.skipNext-=1)})),o.ee.on("internal-error",(function(t){(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,o.ee)})),this.origOnerror=c._A.onerror,c._A.onerror=this.onerrorHandler.bind(this),c._A.addEventListener("unhandledrejection",(t=>{const 
r=function(e){let t="Unhandled Promise Rejection: ";if(e instanceof Error)try{return e.message=t+e.message,e}catch(t){return e}if(void 0===e)return new Error(t);try{return new Error(t+(0,D.P)(e))}catch(e){return new Error(t)}}(t.reason);(0,s.p)("err",[r,(0,p.z)(),!1,{unhandledPromiseRejection:1}],void 0,e.D.jserrors,this.ee)}),(0,O.m$)(!1,this.removeOnAbort?.signal)),(0,k.gy)(this.ee),(0,k.BV)(this.ee),(0,k.em)(this.ee),(0,t.OP)(r).xhrWrappable&&(0,k.Kf)(this.ee),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}onerrorHandler(t,r,n,i,o){"function"==typeof this.origOnerror&&this.origOnerror(...arguments);try{this.skipNext?this.skipNext-=1:(0,s.p)("err",[o||new F(t,r,n),(0,p.z)()],void 0,e.D.jserrors,this.ee)}catch(t){try{(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,this.ee)}catch(e){}}return!1}}function F(e,t,r){this.message=e||"Uncaught error with no additional information",this.sourceURL=t,this.line=r}let U=1;const q="nr@id";function G(e){const t=typeof e;return!e||"object"!==t&&"function"!==t?-1:e===c._A?0:(0,I.X)(e,q,(function(){return U++}))}function V(e){if("string"==typeof e&&e.length)return e.length;if("object"==typeof e){if("undefined"!=typeof ArrayBuffer&&e instanceof ArrayBuffer&&e.byteLength)return e.byteLength;if("undefined"!=typeof Blob&&e instanceof Blob&&e.size)return e.size;if(!("undefined"!=typeof FormData&&e instanceof FormData))try{return(0,D.P)(e).length}catch(e){return}}}var X=i(7243);class W{constructor(e){this.agentIdentifier=e,this.generateTracePayload=this.generateTracePayload.bind(this),this.shouldGenerateTrace=this.shouldGenerateTrace.bind(this)}generateTracePayload(e){if(!this.shouldGenerateTrace(e))return null;var r=(0,t.DL)(this.agentIdentifier);if(!r)return null;var n=(r.accountID||"").toString()||null,i=(r.agentID||"").toString()||null,o=(r.trustKey||"").toString()||null;if(!n||!i)return null;var 
a=(0,_.M)(),s=(0,_.Ht)(),c=Date.now(),u={spanId:a,traceId:s,timestamp:c};return(e.sameOrigin||this.isAllowedOrigin(e)&&this.useTraceContextHeadersForCors())&&(u.traceContextParentHeader=this.generateTraceContextParentHeader(a,s),u.traceContextStateHeader=this.generateTraceContextStateHeader(a,c,n,i,o)),(e.sameOrigin&&!this.excludeNewrelicHeader()||!e.sameOrigin&&this.isAllowedOrigin(e)&&this.useNewrelicHeaderForCors())&&(u.newrelicHeader=this.generateTraceHeader(a,s,c,n,i,o)),u}generateTraceContextParentHeader(e,t){return"00-"+t+"-"+e+"-01"}generateTraceContextStateHeader(e,t,r,n,i){return i+"@nr=0-1-"+r+"-"+n+"-"+e+"----"+t}generateTraceHeader(e,t,r,n,i,o){if(!("function"==typeof c._A?.btoa))return null;var a={v:[0,1],d:{ty:"Browser",ac:n,ap:i,id:e,tr:t,ti:r}};return o&&n!==o&&(a.d.tk=o),btoa((0,D.P)(a))}shouldGenerateTrace(e){return this.isDtEnabled()&&this.isAllowedOrigin(e)}isAllowedOrigin(e){var r=!1,n={};if((0,t.Mt)(this.agentIdentifier,"distributed_tracing")&&(n=(0,t.P_)(this.agentIdentifier).distributed_tracing),e.sameOrigin)r=!0;else if(n.allowed_origins instanceof Array)for(var i=0;i 2&&void 0!==arguments[2])||arguments[2];super(r,n,Z.t,i),(0,t.OP)(r).xhrWrappable&&(this.dt=new W(r),this.handler=(e,t,r,n)=>(0,s.p)(e,t,r,n,this.ee),(0,k.u5)(this.ee),(0,k.Kf)(this.ee),function(r,n,i,o){function a(e){var t=this;t.totalCbs=0,t.called=0,t.cbTime=0,t.end=E,t.ended=!1,t.xhrGuids={},t.lastSize=null,t.loadCaptureCalled=!1,t.params=this.params||{},t.metrics=this.metrics||{},e.addEventListener("load",(function(r){_(t,e)}),(0,O.m$)(!1)),c.IF||e.addEventListener("progress",(function(e){t.lastSize=e.loaded}),(0,O.m$)(!1))}function s(e){this.params={method:e[0]},T(this,e[1]),this.metrics={}}function u(e,n){var i=(0,t.DL)(r);i.xpid&&this.sameOrigin&&n.setRequestHeader("X-NewRelic-ID",i.xpid);var a=o.generateTracePayload(this.parsedOrigin);if(a){var 
s=!1;a.newrelicHeader&&(n.setRequestHeader("newrelic",a.newrelicHeader),s=!0),a.traceContextParentHeader&&(n.setRequestHeader("traceparent",a.traceContextParentHeader),a.traceContextStateHeader&&n.setRequestHeader("tracestate",a.traceContextStateHeader),s=!0),s&&(this.dt=a)}}function d(e,t){var r=this.metrics,i=e[0],o=this;if(r&&i){var a=V(i);a&&(r.txSize=a)}this.startTime=(0,p.z)(),this.listener=function(e){try{"abort"!==e.type||o.loadCaptureCalled||(o.params.aborted=!0),("load"!==e.type||o.called===o.totalCbs&&(o.onloadCalled||"function"!=typeof t.onload)&&"function"==typeof o.end)&&o.end(t)}catch(e){try{n.emit("internal-error",[e])}catch(e){}}};for(var s=0;s 1?e[1]=i:e.push(i)}else e[0]&&e[0].headers&&s(e[0].headers,n)&&(this.dt=n);function s(e,t){var r=!1;return t.newrelicHeader&&(e.set("newrelic",t.newrelicHeader),r=!0),t.traceContextParentHeader&&(e.set("traceparent",t.traceContextParentHeader),t.traceContextStateHeader&&e.set("tracestate",t.traceContextStateHeader),r=!0),r}}function x(e,t){this.params={},this.metrics={},this.startTime=(0,p.z)(),this.dt=t,e.length>=1&&(this.target=e[0]),e.length>=2&&(this.opts=e[1]);var r,n=this.opts||{},i=this.target;"string"==typeof i?r=i:"object"==typeof i&&i instanceof Y?r=i.url:c._A?.URL&&"object"==typeof i&&i instanceof URL&&(r=i.href),T(this,r);var o=(""+(i&&i instanceof Y&&i.method||n.method||"GET")).toUpperCase();this.params.method=o,this.txSize=V(n.body)||0}function A(t,r){var n;this.endTime=(0,p.z)(),this.params||(this.params={}),this.params.status=r?r.status:0,"string"==typeof this.rxSize&&this.rxSize.length>0&&(n=+this.rxSize);var o={txSize:this.txSize,rxSize:n,duration:(0,p.z)()-this.startTime};i("xhr",[this.params,o,this.startTime,this.endTime,"fetch"],this,e.D.ajax)}function E(t){var r=this.params,n=this.metrics;if(!this.ended){this.ended=!0;for(var o=0;o 2&&void 0!==arguments[2])||arguments[2];super(e,t,we.t,r),this.importAggregator()}}new class{constructor(e){let t=arguments.length>1&&void 
0!==arguments[1]?arguments[1]:(0,_.ky)(16);c._A?(this.agentIdentifier=t,this.sharedAggregator=new y({agentIdentifier:this.agentIdentifier}),this.features={},this.desiredFeatures=new Set(e.features||[]),this.desiredFeatures.add(m),Object.assign(this,(0,a.j)(this.agentIdentifier,e,e.loaderType||"agent")),this.start()):(0,l.Z)("Failed to initial the agent. Could not determine the runtime environment.")}get config(){return{info:(0,t.C5)(this.agentIdentifier),init:(0,t.P_)(this.agentIdentifier),loader_config:(0,t.DL)(this.agentIdentifier),runtime:(0,t.OP)(this.agentIdentifier)}}start(){const t="features";try{const r=n(this.agentIdentifier),i=[...this.desiredFeatures];i.sort(((t,r)=>e.p[t.featureName]-e.p[r.featureName])),i.forEach((t=>{if(r[t.featureName]||t.featureName===e.D.pageViewEvent){const n=function(t){switch(t){case e.D.ajax:return[e.D.jserrors];case e.D.sessionTrace:return[e.D.ajax,e.D.pageViewEvent];case e.D.sessionReplay:return[e.D.sessionTrace];case e.D.pageViewTiming:return[e.D.pageViewEvent];default:return[]}}(t.featureName);n.every((e=>r[e]))||(0,l.Z)("".concat(t.featureName," is enabled but one or more dependent features has been disabled (").concat((0,D.P)(n),"). 
This may cause unintended consequences or missing data...")),this.features[t.featureName]=new t(this.agentIdentifier,this.sharedAggregator)}})),(0,T.Qy)(this.agentIdentifier,this.features,t)}catch(e){(0,l.Z)("Failed to initialize all enabled instrument classes (agent aborted) -",e);for(const e in this.features)this.features[e].abortHandler?.();const r=(0,T.fP)();return delete r.initializedAgents[this.agentIdentifier]?.api,delete r.initializedAgents[this.agentIdentifier]?.[t],delete this.sharedAggregator,r.ee?.abort(),delete r.ee?.get(this.agentIdentifier),!1}}}({features:[J,m,S,class extends h{static featureName=oe;constructor(t,r){if(super(t,r,oe,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;const n=this.ee;let i;(0,k.QU)(n),this.eventsEE=(0,k.em)(n),this.eventsEE.on(se,(function(e,t){this.bstStart=(0,p.z)()})),this.eventsEE.on(ae,(function(t,r){(0,s.p)("bst",[t[0],r,this.bstStart,(0,p.z)()],void 0,e.D.sessionTrace,n)})),n.on(ce+ne,(function(e){this.time=(0,p.z)(),this.startPath=location.pathname+location.hash})),n.on(ce+ie,(function(t){(0,s.p)("bstHist",[location.pathname+location.hash,this.startPath,this.time],void 0,e.D.sessionTrace,n)}));try{i=new PerformanceObserver((t=>{const r=t.getEntries();(0,s.p)(te,[r],void 0,e.D.sessionTrace,n)})),i.observe({type:re,buffered:!0})}catch(e){}this.importAggregator({resourceObserver:i})}},C,xe,B,class extends h{static featureName=de;constructor(e,r){if(super(e,r,de,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;if(!(0,t.OP)(e).xhrWrappable)return;try{this.removeOnAbort=new AbortController}catch(e){}let n,i=0;const o=this.ee.get("tracer"),a=(0,k._L)(this.ee),s=(0,k.Lg)(this.ee),u=(0,k.BV)(this.ee),d=(0,k.Kf)(this.ee),f=this.ee.get("events"),l=(0,k.u5)(this.ee),h=(0,k.QU)(this.ee),g=(0,k.Gm)(this.ee);function m(e,t){h.emit("newURL",[""+window.location,t])}function v(){i++,n=window.location.hash,this[ve]=(0,p.z)()}function b(){i--,window.location.hash!==n&&m(0,!0);var 
e=(0,p.z)();this[pe]=~~this[pe]+e-this[ve],this[ye]=e}function y(e,t){e.on(t,(function(){this[t]=(0,p.z)()}))}this.ee.on(ve,v),s.on(be,v),a.on(be,v),this.ee.on(ye,b),s.on(ge,b),a.on(ge,b),this.ee.buffer([ve,ye,"xhr-resolved"],this.featureName),f.buffer([ve],this.featureName),u.buffer(["setTimeout"+le,"clearTimeout"+fe,ve],this.featureName),d.buffer([ve,"new-xhr","send-xhr"+fe],this.featureName),l.buffer([me+fe,me+"-done",me+he+fe,me+he+le],this.featureName),h.buffer(["newURL"],this.featureName),g.buffer([ve],this.featureName),s.buffer(["propagate",be,ge,"executor-err","resolve"+fe],this.featureName),o.buffer([ve,"no-"+ve],this.featureName),a.buffer(["new-jsonp","cb-start","jsonp-error","jsonp-end"],this.featureName),y(l,me+fe),y(l,me+"-done"),y(a,"new-jsonp"),y(a,"jsonp-end"),y(a,"cb-start"),h.on("pushState-end",m),h.on("replaceState-end",m),window.addEventListener("hashchange",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("load",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("popstate",(function(){m(0,i>1)}),(0,O.m$)(!0,this.removeOnAbort?.signal)),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}}],loaderType:"spa"})})(),window.NRBA=o})(); window.jQuery || document.write(' ') CKEDITOR_BASEPATH='https://f1000research.com/js/vendor/ckeditor/' window.reactTheme = 'research'; window.MathJax = { CommonHTML: { linebreaks: { automatic: true } }, 'HTML-CSS': { linebreaks: { automatic: true } }, SVG: { linebreaks: { automatic: true } }, AuthorInit: function() { MathJax.Hub.Register.MessageHook('End Process', function () { let timeout = false; // holder for timeout id const delay = 250; // delay after event is "complete" to run callback const reflowMath = function() { const dispFormulas = document.querySelectorAll('.disp-formula.panel'); if (!dispFormulas) { return; } for (const dispFormula of dispFormulas) { const child = 
dispFormula.querySelector('.MathJax_Preview').nextSibling.firstChild; const isMultiline = MathJax.Hub.getAllJax(dispFormula)[0].root.isMultiline; if (dispFormula.offsetWidth < child.offsetWidth || isMultiline) { MathJax.Hub.Queue(['Rerender', MathJax.Hub, dispFormula]); } } }; window.addEventListener('resize', function() { clearTimeout(timeout); // clear the timeout timeout = setTimeout(reflowMath, delay); // start timing for event "completion" }); }); }, }; if (window.location.hash == '#_=_'){ window.location = window.location.href.split('#')[0] } !function(f,b,e,v,n,t,s){if(f.fbq)return;n=f.fbq=function() {n.callMethod? n.callMethod.apply(n,arguments):n.queue.push(arguments)} ;if(!f._fbq)f._fbq=n; n.push=n;n.loaded=!0;n.version='2.0';n.queue=[];t=b.createElement(e);t.async=!0; t.src=v;s=b.getElementsByTagName(e)[0];s.parentNode.insertBefore(t,s)}(window, document,'script','https://connect.facebook.net/en_US/fbevents.js'); fbq('init', '1641728616063202'); fbq('track', "PixelInitialized", {}); (function(h,o,t,j,a,r){ h.hj=h.hj||function(){(h.hj.q=h.hj.q||[]).push(arguments)}; h._hjSettings={hjid:2318163,hjsv:6}; a=o.getElementsByTagName('head')[0]; r=o.createElement('script');r.async=1; r.src=t+h._hjSettings.hjid+j+h._hjSettings.hjsv; a.appendChild(r); })(window,document,'https://static.hotjar.com/c/hotjar-','.js?sv='); search file_upload Submit your research search menu close search Browse Gateways & Collections How to Publish Submit your Research My Submissions Article Guidelines Article Guidelines (New Versions) Open Data, Software and Code Guidelines Open Data and Accessible Source Materials Guidelines (HSS) Open Data, Software and Code Guidelines (PSE) Prepublication Checks Production Process Posters and Slides Guidelines Document Guidelines Article Processing Charges Peer Review Finding Article Reviewers About How it Works For Reviewers Our Advisors Policies Glossary FAQs For Developers Newsroom Contact My Research Submissions Content and Tracking Alerts My 
Details Sign In file_upload Submit your research { "@context": "https://schema.org", "@type": "ScholarlyArticle", "mainEntityOfPage": { "@type": "WebPage", "@id": "https://f1000research.com/articles/11-549" }, "headline": "ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses", "datePublished": "2022-05-19T16:13:09", "dateModified": "2025-06-19T15:00:50", "author": [ { "@type": "Person", "name": "Judith ter Schure" }, { "@type": "Person", "name": "Peter Grünwald" } ], "publisher": { "@type": "Organization", "name": "F1000Research", "logo": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 480, "width": 60 } }, "image": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 1200, "width": 150 }, "description": "Science is justly admired as a cumulative process (“standing on the shoulders of giants”), yet scientific knowledge is typically built on a patchwork of research contributions without much coordination. This lack of efficiency has specifically been addressed in clinical research by recommendations against avoidable research waste and for living systematic reviews and prospective meta-analysis. We propose to further those recommendations with ALL-IN meta-analysis: Anytime Live and Leading INterim meta-analysis. ALL-IN provides meta-analysis based on e-values and anytime-valid confidence intervals that can be updated at any time—reanalyzing after each new observation while retaining type-I error and coverage guarantees, live—no need to prespecify the looks, and leading—in the decisions on whether individual studies should be initiated, stopped or expanded, the meta-analysis can be the leading source of information without losing validity to accumulation bias. The analysis design requires no information about the trial sample sizes or the number of trials eventually included. 
So ALL-IN meta-analysis can be applied retrospectively as well as prospectively, to evaluate the evidence once or sequentially. Because the intention of the analysis does not change the validity of the results, the results of the analysis can change the intentions (‘optional stopping’ and ‘optional continuation’ based on the results so far). On the one hand: any analysis can be turned into a living one, or even become prospective and real-time by updating with new trial data and including interim data from trials that are still ongoing—without any changes in the cut-offs for testing or the method for interval estimation. On the other hand: no stopping rule needs to be enforced for the analysis to remain valid, so a prospective meta-analysis can be a bottom-up collaboration that requires no outside control over participating clinical trials. Hence ALL-IN meta-analysis breathes life into living systematic reviews and prospective meta-analyses, and offers better and simpler statistics, efficiency, collaboration and communication." } { "@context": "http://schema.org", "@type": "BreadcrumbList", "itemListElement": [ { "@type": "ListItem", "position": "1", "item": { "@id": "https://f1000research.com/", "name": "Home" } }, { "@type": "ListItem", "position": "2", "item": { "@id": "https://f1000research.com/browse/articles", "name": "Browse" } }, { "@type": "ListItem", "position": "3", "item": { "@id": "https://f1000research.com/articles/11-549", "name": "ALL-IN meta-analysis: breathing life into living systematic reviews..." } } ] } Home Browse ALL-IN meta-analysis: breathing life into living systematic reviews... ALL Metrics - Views Downloads Get PDF Get XML Cite How to cite this article ter Schure J and Grünwald P. ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . 
F1000Research 2025, 11 :549 ( https://doi.org/10.12688/f1000research.74223.2 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. Close Copy Citation Details Export Export Citation Sciwheel EndNote Ref. Manager Bibtex ProCite Sente EXPORT Select a format first Track Share ▬ ✚ Method Article Revised ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] Previously titled: ALL-IN meta-analysis: breathing life into living systematic reviews Judith ter Schure https://orcid.org/0000-0002-2147-5510 1 , Peter Grünwald 2,3 Judith ter Schure https://orcid.org/0000-0002-2147-5510 1 , Peter Grünwald 2,3 PUBLISHED 19 Jun 2025 Author details Author details 1 Epidemiology & Data Science, Amsterdam UMC, Amsterdam, North Holland, The Netherlands 2 Machine Learning, CWI, Amsterdam, The Netherlands 3 Mathematics, Leiden University, Leiden, The Netherlands Judith ter Schure Roles: Conceptualization, Data Curation, Formal Analysis, Methodology, Software, Visualization, Writing – Original Draft Preparation, Writing – Review & Editing Peter Grünwald Roles: Funding Acquisition, Investigation, Methodology, Supervision, Writing – Review & Editing OPEN PEER REVIEW DETAILS REVIEWER STATUS This article is included in the Research on Research, Policy & Culture gateway. This article is included in the Living Evidence collection. Abstract Science is justly admired as a cumulative process (“standing on the shoulders of giants”), yet scientific knowledge is typically built on a patchwork of research contributions without much coordination. This lack of efficiency has specifically been addressed in clinical research by recommendations against avoidable research waste and for living systematic reviews and prospective meta-analysis. 
We propose to further those recommendations with ALL-IN meta-analysis: Anytime Live and Leading INterim meta-analysis. ALL-IN provides meta-analysis based on e -values and anytime-valid confidence intervals that can be updated at any time—reanalyzing after each new observation while retaining type-I error and coverage guarantees, live—no need to prespecify the looks, and leading—in the decisions on whether individual studies should be initiated, stopped or expanded, the meta-analysis can be the leading source of information without losing validity to accumulation bias. The analysis design requires no information about the trial sample sizes or the number of trials eventually included. So ALL-IN meta-analysis can be applied retrospectively as well as prospectively, to evaluate the evidence once or sequentially. Because the intention of the analysis does not change the validity of the results, the results of the analysis can change the intentions (‘optional stopping’ and ‘optional continuation’ based on the results so far). On the one hand: any analysis can be turned into a living one, or even become prospective and real-time by updating with new trial data and including interim data from trials that are still ongoing—without any changes in the cut-offs for testing or the method for interval estimation. On the other hand: no stopping rule needs to be enforced for the analysis to remain valid, so a prospective meta-analysis can be a bottom-up collaboration that requires no outside control over participating clinical trials. Hence ALL-IN meta-analysis breathes life into living systematic reviews and prospective meta-analyses, and offers better and simpler statistics, efficiency, collaboration and communication. 
READ ALL READ LESS Keywords Anytime, Live, Leading, Interim, Meta-analysis, Efficiency, Collaboration, Communication, Research Waste Corresponding Author(s) Judith ter Schure ( [email protected] ) Close Corresponding author: Judith ter Schure Competing interests: No competing interests were disclosed. Grant information: This work is part of the NWO TOP-I research programme Safe Bayesian Inference assigned to Peter Grünwald, with project number 617.001.651, which is financed by the Dutch Research Council (Nederlandse Organisatie voor Wetenschappelijk Onderzoek; NWO). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript. Copyright: © 2025 ter Schure J and Grünwald P. This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. How to cite: ter Schure J and Grünwald P. ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.12688/f1000research.74223.2 ) First published: 19 May 2022, 11 :549 ( https://doi.org/10.12688/f1000research.74223.1 ) Latest published: 19 Jun 2025, 11 :549 ( https://doi.org/10.12688/f1000research.74223.2 ) Revised Amendments from Version 1 Version 2 further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper (ALL-IN-META-BCG-CORONA) with a reflection on the collaboration, summary of results and references to a complete replication package. 
Although ALL-IN is a novel statistical approach to meta-analysis, it does provide statistical summaries for evidence synthesis that can be reported in a familiar way – e-values take the role of p-values, and anytime-valid confidence intervals the role of conventional confidence intervals. Yet due to their anytime-validity, this statistical summary can be Live and can be the Leading source of information in decisions on future research, even at INterim stages of clinical trials. So while the statistical approach fits within mainstream understanding of evidence synthesis as an external and retrospective summary, it can also shift that understanding towards evidence synthesis as an active/involved (advising future trials/liaising with ongoing trials) and possibly prospective tool to facilitate collaboration and increase efficiency. In our Version 2 of the paper we have emphasized this aspect more, and moreover, stressed how the approach to prospective meta-analysis differs from other adaptive approaches to prospective analysis like the Framework for Prospective, Adaptive Meta-analysis (FAME), and Bayesian adaptive methods. The main difference is that the validity of the ALL-IN analysis does not rely on enforcing a maximum sample size or stopping rule, such that prospective collaborations can be more bottom-up rather than outsourcing decisions to a top-down meta-analysis team. This fact needed a better illustration in Section 3 on Collaboration, so we rewrote that section by extending the discussion of our BCG-CORONA example with the results from that ALL-IN meta-analysis, thus adding a completely worked-out example to the paper. Version 2 further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper (ALL-IN-META-BCG-CORONA) with a reflection on the collaboration, summary of results and references to a complete replication package. 
Although ALL-IN is a novel statistical approach to meta-analysis, it does provide statistical summaries for evidence synthesis that can be reported in a familiar way – e-values take the role of p-values, and anytime-valid confidence intervals the role of conventional confidence intervals. Yet due to their anytime-validity, this statistical summary can be Live and can be the Leading source of information in decisions on future research, even at INterim stages of clinical trials. So while the statistical approach fits within mainstream understanding of evidence synthesis as an external and retrospective summary, it can also shift that understanding towards evidence synthesis as an active/involved (advising future trials/liaising with ongoing trials) and possibly prospective tool to facilitate collaboration and increase efficiency. In our Version 2 of the paper we have emphasized this aspect more, and moreover, stressed how the approach to prospective meta-analysis differs from other adaptive approaches to prospective analysis like the Framework for Prospective, Adaptive Meta-analysis (FAME), and Bayesian adaptive methods. The main difference is that the validity of the ALL-IN analysis does not rely on enforcing a maximum sample size or stopping rule, such that prospective collaborations can be more bottom-up rather than outsourcing decisions to a top-down meta-analysis team. This fact needed a better illustration in Section 3 on Collaboration, so we rewrote that section by extending the discussion of our BCG-CORONA example with the results from that ALL-IN meta-analysis, thus adding a completely worked-out example to the paper. See the authors' detailed response to the review by Shubhendu Trivedi See the authors' detailed response to the review by Junfeng Wang See the authors' detailed response to the review by Ewelina Rogozinska READ REVIEWER RESPONSES The scientific response to the COVID-19 (coronavirus disease 2019) pandemic constituted a major gamble. 
In the United States, for example, the funding program for vaccine development did not put money on a single vaccine, but on six different ones. They purposely took “multiple shots on goal” according to Larry Corey of the National Institutes of Health (NIH) COVID-19 Prevention Network in an interview with STAT ( Branswell, 2021 ). Vaccine development is not a sure thing, and so their strategy needed to be robust enough to just “let the chips fall”. Also in the search for treatments, the scientific community had to hedge its bets. Clinical trials competed for resources and patients, and had to continuously change course when new information arrived. In contrast to vaccines, however, in most countries a strategy to find treatments was lacking. Many clinical trials suffered from “poor questions, poor study design, inefficiency of regulation and conduct, and non or poor reporting of results”: research waste ( Glasziou et al. , 2020 ). We believe that more strategic thinking can benefit a future pandemic response as well as non-pandemic evidence-based medicine, as uncertainty in trial recruitment is often a given. Honest scientific bets can breathe life into the approach called living systematic reviews —that aims to keep the evidence record up-to-date ( Elliott et al. , 2017 ) and the medical guidelines current ( Akl et al. , 2017 )—as well as prospective meta-analysis —that makes these ideals collaborative, prospective ( Seidler et al. , 2019 ; Thomas et al. , 2023 ), and possibly 'real-time' ( Petkova et al. , 2020 ). ‘Real-time analysis’ was promoted early in the COVID-19 pandemic and was praised by many as a “lesson learned from COVID-19” ( Janiaud et al. , 2021 ; Lee et al. , 2022 ; Smith & Flaherman, 2021 ). We propose to achieve the necessary flexibility with ALL-IN meta-analysis in clinical trial design, monitoring and reporting. ALL-IN meta-analysis stands for Anytime Live and Leading INterim meta-analysis. 
The Anytime aspect follows from the analysis with e -values and anytime-valid confidence intervals that can be updated at any time—reanalyzing after each new observation while retaining type-I error and coverage guarantees. This allows for Live adjustments to the results so far in the most flexible way possible. Because there is no need to prespecify meta-analysis looks/updates or maximum sample size, a retrospective meta-analysis can easily become a living systematic review or a prospective meta-analysis and even run in real-time based on INterim results . The results so far can be the Leading source of information: valid no matter the decisions to initiate, stop or expand individual studies, but also not enforcing any stopping rule or maximum sample size. Hence a planned prospective meta-analysis on interim results can also adjust to slow recruitment or data sharing and analyze once trials are completed (and adapt to the difficult practice of running clinical trials) as well as the ideal scenario in which a retrospective meta-analysis adjusts to a collaborative spirit in the research community and reduces research waste in a prospective real-time meta-analysis. ALL-IN meta-analysis breathes life into the possible approaches for bottom-up living collaboration that reduce avoidable research waste. We illustrate ALL-IN meta-analysis in the setting of time-to-event data, where waiting for events is an inherent challenge of clinical trials. Combining trials early can prevent delays if studies are slow in themselves to complete the necessary number of events. ALL-IN has advantages in four categories: statistics, efficiency, collaboration and communication. We introduce all four briefly (page 5–7) before we go into more detail, but first illustrate the language of betting for single trials studying a COVID-19 vaccine. 
A single trial: the FDA COVID-19 vaccine game On June 30th, 2020, the US Food and Drug Administration (FDA) published its guidance document on “Development and Licensure of Vaccines to Prevent COVID-19” ( FDA, 2020 ). This set the goals for any Phase-III clinical trial betting on a protective effect of a vaccine against COVID-19. The guidance document advised on the definition of events of confirmed (symptomatic) SARS-CoV-2 infection for the trials to be counting. And in counting those, the document prescribed the two things to achieve: (1) at least a vaccine efficacy (VE) of 50% and (2) evidence against a null hypothesis of ≤ 30% VE ( FDA, 2020 , p. 14). Most COVID-19 vaccine trials randomized large numbers of participants 50:50 vaccine:placebo. So here we simplify the example by assuming that also throughout the trial the participants at risk stayed (approximately) balanced. This allows for a back-of-the-envelope calculation to reinterpret the design for the COVID-19 vaccine trials in the language of betting 1 . According to the definition of SARS-CoV-2 infections, we start counting once a participant has a confirmed infection after being fully vaccinated for at least a number of days, e.g. 7 days in the Pfizer-BioNTech trial ( Polack et al. , 2020 ). This is also when a (virtual) bet could start. In the following we reinterpret the design for the COVID-19 vaccine trials in the language of betting. Each new event carries evidence that we express by a betting score. We make a (virtual) investment on one of the two outcomes: either the next event occurs in the vaccine group or it occurs in the placebo group. If there is no effect of the vaccine whatsoever, the 50:50 risk set in our simplification ensures that the infected participant has a 0.5 chance of being in the vaccine group and a 0.5 chance of being in the placebo group. 
Yet, following the FDA, we do not only want to rule out an ineffective vaccine, but also reject the hypothesis that the vaccine has an effect that is too small—set as the null hypothesis of (at most) 30% VE. In that case each newly observed infection has a slightly smaller chance to be a vaccinated participant. That probability to be in the vaccine group is 0.41, since each placebo group member has a 100% risk of COVID-19 and a vaccine group member has 100–30 = 70% of the risk, which is a fraction 0.41 of the total risk (70/(100 + 70)). So if the VE is too small to be of interest we expect (at least) a fraction 0.41 of COVID-19 events to occur in the vaccine group and (at most) 0.59 in placebo. How do we bet against that and win if the vaccine has a much larger protective effect? We are betting against the probability 0.41 of the next COVID-19 event to occur in the vaccine group. If this probability actually is that large (the vaccine is not very protective; the null hypothesis) we do not want the game to be favorable under any strategy, just like the casino does not want any gambler to earn a salary playing the roulette wheel. On the other hand, we are betting in favor of a much smaller probability for the vaccine group. If this probability is smaller (the vaccine is protective; the alternative hypothesis) we do want to win money, just like a professional poker player who makes a salary out of gambling well. We use the betting scores to decide whether the vaccine is a real deal-breaker (the scores behave like the salary of a professional poker player) or whether it is not effective enough (the scores behave like anyone playing the roulette wheel). To ensure that our betting scores can show either case, we first design the game such that it is fair—under the null hypothesis—and then optimize playing the game with a strategy that is profitable—under the alternative. 
Designing a fair game under the null hypothesis Consider gambling at the roulette table where the vaccine trial analogy is like betting on red (vaccine) or black (placebo). Betting correctly doubles your investment, betting incorrectly loses everything you risked. Assuming no house edge (no 0 or 00 on the roulette wheel) and an initial €100 you do not expect to increase your investment, since you have 0.5 a chance of doubling (2 · €100) and 0.5 a chance of losing all (0 · €100). Whether you bet everything on black or red, in expectation the betting score after one round is (0.5 · 2 + 0.5 · 0) · €100, which is the initial investment €100. To achieve the same thing betting against the 0.41:0.59 probabilities instead of 0.5:0.5, your investment needs to multiply by 2.4 (1/0.41) for vaccine and 1.7 (1/0.59) for placebo. If you bet everything on vaccine you have 0.41 chance of multiplying by 2.4 (2.4 · €100) and 0.59 chance of losing all (0 · €100) and if you bet everything on placebo you have 0.59 chance of multiplying by 1.7 (1.7 · €100) and 0.41 chance of losing all (0 · €100). The expected betting score after one round is again the initial investment for both: (0.41 · 1/0.41 + 0.59 · 0) · €100 and (0.59 · 1/0.59 + 0.41 · 0) · €100. Hence, at either the roulette table or in this FDA COVID-19 vaccine game, by design the game is fair and does not favor us. After all, if our observed infections land on the vaccine and control group with the probabilities 0.41:0.59, like a spin of the roulette wheel on black and red with 0.5:0.5, we do not expect to claim an effective vaccine. Optimize playing the game under the alternative hypothesis How do we win as fast and as much as possible if our observed infections do not behave like a roulette wheel? 
It has been known since the work of Kelly (1956) and Breiman (1961) that the best way to increase your capital in the long run is to not bet all your (virtual) investment €100 on one of the two possible outcomes (red/vaccine or black/placebo) but to divide it based on the odds that make the game favorable to you. So our focus needs to be on the minimal VE of 50% from the FDA guidance. In the scenario of 50% VE, the probability that the next COVID-19 case is in the vaccine group is 1/3: if we set the risk of COVID-19 for a placebo group member to 100%, a vaccine group member has 100–50 = 50% of that risk, which is 1/3 of the total risk (50/(100 + 50)). Kelly (1956) and Breiman (1961) urge us to invest one-third (1/3 · €100) on observing the next infection in the vaccine group and two-thirds (2/3 · €100) on placebo. Likelihood ratios If we bet this way we can rewrite our betting scores in terms of a likelihood ratio . We first show this for the red-black roulette game where we double what we had put at risk on either black or red if the spin of the roulette wheel outputs the color we bet on. Just like in our strategy in the FDA COVID-19 vaccine game, we put 1/3 · €100 on red and 2/3 · €100 on black, so we win the following if the ball X lands on either red or black : $$X = \text{red}: \quad 2 \cdot \tfrac{1}{3} \cdot \text{€}100 = \frac{\mathcal{L}(1/3 \mid X)}{\mathcal{L}(1/2 \mid X)} \cdot \text{€}100, \qquad X = \text{black}: \quad 2 \cdot \tfrac{2}{3} \cdot \text{€}100 = \frac{\mathcal{L}(1/3 \mid X)}{\mathcal{L}(1/2 \mid X)} \cdot \text{€}100.$$ The Bernoulli 1/3-likelihood ℒ(1/3 | X ) assigns likelihood 1/3 when X = red and 2/3 when X = black . So if our strategy is to invest 1/3:2/3 in roulette, our payout is our initial investment €100 multiplied by the likelihood ratio, whether X is red or black . Similarly: $$X = \text{vaccine}: \quad 2.4 \cdot \tfrac{1}{3} \cdot \text{€}100 = \frac{\mathcal{L}(50\%\,\text{VE} \mid X)}{\mathcal{L}(30\%\,\text{VE} \mid X)} \cdot \text{€}100, \qquad X = \text{placebo}: \quad 1.7 \cdot \tfrac{2}{3} \cdot \text{€}100 = \frac{\mathcal{L}(50\%\,\text{VE} \mid X)}{\mathcal{L}(30\%\,\text{VE} \mid X)} \cdot \text{€}100.$$ The likelihood for 50% VE (ℒ(50% VE | X )) assigns likelihood 1/3 when X = vaccine and 2/3 when X = placebo . 
Similarly, the likelihood for 30% VE (ℒ(30% VE | X )) assigns likelihood 0.41 when X = vaccine and 0.59 when X = placebo . Hence if our strategy is to invest 1/3:2/3 in the FDA COVID-19 vaccine game, our payout is also our initial investment €100 multiplied by the likelihood ratio, whether X is vaccine or placebo . Reinvesting We assume now that we start with an initial (virtual) investment of €1 instead of €100, to easily assess our winnings based on the factor by which we multiply our initial investment. At the first observation we bet €0.33 on vaccine and €0.66 on placebo. After we observe the event in the placebo group we lose our €0.33 bet on vaccine and multiply our €0.66 on placebo by 1.7 to €1.13. The likelihood ratio between our 50% VE alternative hypothesis and our 30% VE null hypothesis—so ℒ(50% VE | X )/ℒ(30% VE | X )—is also about 1.13, so multiplying our initial investment of €1 into €1.13. On the other hand, if we observe the event in the vaccine group we lose our €0.66 bet on a placebo event and multiply our €0.33 on vaccine by 2.4 to €0.81. The likelihood ratio of a vaccine event multiplies our investment by 0.81. After each observed event we reinvest what we have left in the new bet, so multiply that with the next likelihood ratio. A winner The Pfizer/BioNTech trial observed 8 cases of COVID-19 among participants assigned to receive the vaccine and 162 cases among those assigned to placebo ( Polack et al. , 2020 ). This totals a betting score of $0.81^{8} \cdot 1.13^{162} \cdot \text{€}1$, which is about €118 million (note that 1.13 is really 1.13333 . . .). If someone wins that at the poker table, we have good reason to consider her a professional poker player with a favorable strategy, rather than a lucky beginner ( Konnikova, 2020 ). Meta-analysis The Pfizer/BioNTech trial included more than 43 thousand participants ( Polack et al. , 2020 ), which is quite unique for a clinical trial. 
Usually trials are much smaller, and scientific consensus is built through systematic reviews and retrospectively combining trials in a meta-analysis. A single-trial ALL-IN analysis can be easily extended to include more than one trial, possibly in a prospective meta-analysis, and possibly on INterim data. The decision to change the intention of the original analysis by including more trials can be strategically based on the results so-far. This follows from advantages in four categories that we will first briefly introduce and then further elaborate on in this paper: statistics, efficiency, collaboration and communication. Statistics Not all mRNA vaccines showed such favorable results as the Pfizer/BioNTech vaccine. In a press release CureVac AG (2021) announced that the final analysis of their clinical trial observed 83 events in the vaccinated group and 145 in placebo, so only a 43% VE (our calculations assuming a 50:50 balanced risk set ( r = 1 in CureVac AG (2020, p. 124) ) 2 ). Their protocol is formulated in terms of a confidence interval for the vaccine efficacy (VE): the FDA goal is for it to exclude 30%, adjusted for two interim analyses. That adjusted confidence interval at the final analysis is [25.3%, 57.1% VE] (our calculations; normal approximation interval, based on Z α/2-statistic for the nominal level α/2 = 0.02281 ( CureVac AG, 2020 , Table 8)). Regrettably, this interval does not exclude 30%. When the chips fell, this trial lost. Statistical analyses like these are essentially all-or-nothing , just as any other p < α analysis. As soon as all the α is spent—either on a few interims and a final analysis or just on one fixed sample size—we cannot continue the trial and perform subsequent analyses without violating the type-I error rate. This might be a reasonable price to pay in the urgency of a pandemic when multiple vaccines are competing, but it is a very inconvenient property for clinical trials in general. 
Usually, we do want to reanalyze a clinical trial in combination with other similar trials in a meta-analysis. Yet any p < α procedure is equivalent to setting a rejection region for the test statistic and checking whether the value for the statistic falls within that region. This rejection region is based on a sampling distribution that assumes the number of studies in the meta-analysis, and the number of participants within each study to be fixed in advance. Given such a fixed sample size (but also for any sequential stopping rule that sets a maximum sample size in advance, such as α -spending), there is only one region, and your test statistic is either in it or not. If it is not, you are not allowed to redo the analyses with an increased sample size. This problem is recognized in approaches to control type-I error for living systematic reviews ( Simmonds et al. , 2017 ). But also if the meta-analysis is not updated, the α is essentially already spent on the individual trial analyses, since the meta-analysis is an update of the trial analysis that is unscheduled and lacks type-I error control at the same level α . If the individual study analysis would have been conclusive, the meta-analysis might never be performed, and we can recognize that we are dealing with a situation of “meta-optional-stopping”. A different way to see this is by the actual sampling distribution of trials in a meta-analysis: any data-driven decision within the series—whether to accumulate more studies and when to perform the meta-analysis—changes the sampling distribution and invalidates the fixed-sample-size distribution assumed for p < α . Hence hardly any meta-analysis has valid type-I error control, when the accumulation of trials is based on strategic decisions, in other words, those that introduce accumulation bias ( Ter Schure & Grünwald, 2019 ). ALL-IN meta-analysis is not all-or-nothing and can combine all available studies, whatever the intentions of earlier analyses. 
In fact, it allows any number of new studies or participants to be included without ever spending all α . In terms of gambling, we can keep betting our virtual investment because we never lose everything. The CureVac AG (2021) results, for example, would have accumulated a betting score of $0.81^{83} \cdot 1.13^{145} \cdot \text{€}1 = \text{€}1.84$. In the setting of a pandemic with competing vaccines in large trials, this research was not continued. But in other settings, promising, but inconclusive results appear often in smaller trials that should spur new research. The analysis can preserve the evidence to reinvest in the next trial, such that we can continue to observe evidence and express it by betting on additional observations in a new trial. An ALL-IN meta-analysis can always continue testing the null hypothesis—with type-I error control—and estimating the confidence interval—with coverage guarantees. Importantly, for these tests and intervals the procedures are exactly the same no matter what decisions—so-called stopping rules, or accumulation bias processes ( Ter Schure & Grünwald, 2019 )—are at play. This is not the case for any other approach to type-I error control in living systematic reviews. Bottom-up living meta-analysis The lack of restrictions on sample size is a unique feature of ALL-IN meta-analysis that sets it apart from other statistical approaches to living systematic reviews ( Simmonds et al. , 2017 ), ‘adaptive’ ( Tierney et al. , 2021 ) or ‘real-time’ ( Petkova et al. , 2020 ) prospective meta-analysis and ‘metatrials’ ( van Haren et al. , 2021 ). The only approach that is similar was the proposal of the law of the iterated logarithm in meta-analysis ( Hu et al. , 2007 ; Lan et al. , 2003 ) and is based on early work by Robbins and colleagues ( Robbins, 1970 ) that is closely related to the methods proposed here (see e.g. Wang & Ramdas (2025) for a comparison with early work). 
(Our representation based on betting scores/likelihood ratios/ e -values has advantages in the ease of communication and its rapidly improving field of statistical methods). Other approaches actually in use require a maximum sample size or a maximum number of studies to guarantee type-I error control for all updates of the meta-analysis. The crucial difference with ALL-IN is that for those methods to be valid, participating in a prospective meta-analysis requires outside control over a trial’s data collection. Outside control is needed in the case of the ‘Framework for prospective adaptive meta-analysis’ (FAME) ( Tierney et al. , 2021 ) where a single sample size is set comprised of data from all ongoing trials, possibly at an interim of some of them, and single-analysis statistics are used. The consequence is that the intervals and p -values reported are only valid if never updated, so the analysis assumes that data collection stops after that point. No new analyses are valid after that maximum sample size, since all alpha is spent and possibly accumulation bias is introduced in a re-analysis ( Ter Schure & Grünwald, 2019 ). Similarly, outside control is needed in the operationalization of group-sequential or alpha-spending methods that need the information size relative to the final analysis ( Simmonds et al. , 2017 ), and therefore also need to enforce such maximum sample size for the reported results to be valid. Finally, even in simulation-calibrated Bayesian approaches ( Goldfeld et al. , 2021 ; van Haren et al. , 2021 ) restrictions on interims (often) and maximum sample size (always) are imposed for the simulations to terminate. So the frequentist operating characteristics (type-I error, coverage of intervals) of the analysis are not known if trials wish to continue data collection after that maximum sample size and analyze again, or in other ways deviate from the simulated scenarios. 
Hence all available methods in living systematic reviews and prospective meta-analysis are by design quite top-down, or even explicitly described as “using preestablished stopping rules for safety, efficacy, futility, and harm” ( Petkova et al. , 2020 ). The fact that ALL-IN does not require such outside control for the analysis to be valid not only simplifies the statistics, it also simplifies collaboration as a bottom-up instead of a top-down process. We will illustrate this further in our worked-out example in Section 3 on Collaboration. Efficiency Lack of efficiency has been addressed in clinical research in many ways. Not only in the proposal of living systematic reviews ( Elliott et al. , 2017 ), but also in encouragements to present new studies in the context of existing evidence ( Young & Horton, 2005 ), in advice to design new trials based on systematic reviews and meta-analysis ( Chalmers & Lau, 1993 ; Goudie et al. , 2010 ; Lau et al. , 1995 ; Lund et al. , 2016 ; Sutton et al. , 2007 ) and in pleading to prevent the “scandal” of wasteful research into clinical questions that are already answered or not of primary importance ( Altman, 1994 ; Chalmers & Glasziou, 2009 ; Glasziou & Chalmers, 2018 ; Glasziou et al. , 2020 ; Ioannidis et al. , 2014 , “research waste”). These calls have not been completely ignored, since clinical research has seen an increase in efficiency—e.g. in platform trials or prospective meta-analysis whenever collaboration is deemed possible prospectively. Nevertheless, most clinical trial data is synthesized retrospectively, and still deserves all of the above recommendations. ALL-IN meta-analysis enables these data-driven decisions that can make science more efficient; any retrospective meta-analysis can become prospective. New studies can be easily informed by the synthesis of all data so far such that exactly the right number of participants are randomized to answer a research question, no more and no less. 
Moreover, an ALL-IN meta-analysis can give an account of the evidence at any time and therefore facilitate prioritizing new studies, if more than one line of research needs additional data, but not all can be funded. Collaboration Any ALL-IN meta-analysis can easily be turned into a prospective meta-analysis and possibly a live or real-time meta-analysis, since it does not matter how many studies will eventually be combined or which study will contribute most data. Whether it is based on summary statistics ( Godolphin et al ., 2022 ; Tierney et al. , 2021 ) or on individual participant data (IPD) ( Polanin & Williams, 2016 ), involvement in the same prospective meta-analysis facilitates discussion between those running trials in the same line of research; especially if the line of research can be concluded early. Collaboration in a prospective meta-analysis (PMA) has many advantages, such as reducing research waste and bias (by specifying inclusion criteria and analysis plans in advance) and harmonizing outcome measures (only possible in a close collaboration with trials). Live or real-time PMA can increase the involvement of all trials in such a collaboration, when trials are motivated by the promise of a possible early conclusion ( Seidler et al ., 2019 ; and Thomas et al ., 2023 ). A prospective meta-analysis benefits from homogeneity. With too much heterogeneity, it can be very disheartening to update a random-effects meta-analysis, since many trials are needed to precisely estimate the between trial variation and shrink the interval ( Jackson & Turner, 2017 ; Kulinskaya & Wood, 2014 ; Sutton et al. , 2007 ). Close collaboration might prevent unnecessary heterogeneity, if trial investigators are involved in the selection of trials in the meta-analysis; especially if they can advise on the design and conduct of new trials and align inclusion criteria and endpoint definitions. A fixed-effects meta-analysis can conclude the research effort early. 
Sufficient homogeneity may be possible in close collaboration. Communication The language of betting The interpretation of evidence in terms of a betting score might help to communicate the uncertainty in statistical results. As Shafer (2021) puts it: “When statistical tests and conclusions are framed as bets, everyone understands their limitations. Great success in betting against probabilities may be the best evidence we can have that the probabilities are wrong, but everyone understands that such success may be mere luck.” Thinking in terms of bets also helps to understand when statistical analyses can be anytime-valid . If they are of the all-or-nothing kind, but reanalyzed in a meta-analysis, they are gambling while broke. (This intuition can be made mathematically precise; see the description of Neyman-Pearson testing in terms of betting in Shafer (2021) and Grünwald et al. (2024a) ). Yet if we add new studies to an ALL-IN meta-analysis, we are reinvesting the betting score that we saved from earlier studies, to evaluate whether the strategy in those earlier studies continues to succeed. Just like when reinvesting your profits in a casino from one slot machine into another, the notion of winning stays the same. Our evidence against the hypothesis of a fair casino does not change when we alternate slot machines. It does not change if we use the score so far to decide on alternating them or to decide when to cash out. If the slot machines are fair, any strategy of playing them is not expected to make money, and our notion of type-I error control holds under any dependency on past results (stopping rules or accumulation bias processes). This is the intuition that explains why it is statistically possible for the intentions of the analysis to not change its validity, such that the analysis results can change the intentions. Any meta-analysis can become a living systematic review, possibly prospective and possibly real-time. 
Other communication Those uncomfortable with the language of betting can also easily resort to any of three more familiar notions of statistical communication. Firstly, the likelihood ratios/betting scores and their generalizations, so-called e-values ( Grünwald et al. , 2024a ; Ramdas & Wang, 2024 ; Vovk & Wang, 2021 ), can be interpreted as conservative p -values by taking their inverse. If we denote any betting score or e -value by € (e.g. € = 1.84 for the CureVac trial data), then p = 1/€ is a conservative p -value (e.g. p = 1/1.84 = 0.54 for the CureVac trial data). If we communicate the p -value p = 1/€ anyone can test by comparing p < α but with the addition that this conservative p -value is anytime valid 3 and so p < α can never spend all α (it is never an all-or-nothing test). Secondly, the likelihood ratios have their own notion of evidence in the likelihood paradigm ( Royall, 1997 ). Just as well as stating that the Pfizer/BioNTech trial ( Polack et al. , 2020 ) multiplied €1 to almost €118 million and the CureVac AG (2021) trial multiplied €1 to €1.84, we can state that their data was almost 118 million times and 1.84 times more likely if we assume the FDA’s goal of 50% VE in comparison to assuming only 30% VE. For Pfizer, that sounds very good, for CureVac, not so much, and so these numbers have an interpretation of their own without imposing any α -level. Thirdly, likelihood ratios can be accepted by the Bayesian paradigm, as Bayes factors, and possibly combined with prior odds. Grünwald et al. (2024a) and Grünwald (2021) show that betting scores/ e -values and Bayes factors are closely related, although not all Bayes factors are betting scores/ e -values. 
The bottom-line for communication purposes is that the reporting by ALL-IN meta-analysis can be interpreted in many ways— p -values, likelihood ratios, Bayes factors—but regardless of the interpretation provide fully frequentist type-I error control for tests and coverage for anytime-valid confidence intervals. The remainder of this paper discusses the four categories of advantages in more detail: Statistics in Section 1 , Efficiency in Section 2 , Collaboration in Section 3 and Communication in Section 4 . We use the COVID-19 vaccine trials as running examples, based on the FDA COVID-19 vaccine game described already, but also in terms of the e-value logrank test ( Ter Schure et al. , 2024 ). In Section 3 on Collaboration we discuss an example ALL-IN meta-analysis that used this e -value logrank test to study whether the Bacillus Calmette–Guérin (BCG) vaccine, originally developed to protect against tuberculosis and named after its inventors, could protect against COVID-19 ( ter Schure et al., 2022 ). In the concluding section we will provide some broader context, with an overview of all the methods already developed— e -values, safe tests ( Grünwald et al. , 2024a ) and anytime-valid confidence intervals—methods already available in software—notably the safestats R package ( Turner et al. , 2022 )—and future work. R code for all calculations, simulations and plots is available through the software availability statement and Ter Schure, 2025 , https://doi.org/10.17605/OSF.IO/U6WTP . 1 Statistics The language of betting comes with the intuition that winning a large betting score has a small probability if the null hypothesis is generating our observations (e.g. the roulette wheel is fair). We will make this intuition precise and show how to control the type-I error by bounding this probability by Markov’s inequality and Ville’s inequality. Crucial here is that the betting score underlying our test is an e-value . 
The language of betting also comes with the intuition that when playing a game that is favorable to us in principle, we can use strategies of different quality: even among all strategies under which we expect to get richer, some of them can be expected to earn us much more than others. We will relate the more well-known notion of power to such a different notion of optimality . In the following we discuss both e-values and optimality first for a single trial (in the FDA COVID-19 vaccine game and more generally) and then for ALL-IN meta-analysis. We conclude by a generalization of optimal e -value tests to anytime-valid confidence intervals. 1.1 Under the null: e -values in a single trial To make the FDA COVID-19 vaccine game fair we imposed a multiplication by 2.4 (or 170/70) if we observe the event in the vaccine group and 1.7 (or 170/100) if we observe it in the placebo group. This multiplication has expectation 1 (or smaller) if we assume the null hypothesis of a vaccine with negligible VE of 30% (or smaller). In case of 30%, we have probability 0.41 (or 70/170) to observe a vaccine event and probability 0.59 (or 100/170) to observe placebo, so in expectation we multiply our investment by 1. For example putting 1/3 on vaccine and 2/3 on placebo: 1/3· 70/170 · 170/70 + 2/3· 100/170 ·170/100 = 1. No matter how we invest in the two outcomes, (e.g. try putting 1/2 on vaccine and 1/2 on placebo, or something different) in expectation under the null we multiply the initial investment by 1. This means that our betting score is an e -value, since by definition an e -value is the outcome of a nonnegative random variable with expectation (at most) 1 under the null hypothesis ( Grünwald et al. , 2024a ; Ramdas & Wang, 2024 ; Vovk & Wang, 2021 ). Our betting score could also be rewritten as a likelihood ratio, so the expectation of the likelihood ratio ( ℒ (50% VE | X )/ ℒ (30% VE | X )) is 1 as well. 
We henceforth write the likelihood ratio after n rounds of betting (or after observing n events) as $LR^{(n)}$, with for the FDA COVID-19 vaccine game $$LR^{(n)} = \prod_{i=1}^{n} \frac{\mathcal{L}(50\%\,\text{VE} \mid X_i)}{\mathcal{L}(30\%\,\text{VE} \mid X_i)}. \tag{1}$$ Using its expectation of 1, Markov’s inequality bounds the probability of observing a large multiplication of our investment (a large likelihood ratio) by α after n = 170 rounds as follows: $$P_{30\%\,\text{VE}}\left[ LR^{(170)} \geq 1/\alpha \right] \leq \frac{E_{30\%\,\text{VE}}\left[ LR^{(170)} \right]}{1/\alpha} = \frac{1}{1/\alpha} = \alpha.$$ Figure 1 shows at the right side the histogram of betting scores in the FDA COVID-19 vaccine game after 170 events when we simulate events under the null hypothesis, with probability 0.41 to occur in the vaccine group, corresponding to 30% VE. A line is shown at 40, and indeed no more than α = 1/40 = 2.5% of the scores seem to be larger than that threshold. In fact, in these 1000 runs of simulation only 0.3% of the runs have a betting score larger than 40; Markov’s inequality is a loose bound. We also have a stronger result because we obtained our betting score over events by multiplying the score of the rounds (see ( 1 ), corresponding to reinvesting our winnings), called Ville’s inequality. We get the following from Ville (1939) : Figure 1. 1000 simulated betting scores in the FDA COVID-19 vaccine game over betting rounds n assuming a probability of 0.41 (70/170) for each event to occur in the vaccine group (the null hypothesis of 30% VE) and optimal Kelly betting for 50% VE. The dashed line is the threshold 1/ α = 40 one-sided. The histogram at the right shows the betting score/ $LR^{(170)}$ after 170 events. Note that the expectation of 1 of the scores is not the mode of its distribution nor its median and that the vertical axis is on a logarithmic scale. $$P_{30\%\,\text{VE}}\left[ LR^{(n)} \geq 1/\alpha \ \text{for some } n \right] \leq \alpha.$$ 
Ville’s inequality is also illustrated in Figure 1 : if we take the sequence of rounds into account, still only a few out of the 1000 simulations ever reach a betting score larger than 40. In fact, in these 1000 runs of simulation only 1.1% of the runs have a betting score that is larger than 40 at any round in the game, such that our type-I error is controlled at α = 2.5% at any time. Moreover, this type-I error control is not tied to this maximum number of 170 events, but continues to hold with an unlimited horizon. Making a large profit in such a fair game casts doubt on the null hypothesis and is captured by a likelihood ratio that grows away from 1: a large betting profit is obtained if the null likelihood is performing worse than the alternative. When trials can be summarized as bets Before they can be combined in a meta-analysis, individual trials are often characterized by the summary statistics from trial publications. Conventional meta-analysis combines these statistics (e.g. mean differences and standard deviations) in a Z -statistic ( Borenstein et al. , 2009 ). Unlike the vaccine/placebo outcomes that we have seen so far, such a Z -statistic has a continuous density and cannot be summarized by separately dealing with all possible outcomes. Fortunately, Shafer (2021) shows that any likelihood ratio of distributions can be viewed as a betting score in a game with initial investment €1. This is possible because likelihood ratios have expectation 1 in general if we assume the null hypothesis in the denominator of the ratio to generate the data. For a Z -statistic we have two normal distributions with variance 1, one with mean µ 0 under the null hypothesis, and one with µ 1 under the alternative. If the data is generated by the null model, the expectation of the likelihood ratio is $$E_{Z \sim \phi_{\mu_0}}\left[ \frac{\phi_{\mu_1}(Z)}{\phi_{\mu_0}(Z)} \right] = \int_z \phi_{\mu_0}(z)\, \frac{\phi_{\mu_1}(z)}{\phi_{\mu_0}(z)}\, dz = \int_z \phi_{\mu_1}(z)\, dz = 1, \tag{2}$$ since ϕ µ 1 ( z ) is a probability density that integrates to 1. 
This means that any such likelihood ratio for a Z -statistic is an e -value and can be used to construct tests by betting. Not all summary statistics can be assumed to form a Z -statistic with a normal distribution. Fortunately for the logrank statistic this is reasonable ( Ter Schure et al. , 2024 ) if studies are large and the effect size not too extreme (hazard ratios not too far away from 1). We will use the logrank Z -statistic as a running example for meta-analysis on summary statistics. For an IPD meta-analysis (on individual participant data), however, we recommend to use the exact e -value logrank test from Ter Schure et al. (2024) that is valid regardless of the randomization (e.g. 1:1 balanced or 1:2 unbalanced), the number of participants at risk, the number of events or the size of the effect. 1.2 Under the null: e -values in a (live) meta-analysis Assume we want to perform a meta-analysis and we collect a Z -statistic Z i from each trial i , e.g. a logrank statistic. Before observing Z i we construct an honest bet LR i = ϕ µ 1 ( Z i )/ ϕ µ 0 ( Z i ) for each trial that is an e -value and thus has type-I error control under the null hypothesis ϕ µ 0 —for a default logrank statistic this is always µ 0 = 0 corresponding to hazard ratio of 1. If we think of the betting score from the first study and invest it in the second study, we are in fact multiplying likelihood ratios. We need to have a notion of time t , such that at each time we know the number of studies k 〈 t 〉 so far and the number of observations n i 〈 t 〉 in each study i . Note that for the logrank statistic, and time-to-event in general, the number of observations or sample size n is the number of events. If we assume that all studies are completed at time t with n 1 , n 2 , ... 
, n k events summarized by logrank Z -statistics $z_1^{(n_1)}, z_2^{(n_2)}, \ldots, z_k^{(n_k)}$ we can construct our ALL-IN bet as follows: $$LR_{\text{META}}^{\langle t \rangle} = \prod_{i=1}^{k^{\langle t \rangle}} LR_i^{(n_i)} = \prod_{i=1}^{k^{\langle t \rangle}} \frac{\phi_{\mu_1 \sqrt{n_i}}\left( z_i^{(n_i)} \right)}{\phi_0\left( z_i^{(n_i)} \right)}. \tag{3}$$ The global null hypothesis Each trial bet is testing the same null hypothesis µ 0 = 0 in ( 3 ), such that the ALL-IN meta-analysis bet tests a global null hypothesis of no effect (0% VE, hazard ratio 1) in all trials. Such a global null hypothesis can be rejected with a contribution from each trial, but also in case only one trial observes a large score betting against the hypothesis and no other trial observes a very small betting score that loses those winnings again. After all, the global null (null in each trial) is rejected as soon as the null is rejected in one of the trials. Meta-analysis on interim data We can generalize this ALL-IN meta-analysis bet of completed trials to bets on interim data by assuming that we only have an interim logrank Z -statistic $z_1^{\langle t \rangle}, z_2^{\langle t \rangle}, \ldots, z_k^{\langle t \rangle}$ for the $n_1^{\langle t \rangle}, n_2^{\langle t \rangle}, \ldots, n_k^{\langle t \rangle}$ events observed so far at time t ; $k^{\langle t \rangle}$ still represents the number of studies so far at time t , but now these studies are not (all) completed. We construct our ALL-IN bet in a similar way: $$LR_{\text{META}}^{\langle t \rangle} = \prod_{i=1}^{k^{\langle t \rangle}} LR_i^{\left( n_i^{\langle t \rangle} \right)} = \prod_{i=1}^{k^{\langle t \rangle}} \frac{\phi_{\mu_1 \sqrt{n_i^{\langle t \rangle}}}\left( z_i^{\langle t \rangle} \right)}{\phi_0\left( z_i^{\langle t \rangle} \right)}. \tag{4}$$ From the perspective of Ville’s inequality, the analysis on completed trials and the one on interim data are indistinguishable. The only thing that matters is that we include all the data we have so far at time t , such that we have type-I error control $$P_0\left[ LR_{\text{META}}^{\langle t \rangle} \geq 1/\alpha \ \text{for some } t \right] \leq \alpha, \tag{5}$$ for the global null hypothesis probability P 0 with an unlimited horizon over time t . This is why any ALL-IN meta-analysis can become prospective and live or real-time on INterim data. 
The reported analysis is the same, no matter the intentions of the analysis. 1.3 Under the alternative: optimality in a single trial A power analysis sets a very specific goal for a trial, usually to detect an effect of minimal clinical importance (MCID, minimal-clinically important difference) or smallest effect size of interest (SESOI). This is the effect we would not like to miss if it were there, although we hope that the real effect is larger. We nevertheless use this effect size of minimal clinical importance to decide on the sample size of the trial, otherwise we risk a futile trial. The FDA was clear on what this minimal effect should be for the COVID-19 vaccine trials: a VE of 50% ( FDA, 2020 ). This is the effect we used to bet in the FDA COVID-19 vaccine game. Our strategy in the FDA COVID-19 vaccine game, however, was not trying to achieve optimal power. If we look at the all-or-nothing confidence interval for CureVac AG (2021) from the introduction—the final analysis on 83+145 events—we notice that this confidence interval [25.3%, 57.1% VE] is narrower than the final anytime valid confidence interval we show in Figure 3 in Section 1.5 , which is [17.1%, 61.6% VE] 4 . The difference is that the former one is optimized to have spent all α at the final analysis, while the latter one is optimized to continue data collection. Power is the probability of finding the desired result using the specified analysis at a sample size or stopping rule. So for an analysis that is intended to have unlimited horizon, power is not a well-defined concept. Instead the literature on e -values optimizes the expected rate at which the evidence grows for each new data point, not at a specific sample size (called growth-rate optimality (GRO) in Grünwald et al. (2024a) , log-optimality in Ramdas & Wang (2024) , and the unique e -value that is optimal in this way is called the numeraire ( Larsson et al ., 2024 )). 
The worst case here is the 50% VE for a one-sided alternative hypothesis $H_1 = \{P_{\mathrm{VE}} : 50\% \leq \mathrm{VE} \leq 100\%\}$. We optimized the FDA bet in the introduction by putting this 50% VE in the alternative likelihood. This can be rewritten in terms of a likelihood ratio for the logrank statistic Z as follows: $$LR^{(n)} = \prod_{i=1}^{n} \frac{\mathcal{L}(50\%\,\mathrm{VE} \mid X_i)}{\mathcal{L}(30\%\,\mathrm{VE} \mid X_i)} = \frac{\mathcal{L}(50\%\,\mathrm{VE} \mid X_1, \ldots, X_n)}{\mathcal{L}(30\%\,\mathrm{VE} \mid X_1, \ldots, X_n)} \approx \frac{\phi_{\mu_{\min}\sqrt{n}}\big(Z^{(n)}\big)}{\phi_{\mu_0\sqrt{n}}\big(Z^{(n)}\big)}, \qquad (6)$$ with $\mu_{\min} = \tfrac{1}{2}\log(0.5)$ and $\mu_0 = \tfrac{1}{2}\log(0.7)$ with 0.5 and 0.7 the hazard ratios corresponding to VE of 50% and 30% respectively (see Ter Schure et al. (2024) ). So our one-sided alternative hypothesis for the logrank Z -statistic is a Z -distribution with a mean representing an effect that is at least $\mu_{\min}$ ('at least' here means more negative, since for a hazard reduction $\mu_{\min}$ is negative): $H_1 = \{\phi_{\mu_1} : \mu_1 \leq \mu_{\min}\}$ (since positive VE corresponds to a negative $\mu$ ). Our choice of the parameter of the alternative likelihood $\mu_{\min}$ follows directly from the minimal effect set by the FDA. Kelly (1956) already showed that this way of betting optimizes the way our betting score grows if the true VE is 50% (our worst-case scenario). Breiman (1961) showed that this approach also minimizes the expected number of events we need to reach a given betting score set in advance (e.g. €1/ α ), for which some intuition is given in Figure 2 . Grünwald et al. (2024a) , Shafer (2021) and the appendix to Ter Schure et al. (2024) give various other reasons why this is the best way to bet, relating it to data compression, information theory, Neyman-Pearson testing, Gibbs’ inequality, and Wald’s identity. The most crucial property for the purposes of ALL-IN meta-analysis is that the alternative likelihood puts some money on each possible outcome, such that no matter what outcome we observe, we keep some of the money we risk: our betting score can become small but not 0. 
This contrasts the approach with a classic p < α test that essentially puts all money on the rejection region, such that if the outcome is not in it, we lose all and cannot continue betting. A thorough interpretation of Neyman-Pearson testing and p -values in terms of betting is given by both Grünwald et al. (2024a) and Shafer (2021) . Figure 2. $N_\alpha$ is the expected number of events needed to reach a betting score of 1/ α = 40 for α = 0.025 if we bet according to $\mathrm{VE}_1$ indicated by the three different lines, with bets each of the form $\prod_{i=1}^{N} \frac{\mathcal{L}(\mathrm{VE}_1 \mid X_i)}{\mathcal{L}(30\%\,\mathrm{VE} \mid X_i)}$. The number of events we need decreases if the true vaccine efficacy (VE) underlying the data increases (the true difference in risk between vaccine and control is larger). The smallest number of events for a true VE of 40% is reached by betting $\mathrm{VE}_1$ of 40% (blue solid line), the smallest number of events for a true VE of 50% by betting $\mathrm{VE}_1$ of 50% (orange dotted line) and the smallest number for true VE of 60% by betting $\mathrm{VE}_1$ of 60% (grey dashed line). Note that for the alternative in the FDA COVID-19 vaccine game $H_1 = \{P_{\mathrm{VE}} : 50\% \leq \mathrm{VE} \leq 100\%\}$ we are only interested in playing the game well if the true VE is 50% or larger. Since for larger true VE, taking $\mathrm{VE}_1 = 50\%$ performs quite well, our strategy is to optimize for the worst case of 50% VE itself and use the bet with $\mathrm{VE}_1 = 50\%$ in the FDA COVID-19 vaccine game. This figure uses Wald's identity. Specific calculations are available in R code on OSF linked in the Software availability section. Figure 3. Anytime-valid 95%-confidence intervals for a random ordering of the 83 events in the vaccine group and 145 events in placebo from the CureVac AG (2021) trial. Note that the vertical axis is on a logarithmic scale and that the hazard ratio scale on the right is flipped: smaller values at the top mean smaller risk in the CureVac vaccinated group and therefore larger vaccine efficacy. 
1.4 Under the alternative: optimality in a meta-analysis ALL-IN meta-analysis allows for a retrospective meta-analysis that is bottom-up. The betting score that we accumulate by reinvesting from one trial into the other (which is multiplying betting scores) has an interpretation without enforcing a common design or stopping rule on all included trials. This is especially important if trials have their own stopping rules, or if accumulation processes are at play that influence the existence of trials based on earlier (trial) results in the same meta-analysis. While a meta-analysis can be bottom-up and each trial can have its own design and effect of minimal interest, it can be advisable to agree on a $\mu_{\min}$ for the meta-analysis. However, the meta-analysis betting score can also allow each trial i to have its own alternative likelihood with parameter $\mu_{\min(i)}$ . Then the following multiplication of those betting scores is still a valid meta score with type-I guarantees: $$LR_{\mathrm{META}}^{\langle t \rangle} = \prod_{i=1}^{k\langle t \rangle} \frac{\phi_{\mu_{\min(i)}\sqrt{n_i}}\big(z_i^{(n_i)}\big)}{\phi_0\big(z_i^{(n_i)}\big)}. \qquad (7)$$ As long as $\phi_{\mu_{\min(i)}\sqrt{n_i}}$ is a probability density that integrates to 1, we have that each likelihood ratio integrates to 1 under the global null hypothesis, such that (5) holds. This means that trials can also learn their parameter $\mu_{\min(i)}$ from already completed trials. Sometimes trials are not powered to detect an effect of minimal interest, but an effect that is plausibly true based on earlier research. Kulinskaya et al. (2016) shows that such use of existing studies to power new trials can actually bias conventional meta-analysis since it introduces yet another dependency between sample size and results that is unaccounted for in any analysis that assumes a fixed sample size. For ALL-IN meta-analysis this is no problem at all, and trials can learn from each other as long as the parameter $\mu_{\min(i)}$ is fixed before seeing new data that is evaluated using that parameter in (7). In Ter Schure et al. 
(2024) we discuss the advantages of even learning the parameter within one trial using prequential plugins or Bayesian updating. In a game like the FDA COVID-19 vaccine game with a clear goal, this is inferior to GRO/log-optimality, but in other situations it could be preferred. 1.5 Anytime-valid confidence intervals The CureVac AG (2021) trial reached their final interim analysis but was not able to reject the null hypothesis of 30% VE. The trial had also been optimistically powered for 60% instead of 50% VE ( CureVac AG, 2020 ). If a trial is underpowered but still has a large number of participants in follow-up, there is good reason to continue the trial, or combine the trial with results from a new trial in a meta-analysis. However, with a total of 228 events this trial was not underpowered to reject the null hypothesis with an effect in the same ballpark as the Pfizer/BioNTech trial that reported 95% VE ( Polack et al. , 2020 ). In such a case it is very interesting to zoom in on the estimate for the effect, instead of its test. A standard confidence interval can be seen as an inversion of a hypothesis test: if the null falls outside a two-sided 95%-confidence interval it can be rejected with a one-sided type-I error level of α/2 = 0.025. In general, the interval excludes all the values for the parameter that can be rejected when representing the null hypothesis. Similarly, in our context, an anytime-valid confidence interval excludes all values of the parameter that can be rejected by the e -value test that corresponds to the betting strategy at hand. So the interval is essentially tracking a whole range of bets, each against a different null hypothesis. Figure 3 gives a sequence of anytime-valid 95%-confidence intervals for a random ordering of the CureVac AG (2021) data, one for each new observed event or betting round. 
It shows that the more events we observe, the more parameter values (values for hazard ratios, or their corresponding VEs) we can exclude from the interval. Because these intervals are valid at any time, once we can exclude a value, we never have to include it again. So we also show a sequence of intervals that is the running intersection of all the previous intervals. This of course crucially depends on the ordering, so the one shown for the CureVac AG (2021) data is just an example, since the ordering is randomly chosen. Since these intervals are anytime valid, it is possible to further shrink the intervals by continuing follow-up and observing more events. The coverage of an anytime-valid confidence interval—like an e -value test—has an unlimited horizon. An ALL-IN meta-analysis confidence interval that is based on a running intersection is of course only possible in an IPD meta-analysis, and cannot be based on summary statistics. The confidence interval shown in Figure 3 is based on the logrank Z -statistic (by repeatedly calculating it after each event), which can also be a summary statistic to achieve a single interval that is anytime-valid. The interval follows from the likelihood ratio of normal densities from ( 6 ) and follows a general recipe for constructing anytime-valid confidence intervals from Howard et al. (2021) where the hazard ratio is obtained by maximum-likelihood. The same approach can be used to obtain an ALL-IN meta-analysis confidence interval by weighing the estimates by inverse-variance (the fixed-effects approach) that Peto (1987) calls a typical hazard ratio. We illustrate this approach in our example ALL-IN meta-analysis in Section 3 on Collaboration (for technical details see the Statistical Appendix of Ter Schure et al., 2022 ) and discuss it a bit further in the final section on future research. 
2 Efficiency Trials often suffer from recruitment difficulties, with estimates of 35% (between 1994 and 2002) and 56% (between 2004 and 2016) not reaching the goal set in advance ( McDonald et al. , 2006 ; Walters et al. , 2017 ). These trials find themselves underpowered according to their own protocol: when they decide to stop the recruitment and obtain the final sample size for analysis, they have a high probability for their test statistic to fall outside the rejection region they set in advance, also if the effect they set out to find is there. During the COVID-19 pandemic many RCTs missed the peak of the pandemic in their region and were at risk of failing to meet recruitment targets, which motivated the proposal for ‘real-time’ meta-analysis ( Petkova et al ., 2020 ). Unfortunately, the literature on research waste ( Chalmers & Glasziou, 2009 ) and Evidence-Based Research ( Lund et al. , 2016 ) shows that the existing evidence base is not used well to design the new trials needed for conclusion or to interpret new research. ALL-IN meta-analysis makes this very easy to do. It comes with a simple notion of the evidence already collected and what is still needed, and a notion of a new trial’s ability to provide that: anticipated e -growth. The combination of the two has the capacity to make study design more honest, showing what a trial can add to the existing evidence base instead of just evaluating a misguided goal to single-handedly answer a research question. 2.1 The evidence so far and what is still needed An ALL-IN meta-analysis can set a prospective goal for conclusion, e.g. $\alpha = 0.0025 = 0.05^2$ corresponding to the level of α required by authorities like the FDA that ask for two trials at the α = 0.05 level. Following Ville’s inequality ( 5 ) we need a betting score of 1/ α = €400 if we start with €1 to reach a conclusion. 
Because an ALL-IN meta-analysis combines trials by reinvesting or multiplying betting scores, a very simple calculation gives the betting score we still need at any given point. If an initial trial is able to reach a score of €8, any new trial can be designed to multiply that by 50. So on its own, starting with €1 instead of €8, it would need a betting score of €50 to help the meta-analysis reach €400. We could evaluate the sample size of the new trial on its ability to reach 50, which for a fixed sample size gives the conditional power of the ALL-IN meta-analysis once the new trial is added. However, if this second trial also foresees recruitment issues, it is more difficult to evaluate its planned contribution since it will probably not be the final trial in the meta-analysis. For this, we propose a continuous notion of the ability of a study: the anticipated e -growth. This concept was first proposed by Shafer (2021) to accompany the betting score or e -value with a study design property that "tells a coherent story" unlike power and p -values where power requires a fixed significance level while the p-value does not. Shafer's concept 'implied target' specifically focused on settings in which the betting strategy is not explicit because no alternative hypothesis is specified. Here we focus on an explicit alternative hypothesis and sample size anticipated in the study design, and propose the term 'anticipated e -growth' to evaluate what a single study is thought to add to an existing line of research. We mostly use this concept to compare the ability of various studies in a meta-analysis to each other, and judge whether new studies can conclude the meta-analysis in terms of the assumptions of their study teams. The concept of 'power' does not have that cumulative nature. 
In statistical literature, power can also be a confusing concept because it is used both as a property of the study and as a property of a statistical test (for statements like 'the chi-squared test has more power than the Fisher exact test'). In the literature on testing by betting and e -values, we do not use the same term for both properties, and use 'anticipated e -value growth' for a property of study design, and use the term 'e-power' ( Ramdas & Wang, 2024 ) for the property of an e -value test. 2.2 The ability of a new trial: the anticipated e -growth The betting score, e -value or likelihood ratio summarizes the data not in just two categories—statistically significant or not statistically significant—but captures the evidence so far on its way to a certain threshold. Similarly we propose to not evaluate experimental design as all-or-nothing, but summarize its ability to build on what is already there and facilitate future research. To capture a study’s expected contribution to a series of studies, we formulate the anticipated e -growth as the multiplicative amount with which the combined evidence is expected to grow if the study—designed with a certain $\mu_{\mathrm{antic}}$ and sample size n —is added. In general, the anticipated e -growth $E^*$ is defined on a Z -statistic as follows: $$E^* = \exp\left( \mathbb{E}_{Z^{(n)} \sim \phi_{\mu_{\mathrm{antic}}\sqrt{n}}}\left[ \log\left( LR^{(n)}\big(Z^{(n)}\big) \right) \right] \right). \qquad (8)$$ The logarithm appears in equation (8) because the distribution of a betting score/ e -value / likelihood ratio based on n events is very non-symmetric and heavy tailed, with extremely large likelihood ratios occurring with not so small probability (see Figure 4 ). So the expectation of the likelihood ratio is drawn very far from its typical values by these large likelihood ratios and is not a good expression of what to expect. 
The logarithm makes the distribution more symmetric (asymptotically (for large n ) and for normal likelihood ratios even normally distributed), such that the expectation is a more meaningful summary of the evidence promised by the study. By exponentiation (exp()) we bring this expectation back to the scale of the likelihood ratio, such that it can be interpreted as a betting score or e -value. Figure 4. (and Figure 5 ) 1000 simulated sequences of betting scores by round in the FDA COVID-19 vaccine game after 160 events assuming a probability of 0.29 (40 / 140) for each event to occur in the vaccine group. This is the alternative hypothesis of 60% vaccine efficacy (VE) used to power the CureVac AG (2020) trial at a number of events of 160. The dashed line is the threshold 1 /α = 40 one-sided and the solid line is the anticipated e -growth of €104. Note that the horizontal axis is on a logarithmic scale. In the FDA COVID-19 vaccine game the expected growth rate per new event in the CureVac trial, assuming their effect of minimal interest of 60% VE, is the following: $$\exp\left( \mathbb{E}_{60\%\,\mathrm{VE}}\left[ \log\left( \frac{\mathcal{L}(50\%\,\mathrm{VE} \mid X)}{\mathcal{L}(30\%\,\mathrm{VE} \mid X)} \right) \right] \right) = \exp\left( \frac{40}{140} \cdot \log\left( \frac{50/150}{70/170} \right) + \frac{100}{140} \cdot \log\left( \frac{100/150}{100/170} \right) \right) = 1.029454.$$ The cumulative contribution of each new event is shown as the linear line on a logarithmic scale in Figure 5 . The CureVac AG ( 2020 , Table 8) design planned a final analysis at n = 160 events, so their anticipated e -growth was $1.029454^{160} \approx 104$. In comparison to anticipated e -growth of €104 at 160 events, the actual betting score €1.84 after 83 + 145 = 228 events in the press release is quite disappointing. Figure 5. (See above at Figure 4 ). The histogram for the final betting scores at the right shows the larger scores above and the smaller ones at the bottom, which means that if we turn it, it is the mirror image of the histogram in Figure 4 . The dashed line is the threshold 1 /α = 40 one-sided. 
The increase in the solid line per additional event/betting round shows the contribution to the anticipated e -growth of each event, up until the anticipated e -growth at n = 160 of 104. In this figure, the design has an approximate 79% power to observe a betting score/ e -value larger than 1 /α = 40 before 160 events and 72% power at exactly 160 events (better visible in Figure 4 ). Note that the vertical axis is on a logarithmic scale. 2.3 Honest study design An anticipated e -growth does require an honest proposal of the anticipated effect µ antic , to evaluate the merits of the study. In reality, sometimes this parameter is tweaked—e.g. setting an unrealistically large effect—to still argue for the study’s advancement with only small sample size. Or the effect size of minimal clinical importance is set after data is observed ( Wang et al. , 2018 ). This behavior is incentivized by the all-or-nothing character of Neyman-Pearson tests that also make the power analysis all-or-nothing. If your desired sample size does not meet the power hoped-for, you need to either increase it or abandon the study. This aspect of traditional analyses fully ignores the ideal of cumulative science in which one study is not expected to single-handedly answer a research question and small increments in knowledge are valuable, as long as they build towards a common goal. If they use e -values and the ALL-IN framework, researchers do not have to view their analysis as the final one, which helps them to design their study more honestly ( Lakens, 2022 ). 3. Collaboration The Evidence-Based Research Network ( Lund et al. , 2016 ) aims to always inform new research by past results and to reduce research waste by separating research ideas that are necessary from those that are wasteful. This is not easy to do, however. Different communities might have different notions of necessity or even of what is ethical (a state of so-called clinical equipoise ( Shamy et al. , 2020 )). 
It might therefore be very beneficial to have all those running new clinical trials in a field collaborate together in a prospective meta-analysis. The ALL-IN approach is the most flexible way to do so and might motivate participating trials by the promise of an early conclusion if performed in ‘real-time’ on INterim data. We ran two ALL-IN prospective meta-analyses during the COVID-19 pandemic with the involvement of seven trials in one and four in the other. All were designed to study whether the BCG vaccine, originally developed to protect against tuberculosis, could protect against COVID-19 (based on a theory of non-specific immune effects and innate immunity ( Netea et al. , 2020 )). The two meta-analyses study different populations (healthcare workers and the elderly) and two questions each: the effect of the BCG vaccine on COVID-19 infection (not necessarily symptomatic) and the effect on severe COVID-19 (indicated by hospitalizations). In the following illustration we will focus on the analysis of COVID-19 infections in the healthcare workers population, and the Secondary analysis that includes all trials 5 . The project was named ALL-IN-META-BCG-CORONA, and followed many of the recommendations in guidelines for prospective meta-analysis (see the publication ter Schure et al ., 2022 for details). It identified trials by searching clinical trial registries. It set the inclusion criteria and analysis plans in advance, before results were known. Outcome measures were harmonized among the participating trials, following consensus procedures. And all data was curated twice, by the trial statistician and the meta-analysis statistician. 
Apart from these best practices, there were also aspects of the project that are specific to using the ALL-IN statistical approach, which we will briefly discuss here: (1) outside control not necessary but keeping track of results in a dashboard, (2) top down or bottom up: the design of the trials and the meta-analysis design, (3) collaboration in a competitive field or a pandemic: type-I, type-II errors and publication bias, and (4) fixed-effects instead of random-effects meta-analysis. 3.1 Outside control not necessary but keeping track of results in a dashboard Trial researchers might hesitate to collaborate if that means outsourcing decisions in their trial on early stopping or extending follow-up to an outside committee, such as a meta-analysis Data and Safety Monitoring Board recommended by others for ‘real-time’ meta-analysis ( Petkova et al ., 2020 ). Instead, the ALL-IN approach allows for a ‘soft’ threshold that merely guides the trials; the leading source of information, but not enforcing anything. This threshold can set a stopping rule, but not in the strict sense that any rule would for a group-sequential or simulation-calibrated design. Those are much stricter because the analysis becomes invalid when the stopping rule is not enforced. In the ALL-IN approach, the threshold can inform how close to a conclusion the meta-analysis is, while also allowing individual trials to continue their follow-up based on trial-specific rationalizations. Adding new events (or even trials) after the threshold is crossed does in fact not invalidate the interpretation of the analysis. In the ALL-IN-META-BCG-CORONA collaboration, the interim meta-analysis results were communicated through a dashboard. Access to the dashboard could be managed by adding logins and changing permissions to inspect data, and was first only given to one person per trial that was necessarily already unblinded to their own trial results: the one uploading the data for the meta-analysis. 
At first, each data-uploader received a dashboard account with permissions only to inspect the meta-analysis e -value and their own trial contribution. This access of interim meta-analysis results in the dashboard served as a motivator to keep their own trial data upload up-to-date and to check the sequence of e -values for errors. After an initial period where the data-uploaders could only inspect their own trial results, they granted each other permission to inspect all the individual trial contributions. When the first trials were completed and the meta-analysis was approaching its conclusion, the results were also presented to the Advisory and Steering committees. Figure 6 shows this dashboard based on a demo login with synthetic data (this demo was available for everyone involved from the start to get an impression). Figure 6. Dashboard used to communicate interim results in ALL-IN-META-BCG-CORONA to all data uploaders with a login. The trials were performed in the Netherlands (NL), Denmark (DK), the United States (US), Hungary (HU), Brazil (BR), France (FR) and Guinea-Bissau/Mozambique (AF) 6 . The dashboard is in demo mode and shows synthetic (“fake”) data. The option to (de)select trials is for plotting purposes of individual trial e -values; all trials in the dashboard stay included in the meta e -value, following the decision from the Steering committee on trial inclusion. Note that the vertical axis is on a logarithmic scale. A dashboard for ALL-IN meta-analysis allows to spot trends in the accumulating evidence, and allows any stakeholder to monitor the evidence to prepare for crossing a threshold in the near future, e.g. for independent data monitoring committees of ongoing trials or for those considering new trials or preparing to update medical guidelines. On a log-scale, the increase in e -values is linear (in expectation, see Figure 5 ) and the observed trends can be projected forward in time, e.g. 
in Figure 6 as an increase in evidence per additional calendar day. For ALL-IN-META-BCG-CORONA, the time unit t in the definition of LR 〈 t 〉 from (4) was set to calendar days and the e -values were updated at each calendar day with an event. The dashboard plots in Figure 6 horizontal lines at 1 for trials that do not observe any events yet: they have not started betting and are still at their initial investment of €1 contributing a neutral amount to the multiplication meta- e -value. ALL-IN meta-analysis monitors e -values as events come in, also when they do so from multiple trials simultaneously. In the language of betting, even the analysis of simultaneous events is considered a sequential bet. If the bet on the events from one trial pays out €4, it multiplies our initial capital by 4, and if the events from another trial pay out €5, it does so by a factor 5. Yet if we actually consider those trials to be consecutive bets, we reinvest the €4 from the first into the second, and obtain €1 · 4 · 5 = €20, as follows from the definition of the meta-analysis e -value on interim data in ( 4 ). A dashboard such as in Figure 6 encourages inspection of each individual trial’s contribution to the meta-analysis. Since each trial’s contribution is a simple multiplication, their components can often be conveniently spotted in the agreement of the shape of the meta-analysis and individual trial lines in a dashboard like Figure 6 (as long as not too many trials are contributing simultaneously). 3.2 Top down or bottom up: the design of the trials and the meta-analysis design The design of the meta-analysis was mostly top-down by a Steering Committee, but could have also included per-trial elements. In the ALL-IN-META-BCG-CORONA project it was possible, and deemed preferable, to let the Steering Committee decide on an IPD meta-analysis on interim data (‘real-time’) and write protocols and statistical analysis plans, still fully blinded to any results. 
This timestamped three important decisions on the meta-analysis design: the null hazard ratio of 1 (VE 0 = 0%), the hazard ratio of minimal interest of 0.8 (VE 1 = 20%) and the level of α set at 0.0025 so the threshold for the e -value was at 1 /α = 400. Of these, the hazard ratio of minimal interest could have also been set differently for each trial based on individual trial designs, but not all of the trials in the collaboration were powered to detect an effect-size of minimal clinical importance specified on COVID-19 infections. So this effect-size was set in a top-down manner, preregistered ( Van Werkhoven et al. , 2021 ) and communicated through a webinar and newsletter and discussed in meetings, where each trial was represented by two members in the Advisory Committee. All documentation was made available on a project website ( Ter Schure et al. , 2020a ), and later added to a replication package ( ter Schure, 2022a ) that now accompanies the ( ter Schure et al ., 2022 ) publication. Anticipated e -growth Table 1 shows that most trials were powered for a different outcome measure 7 (indicated with ‘*’ in the VE column) and that large effect-sizes were anticipated that were informed by observational studies. The SA trial publication, for example, reports powering ‘to detect a clinically relevant target difference of 75% shown in prior BCG studies’ ( Upton et al. , 2022 ). Table 1 shows a simulated anticipated e -growth under this VE in a simulation that decreases the number of participants at risk after each observed event and calculates the exact e -value (see Ter Schure et al. (2024) ). The approximation based on a stable risk set introduced for the CureVac data in Section 2.2 , gives a close estimate in case the number of events is small compared to the number of participants at risk, e.g. for the US trial: Table 1. 
Power analysis and results for the trials reported in the Secondary analysis of ALL-IN-META-BCG-CORONA, with * indicating that a trial was not powered for COVID-19 infections. All values are obtained from openly available sources: the NL protocol publication ( ten Doesschate et al., 2020 ), the SA trial results publication ( Upton et al ., 2022 ), the US clinicaltrial.gov registration (NCT04348370), the DK protocol publication ( Madsen et al ., 2020 ), the HU EU Clinical Trial registration (2020-001783-28), the BR protocol publication ( Junqueira-Kipnis et al., 2020 ), the AF clinicaltrials.gov registration (NCT04641858), and the ALL-IN-META-BCG-CORONA publication and Replication Package ( ter Schure et al. , 2022 ). Column groups: Design (power analysis/sample size calculation), events: COVID-19 infections; Results, events: COVID-19 infections. Anticipated e-growth: given data generated under the assumption of the anticipated VE reported in the power analysis, with e-values based on VE 1 = 20% / HR 1 = 0.8 and VE 0 = 0% / HR 0 = 1. Results e-values: based on VE 1 = 20% / HR 1 = 0.8 prespecified by the Steering Committee as smallest effect-size of interest and VE 0 = 0% / HR 0 = 1.

| Trial | Design: Anticipated VE | Design: Participants | Design: Events (n) | Design: Anticipated e -growth | Results: Participants | Results: Events (n) | Results: Excess events in BCG group: sum(O-E) | Results: e -value exact | Results: Logrank Z -score | Results: e -value Gauss |
|---|---|---|---|---|---|---|---|---|---|---|
| NL | * | 1500 | | | 1496 | 206 | -8.5 | 1.884 | -1.19 | 1.889 |
| SA | 75 | 440 | 82 | 173.5 | 1000 | 172 | 5.6 | 0.097 | 0.87 | 0.096 |
| US | 60 | 1800 | 90 | 43.2 | 575 | 31 | 2.4 | 0.472 | 0.88 | 0.472 |
| DK | * | 1500 | | | 1221 | 63 | 4.0 | 0.274 | 1.02 | 0.273 |
| HU | * | 950 | | | 10 | 3 | 0.1 | 0.954 | 0.17 | 0.950 |
| BR | 50 | 400 | 38 | 3.3 | 131 | 20 | -0.7 | 1.053 | -0.35 | 1.052 |
| AF | * | 1050 | | | 364 | 80 | -5.0 | 1.982 | -1.20 | 2.284 |
| ALL-IN META Secondary analysis | | 7640 | | >>400 | 4797 | 575 | -2.0 | 0.047 | -0.17 | 0.053 |

$$\exp\left( 90 \cdot \mathbb{E}_{60\%\,\mathrm{VE}}\left[ \log\left( \frac{\mathcal{L}(20\%\,\mathrm{VE} \mid X)}{\mathcal{L}(0\%\,\mathrm{VE} \mid X)} \right) \right] \right) = \exp\left\{ 90 \left( \frac{40}{140} \cdot \log\left( \frac{80/180}{100/200} \right) + \frac{100}{140} \cdot \log\left( \frac{100/180}{100/200} \right) \right) \right\} = 1.042483^{90} = 42.3.$$ 
The anticipated e -growth in Table 1 shows that these three trials alone were anticipated to reach the 400 threshold (173.5 · 43.2 · 3.3 >> 400) and that while the SA and US trial were expected to be more important, the BR trial would still contribute, even if small on its own. As with any study design, assuming a larger effect size (larger true VE in the power analysis) results in more convincing test statistics—and the e -value is a test statistic as well. But while too optimistic effect sizes result in ‘ nothing’ in an all-or-nothing analysis—results with wide confidence intervals and large p -values—a promising but inconclusive e -value can still contribute to the line of research when combined with new data to continue reaching a conclusion. The US, HU, BR and AF trial in this ALL-IN-META-BCG-CORONA example did not manage to recruit their intended number of participants and observe the number of events. ALL-IN meta-analysis is ideally suited to still combine such data into a meta-analysis, since the analysis method does not rely on prespecified alpha spending and therefore requires no rule on the number of events. After all, the procedure of analyzing interim data is exactly the same as for analyzing a trial that is completed. E -values The e -values show that the trials collect little evidence against the global null hypothesis. Table 1 shows exact e -values following the methods based on Individual Participant Data proposed by Ter Schure et al. (2024) . These e -values in Table 1 can also be recalculated based on summary statistics, specifically the logrank Z statistic and number of events as the Gaussian e -value (see equation ( 6 ) in Section 1.3 ), as reported in Table 1 . Gaussian e -values are likelihood ratios of Gaussians comparing the HR of minimal clinical relevance to the null based on the logrank Z statistic. For example, the e -value for COVID-19 infections in trial NL can be recalculated as $$\frac{\phi\left( -1.19 \mid \mu = \tfrac{1}{2}\log(0.8)\sqrt{206} \right)}{\phi\left( -1.19 \mid \mu = 0 \right)} \approx 1.9.$$
Figure 7 shows that also in retrospect the e -values were never close to the threshold of 400. Interpreted as a likelihood ratio, e -values below 1 mean that the likelihood of the null (VE of 0%) is better than the likelihood of the effect-size prespecified as minimal importance (VE of 20%). Figure 7. Exact logrank e -values over time from the Secondary analysis of ALL-IN-META-BCG-CORONA with the final e -values also shown in Table 1 . Note that the vertical axis is on a logarithmic scale. Anytime-valid confidence intervals The lack of support for 20% VE as the effect-size of minimal clinical relevance raises the question how well the null of 0% VE is supported compared to other small effects, which is answered by the confidence intervals shown in the forest plot of Figure 8 and running forest plot in Figure 9 . These anytime-valid intervals depict all values of the hazard ratio/VE for which, if taken as the null hypothesis in an e -value, the e -value is not reaching the threshold at the 5% level, i.e. e < 1/0.05 = 20 (here we use a more lenient threshold than the 400 used in Figure 7 to reject the null early). So excluded from the interval are all values that are discredited because a successful bet was possible, resulting in a large betting score or e -value > 1/0.05 = 20. Figure 8. Forest plot for the final Secondary analysis of ALL-IN-META-BCG-CORONA. The figure shows intervals for the fixed-effects estimate advocated by Richard Peto (1987) as the ‘typical effect’. The anytime-valid confidence interval for the HU trial is (99). Note that the horizontal axis is on a logarithmic scale. The hazard ratios for the trials are estimated by maximum likelihood but can also be approximated based on the excess events and total events (n) reported in Table 1 using the Peto method ( Yusuf et al., 1985 , pp. 
366-367, Statistical Appendix, see also Simmonds et al., 2011 ) and the approximate sum of variances of the sum(O-E) assuming a stable risk set, of n·(½)·(1 – ½), e.g. for the complete ALL-IN meta-analysis: exp (sum(O−E)/sum(V) ) = exp (-2.0/ (575/4)) = 0.986. Figure 9. Simplified sequence of forest plots for the Secondary analysis of ALL-IN-META-BCG-CORONA including only three out of seven trials for a less busy plot. These make up 80% of the total meta-analysis weight ( ter Schure et al. , 2022 ) (see all seven and their weights in the final forest plot in Figure 8 ). The figure shows ALL-IN meta-analysis intervals for the fixed-effects estimate advocated by Richard Peto (1987) as the ‘typical effect’. Note that the vertical axis is on a logarithmic scale and that the hazard ratio scale on the right is flipped: smaller values at the top mean smaller risk in the BCG vaccinated group and therefore larger vaccine efficacy. No futility analyses were planned in this collaboration, but the 30% vaccine efficacy prescribed by the FDA for COVID-19 trials ( FDA, 2020 ) could have been a good candidate for a futility margin and 2.5% a good significance level for the type-I error guarantee for such a strong futility analysis. Using this margin in hindsight in Figure 9 , a futility conclusion could have been drawn as early as December 2020/January 2021, when none of the values in the black ALL-IN confidence interval were larger than 30% vaccine efficacy. At this point, the collaboration could have advised against starting new trials or extending recruitment in ongoing trials, and the AF trial (the last one to start recruitment around that time) might have seen that coming and been able to change course. So collaboration in ALL-IN meta-analysis can reduce research waste by providing a leading source of information for all trialists to adjust their aims and optimize the value of their trial. 
3.3 Collaboration in a competitive field or a pandemic: type-I, type-II errors and publication bias Prospective ALL-IN meta-analysis prevents losing type-I error control when many trials compete for answers on the same research question, e.g. in a competitive field or an uncoordinated scientific response to a pandemic. If trials are only evaluated in isolation and a response follows the first positive result of a single trial, serious multiple testing issues arise that inflate the type-I error and result in unreliable inference and, subsequently, poor decisions. This happens especially if all trials perform interim analyses on their own, and a type-I error occurs at an interim analysis before any other trial results are published to refute it. The example dashboard also clearly demonstrates decreased type-II errors: synthesizing the evidence in a meta-analysis at interim stages of the trials, and not after trials are completed, improves the ability to find an effect early. The COVID-19 pandemic showed that many individual studies can also be “at risk of failing to meet recruitment targets” ( Petkova et al ., 2020 ). A prospective meta-analysis on interim data can “honor the participation and risk assumed by the cohorts of study participants who deserve the maximal opportunity to have their participation result in useful findings” ( Petkova et al ., 2020 ). In ALL-IN-META-BCG-CORONA, one trial had such difficulty recruiting participants that the resulting findings ended up too noisy to merit a standalone publication (HU, see Table 1 ). Because the ALL-IN approach does not rely on stopping rules based on number of participants/events, this trial could still be included following the same analysis plan based on only 10 participants, as it would have been with 950 participants. So recruitment difficulties do not change the position of a trial in a prospective meta-analysis and therefore do not change the standing of the research team in consensus discussions. 
Their knowledge and experience is optimally shared, and their data is published as part of the meta-analysis, which avoids publication bias. 3.4 Fixed-effects instead of random-effects meta-analysis Sutton et al. (2007, p. 2491) note that “in a meta-analysis with considerable heterogeneity, the impact of a new (large) study will be (much) less in a random compared to fixed effect model”. This is due to the incorporation of a parameter in the model that represents the between-study variation. Also Kulinskaya & Wood (2014) find that the goal of sequentially updating a random-effect meta-analysis might involve planning a large number of small trials to estimate the between-study variance well. Even if that is considered advisable, a random-effects model result might still be very difficult to interpret ( Riley et al. , 2011 ). Hence there are various reasons to prefer the fixed-effects model to monitor evidence efficiently and to ensure that the trials are sufficiently homogeneous. Alongside the ALL-IN-META-BCG-CORONA analysis in healthcare workers we initiated a second ALL-IN meta-analysis that included trials in the elderly. Early in the process, before seeing any data, our Steering committee noticed that the two groups could be very different. Based on a theory of innate and trained immunity, they expected a different effect of the BCG vaccine on the younger immune system of healthcare workers than on the older immune system in the elderly. It could even be that the BCG vaccine effect was beneficial in the ability to fight off COVID-19 in one population but harmful in the other. In terms of statistical approach, the differences between trials can be in three categories: heterogeneous effects, conflicting effects and trial subgroup multiple testing. Heterogeneous effects Our Steering committee decided that to declare success, all included trials in health-care workers should observe an effect of 20% VE or larger. If they indeed do, heterogeneity in their effect sizes (e.g. 
one 20%, one 50%, one 25%) does not matter for their joint ability to reject the global null hypothesis of no effect in all trials. So for testing the global null, trials are allowed to be heterogeneous in the space of the alternative hypothesis H 1 = {VE: 20% ≤ VE ≤ 100%}. For estimation, however, it is not clear what the ALL-IN confidence interval is estimating if we assume that the effects in the trials are very different. Still, as a first summary, a typical effect size ( Peto, 1987 ) might be useful if we are unable to estimate a random effects model. The development of anytime-valid confidence intervals for random-effects meta-analysis is a major goal for future work. We do not, however, believe that the evidence in a line of research should be monitored based on whether this interval excludes the null hypothesis, or whether the e -value corresponding to the random-effects null model does: for testing, the global null is much more natural. Waiting for a random-effect model to reach a certain threshold is counter-intuitive, since it might require many small trials to estimate the between-trial variability instead of focusing on testing the treatment effect. Moreover, the goal of rejecting the null hypothesis corresponding to this model can be quite strange. When testing a zero-effect null hypothesis, it assumes that there are true effects of harm and true effects of benefit among the trials and that their mean is exactly zero. Conflicting effects If one of the trials has an effect smaller than 20% or even a harmful effect, we should anticipate betting scores or e -values that are smaller than 1. So a meta-analysis multiplication of those e -values would reduce the evidence available from other trials. If we can identify groups for which we expect that the trials in each group have an effect in the same direction and of at least the minimal size, we can perform separate meta-analyses. 
This was the rationale behind grouping healthcare workers and the elderly each in their own ALL-IN-META-BCG-CORONA analysis. Trial subgroup multiple testing When our analysis is exploratory, and we really have no idea how to group the various trials, we are faced with a multiple testing problem. Note that in this situation also no conventional meta-analysis method would be used to test a common null-hypothesis. We wonder whether any of the trials has the ability to reject the null hypothesis. In that case, we can divide our initial investment over the trials, and see if the totality of their bet achieves a high betting score. Research into this use of e -values has shown that indeed averaging e -values is the optimal way to have type-I error control in a standard multiple testing setting ( Vovk & Wang, 2021 ) but there are many more advances in this area even improving upon p -value based methods for false discovery rate control and family-wise error rate control ( Goeman et al. , 2025 ; Ren & Barber, 2024 ; Wang & Ramdas, 2022 ; Xu et al. , 2025 ). We return to the notion of hedging bets and averaging e -values in Section 4 . Problems with heterogeneity in meta-analysis are not tied to the ALL-IN approach and familiar to anyone working with meta-analysis methods. ALL-IN-META-BCG-CORONA had the advantage that many of the trials that started later had drawn inspiration from the protocol of the first trial. The same sort of alignment of inclusion criteria and outcome definitions might be achieved in other lines of research as well. Hence close collaboration can be very important and the promise of an early conclusion of the research effort might keep a research field motivated to keep the goals aligned. 4. Communication We have illustrated that the language of betting can be useful in interpreting results from an ALL-IN meta-analysis. Here we argue this further by giving extensions of our method that are very easily explained in terms of betting. 
4.1 The language of betting for two-sided tests Our examples so far covered one-sided tests, but those can be easily extended to two-sided tests, e.g. by taking $\mathrm{LR}^{(n)}_{\text{two-sided}} = \tfrac{1}{2} \cdot \left(\mathrm{LR}^{(n)}_{\text{left}} + \mathrm{LR}^{(n)}_{\text{right}}\right)$, with $\mathrm{LR}^{(n)}_{\text{left}} = \dfrac{\phi_{\mu_{\min(\text{left})}\sqrt{n}}\left(z^{(n)}\right)}{\phi_{\mu_0}\left(z^{(n)}\right)}$ and $\mathrm{LR}^{(n)}_{\text{right}} = \dfrac{\phi_{\mu_{\min(\text{right})}\sqrt{n}}\left(z^{(n)}\right)}{\phi_{\mu_0}\left(z^{(n)}\right)}$, to represent a two-sided alternative hypothesis $H_1 = \{\phi_{\mu_1} : \mu_1 \le \mu_{\min(\text{left})} \text{ or } \mu_1 \ge \mu_{\min(\text{right})}\}$. Such a two-sided test is easy to interpret in the language of betting. We essentially split our initial investment (e.g. €1) between the two sides of the alternative hypothesis (e.g. by betting €0.50 on one side and €0.50 on the other). Any other weighting of the two sides is also possible and corresponds to a different division of the initial investment. The crucial thing is that each side tests the same null hypothesis $H_0 = \{\phi_{\mu_0}\}$ and has expectation 1 under the null hypothesis, such that any weighted average also has expectation 1 and is an e -value. Note that for a meta-analysis at time t with $k\langle t\rangle$ studies this becomes: $$\mathrm{LR}^{\langle t\rangle}_{\text{two-sided}} := \frac{1}{2}\left(\prod_{i=1}^{k\langle t\rangle} \mathrm{LR}^{(n_i\langle t\rangle)}_{i,\text{left}} + \prod_{i=1}^{k\langle t\rangle} \mathrm{LR}^{(n_i\langle t\rangle)}_{i,\text{right}}\right). \qquad (9)$$ Usually one side of the bet is losing and the other is winning such that we do not want to reinvest (multiply) across sides but keep them separate for all trials. In our ALL-IN-META-BCG-CORONA dashboard we also visualized these two sides of the meta-analysis test separately; in Figure 6 we show only the left-sided test (for benefit) of the two. 4.2 The language of betting for co-primary endpoints Another way to hedge our bets is by considering multiple primary outcomes. In ALL-IN-META-BCG-CORONA, for example, not only were the COVID-19 events counted, but COVID-19 hospitalizations as well, as an indicator for severe disease. 
We started with α = 0.05 and put 10% on COVID-19 ( α = 0.0025 on each of the two sides of a two-sided test) and 90% on hospitalizations ( α = 0.0225 on each of the two sides of a two-sided test). So the threshold to achieve with the e -value for COVID-19 was set at 1 /α = 400 and the one for hospitalization at 1 /α = 44.44. A different way to formulate this is that each had to achieve 1 /α = 20, but that the sequence of e -values for COVID-19 started with an initial investment of €0.05 for each side of the two-sided test (and had to multiply by 400 to reach €20) and that the e -value for hospitalization started with an initial investment of €0.45 for each side (and had to multiply by 44.44 to reach €20). There are two ways to consider such a bet on two co-primary outcomes: separately and combined. If we evaluate the e -values for each primary outcome separately and reach the threshold with either of the two, we are rejecting the null for that outcome. We are doing two separate tests. If we evaluate the e -values combined, we average them weighted by their α , just as for the two sides of the two-sided test. In that case we have similar type-I error control, but reject the null hypothesis that both are null effects in favor of the alternative hypothesis that one of them is not. Yet we cannot conclude which one is non-null with the same type-I error since our α -level applies to the combined bet and the individual components of the averaged bet are essentially lost. Concluding remarks The novelty of this paper lies in a new method for meta-analysis, and specifically all the positive practical consequences of applying the method in terms of simplicity of statistics for prospective and real-time analysis, efficiency of science (and reducing avoidable research waste), bottom-up collaboration and communication/monitoring of results. We do not claim any novelty for the underlying mathematics, though. 
The basic methods we describe can be viewed as relatively minor variations of the anytime-valid tests that are designed to preserve type-I error under optional stopping, as designed by H. Robbins and his students ( Darling & Robbins, 1968 ; Robbins, 1970 ). Unfortunately and surprisingly, these tests have not caught on in statistics until a few years ago—right now they are thriving in work on so-called safe tests , anytime-valid confidence intervals and e -values e.g. Grünwald, 2024 ; Grünwald et al. (2024a) ; Grünwald et al. (2024b) ; Henzi & Ziegel (2022) ; Howard & Ramdas (2022) ; Howard et al. (2021) ; Koning, 2024 ; Pace & Salvan (2020) ; Ramdas et al. (2020) ; Ramdas et al. (2023) ; Shafer et al. (2011) ; Shafer (2021) ; Turner & Grünwald (2023) ; Turner et al. (2024) ; Vovk & Wang (2021) ; Waudby-Smith & Ramdas (2024) . This paper introduces ALL-IN meta-analysis based on Z-score methods of meta-analysis, as introduced in standard works like that by Borenstein et al. (2009) . We mainly focus on testing rather than estimation. For testing, extensions to IPD meta-analysis based on exact e -values (rather than Z-score approximations) follow easily since both can be combined by multiplication, as shown in our ALL-IN-META-BCG-CORONA example. Within this focus on testing with type-I error control, the heterogeneity question is less explicit, since under the global null hypothesis there is no heterogeneity, as discussed in Section 3.4 . In future work we will provide more details on estimation under heterogeneity, with anytime-valid confidence intervals for the fixed-effect (singular), fixed-effects (plural) and random-effects model. Extensions to network meta-analysis seem possible, but are not yet our main direction. With regard to meta-regression, much development can be expected from work on e -values in settings of composite null-hypotheses, for which research is ongoing that extends to linear regression (e.g. Lindon et al . (2024) ; Pérez-Ortiz et al. (2024) ). 
For IPD meta-analyses that take covariates into account, analysis of randomized controlled trials can use the so-called ‘Model-X’ e -value approach ( Grünwald et al ., 2024b ) but the non-randomized setting needs further research to develop e -values for Generalized Linear Mixed Models. Likelihood ratios, E -variables and e -values In this paper we presented betting scores/ e -values that are equivalent to likelihood ratios. In general though, betting scores and e -values are really generalizations of likelihood ratios that preserve the properties of likelihood ratios that give them a prominent role in statistics. Entire books have been written to advocate for summarizing evidence in observed data by a likelihood ratio ( Edwards, 1974 ; Royall, 1997 ) and to separate the goal of measuring evidence from expressing posterior beliefs and making decisions. Likelihood ratios have the property that they can “favor a true hypothesis over a false one more and more strongly” and while a likelihood ratio can be misleading, “strong evidence cannot be misleading very often” ( Royall, 1997 , p. 14). This latter type-I error control is also referred to as a universal bound by Royall (1997) and, by recognizing Ville’s inequality, can be generalized to other betting scores and e -values. A betting score € is a random outcome of a bet and its random variable is an E -variable if it is nonnegative and for all P ∈ H 0 , E P [€] ≤ 1. For a given outcome of the bet, the value of such a random variable is the e -value. Ville’s inequality relies on the multiplication of E -variables—forming a test martingale—which also has expectation smaller than 1 and thus is itself an E -variable. For the example e -values in this paper, the requirement on the expectation E 0 [ LR ] ≤ 1 holds for a simple null hypothesis, e.g. H 0 = { ϕ 0 }. Apart from likelihood ratios of two simple hypotheses, e -values can also be defined for more complicated tests—e.g. 
a t -test with a nuisance parameter for the variance—in which case the unit expectation needs to hold not for a single mean-0-normal distribution with known variance, but for all mean-0-distributions with any variance. Grünwald et al. (2024a) shows that it often is possible to construct E -variables for such composite testing problems, which is why we consider the e -value the right generalization of the likelihood ratio. Anytime-valid confidence intervals In this paper we presented anytime-valid (AV) confidence intervals (in Figure 3 , Figure 8 and Figure 9 ) for the hazard ratio or VE that are based on the Gaussian approximation on the maximum likelihood estimator of the hazard ratio (for details, see the Statistical Appendix of Ter Schure et al. , 2022 ). Research into other AV confidence intervals for the hazard ratio is still ongoing. For other estimation problems, AV confidence intervals already have been thoroughly studied, for example for medians and other quantiles ( Howard & Ramdas, 2022 ), Gaussian means ( Wang & Ramdas, 2025 ), odds ratios and risk differences ( Turner et al. , 2024 ) and estimates of multinomial count data ( Lindon & Malek, 2022 ). These have not, however, been extended to meta-analysis, and especially for the random-effects meta-analysis model, research into AV confidence intervals is a major goal of future work. Data availability Data from ALL-IN-META-BCG-CORONA is available in the Replication package ( ter Schure, 2022a ). Software availability The safestats R package ( R Core Team, 2024 ; Turner et al. , 2022 ) provides software to do an e -value analysis for the t -test, Z -test, logrank test and 2 x 2-tables. Also functions are available to calculate the power and anticipated e -growth for these study designs. Anytime-valid confidence intervals can be calculated for the effect size in the t -test, the odds ratio in 2x2-tables and the hazard ratio in time-to-event data. 
R code for the calculations, simulations and plots in this paper can be found on the Open Science Framework ( Ter Schure, 2025 , https://doi.org/10.17605/OSF.IO/D9JNY ). These are based on the packages ggplot2 ( Wickham, 2016 ), ggside ( Landis, 2024 ), ggforce ( Pedersen, 2024 ), dplyr ( Wickham et al. , 2023 ), tibble ( Müller & Wickham, 2023 ), stringr ( Wickham, 2023 ), meta ( Balduzzi et al. , 2019 ) and survival ( Therneau, 2024 ). This code is available under the terms of the Creative Commons Zero "No rights reserved" data waiver (CC0 1.0 Public domain dedication). The code that produces the dashboard in Figure 6 is not publicly available since it mainly deals with logins and permissions that only concern those involved at the time they were still (partly) blinded to the results. Acknowledgements We acknowledge Henri van Werkhoven for his confidence and nerve. Amid the chaos of the initial COVID-19 pandemic months, he quickly grasped the subtleties and benefits of our ideas and committed to intensify a network of COVID-19 research to implement ALL-IN-META-BCG-CORONA. In this partnership, he structured our thinking for this paper. We further acknowledge Marc Bonten and Mihai Netea for the atmosphere of collaboration they put in place in BCG vaccine research and their public stance against “each-small-study-on-its-own” research culture. We also thank Glenn Shafer, Daniël Lakens, Muriel Pérez and Alexander Ly for feedback and extensive discussions. We are especially grateful to Alexander for being the lead developer of the e -value logrank test software and co-meta-statistician on ALL-IN-META-BCG-CORONA. We also appreciate Alexander's help in coding the anticipated e -growth presented in Table 1 for these trials with a fixed number of events (n), within the current possibilities of the R package safestats . We thank our reviewers Junfeng Wang, Ewelina Rogozinska and Shubhendu Trivedi for useful comments. 
Footnotes 1 Note that in Section 1 (specifically at the end of Section 1.1 ) we discuss how to exactly analyze such time-to-event data using e -values taking into account that the risk set changes after occurrence of events and censoring. We expect such a reanalysis to be very well approximated by the simple calculations that are used for illustration in this introduction of the FDA game, at least for large and balanced trials like the one by Pfizer/BioNtech. Even for trials that are a lot smaller, this approximation is quite good. For example, the exact logrank e -value of 1.88 that we report for the NL trial in Table 1 can be approximated using this back-of-the-envelope calculation for the e -value of $\left(\dfrac{80/180}{100/200}\right)^{96} \cdot \left(\dfrac{100/180}{100/200}\right)^{110} = 1.33$. This is qualitatively similar to 1.88 in being much closer to 1 than to conventional thresholds like 40 or 400 (for significance level α = 0.025 and α = 0.0025). In this trial only 1496 participants were 50:50 randomized (compared to the 40 thousand in the COVID-19 vaccine trials), and 96 events were observed in the treatment and 110 events in the control group ( Ter Schure et al ., 2022 ); testing a 20% VE against a 0% VE. 2 The CureVac AG (2021) press release reports a VE of 48%, so uses a different r (ratio of follow-up time in the two groups). In such large trials r can often be assumed to stay close to 1, so we set it to 1 to make all calculations simpler. All our calculations are available as R code in the software availability statement ( Ter Schure, 2025 , https://doi.org/10.17605/OSF.IO/D9JNY ). 3 Such conservative p -values cannot be pictured as the tails of a sampling distribution since such a picture needs a sample size. Also, for conventional p -values, 1/p is not an e -value. The Introduction chapter and the appendix to Chapter 1 in the Ph.D. dissertation Ter Schure (2022b) give more details. 
4 Note that we are comparing a Z α/2 -confidence interval for α /2 = 0.02281 with α /2 = 0.025, so the wider interval cannot be attributed to the level of α , because for the same method of interval construction we get wider intervals for smaller α . 5 The primary analysis excluded the Guinea-Bissau/Mozambique (AF) trial based on a prespecified criterion on the event definition that excluded the observed infections demonstrated by serology tests in the AF trial. 6 Data sharing for the French trial (FR) was delayed considerably by data-transfer agreements, such that this trial’s results were not yet included in the publication reported in Table 1 . The South African (SA) trial, on the other hand, was much faster with data sharing and therefore included, even though it was not anticipated at the time the dashboard was launched. 7 unplanned absenteeism Faculty Opinions recommended References Akl EA, Meerpohl JJ, Elliott J, et al. : Living systematic reviews: 4. Living guideline recommendations. J Clin Epidemiol. 2017; 91 : 47–53. PubMed Abstract | Publisher Full Text Altman DG: The scandal of poor medical research. BMJ. 1994; 308 (6924): 283–284. PubMed Abstract | Publisher Full Text | Free Full Text Balduzzi S, Rücker G, Schwarzer G: How to perform a meta-analysis with R: a practical tutorial. Evid Based Ment Health. 2019; 22 (4): 153–160. PubMed Abstract | Publisher Full Text | Free Full Text Borenstein M, Hedges LV, Higgins JPT, et al. : Introduction to meta-analysis. John Wiley & Sons, Ltd, 2009. Publisher Full Text Branswell H: 12 lessons COVID-19 taught us about developing vaccines during a pandemic. 2021; Accessed: 12 July 2021. Reference Source Breiman L: Optimal gambling systems for favorable games. Fourth Berkeley Symposium. 1961. Reference Source Chalmers I, Glasziou P: Avoidable waste in the production and reporting of research evidence. Lancet. 2009; 374 (9683): 86–89. 
PubMed Abstract | Publisher Full Text Chalmers TC, Lau J: Meta-analytic stimulus for changes in clinical trials. Stat Methods Med Res. 1993; 2 (2): 161–172. PubMed Abstract | Publisher Full Text CureVac AG: Clinical trial protocol a phase 2b/3, randomized, observer-blinded, placebo-controlled, multicenter clinical study evaluating the efficacy and safety of investigational SARS-COV-2 mRNA vaccine CVnCoV in adults 18 years of age and older. 2020; Accessed: 16 July 2021. Reference Source CureVac AG: CureVac final data from phase 2b/3 trial of first-generation COVID-19 vaccine candidate, CVnCoV, demonstrates protection in age group of 18 to 60. 2021; Accessed: 16 July 2021. Reference Source Darling DA, Robbins H: Some nonparametric sequential tests with power one. Proc Natl Acad Sci U S A. 1968; 61 (3): 804–9. PubMed Abstract | Publisher Full Text | Free Full Text Edwards AWF: Likelihood: an account of the statistical concept of likelihood and its application to scientific inference. Cambridge University Press, New York, 1974; 41 (4): 427–429. Elliott JH, Synnot A, Turner T, et al. : Living systematic review: 1. introduction—the why, what, when, and how. J Clin Epidemiol. 2017; 91 : 23–30. PubMed Abstract | Publisher Full Text FDA: Development and licensure of vaccines to prevent COVID-19. 2020; Accessed: 12 July 2021. Reference Source Glasziou P, Chalmers I: Research waste is still a scandal—an essay by Paul Glasziou and Iain Chalmers. BMJ. 2018; 363 : k4645. Publisher Full Text Glasziou PP, Sanders S, Hoffmann T: Waste in COVID-19 research. BMJ. 2020; 369 : m1847. PubMed Abstract | Publisher Full Text Goeman J, de Heide R, Solari A: The e-Partitioning principle of false discovery rate control. arXiv preprint arXiv:2504.15946, 2025. Publisher Full Text Godolphin PJ, Rogozińska E, Fisher DJ, et al. : Meta-analyses based on summary data can provide timely, thorough and reliable evidence: don’t dismiss them yet. Nat Med. 2022; 28 (3): 429–430. 
PubMed Abstract | Publisher Full Text Goldfeld KS, Wu D, Tarpey T, et al. : Prospective individual patient data meta-analysis: evaluating convalescent plasma for COVID-19. Stat Med. 2021; 40 (24): 5131–5151. PubMed Abstract | Publisher Full Text | Free Full Text Goudie AC, Sutton AJ, Jones DR, et al. : Empirical assessment suggests that existing evidence could be used more fully in designing randomized controlled trials. J Clin Epidemiol. 2010; 63 (9): 983–991. PubMed Abstract | Publisher Full Text Grünwald PD: Beyond Neyman–Pearson: e-values enable hypothesis testing with a data-driven alpha. Proceedings of the National Academy of Sciences. 2024; 121 (39): e2302098121. Publisher Full Text Grünwald P: Peter D. Grünwald’s contribution to the discussion of ‘testing by betting: a strategy for statistical and scientific communication’ by Glenn Shafer. J R Stat Soc Series A. 2021; 184 (2): 440–441. Publisher Full Text Grünwald P, de Heide R, Koolen W: Safe testing. J R Stat Soc Series B Stat Methodol. 2024a; 86 (5): 1091–1128. Available on arXiv since 2019, arXiv:1906.07801. Publisher Full Text Grünwald P, Henzi A, Lardy T: Anytime-valid tests of conditional independence under model-X. J Am Stat Assoc. 2024b; 119 (546): 1554–1565. Publisher Full Text Henzi A, Ziegel JF: Valid sequential inference on probability forecast performance. Biometrika. 2022; 109 (3): 647–663. Publisher Full Text Howard SR, Ramdas A: Sequential estimation of quantiles with applications to A/B testing and best-arm identification. Bernoulli. 2022; 28 (3): 1704–1728. Publisher Full Text Howard SR, Ramdas A, McAuliffe J, et al. : Time-uniform, nonparametric, nonasymptotic confidence sequences. Ann Stat. 2021; 49 (2): 1055–1080. Publisher Full Text Hu M, Cappelleri JC, Lan KKG: Applying the law of iterated logarithm to control type I error in cumulative meta-analysis of binary outcomes. Clin Trials. 2007; 4 (4): 329–40. 
PubMed Abstract | Publisher Full Text Ioannidis JPA, Greenland S, Hlatky MA, et al. : Increasing value and reducing waste in research design, conduct, and analysis. Lancet. 2014; 383 (9912): 166–175. PubMed Abstract | Publisher Full Text | Free Full Text Jackson D, Turner R: Power analysis for random-effects meta-analysis. Res Synth Methods. 2017; 8 (3): 290–302. PubMed Abstract | Publisher Full Text | Free Full Text Janiaud P, Hemkens LG, Ioannidis JPA: Challenges and lessons learned from COVID-19 trials: should we be doing clinical trials differently? Can J Cardiol. 2021; 37 (9): 1353–1364. PubMed Abstract | Publisher Full Text | Free Full Text Junqueira-Kipnis AP, Dos Anjos LRB, Barbosa LCS, et al. : BCG revaccination of health workers in Brazil to improve innate immune responses against COVID-19: a structured summary of a study protocol for a randomised controlled trial. Trials. 2020; 21 (1): 881. PubMed Abstract | Publisher Full Text | Free Full Text Kelly JL: A new interpretation of information rate. Bell System Technical Journal. 1956; 35 (4): 917–926. Publisher Full Text Koning NW: Continuous testing. arXiv preprint arXiv: 2409.05654, 2024. Konnikova M: The biggest bluff: how i learned to pay attention, master myself, and win. Penguin, 2020. Reference Source Kulinskaya E, Huggins R, Henry Dogo S: Sequential biases in accumulating evidence. Res Synth Methods. 2016; 7 (3): 294–305. PubMed Abstract | Publisher Full Text | Free Full Text Kulinskaya E, Wood J: Trial sequential methods for meta-analysis. Res Synth Methods. 2014; 5 (3): 212–220. PubMed Abstract | Publisher Full Text Lakens D: Sample size justification. Collabra: Psychology. 2022; 8 (1): 33267. Publisher Full Text Lan KG, Hu M, Cappelleri JC: Applying the law of iterated logarithm to cumulative meta-analysis of a continuous endpoint. Statistica Sinica. 2003; 13 (4): 1135–1145. Reference Source Landis J: ggside: side grammar graphics. R package version 0.3.1. 2024. 
https://CRAN.R-project.org/package=ggside Larsson M, Ramdas A, Ruf J: The numeraire e-variable and reverse information projection. arXiv preprint arXiv:2402.18810, 2024. Publisher Full Text Lau J, Schmid CH, Chalmers TC: Cumulative meta-analysis of clinical trials builds evidence for exemplary medical care. J Clin Epidemiol. 1995; 48 (1): 45–57; discussion 59–60. PubMed Abstract | Publisher Full Text Lee JJ, Price JC, Jackson WM, et al. : COVID-19: a catalyst for transforming randomized trials. J Neurosurg Anesthesiol. 2022; 34 (1): 107–112. PubMed Abstract | Publisher Full Text Lindon M, Ham DW, Tingley M, et al. : Anytime-valid inference in linear models and regression-adjusted inference. Harvard Business School, 2024. Reference Source Lindon M, Malek A: Anytime-valid inference for multinomial count data. Adv Neural Inf Process Syst. 2022; 35 : 2817–2831. Reference Source Lund H, Brunnhuber K, Juhl C, et al. : Towards evidence based research. BMJ. 2016; 355 : i5440. PubMed Abstract | Publisher Full Text Ly A, Turner R, Pérez-Ortiz MF, et al. : R-package safestats. Maintainer: Alexander Ly , install in R by devtools::install_github("AlexanderLyNL/safestats",ref="logrank",build_vignettes=TRUE), 2021; accessed 22 September 2021. Madsen AMR, Schaltz-Buchholzer F, Benfield T, et al. : Using BCG vaccine to enhance non-specific protection of health care workers during the COVID-19 pandemic: a structured summary of a study protocol for a randomised controlled trial in Denmark. Trials. 2020; 21 (1): 799. PubMed Abstract | Publisher Full Text | Free Full Text McDonald AM, Knight RC, Campbell MK, et al. : What influences recruitment to randomised controlled trials? A review of trials funded by two UK funding agencies. Trials. 2006; 7 : 9. PubMed Abstract | Publisher Full Text | Free Full Text Müller K, Wickham H: tibble: simple data frames. R package version 3.2.1. 2023. 
https://CRAN.R-project.org/package=tibble Netea MG, Giamarellos-Bourboulis EJ, Domínguez-Andrés J, et al. : Trained immunity: a tool for reducing susceptibility to and the severity of SARS-COV-2 infection. Cell. 2020; 181 (5): 969–977. PubMed Abstract | Publisher Full Text | Free Full Text Pace L, Salvan A: Likelihood, replicability and Robbins’ confidence sequences. Int Stat Rev. 2020; 88 (3): 599–615. Publisher Full Text Pedersen T: ggforce: accelerating 'ggplot2'. R package version 0.4.2. 2024. https://CRAN.R-project.org/package=ggforce Pérez-Ortiz MF, Lardy T, de Heide R, et al. : E-statistics, group invariance and anytime-valid testing. Ann Stat. 2024; 52 (4): 1410–1432. Publisher Full Text Petkova E, Antman EM, Troxel AB: Pooling data from individual clinical trials in the COVID-19 era. JAMA. 2020; 324 (6): 543–545. PubMed Abstract | Publisher Full Text Peto R: Why do we need systematic overviews of randomized trials? (Transcript of an oral presentation, modified by the editors). Stat Med. 1987; 6 (3): 233–244. PubMed Abstract | Publisher Full Text Polack FP, Thomas SJ, Kitchin N, et al. : Safety and efficacy of the BNT162b2 mRNA COVID-19 vaccine. N Engl J Med. 2020; 383 (27): 2603–2615. PubMed Abstract | Publisher Full Text | Free Full Text Polanin JR, Williams RT: Overcoming obstacles in obtaining individual participant data for meta-analysis. Res Synth Methods. 2016; 7 (3): 333–341. PubMed Abstract | Publisher Full Text R Core Team: R: a language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria, 2024. Reference Source Ramdas A, Grünwald P, Vovk V, et al. : Game-theoretic statistics and safe anytime-valid inference. Statist Sci. 2023; 38 (4): 576–601. Publisher Full Text Ramdas A, Ruf J, Larsson M, et al. : Admissible anytime-valid sequential inference must rely on nonnegative martingales. arXiv preprint arXiv: 2009.03167. 2020. Publisher Full Text Ramdas A, Wang R: Hypothesis testing with e-values. 
2024; arXiv:2410.23614[math.ST]. Publisher Full Text Ren Z, Barber RF: Derandomised knockoffs: leveraging e -values for false discovery rate control. J R Stat Soc Series B Stat Methodol. 2024; 86 (1): 122–154. Publisher Full Text Riley RD, Higgins JPT, Deeks JJ: Interpretation of random effects meta-analyses. BMJ. 2011; 342 : d549. PubMed Abstract | Publisher Full Text Robbins H: Statistical methods related to the law of the iterated logarithm. Ann Math Statist. 1970; 41 (5): 1397–1409. Publisher Full Text Royall R: Statistical evidence: a likelihood paradigm. CRC press, 1997; 71 . . Publisher Full Text Seidler AL, Hunter KE, Cheyne S, et al. : A guide to prospective meta-analysis. BMJ. 2019; 367 : l5342. PubMed Abstract | Publisher Full Text Shafer G: Testing by betting: a strategy for statistical and scientific communication. J R Stat Soc Ser A Stat Soc. 2021; 184 (2): 407–431. Publisher Full Text Shafer G, Shen A, Vereshchagin N, et al. : Test martingales, Bayes factors and p -values. Statist Sci. 2011; 26 (1): 84–101. Publisher Full Text Shamy M, Dewar B, Fedyk M: Different meanings of equipoise and the four quadrants of uncertainty. J Clin Epidemiol. 2020; 127 : 248–249. PubMed Abstract | Publisher Full Text Simmonds M, Salanti G, McKenzie J, et al. : Living systematic reviews: 3. Statistical methods for updating meta-analyses. J Clin Epidemiol. 2017; 91 : 38–46. PubMed Abstract | Publisher Full Text Simmonds MC, Tierney J, Bowden J, et al. : Meta-analysis of time-to-event data: a comparison of two-stage methods. Res Synth Methods. 2011; 2 (3): 139–149. PubMed Abstract | Publisher Full Text Smith ER, Flaherman VJ: Why you should share your data during a pandemic. BMJ Glob Health. 2021; 6 (3): e004940. PubMed Abstract | Publisher Full Text | Free Full Text Sutton AJ, Cooper NJ, Jones DR, et al. : Evidence-based sample size calculations based upon updated meta-analysis. Stat Med. 2007; 26 (12): 2479–2500. 
PubMed Abstract | Publisher Full Text Ten Doesschate T, Moorlag SJCFM, van der Vaart TW, et al. : Two randomized controlled trials of Bacillus Calmette-Guérin vaccination to reduce absenteeism among health care workers and hospital admission by elderly persons during the COVID-19 pandemic: a structured summary of the study protocols for two randomised controlled trials. Trials. 2020; 21 (1): 481. PubMed Abstract | Publisher Full Text | Free Full Text Ter Schure J: Replication Package ALL-IN-META-BCG-CORONA. Research Equals. 2022a. Publisher Full Text Ter Schure J: Code for paper ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analysis. 2025. http://www.doi.org/10.17605/OSF.IO/D9JNY Ter Schure J: ALL-IN meta-analysis. PhD thesis, Leiden University, 2022b. https://ir.cwi.nl/pub/31587 Ter Schure J, Grünwald P: Accumulation Bias in meta-analysis: the need to consider time in error control [version 1; peer review: 2 approved]. F1000Res. 2019; 8 : 962. PubMed Abstract | Publisher Full Text | Free Full Text Ter Schure JA, Ly A, Belin L, et al. : Bacillus Calmette-Guérin vaccine to reduce COVID-19 infections and hospitalisations in healthcare workers – a living systematic review and prospective ALL-IN meta-analysis of individual participant data from randomised controlled trials. medRxiv. 2022. Publisher Full Text Ter Schure J, Ly A, Pérez-Ortiz MF, et al. : Safestats and ALL-IN meta-analysis project page. 2020a. Reference Source Ter Schure J, Pérez-Ortiz MF, Ly A, et al. : The anytime-valid logrank test: error control under continuous monitoring with unlimited horizon. N Engl J Stat Data Sci. 2024; 2 (2): 190–214. Publisher Full Text Therneau T: A package for survival analysis in R. R package version 3.7-0. 2024. https://CRAN.R-project.org/package=survival Thomas J, Askie LM, Berlin JA, et al. : Chapter 22: Prospective approaches to accumulating evidence. In: J.P.T. Higgins, J. Thomas, J. Chandler, M. Cumpston, T. Li, M.J. 
Page, V.A. Welch (editors) Cochrane Handbook for Systematic Reviews of Interventions version 6.4. Cochrane, Updated August 2023. Reference Source Tierney JF, Fisher DJ, Vale CL, et al. : A framework for prospective, adaptive meta-analysis (FAME) of aggregate data from randomised trials. PLoS Med. 2021; 18 (5): e1003629. PubMed Abstract | Publisher Full Text | Free Full Text Turner RJ, Grünwald PD: Exact anytime-valid confidence intervals for contingency tables and beyond. Stat Probab Lett. 2023; 198 : 109835. Publisher Full Text Turner RJ, Ly A, Grünwald PD: Generic E-variables for exact sequential k -sample tests that allow for optional stopping. J Stat Plan Inference. 2024; 230 : 106116. Publisher Full Text Turner R, Ly A, Pérez-Ortiz MF, et al. : safestats: Safe Anytime-Valid Inference. R package version 0.8.6, Maintainer: Alexander Ly , 2022. Reference Source Upton CM, van Wijk RC, Mockeliunas L, et al. : Safety and efficacy of BCG re-vaccination in relation to COVID-19 morbidity in healthcare workers: a double-blind, randomised, controlled, phase 3 trial. EClinicalMedicine. 2022; 48 : 101414. PubMed Abstract | Publisher Full Text | Free Full Text van Haren FMP, Richardson A, Yoon HJ, et al. : INHALEd nebulised unfractionated HEParin for the treatment of hospitalised patients with COVID-19 (INHALE-HEP): protocol and statistical analysis plan for an investigator-initiated international metatrial of randomised studies. Br J Clin Pharmacol. 2021; 87 (8): 3075–3091. PubMed Abstract | Publisher Full Text van Werkhoven CH, Ter Schure J, Bon-ten M, et al. : Anytime Live and Leading Interim meta-analysis of the impact of Bacillus Calmette-Guérin vaccination in health care workers and elderly during the SARS-COV-2 pandemic (ALL-IN-META-BCG-CORONA). 2021. Reference Source Ville J: Etude critique de la notion de collectif. Bull Amer Math Soc. 1939; 45 (11): 824. Reference Source Vovk V, Wang R: E-values: calibration, combination, and applications. Ann Stat. 
2021; 49 (3): 1736–1754. Publisher Full Text Walters SJ, Bonacho Dos Anjos Henriques-Cadby I, Bortolami O, et al. : Recruitment and retention of participants in randomised controlled trials: a review of trials funded and published by the United Kingdom Health Technology Assessment Programme. BMJ Open. 2017; 7 (3): e015276. PubMed Abstract | Publisher Full Text | Free Full Text Wang H, Ramdas A: Anytime-valid t -tests and confidence sequences for Gaussian means with unknown variance. Seq Anal. 2025; 44 (1): 56–110. Publisher Full Text Wang R, Ramdas A: False discovery rate control with e-values. J R Stat Soc Series B Stat Methodol. 2022; 84 (3): 822–852. Publisher Full Text Wang MQ, Yan AF, Katz RV: Researcher requests for inappropriate analysis and reporting: a U.S. survey of consulting biostatisticians. Ann Intern Med. 2018; 169 (8): 554–558. PubMed Abstract | Publisher Full Text Waudby-Smith I, Ramdas A: Estimating means of bounded random variables by betting. J R Stat Soc Series B Stat Methodol. 2024; 86 (1): 1–27. Publisher Full Text Wickham H: ggplot2: elegant graphics for data analysis. Springer-Verlag New York, 2016. Reference Source Wickham H: stringr: simple, consistent wrappers for common string operations. R package version 1.5.1. 2023. https://CRAN.R-project.org/package=stringr Wickham H, François R, Henry L, et al. : dplyr: a grammar of data manipulation. R package version 1.1.4. 2023. https://CRAN.R-project.org/package=dplyr Xu Z, Fischer L, Ramdas A: Bringing closure to FDR control: beating the e-Benjamini-Hochberg procedure. arXiv preprint arXiv:2504.11759, 2025. Publisher Full Text Young C, Horton R: Putting clinical trials into context. Lancet. 2005; 366 (9480): 107–108. PubMed Abstract | Publisher Full Text Yusuf S, Peto R, Lewis J, et al. : Beta blockade during and after myocardial infarction: an overview of the randomized trials. Prog Cardiovasc Dis. 1985; 27 (5): 335–371. 
PubMed Abstract | Publisher Full Text Comments on this article Comments (0) Version 2 VERSION 2 PUBLISHED 19 May 2022 ADD YOUR COMMENT Comment Author details Author details 1 Epidemiology & Data Science, Amsterdam UMC, Amsterdam, North Holland, The Netherlands 2 Machine Learning, CWI, Amsterdam, The Netherlands 3 Mathematics, Leiden University, Leiden, The Netherlands Judith ter Schure Roles: Conceptualization, Data Curation, Formal Analysis, Methodology, Software, Visualization, Writing – Original Draft Preparation, Writing – Review & Editing Peter Grünwald Roles: Funding Acquisition, Investigation, Methodology, Supervision, Writing – Review & Editing Competing interests No competing interests were disclosed. Grant information This work is part of the NWO TOP-I research programme Safe Bayesian Inference assigned to Peter Grünwald, with project number 617.001.651, which is financed by the Dutch Research Council (Nederlandse Organisatie voor Wetenschappelijk Onderzoek; NWO). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript. Article Versions (2) version 2 Revised Published: 19 Jun 2025, 11:549 https://doi.org/10.12688/f1000research.74223.2 version 1 Published: 19 May 2022, 11:549 https://doi.org/10.12688/f1000research.74223.1 Copyright © 2025 ter Schure J and Grünwald P. This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. Download Export To Sciwheel Bibtex EndNote ProCite Ref. Manager (RIS) Sente metrics Views Downloads F1000Research - - PubMed Central info_outline Data from PMC are received and updated monthly. - - Citations open_in_new 0 open_in_new 0 open_in_new SEE MORE DETAILS CITE how to cite this article ter Schure J and Grünwald P. 
ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.12688/f1000research.74223.2 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS track receive updates on this article Track an article to receive email alerts on any updates to this article. TRACK THIS ARTICLE Share Open Peer Review Current Reviewer Status: ? Key to Reviewer Statuses VIEW HIDE Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Version 2 VERSION 2 PUBLISHED 19 Jun 2025 Revised Views 0 Cite How to cite this report: Trivedi S. Reviewer Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.178216.r393268 ) The direct URL for this report is: https://f1000research.com/articles/11-549/v2#referee-response-393268 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 12 Aug 2025 Shubhendu Trivedi , Massachusetts Institute of Technology (MIT), Cambridge, MA, USA Approved VIEWS 0 https://doi.org/10.5256/f1000research.178216.r393268 No further comments to make. The revised version improves on ... Continue reading READ ALL No further comments to make. 
The revised version improves on the original manuscript, which was already top notch in my view. Competing Interests: No competing interests were disclosed. Reviewer Expertise: Machine Learning, Statistics, Conformal Prediction, Sequential Testing, Drug Discovery. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Trivedi S. Reviewer Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.178216.r393268 ) The direct URL for this report is: https://f1000research.com/articles/11-549/v2#referee-response-393268 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Respond or Comment COMMENT ON THIS REPORT Version 1 VERSION 1 PUBLISHED 19 May 2022 Views 0 Cite How to cite this report: Trivedi S. Reviewer Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.77953.r146412 ) The direct URL for this report is: https://f1000research.com/articles/11-549/v1#referee-response-146412 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
Close Copy Citation Details Reviewer Report 21 Oct 2022 Shubhendu Trivedi , Massachusetts Institute of Technology (MIT), Cambridge, MA, USA Approved VIEWS 0 https://doi.org/10.5256/f1000research.77953.r146412 The paper presents a new method for meta-analysis, motivated partly by (and for "breathing life" into) living systematic reviews that are used in the clinical domain which provide recommendations to prevent research waste. The authors christen their method to be ... Continue reading READ ALL The paper presents a new method for meta-analysis, motivated partly by (and for "breathing life" into) living systematic reviews that are used in the clinical domain which provide recommendations to prevent research waste. The authors christen their method to be ALL-IN meta-analysis, which is Anytime Live and Learning Interim Meta-Analysis. "Anytime" meaning analysis can be updated at any time and can control for type I error irrespective of any other decision making along the way. "Live" allows for a bottom-up collaboration of different trials; a trial can be initiated in any way, and we can include data from the meta-analysis itself. "Interim" permits for a combination of data from trials that are still ongoing. The paper begins with a topical motivation from the covid19 pandemic, while emphasizing that the methodology could help better evidence combination, collaboration, and communication during later pandemics, or even smaller clinical trials. Using a single trial and specifications issued by the FDA for the covid19 vaccine trials (regarding vaccine efficacy and evidence against a null hypothesis say 30% VE), the general betting game that is the language central to much of the contribution of the paper is introduced. It is shown that the same can also be written in terms of likelihood ratios and examples for scores are calculated for a Pfizer trial and a CureVac trial. 
The betting based methodology allows the statistical analysis to not simply be all or nothing (like p testing). In the all or nothing setting, we can not continue from one trial to another without violating type I error rates, while in the betting (ALL-IN) setting one can simply update patients later on. This also permits for better efficiency (we can understand the number of participants needed to answer a research question) and collaboration (since we can combine analysis as data becomes available). The language of betting also can be interpreted in various equivalent ways (likelihood ratios, conservative p values, e values) that also allow for easy and crisp communication about the analysis. The intuitions of the language of betting are made more precise using standard tools in the literature (Markov's inequality, and Ville's inequality). Further, the betting score underlying the test is an e-value which further permits statistical analysis (using the tools cited). Logrank Z statistics are used as a running example for meta-analysis on the summary statistics -- we can simply collect the Z statistics Z_i from each trial, which can easily be combined (as shown in equation 3). Notably if we add in interim data, then from the perspective of Ville's inequality, they are indistinguishable for testing. The methodology also allows for combining data from trials without requiring a common design -- this can easily be done by deciding upon a min mu parameter for each trial, using which one can still get a valid combination of different trials with valid type I guarantees. Further, the method not only captures whether an effect is statistically significant or not, it also captures evidence up till now. The language of the "implied target" of Shafer is used to make this precise, which in turn can also be used to quantify how much will the evidence change if a new study with some mu and N is added. 
The paper also reports testing the methodology during the covid19 pandemic in two meta-analysis. One involving 7 trials, and the other involving 4 trials - considering different populations (healthcare workers, and the elderly). The trials involved testing if BCG could help with COVID19 immunity. The results are discussed while discussing issues (and recommendations) for meta-analysis design, systematic search for trials, systematic reviews for trial inclusion, data upload, and disseminating results. In general, I found the paper very well written. The methodology is described very clearly, along with a glimpse of the underlying statistical tools available. The advantages and recommendations that the methodology has/implies are also discussed in detail. The underlying mathematics for testing is standard, but as far as I understand this is the first application for it in the setting considered in the paper. I would recommend the paper for acceptance. Is the rationale for developing the new method (or application) clearly explained? Yes Is the description of the method technically sound? Yes Are sufficient details provided to allow replication of the method development and its use by others? Yes If any results are presented, are all the source data underlying the results available to ensure full reproducibility? No source data required Are the conclusions about the method and its performance adequately supported by the findings presented in the article? Yes Competing Interests: No competing interests were disclosed. Reviewer Expertise: Machine Learning, Statistics, Conformal Prediction, Sequential Testing, Drug Discovery. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Trivedi S. 
Reviewer Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.77953.r146412 ) The direct URL for this report is: https://f1000research.com/articles/11-549/v1#referee-response-146412 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 19 Jun 2025 Judith ter Schure , Machine Learning, CWI, Amsterdam, The Netherlands 19 Jun 2025 Author Response We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates ... Continue reading We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Shubhendu for his very kind and thoughtful reflection on our paper. We actually like ‘ Anytime Live and Learning Interim Meta-Analysis’ as a separate interpretation of the ALL-IN acronym, even though this might have been a typo. In this paper we introduced the ALL-IN method by examples in which a smallest effect-size of interest could be set: the FDA COVID-19 vaccine game and the BCG collaboration. 
While this setting is ubiquitous in theory – with every power analysis setting a maximum sample size controlling type-II errors by pretending to be based on such an effect-size of minimal interest – in practice many clinical researchers have difficulty setting one. Moreover, in noninferiority settings it is even impossible to set one. One of us has discussed these two issues briefly in a discussion piece (Ter Schure, 2024). While the ALL-IN methods presented in our paper are optimal in both simplicity and efficiency in settings with a smallest effect-size of interest, these other settings need further research, as well as their own introduction paper. As these settings require learning the betting strategy, or alternative hypothesis parameter, from the data, we might as well call it Anytime Live and Learning meta-analysis! ter Schure, J. (2024). Judith ter Schure’s contribution to the Discussion of ‘Safe testing’ by Grünwald, de Heide, and Koolen. Journal of the Royal Statistical Society Series B: Statistical Methodology , 86 (5), 1157-1159. We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Shubhendu for his very kind and thoughtful reflection on our paper. We actually like ‘ Anytime Live and Learning Interim Meta-Analysis’ as a separate interpretation of the ALL-IN acronym, even though this might have been a typo. In this paper we introduced the ALL-IN method by examples in which a smallest effect-size of interest could be set: the FDA COVID-19 vaccine game and the BCG collaboration. 
While this setting is ubiquitous in theory – with every power analysis setting a maximum sample size controlling type-II errors by pretending to be based on such an effect-size of minimal interest – in practice many clinical researchers have difficulty setting one. Moreover, in noninferiority settings it is even impossible to set one. One of us has discussed these two issues briefly in a discussion piece (Ter Schure, 2024). While the ALL-IN methods presented in our paper are optimal in both simplicity and efficiency in settings with a smallest effect-size of interest, these other settings need further research, as well as their own introduction paper. As these settings require learning the betting strategy, or alternative hypothesis parameter, from the data, we might as well call it Anytime Live and Learning meta-analysis! ter Schure, J. (2024). Judith ter Schure’s contribution to the Discussion of ‘Safe testing’ by Grünwald, de Heide, and Koolen. Journal of the Royal Statistical Society Series B: Statistical Methodology , 86 (5), 1157-1159. Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 19 Jun 2025 Judith ter Schure , Machine Learning, CWI, Amsterdam, The Netherlands 19 Jun 2025 Author Response We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates ... Continue reading We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. 
We thank Shubhendu for his very kind and thoughtful reflection on our paper. We actually like ‘ Anytime Live and Learning Interim Meta-Analysis’ as a separate interpretation of the ALL-IN acronym, even though this might have been a typo. In this paper we introduced the ALL-IN method by examples in which a smallest effect-size of interest could be set: the FDA COVID-19 vaccine game and the BCG collaboration. While this setting is ubiquitous in theory – with every power analysis setting a maximum sample size controlling type-II errors by pretending to be based on such an effect-size of minimal interest – in practice many clinical researchers have difficulty setting one. Moreover, in noninferiority settings it is even impossible to set one. One of us has discussed these two issues briefly in a discussion piece (Ter Schure, 2024). While the ALL-IN methods presented in our paper are optimal in both simplicity and efficiency in settings with a smallest effect-size of interest, these other settings need further research, as well as their own introduction paper. As these settings require learning the betting strategy, or alternative hypothesis parameter, from the data, we might as well call it Anytime Live and Learning meta-analysis! ter Schure, J. (2024). Judith ter Schure’s contribution to the Discussion of ‘Safe testing’ by Grünwald, de Heide, and Koolen. Journal of the Royal Statistical Society Series B: Statistical Methodology , 86 (5), 1157-1159. We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. 
We thank Shubhendu for his very kind and thoughtful reflection on our paper. We actually like ‘ Anytime Live and Learning Interim Meta-Analysis’ as a separate interpretation of the ALL-IN acronym, even though this might have been a typo. In this paper we introduced the ALL-IN method by examples in which a smallest effect-size of interest could be set: the FDA COVID-19 vaccine game and the BCG collaboration. While this setting is ubiquitous in theory – with every power analysis setting a maximum sample size controlling type-II errors by pretending to be based on such an effect-size of minimal interest – in practice many clinical researchers have difficulty setting one. Moreover, in noninferiority settings it is even impossible to set one. One of us has discussed these two issues briefly in a discussion piece (Ter Schure, 2024). While the ALL-IN methods presented in our paper are optimal in both simplicity and efficiency in settings with a smallest effect-size of interest, these other settings need further research, as well as their own introduction paper. As these settings require learning the betting strategy, or alternative hypothesis parameter, from the data, we might as well call it Anytime Live and Learning meta-analysis! ter Schure, J. (2024). Judith ter Schure’s contribution to the Discussion of ‘Safe testing’ by Grünwald, de Heide, and Koolen. Journal of the Royal Statistical Society Series B: Statistical Methodology , 86 (5), 1157-1159. Competing Interests: No competing interests were disclosed. Close Report a concern COMMENT ON THIS REPORT Views 0 Cite How to cite this report: Rogozinska E. Reviewer Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . 
F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.77953.r146413 ) The direct URL for this report is: https://f1000research.com/articles/11-549/v1#referee-response-146413 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 16 Sep 2022 Ewelina Rogozinska , MRC Clinical Trials Unit, Institute of Clinical Trials and Methodology, University College London, London, UK Approved with Reservations VIEWS 0 https://doi.org/10.5256/f1000research.77953.r146413 The work by ter Schure and Grünwald outlines an alternative approach to evidence synthesis, aiming to include emerging evidence in real time without increasing type-1 error. The authors suggest that their ALL-IN method can “breathe life into living systematic reviews, ... Continue reading READ ALL The work by ter Schure and Grünwald outlines an alternative approach to evidence synthesis, aiming to include emerging evidence in real time without increasing type-1 error. The authors suggest that their ALL-IN method can “breathe life into living systematic reviews, through better and simpler statistics, efficiency, collaboration and communication”. As much as I agree with the three final points and fully support this notion, I have some doubts regarding the professed ‘simplicity’ of the proposed statistical approach, implementation, and generalizability of the method. Firstly, the approach has been developed and tested only in one very unusual setting (Covid-19 pandemic), in which the accumulation of evidence over a short space of time was extreme and sharing of data and collaboration was greater than witnessed in previous years. According to Heinze et al. (manuscript in submission) and their proposed framework of four phases of methodological research in biostatistics, the ALL-IN method would be classified as a method in a second phase of its development. 
Consequently, it requires further evaluation in a range of settings and refinement before it could be considered as a viable alternative to other available methods. This should be discussed in their paper, with declarations more balanced to reflect the single setting in which their method was applied. Secondly, contrary to the authors' claim that the introduction of terminology from game theory makes it easier to communicate the uncertainties, I am finding the sections using betting language difficult to follow. The evidence synthesis community still to some extent grapples with more standard methods of advanced evidence synthesis (Wang et al. BMJ 2021; 373: n736). 1 Thus, the introduction of new concepts (or their reintroduction) should be carefully thought through. Overall, I feel the manuscript would benefit from limiting the use of references to betting and investments to an essential minimum. Finally, the authors present a real-life example of their method using BCG vaccines trials for Covid-19. The presented example resembles an approach more akin to prospective individual participant data (IPD) meta-analysis than a living systematic review or also the referenced FAME approach - both relying on aggregate rather than individual participant data. The challenges associated with accessing IPD, the non-standard approach to data analysis and lack of clear proof of its benefits push this method toward “interesting” developments rather than “a new way forward”. The authors should explain more clearly how their method could help aggregate-level evidence synthesis or refocus the scope to IPD based projects. Furthermore, it would be interesting to learn how the ALL-IN compares to commonly used methods in terms of efficiency and reliability of obtained results. 
Concluding, the presented method is an interesting approach to evidence synthesis; however, at the current stage of its development, it requires further evaluation of its utility for the evidence synthesis to be able to bet on it. Is the rationale for developing the new method (or application) clearly explained? Yes Is the description of the method technically sound? Partly Are sufficient details provided to allow replication of the method development and its use by others? Yes If any results are presented, are all the source data underlying the results available to ensure full reproducibility? No source data required Are the conclusions about the method and its performance adequately supported by the findings presented in the article? Partly References 1. Wang H, Chen Y, Lin Y, Abesig J, et al.: The methodological quality of individual participant data meta-analysis on intervention effects: systematic review. BMJ . 2021. Publisher Full Text Competing Interests: No competing interests were disclosed. Reviewer Expertise: Conduct of systematic reviews and methodological reviews, IPD meta-analysis, bias in evidence synthesis, use of evidence synthesis in clinical practice guidelines, I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Rogozinska E. Reviewer Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.77953.r146413 ) The direct URL for this report is: https://f1000research.com/articles/11-549/v1#referee-response-146413 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. 
COPY CITATION DETAILS Report a concern Author Response 19 Jun 2025 Judith ter Schure , Machine Learning, CWI, Amsterdam, The Netherlands 19 Jun 2025 Author Response We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates ... Continue reading We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Ewelina Rogozinska for her review and for supporting our take on efficiency, collaboration and communication. We also thank her for expressing her concerns about our hopeful, but perhaps naïve, statements of simplicity in the setting of sequential meta-analysis. We indeed do not have any empirical evidence that what we propose is simple to understand. What we do know is that other available approaches to control type-I error rates in living systematic reviews have proven to be very difficult (Simmonds et al, 2017), hardly ever used and even, for general use, were recommended against by a scientific committee advising Cochrane on the matter in 2018 (reference below). Our main argument for simplicity is nicely pointed out by reviewer Shubhendu Trivedi, writing ‘ we can simply collect the Z statistics Z_i from each trial, which can easily be combined (as shown in equation 3). Notably, if we add in interim data, then from the perspective of Ville's inequality, they are indistinguishable for testing. 
’ So ALL-IN meta-analysis needs the same summary statistics as default Z-score approaches in conventional meta-analysis, and in terms of implementation that is indeed not simpler, but also not harder than conventional meta-analysis. The major point is that the procedure is the same whether the meta-analysis is the first analysis, or whether it is the 10 th update in a living systematic review, or whether it is the 100 th update in a prospective meta-analysis of interim results of ongoing trials. The latter nicely summarized by Rogozinska as ‘ including emerging evidence in real time without increasing type-1 error ’. There is no other statistical approach in use in meta-analysis for which retaining type-1 error control is that simple*. Other methods need a maximum sample size, an alpha-spending function, a stopping rule to enforce on the trials, and we are convinced that these are considerations that are a lot less simple than what we propose. We have added a section to the Version 2 of the paper making explicit that this lack of sample size restrictions is a unique feature of ALL-IN meta-analysis that does not hold for any other statistical approach in use. The introduction section on Statistics now states (references in the paper): Bottom-up living meta-analysis The lack of restrictions on sample size is a unique feature of ALL-IN meta-analysis that sets it apart from other statistical approaches to living systematic reviews , ‘adaptive’ (Tierney et al., 2021) or ‘real-time’ (Petkova et al, 2020) prospective meta-analysis and ‘metatrials’ (Van Haren et al, 2020). These other approaches require a maximum sample size or a maximum number of studies to guarantee type-I error control and interval coverage for all updates of the meta-analysis. The crucial difference with ALL-IN is that for those methods to be valid, participating in a prospective meta-analysis requires outside control over a trial’s data collection. 
Outside control is needed in the case of the ‘Framework for prospective adaptive meta-analysis’ (FAME) where a single sample size is set comprised of data from all ongoing trials, possibly at an interim of some of them, and single-analysis statistics is used. The consequence is that the intervals and p-values reported are only valid if never updated, so the analysis assumes that data collection stops after that point. No new analyses are valid after that maximum sample size, since all alpha is spent and possibly accumulation bias is introduced (Ter Schure & Grünwald, 2019). Similarly, outside control is needed in the operationalization of group-sequential or alpha-spending methods that need the information size relative to the final analysis (Simmonds et al, 2017), and therefore also need to enforce such a maximum sample size for the reported results to be valid. Finally, even in simulation-calibrated Bayesian approaches restrictions on interims (often) and maximum sample size (always) are imposed for the simulations to terminate. So the frequentist operating characteristics (type-I error, coverage of intervals) of the analysis are not known if trials wish to continue data collection after that maximum sample size and analyze again, or in other ways deviate from the simulated scenarios. Hence all available methods in living systematic reviews and prospective meta-analysis are by design quite top-down, “using preestablished stopping rules for safety, efficacy, futility, and harm” (Petkova et al, 2020). With regard to simplicity of implementation, we have decided to extend Section 3 on Collaboration with a complete worked-out and reproducible example of an ALL-IN meta-analysis. This shows, among other things, that the simplicity of the statistics strengthens the collaborations when participating trials are faced with difficulties that would otherwise invalidate the analysis. 
Examples of these include recruitment difficulties, delays in starting the trial, and meta-analysis results that might make a trial change course and directly influence the future sample size in an unforeseen way. We will address the first, second and final point in the order given. We hope that by proposing some adjustments to the paper in reply to each of her comments, we can convince Ewelina Rogozinska that the description of the method is technically sound (question 2, now answered by ‘Partly’) and that the conclusions about the method and its performance are adequately supported by the findings presented in the article (question 5, now answered by ‘Partly’). Hence we hope our V2 will be accepted without any further reservations. Firstly We wrote our paper from the hopeful perspective that many good things in science that came from the COVID-19 pandemic are here to stay, like an increase in prospective meta-analysis, data sharing and standardization of e.g. outcome measures and diagnostics. We also wrote our paper for an audience that agrees that they should. But there is nothing in the ALL-IN method that would prevent its use in a retrospective setting of meta-analysis based on summary statistics from published papers. The presentation based on Z -scores, and the worked-out example that we added in Section 3 show that, even though we favor IPD meta-analysis or collaboratively sharing summary statistics (the exact e -value itself for example), an approximate analysis often is already very good and flexible. We added the following text to the abstract: “ Because the intention of the analysis does not change the validity of the results, the results of the analysis can change the intentions (‘optional stopping’ and ‘optional continuation’ based on the results so far). 
On the one hand: any analysis can be turned into a living one, or even become ‘real-time’ by updating with new trial data or even including interim data from trials that are still ongoing – without any changes in the cut-offs for testing or the method for interval estimation. On the other hand: no stopping rule needs to be enforced for the analysis to remain valid, so participating in a prospective meta-analysis does not require outside control over data collection.” This characteristic of ALL-IN meta-analysis might even help further popularize these good things in science that came from the pandemic. Secondly We wrote our paper for an audience that considers sequential statistics to be complicated. We believe that is still our main audience because most clinical trials are not analyzed with interim analyses and most meta-analysis updates don’t take into account that repeated analyses invalidate both the p -value and the confidence intervals reported. While spending the better part of the past ten years on sequential analysis, we ourselves consider most of sequential statistics to be complicated. This is why we think that our audience would be confused if we just provided a ‘cook-book’ of ALL-IN methods and simply stated that type-I error control and coverage of intervals are guaranteed, while using the same method over and over again. We believe that a strong intuition is needed for an audience to accept that this can be done without any corrections to the method. The game theory terminology is that strong intuition. While absent in most of applied statistics today, this intuition has always been around in the history of probability theory (e.g. the work of Christiaan Huygens on games in the 17th century), and sequential statistics (e.g. George Alfred Barnard’s discussion of Wald’s probability ratio test in 1947). 
In the currently thriving literature on e -values and anytime-valid confidence intervals in mathematical statistics, the intuition of games is present almost everywhere and serves both as intuition to present existing results and as a very fruitful analogy that drives new results, see for example the overview article by Ramdas, Grünwald, Vovk and Shafer (2023). However, we agree that a good discussion is possible on whether references to betting and investments benefit intuition about statistics in general. We think they can, but as you can see from the discussion following Glenn Shafer’s JRSSA paper about this, there is plenty of disagreement. Yet for intuition about the sequential nature of ALL-IN meta-analysis, we do feel that the references to betting are essential for intuition, and the method should not be presented to readers familiar with the difficulties of sequential analysis without providing this intuition. Finally Our extension of the BCG example now shows more clearly that the ALL-IN method can be used on IPD (based on exact e -values), or can be used on summary statistics (in this example: the logrank Z-score and number of events). In fact, liaising with trials to share summary statistics can also be very fruitful just like in the FAME approach (e.g. sharing the exact e-value per trial). All the analyses of the BCG example can be reproduced based on openly available summary statistics. We hope Ewelina Rogozinska agrees that this illustrates how aggregate-level evidence synthesis is just as promising with ALL-IN methods as IPD based projects. We agree that future work should compare ALL-IN to other methods. For this introduction paper, we now focus on the settings where such a comparison is of less concern, as we write in the abstract: if the analysis changes the intentions. 
Take for example the situation in which a research team decides to update a meta-analysis and commit to a living systematic review, and after a first update decides to liaise with future trials in a prospective meta-analysis for future updates on interim results. In that situation there simply is no other statistical technique that guarantees type-I error control and confidence interval coverage, because ‘optional continuation’ decisions are made that are not prespecified and that no stopping rule-based method can handle. Hence ALL-IN breathes life into living systematic reviews in a way that no other method can. Cochrane Scientific Committee Schmid, C., Senn, S., Sterne, J., Kulinskaya, E., Posch, M., Roes, K., and McKenzie, J. (2018). Should Cochrane apply error-adjustment methods when conducting repeated meta-analyses? *It can be that simple in a fully subjective Bayesian analysis, but we have not seen any of those in use. Bayesian analysis in practice uses default priors (and possibly also informative priors, but never exclusively) and checks for frequentist operating characteristics by simulations. Aaditya Ramdas, Peter Grünwald, Vladimir Vovk, Glenn Shafer "Game-Theoretic Statistics and Safe Anytime-Valid Inference," Statistical Science, Statist. Sci. 38(4), 576-601, (November 2023) Glenn Shafer, Testing by Betting: A Strategy for Statistical and Scientific Communication, Journal of the Royal Statistical Society Series A: Statistics in Society , Volume 184, Issue 2, April 2021, Pages 407–431, https://doi.org/10.1111/rssa.12647 We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. 
Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Ewelina Rogozinska for her review and for supporting our take on efficiency, collaboration and communication. We also thank her for expressing her concerns about our hopeful, but perhaps naïve, statements of simplicity in the setting of sequential meta-analysis. We indeed do not have any empirical evidence that what we propose is simple to understand. What we do know is that other available approaches to control type-I error rates in living systematic reviews have proven to be very difficult (Simmonds et al, 2017), hardly ever used and even, for general use, were recommended against by a scientific committee advising Cochrane on the matter in 2018 (reference below). Our main argument for simplicity is nicely pointed out by reviewer Shubhendu Trivedi, writing ‘ we can simply collect the Z statistics Z_i from each trial, which can easily be combined (as shown in equation 3). Notably, if we add in interim data, then from the perspective of Ville's inequality, they are indistinguishable for testing. ’ So ALL-IN meta-analysis needs the same summary statistics as default Z-score approaches in conventional meta-analysis, and in terms of implementation that is indeed not simpler, but also not harder than conventional meta-analysis. The major point is that the procedure is the same whether the meta-analysis is the first analysis, or whether it is the 10 th update in a living systematic review, or whether it is the 100 th update in a prospective meta-analysis of interim results of ongoing trials. The latter nicely summarized by Rogozinska as ‘ including emerging evidence in real time without increasing type-1 error ’. 
There is no other statistical approach in use in meta-analysis for which retaining type-1 error control is that simple*. Other methods need a maximum sample size, an alpha-spending function, a stopping rule to enforce on the trials, and we are convinced that these are considerations that are a lot less simple than what we propose. We have added a section to the Version 2 of the paper making explicit that this lack of sample size restrictions is a unique feature of ALL-IN meta-analysis that does not hold for any other statistical approach in use. The introduction section on Statistics now states (references in the paper): Bottom-up living meta-analysis The lack of restrictions on sample size is a unique feature of ALL-IN meta-analysis that sets it apart from other statistical approaches to living systematic reviews , ‘adaptive’ (Tierney et al., 2021) or ‘real-time’ (Petkova et al, 2020) prospective meta-analysis and ‘metatrials’ (Van Haren et al, 2020). These other approaches require a maximum sample size or a maximum number of studies to guarantee type-I error control and interval coverage for all updates of the meta-analysis. The crucial difference with ALL-IN is that for those methods to be valid, participating in a prospective meta-analysis requires outside control over a trial’s data collection. Outside control is needed in the case of the ‘Framework for prospective adaptive meta-analysis’ (FAME) where a single sample size is set comprised of data from all ongoing trials, possibly at an interim of some of them, and single-analysis statistics is used. The consequence is that the intervals and p-values reported are only valid if never updated, so the analysis assumes that data collection stops after that point. No new analyses are valid after that maximum sample size, since all alpha is spent and possibly accumulation bias is introduced (Ter Schure & Grünwald, 2019). 
Similarly, outside control is needed in the operationalization of group-sequential or alpha-spending methods that need the information size relative to the final analysis (Simmonds et al, 2017), and therefore also need to enforce such a maximum sample size for the reported results to be valid. Finally, even in simulation-calibrated Bayesian approaches restrictions on interims (often) and maximum sample size (always) are imposed for the simulations to terminate. So the frequentist operating characteristics (type-I error, coverage of intervals) of the analysis are not known if trials wish to continue data collection after that maximum sample size and analyze again, or in other ways deviate from the simulated scenarios. Hence all available methods in living systematic reviews and prospective meta-analysis are by design quite top-down, “using preestablished stopping rules for safety, efficacy, futility, and harm” (Petkova et al, 2020). With regard to simplicity of implementation, we have decided to extend Section 3 on Collaboration with a complete worked-out and reproducible example of an ALL-IN meta-analysis. This shows, among other things, that the simplicity of the statistics strengthens the collaborations when participating trials are faced with difficulties that would otherwise invalidate the analysis. Examples of these include recruitment difficulties, delays in starting the trial, and meta-analysis results that might make a trial change course and directly influence the future sample size in an unforeseen way. We will address the first, second and final point in the order given. We hope that by proposing some adjustments to the paper in reply to each of her comments, we can convince Ewelina Rogozinska that the description of the method is technically sound (question 2, now answered by ‘Partly’) and that the conclusions about the method and its performance are adequately supported by the findings presented in the article (question 5, now answered by ‘Partly’). 
Hence we hope our V2 will be accepted without any further reservations. Firstly We wrote our paper from the hopeful perspective that many good things in science that came from the COVID-19 pandemic are here to stay, like an increase in prospective meta-analysis, data sharing and standardization of e.g. outcome measures and diagnostics. We also wrote our paper for an audience that agrees that they should. But there is nothing in the ALL-IN method that would prevent its use in a retrospective setting of meta-analysis based on summary statistics from published papers. The presentation based on Z -scores, and the worked-out example that we added in Section 3 show that, even though we favor IPD meta-analysis or collaboratively sharing summary statistics (the exact e -value itself for example), an approximate analysis often is already very good and flexible. We added the following text to the abstract: “ Because the intention of the analysis does not change the validity of the results, the results of the analysis can change the intentions (‘optional stopping’ and ‘optional continuation’ based on the results so far). On the one hand: any analysis can be turned into a living one, or even become ‘real-time’ by updating with new trial data or even including interim data from trials that are still ongoing – without any changes in the cut-offs for testing or the method for interval estimation. On the other hand: no stopping rule needs to be enforced for the analysis to remain valid, so participating in a prospective meta-analysis does not require outside control over data collection.” This characteristic of ALL-IN meta-analysis might even help further popularize these good things in science that came from the pandemic. Secondly We wrote our paper for an audience that considers sequential statistics to be complicated. 
We believe that is still our main audience because most clinical trials are not analyzed with interim analyses and most meta-analysis updates don’t take into account that repeated analyses invalidate both the p -value and the confidence intervals reported. While spending the better part of the past ten years on sequential analysis, we ourselves consider most of sequential statistics to be complicated. This is why we think that our audience would be confused if we just provided a ‘cook-book’ of ALL-IN methods and simply stated that type-I error control and coverage of intervals are guaranteed, while using the same method over and over again. We believe that a strong intuition is needed for an audience to accept that this can be done without any corrections to the method. The game theory terminology is that strong intuition. While absent in most of applied statistics today, this intuition has always been around in the history of probability theory (e.g. the work of Christiaan Huygens on games in the 17th century), and sequential statistics (e.g. George Alfred Barnard’s discussion of Wald’s probability ratio test in 1947). In the currently thriving literature on e -values and anytime-valid confidence intervals in mathematical statistics, the intuition of games is present almost everywhere and serves both as intuition to present existing results and as a very fruitful analogy that drives new results, see for example the overview article by Ramdas, Grünwald, Vovk and Shafer (2023). However, we agree that a good discussion is possible on whether references to betting and investments benefit intuition about statistics in general. We think they can, but as you can see from the discussion following Glenn Shafer’s JRSSA paper about this, there is plenty of disagreement. 
Yet for intuition about the sequential nature of ALL-IN meta-analysis, we do feel that the references to betting are essential for intuition, and the method should not be presented to readers familiar with the difficulties of sequential analysis without providing this intuition. Finally Our extension of the BCG example now shows more clearly that the ALL-IN method can be used on IPD (based on exact e -values), or can be used on summary statistics (in this example: the logrank Z-score and number of events). In fact, liaising with trials to share summary statistics can also be very fruitful just like in the FAME approach (e.g. sharing the exact e-value per trial). All the analyses of the BCG example can be reproduced based on openly available summary statistics. We hope Ewelina Rogozinska agrees that this illustrates how aggregate-level evidence synthesis is just as promising with ALL-IN methods as IPD based projects. We agree that future work should compare ALL-IN to other methods. For this introduction paper, we now focus on the settings where such a comparison is of less concern, as we write in the abstract: if the analysis changes the intentions. Take for example the situation in which a research team decides to update a meta-analysis and commit to a living systematic review, and after a first update decides to liaise with future trials in a prospective meta-analysis for future updates on interim results. In that situation there simply is no other statistical technique that guarantees type-I error control and confidence interval coverage, because ‘optional continuation’ decisions are made that are not prespecified and that no stopping rule-based method can handle. Hence ALL-IN breathes life into living systematic reviews in a way that no other method can. Cochrane Scientific Committee Schmid, C., Senn, S., Sterne, J., Kulinskaya, E., Posch, M., Roes, K., and McKenzie, J. (2018). 
Should Cochrane apply error-adjustment methods when conducting repeated meta-analyses? *It can be that simple in a fully subjective Bayesian analysis, but we have not seen any of those in use. Bayesian analysis in practice uses default priors (and possibly also informative priors, but never exclusively) and checks for frequentist operating characteristics by simulations. Aaditya Ramdas, Peter Grünwald, Vladimir Vovk, Glenn Shafer "Game-Theoretic Statistics and Safe Anytime-Valid Inference," Statistical Science, Statist. Sci. 38(4), 576-601, (November 2023) Glenn Shafer, Testing by Betting: A Strategy for Statistical and Scientific Communication, Journal of the Royal Statistical Society Series A: Statistics in Society , Volume 184, Issue 2, April 2021, Pages 407–431, https://doi.org/10.1111/rssa.12647 Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 19 Jun 2025 Judith ter Schure , Machine Learning, CWI, Amsterdam, The Netherlands 19 Jun 2025 Author Response We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates ... Continue reading We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Ewelina Rogozinska for her review and for supporting our take on efficiency, collaboration and communication. 
We also thank her for expressing her concerns about our hopeful, but perhaps naïve, statements of simplicity in the setting of sequential meta-analysis. We indeed do not have any empirical evidence that what we propose is simple to understand. What we do know is that other available approaches to control type-I error rates in living systematic reviews have proven to be very difficult (Simmonds et al, 2017), hardly ever used and even, for general use, were recommended against by a scientific committee advising Cochrane on the matter in 2018 (reference below). Our main argument for simplicity is nicely pointed out by reviewer Shubhendu Trivedi, writing ‘ we can simply collect the Z statistics Z_i from each trial, which can easily be combined (as shown in equation 3). Notably, if we add in interim data, then from the perspective of Ville's inequality, they are indistinguishable for testing. ’ So ALL-IN meta-analysis needs the same summary statistics as default Z-score approaches in conventional meta-analysis, and in terms of implementation that is indeed not simpler, but also not harder than conventional meta-analysis. The major point is that the procedure is the same whether the meta-analysis is the first analysis, or whether it is the 10 th update in a living systematic review, or whether it is the 100 th update in a prospective meta-analysis of interim results of ongoing trials. The latter nicely summarized by Rogozinska as ‘ including emerging evidence in real time without increasing type-1 error ’. There is no other statistical approach in use in meta-analysis for which retaining type-1 error control is that simple*. Other methods need a maximum sample size, an alpha-spending function, a stopping rule to enforce on the trials, and we are convinced that these are considerations that are a lot less simple than what we propose. 
We have added a section to the Version 2 of the paper making explicit that this lack of sample size restrictions is a unique feature of ALL-IN meta-analysis that does not hold for any other statistical approach in use. The introduction section on Statistics now states (references in the paper): Bottom-up living meta-analysis The lack of restrictions on sample size is a unique feature of ALL-IN meta-analysis that sets it apart from other statistical approaches to living systematic reviews , ‘adaptive’ (Tierney et al., 2021) or ‘real-time’ (Petkova et al, 2020) prospective meta-analysis and ‘metatrials’ (Van Haren et al, 2020). These other approaches require a maximum sample size or a maximum number of studies to guarantee type-I error control and interval coverage for all updates of the meta-analysis. The crucial difference with ALL-IN is that for those methods to be valid, participating in a prospective meta-analysis requires outside control over a trial’s data collection. Outside control is needed in the case of the ‘Framework for prospective adaptive meta-analysis’ (FAME) where a single sample size is set comprised of data from all ongoing trials, possibly at an interim of some of them, and single-analysis statistics is used. The consequence is that the intervals and p-values reported are only valid if never updated, so the analysis assumes that data collection stops after that point. No new analyses are valid after that maximum sample size, since all alpha is spent and possibly accumulation bias is introduced (Ter Schure & Grünwald, 2019). Similarly, outside control is needed in the operationalization of group-sequential or alpha-spending methods that need the information size relative to the final analysis (Simmonds et al, 2017), and therefore also need to enforce such maximum sample size for the reported results to be valid. 
Finally, even in simulation-calibrated Bayesian approaches restrictions on interims (often) and maximum sample size (always) are imposed for the simulations to terminate. So the frequentist operating characteristics (type-I error, coverage of intervals) of the analysis are not known if trials wish to continue data collection after that maximum sample size and analyze again, or in other ways deviate from the simulated scenarios. Hence all available methods in living systematic reviews and prospective meta-analysis are by design quite top-down, “using preestablished stopping rules for safety, efficacy, futility, and harm” (Petkova et al, 2020). With regard to simplicity of implementation, we have decided to extend Section 3 on Collaboration with a complete worked-out and reproducible example of an ALL-IN meta-analysis. This shows, among other things, that the simplicity of the statistics strengthens the collaborations when participating trials are faced with difficulties that would otherwise invalidate the analysis. Examples of these include recruitment difficulties, delays in starting the trial, and meta-analysis results that might make a trial change course and directly influence the future sample size in an unforeseen way. We will address the first, second and final point in the order given. We hope that by proposing some adjustments to the paper in reply to each of her comments, we can convince Ewelina Rogozinska that the description of the method is technically sound (question 2, now answered by ‘Partly’) and that the conclusions about the method and its performance are adequately supported by the findings presented in the article (question 5, now answered by ‘Partly’). Hence we hope our V2 will be accepted without any further reservations. 
Firstly We wrote our paper from the hopeful perspective that many good things in science that came from the COVID-19 pandemic are here to stay, like an increase in prospective meta-analysis, data sharing and standardization of e.g. outcome measures and diagnostics. We also wrote our paper for an audience that agrees that they should. But there is nothing in the ALL-IN method that would prevent its use in a retrospective setting of meta-analysis based on summary statistics from published papers. The presentation based on Z -scores, and the worked-out example that we added in Section 3 show that, even though we favor IPD meta-analysis or collaboratively sharing summary statistics (the exact e -value itself for example), an approximate analysis often is already very good and flexible. We added the following text to the abstract: “ Because the intention of the analysis does not change the validity of the results, the results of the analysis can change the intentions (‘optional stopping’ and ‘optional continuation’ based on the results so far). On the one hand: any analysis can be turned into a living one, or even become ‘real-time’ by updating with new trial data or even including interim data from trials that are still ongoing – without any changes in the cut-offs for testing or the method for interval estimation. On the other hand: no stopping rule needs to be enforced for the analysis to remain valid, so participating in a prospective meta-analysis does not require outside control over data collection.” This characteristic of ALL-IN meta-analysis might even help further popularize these good things in science that came from the pandemic. Secondly We wrote our paper for an audience that considers sequential statistics to be complicated. 
We believe that is still our main audience because most clinical trials are not analyzed with interim analyses and most meta-analysis updates don’t take into account that repeated analyses invalidate both the p -value and the confidence intervals reported. While spending the better part of the past ten years on sequential analysis, we ourselves consider most of sequential statistics to be complicated. This is why we think that our audience would be confused if we just provided a ‘cook-book’ of ALL-IN methods and simply state that type-I error control and coverage of intervals is guaranteed, while using the same method over and over again. We believe that a strong intuition is needed for an audience to accept that this can be done without any corrections to the method. The game theory terminology ís that strong intuition. While absent in most of applied statistics today, this intuition has always been around in the history of probability theory (e.g. the work of Christiaan Huygens on games in the 17 th century), and sequential statistics (e.g. George Alfred Barnard’s discussion of Wald’s probability ratio test in 1947). In the currently thriving literature on e -values and anytime-valid confidence intervals in mathematical statistics, the intuition of games is present almost everywhere and serves both as intuition to present existing results and as a very fruitful analogy that drives new results, see for example the overview article by Ramdas, Grünwald, Vovk and Shafer (2023). However, we agree that a good discussion is possible on whether references to betting and investments benefit intuition about statistics in general. We think they can, but as you can see from the discussion following Glenn Shafer’s JRSSA paper about this, there is plenty of disagreement. 
Yet for intuition about the sequential nature of ALL-IN meta-analysis, we do feel that the references to betting are essential for intuition, and the method should not be presented to readers familiar with the difficulties of sequential analysis without providing this intuition. Finally Our extension of the BCG example now shows more clearly that the ALL-IN method can be used on IPD (based on exact e -values), or can be used on summary statistics (in this example: the logrank Z-score and number of events). In fact, liaising with trials to share summary statistics can also be very fruitful just like in the FAME approach (e.g. sharing the exact e-value per trial). All the analyses of the BCG example can be reproduced based on openly available summary statistics. We hope Ewelina Rogozinska agrees that this illustrates how aggregate-level evidence synthesis is just as promising with ALL-IN methods as IPD based projects. We agree that future work should compare ALL-IN to other methods. For this introduction paper, we now focus on the settings where such a comparison is of less concern, as we write in the abstract: if the analysis changes the intentions. Take for example the situation in which a research team decides to update a meta-analysis and commit to a living systematic review, and after a first update decides to liaise with future trials in a prospective meta-analysis for future updates on interim results. In that situation there simply is no other statistical technique that guarantees type-I error control and confidence interval coverage, because ‘optional continuation’ decisions are made that are not prespecified and that no stopping rule-based method can handle. Hence ALL-IN breathes life into living systematic reviews in a way that no other method can. Cochrane Scientific Committee Schmid, C., Senn, S., Sterne, J., Kulinskaya, E., Posch, M., Roes, K., and McKenzie, J. (2018). 
Should Cochrane apply error-adjustment methods when conducting repeated meta-analyses? *It can be that simple in a fully subjective Bayesian analysis, but we have not seen any of those in use. Bayesian analysis in practice uses default priors (and possibly also informative priors, but never exclusively) and checks for frequentist operating characteristics by simulations. Aaditya Ramdas, Peter Grünwald, Vladimir Vovk, Glenn Shafer "Game-Theoretic Statistics and Safe Anytime-Valid Inference," Statistical Science, Statist. Sci. 38(4), 576-601, (November 2023) Glenn Shafer, Testing by Betting: A Strategy for Statistical and Scientific Communication, Journal of the Royal Statistical Society Series A: Statistics in Society , Volume 184, Issue 2, April 2021, Pages 407–431, https://doi.org/10.1111/rssa.12647 We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Ewelina Rogozinska for her review and for supporting our take on efficiency, collaboration and communication. We also thank her for expressing her concerns about our hopeful, but perhaps naïve, statements of simplicity in the setting of sequential meta-analysis. We indeed do not have any empirical evidence that what we propose is simple to understand. What we do know is that other available approaches to control type-I error rates in living systematic reviews have proven to be very difficult (Simmonds et al, 2017), hardly ever used and even, for general use, were recommended against by a scientific committee advising Cochrane on the matter in 2018 (reference below). 
Our main argument for simplicity is nicely pointed out by reviewer Shubhendu Trivedi, writing ‘ we can simply collect the Z statistics Z_i from each trial, which can easily be combined (as shown in equation 3). Notably, if we add in interim data, then from the perspective of Ville's inequality, they are indistinguishable for testing. ’ So ALL-IN meta-analysis needs the same summary statistics as default Z-score approaches in conventional meta-analysis, and in terms of implementation that is indeed not simpler, but also not harder than conventional meta-analysis. The major point is that the procedure is the same whether the meta-analysis is the first analysis, or whether it is the 10 th update in a living systematic review, or whether it is the 100 th update in a prospective meta-analysis of interim results of ongoing trials. The latter nicely summarized by Rogozinska as ‘ including emerging evidence in real time without increasing type-1 error ’. There is no other statistical approach in use in meta-analysis for which retaining type-1 error control is that simple*. Other methods need a maximum sample size, an alpha-spending function, a stopping rule to enforce on the trials, and we are convinced that these are considerations that are a lot less simple than what we propose. We have added a section to the Version 2 of the paper making explicit that this lack of sample size restrictions is a unique feature of ALL-IN meta-analysis that does not hold for any other statistical approach in use. The introduction section on Statistics now states (references in the paper): Bottom-up living meta-analysis The lack of restrictions on sample size is a unique feature of ALL-IN meta-analysis that sets it apart from other statistical approaches to living systematic reviews , ‘adaptive’ (Tierney et al., 2021) or ‘real-time’ (Petkova et al, 2020) prospective meta-analysis and ‘metatrials’ (Van Haren et al, 2020). 
These other approaches require a maximum sample size or a maximum number of studies to guarantee type-I error control and interval coverage for all updates of the meta-analysis. The crucial difference with ALL-IN is that for those methods to be valid, participating in a prospective meta-analysis requires outside control over a trial’s data collection. Outside control is needed in the case of the ‘Framework for prospective adaptive meta-analysis’ (FAME) where a single sample size is set comprised of data from all ongoing trials, possibly at an interim of some of them, and single-analysis statistics is used. The consequence is that the intervals and p-values reported are only valid if never updated, so the analysis assumes that data collection stops after that point. No new analyses are valid after that maximum sample size, since all alpha is spent and possibly accumulation bias is introduced (Ter Schure & Grünwald, 2019). Similarly, outside control is needed in the operationalization of group-sequential or alpha-spending methods that need the information size relative to the final analysis (Simmonds et al, 2017), and therefore also need to enforce such maximum sample size for the reported results to be valid. Finally, even in simulation-calibrated Bayesian approaches restrictions on interims (often) and maximum sample size (always) are imposed for the simulations to terminate. So the frequentist operating characteristics (type-I error, coverage of intervals) of the analysis are not known if trials wish to continue data collection after that maximum sample size and analyze again, or in other ways deviate from the simulated scenarios. Hence all available methods in living systematic reviews and prospective meta-analysis are by design quite top-down, “using preestablished stopping rules for safety, efficacy, futility, and harm” (Petkova et al, 2020). 
With regard to simplicity of implementation, we have decided to extend Section 3 on Collaboration with a complete worked-out and reproducible example of an ALL-IN meta-analysis. This shows, among other things, that the simplicity of the statistics strengthens the collaborations when participating trials are faced with difficulties that would otherwise invalidate the analysis. Examples of these include recruitment difficulties, delays in starting the trial, and meta-analysis results that might make a trial change course and directly influence the future sample size in an unforeseen way. We will address the first, second and final point in the order given. We hope that by proposing some adjustments to the paper in reply to each of her comments, we can convince Ewelina Rogozinska that the description of the method is technically sound (question 2, now answered by ‘Partly’) and that the conclusions about the method and its performance are adequately supported by the findings presented in the article (question 5, now answered by ‘Partly’). Hence we hope our V2 will be accepted without any further reservations. Firstly We wrote our paper from the hopeful perspective that many good things in science that came from the COVID-19 pandemic are here to stay, like an increase in prospective meta-analysis, data sharing and standardization of e.g. outcome measures and diagnostics. We also wrote our paper for an audience that agrees that they should. But there is nothing in the ALL-IN method that would prevent its use in a retrospective setting of meta-analysis based on summary statistics from published papers. The presentation based on Z -scores, and the worked-out example that we added in Section 3 show that, even though we favor IPD meta-analysis or collaboratively sharing summary statistics (the exact e -value itself for example), an approximate analysis often is already very good and flexible. 
We added the following text to the abstract: “ Because the intention of the analysis does not change the validity of the results, the results of the analysis can change the intentions (‘optional stopping’ and ‘optional continuation’ based on the results so far). On the one hand: any analysis can be turned into a living one, or even become ‘real-time’ by updating with new trial data or even including interim data from trials that are still ongoing – without any changes in the cut-offs for testing or the method for interval estimation. On the other hand: no stopping rule needs to be enforced for the analysis to remain valid, so participating in a prospective meta-analysis does not require outside control over data collection.” This characteristic of ALL-IN meta-analysis might even help further popularize these good things in science that came from the pandemic. Secondly We wrote our paper for an audience that considers sequential statistics to be complicated. We believe that is still our main audience because most clinical trials are not analyzed with interim analyses and most meta-analysis updates don’t take into account that repeated analyses invalidate both the p -value and the confidence intervals reported. While spending the better part of the past ten years on sequential analysis, we ourselves consider most of sequential statistics to be complicated. This is why we think that our audience would be confused if we just provided a ‘cook-book’ of ALL-IN methods and simply state that type-I error control and coverage of intervals is guaranteed, while using the same method over and over again. We believe that a strong intuition is needed for an audience to accept that this can be done without any corrections to the method. The game theory terminology ís that strong intuition. While absent in most of applied statistics today, this intuition has always been around in the history of probability theory (e.g. 
the work of Christiaan Huygens on games in the 17 th century), and sequential statistics (e.g. George Alfred Barnard’s discussion of Wald’s probability ratio test in 1947). In the currently thriving literature on e -values and anytime-valid confidence intervals in mathematical statistics, the intuition of games is present almost everywhere and serves both as intuition to present existing results and as a very fruitful analogy that drives new results, see for example the overview article by Ramdas, Grünwald, Vovk and Shafer (2023). However, we agree that a good discussion is possible on whether references to betting and investments benefit intuition about statistics in general. We think they can, but as you can see from the discussion following Glenn Shafer’s JRSSA paper about this, there is plenty of disagreement. Yet for intuition about the sequential nature of ALL-IN meta-analysis, we do feel that the references to betting are essential for intuition, and the method should not be presented to readers familiar with the difficulties of sequential analysis without providing this intuition. Finally Our extension of the BCG example now shows more clearly that the ALL-IN method can be used on IPD (based on exact e -values), or can be used on summary statistics (in this example: the logrank Z-score and number of events). In fact, liaising with trials to share summary statistics can also be very fruitful just like in the FAME approach (e.g. sharing the exact e-value per trial). All the analyses of the BCG example can be reproduced based on openly available summary statistics. We hope Ewelina Rogozinska agrees that this illustrates how aggregate-level evidence synthesis is just as promising with ALL-IN methods as IPD based projects. We agree that future work should compare ALL-IN to other methods. For this introduction paper, we now focus on the settings where such a comparison is of less concern, as we write in the abstract: if the analysis changes the intentions. 
Take for example the situation in which a research team decides to update a meta-analysis and commit to a living systematic review, and after a first update decides to liaise with future trials in a prospective meta-analysis for future updates on interim results. In that situation there simply is no other statistical technique that guarantees type-I error control and confidence interval coverage, because ‘optional continuation’ decisions are made that are not prespecified and that no stopping rule-based method can handle. Hence ALL-IN breathes life into living systematic reviews in a way that no other method can. Cochrane Scientific Committee Schmid, C., Senn, S., Sterne, J., Kulinskaya, E., Posch, M., Roes, K., and McKenzie, J. (2018). Should Cochrane apply error-adjustment methods when conducting repeated meta-analyses? *It can be that simple in a fully subjective Bayesian analysis, but we have not seen any of those in use. Bayesian analysis in practice uses default priors (and possibly also informative priors, but never exclusively) and checks for frequentist operating characteristics by simulations. Aaditya Ramdas, Peter Grünwald, Vladimir Vovk, Glenn Shafer "Game-Theoretic Statistics and Safe Anytime-Valid Inference," Statistical Science, Statist. Sci. 38(4), 576-601, (November 2023) Glenn Shafer, Testing by Betting: A Strategy for Statistical and Scientific Communication, Journal of the Royal Statistical Society Series A: Statistics in Society , Volume 184, Issue 2, April 2021, Pages 407–431, https://doi.org/10.1111/rssa.12647 Competing Interests: No competing interests were disclosed. Close Report a concern COMMENT ON THIS REPORT Views 0 Cite How to cite this report: Wang J. Reviewer Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . 
F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.77953.r145459 ) The direct URL for this report is: https://f1000research.com/articles/11-549/v1#referee-response-145459 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 19 Aug 2022 Junfeng Wang , Division of Pharmacoepidemiology and Clinical Pharmacology, Utrecht Institute for Pharmaceutical Sciences, Utrecht University, Utrecht, The Netherlands Approved with Reservations VIEWS 0 https://doi.org/10.5256/f1000research.77953.r145459 In this manuscript, ter Schure and Grünwald proposed a new form of meta-analysis, namely ALL-IN meta-analysis, which can facilitate living systematic reviews. The authors provided a different understanding of evidence synthesis, and a novel way of performing ... Continue reading READ ALL In this manuscript, ter Schure and Grünwald proposed a new form of meta-analysis, namely ALL-IN meta-analysis, which can facilitate living systematic reviews. The authors provided a different understanding of evidence synthesis, and a novel way of performing meta-analyses. The new method is illustrated in the language of betting, which is complement with the acronym of ALL-IN. Major comments: The calculation of probability of next event in group X (e.g. in page 3, the fraction of 0.41 of COVID-19 events to occur in the vaccine group, 0.41=70/(100+70)), is only valid when the number of participants is infinite in each group. When the number of participants still at risk in one group decreases, the probability of next event occurring in this group will also decrease. So the constant probability assumption may not be valid in real practice. In page 14 and 15, the authors mentioned heterogeneity several times, however, the authors avoid giving a direct answer or solution to heterogeneous results from primary studies. 
This should be addressed or at least extensively discussed (as a limitation). The COVID-19 vaccine trials are used as an example. However, all these trials are conducted recently and in a short time period (compared to other treatment). How is the generalizability of this new methods in other treatments? The method is easily extended to IPD meta-analysis. How well this method can be extended to network meta-analysis of multiple treatments? How can covariates, both on study level and individual level, be adjusted in this new framework? Minor comments: In Page 7, section 1.1, the equation “70/170*170/70 + 100/170*170/100 = 1”, seems not correct, the right side should be 2. But I assume the authors forget adding something in the left side of the equation. Please check. “Betting on vaccine” in this paper actually means betting on the next event will occur in the vaccine group (which means vaccine is not effective). It can be a bit misleading, since in common sense, reader may think betting on vaccine means betting on vaccine can protect participants. Is the rationale for developing the new method (or application) clearly explained? Yes Is the description of the method technically sound? Partly Are sufficient details provided to allow replication of the method development and its use by others? Yes If any results are presented, are all the source data underlying the results available to ensure full reproducibility? No source data required Are the conclusions about the method and its performance adequately supported by the findings presented in the article? Partly Competing Interests: No competing interests were disclosed. Reviewer Expertise: DTA meta-analysis, Clinical prediction models, Health Technology Assessment I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. 
Close READ LESS CITE CITE HOW TO CITE THIS REPORT Wang J. Reviewer Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.77953.r145459 ) The direct URL for this report is: https://f1000research.com/articles/11-549/v1#referee-response-145459 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 04 Mar 2025 Judith ter Schure , Machine Learning, CWI, Amsterdam, The Netherlands 04 Mar 2025 Author Response We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates ... Continue reading We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Junfeng Wang for his thorough reading of our paper. We are especially grateful that he spotted a mistake in Section 1.1, pointed out as Minor comment 1, and was so kind to state “ I assume the authors forget adding something ”. We indeed forgot to add the betting strategy, which we added in our Version 2 of the paper (see below at Minor comment 1). We also found a mistake ourselves in Section 1.3: there was a square root missing below equation (6), needing a division by 2 instead of 4 (which comes from the standard deviation of a Bernoulli-0.5 likelihood). 
ALL-IN can indeed provide a “ different understanding of evidence synthesis, and a novel way of performing meta-analyses ”, as stated by Junfeng Wang. Although ALL-IN is a novel statistical approach to meta-analysis, it does provide statistical summaries for evidence synthesis that can be reported in a familiar way – e-values take the role of p-values, and anytime-valid confidence intervals the role of conventional confidence intervals. Our worked-out example illustrates this in Version 2 of the paper. Yet due to their anytime-validity, this statistical summary can be Live and can be the Leading source of information in decisions on future research, even at INterim stages of clinical trials. So while the statistical approach fits within mainstream understanding of evidence synthesis as an external and retrospective summary, it can also shift that understanding towards evidence synthesis as an active/involved (advising future trials/liaising with ongoing trials) and possibly prospective tool to facilitate collaboration and increase efficiency. In our Version 2 of the paper we have emphasized this aspect more, and moreover, stressed how the approach to prospective meta-analysis differs from other adaptive approaches to prospective analysis like the Framework for Prospective, Adaptive Meta-analysis (FAME), and Bayesian adaptive methods. The main difference is that the validity of the ALL-IN analysis does not rely on enforcing a maximum sample size or stopping rule, such that prospective collaborations can be more bottom-up rather than outsourcing decisions to an external top-down meta-analysis team. This fact needed a better illustration in Section 3 on Collaboration, so we rewrote that section by extending the discussion of our BCG-CORONA example with the results from that ALL-IN meta-analysis, thus including a completely worked-out example to the paper. We will address all further comments in the order given. 
We hope that by proposing some adjustments to the paper in reply to each of his comments, we can convince Junfeng Wang that the description of the method is technically sound (question 2, now answered by ‘Partly’) and that the conclusions about the method and its performance are adequately supported by the findings presented in the article (question 5, now answered by ‘Partly’). Hence we hope our Version 2 will be accepted without any further reservations. Major comments We agree with Junfeng Wang that our representation of the FDA game is approximate and not exact, simplifying the story such that we could calculate betting scores / e -values based on the COVID-19 event counts alone (to keep it simple as an illustrative example). Following Junfeng Wang’s concerns, we will make this more explicit in Version 2 of the paper, after the brief mention halfway the first paragraph on the right side of page 3 ( ‘Most COVID-19 vaccine trials randomized large numbers of participants 50:50 vaccine:placebo such that we can assume that also throughout the trial the participants at risk stayed approximately balanced.’ ). We add the following text and footnote there: This allows for a back-of-the-envelope calculation to reinterpret the design for the COVID-19 vaccine trials in the language of betting\footnote{Note that in Section 1 (specifically at the end of Section 1.1) we discuss how to exactly analyze such time-to-event data using e-values taking into account that the risk set changes after occurrence of events and censoring. We expect such a reanalysis to be very well approximated by the simple calculations that are used for illustration in this introduction of the FDA game, at least for large and balanced trials like the one by Pfizer/BioNtech. Even for trials that are a lot smaller, this approximation is quite good. 
For example, the exact logrank e-value of 1.88 that we report for the NL trial in Table 1 can be approximated using this back-of-the-envelope calculation for the e-value as follows: . In this trial only 1496 participants were 50:50 randomized (compared to the 40 thousand in the Pfizer/BioNtech trial), and 96 events observed in the treatment and 110 events in the control group (Ter Schure et al, 2022); testing a 20% VE against a 0% VE.} We do believe that our approximation stays true to the essence of these large vaccine trials. For the Pfizer/BioNtech trial, Polack et al (2020) report a 95% vaccine efficacy based on a beta-binomial model taking into account the confirmed cases per 1000 person-years of follow-up based on 8 cases in the vaccine and 162 cases in the control group. This corresponds very well to a simple calculation (1 – 8/162)*100 = 95% assuming a perfect 50:50 ratio of follow-up time or indeed an infinite number of participants in each group. The footnote points to existing text at the end of Section 1.1, the second paragraph of page 8 (in version 1 of the paper), where we do make a recommendation for actual use going forward. Just like a p -value logrank test, the e -value logrank test takes into account, as well-spotted by Junfeng Wang, that ‘ when the number of participants still at risk in one group decreases, the probability of the next event occurring in this group will also decrease’ . Moreover, the exact e -value logrank test can be used in sequential settings in which the p -value logrank test fails, such as in sequential analysis in unbalanced randomized experiments (where p-value tests cannot be used based on group-sequential or alpha spending methods that rely on Gaussian approximations). As we write in the paper: We will use the logrank Z-statistic as a running example for meta-analysis on summary statistics. 
For an IPD meta-analysis (on individual patient data), however, we recommend to use the exact e-value logrank test from Ter Schure et al. (2024) that is valid regardless of the randomization (e.g. 1:1 balanced or 1:2 unbalanced), the number of participants at risk, the number of events or the size of the effect. We believe we do address heterogeneity when we state the following on page 14: ‘heterogeneity in their effect sizes (e.g. one 20%, one 50%, one 25%) does not matter for their joint ability to reject the global null hypothesis of no effect in all trials. So for testing the global null, trials are allowed to be heterogeneous in where they are in the space of the alternative hypothesis H 1 = {VE: 20% ≤ VE ≤ 100%}. For estimation, however, it is not clear what the ALL-IN confidence interval is estimating if we assume that the effects in the trials are very different. Still, as a first summary, a typical effect size (Peto, 1987) might be useful if we are unable to estimate a random effects model. The development of confidence sequences for random-effects meta-analysis is a major goal for future work. We do not, however, believe that the evidence in a line of research should be monitored based on whether this interval excludes the null hypothesis, or whether the e-value corresponding to the random-effects null model does: for testing, the global null is much more natural. Waiting for a random-effect model to reach a certain threshold is counter-intuitive, since it might require many small trials to estimate the between-trial variability instead of focusing on testing the treatment effect. Moreover, the goal of rejecting the null hypothesis corresponding to this model can be quite strange. When testing a zero-effect null hypothesis, it assumes that there are true effects of harm and true effects of benefit among the trials and that their mean is exactly zero. 
’ Junfeng Wang is right to point out that the COVID-19 vaccine trials are a very special example showing how meta-analysis can be done on interim trial results when many studies are simultaneously ongoing. As also stated in the abstract, ALL-IN meta-analysis can be performed on interim data, but does not have to. The analysis design requires no information about the number of patients in trials or the number of trials eventually included. In Section 1.2 we first give the example where we can collect a Z-statistic for each study out of K studies completed so far, so in a setting of conventional retrospective meta-analysis. What makes ALL-IN meta-analysis special is stated right after: the analysis on interim results is exactly the same. The connection to living systematic reviews is made earlier in the paper, on page 5 where we refer to Simmonds et al (2017). Conventional meta-analysis does not control type-I error rates in living systematic reviews, while ALL-IN meta-analysis does. We do see the need to make this connection more explicit: allowing updates of systematic reviews based on interim data is what “breathes life into living systematic reviews”. Also following another reviewer’s remarks, we will use our example of ALL-IN-META-BCG-CORONA in Section 3 to make more explicit what ALL-IN methods can do that is not possible with any other approach. Future research should definitely focus on whether the method can be extended to network meta-analysis, but we have not conducted such research yet. We are pleased to see that the paper convinced the reviewer that the method is easily extended to IPD meta-analysis. We have added these considerations to the discussion section of the Version 2 paper. Covariate adjustment is a major field of research in the literature on e -values and anytime-valid statistics. 
We propose to add the following paragraph to the discussion section in Version 2 of the paper: This paper introduces ALL-IN meta-analysis based on Z-score methods of meta-analysis, as introduced in standard works such as Borenstein et al. (2009). We mainly focus on testing rather than estimation. For testing, extensions to IPD meta-analysis based on exact e-values (rather than Z-score approximations) follow easily since both can be combined by multiplication, as shown in our ALL-IN-META-BCG-CORONA example. Within this focus on testing with type-I error control, the heterogeneity question is less explicit, since under the null-hypothesis there is no heterogeneity, as discussed in Section 3.3. In future work we will provide more details on estimation under heterogeneity, with anytime-valid confidence intervals for the fixed-effect (singular), fixed-effects (plural) and random-effects model. Extensions to network meta-analysis seem possible, but are not yet our main direction. With regard to meta-regression, much development can be expected from work on e-values in settings of composite null-hypotheses, for which research is ongoing that extends to linear regression (e.g. Pérez-Ortiz et al. (2024)). For IPD meta-analyses that take covariates into account, analysis of randomized controlled trials can use the so-called ‘Model-X’ e-value approach. Pérez-Ortiz, M. F., Lardy, T., de Heide, R., & Grünwald, P. D. (2024). E-statistics, group invariance and anytime-valid testing. The Annals of Statistics, 52(4), 1410-1432. Grünwald, P., Henzi, A., & Lardy, T. (2024). Anytime-valid tests of conditional independence under model-X. Journal of the American Statistical Association, 119(546), 1554-1565. Minor comments Junfeng is completely right that we forgot something: the betting strategy. We propose to correct the text as follows: For example putting 1/3 on vaccine and 2/3 on placebo: 1/3· 70/170 · 170/70 + 2/3· 100/170 ·170/100 = 1. 
No matter how we invest in the two outcomes, (e.g. try putting 1/2 on vaccine and 1/2 on placebo, or something different) in expectation under the null we multiply the initial investment by 1 . This is a great comment! Indeed the ones betting on development of an effective vaccine – e.g. the NIH in funding the trials (taking ‘multiple shots on goal’ to just ‘let the chips fall’) – hope to find very few events of COVID-19 in the vaccine group of a clinical trial. We will adjust how we phrase this to eliminate this confusion in Version 2 of the paper. We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Junfeng Wang for his thorough reading of our paper. We are especially grateful that he spotted a mistake in Section 1.1, pointed out as Minor comment 1, and was so kind to state “ I assume the authors forget adding something ”. We indeed forgot to add the betting strategy, which we added in our Version 2 of the paper (see below at Minor comment 1). We also found a mistake ourselves in Section 1.3: there was a square root missing below equation (6), needing a division by 2 instead of 4 (which comes from the standard deviation of a Bernoulli-0.5 likelihood). ALL-IN can indeed provide a “ different understanding of evidence synthesis, and a novel way of performing meta-analyses ”, as stated by Junfeng Wang. 
Although ALL-IN is a novel statistical approach to meta-analysis, it does provide statistical summaries for evidence synthesis that can be reported in a familiar way – e-values take the role of p-values, and anytime-valid confidence intervals the role of conventional confidence intervals. Our worked-out example illustrates this in Version 2 of the paper. Yet due to their anytime-validity, this statistical summary can be Live and can be the Leading source of information in decisions on future research, even at INterim stages of clinical trials. So while the statistical approach fits within mainstream understanding of evidence synthesis as an external and retrospective summary, it can also shift that understanding towards evidence synthesis as an active/involved (advising future trials/liaising with ongoing trials) and possibly prospective tool to facilitate collaboration and increase efficiency. In our Version 2 of the paper we have emphasized this aspect more, and moreover, stressed how the approach to prospective meta-analysis differs from other adaptive approaches to prospective analysis like the Framework for Prospective, Adaptive Meta-analysis (FAME), and Bayesian adaptive methods. The main difference is that the validity of the ALL-IN analysis does not rely on enforcing a maximum sample size or stopping rule, such that prospective collaborations can be more bottom-up rather than outsourcing decisions to an external top-down meta-analysis team. This fact needed a better illustration in Section 3 on Collaboration, so we rewrote that section by extending the discussion of our BCG-CORONA example with the results from that ALL-IN meta-analysis, thus adding a completely worked-out example to the paper. We will address all further comments in the order given. 
We hope that by proposing some adjustments to the paper in reply to each of his comments, we can convince Junfeng Wang that the description of the method is technically sound (question 2, now answered by ‘Partly’) and that the conclusions about the method and its performance are adequately supported by the findings presented in the article (question 5, now answered by ‘Partly’). Hence we hope our Version 2 will be accepted without any further reservations. Major comments We agree with Junfeng Wang that our representation of the FDA game is approximate and not exact, simplifying the story such that we could calculate betting scores / e -values based on the COVID-19 event counts alone (to keep it simple as an illustrative example). Following Junfeng Wang’s concerns, we will make this more explicit in Version 2 of the paper, after the brief mention halfway the first paragraph on the right side of page 3 ( ‘Most COVID-19 vaccine trials randomized large numbers of participants 50:50 vaccine:placebo such that we can assume that also throughout the trial the participants at risk stayed approximately balanced.’ ). We add the following text and footnote there: This allows for a back-of-the-envelope calculation to reinterpret the design for the COVID-19 vaccine trials in the language of betting\footnote{Note that in Section 1 (specifically at the end of Section 1.1) we discuss how to exactly analyze such time-to-event data using e-values taking into account that the risk set changes after occurrence of events and censoring. We expect such a reanalysis to be very well approximated by the simple calculations that are used for illustration in this introduction of the FDA game, at least for large and balanced trials like the one by Pfizer/BioNtech. Even for trials that are a lot smaller, this approximation is quite good. 
For example, the exact logrank e-value of 1.88 that we report for the NL trial in Table 1 can be approximated using this back-of-the-envelope calculation for the e-value as follows: . In this trial only 1496 participants were 50:50 randomized (compared to the 40 thousand in the Pfizer/BioNtech trial), and 96 events observed in the treatment and 110 events in the control group (Ter Schure et al, 2022); testing a 20% VE against a 0% VE.} We do believe that our approximation stays true to the essence of these large vaccine trials. For the Pfizer/BioNtech trial, Polack et al (2020) report a 95% vaccine efficacy based on a beta-binomial model taking into account the confirmed cases per 1000 person-years of follow-up based on 8 cases in the vaccine and 162 cases in the control group. This corresponds very well to a simple calculation (1 – 8/162)*100 = 95% assuming a perfect 50:50 ratio of follow-up time or indeed an infinite number of participants in each group. The footnote points to existing text at the end of Section 1.1, the second paragraph of page 8 (in version 1 of the paper), where we do make a recommendation for actual use going forward. Just like a p -value logrank test, the e -value logrank test takes into account, as well-spotted by Junfeng Wang, that ‘ when the number of participants still at risk in one group decreases, the probability of the next event occurring in this group will also decrease’ . Moreover, the exact e -value logrank test can be used in sequential settings in which the p -value logrank test fails, such as in sequential analysis in unbalanced randomized experiments (where p-value tests cannot be used based on group-sequential or alpha spending methods that rely on Gaussian approximations). As we write in the paper: We will use the logrank Z-statistic as a running example for meta-analysis on summary statistics. 
For an IPD meta-analysis (on individual patient data), however, we recommend to use the exact e-value logrank test from Ter Schure et al. (2024) that is valid regardless of the randomization (e.g. 1:1 balanced or 1:2 unbalanced), the number of participants at risk, the number of events or the size of the effect. We believe we do address heterogeneity when we state the following on page 14: ‘heterogeneity in their effect sizes (e.g. one 20%, one 50%, one 25%) does not matter for their joint ability to reject the global null hypothesis of no effect in all trials. So for testing the global null, trials are allowed to be heterogeneous in where they are in the space of the alternative hypothesis H 1 = {VE: 20% ≤ VE ≤ 100%}. For estimation, however, it is not clear what the ALL-IN confidence interval is estimating if we assume that the effects in the trials are very different. Still, as a first summary, a typical effect size (Peto, 1987) might be useful if we are unable to estimate a random effects model. The development of confidence sequences for random-effects meta-analysis is a major goal for future work. We do not, however, believe that the evidence in a line of research should be monitored based on whether this interval excludes the null hypothesis, or whether the e-value corresponding to the random-effects null model does: for testing, the global null is much more natural. Waiting for a random-effect model to reach a certain threshold is counter-intuitive, since it might require many small trials to estimate the between-trial variability instead of focusing on testing the treatment effect. Moreover, the goal of rejecting the null hypothesis corresponding to this model can be quite strange. When testing a zero-effect null hypothesis, it assumes that there are true effects of harm and true effects of benefit among the trials and that their mean is exactly zero. 
’ Junfeng Wang is right to point out that the COVID-19 vaccine trials are a very special example showing how meta-analysis can be done on interim trial results when many studies are simultaneously ongoing. As also stated in the abstract, ALL-IN meta-analysis can be performed on interim data, but does not have to. The analysis design requires no information about the number of patients in trials or the number of trials eventually included. In Section 1.2 we first give the example where we can collect a Z-statistic for each study out of K studies completed so far, so in a setting of conventional retrospective meta-analysis. What makes ALL-IN meta-analysis special is stated right after: the analysis on interim results is exactly the same. The connection to living systematic reviews is made earlier in the paper, on page 5 where we refer to Simmonds et al (2017). Conventional meta-analysis does not control type-I error rates in living systematic reviews, while ALL-IN meta-analysis does. We do see the need to make this connection more explicit: allowing updates of systematic reviews based on interim data is what “breathes life into living systematic reviews”. Also following another reviewer’s remarks, we will use our example of ALL-IN-META-BCG-CORONA in Section 3 to make more explicit what ALL-IN methods can do that is not possible with any other approach. Future research should definitely focus on whether the method can be extended to network meta-analysis, but we have not conducted such research yet. We are pleased to see that the paper convinced the reviewer that the method is easily extended to IPD meta-analysis. We have added these considerations to the discussion section of the Version 2 paper. Covariate adjustment is a major field of research in the literature on e -values and anytime-valid statistics. 
We propose to add the following paragraph to the discussion section in Version 2 of the paper: This paper introduces ALL-IN meta-analysis based on Z-score methods of meta-analysis, as introduced in standard works such as Borenstein et al. (2009). We mainly focus on testing rather than estimation. For testing, extensions to IPD meta-analysis based on exact e-values (rather than Z-score approximations) follow easily since both can be combined by multiplication, as shown in our ALL-IN-META-BCG-CORONA example. Within this focus on testing with type-I error control, the heterogeneity question is less explicit, since under the null-hypothesis there is no heterogeneity, as discussed in Section 3.3. In future work we will provide more details on estimation under heterogeneity, with anytime-valid confidence intervals for the fixed-effect (singular), fixed-effects (plural) and random-effects model. Extensions to network meta-analysis seem possible, but are not yet our main direction. With regard to meta-regression, much development can be expected from work on e-values in settings of composite null-hypotheses, for which research is ongoing that extends to linear regression (e.g. Pérez-Ortiz et al. (2024)). For IPD meta-analyses that take covariates into account, analysis of randomized controlled trials can use the so-called ‘Model-X’ e-value approach. Pérez-Ortiz, M. F., Lardy, T., de Heide, R., & Grünwald, P. D. (2024). E-statistics, group invariance and anytime-valid testing. The Annals of Statistics, 52(4), 1410-1432. Grünwald, P., Henzi, A., & Lardy, T. (2024). Anytime-valid tests of conditional independence under model-X. Journal of the American Statistical Association, 119(546), 1554-1565. Minor comments Junfeng is completely right that we forgot something: the betting strategy. We propose to correct the text as follows: For example putting 1/3 on vaccine and 2/3 on placebo: 1/3· 70/170 · 170/70 + 2/3· 100/170 ·170/100 = 1. 
No matter how we invest in the two outcomes, (e.g. try putting 1/2 on vaccine and 1/2 on placebo, or something different) in expectation under the null we multiply the initial investment by 1 . This is a great comment! Indeed the ones betting on development of an effective vaccine – e.g. the NIH in funding the trials (taking ‘multiple shots on goal’ to just ‘let the chips fall’) – hope to find very few events of COVID-19 in the vaccine group of a clinical trial. We will adjust how we phrase this to eliminate this confusion in Version 2 of the paper. Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 04 Mar 2025 Judith ter Schure , Machine Learning, CWI, Amsterdam, The Netherlands 04 Mar 2025 Author Response We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates ... Continue reading We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Junfeng Wang for his thorough reading of our paper. We are especially grateful that he spotted a mistake in Section 1.1, pointed out as Minor comment 1, and was so kind to state “ I assume the authors forget adding something ”. We indeed forgot to add the betting strategy, which we added in our Version 2 of the paper (see below at Minor comment 1). 
We also found a mistake ourselves in Section 1.3: there was a square root missing below equation (6), needing a division by 2 instead of 4 (which comes from the standard deviation of a Bernoulli-0.5 likelihood). ALL-IN can indeed provide a “ different understanding of evidence synthesis, and a novel way of performing meta-analyses ”, as stated by Junfeng Wang. Although ALL-IN is a novel statistical approach to meta-analysis, it does provide statistical summaries for evidence synthesis that can be reported in a familiar way – e-values take the role of p-values, and anytime-valid confidence intervals the role of conventional confidence intervals. Our worked-out example illustrates this in Version 2 of the paper. Yet due to their anytime-validity, this statistical summary can be Live and can be the Leading source of information in decisions on future research, even at INterim stages of clinical trials. So while the statistical approach fits within mainstream understanding of evidence synthesis as an external and retrospective summary, it can also shift that understanding towards evidence synthesis as an active/involved (advising future trials/liaising with ongoing trials) and possibly prospective tool to facilitate collaboration and increase efficiency. In our Version 2 of the paper we have emphasized this aspect more, and moreover, stressed how the approach to prospective meta-analysis differs from other adaptive approaches to prospective analysis like the Framework for Prospective, Adaptive Meta-analysis (FAME), and Bayesian adaptive methods. The main difference is that the validity of the ALL-IN analysis does not rely on enforcing a maximum sample size or stopping rule, such that prospective collaborations can be more bottom-up rather than outsourcing decisions to an external top-down meta-analysis team. 
This fact needed a better illustration in Section 3 on Collaboration, so we rewrote that section by extending the discussion of our BCG-CORONA example with the results from that ALL-IN meta-analysis, thus including a completely worked-out example to the paper. We will address all further comments in the order given. We hope that by proposing some adjustments to the paper in reply to each of his comments, we can convince Junfeng Wang that the description of the method is technically sound (question 2, now answered by ‘Partly’) and that the conclusions about the method and its performance are adequately supported by the findings presented in the article (question 5, now answered by ‘Partly’). Hence we hope our Version 2 will be accepted without any further reservations. Major comments We agree with Junfeng Wang that our representation of the FDA game is approximate and not exact, simplifying the story such that we could calculate betting scores / e -values based on the COVID-19 event counts alone (to keep it simple as an illustrative example). Following Junfeng Wang’s concerns, we will make this more explicit in Version 2 of the paper, after the brief mention halfway the first paragraph on the right side of page 3 ( ‘Most COVID-19 vaccine trials randomized large numbers of participants 50:50 vaccine:placebo such that we can assume that also throughout the trial the participants at risk stayed approximately balanced.’ ). We add the following text and footnote there: This allows for a back-of-the-envelope calculation to reinterpret the design for the COVID-19 vaccine trials in the language of betting\footnote{Note that in Section 1 (specifically at the end of Section 1.1) we discuss how to exactly analyze such time-to-event data using e-values taking into account that the risk set changes after occurrence of events and censoring. 
We expect such a reanalysis to be very well approximated by the simple calculations that are used for illustration in this introduction of the FDA game, at least for large and balanced trials like the one by Pfizer/BioNtech. Even for trials that are a lot smaller, this approximation is quite good. For example, the exact logrank e-value of 1.88 that we report for the NL trial in Table 1 can be approximated using this back-of-the-envelope calculation for the e-value as follows: . In this trial only 1496 participants were 50:50 randomized (compared to the 40 thousand in the Pfizer/BioNtech trial), and 96 events observed in the treatment and 110 events in the control group (Ter Schure et al, 2022); testing a 20% VE against a 0% VE.} We do believe that our approximation stays true to the essence of these large vaccine trials. For the Pfizer/BioNtech trial, Polack et al (2020) report a 95% vaccine efficacy based on a beta-binomial model taking into account the confirmed cases per 1000 person-years of follow-up based on 8 cases in the vaccine and 162 cases in the control group. This corresponds very well to a simple calculation (1 – 8/162)*100 = 95% assuming a perfect 50:50 ratio of follow-up time or indeed an infinite number of participants in each group. The footnote points to existing text at the end of Section 1.1, the second paragraph of page 8 (in version 1 of the paper), where we do make a recommendation for actual use going forward. Just like a p -value logrank test, the e -value logrank test takes into account, as well-spotted by Junfeng Wang, that ‘ when the number of participants still at risk in one group decreases, the probability of the next event occurring in this group will also decrease’ . 
Moreover, the exact e -value logrank test can be used in sequential settings in which the p -value logrank test fails, such as in sequential analysis in unbalanced randomized experiments (where p-value tests cannot be used based on group-sequential or alpha spending methods that rely on Gaussian approximations). As we write in the paper: We will use the logrank Z-statistic as a running example for meta-analysis on summary statistics. For an IPD meta-analysis (on individual patient data), however, we recommend to use the exact e-value logrank test from Ter Schure et al. (2024) that is valid regardless of the randomization (e.g. 1:1 balanced or 1:2 unbalanced), the number of participants at risk, the number of events or the size of the effect. We believe we do address heterogeneity when we state the following on page 14: ‘heterogeneity in their effect sizes (e.g. one 20%, one 50%, one 25%) does not matter for their joint ability to reject the global null hypothesis of no effect in all trials. So for testing the global null, trials are allowed to be heterogeneous in where they are in the space of the alternative hypothesis H 1 = {VE: 20% ≤ VE ≤ 100%}. For estimation, however, it is not clear what the ALL-IN confidence interval is estimating if we assume that the effects in the trials are very different. Still, as a first summary, a typical effect size (Peto, 1987) might be useful if we are unable to estimate a random effects model. The development of confidence sequences for random-effects meta-analysis is a major goal for future work. We do not, however, believe that the evidence in a line of research should be monitored based on whether this interval excludes the null hypothesis, or whether the e-value corresponding to the random-effects null model does: for testing, the global null is much more natural. 
Waiting for a random-effect model to reach a certain threshold is counter-intuitive, since it might require many small trials to estimate the between-trial variability instead of focusing on testing the treatment effect. Moreover, the goal of rejecting the null hypothesis corresponding to this model can be quite strange. When testing a zero-effect null hypothesis, it assumes that there are true effects of harm and true effects of benefit among the trials and that their mean is exactly zero. ’ Junfeng Wang is right to point out that the COVID-19 vaccine trials are a very special example showing how meta-analysis can be done on interim trial results when many studies are simultaneously ongoing. As also stated in the abstract, ALL-IN meta-analysis can be performed on interim data, but does not have to. The analysis design requires no information about the number of patients in trials or the number of trials eventually included. In Section 1.2 we first give the example where we can collect a Z-statistic for each study out of K studies completed so far, so in a setting of conventional retrospective meta-analysis. What makes ALL-IN meta-analysis special is stated right after: the analysis on interim results is exactly the same. The connection to living systematic reviews is made earlier in the paper, on page 5 where we refer to Simmonds et al (2017). Conventional meta-analysis does not control type-I error rates in living systematic reviews, while ALL-IN meta-analysis does. We do see the need to make this connection more explicit: allowing updates of systematic reviews based on interim data is what “breathes life into living systematic reviews”. Also following another reviewer’s remarks, we will use our example of ALL-IN-META-BCG-CORONA in Section 3 to make more explicit what ALL-IN methods can do that is not possible with any other approach. 
Future research should definitely focus on whether the method can be extended to network meta-analysis, but we have not conducted such research yet. We are pleased to see that the paper convinced the reviewer that the method is easily extended to IPD meta-analysis. We have added these considerations to the discussion section of the Version 2 paper. Covariate adjustment is a major field of research in the literature on e -values and anytime-valid statistics. We propose to add the following paragraph to the discussion section in Version 2 of the paper: This paper introduces ALL-IN meta-analysis based on Z-score methods of meta-analysis, as introduced in standard works such as Borenstein et al. (2009). We mainly focus on testing rather than estimation. For testing, extensions to IPD meta-analysis based on exact e-values (rather than Z-score approximations) follow easily since both can be combined by multiplication, as shown in our ALL-IN-META-BCG-CORONA example. Within this focus on testing with type-I error control, the heterogeneity question is less explicit, since under the null-hypothesis there is no heterogeneity, as discussed in Section 3.3. In future work we will provide more details on estimation under heterogeneity, with anytime-valid confidence intervals for the fixed-effect (singular), fixed-effects (plural) and random-effects model. Extensions to network meta-analysis seem possible, but are not yet our main direction. With regard to meta-regression, much development can be expected from work on e-values in settings of composite null-hypotheses, for which research is ongoing that extends to linear regression (e.g. Pérez-Ortiz et al. (2024)). For IPD meta-analyses that take covariates into account, analysis of randomized controlled trials can use the so-called ‘Model-X’ e-value approach. Pérez-Ortiz, M. F., Lardy, T., de Heide, R., & Grünwald, P. D. (2024). E-statistics, group invariance and anytime-valid testing. The Annals of Statistics, 52(4), 1410-1432. 
Grünwald, P., Henzi, A., & Lardy, T. (2024). Anytime-valid tests of conditional independence under model-X. Journal of the American Statistical Association, 119(546), 1554-1565. Minor comments Junfeng is completely right that we forgot something: the betting strategy. We propose to correct the text as follows: For example putting 1/3 on vaccine and 2/3 on placebo: 1/3· 70/170 · 170/70 + 2/3· 100/170 ·170/100 = 1. No matter how we invest in the two outcomes, (e.g. try putting 1/2 on vaccine and 1/2 on placebo, or something different) in expectation under the null we multiply the initial investment by 1 . This is a great comment! Indeed the ones betting on development of an effective vaccine – e.g. the NIH in funding the trials (taking ‘multiple shots on goal’ to just ‘let the chips fall’) – hope to find very few events of COVID-19 in the vaccine group of a clinical trial. We will adjust how we phrase this to eliminate this confusion in Version 2 of the paper. We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Junfeng Wang for his thorough reading of our paper. We are especially grateful that he spotted a mistake in Section 1.1, pointed out as Minor comment 1, and was so kind to state “ I assume the authors forget adding something ”. We indeed forgot to add the betting strategy, which we added in our Version 2 of the paper (see below at Minor comment 1). 
We also found a mistake ourselves in Section 1.3: there was a square root missing below equation (6), needing a division by 2 instead of 4 (which comes from the standard deviation of a Bernoulli-0.5 likelihood). ALL-IN can indeed provide a “different understanding of evidence synthesis, and a novel way of performing meta-analyses”, as stated by Junfeng Wang. Although ALL-IN is a novel statistical approach to meta-analysis, it does provide statistical summaries for evidence synthesis that can be reported in a familiar way – e-values take the role of p-values, and anytime-valid confidence intervals the role of conventional confidence intervals. Our worked-out example illustrates this in Version 2 of the paper. Yet due to their anytime-validity, this statistical summary can be Live and can be the Leading source of information in decisions on future research, even at INterim stages of clinical trials. So while the statistical approach fits within mainstream understanding of evidence synthesis as an external and retrospective summary, it can also shift that understanding towards evidence synthesis as an active/involved (advising future trials/liaising with ongoing trials) and possibly prospective tool to facilitate collaboration and increase efficiency. In our Version 2 of the paper we have emphasized this aspect more, and moreover, stressed how the approach to prospective meta-analysis differs from other adaptive approaches to prospective analysis like the Framework for Prospective, Adaptive Meta-analysis (FAME), and Bayesian adaptive methods. The main difference is that the validity of the ALL-IN analysis does not rely on enforcing a maximum sample size or stopping rule, such that prospective collaborations can be more bottom-up rather than outsourcing decisions to an external top-down meta-analysis team. 
This fact needed a better illustration in Section 3 on Collaboration, so we rewrote that section by extending the discussion of our BCG-CORONA example with the results from that ALL-IN meta-analysis, thus including a completely worked-out example to the paper. We will address all further comments in the order given. We hope that by proposing some adjustments to the paper in reply to each of his comments, we can convince Junfeng Wang that the description of the method is technically sound (question 2, now answered by ‘Partly’) and that the conclusions about the method and its performance are adequately supported by the findings presented in the article (question 5, now answered by ‘Partly’). Hence we hope our Version 2 will be accepted without any further reservations. Major comments We agree with Junfeng Wang that our representation of the FDA game is approximate and not exact, simplifying the story such that we could calculate betting scores / e -values based on the COVID-19 event counts alone (to keep it simple as an illustrative example). Following Junfeng Wang’s concerns, we will make this more explicit in Version 2 of the paper, after the brief mention halfway the first paragraph on the right side of page 3 ( ‘Most COVID-19 vaccine trials randomized large numbers of participants 50:50 vaccine:placebo such that we can assume that also throughout the trial the participants at risk stayed approximately balanced.’ ). We add the following text and footnote there: This allows for a back-of-the-envelope calculation to reinterpret the design for the COVID-19 vaccine trials in the language of betting\footnote{Note that in Section 1 (specifically at the end of Section 1.1) we discuss how to exactly analyze such time-to-event data using e-values taking into account that the risk set changes after occurrence of events and censoring. 
We expect such a reanalysis to be very well approximated by the simple calculations that are used for illustration in this introduction of the FDA game, at least for large and balanced trials like the one by Pfizer/BioNTech. Even for trials that are a lot smaller, this approximation is quite good. For example, the exact logrank e-value of 1.88 that we report for the NL trial in Table 1 can be approximated using this back-of-the-envelope calculation for the e-value as follows: . In this trial only 1496 participants were 50:50 randomized (compared to the 40 thousand in the Pfizer/BioNTech trial), and 96 events were observed in the treatment and 110 events in the control group (Ter Schure et al, 2022); testing a 20% VE against a 0% VE.} We do believe that our approximation stays true to the essence of these large vaccine trials. For the Pfizer/BioNTech trial, Polack et al (2020) report a 95% vaccine efficacy based on a beta-binomial model taking into account the confirmed cases per 1000 person-years of follow-up based on 8 cases in the vaccine and 162 cases in the control group. This corresponds very well to a simple calculation (1 – 8/162)*100 = 95% assuming a perfect 50:50 ratio of follow-up time or indeed an infinite number of participants in each group. The footnote points to existing text at the end of Section 1.1, the second paragraph of page 8 (in version 1 of the paper), where we do make a recommendation for actual use going forward. Just like a p-value logrank test, the e-value logrank test takes into account, as well-spotted by Junfeng Wang, that ‘when the number of participants still at risk in one group decreases, the probability of the next event occurring in this group will also decrease’. 
Moreover, the exact e -value logrank test can be used in sequential settings in which the p -value logrank test fails, such as in sequential analysis in unbalanced randomized experiments (where p-value tests cannot be used based on group-sequential or alpha spending methods that rely on Gaussian approximations). As we write in the paper: We will use the logrank Z-statistic as a running example for meta-analysis on summary statistics. For an IPD meta-analysis (on individual patient data), however, we recommend to use the exact e-value logrank test from Ter Schure et al. (2024) that is valid regardless of the randomization (e.g. 1:1 balanced or 1:2 unbalanced), the number of participants at risk, the number of events or the size of the effect. We believe we do address heterogeneity when we state the following on page 14: ‘heterogeneity in their effect sizes (e.g. one 20%, one 50%, one 25%) does not matter for their joint ability to reject the global null hypothesis of no effect in all trials. So for testing the global null, trials are allowed to be heterogeneous in where they are in the space of the alternative hypothesis H 1 = {VE: 20% ≤ VE ≤ 100%}. For estimation, however, it is not clear what the ALL-IN confidence interval is estimating if we assume that the effects in the trials are very different. Still, as a first summary, a typical effect size (Peto, 1987) might be useful if we are unable to estimate a random effects model. The development of confidence sequences for random-effects meta-analysis is a major goal for future work. We do not, however, believe that the evidence in a line of research should be monitored based on whether this interval excludes the null hypothesis, or whether the e-value corresponding to the random-effects null model does: for testing, the global null is much more natural. 
Waiting for a random-effect model to reach a certain threshold is counter-intuitive, since it might require many small trials to estimate the between-trial variability instead of focusing on testing the treatment effect. Moreover, the goal of rejecting the null hypothesis corresponding to this model can be quite strange. When testing a zero-effect null hypothesis, it assumes that there are true effects of harm and true effects of benefit among the trials and that their mean is exactly zero. ’ Junfeng Wang is right to point out that the COVID-19 vaccine trials are a very special example showing how meta-analysis can be done on interim trial results when many studies are simultaneously ongoing. As also stated in the abstract, ALL-IN meta-analysis can be performed on interim data, but does not have to. The analysis design requires no information about the number of patients in trials or the number of trials eventually included. In Section 1.2 we first give the example where we can collect a Z-statistic for each study out of K studies completed so far, so in a setting of conventional retrospective meta-analysis. What makes ALL-IN meta-analysis special is stated right after: the analysis on interim results is exactly the same. The connection to living systematic reviews is made earlier in the paper, on page 5 where we refer to Simmonds et al (2017). Conventional meta-analysis does not control type-I error rates in living systematic reviews, while ALL-IN meta-analysis does. We do see the need to make this connection more explicit: allowing updates of systematic reviews based on interim data is what “breathes life into living systematic reviews”. Also following another reviewer’s remarks, we will use our example of ALL-IN-META-BCG-CORONA in Section 3 to make more explicit what ALL-IN methods can do that is not possible with any other approach. 
Future research should definitely focus on whether the method can be extended to network meta-analysis, but we have not conducted such research yet. We are pleased to see that the paper convinced the reviewer that the method is easily extended to IPD meta-analysis. We have added these considerations to the discussion section of the Version 2 paper. Covariate adjustment is a major field of research in the literature on e -values and anytime-valid statistics. We propose to add the following paragraph to the discussion section in Version 2 of the paper: This paper introduces ALL-IN meta-analysis based on Z-score methods of meta-analysis, as introduced in standard works like by Borenstein et al. (2009). We mainly focus on testing rather than estimation. For testing, extensions to IPD meta-analysis based on exact e-values (rather than Z-score approximations) follow easily since both can be combined by multiplication, as shown in our ALL-IN-META-BCG-CORONA example. Within this focus on testing with type-I error control, the heterogeneity question is less explicit, since under the null-hypothesis there is no heterogeneity, as discussed in Section 3.3. In future work we will provide more details on estimation under heterogeneity, with anytime-valid confidence intervals for the fixed-effect (singular), fixed-effects (plural) and random-effects model. Extensions to network meta-analysis seem possible, but are not yet our main direction. With regard to meta-regression, much development can be expected from work on e-values in settings of composite null-hypotheses, for which research is ongoing that extends to linear regression (e.g. Pérez-Ortiz et al., (2024)). For IPD meta-analyses that take covariates into account, analysis of randomized controlled trials can use the so-called ‘Model-X’ e-value approach. Pérez-Ortiz, M. F., Lardy, T., de Heide, R., & Grünwald, P. D. (2024). E-statistics, group invariance and anytime-valid testing. The Annals of Statistics, 52(4), 1410-1432. 
Grünwald, P., Henzi, A., & Lardy, T. (2024). Anytime-valid tests of conditional independence under model-X. Journal of the American Statistical Association, 119(546), 1554-1565. Minor comments Junfeng is completely right that we forgot something: the betting strategy. We propose to correct the text as follows: For example putting 1/3 on vaccine and 2/3 on placebo: 1/3· 70/170 · 170/70 + 2/3· 100/170 ·170/100 = 1. No matter how we invest in the two outcomes, (e.g. try putting 1/2 on vaccine and 1/2 on placebo, or something different) in expectation under the null we multiply the initial investment by 1 . This is a great comment! Indeed the ones betting on development of an effective vaccine – e.g. the NIH in funding the trials (taking ‘multiple shots on goal’ to just ‘let the chips fall’) – hope to find very few events of COVID-19 in the vaccine group of a clinical trial. We will adjust how we phrase this to eliminate this confusion in Version 2 of the paper. Competing Interests: No competing interests were disclosed. Close Report a concern COMMENT ON THIS REPORT Comments on this article Comments (0) Version 2 VERSION 2 PUBLISHED 19 May 2022 ADD YOUR COMMENT Comment keyboard_arrow_left keyboard_arrow_right Open Peer Review Reviewer Status info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. 
Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Reviewer Reports Invited Reviewers 1 2 3 Version 2 (revision) 19 Jun 25 read Version 1 19 May 22 read read read Junfeng Wang , Utrecht University, Utrecht, The Netherlands Ewelina Rogozinska , University College London, London, UK Shubhendu Trivedi , Massachusetts Institute of Technology (MIT), Cambridge, USA Comments on this article All Comments (0) Add a comment Sign up for content alerts Sign Up You are now signed up to receive this alert Browse by related subjects keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2025 Trivedi S. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 12 Aug 2025 | for Version 2 Shubhendu Trivedi , Massachusetts Institute of Technology (MIT), Cambridge, MA, USA 0 Views copyright © 2025 Trivedi S. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (0) Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions No further comments to make. The revised version improves on the original manuscript, which was already top notch in my view. Competing Interests No competing interests were disclosed. 
Reviewer Expertise Machine Learning, Statistics, Conformal Prediction, Sequential Testing, Drug Discovery. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. reply Respond to this report Responses (0) Trivedi S. Peer Review Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.178216.r393268) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/11-549/v2#referee-response-393268 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2022 Trivedi S. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 21 Oct 2022 | for Version 1 Shubhendu Trivedi , Massachusetts Institute of Technology (MIT), Cambridge, MA, USA 0 Views copyright © 2022 Trivedi S. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. 
Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions The paper presents a new method for meta-analysis, motivated partly by (and for "breathing life" into) living systematic reviews that are used in the clinical domain which provide recommendations to prevent research waste. The authors christen their method to be ALL-IN meta-analysis, which is Anytime Live and Learning Interim Meta-Analysis. "Anytime" meaning analysis can be updated at any time and can control for type I error irrespective of any other decision making along the way. "Live" allows for a bottom-up collaboration of different trials; a trial can be initiated in any way, and we can include data from the meta-analysis itself. "Interim" permits for a combination of data from trials that are still ongoing. The paper begins with a topical motivation from the covid19 pandemic, while emphasizing that the methodology could help better evidence combination, collaboration, and communication during later pandemics, or even smaller clinical trials. Using a single trial and specifications issued by the FDA for the covid19 vaccine trials (regarding vaccine efficacy and evidence against a null hypothesis say 30% VE), the general betting game that is the language central to much of the contribution of the paper is introduced. It is shown that the same can also be written in terms of likelihood ratios and examples for scores are calculated for a Pfizer trial and a CureVac trial. The betting based methodology allows the statistical analysis to not simply be all or nothing (like p testing). In the all or nothing setting, we cannot continue from one trial to another without violating type I error rates, while in the betting (ALL-IN) setting one can simply update patients later on. This also permits for better efficiency (we can understand the number of participants needed to answer a research question) and collaboration (since we can combine analysis as data becomes available). 
The language of betting also can be interpreted in various equivalent ways (likelihood ratios, conservative p values, e values) that also allow for easy and crisp communication about the analysis. The intuitions of the language of betting are made more precise using standard tools in the literature (Markov's inequality, and Ville's inequality). Further, the betting score underlying the test is an e-value which further permits statistical analysis (using the tools cited). Logrank Z statistics are used as a running example for meta-analysis on the summary statistics -- we can simply collect the Z statistics Z_i from each trial, which can easily be combined (as shown in equation 3). Notably if we add in interim data, then from the perspective of Ville's inequality, they are indistinguishable for testing. The methodology also allows for combining data from trials without requiring a common design -- this can easily be done by deciding upon a min mu parameter for each trial, using which one can still get a valid combination of different trials with valid type I guarantees. Further, the method not only captures whether an effect is statistically significant or not, it also captures evidence up till now. The language of the "implied target" of Shafer is used to make this precise, which in turn can also be used to quantify how much will the evidence change if a new study with some mu and N is added. The paper also reports testing the methodology during the covid19 pandemic in two meta-analyses. One involving 7 trials, and the other involving 4 trials - considering different populations (healthcare workers, and the elderly). The trials involved testing if BCG could help with COVID19 immunity. The results are discussed while discussing issues (and recommendations) for meta-analysis design, systematic search for trials, systematic reviews for trial inclusion, data upload, and disseminating results. In general, I found the paper very well written. 
The methodology is described very clearly, along with a glimpse of the underlying statistical tools available. The advantages and recommendations that the methodology has/implies are also discussed in detail. The underlying mathematics for testing is standard, but as far as I understand this is the first application for it in the setting considered in the paper. I would recommend the paper for acceptance. Is the rationale for developing the new method (or application) clearly explained? Yes Is the description of the method technically sound? Yes Are sufficient details provided to allow replication of the method development and its use by others? Yes If any results are presented, are all the source data underlying the results available to ensure full reproducibility? No source data required Are the conclusions about the method and its performance adequately supported by the findings presented in the article? Yes Competing Interests No competing interests were disclosed. Reviewer Expertise Machine Learning, Statistics, Conformal Prediction, Sequential Testing, Drug Discovery. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. reply Respond to this report Responses (1) Author Response 19 Jun 2025 Judith ter Schure, Machine Learning, CWI, Amsterdam, The Netherlands We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Shubhendu for his very kind and thoughtful reflection on our paper. 
We actually like ‘Anytime Live and Learning Interim Meta-Analysis’ as a separate interpretation of the ALL-IN acronym, even though this might have been a typo. In this paper we introduced the ALL-IN method by examples in which a smallest effect-size of interest could be set: the FDA COVID-19 vaccine game and the BCG collaboration. While this setting is ubiquitous in theory – with every power analysis setting a maximum sample size controlling type-II errors by pretending to be based on such an effect-size of minimal interest – in practice many clinical researchers have difficulty setting one. Moreover, in noninferiority settings it is even impossible to set one. One of us has discussed these two issues briefly in a discussion piece (Ter Schure, 2024). While the ALL-IN methods presented in our paper are optimal in both simplicity and efficiency in settings with a smallest effect-size of interest, these other settings need further research, as well as their own introduction paper. As these settings require to learn the betting strategy, or alternative hypothesis parameter, from the data, we might as well call it Anytime Live and Learning meta-analysis! ter Schure, J. (2024). Judith ter Schure’s contribution to the Discussion of ‘Safe testing’ by Grünwald, de Heide, and Koolen. Journal of the Royal Statistical Society Series B: Statistical Methodology , 86 (5), 1157-1159. View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern Trivedi S. Peer Review Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.77953.r146412) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
The direct URL for this report is: https://f1000research.com/articles/11-549/v1#referee-response-146412 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2022 Rogozinska E. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 16 Sep 2022 | for Version 1 Ewelina Rogozinska , MRC Clinical Trials Unit, Institute of Clinical Trials and Methodology, University College London, London, UK 0 Views copyright © 2022 Rogozinska E. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Approved With Reservations info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions The work by ter Schure and Grünwald outlines an alternative approach to evidence synthesis, aiming to include emerging evidence in real time without increasing type-1 error. The authors suggest that their ALL-IN method can “breathe life into living systematic reviews, through better and simpler statistics, efficiency, collaboration and communication”. As much as I agree with the three final points and fully support this notion, I have some doubts regarding the professed ‘simplicity’ of the proposed statistical approach, implementation, and generalizability of the method. 
Firstly, the approach has been developed and tested only in one very unusual setting (Covid-19 pandemic), in which the accumulation of evidence over a short space of time was extreme and sharing of data and collaboration was greater than witnessed in previous years. According to Heinze et al. (manuscript in submission) and their proposed framework of four phases of methodological research in biostatistics, the ALL-IN method would be classified as a method in a second phase of its development. Consequently, it requires further evaluation in a range of settings and refinement before it could be considered as a viable alternative to other available methods. This should be discussed in their paper, with declarations more balanced to reflect the single setting in which their method was applied. Secondly, contrary to the authors' claim that the introduction of terminology from game theory makes it easier to communicate the uncertainties, I am finding the sections using betting language difficult to follow. The evidence synthesis community still to some extent grapples with more standard methods of advanced evidence synthesis (Wang et al. BMJ 2021; 373: n736). 1 Thus, the introduction of new concepts (or their reintroduction) should be carefully thought through. Overall, I feel the manuscript would benefit from limiting the use of references to betting and investments to an essential minimum. Finally, the authors present a real-life example of their method using BCG vaccines trials for Covid-19. The presented example resembles an approach more akin to prospective individual participant data (IPD) meta-analysis than a living systematic review or also the referenced FAME approach - both relying on aggregate rather than individual participant data. The challenges associated with accessing IPD, the non-standard approach to data analysis and lack of clear proof of its benefits push this method toward “interesting” developments rather than “a new way forward”. 
The authors should explain more clearly how their method could help aggregate-level evidence synthesis or refocus the scope to IPD based projects. Furthermore, it would be interesting to learn how the ALL-IN compares to commonly used methods in terms of efficiency and reliability of obtained results. Concluding, the presented method is an interesting approach to evidence synthesis; however, at the current stage of its development, it requires further evaluation of its utility for the evidence synthesis to be able to bet on it. Is the rationale for developing the new method (or application) clearly explained? Yes Is the description of the method technically sound? Partly Are sufficient details provided to allow replication of the method development and its use by others? Yes If any results are presented, are all the source data underlying the results available to ensure full reproducibility? No source data required Are the conclusions about the method and its performance adequately supported by the findings presented in the article? Partly References 1. Wang H, Chen Y, Lin Y, Abesig J, et al.: The methodological quality of individual participant data meta-analysis on intervention effects: systematic review. BMJ . 2021. Publisher Full Text Competing Interests No competing interests were disclosed. Reviewer Expertise Conduct of systematic reviews and methodological reviews, IPD meta-analysis, bias in evidence synthesis, use of evidence synthesis in clinical practice guidelines, I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. reply Respond to this report Responses (1) Author Response 19 Jun 2025 Judith ter Schure, Machine Learning, CWI, Amsterdam, The Netherlands We apologize for this very late reply. 
However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Ewelina Rogozinska for her review and for supporting our take on efficiency, collaboration and communication. We also thank her for expressing her concerns about our hopeful, but perhaps naïve, statements of simplicity in the setting of sequential meta-analysis. We indeed do not have any empirical evidence that what we propose is simple to understand. What we do know is that other available approaches to control type-I error rates in living systematic reviews have proven to be very difficult (Simmonds et al, 2017), hardly ever used and even, for general use, were recommended against by a scientific committee advising Cochrane on the matter in 2018 (reference below). Our main argument for simplicity is nicely pointed out by reviewer Shubhendu Trivedi, writing ‘ we can simply collect the Z statistics Z_i from each trial, which can easily be combined (as shown in equation 3). Notably, if we add in interim data, then from the perspective of Ville's inequality, they are indistinguishable for testing. ’ So ALL-IN meta-analysis needs the same summary statistics as default Z-score approaches in conventional meta-analysis, and in terms of implementation that is indeed not simpler, but also not harder than conventional meta-analysis. The major point is that the procedure is the same whether the meta-analysis is the first analysis, or whether it is the 10 th update in a living systematic review, or whether it is the 100 th update in a prospective meta-analysis of interim results of ongoing trials. 
The latter nicely summarized by Rogozinska as ‘ including emerging evidence in real time without increasing type-1 error ’. There is no other statistical approach in use in meta-analysis for which retaining type-1 error control is that simple*. Other methods need a maximum sample size, an alpha-spending function, a stopping rule to enforce on the trials, and we are convinced that these are considerations that are a lot less simple than what we propose. We have added a section to the Version 2 of the paper making explicit that this lack of sample size restrictions is a unique feature of ALL-IN meta-analysis that does not hold for any other statistical approach in use. The introduction section on Statistics now states (references in the paper): Bottom-up living meta-analysis The lack of restrictions on sample size is a unique feature of ALL-IN meta-analysis that sets it apart from other statistical approaches to living systematic reviews , ‘adaptive’ (Tierney et al., 2021) or ‘real-time’ (Petkova et al, 2020) prospective meta-analysis and ‘metatrials’ (Van Haren et al, 2020). These other approaches require a maximum sample size or a maximum number of studies to guarantee type-I error control and interval coverage for all updates of the meta-analysis. The crucial difference with ALL-IN is that for those methods to be valid, participating in a prospective meta-analysis requires outside control over a trial’s data collection. Outside control is needed in the case of the ‘Framework for prospective adaptive meta-analysis’ (FAME) where a single sample size is set comprised of data from all ongoing trials, possibly at an interim of some of them, and single-analysis statistics is used. The consequence is that the intervals and p-values reported are only valid if never updated, so the analysis assumes that data collection stops after that point. 
No new analyses are valid after that maximum sample size, since all alpha is spent and possibly accumulation bias is introduced (Ter Schure & Grünwald, 2019). Similarly, outside control is needed in the operationalization of group-sequential or alpha-spending methods that need the information size relative to the final analysis (Simmonds et al, 2017), and therefore also need to enforce such a maximum sample size for the reported results to be valid. Finally, even in simulation-calibrated Bayesian approaches restrictions on interims (often) and maximum sample size (always) are imposed for the simulations to terminate. So the frequentist operating characteristics (type-I error, coverage of intervals) of the analysis are not known if trials wish to continue data collection after that maximum sample size and analyze again, or in other ways deviate from the simulated scenarios. Hence all available methods in living systematic reviews and prospective meta-analysis are by design quite top-down, “using preestablished stopping rules for safety, efficacy, futility, and harm” (Petkova et al, 2020). With regard to simplicity of implementation, we have decided to extend Section 3 on Collaboration with a complete worked-out and reproducible example of an ALL-IN meta-analysis. This shows, among other things, that the simplicity of the statistics strengthens the collaborations when participating trials are faced with difficulties that would otherwise invalidate the analysis. Examples of these include recruitment difficulties, delays in starting the trial, and meta-analysis results that might make a trial change course and directly influence the future sample size in an unforeseen way. We will address the first, second and final point in the order given. 
We hope that by proposing some adjustments to the paper in reply to each of her comments, we can convince Ewelina Rogozinska that the description of the method is technically sound (question 2, now answered by ‘Partly’) and that the conclusions about the method and its performance are adequately supported by the findings presented in the article (question 5, now answered by ‘Partly’). Hence we hope our V2 will be accepted without any further reservations. Firstly We wrote our paper from the hopeful perspective that many good things in science that came from the COVID-19 pandemic are here to stay, like an increase in prospective meta-analysis, data sharing and standardization of e.g. outcome measures and diagnostics. We also wrote our paper for an audience that agrees that they should. But there is nothing in the ALL-IN method that would prevent its use in a retrospective setting of meta-analysis based on summary statistics from published papers. The presentation based on Z -scores, and the worked-out example that we added in Section 3 show that, even though we favor IPD meta-analysis or collaboratively sharing summary statistics (the exact e -value itself for example), an approximate analysis often is already very good and flexible. We added the following text to the abstract: “ Because the intention of the analysis does not change the validity of the results, the results of the analysis can change the intentions (‘optional stopping’ and ‘optional continuation’ based on the results so far). On the one hand: any analysis can be turned into a living one, or even become ‘real-time’ by updating with new trial data or even including interim data from trials that are still ongoing – without any changes in the cut-offs for testing or the method for interval estimation. 
On the other hand: no stopping rule needs to be enforced for the analysis to remain valid, so participating in a prospective meta-analysis does not require outside control over data collection.” This characteristic of ALL-IN meta-analysis might even help further popularize these good things in science that came from the pandemic. Secondly We wrote our paper for an audience that considers sequential statistics to be complicated. We believe that is still our main audience because most clinical trials are not analyzed with interim analyses and most meta-analysis updates don’t take into account that repeated analyses invalidate both the p -value and the confidence intervals reported. While spending the better part of the past ten years on sequential analysis, we ourselves consider most of sequential statistics to be complicated. This is why we think that our audience would be confused if we just provided a ‘cook-book’ of ALL-IN methods and simply stated that type-I error control and coverage of intervals are guaranteed, while using the same method over and over again. We believe that a strong intuition is needed for an audience to accept that this can be done without any corrections to the method. The game theory terminology is that strong intuition. While absent in most of applied statistics today, this intuition has always been around in the history of probability theory (e.g. the work of Christiaan Huygens on games in the 17th century), and sequential statistics (e.g. George Alfred Barnard’s discussion of Wald’s probability ratio test in 1947). In the currently thriving literature on e -values and anytime-valid confidence intervals in mathematical statistics, the intuition of games is present almost everywhere and serves both as intuition to present existing results and as a very fruitful analogy that drives new results, see for example the overview article by Ramdas, Grünwald, Vovk and Shafer (2023). 
However, we agree that a good discussion is possible on whether references to betting and investments benefit intuition about statistics in general. We think they can, but as you can see from the discussion following Glenn Shafer’s JRSSA paper about it, there is plenty of disagreement. Yet for intuition about the sequential nature of ALL-IN meta-analysis, we do feel that the references to betting are essential for intuition, and the method should not be presented to readers familiar with the difficulties of sequential analysis without providing this intuition. Finally Our extension of the BCG example now shows more clearly that the ALL-IN method can be used on IPD (based on exact e -values), or can be used on summary statistics (in this example: the logrank Z-score and number of events). In fact, liaising with trials to share summary statistics can also be very fruitful just like in the FAME approach (e.g. sharing the exact e-value per trial). All the analyses of the BCG example can be reproduced based on openly available summary statistics. We hope Ewelina Rogozinska agrees that this illustrates how aggregate-level evidence synthesis is just as promising with ALL-IN methods as IPD based projects. We agree that future work should compare ALL-IN to other methods. For this introduction paper, we now focus on the settings where such a comparison is of less concern, as we write in the abstract: if the analysis changes the intentions. Take for example the situation in which a research team decides to update a meta-analysis and commit to a living systematic review, and after a first update decides to liaise with future trials in a prospective meta-analysis for future updates on interim results. In that situation there simply is no other statistical technique that guarantees type-I error control and confidence interval coverage, because ‘optional continuation’ decisions are made that are not prespecified and that no stopping rule-based method can handle. 
Hence ALL-IN breathes life into living systematic reviews in a way that no other method can. Cochrane Scientific Committee Schmid, C., Senn, S., Sterne, J., Kulinskaya, E., Posch, M., Roes, K., and McKenzie, J. (2018). Should Cochrane apply error-adjustment methods when conducting repeated meta-analyses? *It can be that simple in a fully subjective Bayesian analysis, but we have not seen any of those in use. Bayesian analysis in practice uses default priors (and possibly also informative priors, but never exclusively) and checks for frequentist operating characteristics by simulations. Aaditya Ramdas, Peter Grünwald, Vladimir Vovk, Glenn Shafer "Game-Theoretic Statistics and Safe Anytime-Valid Inference," Statistical Science, Statist. Sci. 38(4), 576-601, (November 2023) Glenn Shafer, Testing by Betting: A Strategy for Statistical and Scientific Communication, Journal of the Royal Statistical Society Series A: Statistics in Society , Volume 184, Issue 2, April 2021, Pages 407–431, https://doi.org/10.1111/rssa.12647 View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern Rogozinska E. Peer Review Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.77953.r146413) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/11-549/v1#referee-response-146413 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2022 Wang J. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 
19 Aug 2022 | for Version 1 Junfeng Wang , Division of Pharmacoepidemiology and Clinical Pharmacology, Utrecht Institute for Pharmaceutical Sciences, Utrecht University, Utrecht, The Netherlands 0 Views copyright © 2022 Wang J. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Approved With Reservations info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions In this manuscript, ter Schure and Grünwald proposed a new form of meta-analysis, namely ALL-IN meta-analysis, which can facilitate living systematic reviews. The authors provided a different understanding of evidence synthesis, and a novel way of performing meta-analyses. The new method is illustrated in the language of betting, which is complement with the acronym of ALL-IN. Major comments: The calculation of probability of next event in group X (e.g. in page 3, the fraction of 0.41 of COVID-19 events to occur in the vaccine group, 0.41=70/(100+70)), is only valid when the number of participants is infinite in each group. When the number of participants still at risk in one group decreases, the probability of next event occurring in this group will also decrease. So the constant probability assumption may not be valid in real practice. 
In page 14 and 15, the authors mentioned heterogeneity several times, however, the authors avoid giving a direct answer or solution to heterogeneous results from primary studies. This should be addressed or at least extensively discussed (as a limitation). The COVID-19 vaccine trials are used as an example. However, all these trials are conducted recently and in a short time period (compared to other treatment). How is the generalizability of this new methods in other treatments? The method is easily extended to IPD meta-analysis. How well this method can be extended to network meta-analysis of multiple treatments? How can covariates, both on study level and individual level, be adjusted in this new framework? Minor comments: In Page 7, section 1.1, the equation “70/170*170/70 + 100/170*170/100 = 1”, seems not correct, the right side should be 2. But I assume the authors forget adding something in the left side of the equation. Please check. “Betting on vaccine” in this paper actually means betting on the next event will occur in the vaccine group (which means vaccine is not effective). It can be a bit misleading, since in common sense, reader may think betting on vaccine means betting on vaccine can protect participants. Is the rationale for developing the new method (or application) clearly explained? Yes Is the description of the method technically sound? Partly Are sufficient details provided to allow replication of the method development and its use by others? Yes If any results are presented, are all the source data underlying the results available to ensure full reproducibility? No source data required Are the conclusions about the method and its performance adequately supported by the findings presented in the article? Partly Competing Interests No competing interests were disclosed. 
Reviewer Expertise DTA meta-analysis, Clinical prediction models, Health Technology Assessment I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. reply Respond to this report Responses (1) Author Response 04 Mar 2025 Judith ter Schure, Machine Learning, CWI, Amsterdam, The Netherlands We apologize for this very late reply. However late, we still hope to contribute to a complete and accurate scientific literature with a Version 2 of our paper that incorporates all the very useful comments made by the reviewers. Version 2 also further illustrates the unique features of the method by extending the example ALL-IN prospective meta-analysis in the paper with a reflection on the collaboration, summary of results and references to a complete replication package. We thank Junfeng Wang for his thorough reading of our paper. We are especially grateful that he spotted a mistake in Section 1.1, pointed out as Minor comment 1, and was so kind as to state “ I assume the authors forget adding something ”. We indeed forgot to add the betting strategy, which we added in our Version 2 of the paper (see below at Minor comment 1). We also found a mistake ourselves in Section 1.3: there was a square root missing below equation (6), needing a division by 2 instead of 4 (which comes from the standard deviation of a Bernoulli-0.5 likelihood). ALL-IN can indeed provide a “ different understanding of evidence synthesis, and a novel way of performing meta-analyses ”, as stated by Junfeng Wang. Although ALL-IN is a novel statistical approach to meta-analysis, it does provide statistical summaries for evidence synthesis that can be reported in a familiar way – e-values take the role of p-values, and anytime-valid confidence intervals the role of conventional confidence intervals. 
Our worked-out example illustrates this in Version 2 of the paper. Yet due to their anytime-validity, this statistical summary can be Live and can be the Leading source of information in decisions on future research, even at INterim stages of clinical trials. So while the statistical approach fits within mainstream understanding of evidence synthesis as an external and retrospective summary, it can also shift that understanding towards evidence synthesis as an active/involved (advising future trials/liaising with ongoing trials) and possibly prospective tool to facilitate collaboration and increase efficiency. In our Version 2 of the paper we have emphasized this aspect more, and moreover, stressed how the approach to prospective meta-analysis differs from other adaptive approaches to prospective analysis like the Framework for Prospective, Adaptive Meta-analysis (FAME), and Bayesian adaptive methods. The main difference is that the validity of the ALL-IN analysis does not rely on enforcing a maximum sample size or stopping rule, such that prospective collaborations can be more bottom-up rather than outsourcing decisions to an external top-down meta-analysis team. This fact needed a better illustration in Section 3 on Collaboration, so we rewrote that section by extending the discussion of our BCG-CORONA example with the results from that ALL-IN meta-analysis, thus including a completely worked-out example to the paper. We will address all further comments in the order given. We hope that by proposing some adjustments to the paper in reply to each of his comments, we can convince Junfeng Wang that the description of the method is technically sound (question 2, now answered by ‘Partly’) and that the conclusions about the method and its performance are adequately supported by the findings presented in the article (question 5, now answered by ‘Partly’). Hence we hope our Version 2 will be accepted without any further reservations. 
Major comments We agree with Junfeng Wang that our representation of the FDA game is approximate and not exact, simplifying the story such that we could calculate betting scores / e -values based on the COVID-19 event counts alone (to keep it simple as an illustrative example). Following Junfeng Wang’s concerns, we will make this more explicit in Version 2 of the paper, after the brief mention halfway through the first paragraph on the right side of page 3 ( ‘Most COVID-19 vaccine trials randomized large numbers of participants 50:50 vaccine:placebo such that we can assume that also throughout the trial the participants at risk stayed approximately balanced.’ ). We add the following text and footnote there: This allows for a back-of-the-envelope calculation to reinterpret the design for the COVID-19 vaccine trials in the language of betting\footnote{Note that in Section 1 (specifically at the end of Section 1.1) we discuss how to exactly analyze such time-to-event data using e-values taking into account that the risk set changes after occurrence of events and censoring. We expect such a reanalysis to be very well approximated by the simple calculations that are used for illustration in this introduction of the FDA game, at least for large and balanced trials like the one by Pfizer/BioNtech. Even for trials that are a lot smaller, this approximation is quite good. For example, the exact logrank e-value of 1.88 that we report for the NL trial in Table 1 can be approximated using this back-of-the-envelope calculation for the e-value as follows: . In this trial only 1496 participants were 50:50 randomized (compared to the 40 thousand in the Pfizer/BioNtech trial), and 96 events observed in the treatment and 110 events in the control group (Ter Schure et al, 2022); testing a 20% VE against a 0% VE.} We do believe that our approximation stays true to the essence of these large vaccine trials. 
For the Pfizer/BioNtech trial, Polack et al (2020) report a 95% vaccine efficacy based on a beta-binomial model taking into account the confirmed cases per 1000 person-years of follow-up based on 8 cases in the vaccine and 162 cases in the control group. This corresponds very well to a simple calculation (1 – 8/162)*100 = 95% assuming a perfect 50:50 ratio of follow-up time or indeed an infinite number of participants in each group. The footnote points to existing text at the end of Section 1.1, the second paragraph of page 8 (in version 1 of the paper), where we do make a recommendation for actual use going forward. Just like a p -value logrank test, the e -value logrank test takes into account, as well-spotted by Junfeng Wang, that ‘ when the number of participants still at risk in one group decreases, the probability of the next event occurring in this group will also decrease’ . Moreover, the exact e -value logrank test can be used in sequential settings in which the p -value logrank test fails, such as in sequential analysis in unbalanced randomized experiments (where p-value tests cannot be used based on group-sequential or alpha spending methods that rely on Gaussian approximations). As we write in the paper: We will use the logrank Z-statistic as a running example for meta-analysis on summary statistics. For an IPD meta-analysis (on individual patient data), however, we recommend to use the exact e-value logrank test from Ter Schure et al. (2024) that is valid regardless of the randomization (e.g. 1:1 balanced or 1:2 unbalanced), the number of participants at risk, the number of events or the size of the effect. We believe we do address heterogeneity when we state the following on page 14: ‘heterogeneity in their effect sizes (e.g. one 20%, one 50%, one 25%) does not matter for their joint ability to reject the global null hypothesis of no effect in all trials. 
So for testing the global null, trials are allowed to be heterogeneous in where they are in the space of the alternative hypothesis H 1 = {VE: 20% ≤ VE ≤ 100%}. For estimation, however, it is not clear what the ALL-IN confidence interval is estimating if we assume that the effects in the trials are very different. Still, as a first summary, a typical effect size (Peto, 1987) might be useful if we are unable to estimate a random effects model. The development of confidence sequences for random-effects meta-analysis is a major goal for future work. We do not, however, believe that the evidence in a line of research should be monitored based on whether this interval excludes the null hypothesis, or whether the e-value corresponding to the random-effects null model does: for testing, the global null is much more natural. Waiting for a random-effect model to reach a certain threshold is counter-intuitive, since it might require many small trials to estimate the between-trial variability instead of focusing on testing the treatment effect. Moreover, the goal of rejecting the null hypothesis corresponding to this model can be quite strange. When testing a zero-effect null hypothesis, it assumes that there are true effects of harm and true effects of benefit among the trials and that their mean is exactly zero. ’ Junfeng Wang is right to point out that the COVID-19 vaccine trials are a very special example showing how meta-analysis can be done on interim trial results when many studies are simultaneously ongoing. As also stated in the abstract, ALL-IN meta-analysis can be performed on interim data, but does not have to. The analysis design requires no information about the number of patients in trials or the number of trials eventually included. In Section 1.2 we first give the example where we can collect a Z-statistic for each study out of K studies completed so far, so in a setting of conventional retrospective meta-analysis. 
What makes ALL-IN meta-analysis special is stated right after: the analysis on interim results is exactly the same. The connection to living systematic reviews is made earlier in the paper, on page 5 where we refer to Simmonds et al (2017). Conventional meta-analysis does not control type-I error rates in living systematic reviews, while ALL-IN meta-analysis does. We do see the need to make this connection more explicit: allowing updates of systematic reviews based on interim data is what “breathes life into living systematic reviews”. Also following another reviewer’s remarks, we will use our example of ALL-IN-META-BCG-CORONA in Section 3 to make more explicit what ALL-IN methods can do that is not possible with any other approach. Future research should definitely focus on whether the method can be extended to network meta-analysis, but we have not conducted such research yet. We are pleased to see that the paper convinced the reviewer that the method is easily extended to IPD meta-analysis. We have added these considerations to the discussion section of the Version 2 paper. Covariate adjustment is a major field of research in the literature on e -values and anytime-valid statistics. We propose to add the following paragraph to the discussion section in Version 2 of the paper: This paper introduces ALL-IN meta-analysis based on Z-score methods of meta-analysis, as introduced in standard works like by Borenstein et al. (2009). We mainly focus on testing rather than estimation. For testing, extensions to IPD meta-analysis based on exact e-values (rather than Z-score approximations) follow easily since both can be combined by multiplication, as shown in our ALL-IN-META-BCG-CORONA example. Within this focus on testing with type-I error control, the heterogeneity question is less explicit, since under the null-hypothesis there is no heterogeneity, as discussed in Section 3.3. 
In future work we will provide more details on estimation under heterogeneity, with anytime-valid confidence intervals for the fixed-effect (singular), fixed-effects (plural) and random-effects model. Extensions to network meta-analysis seem possible, but are not yet our main direction. With regard to meta-regression, much development can be expected from work on e-values in settings of composite null-hypotheses, for which research is ongoing that extends to linear regression (e.g. Pérez-Ortiz et al., (2024)). For IPD meta-analyses that take covariates into account, analysis of randomized controlled trials can use the so-called ‘Model-X’ e-value approach. Pérez-Ortiz, M. F., Lardy, T., de Heide, R., & Grünwald, P. D. (2024). E-statistics, group invariance and anytime-valid testing. The Annals of Statistics, 52(4), 1410-1432. Grünwald, P., Henzi, A., & Lardy, T. (2024). Anytime-valid tests of conditional independence under model-X. Journal of the American Statistical Association, 119(546), 1554-1565. Minor comments Junfeng is completely right that we forgot something: the betting strategy. We propose to correct the text as follows: For example putting 1/3 on vaccine and 2/3 on placebo: 1/3· 70/170 · 170/70 + 2/3· 100/170 ·170/100 = 1. No matter how we invest in the two outcomes, (e.g. try putting 1/2 on vaccine and 1/2 on placebo, or something different) in expectation under the null we multiply the initial investment by 1 . This is a great comment! Indeed the ones betting on development of an effective vaccine – e.g. the NIH in funding the trials (taking ‘multiple shots on goal’ to just ‘let the chips fall’) – hope to find very few events of COVID-19 in the vaccine group of a clinical trial. We will adjust how we phrase this to eliminate this confusion in Version 2 of the paper. View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern Wang J. 
Peer Review Report For: ALL-IN meta-analysis: breathing life into living systematic reviews and prospective meta-analyses [version 2; peer review: 1 approved, 2 approved with reservations] . F1000Research 2025, 11 :549 ( https://doi.org/10.5256/f1000research.77953.r145459) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/11-549/v1#referee-response-145459 Alongside their report, reviewers assign a status to the article: Approved - the paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations - A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved - fundamental flaws in the paper seriously undermine the findings and conclusions Adjust parameters to alter display View on desktop for interactive features Includes Interactive Elements View on desktop for interactive features Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. 
Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Stay Updated Sign up for content alerts and receive a weekly or monthly email with all newly published articles Register with F1000Research Already registered? Sign in Not now, thanks close PLEASE NOTE If you are an AUTHOR of this article, please check that you signed in with the account associated with this article otherwise we cannot automatically identify your role as an author and your comment will be labelled as a “User Comment”. If you are a REVIEWER of this article, please check that you have signed in with the account associated with this article and then go to your account to submit your report, please do not post your review here. If you do not have access to your original account, please contact us . All commenters must hold a formal affiliation as per our Policies . The information that you give us will be displayed next to your comment. User comments must be in English, comprehensible and relevant to the article under discussion. We reserve the right to remove any comments that we consider to be inappropriate, offensive or otherwise in breach of the User Comment Terms and Conditions . Commenters must not use a comment for personal attacks. When criticisms of the article are based on unpublished data, the data should be made available. I accept the User Comment Terms and Conditions Please confirm that you accept the User Comment Terms and Conditions. Affiliation ✕ refresh Please enter your institution. 
Note: To add your institution or organisation, start typing the name and then select the correct name from the list. Where applicable, the name will appear in both the original language and in English. Do not paste in the name. If the name does not appear in the drop-down list, we will display the information you have entered. ✕ refresh Country/Region * USA UK Canada China France Germany Afghanistan Aland Islands Albania Algeria American Samoa Andorra Angola Anguilla Antarctica Antigua and Barbuda Argentina Armenia Aruba Australia Austria Azerbaijan Bahamas Bahrain Bangladesh Barbados Belarus Belgium Belize Benin Bermuda Bhutan Bolivia Bosnia and Herzegovina Botswana Bouvet Island Brazil British Indian Ocean Territory British Virgin Islands Brunei Bulgaria Burkina Faso Burundi Cambodia Cameroon Canada Cape Verde Cayman Islands Central African Republic Chad Chile China Christmas Island Cocos (Keeling) Islands Colombia Comoros Congo Cook Islands Costa Rica Cote d'Ivoire Croatia Cuba Cyprus Czech Republic Democratic Republic of the Congo Denmark Djibouti Dominica Dominican Republic Ecuador Egypt El Salvador Equatorial Guinea Eritrea Estonia Ethiopia Falkland Islands Faroe Islands Federated States of Micronesia Fiji Finland France French Guiana French Polynesia French Southern Territories Gabon Georgia Germany Ghana Gibraltar Greece Greenland Grenada Guadeloupe Guam Guatemala Guernsey Guinea Guinea-Bissau Guyana Haiti Heard Island and Mcdonald Islands Holy See (Vatican City State) Honduras Hong Kong Hungary Iceland India Indonesia Iran Iraq Ireland Israel Italy Jamaica Japan Jersey Jordan Kazakhstan Kenya Kiribati Kosovo (Serbia and Montenegro) Kuwait Kyrgyzstan Lao People's Democratic Republic Latvia Lebanon Lesotho Liberia Libya Liechtenstein Lithuania Luxembourg Macao Madagascar Malawi Malaysia Maldives Mali Malta Marshall Islands Martinique Mauritania Mauritius Mayotte Mexico Minor Outlying Islands of the United States Moldova Monaco Mongolia Montenegro Montserrat 
Morocco Mozambique Myanmar Namibia Nauru Nepal Netherlands Antilles New Caledonia New Zealand Nicaragua Niger Nigeria Niue Norfolk Island North Korea North Macedonia Northern Mariana Islands Norway Oman Pakistan Palau Palestinian Territory Panama Papua New Guinea Paraguay Peru Philippines Pitcairn Poland Portugal Puerto Rico Qatar Reunion Romania Russian Federation Rwanda Saint Helena Saint Kitts and Nevis Saint Lucia Saint Pierre and Miquelon Saint Vincent and the Grenadines Samoa San Marino Sao Tome and Principe Saudi Arabia Senegal Serbia Seychelles Sierra Leone Singapore Slovakia Slovenia Solomon Islands Somalia South Africa South Georgia and the South Sandwich Is South Korea South Sudan Spain Sri Lanka Sudan Suriname Svalbard and Jan Mayen Swaziland Sweden Switzerland Syria Taiwan Tajikistan Tanzania Thailand The Gambia The Netherlands Timor-Leste Togo Tokelau Tonga Trinidad and Tobago Tunisia Turkey Turkmenistan Turks and Caicos Islands Tuvalu UK USA Uganda Ukraine United Arab Emirates United States Virgin Islands Uruguay Uzbekistan Vanuatu Venezuela Vietnam Wallis and Futuna West Bank and Gaza Strip Western Sahara Yemen Zambia Zimbabwe Please select your country/region. You must enter a comment. Competing Interests Please disclose any competing interests that might be construed to influence your judgment of the article's or peer review report's validity or importance. Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. 
You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Please state your competing interests The comment has been saved. An error has occurred. Please try again. Cancel Post var lTitle = "ALL-IN meta-analysis: breathing life into...".replace("'", ''); var linkedInUrl = "http://www.linkedin.com/shareArticle?url=https://f1000research.com/articles/11-549/v2" + "&title=" + encodeURIComponent(lTitle) + "&summary=" + encodeURIComponent('Read the article by '); var deliciousUrl = "https://del.icio.us/post?url=https://f1000research.com/articles/11-549/v2&title=" + encodeURIComponent(lTitle); var redditUrl = "http://reddit.com/submit?url=https://f1000research.com/articles/11-549/v2" + "&title=" + encodeURIComponent(lTitle); linkedInUrl += encodeURIComponent('ter Schure J and Grünwald P'); var offsetTop = /chrome/i.test( navigator.userAgent ) ? 
4 : -10; var addthis_config = { ui_offset_top: offsetTop, services_compact : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_expanded : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_custom : [ { name: "LinkedIn", url: linkedInUrl, icon:"/img/icon/at_linkedin.svg" }, { name: "Mendeley", url: "http://www.mendeley.com/import/?url=https://f1000research.com/articles/11-549/v2/mendeley", icon:"/img/icon/at_mendeley.svg" }, { name: "Reddit", url: redditUrl, icon:"/img/icon/at_reddit.svg" }, ] }; var addthis_share = { url: "https://f1000research.com/articles/11-549", templates : { twitter : "ALL-IN meta-analysis: breathing life into living systematic reviews.... ter Schure J and Grünwald P, published by " + "@F1000Research" + ", https://f1000research.com/articles/11-549/v2" } }; if (typeof(addthis) != "undefined"){ addthis.addEventListener('addthis.ready', checkCount); addthis.addEventListener('addthis.menu.share', checkCount); } $(".f1r-shares-twitter").attr("href", "https://twitter.com/intent/tweet?text=" + addthis_share.templates.twitter); $(".f1r-shares-facebook").attr("href", "https://www.facebook.com/sharer/sharer.php?u=" + addthis_share.url); $(".f1r-shares-linkedin").attr("href", addthis_config.services_custom[0].url); $(".f1r-shares-reddit").attr("href", addthis_config.services_custom[2].url); $(".f1r-shares-mendelay").attr("href", addthis_config.services_custom[1].url); function checkCount(){ setTimeout(function(){ $(".addthis_button_expanded").each(function(){ var count = $(this).text(); if (count !== "" && count != "0") $(this).removeClass("is-hidden"); else $(this).addClass("is-hidden"); }); }, 1000); } close How to cite this report {{reportCitation}} Cancel Copy Citation Details $(function(){R.ui.buttonDropdowns('.dropdown-for-downloads');}); $(function(){R.ui.toolbarDropdowns('.toolbar-dropdown-for-downloads');}); $.get("/articles/acj/74223/178216") new F1000.Clipboard(); new 
F1000.ThesaurusTermsDisplay("articles", "article", "178216"); $(document).ready(function() { $( "#frame1" ).on('load', function() { var mydiv = $(this).contents().find("div"); var h = mydiv.height(); console.log(h) }); var tooltipLivingFigure = jQuery(".interactive-living-figure-label .icon-more-info"), titleLivingFigure = tooltipLivingFigure.attr("title"); tooltipLivingFigure.simpletip({ fixed: true, position: ["-115", "30"], baseClass: 'small-tooltip', content:titleLivingFigure + " " }); tooltipLivingFigure.removeAttr("title"); $("body").on("click", ".cite-living-figure", function(e) { e.preventDefault(); var ref = $(this).attr("data-ref"); $(this).closest(".living-figure-list-container").find("#" + ref).fadeIn(200); }); $("body").on("click", ".close-cite-living-figure", function(e) { e.preventDefault(); $(this).closest(".popup-window-wrapper").fadeOut(200); }); $(document).on("mouseup", function(e) { var metricsContainer = $(".article-metrics-popover-wrapper"); if (!metricsContainer.is(e.target) && metricsContainer.has(e.target).length === 0) { $(".article-metrics-close-button").click(); } }); var articleId = $('#articleId').val(); if($("#main-article-count-box").attachArticleMetrics) { $("#main-article-count-box").attachArticleMetrics(articleId, { articleMetricsView: true }); } }); var figshareWidget = $(".new_figshare_widget"); if (figshareWidget.length > 0) { window.figshare.load("f1000", function(Widget) { // Select a tag/tags defined in your page. In this tag we will place the widget. _.map(figshareWidget, function(el){ var widget = new Widget({ articleId: $(el).attr("figshare_articleId") //height:300 // this is the height of the viewer part. [Default: 550] }); widget.initialize(); // initialize the widget widget.mount(el); // mount it in a tag that's on your page // this will save the widget on the global scope for later use from // your JS scripts. This line is optional. 
//window.widget = widget; }); }); } close Error Close Add Reset F1000.MICROSERVICES.AFFILIATION = ''; $(document).ready(function () { $('.js-affiliations-form').each((index, form) => { new AffiliationForm({ formId: form.id, institutionErrorSelector: '.comment-enter-institution', departmentErrorSelector: '.comment-enter-department', placeSelector: '.js-add-comment-place', stateSelector: '.js-add-comment-state', zipCodeSelector: '.js-add-comment-zipcode', countrySelector: '.js-add-comment-country', countryErrorSelector: '.comment-enter-country', }); }); }); $(document).ready(function () { var reportIds = { "146691": 0, "146692": 0, "146693": 0, "139786": 0, "139785": 0, "138447": 0, "138450": 0, "142418": 0, "138451": 0, "142419": 0, "138448": 0, "138449": 0, "142421": 0, "146411": 0, "144238": 0, "146414": 0, "144239": 0, "146412": 27, "144237": 0, "146413": 32, "145458": 0, "393270": 0, "145459": 36, "144240": 0, "393268": 10, "393269": 0, "145462": 0, "140982": 0, "140983": 0, "145460": 0, "145461": 0, "140986": 0, "140987": 0, "140984": 0, "140985": 0, "140988": 1, "140989": 0, }; $(".referee-response-container,.js-referee-report").each(function(index, el) { var reportId = $(el).attr("data-reportid"), reportCount = reportIds[reportId] || 0; $(el).find(".comments-count-container,.js-referee-report-views").html(reportCount); }); var uuidInput = $("#article_uuid"), oldUUId = uuidInput.val(), newUUId = "40391d54-c732-4bba-bab0-04869b725c45"; uuidInput.val(newUUId); $("a[href*='article_uuid=']").each(function(index, el) { var newHref = $(el).attr("href").replace(oldUUId, newUUId); $(el).attr("href", newHref); }); }); An innovative open access publishing platform offering rapid publication and open peer review, whilst supporting data deposition and sharing. Browse Gateways Collections How it Works Contact For Developers Cookie Notice Privacy Notice RSS Submit Your Research Follow us © 2012-2026 F1000 Research Ltd. 
ISSN 2046-1402 | Legal | Partner of Research4Life • CrossRef • ORCID • FAIRSharing R.templateTests.simpleTemplate = R.template(' $text $text $text $text $text '); R.templateTests.runTests(); var F1000platform = new F1000.Platform({ name: "f1000research", displayName: "F1000Research", hostName: "f1000research.com", id: "1", editorialEmail: "[email protected]", infoEmail: "[email protected]", usePmcStats: true }); $(function(){R.ui.dropdowns('.dropdown-for-authors, .dropdown-for-about, .dropdown-for-myresearch');}); // $(function(){R.ui.dropdowns('.dropdown-for-referees');}); $(document).ready(function () { if ($(".cookie-warning").is(":visible")) { $(".sticky").css("margin-bottom", "35px"); $(".devices").addClass("devices-and-cookie-warning"); } $(".cookie-warning .close-button").click(function (e) { $(".devices").removeClass("devices-and-cookie-warning"); $(".sticky").css("margin-bottom", "0"); }); $("#tweeter-feed .tweet-message").each(function (i, message) { var self = $(message); self.html(linkify(self.html())); }); $(".partner").on("mouseenter mouseleave", function() { $(this).find(".gray-scale, .colour").toggleClass("is-hidden"); }); }); Sign In Remember me Forgotten your password? Sign In Cancel Email or password not correct. Please try again Please wait... $(function(){ // Note: All the setup needs to run against a name attribute and *not* the id due the clonish // nature of facebox... 
$("a[id=googleSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("GOOGLE"); $("form[id=oAuthForm]").submit(); }); $("a[id=facebookSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("FACEBOOK"); $("form[id=oAuthForm]").submit(); }); $("a[id=orcidSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("ORCID"); $("form[id=oAuthForm]").submit(); }); }); If you've forgotten your password, please enter your email address below and we'll send you instructions on how to reset your password. The email address should be the one you originally registered with F1000. Email address not valid, please try again You registered with F1000 via Google, so we cannot reset your password. To sign in, please click here . If you still need help with your Google account password, please click here . You registered with F1000 via Facebook, so we cannot reset your password. To sign in, please click here . If you still need help with your Facebook account password, please click here . Code not correct, please try again Reset password Cancel Email us for further assistance. Server error, please try again. If your email address is registered with us, we will email you instructions to reset your password. If you think you should have received this email but it has not arrived, please check your spam filters and/or contact for further assistance. Please wait... Register $(document).ready(function () { signIn.createSignInAsRow($("#sign-in-form-gfb-popup")); $(".target-field").each(function () { var uris = $(this).val().split("/"); if (uris.pop() === "login") { $(this).val(uris.toString().replace(",","/")); } }); });

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citing papers typically take a year or two to appear, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00