UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis

doi:10.12688/f1000research.55370.3

UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis

2024 · doi:10.12688/f1000research.55370.3

preprint OA: closed

Full text JSON View at publisher

Full text 244,612 characters · extracted from preprint-html · click to expand

UKB.COVID19: an R package for UK Biobank COVID-19... | F1000Research "use strict";function _typeof(t){return(_typeof="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t})(t)}!function(){var t=function(){var t,e,o=[],n=window,r=n;for(;r;){try{if(r.frames.__tcfapiLocator){t=r;break}}catch(t){}if(r===n.top)break;r=r.parent}t||(!function t(){var e=n.document,o=!!n.frames.__tcfapiLocator;if(!o)if(e.body){var r=e.createElement("iframe");r.style.cssText="display:none",r.name="__tcfapiLocator",e.body.appendChild(r)}else setTimeout(t,5);return!o}(),n.__tcfapi=function(){for(var t=arguments.length,n=new Array(t),r=0;r 3&&2===parseInt(n[1],10)&&"boolean"==typeof n[3]&&(e=n[3],"function"==typeof n[2]&&n[2]("set",!0)):"ping"===n[0]?"function"==typeof n[2]&&n[2]({gdprApplies:e,cmpLoaded:!1,cmpStatus:"stub"}):o.push(n)},n.addEventListener("message",(function(t){var e="string"==typeof t.data,o={};if(e)try{o=JSON.parse(t.data)}catch(t){}else o=t.data;var n="object"===_typeof(o)&&null!==o?o.__tcfapiCall:null;n&&window.__tcfapi(n.command,n.version,(function(o,r){var a={__tcfapiReturn:{returnValue:o,success:r,callId:n.callId}};t&&t.source&&t.source.postMessage&&t.source.postMessage(e?JSON.stringify(a):a,"*")}),n.parameter)}),!1))};"undefined"!=typeof module?module.exports=t:t()}(); dataLayer = dataLayer || []; // Standard GTM initialization - Google Consent Mode handles consent automatically (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl+ '>m_auth=hzk0Vc3qFsQYhCrIoHz68A>m_preview=env-1>m_cookies_win=x';f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-MWFK8L5J'); 
;window.NREUM||(NREUM={});NREUM.init={distributed_tracing:{enabled:true},privacy:{cookies_enabled:true},ajax:{deny_list:["bam.nr-data.net"]}}; ;NREUM.loader_config={accountID:"438030",trustKey:"438030",agentID:"772317073",licenseKey:"97f8f67f26",applicationID:"772317073"} ;NREUM.info={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net",licenseKey:"97f8f67f26",applicationID:"772317073",sa:1} ;/*! For license information please see nr-loader-spa-1.236.0.min.js.LICENSE.txt */ (()=>{"use strict";var e,t,r={5763:(e,t,r)=>{r.d(t,{P_:()=>l,Mt:()=>g,C5:()=>s,DL:()=>v,OP:()=>T,lF:()=>D,Yu:()=>y,Dg:()=>h,CX:()=>c,GE:()=>b,sU:()=>_});var n=r(8632),i=r(9567);const o={beacon:n.ce.beacon,errorBeacon:n.ce.errorBeacon,licenseKey:void 0,applicationID:void 0,sa:void 0,queueTime:void 0,applicationTime:void 0,ttGuid:void 0,user:void 0,account:void 0,product:void 0,extra:void 0,jsAttributes:{},userAttributes:void 0,atts:void 0,transactionName:void 0,tNamePlain:void 0},a={};function s(e){if(!e)throw new Error("All info objects require an agent identifier!");if(!a[e])throw new Error("Info for ".concat(e," was never set"));return a[e]}function c(e,t){if(!e)throw new Error("All info objects require an agent identifier!");a[e]=(0,i.D)(t,o),(0,n.Qy)(e,a[e],"info")}var u=r(7056);const d=()=>{const e={blockSelector:"[data-nr-block]",maskInputOptions:{password:!0}};return{allow_bfcache:!0,privacy:{cookies_enabled:!0},ajax:{deny_list:void 0,enabled:!0,harvestTimeSeconds:10},distributed_tracing:{enabled:void 0,exclude_newrelic_header:void 0,cors_use_newrelic_header:void 0,cors_use_tracecontext_headers:void 0,allowed_origins:void 0},session:{domain:void 0,expiresMs:u.oD,inactiveMs:u.Hb},ssl:void 0,obfuscate:void 
0,jserrors:{enabled:!0,harvestTimeSeconds:10},metrics:{enabled:!0},page_action:{enabled:!0,harvestTimeSeconds:30},page_view_event:{enabled:!0},page_view_timing:{enabled:!0,harvestTimeSeconds:30,long_task:!1},session_trace:{enabled:!0,harvestTimeSeconds:10},harvest:{tooManyRequestsDelay:60},session_replay:{enabled:!1,harvestTimeSeconds:60,sampleRate:.1,errorSampleRate:.1,maskTextSelector:"*",maskAllInputs:!0,get blockClass(){return"nr-block"},get ignoreClass(){return"nr-ignore"},get maskTextClass(){return"nr-mask"},get blockSelector(){return e.blockSelector},set blockSelector(t){e.blockSelector+=",".concat(t)},get maskInputOptions(){return e.maskInputOptions},set maskInputOptions(t){e.maskInputOptions={...t,password:!0}}},spa:{enabled:!0,harvestTimeSeconds:10}}},f={};function l(e){if(!e)throw new Error("All configuration objects require an agent identifier!");if(!f[e])throw new Error("Configuration for ".concat(e," was never set"));return f[e]}function h(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");f[e]=(0,i.D)(t,d()),(0,n.Qy)(e,f[e],"config")}function g(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");var r=l(e);if(r){for(var n=t.split("."),i=0;i {r.d(t,{D:()=>i});var n=r(50);function i(e,t){try{if(!e||"object"!=typeof e)return(0,n.Z)("Setting a Configurable requires an object as input");if(!t||"object"!=typeof t)return(0,n.Z)("Setting a Configurable requires a model to set its initial properties");const r=Object.create(Object.getPrototypeOf(t),Object.getOwnPropertyDescriptors(t)),o=0===Object.keys(r).length?e:r;for(let a in o)if(void 0!==e[a])try{"object"==typeof e[a]&&"object"==typeof t[a]?r[a]=i(e[a],t[a]):r[a]=e[a]}catch(e){(0,n.Z)("An error occurred while setting a property of a Configurable",e)}return r}catch(e){(0,n.Z)("An error occured while setting a Configurable",e)}}},6818:(e,t,r)=>{r.d(t,{Re:()=>i,gF:()=>o,q4:()=>n});const 
n="1.236.0",i="PROD",o="CDN"},385:(e,t,r)=>{r.d(t,{FN:()=>a,IF:()=>u,Nk:()=>f,Tt:()=>s,_A:()=>o,il:()=>n,pL:()=>c,v6:()=>i,w1:()=>d});const n="undefined"!=typeof window&&!!window.document,i="undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self.navigator instanceof WorkerNavigator||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis.navigator instanceof WorkerNavigator),o=n?window:"undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis),a=""+o?.location,s=/iPad|iPhone|iPod/.test(navigator.userAgent),c=s&&"undefined"==typeof SharedWorker,u=(()=>{const e=navigator.userAgent.match(/Firefox[/\s](\d+\.\d+)/);return Array.isArray(e)&&e.length>=2?+e[1]:0})(),d=Boolean(n&&window.document.documentMode),f=!!navigator.sendBeacon},1117:(e,t,r)=>{r.d(t,{w:()=>o});var n=r(50);const i={agentIdentifier:"",ee:void 0};class o{constructor(e){try{if("object"!=typeof e)return(0,n.Z)("shared context requires an object as input");this.sharedContext={},Object.assign(this.sharedContext,i),Object.entries(e).forEach((e=>{let[t,r]=e;Object.keys(i).includes(t)&&(this.sharedContext[t]=r)}))}catch(e){(0,n.Z)("An error occured while setting SharedContext",e)}}}},8e3:(e,t,r)=>{r.d(t,{L:()=>d,R:()=>c});var n=r(2177),i=r(1284),o=r(4322),a=r(3325);const s={};function c(e,t){const r={staged:!1,priority:a.p[t]||0};u(e),s[e].get(t)||s[e].set(t,r)}function u(e){e&&(s[e]||(s[e]=new Map))}function d(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"",t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:"feature";if(u(e),!e||!s[e].get(t))return a(t);s[e].get(t).staged=!0;const r=[...s[e]];function a(t){const r=e?n.ee.get(e):n.ee,a=o.X.handlers;if(r.backlog&&a){var s=r.backlog[t],c=a[t];if(c){for(var u=0;s&&u {let[t,r]=e;return 
r.staged}))&&(r.sort(((e,t)=>e[1].priority-t[1].priority)),r.forEach((e=>{let[t]=e;a(t)})))}function f(e,t){var r=e[1];(0,i.D)(t[r],(function(t,r){var n=e[0];if(r[0]===n){var i=r[1],o=e[3],a=e[2];i.apply(o,a)}}))}},2177:(e,t,r)=>{r.d(t,{c:()=>f,ee:()=>u});var n=r(8632),i=r(2210),o=r(1284),a=r(5763),s="nr@context";let c=(0,n.fP)();var u;function d(){}function f(e){return(0,i.X)(e,s,l)}function l(){return new d}function h(){u.aborted=!0,u.backlog={}}c.ee?u=c.ee:(u=function e(t,r){var n={},c={},f={},g=!1;try{g=16===r.length&&(0,a.OP)(r).isolatedBacklog}catch(e){}var p={on:b,addEventListener:b,removeEventListener:y,emit:v,get:x,listeners:w,context:m,buffer:A,abort:h,aborted:!1,isBuffering:E,debugId:r,backlog:g?{}:t&&"object"==typeof t.backlog?t.backlog:{}};return p;function m(e){return e&&e instanceof d?e:e?(0,i.X)(e,s,l):l()}function v(e,r,n,i,o){if(!1!==o&&(o=!0),!u.aborted||i){t&&o&&t.emit(e,r,n);for(var a=m(n),s=w(e),d=s.length,f=0;fn,p:()=>i});var n=r(2177).ee.get("handle");function i(e,t,r,i,o){o?(o.buffer([e],i),o.emit(e,t,r)):(n.buffer([e],i),n.emit(e,t,r))}},4322:(e,t,r)=>{r.d(t,{X:()=>o});var n=r(5546);o.on=a;var i=o.handlers={};function o(e,t,r,o){a(o||n.E,i,e,t,r)}function a(e,t,r,i,o){o||(o="feature"),e||(e=n.E);var a=t[o]=t[o]||{};(a[r]=a[r]||[]).push([e,i])}},3239:(e,t,r)=>{r.d(t,{bP:()=>s,iz:()=>c,m$:()=>a});var n=r(385);let i=!1,o=!1;try{const e={get passive(){return i=!0,!1},get signal(){return o=!0,!1}};n._A.addEventListener("test",null,e),n._A.removeEventListener("test",null,e)}catch(e){}function a(e,t){return i||o?{capture:!!e,passive:i,signal:t}:!!e}function s(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;window.addEventListener(e,t,a(r,n))}function c(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;document.addEventListener(e,t,a(r,n))}},4402:(e,t,r)=>{r.d(t,{Ht:()=>u,M:()=>c,Rl:()=>a,ky:()=>s});var n=r(385);const 
i="xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";function o(e,t){return e?15&e[t]:16*Math.random()|0}function a(){const e=n._A?.crypto||n._A?.msCrypto;let t,r=0;return e&&e.getRandomValues&&(t=e.getRandomValues(new Uint8Array(31))),i.split("").map((e=>"x"===e?o(t,++r).toString(16):"y"===e?(3&o()|8).toString(16):e)).join("")}function s(e){const t=n._A?.crypto||n._A?.msCrypto;let r,i=0;t&&t.getRandomValues&&(r=t.getRandomValues(new Uint8Array(31)));const a=[];for(var s=0;s {r.d(t,{Bq:()=>n,Hb:()=>o,oD:()=>i});const n="NRBA",i=144e5,o=18e5},7894:(e,t,r)=>{function n(){return Math.round(performance.now())}r.d(t,{z:()=>n})},7243:(e,t,r)=>{r.d(t,{e:()=>o});var n=r(385),i={};function o(e){if(e in i)return i[e];if(0===(e||"").indexOf("data:"))return{protocol:"data"};let t;var r=n._A?.location,o={};if(n.il)t=document.createElement("a"),t.href=e;else try{t=new URL(e,r.href)}catch(e){return o}o.port=t.port;var a=t.href.split("://");!o.port&&a[1]&&(o.port=a[1].split("/")[0].split("@").pop().split(":")[1]),o.port&&"0"!==o.port||(o.port="https"===a[0]?"443":"80"),o.hostname=t.hostname||r.hostname,o.pathname=t.pathname,o.protocol=a[0],"/"!==o.pathname.charAt(0)&&(o.pathname="/"+o.pathname);var s=!t.protocol||":"===t.protocol||t.protocol===r.protocol,c=t.hostname===r.hostname&&t.port===r.port;return o.sameOrigin=s&&(!t.hostname||c),"/"===o.pathname&&(i[e]=o),o}},50:(e,t,r)=>{function n(e,t){"function"==typeof console.warn&&(console.warn("New Relic: ".concat(e)),t&&console.warn(t))}r.d(t,{Z:()=>n})},2587:(e,t,r)=>{r.d(t,{N:()=>c,T:()=>u});var n=r(2177),i=r(5546),o=r(8e3),a=r(3325);const s={stn:[a.D.sessionTrace],err:[a.D.jserrors,a.D.metrics],ins:[a.D.pageAction],spa:[a.D.spa],sr:[a.D.sessionReplay,a.D.sessionTrace]};function c(e,t){const r=n.ee.get(t);e&&"object"==typeof e&&(Object.entries(e).forEach((e=>{let[t,n]=e;void 0===u[t]&&(s[t]?s[t].forEach((e=>{n?(0,i.p)("feat-"+t,[],void 0,e,r):(0,i.p)("block-"+t,[],void 0,e,r),(0,i.p)("rumresp-"+t,[Boolean(n)],void 
0,e,r)})):n&&(0,i.p)("feat-"+t,[],void 0,void 0,r),u[t]=Boolean(n))})),Object.keys(s).forEach((e=>{void 0===u[e]&&(s[e]?.forEach((t=>(0,i.p)("rumresp-"+e,[!1],void 0,t,r))),u[e]=!1)})),(0,o.L)(t,a.D.pageViewEvent))}const u={}},2210:(e,t,r)=>{r.d(t,{X:()=>i});var n=Object.prototype.hasOwnProperty;function i(e,t,r){if(n.call(e,t))return e[t];var i=r();if(Object.defineProperty&&Object.keys)try{return Object.defineProperty(e,t,{value:i,writable:!0,enumerable:!1}),i}catch(e){}return e[t]=i,i}},1284:(e,t,r)=>{r.d(t,{D:()=>n});const n=(e,t)=>Object.entries(e||{}).map((e=>{let[r,n]=e;return t(r,n)}))},4351:(e,t,r)=>{r.d(t,{P:()=>o});var n=r(2177);const i=()=>{const e=new WeakSet;return(t,r)=>{if("object"==typeof r&&null!==r){if(e.has(r))return;e.add(r)}return r}};function o(e){try{return JSON.stringify(e,i())}catch(e){try{n.ee.emit("internal-error",[e])}catch(e){}}}},3960:(e,t,r)=>{r.d(t,{K:()=>a,b:()=>o});var n=r(3239);function i(){return"undefined"==typeof document||"complete"===document.readyState}function o(e,t){if(i())return e();(0,n.bP)("load",e,t)}function a(e){if(i())return e();(0,n.iz)("DOMContentLoaded",e)}},8632:(e,t,r)=>{r.d(t,{EZ:()=>u,Qy:()=>c,ce:()=>o,fP:()=>a,gG:()=>d,mF:()=>s});var n=r(7894),i=r(385);const o={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net"};function a(){return i._A.NREUM||(i._A.NREUM={}),void 0===i._A.newrelic&&(i._A.newrelic=i._A.NREUM),i._A.NREUM}function s(){let e=a();return e.o||(e.o={ST:i._A.setTimeout,SI:i._A.setImmediate,CT:i._A.clearTimeout,XHR:i._A.XMLHttpRequest,REQ:i._A.Request,EV:i._A.Event,PR:i._A.Promise,MO:i._A.MutationObserver,FETCH:i._A.fetch}),e}function c(e,t,r){let i=a();const o=i.initializedAgents||{},s=o[e]||{};return Object.keys(s).length||(s.initializedAt={ms:(0,n.z)(),date:new Date}),i.initializedAgents={...o,[e]:{...s,[r]:t}},i}function u(e,t){a()[e]=t}function d(){return function(){let e=a();const t=e.info||{};e.info={beacon:o.beacon,errorBeacon:o.errorBeacon,...t}}(),function(){let e=a();const 
t=e.init||{};e.init={...t}}(),s(),function(){let e=a();const t=e.loader_config||{};e.loader_config={...t}}(),a()}},7956:(e,t,r)=>{r.d(t,{N:()=>i});var n=r(3239);function i(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],r=arguments.length>2?arguments[2]:void 0,i=arguments.length>3?arguments[3]:void 0;return void(0,n.iz)("visibilitychange",(function(){if(t)return void("hidden"==document.visibilityState&&e());e(document.visibilityState)}),r,i)}},1214:(e,t,r)=>{r.d(t,{em:()=>v,u5:()=>N,QU:()=>S,_L:()=>I,Gm:()=>L,Lg:()=>M,gy:()=>U,BV:()=>Q,Kf:()=>ee});var n=r(2177);const i="nr@original";var o=Object.prototype.hasOwnProperty,a=!1;function s(e,t){return e||(e=n.ee),r.inPlace=function(e,t,n,i,o){n||(n="");var a,s,c,u="-"===n.charAt(0);for(c=0;c 2?n-2:0),o=2;o {r(A[T],e,w),r(E[T],e,w)})),r(l._A,"fetch",y),t.on(y+"end",(function(e,r){var n=this;if(r){var i=r.headers.get("content-length");null!==i&&(n.rxSize=i),t.emit(y+"done",[null,r],n)}else t.emit(y+"done",[e],n)})),t}const O={},j=["pushState","replaceState"];function S(e){const t=function(e){return(e||n.ee).get("history")}(e);return!l.il||O[t.debugId]++||(O[t.debugId]=1,s(t).inPlace(window.history,j,"-")),t}var P=r(3239);const C={},R=["appendChild","insertBefore","replaceChild"];function I(e){const t=function(e){return(e||n.ee).get("jsonp")}(e);if(!l.il||C[t.debugId])return t;C[t.debugId]=!0;var r=s(t),i=/[?&](?:callback|cb)=([^&#]+)/,o=/(.*)\.([^.]+)/,a=/^(\w+)(\.|$)(.*)$/;function c(e,t){var r=e.match(a),n=r[1],i=r[3];return i?c(i,t[n]):t[n]}return r.inPlace(Node.prototype,R,"dom-"),t.on("dom-start",(function(e){!function(e){if(!e||"string"!=typeof e.nodeName||"script"!==e.nodeName.toLowerCase())return;if("function"!=typeof e.addEventListener)return;var n=(a=e.src,s=a.match(i),s?s[1]:null);var a,s;if(!n)return;var u=function(e){var t=e.match(o);if(t&&t.length>=3)return{key:t[2],parent:c(t[1],window)};return{key:e,parent:window}}(n);if("function"!=typeof u.parent[u.key])return;var d={};function 
f(){t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}function l(){t.emit("jsonp-error",[],d),t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}r.inPlace(u.parent,[u.key],"cb-",d),e.addEventListener("load",f,(0,P.m$)(!1)),e.addEventListener("error",l,(0,P.m$)(!1)),t.emit("new-jsonp",[e.src],d)}(e[0])})),t}var k=r(5763);const H={};function L(e){const t=function(e){return(e||n.ee).get("mutation")}(e);if(!l.il||H[t.debugId])return t;H[t.debugId]=!0;var r=s(t),i=k.Yu.MO;return i&&(window.MutationObserver=function(e){return this instanceof i?new i(r(e,"fn-")):i.apply(this,arguments)},MutationObserver.prototype=i.prototype),t}const z={};function M(e){const t=function(e){return(e||n.ee).get("promise")}(e);if(z[t.debugId])return t;z[t.debugId]=!0;var r=n.c,o=s(t),a=k.Yu.PR;return a&&function(){function e(r){var n=t.context(),i=o(r,"executor-",n,null,!1);const s=Reflect.construct(a,[i],e);return t.context(s).getCtx=function(){return n},s}l._A.Promise=e,Object.defineProperty(e,"name",{value:"Promise"}),e.toString=function(){return a.toString()},Object.setPrototypeOf(e,a),["all","race"].forEach((function(r){const n=a[r];e[r]=function(e){let i=!1;[...e||[]].forEach((e=>{this.resolve(e).then(a("all"===r),a(!1))}));const o=n.apply(this,arguments);return o;function a(e){return function(){t.emit("propagate",[null,!i],o,!1,!1),i=i||!e}}}})),["resolve","reject"].forEach((function(r){const n=a[r];e[r]=function(e){const r=n.apply(this,arguments);return e!==r&&t.emit("propagate",[e,!0],r,!1,!1),r}})),e.prototype=a.prototype;const n=a.prototype.then;a.prototype.then=function(){var e=this,i=r(e);i.promise=e;for(var a=arguments.length,s=new Array(a),c=0;c e())),t};function m(e,t){i.inPlace(t,["onreadystatechange"],"fn-",E)}function b(){var 
e=this,t=r.context(e);e.readyState>3&&!t.resolved&&(t.resolved=!0,r.emit("xhr-resolved",[],e)),i.inPlace(e,f,"fn-",E)}if(function(e,t){for(var r in e)t[r]=e[r]}(o,p),p.prototype=o.prototype,i.inPlace(p.prototype,J,"-xhr-",E),r.on("send-xhr-start",(function(e,t){m(e,t),function(e){h.push(e),a&&(y?y.then(A):u?u(A):(w=-w,x.data=w))}(t)})),r.on("open-xhr-start",m),a){var y=c&&c.resolve();if(!u&&!c){var w=1,x=document.createTextNode(w);new a(A).observe(x,{characterData:!0})}}else t.on("fn-end",(function(e){e[0]&&e[0].type===d||A()}));function A(){for(var e=0;e {r.d(t,{t:()=>n});const n=r(3325).D.ajax},6660:(e,t,r)=>{r.d(t,{A:()=>i,t:()=>n});const n=r(3325).D.jserrors,i="nr@seenError"},3081:(e,t,r)=>{r.d(t,{gF:()=>o,mY:()=>i,t9:()=>n,vz:()=>s,xS:()=>a});const n=r(3325).D.metrics,i="sm",o="cm",a="storeSupportabilityMetrics",s="storeEventMetrics"},4649:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageAction},7633:(e,t,r)=>{r.d(t,{Dz:()=>i,OJ:()=>a,qw:()=>o,t9:()=>n});const n=r(3325).D.pageViewEvent,i="firstbyte",o="domcontent",a="windowload"},9251:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageViewTiming},3614:(e,t,r)=>{r.d(t,{BST_RESOURCE:()=>i,END:()=>s,FEATURE_NAME:()=>n,FN_END:()=>u,FN_START:()=>c,PUSH_STATE:()=>d,RESOURCE:()=>o,START:()=>a});const n=r(3325).D.sessionTrace,i="bstResource",o="resource",a="-start",s="-end",c="fn"+a,u="fn"+s,d="pushState"},7836:(e,t,r)=>{r.d(t,{BODY:()=>A,CB_END:()=>E,CB_START:()=>u,END:()=>x,FEATURE_NAME:()=>i,FETCH:()=>_,FETCH_BODY:()=>v,FETCH_DONE:()=>m,FETCH_START:()=>p,FN_END:()=>c,FN_START:()=>s,INTERACTION:()=>l,INTERACTION_API:()=>d,INTERACTION_EVENTS:()=>o,JSONP_END:()=>b,JSONP_NODE:()=>g,JS_TIME:()=>T,MAX_TIMER_BUDGET:()=>a,REMAINING:()=>f,SPA_NODE:()=>h,START:()=>w,originalSetTimeout:()=>y});var n=r(5763);const 
i=r(3325).D.spa,o=["click","submit","keypress","keydown","keyup","change"],a=999,s="fn-start",c="fn-end",u="cb-start",d="api-ixn-",f="remaining",l="interaction",h="spaNode",g="jsonpNode",p="fetch-start",m="fetch-done",v="fetch-body-",b="jsonp-end",y=n.Yu.ST,w="-start",x="-end",A="-body",E="cb"+x,T="jsTime",_="fetch"},5938:(e,t,r)=>{r.d(t,{W:()=>o});var n=r(5763),i=r(2177);class o{constructor(e,t,r){this.agentIdentifier=e,this.aggregator=t,this.ee=i.ee.get(e,(0,n.OP)(this.agentIdentifier).isolatedBacklog),this.featureName=r,this.blocked=!1}}},9144:(e,t,r)=>{r.d(t,{j:()=>m});var n=r(3325),i=r(5763),o=r(5546),a=r(2177),s=r(7894),c=r(8e3),u=r(3960),d=r(385),f=r(50),l=r(3081),h=r(8632);function g(){const e=(0,h.gG)();["setErrorHandler","finished","addToTrace","inlineHit","addRelease","addPageAction","setCurrentRouteName","setPageViewName","setCustomAttribute","interaction","noticeError","setUserId"].forEach((t=>{e[t]=function(){for(var r=arguments.length,n=new Array(r),i=0;i 1?r-1:0),i=1;i {e.exposed&&e.api[t]&&o.push(e.api[t](...n))})),o.length>1?o:o[0]}(t,...n)}}))}var p=r(2587);function m(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},m=arguments.length>2?arguments[2]:void 0,v=arguments.length>3?arguments[3]:void 0,{init:b,info:y,loader_config:w,runtime:x={loaderType:m},exposed:A=!0}=t;const E=(0,h.gG)();y||(b=E.init,y=E.info,w=E.loader_config),(0,i.Dg)(e,b||{}),(0,i.GE)(e,w||{}),(0,i.sU)(e,x),y.jsAttributes??={},d.v6&&(y.jsAttributes.isWorker=!0),(0,i.CX)(e,y),g();const T=function(e,t){t||(0,c.R)(e,"api");const h={};var g=a.ee.get(e),p=g.get("tracer"),m="api-",v=m+"ixn-";function b(t,r,n,o){const a=(0,i.C5)(e);return null===r?delete a.jsAttributes[t]:(0,i.CX)(e,{...a,jsAttributes:{...a.jsAttributes,[t]:r}}),x(m,n,!0,o||null===r?"session":void 0)(t,r)}function 
y(){}["setErrorHandler","finished","addToTrace","inlineHit","addRelease"].forEach((e=>h[e]=x(m,e,!0,"api"))),h.addPageAction=x(m,"addPageAction",!0,n.D.pageAction),h.setCurrentRouteName=x(m,"routeName",!0,n.D.spa),h.setPageViewName=function(t,r){if("string"==typeof t)return"/"!==t.charAt(0)&&(t="/"+t),(0,i.OP)(e).customTransaction=(r||"http://custom.transaction")+t,x(m,"setPageViewName",!0)()},h.setCustomAttribute=function(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2];if("string"==typeof e){if(["string","number"].includes(typeof t)||null===t)return b(e,t,"setCustomAttribute",r);(0,f.Z)("Failed to execute setCustomAttribute.\nNon-null value must be a string or number type, but a type of was provided."))}else(0,f.Z)("Failed to execute setCustomAttribute.\nName must be a string type, but a type of was provided."))},h.setUserId=function(e){if("string"==typeof e||null===e)return b("enduser.id",e,"setUserId",!0);(0,f.Z)("Failed to execute setUserId.\nNon-null value must be a string type, but a type of was provided."))},h.interaction=function(){return(new y).get()};var w=y.prototype={createTracer:function(e,t){var r={},i=this,a="function"==typeof t;return(0,o.p)(v+"tracer",[(0,s.z)(),e,r],i,n.D.spa,g),function(){if(p.emit((a?"":"no-")+"fn-start",[(0,s.z)(),i,a],r),a)try{return t.apply(this,arguments)}catch(e){throw p.emit("fn-err",[arguments,this,"string"==typeof e?new Error(e):e],r),e}finally{p.emit("fn-end",[(0,s.z)()],r)}}}};function x(e,t,r,i){return function(){return(0,o.p)(l.xS,["API/"+t+"/called"],void 0,n.D.metrics,g),i&&(0,o.p)(e+t,[(0,s.z)(),...arguments],r?null:this,i,g),r?void 0:this}}function A(){r.e(439).then(r.bind(r,7438)).then((t=>{let{setAPI:r}=t;r(e),(0,c.L)(e,"api")})).catch((()=>(0,f.Z)("Downloading runtime APIs failed...")))}return["actionText","setName","setAttribute","save","ignore","onEnd","getContext","end","get"].forEach((e=>{w[e]=x(v,e,void 0,n.D.spa)})),h.noticeError=function(e,t){"string"==typeof e&&(e=new 
Error(e)),(0,o.p)(l.xS,["API/noticeError/called"],void 0,n.D.metrics,g),(0,o.p)("err",[e,(0,s.z)(),!1,t],void 0,n.D.jserrors,g)},d.il?(0,u.b)((()=>A()),!0):A(),h}(e,v);return(0,h.Qy)(e,T,"api"),(0,h.Qy)(e,A,"exposed"),(0,h.EZ)("activatedFeatures",p.T),T}},3325:(e,t,r)=>{r.d(t,{D:()=>n,p:()=>i});const n={ajax:"ajax",jserrors:"jserrors",metrics:"metrics",pageAction:"page_action",pageViewEvent:"page_view_event",pageViewTiming:"page_view_timing",sessionReplay:"session_replay",sessionTrace:"session_trace",spa:"spa"},i={[n.pageViewEvent]:1,[n.pageViewTiming]:2,[n.metrics]:3,[n.jserrors]:4,[n.ajax]:5,[n.sessionTrace]:6,[n.pageAction]:7,[n.spa]:8,[n.sessionReplay]:9}}},n={};function i(e){var t=n[e];if(void 0!==t)return t.exports;var o=n[e]={exports:{}};return r[e](o,o.exports,i),o.exports}i.m=r,i.d=(e,t)=>{for(var r in t)i.o(t,r)&&!i.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},i.f={},i.e=e=>Promise.all(Object.keys(i.f).reduce(((t,r)=>(i.f[r](e,t),t)),[])),i.u=e=>(({78:"page_action-aggregate",147:"metrics-aggregate",242:"session-manager",317:"jserrors-aggregate",348:"page_view_timing-aggregate",412:"lazy-feature-loader",439:"async-api",538:"recorder",590:"session_replay-aggregate",675:"compressor",733:"session_trace-aggregate",786:"page_view_event-aggregate",873:"spa-aggregate",898:"ajax-aggregate"}[e]||e)+"."+{78:"ac76d497",147:"3dc53903",148:"1a20d5fe",242:"2a64278a",317:"49e41428",348:"bd6de33a",412:"2f55ce66",439:"30bd804e",538:"1b18459f",590:"cf0efb30",675:"ae9f91a8",733:"83105561",786:"06482edd",860:"03a8b7a5",873:"e6b09d52",898:"998ef92b"}[e]+"-1.236.0.min.js"),i.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),e={},t="NRBA:",i.l=(r,n,o,a)=>{if(e[r])e[r].push(n);else{var s,c;if(void 0!==o)for(var u=document.getElementsByTagName("script"),d=0;d {s.onerror=s.onload=null,clearTimeout(h);var i=e[r];if(delete e[r],s.parentNode&&s.parentNode.removeChild(s),i&&i.forEach((e=>e(n))),t)return t(n)},h=setTimeout(l.bind(null,void 
0,{type:"timeout",target:s}),12e4);s.onerror=l.bind(null,s.onerror),s.onload=l.bind(null,s.onload),c&&document.head.appendChild(s)}},i.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.j=364,i.p="https://js-agent.newrelic.com/",(()=>{var e={364:0,953:0};i.f.j=(t,r)=>{var n=i.o(e,t)?e[t]:void 0;if(0!==n)if(n)r.push(n[2]);else{var o=new Promise(((r,i)=>n=e[t]=[r,i]));r.push(n[2]=o);var a=i.p+i.u(t),s=new Error;i.l(a,(r=>{if(i.o(e,t)&&(0!==(n=e[t])&&(e[t]=void 0),n)){var o=r&&("load"===r.type?"missing":r.type),a=r&&r.target&&r.target.src;s.message="Loading chunk "+t+" failed.\n("+o+": "+a+")",s.name="ChunkLoadError",s.type=o,s.request=a,n[1](s)}}),"chunk-"+t,t)}};var t=(t,r)=>{var n,o,[a,s,c]=r,u=0;if(a.some((t=>0!==e[t]))){for(n in s)i.o(s,n)&&(i.m[n]=s[n]);if(c)c(i)}for(t&&t(r);u {i.r(o);var e=i(3325),t=i(5763);const r=Object.values(e.D);function n(e){const n={};return r.forEach((r=>{n[r]=function(e,r){return!1!==(0,t.Mt)(r,"".concat(e,".enabled"))}(r,e)})),n}var a=i(9144);var s=i(5546),c=i(385),u=i(8e3),d=i(5938),f=i(3960),l=i(50);class h extends d.W{constructor(e,t,r){let n=!(arguments.length>3&&void 0!==arguments[3])||arguments[3];super(e,t,r),this.auto=n,this.abortHandler,this.featAggregate,this.onAggregateImported,n&&(0,u.R)(e,r)}importAggregator(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};if(this.featAggregate||!this.auto)return;const r=c.il&&!0===(0,t.Mt)(this.agentIdentifier,"privacy.cookies_enabled");let n;this.onAggregateImported=new Promise((e=>{n=e}));const o=async()=>{let t;try{if(r){const{setupAgentSession:e}=await Promise.all([i.e(860),i.e(242)]).then(i.bind(i,3228));t=e(this.agentIdentifier)}}catch(e){(0,l.Z)("A problem occurred when starting up session manager. 
This page will not start or extend any session.",e)}try{if(!this.shouldImportAgg(this.featureName,t))return void(0,u.L)(this.agentIdentifier,this.featureName);const{lazyFeatureLoader:r}=await i.e(412).then(i.bind(i,8582)),{Aggregate:o}=await r(this.featureName,"aggregate");this.featAggregate=new o(this.agentIdentifier,this.aggregator,e),n(!0)}catch(e){(0,l.Z)("Downloading and initializing ".concat(this.featureName," failed..."),e),this.abortHandler?.(),n(!1)}};c.il?(0,f.b)((()=>o()),!0):o()}shouldImportAgg(r,n){return r!==e.D.sessionReplay||!1!==(0,t.Mt)(this.agentIdentifier,"session_trace.enabled")&&(!!n?.isNew||!!n?.state.sessionReplay)}}var g=i(7633),p=i(7894);class m extends h{static featureName=g.t9;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];if(super(r,n,g.t9,i),("undefined"==typeof PerformanceNavigationTiming||c.Tt)&&"undefined"!=typeof PerformanceTiming){const n=(0,t.OP)(r);n[g.Dz]=Math.max(Date.now()-n.offset,0),(0,f.K)((()=>n[g.qw]=Math.max((0,p.z)()-n[g.Dz],0))),(0,f.b)((()=>{const t=(0,p.z)();n[g.OJ]=Math.max(t-n[g.Dz],0),(0,s.p)("timing",["load",t],void 0,e.D.pageViewTiming,this.ee)}))}this.importAggregator()}}var v=i(1117),b=i(1284);class y extends v.w{constructor(e){super(e),this.aggregatedData={}}store(e,t,r,n,i){var o=this.getBucket(e,t,r,i);return o.metrics=function(e,t){t||(t={count:0});return t.count+=1,(0,b.D)(e,(function(e,r){t[e]=w(r,t[e])})),t}(n,o.metrics),o}merge(e,t,r,n,i){var o=this.getBucket(e,t,n,i);if(o.metrics){var a=o.metrics;a.count+=r.count,(0,b.D)(r,(function(e,t){if("count"!==e){var n=a[e],i=r[e];i&&!i.c?a[e]=w(i.t,n):a[e]=function(e,t){if(!t)return e;t.c||(t=x(t.t));return t.min=Math.min(e.min,t.min),t.max=Math.max(e.max,t.max),t.t+=e.t,t.sos+=e.sos,t.c+=e.c,t}(i,a[e])}}))}else o.metrics=r}storeMetric(e,t,r,n){var i=this.getBucket(e,t,r);return i.stats=w(n,i.stats),i}getBucket(e,t,r,n){this.aggregatedData[e]||(this.aggregatedData[e]={});var i=this.aggregatedData[e][t];return 
i||(i=this.aggregatedData[e][t]={params:r||{}},n&&(i.custom=n)),i}get(e,t){return t?this.aggregatedData[e]&&this.aggregatedData[e][t]:this.aggregatedData[e]}take(e){for(var t={},r="",n=!1,i=0;i t.max&&(t.max=e),e 2&&void 0!==arguments[2])||arguments[2];super(e,r,j.t,n),c.il&&((0,t.OP)(e).initHidden=Boolean("hidden"===document.visibilityState),(0,N.N)((()=>(0,s.p)("docHidden",[(0,p.z)()],void 0,j.t,this.ee)),!0),(0,O.bP)("pagehide",(()=>(0,s.p)("winPagehide",[(0,p.z)()],void 0,j.t,this.ee))),this.importAggregator())}}var P=i(3081);class C extends h{static featureName=P.t9;constructor(e,t){let r=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(e,t,P.t9,r),this.importAggregator()}}var R,I=i(2210),k=i(1214),H=i(2177),L={};try{R=localStorage.getItem("__nr_flags").split(","),console&&"function"==typeof console.log&&(L.console=!0,-1!==R.indexOf("dev")&&(L.dev=!0),-1!==R.indexOf("nr_dev")&&(L.nrDev=!0))}catch(e){}function z(e){try{L.console&&z(e)}catch(e){}}L.nrDev&&H.ee.on("internal-error",(function(e){z(e.stack)})),L.dev&&H.ee.on("fn-err",(function(e,t,r){z(r.stack)})),L.dev&&(z("NR AGENT IN DEVELOPMENT MODE"),z("flags: "+(0,b.D)(L,(function(e,t){return e})).join(", ")));var M=i(6660);class B extends h{static featureName=M.t;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(r,n,M.t,i),this.skipNext=0;try{this.removeOnAbort=new AbortController}catch(e){}const o=this;o.ee.on("fn-start",(function(e,t,r){o.abortHandler&&(o.skipNext+=1)})),o.ee.on("fn-err",(function(t,r,n){o.abortHandler&&!n[M.A]&&((0,I.X)(n,M.A,(function(){return!0})),this.thrown=!0,(0,s.p)("err",[n,(0,p.z)()],void 0,e.D.jserrors,o.ee))})),o.ee.on("fn-end",(function(){o.abortHandler&&!this.thrown&&o.skipNext>0&&(o.skipNext-=1)})),o.ee.on("internal-error",(function(t){(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,o.ee)})),this.origOnerror=c._A.onerror,c._A.onerror=this.onerrorHandler.bind(this),c._A.addEventListener("unhandledrejection",(t=>{const 
r=function(e){let t="Unhandled Promise Rejection: ";if(e instanceof Error)try{return e.message=t+e.message,e}catch(t){return e}if(void 0===e)return new Error(t);try{return new Error(t+(0,D.P)(e))}catch(e){return new Error(t)}}(t.reason);(0,s.p)("err",[r,(0,p.z)(),!1,{unhandledPromiseRejection:1}],void 0,e.D.jserrors,this.ee)}),(0,O.m$)(!1,this.removeOnAbort?.signal)),(0,k.gy)(this.ee),(0,k.BV)(this.ee),(0,k.em)(this.ee),(0,t.OP)(r).xhrWrappable&&(0,k.Kf)(this.ee),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}onerrorHandler(t,r,n,i,o){"function"==typeof this.origOnerror&&this.origOnerror(...arguments);try{this.skipNext?this.skipNext-=1:(0,s.p)("err",[o||new F(t,r,n),(0,p.z)()],void 0,e.D.jserrors,this.ee)}catch(t){try{(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,this.ee)}catch(e){}}return!1}}function F(e,t,r){this.message=e||"Uncaught error with no additional information",this.sourceURL=t,this.line=r}let U=1;const q="nr@id";function G(e){const t=typeof e;return!e||"object"!==t&&"function"!==t?-1:e===c._A?0:(0,I.X)(e,q,(function(){return U++}))}function V(e){if("string"==typeof e&&e.length)return e.length;if("object"==typeof e){if("undefined"!=typeof ArrayBuffer&&e instanceof ArrayBuffer&&e.byteLength)return e.byteLength;if("undefined"!=typeof Blob&&e instanceof Blob&&e.size)return e.size;if(!("undefined"!=typeof FormData&&e instanceof FormData))try{return(0,D.P)(e).length}catch(e){return}}}var X=i(7243);class W{constructor(e){this.agentIdentifier=e,this.generateTracePayload=this.generateTracePayload.bind(this),this.shouldGenerateTrace=this.shouldGenerateTrace.bind(this)}generateTracePayload(e){if(!this.shouldGenerateTrace(e))return null;var r=(0,t.DL)(this.agentIdentifier);if(!r)return null;var n=(r.accountID||"").toString()||null,i=(r.agentID||"").toString()||null,o=(r.trustKey||"").toString()||null;if(!n||!i)return null;var 
a=(0,_.M)(),s=(0,_.Ht)(),c=Date.now(),u={spanId:a,traceId:s,timestamp:c};return(e.sameOrigin||this.isAllowedOrigin(e)&&this.useTraceContextHeadersForCors())&&(u.traceContextParentHeader=this.generateTraceContextParentHeader(a,s),u.traceContextStateHeader=this.generateTraceContextStateHeader(a,c,n,i,o)),(e.sameOrigin&&!this.excludeNewrelicHeader()||!e.sameOrigin&&this.isAllowedOrigin(e)&&this.useNewrelicHeaderForCors())&&(u.newrelicHeader=this.generateTraceHeader(a,s,c,n,i,o)),u}generateTraceContextParentHeader(e,t){return"00-"+t+"-"+e+"-01"}generateTraceContextStateHeader(e,t,r,n,i){return i+"@nr=0-1-"+r+"-"+n+"-"+e+"----"+t}generateTraceHeader(e,t,r,n,i,o){if(!("function"==typeof c._A?.btoa))return null;var a={v:[0,1],d:{ty:"Browser",ac:n,ap:i,id:e,tr:t,ti:r}};return o&&n!==o&&(a.d.tk=o),btoa((0,D.P)(a))}shouldGenerateTrace(e){return this.isDtEnabled()&&this.isAllowedOrigin(e)}isAllowedOrigin(e){var r=!1,n={};if((0,t.Mt)(this.agentIdentifier,"distributed_tracing")&&(n=(0,t.P_)(this.agentIdentifier).distributed_tracing),e.sameOrigin)r=!0;else if(n.allowed_origins instanceof Array)for(var i=0;i 2&&void 0!==arguments[2])||arguments[2];super(r,n,Z.t,i),(0,t.OP)(r).xhrWrappable&&(this.dt=new W(r),this.handler=(e,t,r,n)=>(0,s.p)(e,t,r,n,this.ee),(0,k.u5)(this.ee),(0,k.Kf)(this.ee),function(r,n,i,o){function a(e){var t=this;t.totalCbs=0,t.called=0,t.cbTime=0,t.end=E,t.ended=!1,t.xhrGuids={},t.lastSize=null,t.loadCaptureCalled=!1,t.params=this.params||{},t.metrics=this.metrics||{},e.addEventListener("load",(function(r){_(t,e)}),(0,O.m$)(!1)),c.IF||e.addEventListener("progress",(function(e){t.lastSize=e.loaded}),(0,O.m$)(!1))}function s(e){this.params={method:e[0]},T(this,e[1]),this.metrics={}}function u(e,n){var i=(0,t.DL)(r);i.xpid&&this.sameOrigin&&n.setRequestHeader("X-NewRelic-ID",i.xpid);var a=o.generateTracePayload(this.parsedOrigin);if(a){var 
s=!1;a.newrelicHeader&&(n.setRequestHeader("newrelic",a.newrelicHeader),s=!0),a.traceContextParentHeader&&(n.setRequestHeader("traceparent",a.traceContextParentHeader),a.traceContextStateHeader&&n.setRequestHeader("tracestate",a.traceContextStateHeader),s=!0),s&&(this.dt=a)}}function d(e,t){var r=this.metrics,i=e[0],o=this;if(r&&i){var a=V(i);a&&(r.txSize=a)}this.startTime=(0,p.z)(),this.listener=function(e){try{"abort"!==e.type||o.loadCaptureCalled||(o.params.aborted=!0),("load"!==e.type||o.called===o.totalCbs&&(o.onloadCalled||"function"!=typeof t.onload)&&"function"==typeof o.end)&&o.end(t)}catch(e){try{n.emit("internal-error",[e])}catch(e){}}};for(var s=0;s 1?e[1]=i:e.push(i)}else e[0]&&e[0].headers&&s(e[0].headers,n)&&(this.dt=n);function s(e,t){var r=!1;return t.newrelicHeader&&(e.set("newrelic",t.newrelicHeader),r=!0),t.traceContextParentHeader&&(e.set("traceparent",t.traceContextParentHeader),t.traceContextStateHeader&&e.set("tracestate",t.traceContextStateHeader),r=!0),r}}function x(e,t){this.params={},this.metrics={},this.startTime=(0,p.z)(),this.dt=t,e.length>=1&&(this.target=e[0]),e.length>=2&&(this.opts=e[1]);var r,n=this.opts||{},i=this.target;"string"==typeof i?r=i:"object"==typeof i&&i instanceof Y?r=i.url:c._A?.URL&&"object"==typeof i&&i instanceof URL&&(r=i.href),T(this,r);var o=(""+(i&&i instanceof Y&&i.method||n.method||"GET")).toUpperCase();this.params.method=o,this.txSize=V(n.body)||0}function A(t,r){var n;this.endTime=(0,p.z)(),this.params||(this.params={}),this.params.status=r?r.status:0,"string"==typeof this.rxSize&&this.rxSize.length>0&&(n=+this.rxSize);var o={txSize:this.txSize,rxSize:n,duration:(0,p.z)()-this.startTime};i("xhr",[this.params,o,this.startTime,this.endTime,"fetch"],this,e.D.ajax)}function E(t){var r=this.params,n=this.metrics;if(!this.ended){this.ended=!0;for(var o=0;o 2&&void 0!==arguments[2])||arguments[2];super(e,t,we.t,r),this.importAggregator()}}new class{constructor(e){let t=arguments.length>1&&void 
0!==arguments[1]?arguments[1]:(0,_.ky)(16);c._A?(this.agentIdentifier=t,this.sharedAggregator=new y({agentIdentifier:this.agentIdentifier}),this.features={},this.desiredFeatures=new Set(e.features||[]),this.desiredFeatures.add(m),Object.assign(this,(0,a.j)(this.agentIdentifier,e,e.loaderType||"agent")),this.start()):(0,l.Z)("Failed to initial the agent. Could not determine the runtime environment.")}get config(){return{info:(0,t.C5)(this.agentIdentifier),init:(0,t.P_)(this.agentIdentifier),loader_config:(0,t.DL)(this.agentIdentifier),runtime:(0,t.OP)(this.agentIdentifier)}}start(){const t="features";try{const r=n(this.agentIdentifier),i=[...this.desiredFeatures];i.sort(((t,r)=>e.p[t.featureName]-e.p[r.featureName])),i.forEach((t=>{if(r[t.featureName]||t.featureName===e.D.pageViewEvent){const n=function(t){switch(t){case e.D.ajax:return[e.D.jserrors];case e.D.sessionTrace:return[e.D.ajax,e.D.pageViewEvent];case e.D.sessionReplay:return[e.D.sessionTrace];case e.D.pageViewTiming:return[e.D.pageViewEvent];default:return[]}}(t.featureName);n.every((e=>r[e]))||(0,l.Z)("".concat(t.featureName," is enabled but one or more dependent features has been disabled (").concat((0,D.P)(n),"). 
This may cause unintended consequences or missing data...")),this.features[t.featureName]=new t(this.agentIdentifier,this.sharedAggregator)}})),(0,T.Qy)(this.agentIdentifier,this.features,t)}catch(e){(0,l.Z)("Failed to initialize all enabled instrument classes (agent aborted) -",e);for(const e in this.features)this.features[e].abortHandler?.();const r=(0,T.fP)();return delete r.initializedAgents[this.agentIdentifier]?.api,delete r.initializedAgents[this.agentIdentifier]?.[t],delete this.sharedAggregator,r.ee?.abort(),delete r.ee?.get(this.agentIdentifier),!1}}}({features:[J,m,S,class extends h{static featureName=oe;constructor(t,r){if(super(t,r,oe,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;const n=this.ee;let i;(0,k.QU)(n),this.eventsEE=(0,k.em)(n),this.eventsEE.on(se,(function(e,t){this.bstStart=(0,p.z)()})),this.eventsEE.on(ae,(function(t,r){(0,s.p)("bst",[t[0],r,this.bstStart,(0,p.z)()],void 0,e.D.sessionTrace,n)})),n.on(ce+ne,(function(e){this.time=(0,p.z)(),this.startPath=location.pathname+location.hash})),n.on(ce+ie,(function(t){(0,s.p)("bstHist",[location.pathname+location.hash,this.startPath,this.time],void 0,e.D.sessionTrace,n)}));try{i=new PerformanceObserver((t=>{const r=t.getEntries();(0,s.p)(te,[r],void 0,e.D.sessionTrace,n)})),i.observe({type:re,buffered:!0})}catch(e){}this.importAggregator({resourceObserver:i})}},C,xe,B,class extends h{static featureName=de;constructor(e,r){if(super(e,r,de,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;if(!(0,t.OP)(e).xhrWrappable)return;try{this.removeOnAbort=new AbortController}catch(e){}let n,i=0;const o=this.ee.get("tracer"),a=(0,k._L)(this.ee),s=(0,k.Lg)(this.ee),u=(0,k.BV)(this.ee),d=(0,k.Kf)(this.ee),f=this.ee.get("events"),l=(0,k.u5)(this.ee),h=(0,k.QU)(this.ee),g=(0,k.Gm)(this.ee);function m(e,t){h.emit("newURL",[""+window.location,t])}function v(){i++,n=window.location.hash,this[ve]=(0,p.z)()}function b(){i--,window.location.hash!==n&&m(0,!0);var 
e=(0,p.z)();this[pe]=~~this[pe]+e-this[ve],this[ye]=e}function y(e,t){e.on(t,(function(){this[t]=(0,p.z)()}))}this.ee.on(ve,v),s.on(be,v),a.on(be,v),this.ee.on(ye,b),s.on(ge,b),a.on(ge,b),this.ee.buffer([ve,ye,"xhr-resolved"],this.featureName),f.buffer([ve],this.featureName),u.buffer(["setTimeout"+le,"clearTimeout"+fe,ve],this.featureName),d.buffer([ve,"new-xhr","send-xhr"+fe],this.featureName),l.buffer([me+fe,me+"-done",me+he+fe,me+he+le],this.featureName),h.buffer(["newURL"],this.featureName),g.buffer([ve],this.featureName),s.buffer(["propagate",be,ge,"executor-err","resolve"+fe],this.featureName),o.buffer([ve,"no-"+ve],this.featureName),a.buffer(["new-jsonp","cb-start","jsonp-error","jsonp-end"],this.featureName),y(l,me+fe),y(l,me+"-done"),y(a,"new-jsonp"),y(a,"jsonp-end"),y(a,"cb-start"),h.on("pushState-end",m),h.on("replaceState-end",m),window.addEventListener("hashchange",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("load",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("popstate",(function(){m(0,i>1)}),(0,O.m$)(!0,this.removeOnAbort?.signal)),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}}],loaderType:"spa"})})(),window.NRBA=o})(); window.jQuery || document.write(' ') CKEDITOR_BASEPATH='https://f1000research.com/js/vendor/ckeditor/' window.reactTheme = 'research'; window.MathJax = { CommonHTML: { linebreaks: { automatic: true } }, 'HTML-CSS': { linebreaks: { automatic: true } }, SVG: { linebreaks: { automatic: true } }, AuthorInit: function() { MathJax.Hub.Register.MessageHook('End Process', function () { let timeout = false; // holder for timeout id const delay = 250; // delay after event is "complete" to run callback const reflowMath = function() { const dispFormulas = document.querySelectorAll('.disp-formula.panel'); if (!dispFormulas) { return; } for (const dispFormula of dispFormulas) { const child = 
dispFormula.querySelector('.MathJax_Preview').nextSibling.firstChild; const isMultiline = MathJax.Hub.getAllJax(dispFormula)[0].root.isMultiline; if (dispFormula.offsetWidth < child.offsetWidth || isMultiline) { MathJax.Hub.Queue(['Rerender', MathJax.Hub, dispFormula]); } } }; window.addEventListener('resize', function() { clearTimeout(timeout); // clear the timeout timeout = setTimeout(reflowMath, delay); // start timing for event "completion" }); }); }, }; if (window.location.hash == '#_=_'){ window.location = window.location.href.split('#')[0] } !function(f,b,e,v,n,t,s){if(f.fbq)return;n=f.fbq=function() {n.callMethod? n.callMethod.apply(n,arguments):n.queue.push(arguments)} ;if(!f._fbq)f._fbq=n; n.push=n;n.loaded=!0;n.version='2.0';n.queue=[];t=b.createElement(e);t.async=!0; t.src=v;s=b.getElementsByTagName(e)[0];s.parentNode.insertBefore(t,s)}(window, document,'script','https://connect.facebook.net/en_US/fbevents.js'); fbq('init', '1641728616063202'); fbq('track', "PixelInitialized", {}); (function(h,o,t,j,a,r){ h.hj=h.hj||function(){(h.hj.q=h.hj.q||[]).push(arguments)}; h._hjSettings={hjid:2318163,hjsv:6}; a=o.getElementsByTagName('head')[0]; r=o.createElement('script');r.async=1; r.src=t+h._hjSettings.hjid+j+h._hjSettings.hjsv; a.appendChild(r); })(window,document,'https://static.hotjar.com/c/hotjar-','.js?sv='); search file_upload Submit your research search menu close search Browse Gateways & Collections How to Publish Submit your Research My Submissions Article Guidelines Article Guidelines (New Versions) Open Data, Software and Code Guidelines Open Data and Accessible Source Materials Guidelines (HSS) Open Data, Software and Code Guidelines (PSE) Prepublication Checks Production Process Posters and Slides Guidelines Document Guidelines Article Processing Charges Peer Review Finding Article Reviewers About How it Works For Reviewers Our Advisors Policies Glossary FAQs For Developers Newsroom Contact My Research Submissions Content and Tracking Alerts My 
Details Sign In file_upload Submit your research { "@context": "https://schema.org", "@type": "ScholarlyArticle", "mainEntityOfPage": { "@type": "WebPage", "@id": "https://f1000research.com/articles/10-830" }, "headline": "UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis", "datePublished": "2021-08-19T12:28:50", "dateModified": "2024-07-26T14:26:11", "author": [ { "@type": "Person", "name": "Longfei Wang" }, { "@type": "Person", "name": "Victoria E Jackson" }, { "@type": "Person", "name": "Liam G Fearnley" }, { "@type": "Person", "name": "Melanie Bahlo" } ], "publisher": { "@type": "Organization", "name": "F1000Research", "logo": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 480, "width": 60 } }, "image": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 1200, "width": 150 }, "description": "COVID-19 caused by SARS-CoV-2 has resulted in a global pandemic with a rapidly developing global health and economic crisis. Variations in the disease have been observed and have been associated with the genomic sequence of either the human host or the pathogen. Worldwide scientists scrambled initially to recruit patient cohorts to try and identify risk factors. A resource that presented itself early on was the UK Biobank (UKBB), which is investigating the respective contributions of genetic predisposition and environmental exposure to the development of disease. To enable COVID-19 studies, UKBB is now receiving COVID-19 test data for their participants every two weeks. In addition, UKBB is delivering more frequent updates of death and hospital inpatient data (including critical care admissions) on the UKBB Data Portal. This frequently changing dataset requires a tool that can rapidly process and analyse up-to-date data. 
We developed an R package specifically for the UKBB COVID-19 data, which summarises COVID-19 test results, performs association tests between COVID-19 susceptibility/severity and potential risk factors such as age, sex, blood type, comorbidities and generates input files for genome-wide association studies (GWAS). By applying the R package to data released in April 2021, we found that age, body mass index, socioeconomic status and smoking are positively associated with COVID-19 susceptibility, severity, and mortality. Males are at a higher risk of COVID-19 infection than females. People staying in aged care homes have a higher chance of being exposed to SARS-CoV-2. By performing GWAS, we replicated the 3p21.31 genetic finding for COVID-19 susceptibility and severity. The ability to iteratively perform such analyses is highly relevant since the UKBB data is updated frequently. As a caveat, users must arrange their own access to the UKBB data to use the R package." } { "@context": "http://schema.org", "@type": "BreadcrumbList", "itemListElement": [ { "@type": "ListItem", "position": "1", "item": { "@id": "https://f1000research.com/", "name": "Home" } }, { "@type": "ListItem", "position": "2", "item": { "@id": "https://f1000research.com/browse/articles", "name": "Browse" } }, { "@type": "ListItem", "position": "3", "item": { "@id": "https://f1000research.com/articles/10-830", "name": "UKB.COVID19: an R package for UK Biobank COVID-19 data processing..." } } ] } Home Browse UKB.COVID19: an R package for UK Biobank COVID-19 data processing... ALL Metrics - Views Downloads Get PDF Get XML Cite How to cite this article Wang L, Jackson VE, Fearnley LG and Bahlo M. UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . 
F1000Research 2024, 10 :830 ( https://doi.org/10.12688/f1000research.55370.3 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. Close Copy Citation Details Export Export Citation Sciwheel EndNote Ref. Manager Bibtex ProCite Sente EXPORT Select a format first Track Share ▬ ✚ Software Tool Article Revised UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] Longfei Wang https://orcid.org/0000-0002-5143-4146 1,2 , Victoria E Jackson 1,2 , Liam G Fearnley 1,2 , Melanie Bahlo https://orcid.org/0000-0001-5132-0774 1,2 Longfei Wang https://orcid.org/0000-0002-5143-4146 1,2 , Victoria E Jackson 1,2 , Liam G Fearnley 1,2 , Melanie Bahlo https://orcid.org/0000-0001-5132-0774 1,2 PUBLISHED 26 Jul 2024 Author details Author details 1 Department of Medical Biology, The University of Melbourne, Parkville, VIC, 3010, Australia 2 Population Health and Immunity Division, The Walter and Eliza Hall Institute of Medical Research, Parkville, VIC, 3052, Australia Longfei Wang Roles: Data Curation, Formal Analysis, Methodology, Software, Writing – Original Draft Preparation Victoria E Jackson Roles: Data Curation, Software, Validation, Writing – Review & Editing Liam G Fearnley Roles: Validation, Writing – Review & Editing Melanie Bahlo Roles: Conceptualization, Supervision, Writing – Review & Editing OPEN PEER REVIEW DETAILS REVIEWER STATUS This article is included in the Emerging Diseases and Outbreaks gateway. This article is included in the RPackage gateway. This article is included in the Coronavirus (COVID-19) collection. Abstract COVID-19 caused by SARS-CoV-2 has resulted in a global pandemic with a rapidly developing global health and economic crisis. Variations in the disease have been observed and have been associated with the genomic sequence of either the human host or the pathogen. 
Worldwide scientists scrambled initially to recruit patient cohorts to try and identify risk factors. A resource that presented itself early on was the UK Biobank (UKBB), which is investigating the respective contributions of genetic predisposition and environmental exposure to the development of disease. To enable COVID-19 studies, UKBB is now receiving COVID-19 test data for their participants every two weeks. In addition, UKBB is delivering more frequent updates of death and hospital inpatient data (including critical care admissions) on the UKBB Data Portal. This frequently changing dataset requires a tool that can rapidly process and analyse up-to-date data. We developed an R package specifically for the UKBB COVID-19 data, which summarises COVID-19 test results, performs association tests between COVID-19 susceptibility/severity and potential risk factors such as age, sex, blood type, comorbidities and generates input files for genome-wide association studies (GWAS). By applying the R package to data released in April 2021, we found that age, body mass index, socioeconomic status and smoking are positively associated with COVID-19 susceptibility, severity, and mortality. Males are at a higher risk of COVID-19 infection than females. People staying in aged care homes have a higher chance of being exposed to SARS-CoV-2. By performing GWAS, we replicated the 3p21.31 genetic finding for COVID-19 susceptibility and severity. The ability to iteratively perform such analyses is highly relevant since the UKBB data is updated frequently. As a caveat, users must arrange their own access to the UKBB data to use the R package. READ ALL READ LESS Keywords R package, UK Biobank, COVID-19, GWAS, risk factors Corresponding Author(s) Longfei Wang ( [email protected] ) Close Corresponding author: Longfei Wang Competing interests: No competing interests were disclosed. 
Grant information: This work was made possible through the Victorian State Government Operational Infrastructure Support and Australian Government National Health and Medical Research Council (NHMRC) independent research Institute Infrastructure Support Scheme (IRIISS). Melanie Bahlo was supported by an NHMRC Investigator Grant (1195236). Access to the UKBB for this project was granted through project ID 36610. Copyright: © 2024 Wang L et al . This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. How to cite: Wang L, Jackson VE, Fearnley LG and Bahlo M. UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.12688/f1000research.55370.3 ) First published: 19 Aug 2021, 10 :830 ( https://doi.org/10.12688/f1000research.55370.1 ) Latest published: 26 Jul 2024, 10 :830 ( https://doi.org/10.12688/f1000research.55370.3 ) Revised Amendments from Version 2 The newly revised article contains additional information as suggested by the reviewer, which includes 1) a discussion of long COVID and relevant functions in the UKB.COVID19 R package; 2) a "Statistical analysis" section in the methods section; 3) a vignette in the UKB.COVID19 R package. The newly revised article contains additional information as suggested by the reviewer, which includes 1) a discussion of long COVID and relevant functions in the UKB.COVID19 R package; 2) a "Statistical analysis" section in the methods section; 3) a vignette in the UKB.COVID19 R package. 
See the authors' detailed response to the review by Edgar Gonzalez-Kozlova See the authors' detailed response to the review by Virginia Valeria and Annalisa De Silvestri See the authors' detailed response to the review by Thomas Michael Palmer READ REVIEWER RESPONSES Introduction The ongoing global pandemic of coronavirus disease 2019 (COVID-19), caused by a novel coronavirus (severe acute respiratory syndrome coronavirus 2, SARS-CoV-2), has resulted in a rapidly developing global health and economic crisis. Most people with COVID-19 never develop symptoms or suffer mild symptoms. However, about 5% of cases are critical (defined as respiratory failure, septic shock, and/or multiorgan dysfunction or failure) ( Wu and McGoogan 2020 ), possibly leading to lethal lung damage and even death. These and other clinical observations led to the hypothesis that genetic factors in either or both the host and the pathogen could be responsible, at least in part, for this variation. Worldwide scientists scrambled initially to recruit patient cohorts to try and identify genetic risk factors. UK Biobank (UKBB) (RRID: SCR_012815) is a long-term biobank study that recruited 500,000 volunteers aged between 40–69 years in 2006–2010 from across the UK. UKBB’s large-scale database is a global research resource accessible to approved researchers who are undertaking health-related research. All participants provided detailed information about their lifestyle, physical measures and had blood, urine and saliva samples collected. The samples of all participants have undergone SNP array typing and are now also undergoing whole-exome and whole-genome sequencing. UKBB has become a major contributor to the advancement of modern medicine and treatment, enabling a better understanding of a wide range of serious and life-threatening diseases. Researchers can apply for access to the data and worldwide hundreds of researchers are using the UKBB data to carry out research on many different diseases. 
The UKBB has facilitated first-time analyses on traits such as brain imaging phenotypes (Elliott et al., 2018). The UK has been badly affected by COVID-19. As of 20 May 2021, there have been over 127,000 reported deaths in the UK, with an estimated 4.5 million infections. Worldwide there have now been more than 3 million reported deaths due to COVID-19, with continually increasing rates of infections in India and South America. The UKBB was an early, available population genetic resource that could be harnessed to better understand COVID-19 risk factors, and with its continuing evolution continues to serve as a powerful cohort to permit such studies. UKBB has taken swift strides to help tackle the global pandemic by undertaking four major initiatives: serology study, COVID-19 repeat imaging study, coronavirus self-test antibody study and health data linkage. UKBB has been receiving COVID-19 test data for previous UKBB participants in England and has linked the test result data with health data. The test results data are being updated every two weeks. In addition, UKBB is making more frequent updates of death and hospital inpatient data (including critical care admissions) on the Data Portal. This rapidly changing dataset requires a tool that can process the up-to-date data as frequently as the data updates, in a standardised, reproducible, and somewhat automated manner to permit rapid re-analysis of the data and to also enable other researchers to use such a tool as a basis for their analyses. Therefore, we developed an R package, UKB.COVID19 (built in R version 4.0.5), which summarises COVID-19 test results, combines test results data with hospitalisation data and death register data, performs association tests between COVID-19 susceptibility/severity and potential risk factors (age, sex, blood type, socioeconomic status, comorbidities etc.) and generates input files for genome-wide association studies (GWAS). 
Ethics approval was granted through WEHI project 17/09LR by the WEHI’s Human Research Ethics Committee (HREC). Methods Implementation UKB.COVID19 was built in R (version 4.0.5) and currently depends on the following R packages: questionr, data.table, tidyverse, magrittr, here , and dplyr. COVID-19 related data files from UKBB can be directly imported in the R package without any pre-processing. Operation UKB.COVID19 is distributed as part of the CRAN R package repository and is compatible with Mac OS X, Windows, and major Linux operating systems. UKB.COVID19 is maintained at GitHub ( https://github.com/bahlolab/UKB.COVID19 ). The archived source code can be found in http://doi.org/10.5281/zenodo.5174381 ( Wang et al ., 2021 ). All analyses are performed using R (version 4.0.5). All functions and descriptions are listed in Table 1 . Table 1. Description of R functions in the UKB.COVID19 R package. Function Description risk_factor Selects several potential non-genetic risk factors from the linked health data provided by UKBB and generates an output file including the selected risk factors for the downstream analyses. Automatically returns sex, age at birthday in 2020, socioeconomic status, self-reported ethnicity, most recently reported body mass index, most recently reported pack-years of smoking, whether they reside in aged care (based on hospital admissions data, and COVID-19 test data) and blood type. Function also allows users to specify fields of interest (field codes, provided by UK Biobank), and allows the user to specify more intuitive names for selected fields. makePhenotypes Summarises COVID-19 test results data, death register data and hospital inpatient data and returns data.frame and outputs a phenotype file with phenotypes for COVID-19 susceptibility, severity or mortality. 
comorbidity_summary Summarises disease history records of each individual from the hospital inpatient diagnosis data and generates a file including all comorbidities based on ICD-10 codes, which can be used in the comorbidity association tests. comorbidity_asso Performs association tests using logistic regression models, adjusts the tested phenotype with covariates and outputs a table comprised of odds ratios (ORs), 95% confidence intervals (CIs) of ORs, and p-values for all the comorbidity categories. sampleQC Collates genetic QC data, as provided by UKBB and outputs lists of samples for inclusion/exclusion, for use with PLINK (Purcell et al., 2007) and/or SAIGE (Zhou et al., 2018). Also outputs a CSV file summarising sample-level QC metrics. variantQC Collates genetic QC data, as provided by UKBB and outputs lists of variants for inclusion in downstream analyses, for use with PLINK and/or SAIGE. makeGWASFiles Outputs phenotype files, formatted to be used as input for GWAS, or other genetic analyses, with PLINK and/or SAIGE. log_cov Performs association tests using logistic regression models. COVID-19 test results data COVID-19 test results data are being provided to the UKBB by Public Health England (PHE), Public Health Scotland (PHS) and SAIL Databank for English, Scottish and Welsh data respectively. The data have been updated approximately once every two weeks since 16 March 2020. Most samples tested for the COVID-19 disease-causing virus, SARS-CoV-2, are from combined nose/throat swabs. In intensive care settings, lower respiratory tract samples may also have been taken and analysed. The data consist of the encoded participant ID, date the specimen was taken, specimen type (e.g. nasal, nose and throat, sputum), the laboratory that processed the sample, whether the sample was reported as positive or negative for SARS-CoV-2, the requesting organisation description, as well as other variables. 
The test result data used in the analyses of this report are up to 6 April 2021. Death register data The death register data includes the date of death, the primary and contributory causes of death, coded using the ICD-10 system. The death register data have been updated every one or two months. The death register data used in the analyses of this report are up to 23 March 2021. Hospital inpatient data The hospital inpatient data consist of seven tables: 1) HESIN: the overall master table, providing information on admissions and discharges, the type of admission and other information related to the inpatient record as a whole. 2) HESIN_DIAG: diagnosis codes (ICD-9 or ICD-10) relating to inpatient records, including primary diagnoses and secondary diagnoses. The primary diagnosis is the main condition treated or investigated during the relevant episode. A secondary diagnosis is a clinically relevant contributory factor or issue that impacts the primary diagnosis (including chronic conditions). 3) HESIN_OPER: operations and procedures codes (OPCS-3 or OPCS-4) relating to inpatient episodes. 4) HESIN_CRITICAL: a child table of HESIN containing further information about those hospital episodes that required treatment in a critical care unit. 5) HESIN_PSYCH: a sibling table to HESIN containing fields relating to administrative aspects of psychiatric admissions. 6) HESIN_MATERNITY: a sibling table to HESIN containing fields relating specifically to maternity admissions. 7) HESIN_DELIVERY: Information regarding a child born as a result of a HESIN_MATERNITY record, where applicable. In this study, we use the HESIN, the HESIN_DIAG, the HESIN_OPER, and the HESIN_CRITICAL tables. The hospital inpatient data used in the analyses of this report are up to 5 February 2021. Phenotype definition The makePhenotypes function defines multiple COVID-19 traits, related to susceptibility, severity and mortality, which may be used for association testing and GWAS ( Table 2 ). Table 2. 
The COVID-19 related phenotypes output from the makePhenotypes function in the UKB.COVID19 R package. Category Trait Variable Description susceptibility pos.neg COVID-19 case vs negative test result - binary variable. 1 = evidence of COVID-19, from one or more of: a) positive test result for SARS-CoV-2 infection; b) admitted to hospital with COVID-19; c) death with COVID-19. 0 = no evidence of COVID-19, due to consistently testing negative for SARS-CoV-2 infection. NA = no evidence of COVID-19, and no record of test result for SARS-CoV-2 infection. pos.ppl COVID-19 case vs the rest of the UKBB participants - binary variable. 1 = evidence of COVID-19, from one or more of: a) positive test result for SARS-CoV-2 infection; b) admitted to hospital with COVID-19; c) death with COVID-19. 0 = any individual not meeting the criteria for a COVID-19 case. severity hospitalisation COVID-19 cases with hospitalisation vs the rest of COVID-19 cases - binary variable. 1 = evidence of COVID-19 severity level 1, from one or more of: a) admitted to hospital due to COVID-19; b) received basic critical care or advanced critical care due to COVID-19; c) death due to COVID-19. 0 = no evidence of COVID-19 severity level 1, even though testing positive for SARS-CoV-2 infection. critical.care COVID-19 cases with critical care vs the rest of COVID-19 cases - binary variable. 1 = evidence of COVID-19 severity level 2, from one or more of: a) received basic critical care or advanced critical care due to COVID-19; b) death due to COVID-19. 0 = no evidence of COVID-19 severity level 2, even though testing positive for SARS-CoV-2 infection. advanced.critical.care COVID-19 cases with severity level 3 vs the rest of COVID-19 cases - binary variable. 1 = evidence of COVID-19 severity level 3, from one or more of: a) received advanced critical care due to COVID-19; b) death due to COVID-19. 0 = no evidence of COVID-19 severity level 3, even though testing positive for SARS-CoV-2 infection. 
mortality mortality COVID-19 cases who have died due to COVID-19 vs the rest of COVID-19 cases - binary variable. 1 = death due to COVID-19. 0 = any other COVID-19 case. For susceptibility analysis, we generated a proxy variable, which includes all participants who have been tested for COVID-19 and defines those who received at least one positive result as cases. By 6 April 2021, 77,222 individuals in the UKBB had received COVID-19 tests and 16,562 had tested positive for COVID-19 on at least one occasion. The pheno.type = “susceptibility” option summarises the COVID-19 test results data and generates a susceptibility phenotype for association tests and GWAS. Based on the World Health Organization (WHO) ordinal scale for clinical improvement, we classify severity into four levels. These levels are defined as: 1) hospitalisation: individuals admitted to hospital with their primary diagnosis recorded as COVID-19. 2) critical care level 2: individuals who required basic treatment in a critical care unit, such as non-invasive ventilation and continuous positive airway pressure, and with their primary diagnosis recorded as COVID-19. 3) critical care level 3: individuals who required advanced treatment in a critical care unit, such as invasive ventilation and temporary tracheostomy, and with their primary diagnosis recorded as COVID-19. 4) mortality: individuals who died due to COVID-19. The critical care information was summarised from the HESIN_CRITICAL table and the HESIN_OPER table. The critical care level 2 cases are defined as the COVID-19 patients who required at least one “Critical care level 2 days” in the HESIN_CRITICAL table or received basic respiratory support, such as E85.2 non-invasive ventilation NEC, in the HESIN_OPER table. The critical care level 3 cases are defined as the COVID-19 patients who required at least one “Critical care level 3 days” in the HESIN_CRITICAL table or received advanced respiratory support, such as E85.1 invasive ventilation, in the HESIN_OPER table. 
The commonly used GWAS tools, such as SAIGE and PLINK, do not support ordinal categorical phenotypes. Therefore, we converted this ordinal variable into four binary variables named “hospitalisation”, “critical care”, “advanced critical care” and “mortality” ( Table 2 ). However, users can get the ordinal variable by simply summing the four binary variables. We assume that participants who tested positive for COVID-19 but were not admitted to hospital had no or mild symptoms and hence classified them as controls in severity phenotypes. We compare the test results data and the hospital inpatient data and correct any inconsistency between the two tables. As an example of data inconsistency, up to 5 February 2021, 130 individuals were admitted to the hospital due to COVID-19 but are not recorded in the test result data, while 33 individuals were admitted to the hospital due to COVID-19 but received negative COVID-19 test results. This inconsistency is resolved by retaining all 163 individuals and setting their COVID-19 test results as positive. The pheno.type = “severity” option combines COVID-19 test results data and hospital inpatient data and generates three phenotypes for each severity level. For mortality, we include all individuals who received at least one positive test result and define those whose primary cause of death is recorded as being due to COVID-19 as cases. We also compare the test results data and the death register data and correct any inconsistencies. As an example, up to 23 March 2021, 205 individuals died from COVID-19 as reported by the death register data but are not recorded as having positive COVID-19 tests in the test result data while 39 individuals died from COVID-19 but received negative COVID-19 test results. The inconsistency is resolved by retaining all 244 individuals and setting their test results as positive. Therefore, in total 1,042 UKBB participants had died from COVID-19 by 23 March 2021. 
The pheno.type = “mortality” option combines the COVID-19 test results data and death register data and generates a mortality phenotype. The makePhenotypes function returns results in data.frame format and outputs files in text format for the downstream association tests and genome-wide association tests using PLINK (RRID:SCR_001757) ( Purcell et al ., 2007 ) and SAIGE (Scalable and Accurate Implementation of GEneralized mixed model) ( Zhou et al ., 2018 ). Non-genetic risk factors The risk_factor function generates formatted variables for several non-genetic risk factors from the linked health data provided by UKBB. These variables are all established risk factors for SARS-CoV-2 exposure, and/or COVID-19 severity ( Pijls et al ., 2021 ; Wolff et al ., 2021 ; Booth et al ., 2021 ). The currently selected risk factors are listed in Table 3 . The multi-category variables are converted into multiple dummy variables. For the blood type group factor, three dummy variables encoding the blood types A, AB, and O, are added to the data to compare with blood type B (baseline). For the ethnic background factor, Black, Asian, Mixed, and other ethnic backgrounds (BAME) are added to the data to permit comparison to white Europeans (baseline). Table 3. The current selected risk factors of COVID-19 in the UKB.COVID19 R package. Risk-factor variable Description sex Participant sex. Binary variable 1 = male 0 = female age Age of participant (at 2020 birthday). Numeric bmi Body mass index. Numeric Where multiple longitudinal BMI measurements are available, the most recently recorded value is used. ethnic Self-reported “ethnic group”. Categorical 1 = White, 1001 = British, 1002 = Irish, 1003 = Any other white background. 2 = Mixed, 2001 = White and Black Caribbean, 2002 = White and Black African, 2003 = White and Asian, 2004 = Any other mixed background. 3 = Asian or Asian British, 3001 = Indian, 3002 = Pakistani, 3003 = Bangladeshi, 3004 = Any other Asian background. 5 = Chinese. 
4 = Black or Black British, 4001 = Caribbean, 4002 = African, 4003 = Any other Black background. 6 = Other ethnic group. -1 = Do not know. -3 = Prefer not to answer. other.ppl Participant self-reports as “Other ethnic group”. Binary variable 1 = Yes 0 = No black Participant self-reports as “Black or Black British”. Binary variable 1 = Yes 0 = No asian Participant self-reports as “Asian or Asian British”. Binary variable 1 = Yes 0 = No mixed Participant self-reports as “Mixed”. Binary variable 1 = Yes 0 = No white Participant self-reports as “White”. Binary variable 1 = Yes 0 = No SES Socioeconomic status (SES) using a Townsend deprivation index (Black 1988 ). Numeric For the population of a given area, a Townsend deprivation score is the summation of Z scores of four variables: unemployment, non-car ownership, non-home ownership and household overcrowding. A greater Townsend index score implies a greater degree of deprivation. Z scores = (percentage – mean of all percentages)/SD of all percentages. smoke Pack-years of smoking. Numeric Where multiple longitudinal pack-years measurements are available, the most recently recorded value is used. Number of cigarettes per day/20 * (Age stopped smoking - Age start smoking) Note: Individuals who started and gave up smoking before 16 years of age were coded as NA. For individuals who started smoking before 16 but gave up after 16, their age start was set as 16. Individuals who reported starting and stopping smoking at the same age and reported giving up smoking for more than 6 months had pack-years set at 0. blood group Participant blood type. Categorical Participants' blood groups were extracted from imputed genotyped data (Field 23165), which was added in July 2020 as a result of the suggestion that blood group may affect COVID-19 outcomes. Blood groups: AA, AB, AO, BB, BO, OO. O Participant has O-type blood. Binary variable 1 = Yes 0 = No AB Participant has AB-type blood. 
Binary variable 1 = Yes 0 = No B Participant has B-type blood. Binary variable 1 = Yes 0 = No A Participant has A-type blood. Binary variable 1 = Yes 0 = No inAgedCare Evidence that the participant resides in an Aged Care facility. Binary variable. 1 = Evidence of residing in aged care, based on HES data (admitted from, or discharged to, a nursing, residential care, group home), or from the COVID-19 test data (requesting organisation). 0 = Any individual not having evidence for residing in aged care, as defined above. Simple associations between COVID-19 phenotypes and these common risk factors may be examined using the log_cov function, which performs a logistic regression model and formats the results for quick interpretation. Comorbidities The comorbidity_summary function summarises disease history records of each individual from the hospital inpatient diagnosis data. To meet different research aims the function allows restriction to a period and filtering of annotations by only primary diagnoses or all diagnoses (using the "Date.start", "Date.end" and "primary" arguments, respectively). For illustration, if we are interested in the co-occurrences of COVID-19, we can set the episode start date as 16 March 2020 (“Date.start = 16/03/2020”), when the first COVID-19 test result was recorded and choose to use all diagnoses (“primary = FALSE”). If we are interested in individuals with reported comorbidities that are at a higher risk of SARS-CoV-2 infection, we can choose an episode end date before the COVID-19 outbreak in the UK, for example, “Date.end = 01/01/2020” and only focus on the primary diagnoses (“primary = TRUE”). Comorbidity categories are generated using the block categories in the ICD10 code, which is shown in the second column in Table 4 . We include ICD10 chapters 1-14 and 17 and exclude several chapters such as pregnancy, childbirth, and consequences of external causes etc. 
For instance, the first category is “A00-A09”, representing intestinal infectious diseases. During a period restricted by the start and end dates, cases are defined as any participants who were diagnosed as any subclasses under the block A00‐A09 in the hospital inpatient diagnosis data. In this way, 164 binary variables are generated and each of them represents a comorbidity category. The R function generates a text file including all comorbidity categories, which can be used in the comorbidity association tests. Table 4. The comorbidity categories. Comorbidity categories are generated using the block categories in the ICD10 code, as shown in the second column. We only included the blocks in chapter 1-14 and 17 and excluded several chapters such as pregnancy, childbirth and consequences of external causes etc. Chapter Block Title I Block A00-A09 Intestinal infectious diseases Block A15-A19 Tuberculosis Block A20-A28 Certain zoonotic bacterial diseases Block A30-A49 Other bacterial diseases Block A50-A64 Infections with a predominantly sexual mode of transmission Block A65-A69 Other spirochaetal diseases Block A70-A74 Other diseases caused by chlamydiae Block A75-A79 Rickettsioses Block A80-A89 Viral infections of the central nervous system Block A92-A99 Arthropod-borne viral fevers and viral haemorrhagic fevers II Block B00-B09 Viral infections characterized by skin and mucous membrane lesions Block B15-B19 Viral hepatitis Block B20-B24 Human immunodeficiency virus [HIV] disease Block B25-B34 Other viral diseases Block B35-B49 Mycoses Block B50-B64 Protozoal diseases Block B65-B83 Helminthiases Block B85-B89 Pediculosis, acariasis and other infestations Block B90-B94 Sequelae of infectious and parasitic diseases Block B95-B98 Bacterial, viral and other infectious agents Block B99-B99 Other infectious diseases III Block C00-C14 Malignant neoplasms of lip, oral cavity and pharynx Block C15-C26 Malignant neoplasms of digestive organs Block C30-C39 Malignant neoplasms 
of respiratory and intrathoracic organs Block C40-C41 Malignant neoplasms of bone and articular cartilage Block C43-C44 Melanoma and other malignant neoplasms of skin Block C45-C49 Malignant neoplasms of mesothelial and soft tissue Block C50-C50 Malignant neoplasm of breast Block C51-C58 Malignant neoplasms of female genital organs Block C60-C63 Malignant neoplasms of male genital organs Block C64-C68 Malignant neoplasms of urinary tract Block C69-C72 Malignant neoplasms of eye, brain and other parts of central nervous system Block C73-C75 Malignant neoplasms of thyroid and other endocrine glands Block C76-C80 Malignant neoplasms of ill-defined, secondary and unspecified sites Block C81-C96 Malignant neoplasms, stated or presumed to be primary, of lymphoid, haematopoietic and related tissue Block C97-C97 Malignant neoplasms of independent (primary) multiple sites IV Block D00-D09 In situ neoplasms Block D10-D36 Benign neoplasms Block D37-D48 Neoplasms of uncertain or unknown behaviour Block D50-D53 Nutritional anaemias Block D55-D59 Haemolytic anaemias Block D60-D64 Aplastic and other anaemias Block D65-D69 Coagulation defects, purpura and other haemorrhagic conditions Block D70-D77 Other diseases of blood and blood-forming organs Block D80-D89 Certain disorders involving the immune mechanism V Block E00-E07 Disorders of thyroid gland Block E10-E14 Diabetes mellitus Block E15-E16 Other disorders of glucose regulation and pancreatic internal secretion Block E20-E35 Disorders of other endocrine glands Block E40-E46 Malnutrition Block E50-E64 Other nutritional deficiencies Block E65-E68 Obesity and other hyperalimentation Block E70-E90 Metabolic disorders VI Block F00-F09 Organic, including symptomatic, mental disorders Block F10-F19 Mental and behavioural disorders due to psychoactive substance use Block F20-F29 Schizophrenia, schizotypal and delusional disorders Block F30-F39 Mood [affective] disorders Block F40-F48 Neurotic, stress-related and somatoform disorders 
Block F50-F59 Behavioural syndromes associated with physiological disturbances and physical factors Block F60-F69 Disorders of adult personality and behaviour Block F70-F79 Mental retardation Block F80-F89 Disorders of psychological development Block F90-F98 Behavioural and emotional disorders with onset usually occurring in childhood and adolescence Block F99-F99 Unspecified mental disorder VII Block G00-G09 Inflammatory diseases of the central nervous system Block G10-G14 Systemic atrophies primarily affecting the central nervous system Block G20-G26 Extrapyramidal and movement disorders Block G30-G32 Other degenerative diseases of the nervous system Block G35-G37 Demyelinating diseases of the central nervous system Block G40-G47 Episodic and paroxysmal disorders Block G50-G59 Nerve, nerve root and plexus disorders Block G60-G64 Polyneuropathies and other disorders of the peripheral nervous system Block G70-G73 Diseases of myoneural junction and muscle Block G80-G83 Cerebral palsy and other paralytic syndromes Block G90-G99 Other disorders of the nervous system VIII Block H00-H06 Disorders of eyelid, lacrimal system and orbit Block H10-H13 Disorders of conjunctiva Block H15-H22 Disorders of sclera, cornea, iris and ciliary body Block H25-H28 Disorders of lens Block H30-H36 Disorders of choroid and retina Block H40-H42 Glaucoma Block H43-H45 Disorders of vitreous body and globe Block H46-H48 Disorders of optic nerve and visual pathways Block H49-H52 Disorders of ocular muscles, binocular movement, accommodation and refraction Block H53-H54 Visual disturbances and blindness Block H55-H59 Other disorders of eye and adnexa Block H60-H62 Diseases of external ear Block H65-H75 Diseases of middle ear and mastoid Block H80-H83 Diseases of inner ear Block H90-H95 Other disorders of ear IX Block I00-I02 Acute rheumatic fever Block I05-I09 Chronic rheumatic heart diseases Block I10-I15 Hypertensive diseases Block I20-I25 Ischaemic heart diseases Block I26-I28 Pulmonary 
heart disease and diseases of pulmonary circulation Block I30-I52 Other forms of heart disease Block I60-I69 Cerebrovascular diseases Block I70-I79 Diseases of arteries, arterioles and capillaries Block I80-I89 Diseases of veins, lymphatic vessels and lymph nodes, not elsewhere classified Block I95-I99 Other and unspecified disorders of the circulatory system X Block J00-J06 Acute upper respiratory infections Block J09-J18 Influenza and pneumonia Block J20-J22 Other acute lower respiratory infections Block J30-J39 Other diseases of upper respiratory tract Block J40-J47 Chronic lower respiratory diseases Block J60-J70 Lung diseases due to external agents Block J80-J84 Other respiratory diseases principally affecting the interstitium Block J85-J86 Suppurative and necrotic conditions of lower respiratory tract Block J90-J94 Other diseases of pleura Block J95-J99 Other diseases of the respiratory system XI Block K00-K14 Diseases of oral cavity, salivary glands and jaws Block K20-K31 Diseases of oesophagus, stomach and duodenum Block K35-K38 Diseases of appendix Block K40-K46 Hernia Block K50-K52 Noninfective enteritis and colitis Block K55-K64 Other diseases of intestines Block K65-K67 Diseases of peritoneum Block K70-K77 Diseases of liver Block K80-K87 Disorders of gallbladder, biliary tract and pancreas Block K90-K93 Other diseases of the digestive system XII Block L00-L08 Infections of the skin and subcutaneous tissue Block L10-L14 Bullous disorders Block L20-L30 Dermatitis and eczema Block L40-L45 Papulosquamous disorders Block L50-L54 Urticaria and erythema Block L55-L59 Radiation-related disorders of the skin and subcutaneous tissue Block L60-L75 Disorders of skin appendages Block L80-L99 Other disorders of the skin and subcutaneous tissue XIII Block M00-M03 Infectious arthropathies Block M05-M14 Inflammatory polyarthropathies Block M15-M19 Arthrosis Block M20-M25 Other joint disorders Block M40-M43 Deforming dorsopathies Block M45-M49 Spondylopathies Block 
M50-M54 Other dorsopathies Block M60-M63 Disorders of muscles Block M65-M68 Disorders of synovium and tendon Block M70-M79 Other soft tissue disorders Block M80-M85 Disorders of bone density and structure Block M86-M90 Other osteopathies Block M91-M94 Chondropathies Block M95-M99 Other disorders of the musculoskeletal system and connective tissue XIV Block N00-N08 Glomerular diseases Block N10-N16 Renal tubulo-interstitial diseases Block N17-N19 Renal failure Block N20-N23 Urolithiasis Block N25-N29 Other disorders of kidney and ureter Block N30-N39 Other diseases of urinary system Block N40-N51 Diseases of male genital organs Block N60-N64 Disorders of breast Block N70-N77 Inflammatory diseases of female pelvic organs Block N80-N98 Noninflammatory disorders of female genital tract Block N99-N99 Other disorders of the genitourinary system XVII Block Q00-Q07 Congenital malformations of the nervous system Block Q10-Q18 Congenital malformations of eye, ear, face and neck Block Q20-Q28 Congenital malformations of the circulatory system Block Q30-Q34 Congenital malformations of the respiratory system Block Q35-Q37 Cleft lip and cleft palate Block Q38-Q45 Other congenital malformations of the digestive system Block Q50-Q56 Congenital malformations of genital organs Block Q60-Q64 Congenital malformations of the urinary system Block Q65-Q79 Congenital malformations and deformations of the musculoskeletal system Block Q80-Q89 Other congenital malformations Block Q90-Q99 Chromosomal abnormalities, not elsewhere classified The comorbidity_asso function performs association tests between each comorbidity category and the selected phenotype using logistic regression models and adjusts the tested phenotype with covariates, which can be set using the argument “cov.name”. By default, the covariates include sex, age, and BMI. Different ethnic backgrounds can be chosen for the test by setting the argument “population”. By default, all populations are included. 
It outputs a table comprised of odds ratios (ORs), confidence intervals (CIs) of ORs, and p-values for all the comorbidity categories. Preparation of files for genetic analyses The UKB.COVID19 package provides several functions, to facilitate GWAS, or other genetic analyses using the UKBB data. We provide two functions sampleQC and variantQC , to allow easy cleaning of the genetic data, using quality control (QC) metrics, supplied by UKBB ( Bycroft et al ., 2018 ). A third function, makeGWASFiles , outputs phenotype files, which may be used as input for the GWAS software packages PLINK ( Purcell et al ., 2007 ) and SAIGE ( Zhou et al ., 2018 ). The sampleQC function outputs a csv file summarising sample-level QC metrics, as well as producing lists of IDs for inclusion and/or exclusion in downstream analyses. The function identifies individuals to be excluded from genetic analyses based on: 1) being excluded by UKBB, before imputation due to high heterozygosity or missingness (>5%), 2) sex mismatches between genetically predicted and recorded sex, 3) an apparent excess number of relatives in the UKBB cohort (≥ 10 relatives), 4) putative sex chromosome aneuploidy, 5) withdrawn consent. The user has the option of further restricting to individuals of “White British” ancestry (determined using genetic principal components), by using the ancestry argument. Finally, the user can specify whether they require inclusion/exclusion sample lists to be formatted for PLINK or SAIGE. The variantQC function identifies variants to be included in downstream analyses, based on minor allele frequency (MAF) and imputation quality (INFO score), with thresholds specified by the user (defaults to MAF ≥0.001 and INFO ≥0.5). 
The function outputs lists of variants passing these thresholds in two formats, given the two types of SNP IDs available in the UKBB imputed genetic data release: 1) snpIncludeSNPIDs_minMaf0.001_minInfo0.5.txt contains the unique SNP identifiers; 2) snpIncludeRSIDs_minMaf0.001_minInfo0.5.txt contains the rsid or the reference panel marker ID (note these IDs are not guaranteed to be unique). The function also outputs a file containing IDs of the subset of SNPs, used by UKBB for calculating ancestry principal components ( Bycroft et al ., 2018 ). This subset of SNPs is suitable for analyses where a pruned set of independent SNPs are preferred, for example for calculation of a genetic relatedness matrix (GRM). The makeGWASFiles function generates a phenotype file, suitable to be used in association analyses by either SAIGE or PLINK ( Purcell et al ., 2007 ) (File format specified by user). The function utilises the phenotypes data frame generated by the makePhenotypes function, with the user able to specify specific phenotypes. The output phenotype file also contains the first 20 ancestry principal components, and genotyping array, as these are likely to be required as covariates in any genetic analyses. The user can also specify additional covariates (e.g. those generated by the risk_factor function), to be outputted to the phenotype file. Finally, the user can choose to output phenotypes, only for the individuals passing all QC (using the output file from sampleQC function), or for all individuals. GWAS We performed QC for the genotype data from UKBB using the sampleQC function, with the ancestry = “WhiteBritish” option, and the variantQC function, with thresholds MAF = 0.01 and INFO = 0.8. Phenotype files for SAIGE were generated using the makeGWASFiles function, containing all variables generated by the risk_factor function. 
Using the output files from the sampleQC and variantQC functions, we filtered the directly genotyped data using PLINK ( Purcell et al ., 2007 ), and the imputed data using QCTool version 2. We then performed GWAS of all COVID-19 phenotypes using SAIGE ( Zhou et al ., 2018 ). Firstly, the null model was fitted for each phenotype with 20 ancestry principal components (PCs), genotypic array, and associated non-genetic risk factors as covariates, and we used the pruned subset SNPs to construct the GRM. Subsequently, genome-wide association testing was undertaken, using the filtered imputed data. Statistical analysis To assess the associations between non-genetic risk factors and COVID-19 phenotypes (including susceptibility, severity, and mortality), we employed multivariable logistic regression models using the ‘glm’ function from the R package stats. Each model adjusted for covariates such as age, sex, and BMI. The tested risk factors included socioeconomic status (SES), smoking status, blood type, ethnic background, and residence in aged care facilities. The logistic regression model for each risk factor was specified as follows: logit (COVID-19 phenotype) ~ risk factor + age + sex + BMI. Comorbidity associations were analyzed using similar multivariable logistic regression models, with COVID-19 phenotypes modeled as: logit (COVID-19 phenotype) ~ comorbidity category + age + sex + BMI + SES + smoking status + aged care status. Odds ratios (ORs) with 95% confidence intervals (CIs) were reported, and p-values were calculated to determine the significance of the associations. To identify genetic variants associated with COVID-19 phenotypes, we performed GWASs using the SAIGE software. Principal component analysis (PCA) was performed to account for population stratification, and the first 20 principal components (PCs) were included as covariates in the analysis. 
Additionally, we adjusted for age, sex, BMI, SES, smoking status, residence in aged care facilities and genotypic array in the regression models. The association between each SNP and the phenotypes was tested using a logistic regression model, as follows: logit (COVID-19 phenotype) ~ SNP + age + sex + BMI + SES + smoking status + aged care status + genotypic array + PC1-20. To account for multiple testing, the Bonferroni correction was applied. Loci reaching the genome-wide significance threshold (p < 5×10⁻⁸) were considered significant. Manhattan plots and quantile-quantile (QQ) plots were generated to visualize the results using R package ggplot2. All analyses were carried out using R (version 4.0.5). Results We applied the R package UKB.COVID19 to the data released in April 2021. The last records in the COVID-19 test results data, the death register data and the hospital inpatient data were recorded on 6 April 2021, 23 March 2021, and 5 February 2021, respectively. By default, the dates for susceptibility, severity and mortality studies were chosen as 6 April 2021, 5 February 2021, and 23 March 2021, respectively. COVID-19 susceptibility By 6 April 2021, 77,222 UKBB participants had been tested for COVID-19. Among these individuals, 16,562 received at least one positive test result and 60,660 received all negative results. First, we tested the associations between a positive test result (as a proxy for COVID-19 susceptibility), and age, sex, and BMI using multivariable logistic regression. The results ( Table 5 ) show increased odds of a positive result in individuals of male sex (OR = 1.08, 95% CI = [1.04,1.11], p-value = 0.00007), with higher BMI (OR = 1.026, 95% CI = [1.0229,1.03], p-value <10⁻⁵) and with younger ages (OR = 0.939, 95% CI = [0.937,0.941], p-value <10⁻⁵). A possible reason for this result is that the older participants are less active and thus had less chance of being exposed to SARS-CoV-2. Table 5. 
COVID-19 susceptibility and non-genetic risk factor association test results for all populations and white British. Cases are defined as participants who received at least one COVID-19 positive test result. Controls are those who received only negative results. We tested sex, age and body mass index (BMI) in a multivariable model first and then tested each other factor individually by adjusting sex, age and BMI. SES stands for socioeconomic status. Odds ratio (OR) and p-values (P) are provided. Samples Case/control Statistic Sex Age BMI Blood type Ethnic background inAgedCare SES Smoke A AB O Black Asian Mixed Other All populations 16,562/60,660 OR 1.08 0.94 1.03 0.99 1.09 0.91 1.38 1.88 1.02 1.33 2.13 1.04 1.003 P 0.00007 ≈0 ≈0 0.7 0.1 0.005 ≈0 ≈0 0.9 0.0004 ≈0 ≈0 ≈0 White British 14,767/57,068 OR 1.07 0.94 1.03 1.05 1.10 0.96 2.36 1.04 1.004 P 0.0008 ≈0 ≈0 0.2 0.1 0.2 ≈0 ≈0 ≈0 *≈0 means <10 −5 . Second, we tested each potential risk factor individually with adjustment of age, sex, and BMI. Several publications have already reported that blood type groups are associated with COVID-19 susceptibility ( Zhao et al ., 2020 ; Zietz, Zucker, and Tatonetti 2020 ), including genetic associations with the ABO blood group locus at 9q34.2 (The Severe Covid-19 GWAS Group “Genomewide Association Study of Severe Covid-19 with Respiratory Failure” 2020) . People with blood type A have been consistently reported as being at a higher risk to SARS-CoV-2 and people with blood type O at lower risk ( Zhao et al ., 2020 ). Consistent with these results we find that compared with type B, individuals with blood type O are less susceptible to SARS-CoV-2 (OR =0.91, 95% CI = [0.86,0.97], p-value = 0.005) but we were unable to replicate the type A findings (p-value = 0.7). 
Compared with white individuals, those who self-identified as Black (OR =1.38, 95% CI = [1.24,1.55], p-value <10⁻⁵), Asian (OR =1.88, 95% CI = [1.71,2.07], p-value <10⁻⁵) and other ethnic backgrounds (OR =1.33, 95% CI = [1.14,1.55], p-value =0.0004) have higher odds of testing positive for COVID-19. Individuals with a lower socioeconomic status (SES) are also at a higher risk of COVID-19 (OR = 1.041, 95% CI = [1.036,1.047], p-value <10⁻⁵). Smoking also contributes to COVID-19 susceptibility (OR =1.003, 95% CI = [1.002,1.004], p-value <10⁻⁵). People who are staying at an aged care home are at a significantly higher risk of COVID-19 (OR = 2.13, 95% CI = [1.87,2.43], p-value <10⁻⁵), which is in line with the aged care home outbreaks in the UK. We only apply GWAS to the white British participants in the UKBB. Therefore, we performed non-genetic risk factor association tests again for self-reported “white” participants only. It shows that age, sex, BMI, SES, smoking, and if in an aged care home are associated with COVID-19 susceptibility in white British. Together with the two array effects and the first 20 PCs, these risk factors are used to adjust susceptibility in the GWAS. The genome-wide significant COVID-19 susceptibility locus identified in our GWAS is 3p21.31 ( Figure 1 and Table 6 ). The most statistically significant SNP is rs2271616 within the glycine transporter gene SLC6A20 (3p21.31, p-value = 3.36 × 10⁻⁹), followed by SNPs rs73062389 (3p21.31; SLC6A20 ; p-value = 5.16 × 10⁻⁹) and rs73062394 (3p21.31; SLC6A20 ; p-value = 6.68 × 10⁻⁹) in strong linkage disequilibrium (LD) (r² = 1 and r² = 1) ( Table 7 ). SLC6A20 encodes an amino acid transporter that interacts with ACE2, the main receptor that SARS-CoV-2 uses to gain entry into host cells ( Elhabyan et al ., 2020 ; Hoffmann et al ., 2020 ). 
This locus has also been previously identified by other studies (The Severe Covid-19 GWAS Group “Genomewide Association Study of Severe Covid-19 with Respiratory Failure”, 2020) , several meta-analyses of which have also made use of the UKBB COVID-19 data (Host Genetics Initiative, 2021) . All genome wide significant GWAS hits with gene annotations are available in Table 7 . Figure 1. The Q-Q plot and Manhattan plot of COVID-19 susceptibility GWAS. Sample size is 61,823. In the Manhattan plot, each point denotes a SNP located on a particular chromosome ( x -axis). The significance level is presented in the y -axis. The red line indicates the threshold for genome-wide significance 5 × 10⁻⁸ while the blue line indicates the threshold for suggestive genome-wide significance 1 × 10⁻⁵. The light green dots are the genes of interest, which have been reported in other publications (Pairo-Castineira et al ., 2021; “Genomewide Association Study of Severe Covid-19 with Respiratory Failure”, 2020 ), including SLC6A20, LZTFL1, CCR9, FYCO1, CXCR6, XCR1, HLA-G, CCHCR1, NOTCH4, ABO, OAS1, OAS2, OAS3, APOE, DPP9, TYK2, IFNAR2, TMPRSS2, ACE2, and TLR7. The susceptibility phenotype is adjusted by age, sex, body mass index, socioeconomic status, smoking, if in an aged care home, array, and PC1–20. The genome-wide significant COVID-19 susceptibility locus identified is 3p21.31. The most statistically significant SNP is rs2271616 within the glycine transporter gene SLC6A20 (3p21.31, p-value = 3.36 × 10⁻⁹), followed by SNPs rs73062389 (3p21.31; SLC6A20 ; p-value = 5.16 × 10⁻⁹) and rs73062394 (3p21.31; SLC6A20 ; p-value = 6.68 × 10⁻⁹) in strong linkage disequilibrium (LD) (r² = 1 and r² = 1). Table 6. The most genome-wide significant hits of COVID-19 susceptibility, hospitalisation and critical care genome-wide association studies. 
Phenotype RsID Chromosome Position Effect/non-effect allele Cytoband P-value Gene Susceptibility rs2271616 3 45838013 G/T p21.31 3.36E-09 SLC6A20 Hospitalisation rs35044562 3 45909024 A/G p21.31 1.55E-10 LZTFL1 Critical care rs35044562 3 45909024 A/G p21.31 2.23E-09 LZTFL1 Table 7. The genome-wide significant hits of COVID-19 susceptibility, hospitalisation and critical care genome-wide association studies. Phenotype RsID Chromosome Position Effect/non-effect allele Cytoband P-value Nearest gene Susceptibility rs2271616 3 45838013 G/T p21.31 3.36E-09 SLC6A20 rs73062389 3 45835417 G/A p21.31 5.16E-09 SLC6A20 rs73062394 3 45839176 A/T p21.31 6.68E-09 SLC6A20 Hospitalisation rs35896106 3 45841938 C/T p21.31 1.15E-08 SLC6A20 rs13071258 3 45843242 G/A p21.31 2.68E-09 SLC6A20 rs17763537 3 45843315 C/T p21.31 8.91E-09 SLC6A20 rs17763569 3 45843439 G/T p21.31 8.91E-09 SLC6A20 rs34668658 3 45844198 A/C p21.31 3.53E-09 SLC6A20 rs17763742 3 45846769 A/G p21.31 4.46E-09 SLC6A20 rs17712877 3 45848760 G/C p21.31 9.41E-09 SLC6A20 rs72893671 3 45850783 T/A p21.31 5.87E-09 SLC6A20 rs17713054 3 45859651 G/A p21.31 5.46E-10 LZTFL1 rs13078854 3 45861932 G/A p21.31 5.43E-10 LZTFL1 rs71325088 3 45862952 T/C p21.31 4.61E-10 LZTFL1 rs10490770 3 45864732 T/C p21.31 5.81E-10 LZTFL1 rs35624553 3 45867440 A/G p21.31 5.67E-10 LZTFL1 3:45871139_GA_G 3 45871139 GA/G p21.31 3.24E-09 LZTFL1 rs67959919 3 45871908 G/A p21.31 5.60E-10 LZTFL1 rs11385942 3 45876459 G/GA p21.31 1.02E-09 LZTFL1 rs35508621 3 45880481 T/C p21.31 5.24E-10 LZTFL1 rs34288077 3 45888690 A/G p21.31 6.34E-10 LZTFL1 rs35081325 3 45889921 A/T p21.31 6.34E-10 LZTFL1 rs35731912 3 45889949 C/T p21.31 6.26E-10 LZTFL1 rs34326463 3 45899651 A/G p21.31 6.26E-10 LZTFL1 rs76374459 3 45900634 G/C p21.31 6.09E-09 LZTFL1 rs73064425 3 45901089 C/T p21.31 5.41E-10 LZTFL1 rs13081482 3 45908116 A/T p21.31 5.43E-10 LZTFL1 rs35652899 3 45908514 C/G p21.31 2.01E-10 LZTFL1 rs35044562 3 45909024 A/G p21.31 1.55E-10 LZTFL1 rs73064431 3 45909528 C/T 
p21.31 3.55E-09 LZTFL1 rs13092887 3 45909644 C/A p21.31 2.64E-09 LZTFL1 Critical care rs17713054 3 45859651 G/A p21.31 3.76E-09 LZTFL1 rs13078854 3 45861932 G/A p21.31 3.76E-09 LZTFL1 rs71325088 3 45862952 T/C p21.31 2.61E-09 LZTFL1 rs10490770 3 45864732 T/C p21.31 3.89E-09 LZTFL1 rs35624553 3 45867440 A/G p21.31 3.88E-09 LZTFL1 3:45871139_GA_G 3 45871139 GA/G p21.31 4.14E-08 LZTFL1 rs67959919 3 45871908 G/A p21.31 3.96E-09 LZTFL1 rs11385942 3 45876459 G/GA p21.31 6.89E-09 LZTFL1 rs35508621 3 45880481 T/C p21.31 3.27E-09 LZTFL1 rs34288077 3 45888690 A/G p21.31 4.25E-09 LZTFL1 rs35081325 3 45889921 A/T p21.31 4.24E-09 LZTFL1 rs35731912 3 45889949 C/T p21.31 4.01E-09 LZTFL1 rs34326463 3 45899651 A/G p21.31 4.17E-09 LZTFL1 rs76374459 3 45900634 G/C p21.31 5.34E-09 LZTFL1 rs73064425 3 45901089 C/T p21.31 3.83E-09 LZTFL1 rs13081482 3 45908116 A/T p21.31 4.38E-09 LZTFL1 rs35652899 3 45908514 C/G p21.31 3.18E-09 LZTFL1 rs35044562 3 45909024 A/G p21.31 2.23E-09 LZTFL1 rs73064431 3 45909528 C/T p21.31 3.78E-08 LZTFL1 rs13092887 3 45909644 C/A p21.31 3.47E-08 LZTFL1 COVID-19 severity By 5 February 2021, 15,666 UKBB participants received positive COVID-19 test results. 2,104 individuals had been admitted to the hospital due to COVID-19, 1,129 of these individuals received critical care treatments and 1,010 received advanced critical care treatments. The risk factor association test results are presented in Tables 8 and 9 for all populations and self-reported white individuals, respectively. Compared to white individuals, Black, Asian, and other minority ethnic groups are at a higher risk of severe COVID-19. Age, sex, BMI, SES, and smoking are also positively associated with COVID-19 severity. Table 8. COVID-19 severity and non-genetic risk factor association test results for all populations. Cases of hospitalisation include participants who were admitted to hospital and whose primary diagnosis was COVID-19, received critical care treatments, or died from COVID-19. 
Controls are the rest of the participants who received positive test results. Cases of critical care phenotype include those who received critical care treatments due to COVID-19 or died from COVID-19. Cases of advanced critical care are defined as participants who received advanced critical care treatments or died from COVID-19. We tested sex, age and body mass index (BMI) in a multivariable model first and then tested each other factor individually by adjusting sex, age and BMI. SES stands for socioeconomic status. Odds ratio (OR) and p-values (P) are provided. Severity Case/control Statistic Sex Age BMI Blood type Ethnic background inAgedCare SES Smoke A AB O Black Asian Mixed Other Hospitalisation 2,104/13,562 OR 1.75 1.12 1.07 0.87 0.82 0.94 2.00 1.57 1.07 1.49 2.08 1.08 1.01 P ≈0 ≈0 ≈0 0.2 0.2 0.5 ≈0 0.0003 0.8 0.06 0 ≈0 ≈0 Critical care 1,129/14,537 OR 1.93 1.14 1.07 0.96 1.06 1.11 2.14 1.64 0.56 1.39 2.46 1.07 1.009 P ≈0 ≈0 ≈0 0.8 0.8 0.4 0.00001 0.003 0.3 0.3 ≈0 ≈0 ≈0 Advanced critical care 1,010/14,656 OR 1.82 1.15 1.07 0.99 1.10 1.12 2.24 1.69 0.67 1.28 2.60 1.06 1.009 P ≈0 ≈0 ≈0 0.9 0.6 0.4 0.00001 0.003 0.5 0.4 ≈0 ≈0 ≈0 *≈0 means <10 −5 . Table 9. COVID-19 severity and non-genetic risk factor association test results for white British. Cases of hospitalisation include participants who were admitted to hospital and whose primary diagnosis was COVID-19, received critical care treatments, or died from COVID-19. Controls are the rest of the participants who received positive test results. Cases of critical care phenotype include those who received critical care treatments due to COVID-19 or died from COVID-19. Cases of advanced critical care are defined as participants who received advanced critical care treatments or died from COVID-19. We tested sex, age and body mass index (BMI) in a multivariable model first and then tested each other factor individually by adjusting sex, age and BMI. SES stands for socioeconomic status. 
Odds ratio (OR) and p-values (P) are provided. Severity Case/control Statistic Sex Age BMI Blood type inAgedCare SES Smoke A AB O Hospitalisation 1,865/12,093 OR 1.75 1.12 1.07 0.94 0.89 1.02 2.05 1.07 1.01 P ≈0 ≈0 ≈0 0.6 0.5 0.8 ≈0 ≈0 ≈0 Critical care 1,006/12,952 OR 2.00 1.14 1.07 1.41 1.21 1.28 2.54 1.06 1.01 P ≈0 ≈0 ≈0 0.3 0.4 0.08 ≈0 ≈0 ≈0 Advanced critical care 902/13,056 OR 1.90 1.16 1.07 1.19 1.29 1.34 2.68 1.05 1.01 P ≈0 ≈0 ≈0 0.2 0.3 0.05 ≈0 0.00001 ≈0 *≈0 means <10 −5 . The results from the GWAS are shown in the quantile-quantile (Q-Q) plots and Manhattan plots in Figures 2–4 . The tested phenotypes are adjusted by age, sex, BMI, SES, smoking, if in an aged care home, array, and PC1–20. The results show that the locus at 3p21.31 is genome-wide significantly associated with COVID-19 hospitalisation and critical care ( Tables 6 and 7 ). Specifically, the most significant SNP for both COVID-19 hospitalisation and critical care GWASs is located in the gene LZTFL1 (rs35044562 in locus 3p21.31; p-value = 1.55 × 10 −10 and p-value = 2.23 × 10 −9 , respectively). According to the Genotype-Tissue Expression ( GTEx ) project, LZTFL1 is widely expressed throughout the body and encodes a protein involved in protein trafficking to primary cilia, which are microtubule-based subcellular organelles acting as antennas for extracellular signals. In T lymphocytes, LZTFL1 participates in the immunologic synapse with antigen-presenting cells, such as dendritic cells (these cells prime T-lymphocyte responses) ( Kaser 2020 ; Seo et al ., 2011 ; Jiang et al ., 2016 ). Figure 2. The Q-Q plot and Manhattan plot of COVID-19 hospitalisation GWAS. Sample size is 11,974. In the Manhattan plot, each point denotes a SNP located on a particular chromosome ( x -axis). The significance level is presented in the y -axis. The red line indicates the threshold for genome-wide significance 5 × 10 −8 while the blue line indicates the threshold for suggestive genome-wide significance 1 × 10 −5 . 
The light green dots are the genes of interest, including SLC6A20, LZTFL1, CCR9, FYCO1, CXCR6, XCR1, HLA-G, CCHCR1, NOTCH4, ABO, OAS1, OAS2, OAS3, APOE, DPP9, TYK2, IFNAR2, TMPRSS2, ACE2, and TLR7. The hospitalisation phenotype is adjusted by age, sex, body mass index, socioeconomic status, smoking, if in an aged care home, array, and PC1–20. The result shows that the locus at 3p21.31 is genome-wide significantly associated with COVID-19 hospitalisation. The most significant SNP for the COVID-19 hospitalisation GWAS is located in the gene LZTFL1 (rs35044562 in locus 3p21.31; p-value = $1.55 \times 10^{-10}$). Figure 3. The Q-Q plot and Manhattan plot of COVID-19 critical care GWAS. Sample size is 11,974. In the Manhattan plot, each point denotes a SNP located on a particular chromosome (x-axis). The significance level is presented in the y-axis. The red line indicates the threshold for genome-wide significance $5 \times 10^{-8}$ while the blue line indicates the threshold for suggestive genome-wide significance $1 \times 10^{-5}$. The light green dots are the genes of interest, including SLC6A20, LZTFL1, CCR9, FYCO1, CXCR6, XCR1, HLA-G, CCHCR1, NOTCH4, ABO, OAS1, OAS2, OAS3, APOE, DPP9, TYK2, IFNAR2, TMPRSS2, ACE2, and TLR7. The critical care phenotype is adjusted by age, sex, body mass index, socioeconomic status, smoking, if in an aged care home, array, and PC1–20. The result shows that the locus at 3p21.31 is genome-wide significantly associated with COVID-19 critical care. The most significant SNP for the COVID-19 critical care GWAS is located in the gene LZTFL1 (rs35044562 in locus 3p21.31; p-value = $2.23 \times 10^{-9}$). Figure 4. The Q-Q plot and Manhattan plot of COVID-19 advanced critical care GWAS. Sample size is 11,974. In the Manhattan plot, each point denotes a SNP located on a particular chromosome (x-axis). The significance level is presented in the y-axis. 
The red line indicates the threshold for genome-wide significance 5 × 10 −8 while the blue line indicates the threshold for suggestive genome-wide significance 1 × 10 −5 . The light green dots are the genes of interest, including SLC6A20, LZTFL1, CCR9, FYCO1, CXCR6, XCR1, HLA-G, CCHCR1, NOTCH4, ABO, OAS1, OAS2, OAS3, APOE, DPP9, TYK2, IFNAR2, TMPRSS2, ACE2, and TLR7. The advanced critical care phenotype is adjusted by age, sex, body mass index, socioeconomic status, smoking, if in an aged care home, array, and PC1–20. No genome-wide significant signals were found. COVID-19 mortality By 23 March 2021, 16,465 UKBB participants received positive COVID-19 test results. Among these, 1,042 individuals died from COVID-19. We performed the same association tests for COVID-19 mortality as for susceptibility and severity. The results ( Table 10 ) show that males have a much higher chance of dying from COVID-19 than females (OR = 1.89, 95% CI = [1.63,2.20], p-value <10 −5 ), consistent with previously published results from independent cohorts ( Peckham et al ., 2020 ). The black ethnic group is at a much higher mortality risk from SARS-CoV-2 compared to white individuals (OR = 2.04, 95% CI = [1.38,2.94], p-value = 0.0002). Age, BMI, SES, and smoking are positively associated with COVID-19 mortality. People living in aged care homes are at a much higher risk of dying from COVID-19. For self-reported white individuals, age, sex, BMI, SES, smoking, and being in an aged care home are positively associated with COVID-19 mortality. Therefore, all these covariates were used to adjust the mortality phenotype for GWAS. However, no genome-wide significant signal was detected for this GWAS ( Figure 5 ). Table 10. COVID-19 mortality and non-genetic risk factor association test results for all populations and white British. Cases of mortality include participants whose primary death cause is COVID-19. Controls are the rest of the participants who received positive test results. 
We tested sex, age and body mass index (BMI) in a multivariable model first and then tested each other factor individually by adjusting sex, age and BMI. SES stands for socioeconomic status. Odds ratio (OR) and p-values (P) are provided. Samples Case/control Statistic Sex Age BMI Blood type Ethnic background inAgedCare SES Smoke A AB O Black Asian Mixed Other All populations 1,042/15,667 OR 1.89 1.17 1.08 0.98 1.11 1.11 2.04 1.56 0.68 1.05 2.52 1.07 1.009 P ≈0 ≈0 ≈0 0.9 0.6 0.4 0.0002 0.01 0.5 0.9 ≈0 ≈0 ≈0 White British 939/13,968 OR 1.96 1.17 1.07 1.13 1.27 1.26 2.62 1.06 1.01 P ≈0 ≈0 ≈0 0.4 0.3 0.1 ≈0 ≈0 ≈0 *≈0 means <10 −5 . Figure 5. The Q-Q plot and Manhattan plot of COVID-19 mortality GWAS. Sample size is 12,790. In the Manhattan plot, each point denotes a SNP located on a particular chromosome ( x -axis). The significance level is presented in the y -axis. The red line indicates the threshold for genome-wide significance 5 × 10 −8 while the blue line indicates the threshold for suggestive genome-wide significance 1 × 10 −5 . The light green dots are the genes of interest, including SLC6A20, LZTFL1, CCR9, FYCO1, CXCR6, XCR1, HLA-G, CCHCR1, NOTCH4, ABO, OAS1, OAS2, OAS3, APOE, DPP9, TYK2, IFNAR2, TMPRSS2, ACE2, and TLR7. The mortality phenotype is adjusted by age, sex, body mass index, socioeconomic status, smoking, if in an aged care home, array, and PC1–20. No genome-wide significant signals were found. COVID-19 comorbidities We were interested in the co-occurrence of COVID-19 and comorbidities in individuals who had suffered from severe COVID-19. Therefore, we divided the hospital inpatient diagnosis records into before and after the COVID-19 pandemic using the date 16 March 2020, when COVID-19 testing commenced in the UK. We performed association testing for each comorbidity using logistic regression models and adjusted COVID-19 severity (if the patient received critical care treatments) by sex, age, BMI, SES, smoking and aged care status. 
Tables 11 and 12 list the top ten diseases associated with severe COVID-19 before and after 16 March 2020, respectively. From Table 12, we found that the conditions that commonly co-occur with COVID-19 are pneumonia, respiratory diseases, renal failure, metabolic disorders, hypertensive diseases, heart disease and other bacterial diseases. People who have ever had mental disorders, influenza and pneumonia, renal failure, respiratory diseases, bacterial, viral, or other infections, malignant neoplasms of lymphoid, haematopoietic and related tissue, or other blood diseases, tend to have severe symptoms after being infected by SARS-CoV-2. Table 11. The top 10 comorbidities associated with COVID-19 severity before COVID-19 testing in the UK. We divided the hospital inpatient diagnosis records into before and after the COVID-19 pandemic using the date 16 March 2020, when COVID-19 testing commenced. We performed association testing for each comorbidity using logistic regression models and adjusted COVID-19 severity (if the patient received critical care treatments) by sex, age, body mass index, socioeconomic status, smoking and aged care status. To show the comorbidities in individuals who had suffered from severe COVID-19, we ranked the p-values before 16 March 2020 and listed the top 10 comorbidities. 
ICD10 code Diseases Before 16 March 2020 After 16 March 2020 OR 2.50% 97.50% P-value Rank OR 2.50% 97.50% P-value Rank F00-F09 Organic, including symptomatic, mental disorders 2.33 1.86 2.89 4.76E-14 1 2.33 1.88 2.88 5.94E-15 15 J09-J18 Influenza and pneumonia 2.03 1.67 2.46 5.05E-13 2 11.34 9.69 13.28 4.62E-201 1 N17-N19 Renal failure 1.93 1.60 2.30 1.15E-12 3 4.02 3.38 4.78 9.57E-56 4 J95-J99 Other diseases of the respiratory system 2.24 1.77 2.83 1.09E-11 4 13.32 10.94 16.24 1.59E-145 3 J80-J84 Other respiratory diseases principally affecting the interstitium 3.89 2.60 5.78 2.55E-11 5 12.05 8.00 18.28 2.90E-32 6 C81-C96 Malignant neoplasms, stated or presumed to be primary, of lymphoid, haematopoietic and related tissue 3.60 2.44 5.23 4.67E-11 6 5.92 3.93 8.87 8.82E-18 13 B95-B98 Bacterial, viral and other infectious agents 1.93 1.58 2.34 4.81E-11 7 9.01 7.71 10.54 1.22E-166 2 J20-J22 Other acute lower respiratory infections 2.07 1.66 2.58 1.09E-10 8 2.62 1.75 3.87 1.90E-06 31 A30-A49 Other bacterial diseases 2.21 1.72 2.82 3.22E-10 9 3.54 2.71 4.59 5.49E-21 10 D70-D77 Other diseases of blood and blood-forming organs 3.07 2.12 4.39 1.49E-09 10 4.22 2.81 6.29 2.44E-12 18 Table 12. The top 10 comorbidities associated with COVID-19 severity after COVID-19 testing in the UK. We divided the hospital inpatient diagnosis records into before and after the COVID-19 pandemic using the date 16 March 2020, when COVID-19 testing commenced. We performed association testing for each comorbidity using logistic regression models and adjusted COVID-19 severity (if the patient received critical care treatments) by sex, age, body mass index, socioeconomic status, smoking and aged care status. To show the top 10 co-occurrence of COVID-19, we ranked the p-values after 16 March 2020 and listed the top 10 comorbidities. 
ICD10 code Diseases Before 16 March 2020 After 16 March 2020 OR 2.50% 97.50% P-value Rank OR 2.50% 97.50% P-value Rank J09-J18 Influenza and pneumonia 2.03 1.67 2.46 5.05E-13 2 11.34 9.69 13.28 4.62E-201 1 B95-B98 Bacterial, viral and other infectious agents 1.93 1.58 2.34 4.81E-11 7 9.01 7.71 10.54 1.22E-166 2 J95-J99 Other diseases of the respiratory system 2.24 1.77 2.83 1.09E-11 4 13.32 10.94 16.24 1.59E-145 3 N17-N19 Renal failure 1.93 1.60 2.30 1.15E-12 3 4.02 3.38 4.78 9.57E-56 4 E70-E90 Metabolic disorders 1.43 1.23 1.66 1.76E-06 19 3.38 2.87 3.97 4.48E-49 5 J80-J84 Other respiratory diseases principally affecting the interstitium 3.89 2.60 5.78 2.55E-11 5 12.05 8.00 18.28 2.90E-32 6 I10-I15 Hypertensive diseases 1.23 1.06 1.43 0.007 50 2.40 2.06 2.80 8.37E-29 7 I30-I52 Other forms of heart disease 1.51 1.29 1.76 2.25E-07 15 2.56 2.16 3.02 8.45E-28 8 J40-J47 Chronic lower respiratory diseases 1.45 1.23 1.70 8.18E-06 22 2.68 2.22 3.21 1.45E-25 9 A30-A49 Other bacterial diseases 2.21 1.72 2.82 3.22E-10 9 3.54 2.71 4.59 5.49E-21 10 APOE e4 Several publications have reported that the APOE e4 genotype is associated with COVID-19 susceptibility and severity ( Numbers and Brodaty 2021 ; Kuo et al ., 2020a , 2020b ). APOE e4 is a known risk factor for dementia, which has been replicated many times ( Liu et al ., 2013 ; Safieh, Korczyn, and Michaelson 2019 ; Emrani et al ., 2020 ). One explanation for people with APOE e4 being at higher risk of COVID-19 could be due to a higher risk of exposure, as these individuals are more likely to reside in care homes, which have suffered from high rates of infections. This is particularly likely to be the case in UKBB, where 47% of participants are older than 70 years old. To test this hypothesis, we performed GWAS tests with and without aged care status. 
The APOE e4 signal was genome-wide significant without aged care status but was gone after aged care status adjustment ( Figure 6 ), suggesting that this finding is not robust and may be due to ascertainment bias. Figure 6. COVID-19 susceptibility GWAS tests with and without aged care status covariate adjustment. a. COVID-19 susceptibility GWAS without care home status covariate adjustment. The model we used is: susceptibility ~ age + sex + BMI + PC1-20 + array + SNP. b. COVID-19 susceptibility GWAS with care home status covariate adjustment. The model we used is: susceptibility ~ age + sex + BMI + PC1-20 + array + inAgedCare + SNP. The APOE e4 signal was genome-wide significant without aged care status but was gone after aged care status adjustment, suggesting that this finding is not robust and may be due to ascertainment bias. Use cases To demonstrate the functionality and utility of UKB.COVID19, we present a basic tutorial for using UKB.COVID19. Due to the restriction of using UKBB data, we illustrate the use cases using simulated data. The SAIGE GWAS script example can be found in Github: https://github.com/bahlolab/UKB.COVID19/tree/main/inst/GWAS . Basic usage Generating a covariate file. The risk_factor function in UKB.COVID19 can be used to generate a covariate file with established risk factors and risk factors of interest by specifying the field code in UKBB main data. 
library (UKB.COVID19) covar <- risk_factor (ukb.data=covid_example("sim_ukb.tab.gz"), ABO.data=covid_example("sim_covid19_misc.txt.gz"), hesin.file=covid_example("sim_hesin.txt.gz"), res.eng=covid_example("sim_result_england.txt.gz")) head (covar) #> ID sex age bmi ethnic other.ppl black asian mixed white SES smoke blood_group O AB B A inAgedCare #> 1 1 1 74 39.0947 1001 0 0 0 0 1 5.43719 0.000 AO 0 0 0 1 0 #> 2 2 1 58 25.3177 1001 0 0 0 0 1 2.10787 0.000 AO 0 0 0 1 0 #> 3 3 0 51 32.2349 1002 0 0 0 0 1 7.36321 25.625 AO 0 0 0 1 0 #> 4 4 0 56 21.7955 1001 0 0 0 0 1 5.62047 0.000 AO 0 0 0 1 0 #> 6 6 1 67 25.9823 1001 0 0 0 0 1 3.90245 0.000 OO 1 0 0 0 0 Generating COVID-19 susceptibility phenotype file with risk factors. In the output file, columns “pos.neg” and “pos.ppl” are the susceptibility phenotypes, which denote 1) UKBB participants with COVID-19 positive versus negative results, and 2) participants with positive results versus all the other participants. phe <- makePhenotypes (ukb.data=covid_example("sim_ukb.tab.gz"), res.eng=covid_example("sim_result_england.txt.gz"), death.file=covid_example("sim_death.txt.gz"), death.cause.file=covid_example("sim_death_cause.txt.gz"), hesin.file=covid_example("sim_hesin.txt.gz"), hesin_diag.file=covid_example("sim_hesin_diag.txt.gz"), hesin_oper.file=covid_example("sim_hesin_oper.txt.gz"), hesin_critical.file=covid_example("sim_hesin_critical.txt.gz"), code.file=covid_example("coding240.txt.gz"), pheno.type = "susceptibility") #> [1] "965 participants got tested until 2021-04-05." #> [1] "218 participants got positive test results until 2021-04-05." #> [1] "There are 21 deaths with COVID-19. 20 of them primary death cause is COVID-19." #> [1] "50 patients admitted to hospital were diagnosed as COVID-19 until 2021-04-05." #> [1] "32 patients' primary diagnosis is COVID-19." #> [1] "1 patients in hospitalisation with COVID-19 diagnosis but show negative in the result file. Modified their test results." 
#> [1] "There are 219 COVID-19 patients identified. 32 individuals are admitted to hospital. 3 had been in ICU. 1 had been in advanced ICU." #> [1] "Outputting file: ~/UKB.COVID19/extdata/results/phenotype.txt" head (phe) #> ID pos.neg pos.ppl #> 1 1 1 1 #> 2 2 0 0 #> 3 3 0 0 #> 4 4 0 0 #> 5 5 0 0 #> 6 6 0 0 Performing association tests. The log_cov function performs association tests using logistic regressions. This is an example of association tests between COVID-19 susceptibility and three risk factors: sex, age and BMI. log_cov(pheno=phe, covariates=covar, phe.name="pos.neg", cov.name=c("sex", "age", "bmi")) #> Estimate OR 2.5 % 97.5 % p #> (Intercept) -0.16475743 0.8480994 0.1954585 3.6381032 0.824991899 #> sex1 0.04207813 1.0429760 0.7644672 1.4215535 0.790121307 #> age -0.03080456 0.9696651 0.9519878 0.9876397 0.001009957 #> bmi 0.03625193 1.0369170 1.0076088 1.0667564 0.012568486 Generating a comorbidity summary file. The comorbidity_summary function scans all the hospitalisation records with a given time period and generates a text file. The following example is to generate a comorbidity summary file that includes all the primary and secondary diagnoses in the hospital inpatient data after 16 March 2020. comorb <- comorbidity_summary (ukb.data=covid_example("sim_ukb.tab.gz"), hesin.file=covid_example("sim_hesin.txt.gz"), hesin_diag.file=covid_example("sim_hesin_diag.txt.gz"), ICD10.file=covid_example("ICD10.coding19.txt.gz"), primary = FALSE, Date.start = "16/03/2020") comorb[1:6,1:10] #> ID A00-A09 A15-A19 A20-A28 A30-A49 A50-A64 A65-A69 A70-A74 A75-A79 A80-A89 #> 1 1 1 0 0 1 0 0 0 0 0 #> 2 10 0 0 0 0 0 0 0 0 0 #> 3 100 0 0 0 0 0 0 0 0 0 #> 4 1000 0 0 0 0 0 0 0 0 0 #> 5 101 0 0 0 0 0 0 0 0 0 #> 6 102 0 0 0 0 0 0 0 0 0 Performing association tests between COVID-19 phenotype and comorbidities. This is an example of association tests between COVID-19 susceptibility and all comorbidities. 
It shows NAs when fitted probabilities numerically 0 or 1 occurred in the logistic regression models. comorb.asso <- comorbidity_asso (pheno=phe, covariates=covar, cormorbidity=comorb, population="white", cov.name=c("sex","age","bmi","SES","smoke","inAgedCare"), phe.name="pos.neg", ICD10.file=covid_example("ICD10.coding19.txt.gz")) head (comorb.asso, 4) #> ICD10 Estimate OR 2.5% 97.5% p #> A00-A09 A00-A09 Intestinal infectious diseases 0.4722864 1.603657 0.756784 3.240022 0.199664372 #> A15-A19 A15-A19 Tuberculosis NA NA NA NA NA #> A20-A28 A20-A28 Certain zoonotic bacterial diseases NA NA NA NA NA #> A30-A49 A30-A49 Other bacterial diseases 1.2246077 3.402831 1.633209 6.978689 0.000873076 Discussion We developed an R package that can reproducibly analyse and produce input files for GWAS studies for COVID-19 traits, using the UKBB resource. The R package can be easily applied to the frequently updated UKBB COVID-19 datasets, facilitating rapid analyses. By applying the R package to data released in April 2021, we found that age, BMI, SES and smoking are positively associated with COVID-19 susceptibility, severity and mortality. Males are at a higher risk of COVID-19 infection than females. People residing in aged care homes were also at higher risk, potentially because they have other pre-existing conditions, and may also have a higher chance of exposure to SARS-CoV-2. By performing GWAS, we replicated previous findings ( Pairo-Castineira et al ., 2021 ; Zeberg and Pääbo, 2020 ; “Genomewide Association Study of Severe Covid-19 with Respiratory Failure”, 2020 ; Host Genetics Initiative, 2021 ) that the locus 3p21.31 is associated with COVID-19 susceptibility and severity. The COVID-19 Host Genetics Initiative brings together the human genetics community to generate, share, and analyse data to learn the genetic determinants of COVID-19 susceptibility, severity, and related outcomes. 
They have been performing large-scale meta-analyses using existing biobanks, including UKBB, and periodically provide updated releases of their results, making available genome-wide summary statistics, and providing an online browser for exploring the latest results ( https://app.covid19hg.org/ ). We primarily advocate the use of these resources for exploring genetic associations with COVID-19 susceptibility and severity. However, we anticipate our R package will enable researchers to undertake more bespoke genetic analyses, using the most up to date UKBB COVID-19 data, to meet the aim of their studies. Such analyses may include adjusting for non-genetic risk factors or comorbidities, to explore mediators, polygenic risk score analyses, or Mendelian Randomisation studies. Long COVID, also known as post-acute sequelae of SARS-CoV-2 infection, refers to a range of symptoms that persist for weeks or months after the acute phase of COVID-19 has resolved. These symptoms can include fatigue, shortness of breath, cognitive dysfunction, and various other systemic issues, significantly impacting the quality of life of affected individuals. The UKB.COVID19 package provides multiple functions to facilitate long COVID analysis. For instance, the ‘comorbidity_summary’ and ‘comorbidity_asso’ functions can be used to summarise potential long COVID symptoms and assess their associations with risk factors, such as age, sex and certain pre-existing conditions. Furthermore, researchers can focus on subsets of participants reporting persistent symptoms consistent with long COVID to investigate genetic risk factors using GWAS. These analyses hold promise for uncovering the biological underpinnings of long COVID and identifying potential therapeutic targets to alleviate its impact. There are several limitations of UKBB COVID-19 data. First, UKBB is not a nationally or worldwide representative sample. The majority of participants are of white British ethnicity. 
UKBB participants were more likely to be older, to be female, and to live in less socioeconomically deprived areas than nonparticipants. Compared with the general population, participants were less likely to be obese, to smoke, and to drink alcohol daily and had fewer self-reported health conditions ( Fry et al ., 2017 ). Initiatives such as OpenSafely ( Williamson et al ., 2020 ), have aimed to examine risk factors for COVID-19 disease in an unascertained UK population, via electronic health records. These data, however, are not presently available for use by the wider research community, due to the possibility of re-identification of individuals. The recent OpenSafely flagship paper examined health records of over 17 million individuals in England, of whom 10,926 had a COVID-19 related death, and found that male sex, greater age and deprivation, and non-white ethnicities were major clinical risk factors for mortality. Despite the ascertainment of the UKBB, it is reassuring that these established risk factors are also associated with COVID-19 outcomes in this cohort. Second, the UKBB COVID-19 dataset evolved as testing scaled up in line with the national testing strategy and thus COVID-19 data is also subject to ascertainment bias. UK testing was initially largely restricted to healthcare workers, and those individuals with symptoms in hospitals. A positive result in an individual not recorded as a healthcare worker was therefore a reasonable proxy for severe disease early on in the pandemic. Testing capacity subsequently increased to include more community testing under pillar 2 of the national strategy, and as of 27 April 2020, NHS England directed hospitals to test all non-elective patients admitted overnight, including asymptomatic patients. To maximise ascertainment of cases and to evaluate disease severity, SARS-CoV-2 testing data should be used in combination with linked medical records (i.e. 
hospital inpatient records and death records) as we have implemented in this package. More recently, UKBB has made primary care records available for COVID-19 research. These data, not yet utilised by the UKB.COVID19 package, will further improve case identification. Nonetheless, there are likely to be many individuals in the UKBB who contracted COVID-19, in particular those with milder disease, who will not be captured by the available data. The definition of COVID-19 susceptibility is supposed to be the status of people who get infected or not after exposure to SARS-CoV-2. However, exposure to SARS-CoV-2 is not easy to determine. Furthermore, not everyone has an equal chance of being exposed to SARS-CoV-2 (for example, exposure will vary by occupation), nor does everyone have the same likelihood of being tested, due to testing strategies, as noted above. Such data idiosyncrasies have the potential to distort associations, in observational studies, and also in genetic analyses through population stratification. This issue of ascertainment, or collider bias, in the context of COVID-19, is discussed at length by Griffith et al . (2020) . Analyses using the UKBB data should therefore be undertaken and interpreted within the context of changing testing capacity, and other limitations regarding phenotype definitions. We welcome further suggestions and improvements for this R package, which we hope will reduce the barrier to utilising the UKBB data for COVID-19 research. Data availability All the datasets were obtained from UKBB. To access the UKBB datasets, you need to register as a UKBB researcher ( https://www.ukbiobank.ac.uk/enable-your-research/register ). If you are already an approved UKBB researcher with a project underway and wish to receive these datasets for COVID-19 research purposes, you can register to receive these data by logging into the Access Management System (AMS) ( https://bbams.ndph.ox.ac.uk/ams/resApplications ). 
How to apply for access to UKBB data: https://www.ukbiobank.ac.uk/enable-your-research/apply-for-access . See COVID-19 data ( https://biobank.ndph.ox.ac.uk/showcase/exinfo.cgi?src=COVID19 ) for registration and access details and Resource 1758 ( https://biobank.ndph.ox.ac.uk/showcase/refer.cgi?id=1758 ) for further information. All genome-wide significant GWAS hits with gene annotations are shown in Table 7 . Software availability UKB.COVID19 can be installed via CRAN using install.packages("UKB.COVID19"). UKB.COVID19 is maintained at https://github.com/bahlolab/UKB.COVID19 . Latest UKB.COVID19 source code is available from: https://github.com/bahlolab/UKB.COVID19 . Archived source code at the time of publication: http://doi.org/10.5281/zenodo.5174381 ( Wang et al ., 2021 ). License: MIT ( https://opensource.org/licenses/MIT ). Acknowledgements This research was conducted using data from UK Biobank ( www.ukbiobank.ac.uk ), a major biomedical database. References Black D:“HEALTH AND DEPRIVATION: Inequality and the North.” J Royal College General Practitioners. 1988; 38 (310):234. Booth A, Reed AB, Ponzo S, et al. :Population Risk Factors for Severe Disease and Mortality in COVID-19: A Global Systematic Review and Meta-Analysis. PLoS One. 2021; 16 (3): e0247461. PubMed Abstract | Publisher Full Text | Free Full Text Bycroft C, Freeman C, Petkova D, et al. :The UK Biobank Resource with Deep Phenotyping and Genomic Data. Nature. 2018; 562 (7726):203–209. PubMed Abstract | Publisher Full Text | Free Full Text Elhabyan A, Saja E, Ehab S, et al. :The Role of Host Genetics in Susceptibility to Severe Viral Infections in Humans and Insights into Host Genetics of Severe COVID-19: A Systematic Review. Virus Res. 2020; 289 (November): 198163. PubMed Abstract | Publisher Full Text | Free Full Text Elliott LT, Sharp K, Alfaro-Almagro F, et al. :Genome-Wide Association Studies of Brain Imaging Phenotypes in UK Biobank. Nature. 2018; 562 (7726):210–216. 
PubMed Abstract | Publisher Full Text | Free Full Text Emrani S, Arain HA, DeMarshall C, et al. :APOE4 Is Associated with Cognitive and Pathological Heterogeneity in Patients with Alzheimer’s Disease: A Systematic Review. Alzheimers Res Ther. 2020. PubMed Abstract | Publisher Full Text | Free Full Text Fry A, Littlejohns TJ, Sudlow C, et al. :Comparison of Sociodemographic and Health-Related Characteristics of UK Biobank Participants With Those of the General Population. Am J Epidemiol. 2017; 186 (9):1026–1034. PubMed Abstract | Publisher Full Text | Free Full Text Genomewide Association Study of Severe Covid-19 with Respiratory Failure. New Eng J Med. 2020; 383 (16):1522–1534. Publisher Full Text Griffith GJ, Morris TT, Tudball MJ, et al. :Collider Bias Undermines Our Understanding of COVID-19 Disease Risk and Severity. Nat Commun. 2020; 11 (1): 5749. PubMed Abstract | Publisher Full Text | Free Full Text Hoffmann M, Kleine-Weber H, Schroeder S, et al. :SARS-CoV-2 Cell Entry Depends on ACE2 and TMPRSS2 and Is Blocked by a Clinically Proven Protease Inhibitor. Cell. 2020. PubMed Abstract | Publisher Full Text | Free Full Text Host Genetics Initiative, Covid-19:Mapping the Human Genetic Architecture of COVID-19 by Worldwide Meta-Analysis. MedRxiv. 2021; Reference Source Jiang H, Promchan K, Lin B-R, et al. :LZTFL1 Upregulated by All-Trans Retinoic Acid during CD4+ T Cell Activation Enhances IL-5 Production. J Immunol. 2016; 196 (3):1081–1090. PubMed Abstract | Publisher Full Text | Free Full Text Kaser A:Genetic Risk of Severe Covid-19. New England J Med. 2020. Publisher Full Text Kuo C-L, Pilling LC, Atkins JL, et al. :ApoE e4e4 Genotype and Mortality With COVID-19 in UK Biobank. The Journals of Gerontology. Series A, Biological Sciences and Medical Sciences. 2020a; 75 (9):1801–1803. PubMed Abstract | Publisher Full Text | Free Full Text Kuo C-L, Pilling LC, Atkins JL, et al. :APOE e4 Genotype Predicts Severe COVID-19 in the UK Biobank Community Cohort. 
The Journals of Gerontology. Series A, Biological Sciences and Medical Sciences. 2020b; 75 (11):2231–2232. PubMed Abstract | Publisher Full Text | Free Full Text Liu C-C, Liu C-C, Kanekiyo T, et al. :Apolipoprotein E and Alzheimer Disease: Risk, Mechanisms and Therapy. Nat Rev. Neurol 2013; 9 (2): 106–118. PubMed Abstract | Publisher Full Text | Free Full Text Numbers K, Brodaty H:The Effects of the COVID-19 Pandemic on People with Dementia. Nat Rev. Neurol. 2021; 17 (2):69–70. PubMed Abstract | Publisher Full Text | Free Full Text Pairo-Castineira E, Clohisey S, Klaric L, et al. :Genetic mechanisms of critical illness in COVID-19. Nature. 2021; 591 :92–98. Publisher Full Text Peckham H, de Gruijter NM , Raine C, et al. :Male Sex Identified by Global COVID-19 Meta-Analysis as a Risk Factor for Death and ITU Admission. Nat Commun. 2020; 11 (1): 6317. PubMed Abstract | Publisher Full Text | Free Full Text Pijls BG, Jolani S, Atherley A, et al. :Demographic Risk Factors for COVID-19 Infection, Severity, ICU Admission and Death: A Meta-Analysis of 59 Studies. BMJ Open. 2021; 11 (1): e044640. PubMed Abstract | Publisher Full Text | Free Full Text Purcell S, Neale B, Todd-Brown K, et al. :PLINK: A Tool Set for Whole-Genome Association and Population-Based Linkage Analyses. Am J Hum Genet. 2007; 81 (3):559–575. PubMed Abstract | Publisher Full Text | Free Full Text Safieh M, Korczyn AD, Michaelson DM:ApoE4: An Emerging Therapeutic Target for Alzheimer’s Disease. BMC Med. 2019. PubMed Abstract | Publisher Full Text | Free Full Text Seo S, Zhang Q, Bugge K, et al. :A Novel Protein LZTFL1 Regulates Ciliary Trafficking of the BBSome and Smoothened. PLoS Genet. 2011; 7 (11): e1002358. PubMed Abstract | Publisher Full Text | Free Full Text Wang L, Jackson VE, Fearnley LG, et al. :UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis. Zenodo. 2021. Publisher Full Text Williamson EJ, Walker AJ, Bhaskaran K, et al. 
:Factors Associated with COVID-19-Related Death Using OpenSAFELY. Nature. 2020; 584 (7821):430–436. PubMed Abstract | Publisher Full Text | Free Full Text Wolff D, Nee S, Hickey NS, et al. :Risk Factors for Covid-19 Severity and Fatality: A Structured Literature Review. Infection. 2021; 49 (1):15–28. PubMed Abstract | Publisher Full Text | Free Full Text Wu Z, McGoogan JM:Characteristics of and Important Lessons from the Coronavirus Disease 2019 (COVID-19) Outbreak in China: Summary of a Report of 72 314 Cases from the Chinese Center for Disease Control and Prevention. JAMA. 2020; 323 (13):1239–1242. PubMed Abstract | Publisher Full Text Zeberg H, Pääbo S:The Major Genetic Risk Factor for Severe COVID-19 Is Inherited from Neanderthals. Nature. 2020; 587 (7835):610–612. PubMed Abstract | Publisher Full Text Zhao J, Yang Y, Huang H, et al. :Relationship between the ABO Blood Group and the COVID-19 Susceptibility. Clinical Infectious Diseases: An Official Publication of the Infectious Diseases Society of America. August 2020. Publisher Full Text Zhou W, Nielsen JB, Fritsche LG, et al. :Efficiently Controlling for Case-Control Imbalance and Sample Relatedness in Large-Scale Genetic Association Studies. Nat Genet. 2018; 50 (9). Publisher Full Text Zietz M, Zucker J, Tatonetti NP:Associations between Blood Type and COVID-19 Infection, Intubation, and Death. Nat Commun. 2020; 11 (1): 5761. 
PubMed Abstract | Publisher Full Text | Free Full Text Comments on this article Comments (0) Version 3 VERSION 3 PUBLISHED 19 Aug 2021 ADD YOUR COMMENT Comment Author details Author details 1 Department of Medical Biology, The University of Melbourne, Parkville, VIC, 3010, Australia 2 Population Health and Immunity Division, The Walter and Eliza Hall Institute of Medical Research, Parkville, VIC, 3052, Australia Longfei Wang Roles: Data Curation, Formal Analysis, Methodology, Software, Writing – Original Draft Preparation Victoria E Jackson Roles: Data Curation, Software, Validation, Writing – Review & Editing Liam G Fearnley Roles: Validation, Writing – Review & Editing Melanie Bahlo Roles: Conceptualization, Supervision, Writing – Review & Editing Competing interests No competing interests were disclosed. Grant information This work was made possible through the Victorian State Government Operational Infrastructure Support and Australian Government National Health and Medical Research Council (NHMRC) independent research Institute Infrastructure Support Scheme (IRIISS). Melanie Bahlo was supported by an NHMRC Investigator Grant (1195236). Access to the UKBB for this project was granted through project ID 36610. Article Versions (3) version 3 Revised Published: 26 Jul 2024, 10:830 https://doi.org/10.12688/f1000research.55370.3 version 2 Revised Published: 18 May 2022, 10:830 https://doi.org/10.12688/f1000research.55370.2 version 1 Published: 19 Aug 2021, 10:830 https://doi.org/10.12688/f1000research.55370.1 Copyright © 2024 Wang L et al . This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. Download Export To Sciwheel Bibtex EndNote ProCite Ref. Manager (RIS) Sente metrics Views Downloads F1000Research - - PubMed Central info_outline Data from PMC are received and updated monthly. 
- - Citations open_in_new 0 open_in_new 0 open_in_new SEE MORE DETAILS CITE how to cite this article Wang L, Jackson VE, Fearnley LG and Bahlo M. UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.12688/f1000research.55370.3 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS track receive updates on this article Track an article to receive email alerts on any updates to this article. TRACK THIS ARTICLE Share Open Peer Review Current Reviewer Status: ? Key to Reviewer Statuses VIEW HIDE Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Version 3 VERSION 3 PUBLISHED 26 Jul 2024 Revised Views 0 Cite How to cite this report: Palmer TM. Reviewer Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.169531.r307380 ) The direct URL for this report is: https://f1000research.com/articles/10-830/v3#referee-response-307380 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 26 Aug 2024 Thomas Michael Palmer , Population Health Sciences, University of Bristol Medical School, Bristol, UK Approved VIEWS 0 https://doi.org/10.5256/f1000research.169531.r307380 I thank the authors for their response and the amendments they have made to the package. 
The package is now back on CRAN. Therefore it again successfully passes R CMD check. So all of my previous comments ... Continue reading READ ALL I thank the authors for their response and the amendments they have made to the package. The package is now back on CRAN. Therefore it again successfully passes R CMD check. So all of my previous comments have essentially been addressed. My only remaining minor comments are: (1) In the vignette I don't think the line: library(here) is required. (2) In the vignette you might want to set warning=FALSE on a few code chunks because several chunks generate quite a lot of warnings which are distracting when reading it: https://cran.r-project.org/web/packages/UKB.COVID19/vignettes/Introduction_to_UKB_COVID19.html Competing Interests: No competing interests were disclosed. Reviewer Expertise: Medical statistics, biostatistics, statistics, R programming. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Palmer TM. Reviewer Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.169531.r307380 ) The direct URL for this report is: https://f1000research.com/articles/10-830/v3#referee-response-307380 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Respond or Comment COMMENT ON THIS REPORT Version 2 VERSION 2 PUBLISHED 18 May 2022 Revised Views 0 Cite How to cite this report: Gonzalez-Kozlova E. Reviewer Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . 
F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.133689.r287362 ) The direct URL for this report is: https://f1000research.com/articles/10-830/v2#referee-response-287362 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 12 Jun 2024 Edgar Gonzalez-Kozlova , Icahn School of Medicine at Mount Sinai, New York, NY, USA Not Approved VIEWS 0 https://doi.org/10.5256/f1000research.133689.r287362 Dear authors, Fantastic job preparing a package to facilitate data retrieval and analysis. I would like to see a few additions that can only strengthen the article. > Are sufficient details ... Continue reading READ ALL Dear authors, Fantastic job preparing a package to facilitate data retrieval and analysis. I would like to see a few additions that can only strengthen the article. > Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others? Prepare make a vignette available showcasing exactly how you intended the package to be used. While the article describes well the study and package, packages without a vignette are disregarded. > Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool? There is no mention of long covid in the article. Long term effects of COVID19 cant be ignored. Please include a discussion and/or status of long covid patients in the article. Are the conclusions about the tool and its performance adequately supported by the findings presented in the article? A statistics section is missing from the methods section. Every test used in the article should be clearly described and justified in methods. Is the rationale for developing the new software tool clearly explained? Yes Is the description of the software tool technically sound? 
Yes Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others? Partly Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool? Partly Are the conclusions about the tool and its performance adequately supported by the findings presented in the article? Partly Competing Interests: No competing interests were disclosed. Reviewer Expertise: Computational Biology I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Gonzalez-Kozlova E. Reviewer Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.133689.r287362 ) The direct URL for this report is: https://f1000research.com/articles/10-830/v2#referee-response-287362 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 26 Jul 2024 Longfei Wang , Department of Medical Biology, The University of Melbourne, Parkville, 3010, Australia 26 Jul 2024 Author Response ------------------ Reviewer Comment: 1. Prepare make a vignette available showcasing exactly how you intended the package to be used. While the article describes well the study and package, packages without a ... Continue reading ------------------ Reviewer Comment: 1. Prepare make a vignette available showcasing exactly how you intended the package to be used. While the article describes well the study and package, packages without a vignette are disregarded. Author Response: We appreciate your suggestion. 
We have created a vignette and updated the UKB.COVID19 R package on CRAN ( https://cran.r-project.org/web/packages/UKB.COVID19/vignettes/Introduction_to_UKB_COVID19.html ). ------------------ Reviewer Comment: 2. There is no mention of long covid in the article. Long term effects of COVID19 cant be ignored. Please include a discussion and/or status of long covid patients in the article. Author Response: Thank you for your suggestion. We have added a discussion of long COVID and provided relevant functions in UKB.COVID19. Long COVID, also known as post-acute sequelae of SARS-CoV-2 infection, refers to a range of symptoms that persist for weeks or months after the acute phase of COVID-19 has resolved. These symptoms can include fatigue, shortness of breath, cognitive dysfunction, and various other systemic issues, significantly impacting the quality of life of affected individuals. The UKB.COVID19 package provides multiple functions to facilitate long COVID analysis. For instance, the ‘comorbidity_summary’ and ‘comorbidity_asso’ functions can be used to summarise potential long COVID symptoms and assess their associations with risk factors, such as age, sex and certain pre-existing conditions. Furthermore, researchers can focus on subsets of participants reporting persistent symptoms consistent with long COVID to investigate genetic risk factors using GWAS. These analyses hold promise for uncovering the biological underpinnings of long COVID and identifying potential therapeutic targets to alleviate its impact. ------------------ Reviewer Comment: 3. A statistics section is missing from the methods section. Every test used in the article should be clearly described and justified in methods. Author Response: We added a statistics section in the methods section. 
Statistical analysis To assess the associations between non-genetic risk factors and COVID-19 phenotypes (including susceptibility, severity, and mortality), we employed multivariable logistic regression models using the ‘glm’ function from the R package stats. Each model adjusted for covariates such as age, sex, and BMI. The tested risk factors included socioeconomic status (SES), smoking status, blood type, ethnic background, and residence in aged care facilities. The logistic regression model for each risk factor was specified as follows: logit(COVID-19 phenotype) ~ risk factor + age + sex + BMI. Comorbidity associations were analyzed using similar multivariable logistic regression models, with COVID-19 phenotypes modeled as: logit(COVID-19 phenotype) ~ comorbidity category + age + sex + BMI + SES + smoking status + aged care status. Odds ratios (ORs) with 95% confidence intervals (CIs) were reported, and p-values were calculated to determine the significance of the associations. To identify genetic variants associated with COVID-19 phenotypes, we performed GWASs using the SAIGE software. Principal component analysis (PCA) was performed to account for population stratification, and the first 20 principal components (PCs) were included as covariates in the analysis. Additionally, we adjusted for age, sex, BMI, SES, smoking status, residence in aged care facilities and genotypic array in the regression models. The association between each SNP and the phenotypes was tested using a logistic regression model, as follows: logit(COVID-19 phenotype) ~ SNP + age + sex + BMI + SES + smoking status + aged care status + genotypic array + PC1-20. To account for multiple testing, the Bonferroni correction was applied. Loci reaching the genome-wide significance threshold ($p < 5 \times 10^{-8}$) were considered significant. Manhattan plots and quantile-quantile (QQ) plots were generated to visualize the results using R package ggplot2. 
All analyses were carried out using R (version 4.0.5). ------------------ ------------------ Reviewer Comment: 1. Prepare make a vignette available showcasing exactly how you intended the package to be used. While the article describes well the study and package, packages without a vignette are disregarded. Author Response: We appreciate your suggestion. We have created a vignette and updated the UKB.COVID19 R package on CRAN ( https://cran.r-project.org/web/packages/UKB.COVID19/vignettes/Introduction_to_UKB_COVID19.html ). ------------------ Reviewer Comment: 2. There is no mention of long covid in the article. Long term effects of COVID19 cant be ignored. Please include a discussion and/or status of long covid patients in the article. Author Response: Thank you for your suggestion. We have added a discussion of long COVID and provided relevant functions in UKB.COVID19. Long COVID, also known as post-acute sequelae of SARS-CoV-2 infection, refers to a range of symptoms that persist for weeks or months after the acute phase of COVID-19 has resolved. These symptoms can include fatigue, shortness of breath, cognitive dysfunction, and various other systemic issues, significantly impacting the quality of life of affected individuals. The UKB.COVID19 package provides multiple functions to facilitate long COVID analysis. For instance, the ‘comorbidity_summary’ and ‘comorbidity_asso’ functions can be used to summarise potential long COVID symptoms and assess their associations with risk factors, such as age, sex and certain pre-existing conditions. Furthermore, researchers can focus on subsets of participants reporting persistent symptoms consistent with long COVID to investigate genetic risk factors using GWAS. These analyses hold promise for uncovering the biological underpinnings of long COVID and identifying potential therapeutic targets to alleviate its impact. ------------------ Reviewer Comment: 3. A statistics section is missing from the methods section. 
Every test used in the article should be clearly described and justified in methods. Author Response: We added a statistics section in the methods section. Statistical analysis To assess the associations between non-genetic risk factors and COVID-19 phenotypes (including susceptibility, severity, and mortality), we employed multivariable logistic regression models using the ‘glm’ function from the R package stats. Each model adjusted for covariates such as age, sex, and BMI. The tested risk factors included socioeconomic status (SES), smoking status, blood type, ethnic background, and residence in aged care facilities. The logistic regression model for each risk factor was specified as follows: logit(COVID-19 phenotype) ~ risk factor + age + sex + BMI. Comorbidity associations were analyzed using similar multivariable logistic regression models, with COVID-19 phenotypes modeled as: logit(COVID-19 phenotype) ~ comorbidity category + age + sex + BMI + SES + smoking status + aged care status. Odds ratios (ORs) with 95% confidence intervals (CIs) were reported, and p-values were calculated to determine the significance of the associations. To identify genetic variants associated with COVID-19 phenotypes, we performed GWASs using the SAIGE software. Principal component analysis (PCA) was performed to account for population stratification, and the first 20 principal components (PCs) were included as covariates in the analysis. Additionally, we adjusted for age, sex, BMI, SES, smoking status, residence in aged care facilities and genotypic array in the regression models. The association between each SNP and the phenotypes was tested using a logistic regression model, as follows: logit(COVID-19 phenotype) ~ SNP + age + sex + BMI + SES + smoking status + aged care status + genotypic array + PC1-20. To account for multiple testing, the Bonferroni correction was applied. Loci reaching the genome-wide significance threshold ($p < 5 \times 10^{-8}$) were considered significant. 
Manhattan plots and quantile-quantile (QQ) plots were generated to visualize the results using R package ggplot2. All analyses were carried out using R (version 4.0.5). ------------------ Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 26 Jul 2024 Longfei Wang , Department of Medical Biology, The University of Melbourne, Parkville, 3010, Australia 26 Jul 2024 Author Response ------------------ Reviewer Comment: 1. Prepare make a vignette available showcasing exactly how you intended the package to be used. While the article describes well the study and package, packages without a ... Continue reading ------------------ Reviewer Comment: 1. Prepare make a vignette available showcasing exactly how you intended the package to be used. While the article describes well the study and package, packages without a vignette are disregarded. Author Response: We appreciate your suggestion. We have created a vignette and updated the UKB.COVID19 R package on CRAN ( https://cran.r-project.org/web/packages/UKB.COVID19/vignettes/Introduction_to_UKB_COVID19.html ). ------------------ Reviewer Comment: 2. There is no mention of long covid in the article. Long term effects of COVID19 cant be ignored. Please include a discussion and/or status of long covid patients in the article. Author Response: Thank you for your suggestion. We have added a discussion of long COVID and provided relevant functions in UKB.COVID19. Long COVID, also known as post-acute sequelae of SARS-CoV-2 infection, refers to a range of symptoms that persist for weeks or months after the acute phase of COVID-19 has resolved. These symptoms can include fatigue, shortness of breath, cognitive dysfunction, and various other systemic issues, significantly impacting the quality of life of affected individuals. The UKB.COVID19 package provides multiple functions to facilitate long COVID analysis. 
For instance, the ‘comorbidity_summary’ and ‘comorbidity_asso’ functions can be used to summarise potential long COVID symptoms and assess their associations with risk factors, such as age, sex and certain pre-existing conditions. Furthermore, researchers can focus on subsets of participants reporting persistent symptoms consistent with long COVID to investigate genetic risk factors using GWAS. These analyses hold promise for uncovering the biological underpinnings of long COVID and identifying potential therapeutic targets to alleviate its impact. ------------------ Reviewer Comment: 3. A statistics section is missing from the methods section. Every test used in the article should be clearly described and justified in methods. Author Response: We added a statistics section in the methods section. Statistical analysis To assess the associations between non-genetic risk factors and COVID-19 phenotypes (including susceptibility, severity, and mortality), we employed multivariable logistic regression models using the ‘glm’ function from the R package stats. Each model adjusted for covariates such as age, sex, and BMI. The tested risk factors included socioeconomic status (SES), smoking status, blood type, ethnic background, and residence in aged care facilities. The logistic regression model for each risk factor was specified as follows: logit(COVID-19 phenotype) ~ risk factor + age + sex + BMI. Comorbidity associations were analyzed using similar multivariable logistic regression models, with COVID-19 phenotypes modeled as: logit(COVID-19 phenotype) ~ comorbidity category + age + sex + BMI + SES + smoking status + aged care status. Odds ratios (ORs) with 95% confidence intervals (CIs) were reported, and p-values were calculated to determine the significance of the associations. To identify genetic variants associated with COVID-19 phenotypes, we performed GWASs using the SAIGE software. 
Principal component analysis (PCA) was performed to account for population stratification, and the first 20 principal components (PCs) were included as covariates in the analysis. Additionally, we adjusted for age, sex, BMI, SES, smoking status, residence in aged care facilities and genotypic array in the regression models. The association between each SNP and the phenotypes was tested using a logistic regression model, as follows: logit(COVID-19 phenotype) ~ SNP + age + sex + BMI + SES + smoking status + aged care status + genotypic array + PC1-20. To account for multiple testing, the Bonferroni correction was applied. Loci reaching the genome-wide significance threshold ($p < 5 \times 10^{-8}$) were considered significant. Manhattan plots and quantile-quantile (QQ) plots were generated to visualize the results using R package ggplot2. All analyses were carried out using R (version 4.0.5). ------------------ ------------------ Reviewer Comment: 1. Prepare make a vignette available showcasing exactly how you intended the package to be used. While the article describes well the study and package, packages without a vignette are disregarded. Author Response: We appreciate your suggestion. We have created a vignette and updated the UKB.COVID19 R package on CRAN ( https://cran.r-project.org/web/packages/UKB.COVID19/vignettes/Introduction_to_UKB_COVID19.html ). ------------------ Reviewer Comment: 2. There is no mention of long covid in the article. Long term effects of COVID19 cant be ignored. Please include a discussion and/or status of long covid patients in the article. Author Response: Thank you for your suggestion. We have added a discussion of long COVID and provided relevant functions in UKB.COVID19. Long COVID, also known as post-acute sequelae of SARS-CoV-2 infection, refers to a range of symptoms that persist for weeks or months after the acute phase of COVID-19 has resolved. 
These symptoms can include fatigue, shortness of breath, cognitive dysfunction, and various other systemic issues, significantly impacting the quality of life of affected individuals. The UKB.COVID19 package provides multiple functions to facilitate long COVID analysis. For instance, the ‘comorbidity_summary’ and ‘comorbidity_asso’ functions can be used to summarise potential long COVID symptoms and assess their associations with risk factors, such as age, sex and certain pre-existing conditions. Furthermore, researchers can focus on subsets of participants reporting persistent symptoms consistent with long COVID to investigate genetic risk factors using GWAS. These analyses hold promise for uncovering the biological underpinnings of long COVID and identifying potential therapeutic targets to alleviate its impact. ------------------ Reviewer Comment: 3. A statistics section is missing from the methods section. Every test used in the article should be clearly described and justified in methods. Author Response: We added a statistics section in the methods section. Statistical analysis To assess the associations between non-genetic risk factors and COVID-19 phenotypes (including susceptibility, severity, and mortality), we employed multivariable logistic regression models using the ‘glm’ function from the R package stats. Each model adjusted for covariates such as age, sex, and BMI. The tested risk factors included socioeconomic status (SES), smoking status, blood type, ethnic background, and residence in aged care facilities. The logistic regression model for each risk factor was specified as follows: logit(COVID-19 phenotype) ~ risk factor + age + sex + BMI. Comorbidity associations were analyzed using similar multivariable logistic regression models, with COVID-19 phenotypes modeled as: logit(COVID-19 phenotype) ~ comorbidity category + age + sex + BMI + SES + smoking status + aged care status. 
Odds ratios (ORs) with 95% confidence intervals (CIs) were reported, and p-values were calculated to determine the significance of the associations. To identify genetic variants associated with COVID-19 phenotypes, we performed GWASs using the SAIGE software. Principal component analysis (PCA) was performed to account for population stratification, and the first 20 principal components (PCs) were included as covariates in the analysis. Additionally, we adjusted for age, sex, BMI, SES, smoking status, residence in aged care facilities and genotypic array in the regression models. The association between each SNP and the phenotypes was tested using a logistic regression model, as follows: logit(COVID-19 phenotype) ~ SNP + age + sex + BMI + SES + smoking status + aged care status + genotypic array + PC1-20. To account for multiple testing, the Bonferroni correction was applied. Loci reaching the genome-wide significance threshold ($p < 5 \times 10^{-8}$) were considered significant. Manhattan plots and quantile-quantile (QQ) plots were generated to visualize the results using R package ggplot2. All analyses were carried out using R (version 4.0.5). ------------------ Competing Interests: No competing interests were disclosed. Close Report a concern COMMENT ON THIS REPORT Version 1 VERSION 1 PUBLISHED 19 Aug 2021 Views 0 Cite How to cite this report: Valeria V and De Silvestri A. Reviewer Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.58938.r126903 ) The direct URL for this report is: https://f1000research.com/articles/10-830/v1#referee-response-126903 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
Close Copy Citation Details Reviewer Report 22 Apr 2022 Virginia Valeria , Servizio di Epidemiologia Clinica e Biostatistica Direzione Scientifica, Fondazione IRCCS Policlinico san Matteo, Pavia, Italy Annalisa De Silvestri , Scientific Direction, IRCCS Policlinico San Matteo Foundation, Pavia, Italy Approved VIEWS 0 https://doi.org/10.5256/f1000research.58938.r126903 Authors developed a potentially useful R-package tool to analyze data from the UKBB COVID-19 database, which summarises COVID-19 test results, and performs association tests between COVID-19 susceptibility/severity and potential risk factors such as age, sex, blood type, comorbidities and generates ... Continue reading READ ALL Authors developed a potentially useful R-package tool to analyze data from the UKBB COVID-19 database, which summarises COVID-19 test results, and performs association tests between COVID-19 susceptibility/severity and potential risk factors such as age, sex, blood type, comorbidities and generates input files for GWAS. The rationale is well explained, sufficient details of the code, methods, and analysis are provided, outputs are well described and conclusions are sound and appropriate. However, some minor points should be considered: It is not clear how comorbidities are retrieved, classified (at which level of ICD-10), and analysed. Authors should discuss how they choose to classify severity (the distinction between critical care and advanced critical care for example) and why they choose to include all Covid patients (for example severity 2-3 vs 0-1 instead of severity 2-3 vs 1). Why not consider it as an ordinal variable? Authors should specify if they consider mortality due to Covid or with Covid. Is the rationale for developing the new software tool clearly explained? Yes Is the description of the software tool technically sound? 
Partly Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others? Yes Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool? Yes Are the conclusions about the tool and its performance adequately supported by the findings presented in the article? Yes Competing Interests: No competing interests were disclosed. Reviewer Expertise: biostatistics We confirm that we have read this submission and believe that we have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Valeria V and De Silvestri A. Reviewer Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.58938.r126903 ) The direct URL for this report is: https://f1000research.com/articles/10-830/v1#referee-response-126903 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 18 May 2022 Longfei Wang , Population Health and Immunity Division, The Walter and Eliza Hall Institute of Medical Research, Parkville, 3052, Australia 18 May 2022 Author Response 1. It is not clear how comorbidities are retrieved, classified (at which level of ICD-10), and analysed. We added a new table (Table 4) to show how the comorbidities ... Continue reading 1. It is not clear how comorbidities are retrieved, classified (at which level of ICD-10), and analysed. We added a new table (Table 4) to show how the comorbidities are classified. 
We modified the following sentences: Comorbidity categories are generated using the block categories in the ICD10 code, which is shown in the second column in Table 4. We include ICD10 chapters 1–14 and 17 and exclude several chapters such as pregnancy, childbirth, and consequences of external causes etc. For instance, the first category is “A00-A09”, representing intestinal infectious diseases. During a period restricted by the start and end dates, cases are defined as any participants who were diagnosed as any subclasses under the block A00-A09 in the hospital inpatient diagnosis data. In this way, 164 binary variables are generated and each of them represents a comorbidity category. Therefore, we can test the association between each comorbidity category and the selected COVID-19 phenotype using logistic regression models. We modified the sentence as follows: The comorbidity.asso function performs association tests between each comorbidity category and the selected phenotype using logistic regression models and adjusts the tested phenotype with covariates, which can be set using the argument “cov.name”. 2. Authors should discuss how they choose to classify severity (the distinction between critical care and advanced critical care for example) and why they choose to include all Covid patients (for example severity 2-3 vs 0-1 instead of severity 2-3 vs 1). Why not consider it as an ordinal variable? Thanks for the suggestion. We re-wrote the following paragraph: Based on the World Health Organization (WHO) ordinal scale for clinical improvement , we classify severity into four levels. These levels are defined as 1) hospitalisation: individuals admitted to hospital with their primary diagnosis recorded as COVID-19. 2) critical care level 2: individuals required basic treatment in a critical care unit, such as non-invasive ventilation and continuous positive airway pressure, and with their primary diagnosis recorded as COVID-19. 
3) critical care level 3: individuals who required advanced treatment in a critical care unit, such as invasive ventilation and temporary tracheostomy, and with their primary diagnosis recorded as COVID-19. 4) mortality: individuals who died due to COVID-19. The critical care information was summarised from the HESIN_CRITICAL table and the HESIN_OPER table. The critical care level 2 cases are the COVID-19 patients who required at least one “Critical care level 2 days” in the HESIN_CRITICAL table or received basic respiratory support, such as E85.2 non-invasive ventilation NEC, in the HESIN_OPER table. The critical care level 3 cases are defined as the COVID-19 patients who required at least one “Critical care level 3 days” in the HESIN_CRITICAL table or received advanced respiratory support, such as E85.1 invasive ventilation, in the HESIN_OPER table. The commonly used GWAS tools, such as SAIGE and PLINK, do not support ordinal categorical phenotypes. Therefore, we converted this ordinal variable into four binary variables named “hospitalisation”, “critical care”, “advanced critical care” and “mortality” (Table 2). However, users can get the ordinal variable by simply summing the four binary variables. We assume that participants who tested positive for COVID-19 but were not admitted to hospital had no or mild symptoms and hence classified them as controls in severity phenotypes. 3. Authors should specify if they consider mortality due to Covid or with Covid Sorry for the lack of clarity. We defined the mortality case as mortality due to Covid. In the article, we wrote: For mortality, we include all individuals who received at least one positive test result and define those whose primary cause of death is recorded as being due to COVID-19 as cases. To make it clearer, we corrected the definition of mortality in Table 2 from “1 = death with COVID-19” to “1 = death due to COVID-19”. 1. 
It is not clear how comorbidities are retrieved, classified (at which level of ICD-10), and analysed. We added a new table (Table 4) to show how the comorbidities are classified. We modified the following sentences: Comorbidity categories are generated using the block categories in the ICD10 code, which is shown in the second column in Table 4. We include ICD10 chapters 1–14 and 17 and exclude several chapters such as pregnancy, childbirth, and consequences of external causes etc. For instance, the first category is “A00-A09”, representing intestinal infectious diseases. During a period restricted by the start and end dates, cases are defined as any participants who were diagnosed as any subclasses under the block A00-A09 in the hospital inpatient diagnosis data. In this way, 164 binary variables are generated and each of them represents a comorbidity category. Therefore, we can test the association between each comorbidity category and the selected COVID-19 phenotype using logistic regression models. We modified the sentence as follows: The comorbidity.asso function performs association tests between each comorbidity category and the selected phenotype using logistic regression models and adjusts the tested phenotype with covariates, which can be set using the argument “cov.name”. 2. Authors should discuss how they choose to classify severity (the distinction between critical care and advanced critical care for example) and why they choose to include all Covid patients (for example severity 2-3 vs 0-1 instead of severity 2-3 vs 1). Why not consider it as an ordinal variable? Thanks for the suggestion. We re-wrote the following paragraph: Based on the World Health Organization (WHO) ordinal scale for clinical improvement , we classify severity into four levels. These levels are defined as 1) hospitalisation: individuals admitted to hospital with their primary diagnosis recorded as COVID-19. 
2) critical care level 2: individuals who required basic treatment in a critical care unit, such as non-invasive ventilation and continuous positive airway pressure, and with their primary diagnosis recorded as COVID-19. 3) critical care level 3: individuals who required advanced treatment in a critical care unit, such as invasive ventilation and temporary tracheostomy, and with their primary diagnosis recorded as COVID-19. 4) mortality: individuals who died due to COVID-19. The critical care information was summarised from the HESIN_CRITICAL table and the HESIN_OPER table. The critical care level 2 cases are the COVID-19 patients who required at least one “Critical care level 2 days” in the HESIN_CRITICAL table or received basic respiratory support, such as E85.2 non-invasive ventilation NEC, in the HESIN_OPER table. The critical care level 3 cases are defined as the COVID-19 patients who required at least one “Critical care level 3 days” in the HESIN_CRITICAL table or received advanced respiratory support, such as E85.1 invasive ventilation, in the HESIN_OPER table. The commonly used GWAS tools, such as SAIGE and PLINK, do not support ordinal categorical phenotypes. Therefore, we converted this ordinal variable into four binary variables named “hospitalisation”, “critical care”, “advanced critical care” and “mortality” (Table 2). However, users can get the ordinal variable by simply summing the four binary variables. We assume that participants who tested positive for COVID-19 but were not admitted to hospital had no or mild symptoms and hence classified them as controls in severity phenotypes. 3. Authors should specify if they consider mortality due to Covid or with Covid Sorry for the lack of clarity. We defined the mortality case as mortality due to Covid. In the article, we wrote: For mortality, we include all individuals who received at least one positive test result and define those whose primary cause of death is recorded as being due to COVID-19 as cases. 
To make it clearer, we corrected the definition of mortality in Table 2 from “1 = death with COVID-19” to “1 = death due to COVID-19”. Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 18 May 2022 Longfei Wang , Population Health and Immunity Division, The Walter and Eliza Hall Institute of Medical Research, Parkville, 3052, Australia 18 May 2022 Author Response 1. It is not clear how comorbidities are retrieved, classified (at which level of ICD-10), and analysed. We added a new table (Table 4) to show how the comorbidities ... Continue reading 1. It is not clear how comorbidities are retrieved, classified (at which level of ICD-10), and analysed. We added a new table (Table 4) to show how the comorbidities are classified. We modified the following sentences: Comorbidity categories are generated using the block categories in the ICD10 code, which is shown in the second column in Table 4. We include ICD10 chapters 1–14 and 17 and exclude several chapters such as pregnancy, childbirth, and consequences of external causes etc. For instance, the first category is “A00-A09”, representing intestinal infectious diseases. During a period restricted by the start and end dates, cases are defined as any participants who were diagnosed as any subclasses under the block A00-A09 in the hospital inpatient diagnosis data. In this way, 164 binary variables are generated and each of them represents a comorbidity category. Therefore, we can test the association between each comorbidity category and the selected COVID-19 phenotype using logistic regression models. We modified the sentence as follows: The comorbidity.asso function performs association tests between each comorbidity category and the selected phenotype using logistic regression models and adjusts the tested phenotype with covariates, which can be set using the argument “cov.name”. 2. 
Authors should discuss how they choose to classify severity (the distinction between critical care and advanced critical care for example) and why they choose to include all Covid patients (for example severity 2-3 vs 0-1 instead of severity 2-3 vs 1). Why not consider it as an ordinal variable? Thanks for the suggestion. We re-wrote the following paragraph: Based on the World Health Organization (WHO) ordinal scale for clinical improvement, we classify severity into four levels. These levels are defined as 1) hospitalisation: individuals who were admitted to hospital with their primary diagnosis recorded as COVID-19. 2) critical care level 2: individuals who required basic treatment in a critical care unit, such as non-invasive ventilation and continuous positive airway pressure, and with their primary diagnosis recorded as COVID-19. 3) critical care level 3: individuals who required advanced treatment in a critical care unit, such as invasive ventilation and temporary tracheostomy, and with their primary diagnosis recorded as COVID-19. 4) mortality: individuals who died due to COVID-19. The critical care information was summarised from the HESIN_CRITICAL table and the HESIN_OPER table. The critical care level 2 cases are the COVID-19 patients who required at least one “Critical care level 2 days” in the HESIN_CRITICAL table or received basic respiratory support, such as E85.2 non-invasive ventilation NEC, in the HESIN_OPER table. The critical care level 3 cases are defined as the COVID-19 patients who required at least one “Critical care level 3 days” in the HESIN_CRITICAL table or received advanced respiratory support, such as E85.1 invasive ventilation, in the HESIN_OPER table. The commonly used GWAS tools, such as SAIGE and PLINK, do not support ordinal categorical phenotypes. Therefore, we converted this ordinal variable into four binary variables named “hospitalisation”, “critical care”, “advanced critical care” and “mortality” (Table 2). 
However, users can get the ordinal variable by simply summing the four binary variables. We assume that participants who were tested COVID-19 positive but did not admit to hospital had no or mild symptoms and hence classified them as controls in severity phenotypes. 3. Authors should specify if they consider mortality due to Covid or with Covid Sorry for the unclearness. We defined the mortality case as mortality due to Covid. In the article, we wrote: For mortality, we include all individuals who received at least one positive test result and define those whose primary cause of death is recorded as being due to COVID-19 as cases. To make it clearer, we corrected the definition of mortality in Table 2 from “1 = death with COVID-19” to “1 = death due to COVID-19”. 1. It is not clear how comorbidities are retrieved, classified (at which level of ICD-10), and analysed. We added a new table (Table 4) to show how the comorbidities are classified. We modified the following sentences: Comorbidity categories are generated using the block categories in the ICD10 code, which is shown in the second column in Table 4. We include ICD10 chapters 1–14 and 17 and exclude several chapters such as pregnancy, childbirth, and consequences of external causes etc. For instance, the first category is “A00-A09”, representing intestinal infectious diseases. During a period restricted by the start and end dates, cases are defined as any participants who were diagnosed as any subclasses under the block A00-A09 in the hospital inpatient diagnosis data. In this way, 164 binary variables are generated and each of them represents a comorbidity category. Therefore, we can test the association between each comorbidity category and the selected COVID-19 phenotype using logistic regression models. 
We modified the sentence as follows: The comorbidity.asso function performs association tests between each comorbidity category and the selected phenotype using logistic regression models and adjusts the tested phenotype with covariates, which can be set using the argument “cov.name”. 2. Authors should discuss how they choose to classify severity (the distinction between critical care and advanced critical care for example) and why they choose to include all Covid patients (for example severity 2-3 vs 0-1 instead of severity 2-3 vs 1). Why not consider it as an ordinal variable? Thanks for the suggestion. We re-wrote the following paragraph: Based on the World Health Organization (WHO) ordinal scale for clinical improvement, we classify severity into four levels. These levels are defined as 1) hospitalisation: individuals who were admitted to hospital with their primary diagnosis recorded as COVID-19. 2) critical care level 2: individuals who required basic treatment in a critical care unit, such as non-invasive ventilation and continuous positive airway pressure, and with their primary diagnosis recorded as COVID-19. 3) critical care level 3: individuals who required advanced treatment in a critical care unit, such as invasive ventilation and temporary tracheostomy, and with their primary diagnosis recorded as COVID-19. 4) mortality: individuals who died due to COVID-19. The critical care information was summarised from the HESIN_CRITICAL table and the HESIN_OPER table. The critical care level 2 cases are the COVID-19 patients who required at least one “Critical care level 2 days” in the HESIN_CRITICAL table or received basic respiratory support, such as E85.2 non-invasive ventilation NEC, in the HESIN_OPER table. The critical care level 3 cases are defined as the COVID-19 patients who required at least one “Critical care level 3 days” in the HESIN_CRITICAL table or received advanced respiratory support, such as E85.1 invasive ventilation, in the HESIN_OPER table. 
The commonly used GWAS tools, such as SAIGE and PLINK, do not support ordinal categorical phenotypes. Therefore, we converted this ordinal variable into four binary variables named “hospitalisation”, “critical care”, “advanced critical care” and “mortality” (Table 2). However, users can get the ordinal variable by simply summing the four binary variables. We assume that participants who were tested COVID-19 positive but did not admit to hospital had no or mild symptoms and hence classified them as controls in severity phenotypes. 3. Authors should specify if they consider mortality due to Covid or with Covid Sorry for the unclearness. We defined the mortality case as mortality due to Covid. In the article, we wrote: For mortality, we include all individuals who received at least one positive test result and define those whose primary cause of death is recorded as being due to COVID-19 as cases. To make it clearer, we corrected the definition of mortality in Table 2 from “1 = death with COVID-19” to “1 = death due to COVID-19”. Competing Interests: No competing interests were disclosed. Close Report a concern COMMENT ON THIS REPORT Views 0 Cite How to cite this report: Palmer TM. Reviewer Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.58938.r100445 ) The direct URL for this report is: https://f1000research.com/articles/10-830/v1#referee-response-100445 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
Close Copy Citation Details Reviewer Report 02 Dec 2021 Thomas Michael Palmer , Population Health Sciences, University of Bristol Medical School, Bristol, UK Not Approved VIEWS 0 https://doi.org/10.5256/f1000research.58938.r100445 Before I review this R package properly there are some basic fixes to the GitHub repository version which require attention. The package has an unusual history. Two versions have been released on CRAN however as I ... Continue reading READ ALL Before I review this R package properly there are some basic fixes to the GitHub repository version which require attention. The package has an unusual history. Two versions have been released on CRAN however as I can see from the website it was “Archived on 2021-10-06 as email to the maintainer was undeliverable”. So I recommend that the authors contact CRAN to get the package unarchived. The CRAN archive shows versions 0.1.0 and 0.1.1, however the GitHub repo shows version 0.1.0 in its DESCRIPTION file. The repo should have the latest version in it. Whilst the versions listed on CRAN 0.1.0 and 0.1.1 must have been CRAN compliant, otherwise they would not have been allowed on CRAN, unfortunately the code in the GitHub is no longer CRAN compliant and a simple running of R CMD check on the code in the repo gives 2 R CMD check errors and 1 note. These R CMD check errors should be fixed and the R CMD check note should also be fixed by adding the relevant entries to the .Rbuildignore file. The script in the tests/testthat folder does not use any of the testthat functions as it should. This should be improved or removed. Personally I find the name of the package unusual, I don’t prefer full-stops/periods in package names. Returned objects from the functions could be defined under one of the R's class systems, e.g., S3. Is the rationale for developing the new software tool clearly explained? Yes Is the description of the software tool technically sound? 
No Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others? Partly Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool? Partly Are the conclusions about the tool and its performance adequately supported by the findings presented in the article? No Competing Interests: No competing interests were disclosed. Reviewer Expertise: Medical Statistics / Biostatistics I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Palmer TM. Reviewer Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.58938.r100445 ) The direct URL for this report is: https://f1000research.com/articles/10-830/v1#referee-response-100445 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 10 Jan 2022 Longfei Wang , Population Health and Immunity Division, The Walter and Eliza Hall Institute of Medical Research, Parkville, 3052, Australia 10 Jan 2022 Author Response 1.The package has an unusual history. Two versions have been released on CRAN however as I can see from the website it was “Archived on 2021-10-06 as email to the ... Continue reading 1.The package has an unusual history. Two versions have been released on CRAN however as I can see from the website it was “Archived on 2021-10-06 as email to the maintainer was undeliverable”. So I recommend that the authors contact CRAN to get the package unarchived. 
My apologies that I was not aware that the package had been archived. I have contacted the CRAN team. They replied that a CRAN team member tried to contact me but received a bounce notification. However, my email address is correct and has not been changed since I submitted the package. I have resubmitted the package with an increased version number and with minor changes according to your suggestions. It has been unarchived ( https://cran.r-project.org/web/packages/UKB.COVID19/index.html ). 2. The CRAN archive shows versions 0.1.0 and 0.1.1, however the GitHub repo shows version 0.1.0 in its DESCRIPTION file. The repo should have the latest version in it. My apologies that the package on GitHub was out-of-date. I have updated GitHub to the latest version. 3. Whilst the versions listed on CRAN 0.1.0 and 0.1.1 must have been CRAN compliant, otherwise they would not have been allowed on CRAN, unfortunately the code in the GitHub is no longer CRAN compliant and a simple running of R CMD check on the code in the repo gives 2 R CMD check errors and 1 note. These R CMD check errors should be fixed and the R CMD check note should also be fixed by adding the relevant entries to the .Rbuildignore file. I have updated GitHub to the latest version and double checked it with R CMD check. There are no errors, warnings, or notes from the R CMD check now. 4. The script in the tests/testthat folder does not use any of the testthat functions as it should. This should be improved or removed. Thanks for your suggestion. I have improved the scripts in the tests/testthat folder with proper testthat functions. 5. Personally I find the name of the package unusual, I don’t prefer full-stops/periods in package names. Thanks for your suggestion. The package has been on CRAN for a while. People may have included the package in their scripts. These scripts will break if I change the name of the package. And it may be hard for everyone to find the renamed package. 
So I decided to keep the name and will definitely use proper names for the packages I build in the future. 6. Returned objects from the functions could be defined under one of the R's class systems, e.g., S3. Thanks for your suggestion. I have defined the returned objects under the S3 class system. 1.The package has an unusual history. Two versions have been released on CRAN however as I can see from the website it was “Archived on 2021-10-06 as email to the maintainer was undeliverable”. So I recommend that the authors contact CRAN to get the package unarchived. My apologies that I was not aware that the package had been archived. I have contacted the CRAN team. They replied that a CRAN team member tried to contact me and the email has got a bounced message notification. However, my email address is correct and has not been changed since I submitted the package. I have resubmitted the package with an increased version number and with minor changes according to your suggestions. It has been unarchived ( https://cran.r-project.org/web/packages/UKB.COVID19/index.html ). 2. The CRAN archive shows versions 0.1.0 and 0.1.1, however the GitHub repo shows version 0.1.0 in its DESCRIPTION file. The repo should have the latest version in it. My apologies that the package on GitHub was out-of-date. I have updated the latest version in GitHub. 3. Whilst the versions listed on CRAN 0.1.0 and 0.1.1 must have been CRAN compliant, otherwise they would not have been allowed on CRAN, unfortunately the code in the GitHub is no longer CRAN compliant and a simple running of R CMD check on the code in the repo gives 2 R CMD check errors and 1 note. These R CMD check errors should be fixed and the R CMD check note should also be fixed by adding the relevant entries to the .Rbuildignore file. I have updated the latest version in GitHub and double checked it with R CMD check. There’s no errors, warnings, or notes from the R CMD check now. 4. 
The script in the tests/testthat folder does not use any of the testthat functions as it should. This should be improved or removed. Thanks for your suggestion. I have improved the scripts in the tests/testthat folder with proper testthat functions. 5. Personally I find the name of the package unusual, I don’t prefer full-stops/periods in package names. Thanks for your suggestion. The package has been on CRAN for a while. People may have included the package in their scripts. These scripts will break if I change the name of the package. And it may be hard for everyone to find the renamed package. So I decided to keep the name and will definitely use proper names for the packages I build in the future. 6. Returned objects from the functions could be defined under one of the R's class systems, e.g., S3. Thanks for your suggestion. I have defined the returned objects under the S3 class system. Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 10 Jan 2022 Longfei Wang , Population Health and Immunity Division, The Walter and Eliza Hall Institute of Medical Research, Parkville, 3052, Australia 10 Jan 2022 Author Response 1.The package has an unusual history. Two versions have been released on CRAN however as I can see from the website it was “Archived on 2021-10-06 as email to the ... Continue reading 1.The package has an unusual history. Two versions have been released on CRAN however as I can see from the website it was “Archived on 2021-10-06 as email to the maintainer was undeliverable”. So I recommend that the authors contact CRAN to get the package unarchived. My apologies that I was not aware that the package had been archived. I have contacted the CRAN team. They replied that a CRAN team member tried to contact me and the email has got a bounced message notification. However, my email address is correct and has not been changed since I submitted the package. 
I have resubmitted the package with an increased version number and with minor changes according to your suggestions. It has been unarchived ( https://cran.r-project.org/web/packages/UKB.COVID19/index.html ). 2. The CRAN archive shows versions 0.1.0 and 0.1.1, however the GitHub repo shows version 0.1.0 in its DESCRIPTION file. The repo should have the latest version in it. My apologies that the package on GitHub was out-of-date. I have updated the latest version in GitHub. 3. Whilst the versions listed on CRAN 0.1.0 and 0.1.1 must have been CRAN compliant, otherwise they would not have been allowed on CRAN, unfortunately the code in the GitHub is no longer CRAN compliant and a simple running of R CMD check on the code in the repo gives 2 R CMD check errors and 1 note. These R CMD check errors should be fixed and the R CMD check note should also be fixed by adding the relevant entries to the .Rbuildignore file. I have updated the latest version in GitHub and double checked it with R CMD check. There’s no errors, warnings, or notes from the R CMD check now. 4. The script in the tests/testthat folder does not use any of the testthat functions as it should. This should be improved or removed. Thanks for your suggestion. I have improved the scripts in the tests/testthat folder with proper testthat functions. 5. Personally I find the name of the package unusual, I don’t prefer full-stops/periods in package names. Thanks for your suggestion. The package has been on CRAN for a while. People may have included the package in their scripts. These scripts will break if I change the name of the package. And it may be hard for everyone to find the renamed package. So I decided to keep the name and will definitely use proper names for the packages I build in the future. 6. Returned objects from the functions could be defined under one of the R's class systems, e.g., S3. Thanks for your suggestion. I have defined the returned objects under the S3 class system. 
1.The package has an unusual history. Two versions have been released on CRAN however as I can see from the website it was “Archived on 2021-10-06 as email to the maintainer was undeliverable”. So I recommend that the authors contact CRAN to get the package unarchived. My apologies that I was not aware that the package had been archived. I have contacted the CRAN team. They replied that a CRAN team member tried to contact me and the email has got a bounced message notification. However, my email address is correct and has not been changed since I submitted the package. I have resubmitted the package with an increased version number and with minor changes according to your suggestions. It has been unarchived ( https://cran.r-project.org/web/packages/UKB.COVID19/index.html ). 2. The CRAN archive shows versions 0.1.0 and 0.1.1, however the GitHub repo shows version 0.1.0 in its DESCRIPTION file. The repo should have the latest version in it. My apologies that the package on GitHub was out-of-date. I have updated the latest version in GitHub. 3. Whilst the versions listed on CRAN 0.1.0 and 0.1.1 must have been CRAN compliant, otherwise they would not have been allowed on CRAN, unfortunately the code in the GitHub is no longer CRAN compliant and a simple running of R CMD check on the code in the repo gives 2 R CMD check errors and 1 note. These R CMD check errors should be fixed and the R CMD check note should also be fixed by adding the relevant entries to the .Rbuildignore file. I have updated the latest version in GitHub and double checked it with R CMD check. There’s no errors, warnings, or notes from the R CMD check now. 4. The script in the tests/testthat folder does not use any of the testthat functions as it should. This should be improved or removed. Thanks for your suggestion. I have improved the scripts in the tests/testthat folder with proper testthat functions. 5. 
Personally I find the name of the package unusual, I don’t prefer full-stops/periods in package names. Thanks for your suggestion. The package has been on CRAN for a while. People may have included the package in their scripts. These scripts will break if I change the name of the package. And it may be hard for everyone to find the renamed package. So I decided to keep the name and will definitely use proper names for the packages I build in the future. 6. Returned objects from the functions could be defined under one of the R's class systems, e.g., S3. Thanks for your suggestion. I have defined the returned objects under the S3 class system. Competing Interests: No competing interests were disclosed. Close Report a concern COMMENT ON THIS REPORT Comments on this article Comments (0) Version 3 VERSION 3 PUBLISHED 19 Aug 2021 ADD YOUR COMMENT Comment keyboard_arrow_left keyboard_arrow_right Open Peer Review Reviewer Status info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. 
Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Reviewer Reports Invited Reviewers 1 2 3 Version 3 (revision) 26 Jul 24 read Version 2 (revision) 18 May 22 read Version 1 19 Aug 21 read read Thomas Michael Palmer , University of Bristol Medical School, Bristol, UK Virginia Valeria , Fondazione IRCCS Policlinico san Matteo, Pavia, Italy Annalisa De Silvestri , IRCCS Policlinico San Matteo Foundation, Pavia, Italy Edgar Gonzalez-Kozlova , Icahn School of Medicine at Mount Sinai, New York, USA Comments on this article All Comments (0) Add a comment Sign up for content alerts Sign Up You are now signed up to receive this alert Browse by related subjects keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2024 Palmer T. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 26 Aug 2024 | for Version 3 Thomas Michael Palmer , Population Health Sciences, University of Bristol Medical School, Bristol, UK 0 Views copyright © 2024 Palmer T. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (0) Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. 
Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions I thank the authors for their response and the amendments they have made to the package. The package is now back on CRAN. Therefore it again successfully passes R CMD check. So all of my previous comments have essentially been addressed. My only remaining minor comments are: (1) In the vignette, I don't think the line library(here) is required. (2) In the vignette, you might want to set warning=FALSE on a few code chunks, because several chunks generate quite a lot of warnings, which are distracting when reading it ( https://cran.r-project.org/web/packages/UKB.COVID19/vignettes/Introduction_to_UKB_COVID19.html ). Competing Interests No competing interests were disclosed. Reviewer Expertise Medical statistics, biostatistics, statistics, R programming. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. reply Respond to this report Responses (0) Palmer TM. Peer Review Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.169531.r307380) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/10-830/v3#referee-response-307380 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2024 Gonzalez-Kozlova E. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 
12 Jun 2024 | for Version 2 Edgar Gonzalez-Kozlova , Icahn School of Medicine at Mount Sinai, New York, NY, USA 0 Views copyright © 2024 Gonzalez-Kozlova E. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Not Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Dear authors, Fantastic job preparing a package to facilitate data retrieval and analysis. I would like to see a few additions that can only strengthen the article. > Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others? Prepare make a vignette available showcasing exactly how you intended the package to be used. While the article describes well the study and package, packages without a vignette are disregarded. > Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool? There is no mention of long covid in the article. Long term effects of COVID19 cant be ignored. Please include a discussion and/or status of long covid patients in the article. Are the conclusions about the tool and its performance adequately supported by the findings presented in the article? A statistics section is missing from the methods section. 
Every test used in the article should be clearly described and justified in methods. Is the rationale for developing the new software tool clearly explained? Yes Is the description of the software tool technically sound? Yes Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others? Partly Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool? Partly Are the conclusions about the tool and its performance adequately supported by the findings presented in the article? Partly Competing Interests No competing interests were disclosed. Reviewer Expertise Computational Biology I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above. reply Respond to this report Responses (1) Author Response 26 Jul 2024 Longfei Wang, Department of Medical Biology, The University of Melbourne, Parkville, 3010, Australia ------------------ Reviewer Comment: 1. Prepare make a vignette available showcasing exactly how you intended the package to be used. While the article describes well the study and package, packages without a vignette are disregarded. Author Response: We appreciate your suggestion. We have created a vignette and updated the UKB.COVID19 R package on CRAN ( https://cran.r-project.org/web/packages/UKB.COVID19/vignettes/Introduction_to_UKB_COVID19.html ). ------------------ Reviewer Comment: 2. There is no mention of long covid in the article. Long term effects of COVID19 cant be ignored. Please include a discussion and/or status of long covid patients in the article. Author Response: Thank you for your suggestion. We have added a discussion of long COVID and provided relevant functions in UKB.COVID19. 
Long COVID, also known as post-acute sequelae of SARS-CoV-2 infection, refers to a range of symptoms that persist for weeks or months after the acute phase of COVID-19 has resolved. These symptoms can include fatigue, shortness of breath, cognitive dysfunction, and various other systemic issues, significantly impacting the quality of life of affected individuals. The UKB.COVID19 package provides multiple functions to facilitate long COVID analysis. For instance, the ‘comorbidity_summary’ and ‘comorbidity_asso’ functions can be used to summarise potential long COVID symptoms and assess their associations with risk factors, such as age, sex and certain pre-existing conditions. Furthermore, researchers can focus on subsets of participants reporting persistent symptoms consistent with long COVID to investigate genetic risk factors using GWAS. These analyses hold promise for uncovering the biological underpinnings of long COVID and identifying potential therapeutic targets to alleviate its impact. ------------------ Reviewer Comment: 3. A statistics section is missing from the methods section. Every test used in the article should be clearly described and justified in methods. Author Response: We added a statistics section in the methods section. Statistical analysis To assess the associations between non-genetic risk factors and COVID-19 phenotypes (including susceptibility, severity, and mortality), we employed multivariable logistic regression models using the ‘glm’ function from the R package stats. Each model adjusted for covariates such as age, sex, and BMI. The tested risk factors included socioeconomic status (SES), smoking status, blood type, ethnic background, and residence in aged care facilities. The logistic regression model for each risk factor was specified as follows: logit(COVID-19 phenotype) ~ risk factor + age + sex + BMI. 
Comorbidity associations were analyzed using similar multivariable logistic regression models, with COVID-19 phenotypes modeled as: logit(COVID-19 phenotype) ~ comorbidity category + age + sex + BMI + SES + smoking status + aged care status. Odds ratios (ORs) with 95% confidence intervals (CIs) were reported, and p-values were calculated to determine the significance of the associations. To identify genetic variants associated with COVID-19 phenotypes, we performed GWASs using the SAIGE software. Principal component analysis (PCA) was performed to account for population stratification, and the first 20 principal components (PCs) were included as covariates in the analysis. Additionally, we adjusted for age, sex, BMI, SES, smoking status, residence in aged care facilities and genotypic array in the regression models. The association between each SNP and the phenotypes was tested using a logistic regression model, as follows: logit(COVID-19 phenotype) ~ SNP + age + sex + BMI + SES + smoking status + aged care status + genotypic array + PC1-20. To account for multiple testing, the Bonferroni correction was applied. Loci reaching the genome-wide significance threshold ($p < 5 \times 10^{-8}$) were considered significant. Manhattan plots and quantile-quantile (QQ) plots were generated to visualize the results using R package ggplot2. All analyses were carried out using R (version 4.0.5). ------------------ View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern Gonzalez-Kozlova E. Peer Review Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.133689.r287362) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
The direct URL for this report is: https://f1000research.com/articles/10-830/v2#referee-response-287362 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2022 De Silvestri A et al. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 22 Apr 2022 | for Version 1 Virginia Valeria , Servizio di Epidemiologia Clinica e Biostatistica Direzione Scientifica, Fondazione IRCCS Policlinico san Matteo, Pavia, Italy Annalisa De Silvestri , Scientific Direction, IRCCS Policlinico San Matteo Foundation, Pavia, Italy 0 Views copyright © 2022 De Silvestri A et al. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Authors developed a potentially useful R-package tool to analyze data from the UKBB COVID-19 database, which summarises COVID-19 test results, and performs association tests between COVID-19 susceptibility/severity and potential risk factors such as age, sex, blood type, comorbidities and generates input files for GWAS. 
The rationale is well explained, sufficient details of the code, methods, and analysis are provided, outputs are well described and conclusions are sound and appropriate. However, some minor points should be considered: It is not clear how comorbidities are retrieved, classified (at which level of ICD-10), and analysed Authors should discuss how they choose to classify severity (the distinction between critical care and advanced critical care for example) and why they choose to include all Covid patients (for example severity 2-3 vs 0-1 instead of severity 2-3 vs 1). Why not consider it as an ordinal variable? Authors should specify if they consider mortality due to Covid or with Covid Is the rationale for developing the new software tool clearly explained? Yes Is the description of the software tool technically sound? Partly Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others? Yes Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool? Yes Are the conclusions about the tool and its performance adequately supported by the findings presented in the article? Yes Competing Interests No competing interests were disclosed. Reviewer Expertise biostatistics We confirm that we have read this submission and believe that we have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. reply Respond to this report Responses (1) Author Response 18 May 2022 Longfei Wang, Population Health and Immunity Division, The Walter and Eliza Hall Institute of Medical Research, Parkville, 3052, Australia 1. It is not clear how comorbidities are retrieved, classified (at which level of ICD-10), and analysed. We added a new table (Table 4) to show how the comorbidities are classified. 
We modified the following sentences: Comorbidity categories are generated using the block categories in the ICD10 code, which is shown in the second column in Table 4. We include ICD10 chapters 1–14 and 17 and exclude several chapters such as pregnancy, childbirth, and consequences of external causes etc. For instance, the first category is “A00-A09”, representing intestinal infectious diseases. During a period restricted by the start and end dates, cases are defined as any participants who were diagnosed as any subclasses under the block A00-A09 in the hospital inpatient diagnosis data. In this way, 164 binary variables are generated and each of them represents a comorbidity category. Therefore, we can test the association between each comorbidity category and the selected COVID-19 phenotype using logistic regression models. We modified the sentence as follows: The comorbidity.asso function performs association tests between each comorbidity category and the selected phenotype using logistic regression models and adjusts the tested phenotype with covariates, which can be set using the argument “cov.name”. 2. Authors should discuss how they choose to classify severity (the distinction between critical care and advanced critical care for example) and why they choose to include all Covid patients (for example severity 2-3 vs 0-1 instead of severity 2-3 vs 1). Why not consider it as an ordinal variable? Thanks for the suggestion. We re-wrote the following paragraph: Based on the World Health Organization (WHO) ordinal scale for clinical improvement , we classify severity into four levels. These levels are defined as 1) hospitalisation: individuals admitted to hospital with their primary diagnosis recorded as COVID-19. 2) critical care level 2: individuals required basic treatment in a critical care unit, such as non-invasive ventilation and continuous positive airway pressure, and with their primary diagnosis recorded as COVID-19. 
3) critical care level 3: individuals who required advanced treatment in a critical care unit, such as invasive ventilation and temporary tracheostomy, and with their primary diagnosis recorded as COVID-19. 4) mortality: individuals who died due to COVID-19. The critical care information was summarised from the HESIN_CRITICAL table and the HESIN_OPER table. The critical care level 2 cases are the COVID-19 patients who required at least one “Critical care level 2 days” in the HESIN_CRITICAL table or received basic respiratory support, such as, E85.2 non-invasive ventilation NEC, in the HESIN_OPER table. The critical care level 3 cases are defined as the COVID-19 patients who required at least one “Critical care level 3 days” in the HESIN_CRITICAL table or received advanced respiratory support, such as, E85.1 invasive ventilation, in the HESIN_OPER table. The commonly used GWAS tools, such as SAIGE and PLINK, do not support ordinal categorical phenotypes. Therefore, we converted this ordinal variable into four binary variables named “hospitalisation”, “critical care”, “advanced critical care” and “mortality” (Table 2). However, users can get the ordinal variable by simply summing the four binary variables. We assume that participants who tested positive for COVID-19 but were not admitted to hospital had no or mild symptoms and hence classified them as controls in severity phenotypes. 3. Authors should specify if they consider mortality due to Covid or with Covid Sorry for the lack of clarity. We defined the mortality case as mortality due to Covid. In the article, we wrote: For mortality, we include all individuals who received at least one positive test result and define those whose primary cause of death is recorded as being due to COVID-19 as cases. To make it clearer, we corrected the definition of mortality in Table 2 from “1 = death with COVID-19” to “1 = death due to COVID-19”. View more View less Competing Interests No competing interests were disclosed. 
reply Respond Report a concern Valeria V and De Silvestri A. Peer Review Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.58938.r126903) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/10-830/v1#referee-response-126903 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2021 Palmer T. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 02 Dec 2021 | for Version 1 Thomas Michael Palmer , Population Health Sciences, University of Bristol Medical School, Bristol, UK 0 Views copyright © 2021 Palmer T. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Not Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Before I review this R package properly there are some basic fixes to the GitHub repository version which require attention. The package has an unusual history. 
Two versions have been released on CRAN however as I can see from the website it was “Archived on 2021-10-06 as email to the maintainer was undeliverable”. So I recommend that the authors contact CRAN to get the package unarchived. The CRAN archive shows versions 0.1.0 and 0.1.1, however the GitHub repo shows version 0.1.0 in its DESCRIPTION file. The repo should have the latest version in it. Whilst the versions listed on CRAN 0.1.0 and 0.1.1 must have been CRAN compliant, otherwise they would not have been allowed on CRAN, unfortunately the code in the GitHub is no longer CRAN compliant and a simple running of R CMD check on the code in the repo gives 2 R CMD check errors and 1 note. These R CMD check errors should be fixed and the R CMD check note should also be fixed by adding the relevant entries to the .Rbuildignore file. The script in the tests/testthat folder does not use any of the testthat functions as it should. This should be improved or removed. Personally I find the name of the package unusual, I don’t prefer full-stops/periods in package names. Returned objects from the functions could be defined under one of the R's class systems, e.g., S3. Is the rationale for developing the new software tool clearly explained? Yes Is the description of the software tool technically sound? No Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others? Partly Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool? Partly Are the conclusions about the tool and its performance adequately supported by the findings presented in the article? No Competing Interests No competing interests were disclosed. 
Reviewer Expertise Medical Statistics / Biostatistics I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above. reply Respond to this report Responses (1) Author Response 10 Jan 2022 Longfei Wang, Population Health and Immunity Division, The Walter and Eliza Hall Institute of Medical Research, Parkville, 3052, Australia 1.The package has an unusual history. Two versions have been released on CRAN however as I can see from the website it was “Archived on 2021-10-06 as email to the maintainer was undeliverable”. So I recommend that the authors contact CRAN to get the package unarchived. My apologies that I was not aware that the package had been archived. I have contacted the CRAN team. They replied that a CRAN team member tried to contact me and the email has got a bounced message notification. However, my email address is correct and has not been changed since I submitted the package. I have resubmitted the package with an increased version number and with minor changes according to your suggestions. It has been unarchived ( https://cran.r-project.org/web/packages/UKB.COVID19/index.html ). 2. The CRAN archive shows versions 0.1.0 and 0.1.1, however the GitHub repo shows version 0.1.0 in its DESCRIPTION file. The repo should have the latest version in it. My apologies that the package on GitHub was out-of-date. I have updated the latest version in GitHub. 3. Whilst the versions listed on CRAN 0.1.0 and 0.1.1 must have been CRAN compliant, otherwise they would not have been allowed on CRAN, unfortunately the code in the GitHub is no longer CRAN compliant and a simple running of R CMD check on the code in the repo gives 2 R CMD check errors and 1 note. These R CMD check errors should be fixed and the R CMD check note should also be fixed by adding the relevant entries to the .Rbuildignore file. 
I have updated the latest version in GitHub and double checked it with R CMD check. There’s no errors, warnings, or notes from the R CMD check now. 4. The script in the tests/testthat folder does not use any of the testthat functions as it should. This should be improved or removed. Thanks for your suggestion. I have improved the scripts in the tests/testthat folder with proper testthat functions. 5. Personally I find the name of the package unusual, I don’t prefer full-stops/periods in package names. Thanks for your suggestion. The package has been on CRAN for a while. People may have included the package in their scripts. These scripts will break if I change the name of the package. And it may be hard for everyone to find the renamed package. So I decided to keep the name and will definitely use proper names for the packages I build in the future. 6. Returned objects from the functions could be defined under one of the R's class systems, e.g., S3. Thanks for your suggestion. I have defined the returned objects under the S3 class system. View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern Palmer TM. Peer Review Report For: UKB.COVID19: an R package for UK Biobank COVID-19 data processing and analysis [version 3; peer review: 2 approved, 1 not approved] . F1000Research 2024, 10 :830 ( https://doi.org/10.5256/f1000research.58938.r100445) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/10-830/v1#referee-response-100445 Alongside their report, reviewers assign a status to the article: Approved - the paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations - A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. 
Not approved - fundamental flaws in the paper seriously undermine the findings and conclusions Adjust parameters to alter display View on desktop for interactive features Includes Interactive Elements View on desktop for interactive features Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Stay Updated Sign up for content alerts and receive a weekly or monthly email with all newly published articles Register with F1000Research Already registered? 
Sign in Not now, thanks close PLEASE NOTE If you are an AUTHOR of this article, please check that you signed in with the account associated with this article otherwise we cannot automatically identify your role as an author and your comment will be labelled as a “User Comment”. If you are a REVIEWER of this article, please check that you have signed in with the account associated with this article and then go to your account to submit your report, please do not post your review here. If you do not have access to your original account, please contact us . All commenters must hold a formal affiliation as per our Policies . The information that you give us will be displayed next to your comment. User comments must be in English, comprehensible and relevant to the article under discussion. We reserve the right to remove any comments that we consider to be inappropriate, offensive or otherwise in breach of the User Comment Terms and Conditions . Commenters must not use a comment for personal attacks. When criticisms of the article are based on unpublished data, the data should be made available. I accept the User Comment Terms and Conditions Please confirm that you accept the User Comment Terms and Conditions. Affiliation ✕ refresh Please enter your institution. Note: To add your institution or organisation, start typing the name and then select the correct name from the list. Where applicable, the name will appear in both the original language and in English. Do not paste in the name. If the name does not appear in the drop-down list, we will display the information you have entered. 
✕ refresh Country/Region * USA UK Canada China France Germany Afghanistan Aland Islands Albania Algeria American Samoa Andorra Angola Anguilla Antarctica Antigua and Barbuda Argentina Armenia Aruba Australia Austria Azerbaijan Bahamas Bahrain Bangladesh Barbados Belarus Belgium Belize Benin Bermuda Bhutan Bolivia Bosnia and Herzegovina Botswana Bouvet Island Brazil British Indian Ocean Territory British Virgin Islands Brunei Bulgaria Burkina Faso Burundi Cambodia Cameroon Canada Cape Verde Cayman Islands Central African Republic Chad Chile China Christmas Island Cocos (Keeling) Islands Colombia Comoros Congo Cook Islands Costa Rica Cote d'Ivoire Croatia Cuba Cyprus Czech Republic Democratic Republic of the Congo Denmark Djibouti Dominica Dominican Republic Ecuador Egypt El Salvador Equatorial Guinea Eritrea Estonia Ethiopia Falkland Islands Faroe Islands Federated States of Micronesia Fiji Finland France French Guiana French Polynesia French Southern Territories Gabon Georgia Germany Ghana Gibraltar Greece Greenland Grenada Guadeloupe Guam Guatemala Guernsey Guinea Guinea-Bissau Guyana Haiti Heard Island and Mcdonald Islands Holy See (Vatican City State) Honduras Hong Kong Hungary Iceland India Indonesia Iran Iraq Ireland Israel Italy Jamaica Japan Jersey Jordan Kazakhstan Kenya Kiribati Kosovo (Serbia and Montenegro) Kuwait Kyrgyzstan Lao People's Democratic Republic Latvia Lebanon Lesotho Liberia Libya Liechtenstein Lithuania Luxembourg Macao Madagascar Malawi Malaysia Maldives Mali Malta Marshall Islands Martinique Mauritania Mauritius Mayotte Mexico Minor Outlying Islands of the United States Moldova Monaco Mongolia Montenegro Montserrat Morocco Mozambique Myanmar Namibia Nauru Nepal Netherlands Antilles New Caledonia New Zealand Nicaragua Niger Nigeria Niue Norfolk Island North Korea North Macedonia Northern Mariana Islands Norway Oman Pakistan Palau Palestinian Territory Panama Papua New Guinea Paraguay Peru Philippines Pitcairn Poland Portugal Puerto Rico 
Qatar Reunion Romania Russian Federation Rwanda Saint Helena Saint Kitts and Nevis Saint Lucia Saint Pierre and Miquelon Saint Vincent and the Grenadines Samoa San Marino Sao Tome and Principe Saudi Arabia Senegal Serbia Seychelles Sierra Leone Singapore Slovakia Slovenia Solomon Islands Somalia South Africa South Georgia and the South Sandwich Is South Korea South Sudan Spain Sri Lanka Sudan Suriname Svalbard and Jan Mayen Swaziland Sweden Switzerland Syria Taiwan Tajikistan Tanzania Thailand The Gambia The Netherlands Timor-Leste Togo Tokelau Tonga Trinidad and Tobago Tunisia Turkey Turkmenistan Turks and Caicos Islands Tuvalu UK USA Uganda Ukraine United Arab Emirates United States Virgin Islands Uruguay Uzbekistan Vanuatu Venezuela Vietnam Wallis and Futuna West Bank and Gaza Strip Western Sahara Yemen Zambia Zimbabwe Please select your country/region. You must enter a comment. Competing Interests Please disclose any competing interests that might be construed to influence your judgment of the article's or peer review report's validity or importance. Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. 
Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Please state your competing interests The comment has been saved. An error has occurred. Please try again. Cancel Post var lTitle = "UKB.COVID19: an R package for UK Biobank...".replace("'", ''); var linkedInUrl = "http://www.linkedin.com/shareArticle?url=https://f1000research.com/articles/10-830/v3" + "&title=" + encodeURIComponent(lTitle) + "&summary=" + encodeURIComponent('Read the article by '); var deliciousUrl = "https://del.icio.us/post?url=https://f1000research.com/articles/10-830/v3&title=" + encodeURIComponent(lTitle); var redditUrl = "http://reddit.com/submit?url=https://f1000research.com/articles/10-830/v3" + "&title=" + encodeURIComponent(lTitle); linkedInUrl += encodeURIComponent('Wang L et al.'); var offsetTop = /chrome/i.test( navigator.userAgent ) ? 
4 : -10; var addthis_config = { ui_offset_top: offsetTop, services_compact : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_expanded : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_custom : [ { name: "LinkedIn", url: linkedInUrl, icon:"/img/icon/at_linkedin.svg" }, { name: "Mendeley", url: "http://www.mendeley.com/import/?url=https://f1000research.com/articles/10-830/v3/mendeley", icon:"/img/icon/at_mendeley.svg" }, { name: "Reddit", url: redditUrl, icon:"/img/icon/at_reddit.svg" }, ] }; var addthis_share = { url: "https://f1000research.com/articles/10-830", templates : { twitter : "UKB.COVID19: an R package for UK Biobank COVID-19 data processing.... Wang L et al., published by " + "@F1000Research" + ", https://f1000research.com/articles/10-830/v3" } }; if (typeof(addthis) != "undefined"){ addthis.addEventListener('addthis.ready', checkCount); addthis.addEventListener('addthis.menu.share', checkCount); } $(".f1r-shares-twitter").attr("href", "https://twitter.com/intent/tweet?text=" + addthis_share.templates.twitter); $(".f1r-shares-facebook").attr("href", "https://www.facebook.com/sharer/sharer.php?u=" + addthis_share.url); $(".f1r-shares-linkedin").attr("href", addthis_config.services_custom[0].url); $(".f1r-shares-reddit").attr("href", addthis_config.services_custom[2].url); $(".f1r-shares-mendelay").attr("href", addthis_config.services_custom[1].url); function checkCount(){ setTimeout(function(){ $(".addthis_button_expanded").each(function(){ var count = $(this).text(); if (count !== "" && count != "0") $(this).removeClass("is-hidden"); else $(this).addClass("is-hidden"); }); }, 1000); } close How to cite this report {{reportCitation}} Cancel Copy Citation Details $(function(){R.ui.buttonDropdowns('.dropdown-for-downloads');}); $(function(){R.ui.toolbarDropdowns('.toolbar-dropdown-for-downloads');}); $.get("/articles/acj/55370/169531") new F1000.Clipboard(); new F1000.ThesaurusTermsDisplay("articles", 
"article", "169531"); $(document).ready(function() { $( "#frame1" ).on('load', function() { var mydiv = $(this).contents().find("div"); var h = mydiv.height(); console.log(h) }); var tooltipLivingFigure = jQuery(".interactive-living-figure-label .icon-more-info"), titleLivingFigure = tooltipLivingFigure.attr("title"); tooltipLivingFigure.simpletip({ fixed: true, position: ["-115", "30"], baseClass: 'small-tooltip', content:titleLivingFigure + " " }); tooltipLivingFigure.removeAttr("title"); $("body").on("click", ".cite-living-figure", function(e) { e.preventDefault(); var ref = $(this).attr("data-ref"); $(this).closest(".living-figure-list-container").find("#" + ref).fadeIn(200); }); $("body").on("click", ".close-cite-living-figure", function(e) { e.preventDefault(); $(this).closest(".popup-window-wrapper").fadeOut(200); }); $(document).on("mouseup", function(e) { var metricsContainer = $(".article-metrics-popover-wrapper"); if (!metricsContainer.is(e.target) && metricsContainer.has(e.target).length === 0) { $(".article-metrics-close-button").click(); } }); var articleId = $('#articleId').val(); if($("#main-article-count-box").attachArticleMetrics) { $("#main-article-count-box").attachArticleMetrics(articleId, { articleMetricsView: true }); } }); var figshareWidget = $(".new_figshare_widget"); if (figshareWidget.length > 0) { window.figshare.load("f1000", function(Widget) { // Select a tag/tags defined in your page. In this tag we will place the widget. _.map(figshareWidget, function(el){ var widget = new Widget({ articleId: $(el).attr("figshare_articleId") //height:300 // this is the height of the viewer part. [Default: 550] }); widget.initialize(); // initialize the widget widget.mount(el); // mount it in a tag that's on your page // this will save the widget on the global scope for later use from // your JS scripts. This line is optional. 
//window.widget = widget; }); }); } close Error Close Add Reset F1000.MICROSERVICES.AFFILIATION = ''; $(document).ready(function () { $('.js-affiliations-form').each((index, form) => { new AffiliationForm({ formId: form.id, institutionErrorSelector: '.comment-enter-institution', departmentErrorSelector: '.comment-enter-department', placeSelector: '.js-add-comment-place', stateSelector: '.js-add-comment-state', zipCodeSelector: '.js-add-comment-zipcode', countrySelector: '.js-add-comment-country', countryErrorSelector: '.comment-enter-country', }); }); }); $(document).ready(function () { var reportIds = { "287364": 0, "287365": 0, "287366": 0, "287367": 0, "100741": 0, "100740": 0, "287361": 0, "100743": 0, "287362": 46, "100742": 0, "287363": 0, "92299": 0, "92298": 0, "92301": 0, "287368": 0, "92300": 0, "287369": 0, "287370": 0, "92302": 0, "93209": 0, "93208": 0, "93210": 0, "101923": 0, "101922": 0, "307380": 10, "307381": 0, "126903": 35, "126902": 0, "307379": 0, "126904": 0, "97985": 0, "97984": 0, "97986": 0, "96711": 0, "96713": 0, "96712": 0, "96715": 0, "96714": 0, "303582": 0, "303583": 0, "100445": 53, "303588": 0, "303589": 0, "303590": 0, "303591": 0, "303584": 0, "303585": 0, "303586": 0, "303587": 0, "98539": 0, "296822": 0, "296823": 0, "296828": 0, "296829": 0, "296830": 0, "296831": 0, "138238": 0, "296824": 0, "296825": 0, "93695": 0, "296826": 0, "93694": 0, "138237": 0, "296827": 0, }; $(".referee-response-container,.js-referee-report").each(function(index, el) { var reportId = $(el).attr("data-reportid"), reportCount = reportIds[reportId] || 0; $(el).find(".comments-count-container,.js-referee-report-views").html(reportCount); }); var uuidInput = $("#article_uuid"), oldUUId = uuidInput.val(), newUUId = "92e67a76-3d7d-4e3c-8512-820920013159"; uuidInput.val(newUUId); $("a[href*='article_uuid=']").each(function(index, el) { var newHref = $(el).attr("href").replace(oldUUId, newUUId); $(el).attr("href", newHref); }); }); An innovative open access 
publishing platform offering rapid publication and open peer review, whilst supporting data deposition and sharing. Browse Gateways Collections How it Works Contact For Developers Cookie Notice Privacy Notice RSS Submit Your Research Follow us © 2012-2026 F1000 Research Ltd. ISSN 2046-1402 | Legal | Partner of Research4Life • CrossRef • ORCID • FAIRSharing R.templateTests.simpleTemplate = R.template(' $text $text $text $text $text '); R.templateTests.runTests(); var F1000platform = new F1000.Platform({ name: "f1000research", displayName: "F1000Research", hostName: "f1000research.com", id: "1", editorialEmail: "[email protected]", infoEmail: "[email protected]", usePmcStats: true }); $(function(){R.ui.dropdowns('.dropdown-for-authors, .dropdown-for-about, .dropdown-for-myresearch');}); // $(function(){R.ui.dropdowns('.dropdown-for-referees');}); $(document).ready(function () { if ($(".cookie-warning").is(":visible")) { $(".sticky").css("margin-bottom", "35px"); $(".devices").addClass("devices-and-cookie-warning"); } $(".cookie-warning .close-button").click(function (e) { $(".devices").removeClass("devices-and-cookie-warning"); $(".sticky").css("margin-bottom", "0"); }); $("#tweeter-feed .tweet-message").each(function (i, message) { var self = $(message); self.html(linkify(self.html())); }); $(".partner").on("mouseenter mouseleave", function() { $(this).find(".gray-scale, .colour").toggleClass("is-hidden"); }); }); Sign In Remember me Forgotten your password? Sign In Cancel Email or password not correct. Please try again Please wait... $(function(){ // Note: All the setup needs to run against a name attribute and *not* the id due the clonish // nature of facebox... 
$("a[id=googleSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("GOOGLE"); $("form[id=oAuthForm]").submit(); }); $("a[id=facebookSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("FACEBOOK"); $("form[id=oAuthForm]").submit(); }); $("a[id=orcidSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("ORCID"); $("form[id=oAuthForm]").submit(); }); }); If you've forgotten your password, please enter your email address below and we'll send you instructions on how to reset your password. The email address should be the one you originally registered with F1000. Email address not valid, please try again You registered with F1000 via Google, so we cannot reset your password. To sign in, please click here . If you still need help with your Google account password, please click here . You registered with F1000 via Facebook, so we cannot reset your password. To sign in, please click here . If you still need help with your Facebook account password, please click here . Code not correct, please try again Reset password Cancel Email us for further assistance. Server error, please try again. If your email address is registered with us, we will email you instructions to reset your password. If you think you should have received this email but it has not arrived, please check your spam filters and/or contact for further assistance. Please wait... Register $(document).ready(function () { signIn.createSignInAsRow($("#sign-in-form-gfb-popup")); $(".target-field").each(function () { var uris = $(this).val().split("/"); if (uris.pop() === "login") { $(this).val(uris.toString().replace(",","/")); } }); });

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2024) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00