Full text
70,696 characters
· extracted from
preprint-html
· click to expand
Whole genome sequence meta-analyses reveal common and rare genetic associations with critical COVID-19 | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Whole genome sequence meta-analyses reveal common and rare genetic associations with critical COVID-19 View ORCID Profile Athanasios Kousathanas , View ORCID Profile Konrad Rawlik , View ORCID Profile Erola Pairo-Castineira , Fiona Griffiths , View ORCID Profile Wilna Oosthuyzen , View ORCID Profile Sara Clohisey Hendry , View ORCID Profile Tomas Malinauskas , View ORCID Profile Guillaume Butler-Laporte , Prabhu Arumugam , Colin Begg , View ORCID Profile Marc Chadeau-Hyam , Georgia Chan 
, Graham Cooke , Sally Donovan , Greg Elgar , View ORCID Profile Tom A. Fowler , Peter Goddard , View ORCID Profile Charles Hinds , View ORCID Profile Peter Horby , Lowell Ling , Emma F. Magavern , Fiona Maleady-Crowe , View ORCID Profile Hugh Montgomery , Christopher A. Odhams , View ORCID Profile Peter J.M. Openshaw , Christine Patch , Augusto Rendon , Shahla Salehi , Richard H. Scott , View ORCID Profile Malcolm G Semple , View ORCID Profile Manu Shankar-Hari , Afshan Siddiq , Alex Stuckey , View ORCID Profile Charlotte Summers , Linda Todd , Susan Walker , View ORCID Profile Timothy Walsh , View ORCID Profile Helen Ward , Tala Zainy , GenOMICC Investigators , ISARIC4C Investigators , BQC19 Investigators , GENCOVID Investigators , DeCOI Investigators , POLCOVID Investigators , PMBB Investigators , View ORCID Profile Angie Fawkes , View ORCID Profile Lee Murphy , View ORCID Profile Andy Law , View ORCID Profile Veronique Vitart , Patrick F Chinnery , View ORCID Profile James F Wilson , Matthew A. Brown , View ORCID Profile Paul Elliott , Loukas Moutsianas , Mark J. Caulfield , View ORCID Profile J. 
Kenneth Baillie doi: https://doi.org/10.1101/2025.11.19.25340573 Athanasios Kousathanas 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Athanasios Kousathanas Konrad Rawlik 2 Baillie Gifford Pandemic Science Hub, Centre for Inflammation Research, The Queen’s Medical Research Institute, University of Edinburgh , 47 Little France Crescent, Edinburgh, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Konrad Rawlik Erola Pairo-Castineira 2 Baillie Gifford Pandemic Science Hub, Centre for Inflammation Research, The Queen’s Medical Research Institute, University of Edinburgh , 47 Little France Crescent, Edinburgh, UK 3 MRC Human Genetics Unit, Institute of Genetics and Cancer, University of Edinburgh, Western General Hospital , Crewe Road, Edinburgh, EH4 2XU, UK 4 Roslin Institute, University of Edinburgh , Easter Bush, Edinburgh, EH25 9RG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Erola Pairo-Castineira Fiona Griffiths 2 Baillie Gifford Pandemic Science Hub, Centre for Inflammation Research, The Queen’s Medical Research Institute, University of Edinburgh , 47 Little France Crescent, Edinburgh, UK 4 Roslin Institute, University of Edinburgh , Easter Bush, Edinburgh, EH25 9RG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Wilna Oosthuyzen 2 Baillie Gifford Pandemic Science Hub, Centre for Inflammation Research, The Queen’s Medical Research Institute, University of Edinburgh , 47 Little France Crescent, Edinburgh, UK 4 Roslin Institute, University of Edinburgh , Easter Bush, Edinburgh, EH25 9RG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Wilna Oosthuyzen Sara Clohisey Hendry 2 Baillie Gifford Pandemic 
Science Hub, Centre for Inflammation Research, The Queen’s Medical Research Institute, University of Edinburgh , 47 Little France Crescent, Edinburgh, UK 4 Roslin Institute, University of Edinburgh , Easter Bush, Edinburgh, EH25 9RG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sara Clohisey Hendry Tomas Malinauskas 5 Wellcome Centre for Human Genetics, University of Oxford , Roosevelt Drive, Oxford, OX3 7BN, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tomas Malinauskas Guillaume Butler-Laporte 6 Lady Davis Institute for Medical Research , Montréal, Québec, Canada 7 Division of Infectious Diseases, McGill University Health Centre , Montréal, Québec, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Guillaume Butler-Laporte Prabhu Arumugam 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Colin Begg 8 Royal Hospital for Children , Glasgow, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Marc Chadeau-Hyam 9 School of Public Health , Imperial College London, London, UK 10 MRC Centre for Environment and Health , Imperial College London, London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Marc Chadeau-Hyam Georgia Chan 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Graham Cooke 11 Department of Infectious Disease , Imperial College London, London, UK 12 National Institute for Health Research Imperial Biomedical Research Centre , London 13 Imperial College Healthcare NHS Trust , London, UK Find this author on Google Scholar Find this author on PubMed Search for this 
author on this site Sally Donovan 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Greg Elgar 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tom A. Fowler 14 William Harvey Research Institute, Queen Mary University of London , London, UK 15 Health Protection and Screening Services , Public Health Wales, Cardiff, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tom A. Fowler Peter Goddard 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Charles Hinds 14 William Harvey Research Institute, Queen Mary University of London , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Charles Hinds Peter Horby 16 Centre for Tropical Medicine and Global Health, Nuffield Department of Medicine, University of Oxford, Old Road Campus , Roosevelt Drive, Oxford, OX3 7FZ, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Peter Horby Lowell Ling 17 Department of Anaesthesia and Intensive Care, The Chinese University of Hong Kong , Prince of Wales Hospital, Hong Kong, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Emma F. 
Magavern 14 William Harvey Research Institute, Queen Mary University of London , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Fiona Maleady-Crowe 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hugh Montgomery 18 UCL Centre for Human Health and Performance , London, W1T 7HA, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Hugh Montgomery Christopher A. Odhams 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Peter J.M. Openshaw 13 Imperial College Healthcare NHS Trust , London, UK 19 National Heart and Lung Institute , Imperial College London, London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Peter J.M. Openshaw Christine Patch 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Augusto Rendon 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Shahla Salehi 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Richard H. 
Scott 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Malcolm G Semple 20 NIHR Health Protection Research Unit for Emerging and Zoonotic Infections, Institute of Infection, Veterinary and Ecological Sciences University of Liverpool , Liverpool, L69 7BE, UK 21 Respiratory Department, Liverpool Institute for Child Health and Wellbeing , Alder Hey Children’s Hospital, Liverpool, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Malcolm G Semple Manu Shankar-Hari 22 Centre for Inflammation Research, The Queen’s Medical Research Institute, University of Edinburgh , 47 Little France Crescent, Edinburgh, UK 23 Intensive Care Unit, Royal Infirmary of Edinburgh , 54 Little France Drive, Edinburgh, EH16 5SA, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Manu Shankar-Hari Afshan Siddiq 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alex Stuckey 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Charlotte Summers 24 Department of Medicine, University of Cambridge , Cambridge, Cambridgeshire, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Charlotte Summers Linda Todd 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Susan Walker 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Timothy Walsh 23 Intensive Care Unit, Royal Infirmary of Edinburgh , 54 Little France Drive, Edinburgh, EH16 5SA, UK Find this author on Google Scholar Find this author on PubMed Search for this 
author on this site ORCID record for Timothy Walsh Helen Ward 9 School of Public Health , Imperial College London, London, UK 12 National Institute for Health Research Imperial Biomedical Research Centre , London 13 Imperial College Healthcare NHS Trust , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Helen Ward Tala Zainy 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Angie Fawkes 25 Edinburgh Clinical Research Facility, Western General Hospital, University of Edinburgh , EH4 2XU, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Angie Fawkes Lee Murphy 25 Edinburgh Clinical Research Facility, Western General Hospital, University of Edinburgh , EH4 2XU, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Lee Murphy Andy Law 4 Roslin Institute, University of Edinburgh , Easter Bush, Edinburgh, EH25 9RG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Andy Law Veronique Vitart 3 MRC Human Genetics Unit, Institute of Genetics and Cancer, University of Edinburgh, Western General Hospital , Crewe Road, Edinburgh, EH4 2XU, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Veronique Vitart Patrick F Chinnery 26 Department of Clinical Neurosciences, School of Clinical Medicine, University of Cambridge, Cambridge Biomedical Campus , Cambridge, UK 27 Medical Research Council Mitochondrial Biology Unit, University of Cambridge, Cambridge Biomedical Campus , Cambridge, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site James F Wilson 3 MRC Human Genetics Unit, Institute of Genetics and Cancer, 
University of Edinburgh, Western General Hospital , Crewe Road, Edinburgh, EH4 2XU, UK 28 Centre for Global Health Research, Usher Institute of Population Health Sciences and Informatics , Teviot Place, Edinburgh EH8 9AG, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for James F Wilson Matthew A. Brown 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Paul Elliott 9 School of Public Health , Imperial College London, London, UK 10 MRC Centre for Environment and Health , Imperial College London, London, UK 12 National Institute for Health Research Imperial Biomedical Research Centre , London 13 Imperial College Healthcare NHS Trust , London, UK 29 Health Data Research (HDR) UK London at Imperial College , London, UK 30 UK Dementia Research Institute at Imperial College , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Paul Elliott Loukas Moutsianas 1 Genomics England , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mark J. Caulfield 14 William Harvey Research Institute, Queen Mary University of London , London, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site J. 
Kenneth Baillie 2 Baillie Gifford Pandemic Science Hub, Centre for Inflammation Research, The Queen’s Medical Research Institute, University of Edinburgh , 47 Little France Crescent, Edinburgh, UK 3 MRC Human Genetics Unit, Institute of Genetics and Cancer, University of Edinburgh, Western General Hospital , Crewe Road, Edinburgh, EH4 2XU, UK 4 Roslin Institute, University of Edinburgh , Easter Bush, Edinburgh, EH25 9RG, UK 23 Intensive Care Unit, Royal Infirmary of Edinburgh , 54 Little France Drive, Edinburgh, EH16 5SA, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for J. Kenneth Baillie For correspondence: j.k.baillie{at}ed.ac.uk Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract In susceptible patients, COVID-19 causes life-threatening disease driven by immune-mediated inflammatory lung injury. We have previously shown that multiple common host genetic variants are significantly associated with susceptibility to critical Covid-19, 1 ; 2 ; 3 and in one case, we demonstrated that such variants can inform development of new, effective drug treatment 1 ; 4 . Here we report an association analysis of whole-genome sequences (WGS) from 11,423 cases from the GenOMICC study and 60,628 controls, together with meta-analyses with available genome-wide data ( Fig. 1 ). We identify a rare association signal at SLC50A1 , primarily driven by a missense variant rs147850817 (1:155138217:G:T, Arg201Leu) that may interfere with transport function, and we identify four common association signals near ARF1, ZNF462, KLF13 and MVP genes. Finally, we build a WGS-derived polygenic risk score (PRS) for critical Covid-19, which offers only marginal improvement in risk estimation for the general population but may provide clinically-valuable discrimination for extreme susceptibility. 
Introduction The COVID-19 pandemic caused an estimated 18.2 million deaths between January 2020 and December 2021 alone 5 . Effective medication has reduced mortality by approximately 50%, but the underlying molecular mechanisms of disease remain poorly understood. Download figure Open in new tab Fig. 1: Overview of analysed cohorts and study analysis pipeline. The analysed cohorts comprised individuals with severe COVID-19 and individuals with mild COVID-19 symptoms, along with individuals from the 100,000 Genomes Project (100kGP). COVID-19 severe and mild cohorts and a subset of 100kGP individuals were processed using the Genomics England Pipeline 2.0 (Illumina Dragen) with an additional subset of 100kGP individuals processed using a different pipeline (Illumina NSV4). Two separate aggregates were merged after masking of low-quality genotypes, followed by sample quality control (sample-QC) for relatedness, sex mismatches and sample-level quality. The post-quality control (post-QC) samples were divided into COVID-19 severe cases and three distinct control groups: ctrl-mld, ctrl-dgn, and ctrl-all; the sample breakdown by ancestry is shown. The ctrl-all control set was used for GWAS analyses, while the ctrl-dgn control set was used for rare variant aggregate testing (RVAT) analyses, as potential noise from inclusion of data from different processing pipelines is more challenging to quality control for rare variants. The ctrl-mld control set was utilized in sensitivity analyses to validate the primary findings. Additional site-wise quality control appropriate for GWAS and RVAT analyses was performed, followed by meta-analysis with other studies. A breakdown of case and control samples across studies that were included in the GWAS and RVAT meta-analyses is shown. We and others have demonstrated that genetic factors play a critical role in determining an individual’s response to SARS-CoV-2 infection 1 ; 6 ; 2 ; 3 . 
Our recent large-scale meta-analysis of common variants identified 49 genome-wide significant associations with critical illness, highlighting the high polygenicity of severe COVID-19 3 . This observation suggests that polygenic risk scores (PRS) for severe COVID-19 7 ; 8 ; 9 could be potentially useful for identifying individuals at high risk and help with prioritisation of therapeutic interventions 7 ; 8 ; 9 . Whole exome/genome sequencing (WES/WGS) enables the discovery of rare variants whose effect on disease is only detectable when aggregated, for example, across a protein-coding gene. So far, large-scale WES/WGS analyses have identified limited contributions from rare variants 10 ; 2 , with the most robust finding being an association with rare deleterious variants in TLR7 11 . Here we report genetic association analyses for both common and rare variants using WGS data from the largest single cohort of critical COVID-19 patients to date. We integrate these findings through meta-analysis with summary statistics from external cohort studies to identify additional common variant associations and further explore the impact of rare variants through aggregate testing. We also utilise the individual level WGS data to derive a polygenic risk score (PRS) and evaluate its performance in stratifying the general population and identifying individuals at the extremes of risk. Results We conducted genome-wide association analyses (GWAS) using SAIGE 12 for a cohort (GenOMICC) of 11,423 critically ill cases and 60,628 controls. 9,357 (82%) of the cases (7,491 (66%) with WGS data) were part of the primary analysis in our previous report 3 . The controls included 100,000 Genomes Project (100kGP) individuals (n = 49,360) and individuals with mild COVID-19 (n = 11,268) (Methods), recruited by the GenOMICC study in partnership with the Real-time assessment of community transmission (REACT) study. A cohort breakdown by ancestry is shown in Fig. 1 . 
We carried out separate GWAS analyses for four genetically inferred ancestry groups (African (AFR); East Asian (EAS); European (EUR); South Asian (SAS)), using all controls (ctrl-all) for main analyses and individuals with mild COVID-19 (ctrl-mld) for sensitivity analyses ( Fig. 1 ). We meta-analysed GWAS results across ancestries and for each set of controls separately using inverse-variance-weighted fixed effects meta-analysis (IVW) (Methods). To maximise power for GWAS discovery, we meta-analysed the GWAS summary statistics from three studies: GenOMICC severe versus ctrl-all (this study), HGIv7-A2_ALL_leave_23andme_and_genomicc 13 , and 23andMe EUR respiratory support 14 ( Fig. 1 ). To account for potential heterogeneity in variant overlap and ancestry composition across studies that could confound conditional analysis on the basis of meta-analysis summaries 15 , we defined locus associations as ±1Mbp regions surrounding top variant signals (Methods). GWAS analysis identified 38 genome-wide significant loci ( $P < 5 \times 10^{-8}$ ) ( Fig. 2A ), including putatively novel signals near ARF1, MECOM, ZNF462, KLF13 and MVP genes ( Table 1 ). We used between-study heterogeneity and independent support from meta-analysis of external studies to assess the reliability of these associations. Signals near ARF1, ZNF462 and KLF13 had support across studies and no evidence of heterogeneity ( Table 1 ), while MECOM failed these tests ( Table 1 ). The signal near MVP was strongly significant in a single study (GenOMICC) but could not be confirmed because no other studies have obtained sufficient data. All novel associations had support from multiple variants in Linkage Disequilibrium (LD) (Supplementary Material), and were consistent in sensitivity analyses using alternate sets of controls (Supplementary Material). 
Variant-to-gene (V2G) scores from OpenTargets Genetics 14 identified the nearest genes as the most likely effector genes for all novel loci and were supported by cross-ancestry fine-mapping 16 and variant effect prediction (VEP) analysis 17 for credible set variants (Supplementary Material). View this table: View inline View popup Download powerpoint Table 1: Lead variants of novel locus association signals in the multi-study GWAS meta-analysis. Variant ids are provided as Chr:pos:ref:alt using GRCh38 coordinates with ref indicating the reference allele and alt being the effect allele. OR, OR CI and $P_{\mathrm{meta}}$ are odds ratios, odds ratio 95% confidence intervals and $P$-values for the multi-study meta-analysis. $P_{\mathrm{het}}$ is the $P$-value from heterogeneity analysis and set to “-” when there is a single contributing study (i.e. no genotype data available from other studies at this locus). $P_{\mathrm{GenOMICC}}$ and $P_{\mathrm{hgi}}$ are $P$-values for lead variants from the GenOMICC multi-ancestry meta-analysis and HGIv7-A2_ALL_leave_23andme_and_genomicc data (HGI), respectively. $P_{\mathrm{GenOMICC}}$ is not identical to $P_{\mathrm{meta}}$ when GenOMICC was the only contributing study because of the genomic control adjustment when performing meta-analysis. Variant *3:169081067:C:T is a proxy for 3:169081115:G:GGAT with $R^{2} = 0.999745$ in EUR. † indicates the failure to validate the variant near MECOM in HGI at $P_{\mathrm{hgi}} < 0.0125$. Gene column corresponds to the nearest protein-coding gene to lead variant. Download figure Open in new tab Fig. 2: Manhattan plot of multi-study GWAS and gene-based rare variant aggregate testing (RVAT) meta-analyses. (A) GWAS results: Variants that are genome-wide significant at $P < 5 \times 10^{-8}$ (red dashed line) and within ±1Mbp of lead variants are highlighted with blue. Annotations indicate nearest gene to lead variant of each signal and text is coloured grey when previously reported and black when novel. Genome-wide significant signals without support from the severe vs. 
mild GWAS ( $P < 5 \times 10^{-2}$ and consistent effect direction) are neither highlighted nor annotated. (B) RVAT results: Genes that are gene-wide significant at Bonferroni corrected threshold $P < 2.58 \times 10^{-6}$ (red dashed line) are highlighted with blue. Annotations indicate gene and are coloured grey when previously reported and black when novel. ACAT-O gene-based $P$-values for mild lof mask are shown. To assess the role of rare variants in critical illness, we conducted gene-based aggregate variant testing (RVAT) analysis with REGENIE 18 . For this analysis, we used a subset of 28,091 individuals from our cohorts that were processed via the same alignment and variant calling pipelines, consisting of 11,423 individuals with critical COVID-19 from GenOMICC and 16,668 control individuals. The control group included 11,268 individuals from the mild COVID-19 cohort and 5,400 from the 100kGP cohort (a control set referred to as ctrl-dgn; Fig. 1 ; Methods). RVAT analysis was performed for each of four ancestries separately and meta-analysed with six additional studies ( Fig. 1 and Supplementary Material). RVAT meta-analysis identified a significant excess burden of predicted damaging variation in TLR7 and SLC50A1 ( $P < 2.58 \times 10^{-6}$ ; Fig. 2B ), with SLC50A1 being a novel RVAT association. The SLC50A1 signal was strongest in the GenOMICC European ancestry (EUR) group ( $P = 1.53 \times 10^{-5}$ ) and nominally significant ( $P < 0.05$ ) in two additional studies and in meta-analysis excluding GenOMICC data (Supplementary Material). SLC50A1 , also known as SWEET1 / RAG1AP1 , lies within the locus indexed by common variant rs41264915 (near THBS3 ), and is partially tagged by a common novel variant signal identified through fine-mapping (see conditional analyses in Supplementary Material). We had also previously reported a significant association for SLC50A1 with critical Covid-19 using common variant gene-level testing 3 . 
The SLC50A1 RVAT signal was primarily driven by a single missense variant, rs147850817 (1:155138217:G:T, Arg201Leu, OR[95% CI] = 2.31[1.57-3.39]), in the largest contributing cohort (EUR GenOMICC; Supplementary Material). The Arg201Leu mutation replaces a positively charged side chain with a potentially destabilizing bulky hydrophobic one in the tightly packed hydrophilic environment facing the transport channel 19 ( Fig. 3 ) and may interfere with transport function. SLC50A1 is widely expressed in human cells and tissues, including nasal epithelium and large airways 20 . In immune cells it is thought to play a role in recruitment of the RAG complex during V(D)J recombination, an essential process in generating diversity among T- and B-cell receptors. Download figure Open in new tab Fig. 3: Structural analysis of the rs147850817 (Arg201Leu) mutation on SLC50A1. (A) AlphaFold model of SLC50A1 (with the N- and C-termini coloured in blue and red, respectively) with computationally docked β-D-glucopyranose. The side chain of Arg201 is shown as spheres and sticks. (B) Zoomed-in view of Arg201 and neighbouring residues. Download figure Open in new tab Fig. 4: Comparison of log odds ratios of deciles of the risk score distribution relative to the bottom decile for all three considered risk scores. “PRS” indicates results from the genetic PRS score alone, “Covariate” indicates results from a risk score based on age and sex, and “Combined” indicates results from a risk score incorporating both. Odds ratios were estimated using empirical deciles in the test cohort of European ancestry. We used results from our genome-wide association study (GWAS) on a training set comprising 80% of EUR individuals, the largest ancestry group in our study, to train a polygenic risk score (PRS) for critical COVID-19. 
When evaluated in an independent validation set, the predictive ability of the PRS itself is limited, even within the same ancestry group (AUC 0.63, odds ratio between the bottom and top risk score deciles = 2.1 (95% CI 1.03-4.16)). At a population level, PRS slightly improved risk estimation on top of a covariate-based risk score (Supplementary Material), increasing validation set AUC from 0.70 to 0.73 (DeLong’s test P-value to be added). Discussion In this study, we identified novel common and rare variant associations with critical COVID-19 by utilising the largest single cohort of critical COVID-19 patients with whole-genome sequencing (WGS) data to date and meta-analysis with other available datasets. We discuss the functional effects of the identified novel variants, potential effector genes, and the clinical utility of a polygenic risk score (PRS) derived from common variants. ARF1 is an important druggable co-factor supporting the replication of different virus families 21 ; 22 , including SARS-CoV-2 [ https://doi.org/10.1038/s41467-025-61431-8 ]. Inhibition of ARF1 reduced viral replication, and knockout in mice reduced viral load and lung pathology [ https://doi.org/10.1038/s41467-025-61431-8 ]. The lead variant rs3738681 near ARF1 is an expression quantitative trait locus (eQTL) for both ARF1 and TRIM11 . Counterintuitively, the T allele, identified here as protective against critical COVID-19, is associated with increased ARF1 expression 23 . Since the eQTL is based on an aggregate effect in circulating immune cells in blood, a cell-type or -state specific eQTL effect in the opposite direction may well exist, which would reverse the direction of effect. We previously observed a similar phenomenon in which an eQTL affecting PDE4A had an opposite effect in a myeloid monoculture which was not observed in bulk data, revealing a potential therapeutic target. The same T allele is associated with decreased TRIM11 expression in blood 23 . 
TRIM11 interferes with the replication of some viruses 24 . Proteins encoded by both ARF1 and TRIM11 have been reported to downregulate interferon signalling 25 ; 26 . Either or both of these effects could be important: our previous genetic findings 3 and therapeutic trials 27 suggest that it is possible that interferon signalling has differential effects in early vs late disease 28 . We found significant disease associations for variants near the genes encoding two transcription factors: ZNF462 and KLF13 . The risk allele at rs60568503, near ZNF462 , has previously been associated with improved lung function (FEV1/FVC) 14 . KLF13 is a member of the Krüppel-like factor family and regulates Natural Killer (NK) cell differentiation 29 and can enhance the immune response to T-cell activation 30 . The risk allele (G) at the lead variant rs11636034 of that locus is associated with increased KLF13 expression in blood (eQTLgen); upregulated expression of KLF13 in mice leads to proinflammatory hypercytokineaemia 31 . The association signal at rs138640006 ( MVP ) was only detectable in patients of African ancestry. The lead variant rs138640006 is extremely rare or absent in other ancestries (Minor Allele Frequency $1.3 \times 10^{-4}$ in gnomAD NFE, v4.1). Since our UK-based study has recruited by far the largest number of patients in this ancestry group, no replication set exists. MVP (Major Vault Protein), also known as lung disease resistance protein, is widely expressed, including in bronchial epithelium and lung tissue 20 and is involved in a broad range of cellular processes including type-I interferon response and control of viral infection 32 . The availability of WGS data from the same sequencing and analysis pipeline provided a solid foundation for calculating a PRS for critical COVID-19. 
In comparison with the elements of ISARIC4C score 33 , the doubling of risk from lowest to highest decile is approximately equivalent to the impact on individual risk of having comorbid illness, or the presence or absence of signs of respiratory failure. Individuals in the top risk decile may benefit from early access to preventative measures. Including findings near ARF1, ZNF462 , and KLF13 with all previously discovered associations 3 , a total of 52 common variants have been significantly associated with critical COVID-19. A variant with an equally strong effect, near MVP , identified exclusively in individuals of predicted African ancestry, could not be confirmed because external data are lacking. This is a further indication that extending genetic research to include the full diversity of human populations is not only a moral, but also a scientific, imperative. SLC50A1 is the second gene, after TLR7 , 10 , in which a significant burden of aggregated rare variants is associated with critical COVID-19. Identifying the cell types and molecular mechanisms mediating the effects of these variants could provide actionable insights for therapeutic research not only in COVID-19, but also in inflammatory lung disease of other aetiologies. Methods Ethics GenOMICC study GenOMICC was approved by the following research ethics committees: Scotland ‘A’ Research Ethics Committee (15/SS/0110) and Coventry and Warwickshire Research Ethics Committee (England, Wales and Northern Ireland) (19/WM/0247). Current and previous versions of the study protocol are available at https://genomicc.org/protocol/ . Recruitment of cases (patients with COVID-19): Patients were recruited to the GenOMICC study in 224 UK intensive care units ( https://genomicc.org ). All individuals had confirmed COVID-19 according to local clinical testing and were deemed, in the view of the treating clinician, to require continuous cardiorespiratory monitoring. 
In UK practice this kind of monitoring is undertaken in high-dependency or intensive care units. 100,000 Genomes project (100kGP): The 100,000 Genomes project was approved by the East of England—Cambridge Central Research Ethics Committee (REF 20/EE/0035). Only individuals from the 100,000 Genomes project for whom WGS data were available and who consented for their data to be used for research purposes were included in the analyses. UK Biobank study: ethical approval for the UK Biobank was previously obtained from the North West Centre for Research Ethics Committee (11/NW/0382). The work described herein was approved by UK Biobank under application number 26041. BQC-19: Each participant or their legal representative (if the participant was incapable of consenting) provided informed consent to the biobank. If a participant regained capacity to give consent, informed consent was obtained again directly from the participant. The study was approved by the Jewish General Hospital and Centre Hospitalier de l’Université de Montréal institutional review boards. DeCOI: Informed consent was obtained from each participant or the legal representative. DeCOI received ethical approval by the Ethical Review Board (ERB) of the participating hospitals/centres (Technical University Munich, Munich, Germany; Medical Faculty Bonn, Bonn, Germany; Medical Board of the Saarland, Germany; University Duisburg-Essen, Germany; Medical Faculty Duesseldorf, Duesseldorf, Germany). Penn Medicine (PMBB): Recruitment of PMBB participants was approved under IRB protocol 813913 and supported by Perelman School of Medicine at University of Pennsylvania. POLCOVID-Genomika (POLCOVID): All study participants provided written informed consent and received detailed information on the study and associated risk before enrollment. The study was approved by the Bioethics Committee of the Medical University of Bialystok. Swedish Biobank: Informed consent was obtained for all study participants. 
The study was approved by the National Ethical Review Agency (Sweden) (No. 2020–01623). 23andMe study: participants in this study were recruited from the customer base of 23andMe, a personal genetics company. All individuals included in the analyses provided informed consent and answered surveys online according to the 23andMe protocol for research in humans, which was reviewed and approved by Ethical and Independent Review Services, a private institutional review board ( http://www.eandireview.com ). Case and control cohorts The case cohort comprises patients recruited to the GenOMICC study with confirmed COVID-19 which were deemed to require continuous cardiorespiratory monitoring by their treating clinician. The control cohort comprises: A mild cohort, consisting of individuals with confirmed COVID-19 who experienced mild symptoms: - participants recruited as part of the Real-time assessment of community transmission (REACT) study, and - volunteers enrolled via a microsite who were required to self-report the details of a positive COVID-19 test, on the basis of having experienced mild (non-hospitalised) or asymptomatic COVID-19. Participants from the 100,000 Genomes Project (100kGP) with demographic characteristics as shown in Kousathanas et al. (2022) 2 . For the 100kGP, we excluded individuals with haematological cancers and those who had tested positive at least once, had not tested negative on the same day, and whose test results came from hospital A&E. For the main GWAS, we utilized the full set of controls to maximize power and employed the mild cohort for validation checks. For the gene-based rare variant analyses (RVAT), we utilized a subset of 16,668 controls (of which 11,268 were mild COVID-19 cases and 5,400 individuals were from the 100kGP) that were processed with the same alignment and variant calling pipeline as the case cohort. Cohort breakdowns are shown in Fig. 1 . 
Genotype calling For the critical and mild COVID-19 cohorts, sequencing data alignment and variant calling were conducted using Genomics England pipeline 2.0 with DRAGEN software (v.3.2.22), aligned to the GRCh38 reference genome with decoy and alternative haplotypes (ALT-aware). Genomes from the 100,000 Genomes Project cohort were processed using the Illumina NSV4 Whole Genome Sequencing Workflow (v.2.6.53.23) with the iSAAC Aligner and Starling Small Variant Caller, aligned to Homo Sapiens NCBI GRCh38 with decoys. A subset of genomes from the cancer program of the 100,000 Genomes Project was reprocessed with the COVID-19 cohort’s DRAGEN pipeline (v.3.2.22) and was included as additional controls in rare variant analyses that were more sensitive to potential batch effects than common variant analyses. Aggregation We aggregated the genomic data using the Dragen gvcfgenotyper version 3.8.1 for DRAGEN processed genomes and with Illumina’s gvcfgenotyper v.2019.02.26 for Illumina NSV4 processed genomes. Variants were normalised with vt v.0.57721. Sample-QC We employed the sample-QC protocol from Kousathanas et al. (2022) 2 by removing samples that failed four BAM-level quality control filters: freemix contamination > 3%, mean autosomal coverage < 25×, per cent mapped reads < 90% or per cent chimeric reads > 5%. We also computed additional metrics: ratio of heterozygous to homozygous genotypes, ratio of insertions to deletions, ratio of transitions to transversions, total deletions, total insertions, total heterozygous SNPs, total homozygous SNPs, total transitions and total transversions. We required that samples were within 4 median absolute deviations (MADs) of the median of each of these statistics and removed outliers. We required concordant phenotypic and genetically inferred sex, either ‘XX’ or ‘XY’ and filtered for unrelated participants (KING-robust pairwise kinship < 0.0442). Site-QC Following Kousathanas et al. 
(2022) 2 , we masked low-quality genotypes by setting them to missing using the bcftools setGT module: For autosomes, we masked genotypes having DP < 10 or genotype quality ( GQ ) < 20 or heterozygote genotypes failing an ABratio binomial test with P-value < $10^{-3}$ . For chrX, we masked females as for autosomes. We masked male genotypes having DP < 5 or GQ < 20. We then excluded sites with missingness > 2% for GWAS analysis and > 5% for rare variant aggregate testing analysis. Genetic ancestry prediction To infer the genetic ancestry of each individual we followed the same approach as in Kousathanas et al. (2022) 2 . Briefly, we performed principal component analysis (PCA) on unrelated 1KGP3 individuals with GCTA v.1.93.1 beta using high-quality common SNPs, and inferred the first 20 PCs. We subsequently calculated loadings for each SNP, and used these to project individuals onto the 1KGP3 PCs. We then trained a random forest algorithm from the R package randomForest with the first 10 1KGP3 PCs as features and the super-population ancestry of each individual as labels. These were ‘AFR’ for individuals of African ancestry, ‘AMR’ for individuals of American ancestry, ‘EAS’ for individuals of East Asian ancestry, ‘EUR’ for individuals of European ancestry and ‘SAS’ for individuals of South Asian ancestry. We assigned individuals to a super-population when class probability ≥ 0.8. Individuals for whom no class had probability ≥ 0.8 were not included in the analyses. PCA Following Kousathanas et al. (2022) 2 , we used sets of high quality independent sites to perform PCA using GCTA 34 separately for common (maf ≥ 0.05) and rare (mac ≥ 5 & maf < 0.01) variants and for each of the four population cohorts based on predicted ancestry (AFR, EAS, EUR, SAS), using unrelated individuals. We projected the inferred PCs from unrelated individuals to the entire cohorts for the first 20 common and the first 20 rare variant PCs. 
Genome-wide association study analyses (GWAS) We performed GWAS analysis using SAIGE version 1.07 12 for each ancestry for sites with maf > 0.005. As covariates, we used sex, age, age$^2$, age × sex, and the first 20 principal components from high quality common variants. GWAS site-QC We filtered variants showing evidence for differential missingness or deviations from Hardy-Weinberg equilibrium using plink version 1.9 and with P < $10^{-5}$ and midP < $10^{-6}$ , respectively. For analyses using all the 100kGP controls (ctrl-all), we performed an additional control-control comparison to filter sites. 100kGP consisted of individuals processed with two different pipelines, the NSV4 Illumina pipeline and the Genomics England pipeline 2.0. We compared genomic data for a subset of these which we had sequenced with both platforms and retained variants with relative allele frequency difference between platforms < 1%. Multi-ancestry and multi-study meta-analyses We performed an inverse-variance weighted meta-analysis across all ancestries in our cohort (GenOMICC severe vs ctrl-all) using METAL software 35 . Then, we meta-analysed the results of GenOMICC with HGIv7 critical (A2) leave-GenOMICC-out summary statistics (REF) and 23andMe respiratory support summary statistics using the same methodology. The summary statistics of each meta-analysis were adjusted with genomic control using METAL. Identification of significant and novel loci We identified significant loci and their lead variants with a recursive procedure: (1) ranked variants by increasing P -value in the multi-study meta-analysis, (2) selected top variant as lead, (3) removed all variants within ±1Mbp of the top variant signal, (4) re-ranked remaining variants, (5) repeated steps 1-4 until no variants remained with P < $5 \times 10^{-8}$ . For the identified lead variants we additionally required nominal significance ( P < 0.05) and consistency in the direction of effect with the GenOMICC severe versus ctrl-mld GWAS meta-analysis. 
Sentinel variants passing all these criteria are shown in Supplementary Material. We identified novel signals by filtering for genome-wide significant signals that were outside ±1Mbp of the lead variants identified by the latest critical COVID-19 meta-analyses (Pairo-Castineira et al. 2023 3 and HGI version 7 - phenotype A2 13 ). The extended MHC/HLA region (GRCh38, chr6:25,726,063-33,400,644) 36 was treated as a single locus. Fine-mapping We used MESuSiE 16 to fine-map locus signals from the multi-study meta-analysis that contained genome-wide significant ( P < $5 \times 10^{-8}$ ) variants in the GenOMICC severe vs ctrl-all GWAS meta-analysis. This allowed us to use the LD from the same population samples as those that generated the GWAS summary statistics, increasing precision. Fine-mapping followed these steps: (1) we performed LD-clumping on GenOMICC severe vs ctrl-all ancestry-specific (AFR, EAS, EUR, SAS) and meta-analysis GWAS summary statistics; (2) we took the lead variants from LD-clumps falling within multi-study meta-analysis loci and defined regions ±500 kbp on each side of the lead variants while merging regions that overlapped; (3) we extracted pairwise LD (r) among all variants and summary statistics across AFR, EAS, EUR, SAS for each region; (4) we performed checks for LD mismatches with MESuSiE kriging rss function and removed SNPs that satisfied either of the following criteria: (a) logLR ≥ 2 and | Z | ≥ 2, (b) z stddiff > 3; (5) we then ran fine-mapping with MESuSiE for each region with defaults: L = 10, min abs( r ) = 0.5, posterior probability for CS ≥ 0.95. We did not perform fine-mapping of the signal in the MHC region. Polygenic risk score (PRS) analysis We calculated a polygenic risk score (PRS) using the GenOMICC severe vs ctrl-mld cohort. We randomly allocated 80% of European individuals into a training cohort to construct the PRS and used the remaining 20% as a test cohort. 
We performed a GWAS with training data with the same covariates as the main GWAS. We trained the PRS with LDpred2 37 using the bigsnpr package 38 and the severe vs ctrl-mld cohort European genotypes as reference panel. We calculated a null model with a logistic regression of the GWAS covariates (age, sex, age squared, age by sex and 20 PCs), and we compared it to a model adding the PRS. ROC curves and AUC were calculated using the pROC R package 39 . Gene-based rare variant analysis (RVAT) Variant annotation and masks We annotated all variants with variant consequences in the aggregated genomic data mild COVID-19 using VEP version 105 17 . We also annotated all variants with the Combined Annotation-Dependent Depletion pathogenicity score tool (CADD) version 1.6 40 . We filtered variant consequences for the canonical transcript. We defined two masks, strict_LoF and mild_LoF . The strict_LoF mask included High confidence Loss of function (HC-LOF) variants as annotated by LOFTEE 41 . The mild_LoF mask included the strict_LoF mask variants and missense and protein-altering mutations that had CADD ≥ 10. Site-QC We filtered for sites with minor allele frequency (maf) < 0.005 and missingness < 0.05. We additionally tested for differential missingness with plink v1.9 and removed variants with P < $10^{-5}$ . GenOMICC analysis We performed rare variant analysis on a per ancestry basis using REGENIE version 3.2.5 18 . As covariates, we used sex, age, age$^2$, age × sex, the first 10 principal components from high quality common variants and the first 10 principal components from high quality rare variants. We performed burden, SKAT and ACAT-V tests as implemented in REGENIE and combined P -values across tests and allele frequency threshold classes (singletons and < 0.005) with a single omnibus test (ACAT-O) to maximise power. 
Multi-study meta-analysis We performed meta-analysis aggregating data from our results (GenOMICC severe vs ctrl-dgn) and other participating studies that matched the analysis plan and contributed summary statistics. Participating studies performed independent RVAT analysis for the GenOMICC phenotype definition (phenotype A2 in COVID-19 HGI was designed to match this), with cases being laboratory confirmed COVID-19 with one or more of the following outcomes: death; ECMO requirement; mechanical ventilation (i.e. intubation) requirement; non-invasive ventilation requirement (i.e. new requirement for BiPAP or CPAP); high-flow oxygen therapy requirement (e.g. Optiflow); and controls being every other participant in each cohort that is not a case. For meta-analysing P -values across studies, we used Stouffer’s weighted Z-score method and weighted with the effective sample size of each study calculated as $N_{\mathrm{eff}} = 4 / (1/N_{\mathrm{case}} + 1/N_{\mathrm{control}})$. Structural analysis for SLC50A1 SLC50A1 (UniProt ID Q9BRV3) was modelled using AlphaFold2 42 as implemented on the Google Colab webserver 43 . β -D-glucopyranose was computationally docked to AlphaFold models of SLC50A1 using GNINA (estimated free energy of binding −6.64 and −6.77 kcal/mol for wild type and Arg201Leu SLC50A1, respectively) as implemented on the Tamarind Bio webserver 44 . The effect of the Arg201Leu mutation on SLC50A1 stability was calculated using the AlphaFold model of SLC50A1 and mCSM (predicted stability change ΔΔG = −1.379 kcal/mol, destabilizing) 45 . Structure figures were prepared using the PyMOL Molecular Graphics System 2.6 from Schrödinger, LLC. C-terminal residues Glu211-Thr221 are not shown for simplicity in these figures. 
Data Availability All other data produced in the present study are available upon reasonable request to the authors https://genomicc.org/data Code availability Analysis plan for rare variant analysis and code shared across participating studies is publicly available at GitHub: https://github.com/genomicsengland/COVID19_GenOMICC_AVT_analysis . Acknowledgements We thank the patients and their loved ones who volunteered to contribute to this study, and the research staff in every intensive care unit who recruited patients. GenOMICC was funded by Sepsis Research (the Fiona Elizabeth Agnew Trust), the Intensive Care Society, a Wellcome Trust Senior Research Fellowship (J.K.Baillie, 223164/Z/21/Z), the Department of Health and Social Care (DHSC), Illumina, LifeArc, the Medical Research Council, UKRI, a BBSRC Institute Strategic Program Support Grant to the Roslin Institute (BBS/E/D/20002172, BBS/E/D/10002070 and BBS/E/D/30002275) and UKRI grants MC PC 20004, MC PC 19025, MC PC 1905, and MRNO2995X/1. ADB acknowledges funding from the Wellcome PhD training fellowship for clinicians (204979/Z/16/Z), the Edinburgh Clinical Academic Track (ECAT) programme. This research is supported in part by the Data and Connectivity National Core Study, led by Health Data Research UK in partnership with the Office for National Statistics and funded by UK Research and Innovation (grant ref MC PC 20029). This study owes a great deal to the National Institute for Healthcare Research Clinical Research Network (NIHR CRN) and the Chief Scientist’s Office (Scotland), who facilitate recruitment into research studies in NHS hospitals, and to the global ISARIC and InFACT consortia. This work forms part of the translational research portfolio of the National Institute for Health and Care Research Barts Biomedical Research Centre. T.M. is supported by Cancer Research UK grant DRCRPG-May23/100002 to C. Siebold. 
Genomics England: This research was made possible through access to data in the National Genomic Research Library 46 , which is managed by Genomics England Limited (a wholly owned company of the Department of Health and Social Care). The National Genomic Research Library ( https://www.genomicsengland.co.uk/research ) holds data provided by patients and collected by the NHS as part of their care and data collected as part of their participation in research. The National Genomic Research Library is funded by the National Institute for Health Research and NHS England. The Wellcome Trust, Cancer Research UK and the Medical Research Council have also funded research infrastructure. REACT: National Institute for Health and Care Research (NIHR) and UK Research and Innovation (UKRI) - REACT-Genomics England (REACT-GE) (MR/V030841/1) and REACT-Long COVID (REACT-LC) (COV-LT-0040). The REACT study was funded by the UK Department of Health and Social Care with supplemental funding from the Huo Family Foundation. For the purpose of open access, the author has applied a CC BY public copyright licence to any Author Accepted Manuscript version arising from this submission. Footnotes ↵ * A list of authors and their affiliations appears at the end of the paper. References [1]. ↵ Pairo-Castineira , E. et al. Genetic mechanisms of critical illness in COVID-19 . Nature 591 , 92 – 98 ( 2020 ). OpenUrl PubMed [2]. ↵ Kousathanas , A. et al. Whole-genome sequencing reveals host factors underlying critical COVID-19 . Nature 607 , 97 – 103 ( 2022 ). OpenUrl CrossRef PubMed [3]. ↵ Pairo-Castineira , E. et al. GWAS and meta-analysis identifies 49 genetic variants underlying critical COVID-19 . Nature 617 , 1 – 15 ( 2023 ). OpenUrl [4]. ↵ Abani , O. et al. Baricitinib in patients admitted to hospital with COVID-19 (RECOVERY): A ran-domised, controlled, open-label, platform trial and updated meta-analysis . The Lancet 400 , 359 – 368 ( 2022 ). OpenUrl [5]. ↵ Wang , H. et al. 
Estimating excess mortality due to the covid-19 pandemic: a systematic analysis of covid-19-related mortality, 2020–21 . Lancet (London, England) 399 , 1513 ( 2022 ). URL https://pmc.ncbi.nlm.nih.gov/articles/PMC8912932/ . OpenUrl CrossRef PubMed [6]. ↵ COVID-19 Host Genetics Initiative . Mapping the human genetic architecture of COVID-19 . Nature 600 , 472 – 477 ( 2021 ). OpenUrl CrossRef PubMed [7]. ↵ Horowitz , J. E. et al. Genome-wide analysis provides genetic evidence that ACE2 influences COVID-19 risk and yields risk scores associated with severe disease . Nature Genetics 1 – 11 ( 2022 ). [8]. ↵ Crossfield , S. S. , Chaddock , N. J. , Iles , M. M. , Pujades-Rodriguez , M. & Morgan , A. W. Interplay between demographic, clinical and polygenic risk factors for severe covid-19 . International Journal of Epidemiology 51 , 1384 – 1395 ( 2022 ). URL doi: 10.1093/ije/dyac137 . OpenUrl CrossRef PubMed [9]. ↵ Fritsche , L. G. et al. Uncovering associations between pre-existing conditions and covid-19 severity: A polygenic risk score approach across three large biobanks . PLOS Genetics 19 , e1010907 ( 2023 ). URL https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1010907 . OpenUrl [10]. ↵ Kosmicki , J. A. et al. Pan-ancestry exome-wide association analyses of COVID-19 outcomes in 586,157 individuals . American Journal of Human Genetics 108 , 1350 – 1355 ( 2021 ). OpenUrl PubMed [11]. ↵ Butler-Laporte , G. et al. Exome-wide association study to identify rare variants influencing covid-19 outcomes: Results from the host genetics initiative . PLOS Genetics 18 , e1010367 ( 2022 ). URL https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1010367 . OpenUrl PubMed [12]. ↵ Zhou , W. et al. Efficiently controlling for case-control imbalance and sample relatedness in large-scale genetic association studies . Nature Genetics 50 , 1335 – 1341 ( 2018 ). OpenUrl CrossRef PubMed [13]. ↵ Kanai , M. et al. 
A second update on mapping the human genetic architecture of covid-19 . Nature 2023 621:7977 621 , E7 – E26 ( 2023 ). URL https://www.nature.com/articles/s41586-023-06355-3 . OpenUrl CrossRef PubMed [14]. ↵ Mountjoy , E. et al. An open approach to systematically prioritize causal variants and genes at all published human gwas trait-associated loci . Nature Genetics 2021 53:11 53 , 1527 – 1533 ( 2021 ). URL https://www.nature.com/articles/s41588-021-00945-5 . OpenUrl CrossRef PubMed [15]. ↵ Kanai , M. et al. Meta-analysis fine-mapping is often miscalibrated at single-variant resolution . Cell genomics 2 ( 2022 ). [16]. ↵ Gao , B. & Zhou , X. Mesusie enables scalable and powerful multi-ancestry fine-mapping of causal variants in genome-wide association studies . Nature Genetics 2024 56:1 56 , 170 – 179 ( 2024 ). OpenUrl CrossRef PubMed [17]. ↵ McLaren , W. et al. The ensembl variant effect predictor . Genome biology 17 , 122 ( 2016 ). OpenUrl CrossRef PubMed [18]. ↵ Mbatchou , J. et al. Computationally efficient whole-genome regression for quantitative and binary traits . Nature Genetics 2021 53:7 53 , 1097 – 1103 ( 2021 ). OpenUrl CrossRef PubMed [19]. ↵ Chen , L. Q. et al. Sugar transporters for intercellular exchange and nutrition of pathogens . Nature 468 , 527 – 532 ( 2010 ). URL https://pubmed.ncbi.nlm.nih.gov/21107422/ . OpenUrl CrossRef PubMed Web of Science [20]. ↵ Forrest , A. R. R. , Kawaji , H. , Rehli , M. , Baillie , J.K. , et al. A promoter-level mammalian expression atlas . Nature 507 , 462 – 470 ( 2014 ). URL https://pubmed.ncbi.nlm.nih.gov/24670764/ . OpenUrl CrossRef PubMed Web of Science [21]. ↵ Viktorova , E. G. et al. The development of resistance to an inhibitor of a cellular protein reveals a critical interaction between the enterovirus protein 2c and a small gtpase arf1 . PLOS Pathogens 19 , e1011673 ( 2023 ). URL https://journals.plos.org/plospathogens/article?id=10.1371/journal.ppat.1011673 . OpenUrl PubMed [22]. ↵ Martínez , J. L. et al. 
The guanine nucleotide exchange factor gbf1 participates in rotavirus replication . Journal of Virology 93 , e01062 – 19 ( 2019 ). URL doi: 10.1128/JVI.01062-19 . OpenUrl Abstract / FREE Full Text [23]. ↵ Võsa , U. et al. Large-scale cis- and trans-eqtl analyses identify thousands of genetic loci and polygenic scores that regulate blood gene expression . Nature Genetics 2021 53:9 53 , 1300 – 1310 ( 2021 ). URL https://www.nature.com/articles/s41588-021-00913-z . OpenUrl CrossRef PubMed [24]. ↵ Uchil , P. D. , Quinlan , B. D. , Chan , W.-T. , Luna , J. M. & Mothes , W. Trim e3 ligases interfere with early and late stages of the retroviral life cycle . PLoS pathogens 4 , e16 ( 2008 ). OpenUrl CrossRef PubMed [25]. ↵ Hirschenberger , M. et al. Arf1 prevents aberrant type i interferon induction by regulating sting activation and recycling . Nature Communications 2023 14:1 14 , 1 – 20 ( 2023 ). URL https://www.nature.com/articles/s41467-023-42150-4 . OpenUrl CrossRef PubMed [26]. ↵ Lee , Y. , Song , B. , Park , C. & Kwon , K.-S. Trim11 negatively regulates ifneta production and antiviral activity by targeting tbk1 . PloS one 8 , e63255 ( 2013 ). OpenUrl CrossRef PubMed [27]. ↵ Repurposed Antiviral Drugs for Covid-19 — Interim WHO Solidarity Trial Results . New England Journal of Medicine 0 , null ( 2020 ). [28]. ↵ Russell , C. D. , Lone , N. I. & Baillie , J. K. Comorbidities, multimorbidity and covid-19 . Nature medicine 29 , 334 – 343 ( 2023 ). OpenUrl CrossRef PubMed [29]. ↵ Hart , G. T. , Hogquist , K. A. & Jameson , S. C. Krüppel-like factors in lymphocyte biology . The Journal of Immunology 188 , 521 – 526 ( 2012 ). URL doi: 10.4049/jimmunol.1101530 . OpenUrl Abstract / FREE Full Text [30]. ↵ Cao , Z. , Sun , X. , Icli , B. , Wara , A. K. & Feinberg , M. W. Role of krüppel-like factors in leukocyte development, function, and disease . Blood 116 , 4404 – 4414 ( 2010 ). URL doi: 10.1182/blood-2010-05-285353 . OpenUrl Abstract / FREE Full Text [31]. ↵ Wang , A. 
et al. Klf13 promotes sle pathogenesis by modifying chromatin accessibility of key proinflammatory cytokine genes . Communications biology 7 , 1446 ( 2024 ). OpenUrl PubMed [32]. ↵ Berger , W. , Steiner , E. , Grusch , M. , Elbling , L. & Micksche , M. Vaults and the major vault protein: Novel roles in signal pathway regulation and immunity . Cellular and Molecular Life Sciences 66 , 43 – 61 ( 2009 ). URL https://link.springer.com/article/10.1007/s00018-008-8364-z . OpenUrl CrossRef PubMed Web of Science [33]. ↵ Knight , S. R. et al. Risk stratification of patients admitted to hospital with covid-19 using the ISARIC WHO Clinical Characterisation Protocol: Development and validation of the 4C Mortality Score . BMJ 370 , m3339 ( 2020 ). OpenUrl Abstract / FREE Full Text [34]. ↵ Yang , J. , Lee , S. H. , Goddard , M. E. & Visscher , P. M. GCTA: A tool for genome-wide complex trait analysis . American journal of human genetics 88 , 76 – 82 ( 2011 ). OpenUrl CrossRef PubMed [35]. ↵ Willer , C. J. , Li , Y. & Abecasis , G. R. METAL: Fast and efficient meta-analysis of genomewide association scans . Bioinformatics (Oxford, England) 26 , 2190 – 2191 ( 2010 ). OpenUrl CrossRef PubMed Web of Science [36]. ↵ Horton , R. et al. Gene map of the extended human MHC . Nat Rev Genet 5 , 889 – 899 ( 2004 ). OpenUrl CrossRef PubMed Web of Science [37]. ↵ Privé , F. , Arbel , J. & Vilhjálmsson , B. J. LDpred2: better, faster, stronger . Bioinformatics 36 , 5424 – 5431 ( 2020 ). https://academic.oup.com/bioinformatics/article-pdf/36/22-23/5424/50715856/btaa1029.pdf . OpenUrl [38]. ↵ Privé , F. , Aschard , H. , Ziyatdinov , A. & Blum , M. G. Efficient analysis of large-scale genome-wide data with two R packages: bigstatsr and bigsnpr . Bioinformatics 34 , 2781 – 2787 ( 2018 ). OpenUrl CrossRef PubMed [39]. ↵ Robin , X. et al. proc: an open-source package for r and s+ to analyze and compare roc curves . BMC bioinformatics 12 , 77 ( 2011 ). OpenUrl CrossRef PubMed [40]. ↵ Rentzsch , P. 
, Witten , D. , Cooper , G. M. , Shendure , J. & Kircher , M. CADD: Predicting the deleteriousness of variants throughout the human genome . Nucleic Acids Research 47 , D886 – D894 ( 2018 ). OpenUrl [41]. ↵ Karczewski , K. J. et al. The mutational constraint spectrum quantified from variation in 141,456 humans . Nature 581 , 434 – 443 ( 2020 ). OpenUrl CrossRef PubMed [42]. ↵ Jumper , J. et al. Highly accurate protein structure prediction with alphafold . Nature 596 , 583 – 589 ( 2021 ). URL https://pubmed.ncbi.nlm.nih.gov/34265844/ . OpenUrl CrossRef PubMed [43]. ↵ Mirdita , M. et al. Colabfold: making protein folding accessible to all . Nature methods 19 , 679 – 682 ( 2022 ). URL https://pubmed.ncbi.nlm.nih.gov/35637307/ . OpenUrl PubMed [44]. ↵ McNutt , A. T. et al. Gnina 1.0: molecular docking with deep learning . Journal of cheminformatics 13 ( 2021 ). URL https://pubmed.ncbi.nlm.nih.gov/34108002/ . [45]. ↵ Pires , D. E. , Ascher , D. B. & Blundell , T. L. mcsm: predicting the effects of mutations in proteins using graph-based signatures . Bioinformatics (Oxford, England) 30 , 335 – 342 ( 2014 ). URL https://pubmed.ncbi.nlm.nih.gov/24281696/ . OpenUrl CrossRef PubMed Web of Science [46]. ↵ National Genomic Research Library ( 2017 ). View the discussion thread. Back to top Previous Next Posted November 21, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Whole genome sequence meta-analyses reveal common and rare genetic associations with critical COVID-19 Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. 
Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Whole genome sequence meta-analyses reveal common and rare genetic associations with critical COVID-19 Athanasios Kousathanas , Konrad Rawlik , Erola Pairo-Castineira , Fiona Griffiths , Wilna Oosthuyzen , Sara Clohisey Hendry , Tomas Malinauskas , Guillaume Butler-Laporte , Prabhu Arumugam , Colin Begg , Marc Chadeau-Hyam , Georgia Chan , Graham Cooke , Sally Donovan , Greg Elgar , Tom A. Fowler , Peter Goddard , Charles Hinds , Peter Horby , Lowell Ling , Emma F. Magavern , Fiona Maleady-Crowe , Hugh Montgomery , Christopher A. Odhams , Peter J.M. Openshaw , Christine Patch , Augusto Rendon , Shahla Salehi , Richard H. Scott , Malcolm G Semple , Manu Shankar-Hari , Afshan Siddiq , Alex Stuckey , Charlotte Summers , Linda Todd , Susan Walker , Timothy Walsh , Helen Ward , Tala Zainy , GenOMICC Investigators , ISARIC4C Investigators , BQC19 Investigators , GENCOVID Investigators , DeCOI Investigators , POLCOVID Investigators , PMBB Investigators , Angie Fawkes , Lee Murphy , Andy Law , Veronique Vitart , Patrick F Chinnery , James F Wilson , Matthew A. Brown , Paul Elliott , Loukas Moutsianas , Mark J. Caulfield , J. Kenneth Baillie medRxiv 2025.11.19.25340573; doi: https://doi.org/10.1101/2025.11.19.25340573 Share This Article: Copy Citation Tools Whole genome sequence meta-analyses reveal common and rare genetic associations with critical COVID-19 Athanasios Kousathanas , Konrad Rawlik , Erola Pairo-Castineira , Fiona Griffiths , Wilna Oosthuyzen , Sara Clohisey Hendry , Tomas Malinauskas , Guillaume Butler-Laporte , Prabhu Arumugam , Colin Begg , Marc Chadeau-Hyam , Georgia Chan , Graham Cooke , Sally Donovan , Greg Elgar , Tom A. Fowler , Peter Goddard , Charles Hinds , Peter Horby , Lowell Ling , Emma F. Magavern , Fiona Maleady-Crowe , Hugh Montgomery , Christopher A. Odhams , Peter J.M. 
Openshaw , Christine Patch , Augusto Rendon , Shahla Salehi , Richard H. Scott , Malcolm G Semple , Manu Shankar-Hari , Afshan Siddiq , Alex Stuckey , Charlotte Summers , Linda Todd , Susan Walker , Timothy Walsh , Helen Ward , Tala Zainy , GenOMICC Investigators , ISARIC4C Investigators , BQC19 Investigators , GENCOVID Investigators , DeCOI Investigators , POLCOVID Investigators , PMBB Investigators , Angie Fawkes , Lee Murphy , Andy Law , Veronique Vitart , Patrick F Chinnery , James F Wilson , Matthew A. Brown , Paul Elliott , Loukas Moutsianas , Mark J. Caulfield , J. Kenneth Baillie medRxiv 2025.11.19.25340573; doi: https://doi.org/10.1101/2025.11.19.25340573 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Intensive Care and Critical Care Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (297) Cardiovascular Medicine (4421) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (606) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15212) Forensic Medicine (30) Gastroenterology (1121) Genetic and Genomic Medicine (6581) Geriatric Medicine (667) Health Economics (996) Health Informatics (4520) Health Policy (1366) Health Systems and Quality Improvement (1611) Hematology (539) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15906) Intensive Care and Critical Care Medicine (1103) Medical Education (620) Medical Ethics (144) Nephrology (667) Neurology (6580) Nursing (345) Nutrition (998) Obstetrics and Gynecology (1141) Occupational and Environmental Health (956) Oncology (3324) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1689) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical 
Psychology (5432) Public and Global Health (9212) Radiology and Imaging (2193) Rehabilitation Medicine and Physical Therapy (1368) Respiratory Medicine (1194) Rheumatology (593) Sexual and Reproductive Health (709) Sports Medicine (529) Surgery (709) Toxicology (99) Transplantation (288) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ff4055f6e3a52ad',t:'MTc3OTM3MTEyMA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.