A semi-empirical Bayes approach for calibrating weak instrumental bias in sex-specific Mendelian randomization studies

doi:10.1101/2025.01.02.25319889

A semi-empirical Bayes approach for calibrating weak instrumental bias in sex-specific Mendelian randomization studies

2025 · doi:10.1101/2025.01.02.25319889

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

⚙ AI-generated deep summary by claude@2026-06, 2026-06-24 · read from full text ⓘ

The paper develops and evaluates a semi-empirical Bayesian method for sex-specific Mendelian randomization (MR) that calibrates weak instrumental variable bias when one sex group (especially females) has smaller sample sizes. Using the Million Veteran Program, the authors first conducted sex-specific GWAS for multiple suboptimal-sleep phenotypes (insomnia, obstructive sleep apnea, short/long sleep duration, and excessive daytime sleepiness) and then proposed a framework that shares information across sexes to shrink variant–phenotype effects and uses these calibrated estimates in MR; simulations show improved efficiency compared with conventional approaches. They apply the method to estimate causal effects of sleep phenotypes on cardiovascular-related outcomes using sex-specific data from MVP and All of Us, finding notable sex differences, particularly involving obstructive sleep apnea and chronic kidney disease and effects of long sleep duration across several cardiovascular outcomes. A key caveat explicitly noted is that sex-specific analysis can suffer from weaker instruments when female sample sizes are limited, which the proposed method is designed to mitigate but may not fully overcome. The paper does not explicitly discuss endometriosis or adenomyosis; it was included in the corpus via a keyword match in the upstream search index.

Read from the paper's body, not the abstract. Not a substitute for reading the paper. No clinical advice. How this works

Full text 75,539 characters · extracted from preprint-html · click to expand

A semi-empirical Bayes approach for calibrating weak instrumental bias in sex-specific Mendelian randomization studies | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search A semi-empirical Bayes approach for calibrating weak instrumental bias in sex-specific Mendelian randomization studies View ORCID Profile Yu-Jyun Huang , Nuzulul Kurniansyah , View ORCID Profile Daniel F. Levey , View ORCID Profile Joel Gelernter , Jennifer E. Huffman , Kelly Cho , View ORCID Profile Peter W.F. Wilson , Daniel J. Gottlieb , Kenneth M. Rice , View ORCID Profile Tamar Sofer VA Million Veteran Program doi: https://doi.org/10.1101/2025.01.02.25319889 Yu-Jyun Huang 1 Department of Medicine, Harvard Medical School , Boston, MA, USA 2 CardioVascular Institute (CVI), Beth Israel Deaconess Medical Center , Boston, MA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yu-Jyun Huang For correspondence: yhuang9{at}bidmc.harvard.edu Nuzulul Kurniansyah 3 Department of Medicine, Brigham and Women’s Hospital , Boston, MA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Daniel F. Levey 4 Division of Human Genetics, Department of Psychiatry, Yale University School of Medicine , New Haven, CT, USA 5 Department of Psychiatry, Veterans Affairs Connecticut Healthcare Center , West Haven, CT, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Daniel F. Levey Joel Gelernter 4 Division of Human Genetics, Department of Psychiatry, Yale University School of Medicine , New Haven, CT, USA 5 Department of Psychiatry, Veterans Affairs Connecticut Healthcare Center , West Haven, CT, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Joel Gelernter Jennifer E. Huffman 6 Massachusetts Veterans Epidemiology Research and Information Center , VA Healthcare System, Boston, MA, USA 7 VA Palo Alto Health Care System , Palo Alto, CA, USA 8 Palo Alto Veterans Institute for Research , Palo Alto, CA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kelly Cho 6 Massachusetts Veterans Epidemiology Research and Information Center , VA Healthcare System, Boston, MA, USA 9 Division of Aging, Department of Medicine, Brigham and Women’s Hospital and Harvard Medical School , Boston, MA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Peter W.F. Wilson 10 Atlanta VA Healthcare System , Decatur, GA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Peter W.F. Wilson Daniel J. Gottlieb 3 Department of Medicine, Brigham and Women’s Hospital , Boston, MA, USA 6 Massachusetts Veterans Epidemiology Research and Information Center , VA Healthcare System, Boston, MA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kenneth M. Rice 11 Department of Biostatistics, University of Washington , Seattle, WA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tamar Sofer 1 Department of Medicine, Harvard Medical School , Boston, MA, USA 2 CardioVascular Institute (CVI), Beth Israel Deaconess Medical Center , Boston, MA, USA 3 Department of Medicine, Brigham and Women’s Hospital , Boston, MA, USA 12 Department of Biostatistics, Harvard T.H. Chan School of Public Health , Boston, MA, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tamar Sofer Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Strong sex differences exist in sleep phenotypes and also cardiovascular diseases (CVDs). However, sex-specific causal effects of sleep phenotypes on CVD-related outcomes have not been thoroughly examined. Mendelian randomization (MR) analysis is a useful approach for estimating the causal effect of a risk factor on an outcome of interest when interventional studies are not available. We first conducted sex-specific genome-wide association studies (GWASs) for suboptimal-sleep phenotypes (insomnia, obstructive sleep apnea (OSA), short and long sleep durations, and excessive daytime sleepiness) utilizing the Million Veteran Program (MVP) dataset. We then developed a semi-empirical Bayesian framework that (i) calibrates variant-phenotype effect estimates by leveraging information across sex groups, and (ii) applies shrinkage sex-specific effect estimates in MR analysis, to alleviate weak instrumental bias when sex groups are analyzed in isolation. Simulation studies demonstrate that the causal effect estimates derived from our framework are substantially more efficient than those obtained through conventional methods. We estimated the causal effects of sleep phenotypes on CVD-related outcomes using sex-specific GWAS data from the MVP and All of Us. Significant sex differences in causal effects were observed, particularly between OSA and chronic kidney disease, as well as long sleep duration on several CVD-related outcomes. By applying shrinkage estimates for instrumental variable selection, we identified multiple sex-specific significant causal relationships between OSA and CVD-related phenotypes. Introduction Investigating sex differences in health and disease mechanisms is a leading public health research priority ( 1 – 3 ). Sex differences are evident in various health conditions, including suboptimal-sleep phenotypes and cardiovascular diseases (CVD). For example, there is a higher prevalence of insomnia in women ( 4 , 5 ), whereas obstructive sleep apnea (OSA) is more common in men ( 6 , 7 ). Cardiovascular-related diseases, such as myocardial infarction and hypertension, generally present with a higher incidence in male adults compared to females ( 8 – 10 ). Increasing numbers of genome-wide association studies (GWASs), which like other genomic studies often analyze only autosomal chromosomes, have identified strong signals of sex differences ( 11 – 13 ). Examples include, but are not limited to, sex differences in genetic variant effect sizes ( 14 – 16 ), sex-specific genetic risk associations ( 17 – 19 ), and sex-biased gene/protein expression level ( 20 – 22 ). Sex-specific causal effects of modifiable risk exposures on outcomes can, under some conditions, be obtained via Mendelian randomization (MR) analysis ( 23 ). But because of the paucity of sex-specific interventional studies, or studies with sufficient sex-stratified sample sizes, much remains unknown about how sex-specific causal effects may inform targeted disease treatments or interventions, ultimately limiting efforts to reduce sex-disparities in health ( 24 – 26 ). MR analysis is widely used in genetic epidemiology because it can, using GWAS summary statistics alone ( 27 – 29 ), estimate causal effects from observational data. Sex-specific MR analysis, however, is limited in comparison: not only is each sex group’s sample size smaller than total in any one study, but GWAS reporting is not always sex-specific. This problem can be worse where GWAS participation is biased for structural or other reasons. For example, in the Million Veteran Program (MVP) of the U.S. Department of Veterans Affairs (VA) healthcare system, which collected genetic data and extensive phenotypes from U.S. veterans, only 10% of participants are female, in line with the proportion of female veterans. In sex-specific MR analysis, the small sample size of the female MVP population particularly limits the strength of instrumental variables (IVs) identified from female-specific GWAS, which may make causal effect estimates unstable, due to the corresponding weak IV bias ( 30 , 31 ). To address these challenges, we (i) performed sex-specific GWASs of sleep phenotypes in the MVP, (ii) developed a novel statistical approach to enhance the precision of sex-specific variant-phenotype effect estimates by leveraging information across sex groups, and (iii) integrated our new shrinkage estimator into MR analyses to improve causal effect estimates, particularly for the sex group with smaller sample sizes. Motivation for this approach comes from recent findings of high correlation, between females and males, of the genetic components of multiple traits, ( 32 , 33 ), suggesting that many (though not all) variant associations are similar between sexes. Thus, focusing on MVP where the female population is small, our approach is to borrow information from the male population in an adaptive manner to improve female-specific variant effect estimates and, ultimately, exposure-outcome causal effect estimates. The proposed approach, incorporating the spirit of both transfer learning and empirical Bayes, uses male-specific effect size estimates to specify prior distributions on the female-specific variant-exposure effect sizes (i.e., using information from the larger sample to improve power in the smaller). The inverse variance-weighted meta-analysis estimator and the adaptive weight estimator (proposed for analyzing secondary outcome in case-control studies ( 34 – 36 )) can both be derived as the posterior mean in the proposed framework. In simulation studies, compared to standard use of variant-exposure summary statistics in MR analysis, our approach improves efficiency of exposure-outcome causal effect estimates. Finally, using sex-specific data from the MVP, along with genetic association results from the All of Us (AoU) study, we applied a two-sample MR approach to estimate the causal effects of sleep phenotypes on CVD-related outcomes. Our method identified several sex-specific causal associations. Specifically, insomnia was causally associated with an increased risk of chronic kidney disease in females, long sleep was linked to a higher risk of hypertension in females, and short sleep was associated with an increased risk of coronary artery disease in males. A statistically significant sex difference in the causal effect of OSA on chronic kidney disease was also identified. In addition, using shrinkage estimates for IV selection, we detected several statistically significant causal effects of OSA on CVD-related outcomes, as well as distinct sex differences in the causal patterns of long sleep on CVD-related outcomes, with higher risks observed in female populations Results Overview of semi-empirical Bayesian method for calibrating genetic variant effect size estimates utilizing information across groups While our method is general, we focus on the need to improve the estimation of variant effect sizes in the relatively small MVP female population, and do so by borrowing information from the male population. Another simplification we make in the exposition is to focus on the “exposure” GWAS, even though the same framework can be applied to any trait GWAS, regardless of its role in an MR analysis. Throughout this paper, we use γ to represent “variant-exposure” effect size and Γ to represent “variant-outcome” effect size. To motivate our method, we consider a Bayesian prior on γ jF , the female-specific effect size of the j th SNP on the sleep phenotype, specifically where θ jF , τ jF are the prior mean and standard deviation (SD). The approximate distribution of the “raw” female-specific effect size estimate (i.e., an estimate that relies on female data only) is given by where is the estimated standard error of .The normality assumption here is appropriate due to large GWAS sample sizes ( 30 , 31 ), regardless of the specific method used for estimation (maximum likelihood, method of moments, etc.). Potential specification of the prior distribution of female-specific SNP effect sizes and resulting posterior estimates An intuitive way to borrow information from the male to the female population is to specify the prior mean and variance in equation (1) as the male-specific effect size estimate and its estimated variance. Formally the prior is leading to posterior where the posterior mean and the posterior variance . The posterior mean is a weighted average of the prior mean and sample mean, with the weight on the group-specific estimates being proportional to their precisions (i.e., the inverse of their variances). The posterior mean and variance are exactly identical to those from conventional fixed-effects inverse-variance meta-analysis of the sex-specific estimates ( ,FE meta estimate henceforth). A more detailed illustration of the Bayesian approach to meta-analysis is given by Dominguez and Rice ( 37 ). The FE meta estimate has been shown to be as efficient as pooling individual level data when effects are identical across combined studies ( 38 ), so in that setting there is no penalty to using meta-analysis over any standard competing method. However, the estimated strength of sex differences, i.e., , is not considered in the FE meta estimate, or standard competing methods, making their use unappealing when there are strong sex differences. To incorporate information on sex differences we consider the prior with: Here, the prior mean is the efficient FE meta estimate ,while the sex differences strength is incorporated in the prior variance. The posterior distribution of γ jF is Normal, with mean , where , is related to both the difference in effect size estimate and the variance of male-specific estimate. Here, is .The posterior variance can be written as . Because the posterior mean adapts to the observed differences in sex-specific estimates, we call it the adaptive posterior mean (APM) estimator .From equation (5) , the prior mean and variance depend not only on sex-specific groups but also on sex-combined effect estimates and sex differences. This is why we refer to our framework as a “semi-empirical” Bayes approach. The APM estimator is related to the adaptive weight (AW) estimator, initially proposed for gene-environment interactions or gene-secondary outcome associations in case-control studies ( 34 – 36 ). In both of those contexts, the AW estimator was developed to provide a population-level estimate (i.e., not specific to either cases or controls) by adaptively combining information from both groups using weighting. In our approach, we instead use shrinkage estimators to obtain group-specific estimates. The APM estimator also differs from the original AW in its weighting parameters. APM incorporates the variance of group-specific effect estimates into its prior variance for γ jF , to avoid underestimating posterior variance due to smaller estimated sex differences (i.e., ). In other words, if in equation (5) , then the APM estimator may reduce to AW estimator. Table 1 summarizes the derivation of the FE meta and APM estimates under the Bayesian Normal-Normal modeling scheme. As stated earlier, we focus on the calibration of the female effects ,but the framework is general and can be similarly applied to the male population. View this table: View inline View popup Download powerpoint Table 1: The proposed Bayesian framework for calibrating variant-trait effect size estimates. Exposure-outcome causal effect estimation Two-sample MR approaches estimate causal effects from two independent sets of summary statistics. These describe variant associations with an exposure and with the outcome phenotype, where the variants are selected to be valid IVs for the exposure of interest ( 30 , 39 – 42 ). A causal effect β j of exposure on outcome can then be estimated using estimated associations of variant j with exposure and outcome via the Wald ratio estimate . Using multiple IVs, the causal effect β can then be estimated by aggregating estimates from all valid IVs through various weighting approaches. Here, we assume all p variants are valid IVs. Our proposed Bayesian framework, therefore, is conceptualized as a preliminary step before applying MR methods. More specifically, we first calibrate the estimates using the proposed Bayesian models. Then, the shrinkage estimates, i.e., the posterior means and SDs in equation (4) , are used as inputs in existing MR algorithms. The causal effect is then estimated using the newly-estimated and (with corresponding posterior SD) coupled with the (raw) variant-outcome estimated effect sizes. A schematic overview of the sex-specific MR analysis and the proposed Bayesian framework is illustrated in Figure 1 . Download figure Open in new tab Figure 1: Schematic overview of the sex-specific MR analysis Panel a: A causal diagram underlying the MR framework in this manuscript. We consider sleep-related phenotypes to be the exposure factors and cardiovascular-related measures as outcome variables. β F denotes the underlying female-specific exposure-outcome causal effect; β M represents the underlying male-specific exposure-outcome causal effect. In our analysis, β F and β M are estimated separately. The GWAS summary statistics for sleep phenotypes were derived from the MVP dataset, and the summary statistics for cardiovascular-related diseases were computed in AoU; Panel b: Estimation of sex-specific causal effects using the two-sample MR approaches. The inputs for the two-sample MR methods are raw sex-specific exposure (female: and ;male: and ) and outcome GWAS summary statistics (female: and ;male: ;and ) from independent samples. The outputs are: estimated female-specific causal effect and estimated male-specific causal effect ;Panel c: The proposed Bayesian method. We first calibrate the raw sex-specific exposure effects by borrowing information from one sex group to the other, or across both sex groups. Two-sample MR analysis then uses the shrinkage exposure summary statistics, i.e., the posterior mean and posterior SD of γ (female: E { π ( γ jF |.)} and SD { π ( γ jF |.)}; male: E { π ( γ jM |.)} and SD { π ( γ jM |.)}), with the raw outcome summary statistics to provide a more robust basis for causal effect estimation. We use π ( γ |.) to denote the posterior distribution of γ . The outputs are the estimated female-specific causal effect ,and estimated male-specific causal effect ,both use shrinkage exposure effect estimates in their construction. Abbreviations. MR: Mendelian randomization; F: female; M: male; CVD: cardiovascular disease; GWAS: genome-wide association study; MVP: million veteran program; AoU: All of Us; SD: standard deviation; cML: constrained maximum likelihood. Simulation studies We used simulation primarily to evaluate and compare the performance of exposure-outcome causal effect ( β ) estimation using raw and shrinkage variant-exposure effect ( and ) with respective SD estimates, combined with each selected MR method, particularly focusing on the population with a smaller sample size (female population in our analysis). For our Bayesian methods, the posterior SDs of γ are treated as standard error estimates. We also incorporated the AW estimator as another approach for calibrating female-specific variant-exposure effect size estimates. The two-sample MR methods considered in the analyses are summarized in Table 4 (Method section). To mimic the structure of the MVP dataset, we generated 2,000 female individuals and 20,000 male individuals for the exposure GWAS, maintaining a similar proportion of females to males as in the MVP. For the outcome GWAS, we generated balanced datasets of 10,000 individuals for both female and male populations. We generated 100 independent SNPs as IVs in all simulations, with all allele frequencies set at 0.3. We use D jγ = γ jF − γ jM , j = 1,2, …, p to denote the sex differences strength in variants associations with the exposure, γ . We write D γ = { j | D jγ = γ jF − γ jM ≠ 0} and | D γ | to represent the set of variants and the number of variants having sex-differences in variant-exposure effect size, respectively. We considered three simulation scenarios: (i) fixed D jγ = 0.05 if variant j ∈ D γ (ii) random D jγ , and (iii) using MVP OSA GWAS summary statistics to guide the simulated differences D jγ , which consists of strong D jγ patterns with weak IVs in the female population (average F -statistic < 10). In simulation (ii) and (iii), all variants j ∈ D γ (i.e., | D γ | = 100). Within each simulation, we also considered a few levels of sex differences in “causal effect” settings (i.e., β F ≠ β M ). A more detailed description of the simulation studies is provided in Method section, Supplementary Note 1, Supplementary Tables 1 and 2. We summarized the estimation performance of the and in the simulation studies using two metrics: (i) the mean squared error (MSE) of the estimated effect and (ii) the 95% confidence interval’s actual coverage of the true effect, where these 95% confidence intervals for β under each MR method were computed using standard asymptotic normality properties. The female results from simulations with no sex-differences in causal effect are presented in Figure 2 and summarized below. The full simulation results, including the male-specific causal effect estimation and sex differences in causal effect simulations are summarized in Supplementary Note 1 and Supplementary Figures 1, 2 and 3. Download figure Open in new tab Figure 2: MSE and 95% confidence interval coverage rate for female-specific causal effect estimation Results from estimating female-specific causal effects β F under three simulation scenarios where there are no sex differences in causal effects (i.e., β F = β M ) across sex groups. The left panel shows the MSEs of the estimated female-specific causal effects, while the right panel presents the 95% confidence interval coverage rates of the true underlying causal effects. We considered five two-sample MR methods for estimating the causal effect, which are: MR-RAPS (RAPS), constrained maximum likelihood (cML), contaminated mixture (ConMix), weighted median (W-median), penalized and robust IVW (IVW*). In the MSE results, the un-calibrated approach is represented by the bars with darker colors. The other three shrinkage approaches are shown in gradient colors. For the coverage rate results, the un-calibrated approach is shown in the first column, and other three shrinkage approaches are shown in the second to forth column. The results of fixed sex differences in variant-exposure effect simulation are presented in panels a and b. The results of random sex differences in variant-exposure effect simulations are shown in panels c and d. The results of using MVP OSA GWAS summary statistics to guide sex-specific variant-exposure effect simulation are shown in panels e and f. The underlying true causal effect is set at β F = 0.1 in panels a, b, c, and d. The underlying true causal effects are shown in the top of panel e and bottom of panel f. MSEs were computed over 1000 simulation replicates. Intervals around the estimated MSE correspond to the MSE +/-one estimated standard error. Abbreviation: MSE: mean square error; MR: Mendelian randomization; APM: adaptive posterior mean; AW: adaptive weight; diff: different level of sex differences in variant-exposure effects; Cor: correlation between female and male variant-exposure effect; CE: causal effect; RAPS: MR-RAPS; cML: constrained maximum likelihood; ConMix: contaminated mixture; W-median: weighted medium; IVW*: penalized and robust IVW Results from simulation settings (i): fixed sex differences in variant-exposure effect sizes These results are provided in Figures 2a and 2b . The estimator performs best when D γ included only 10% of the variants (i.e., | D γ | = 10), compared to other shrinkage methods and to the raw .The estimators and had similar performance, and both gave better causal effect estimates than ,regardless of the proportion of variants with sex differences. We found that performed better than when more than 10% of variants were being selected into D γ . The coverage rate of β F using was lower than that of when many variants were included in D γ . However, always achieved a smaller MSE for β F compared with . Results from simulation settings (ii): random sex differences in variant-exposure effect sizes For random sex differences in γ ( Figures 2c and 2d ), when every variant has a sex difference, i.e., all j ∈ D γ , and most sex-difference D jγ were strong, the estimate that relies on the had a higher MSE than the one relying on raw .In contrast, using improved β F estimation performance in terms of MSE and performed similarly to .In these simulations, the APM estimator resulted in similar 95% confidence interval coverage compared to the raw approach, but achieved a smaller MSE for β F . Results from simulation settings (iii): using OSA GWAS summary statistics to guide the simulated variant-exposure effect sizes Here, all variants have substantial sex differences in γ , while the selected variants are all weak IVs, meaning that the F -statistic < 10 in the female population. Using and improved β F estimation as demonstrated by improved MSE ( Figures 2e and 2f ). In most cases, performed better .Both and approaches improved the 95% confidence interval coverage rate compared to using .These results highlight that even though less information could be transferred from the male to the female population in these simulations (due to the strong sex differences in γ ), adaptive estimates still improved causal effect estimation. Moreover, the estimates had a larger MSE than the raw approach under most settings and performed poorly when the female causal effect β F was non-null. In summary, borrowing power from the stratum with the larger to the stratum with the lower sample size, using adaptive (i.e., shrinkage) variant-exposure estimates ( and ) improves β F estimates. Using estimators and performed well in most simulation studies, regardless of the degree of sex differences D γj . The estimator performs best when the underlying true γ were similar in the two groups. Among the two-sample MR methods considered, MR-RAPS, known to perform well when weak instruments are used, demonstrated at least no worse performance than other methods. Secondary simulation studies Supplementary Note 2 and Supplementary Figures 4-14 provide results from secondary simulation studies. We expanded upon simulation settings (i), and further examined two scenarios: (a) estimation at the presence of pleiotropic effects of some IVs and (b) calibration of both γ and Γ effect estimates using the proposed framework. We also considered an increased sample size of the female population in the exposure GWAS to simulate a scenario where borrowing information from the male group may be less useful. Lastly, we evaluated the performance of a test for sex differences, i.e., a test of the null hypothesis H 0 : β F = β M , using the estimated β F and β M based on various MR approaches (but always using the raw variant-exposure and variant-outcome associations ). In brief, and substantially improved the estimation accuracy of γ (Supplementary Figures 4-6) compared to the in all settings and, under substantial sex differences, performed better than .In the simulations where pleiotropy was present, still produced a lower MSE when estimating β F compared to (Supplementary Figure 7). The penalized and robust IVW, contaminated mixture, cML, and MR-RAPS performed similarly in this analysis. Additional calibration of Γ improved the estimation of the β F when males and females had the same causal effect, but not otherwise (Supplementary Figures 9-10). When increasing female sample sizes, the shrinkage approaches resulted nearly the same MSE for β F estimation as the ,indicating that no estimation efficiency is lost when using the shrinkage approaches, even though potentially less information is transferred from the male population (Supplementary Figures 11-12). The test of sex differences in the causal effect between groups showed that MR-RAPS controls Type I error rate close to the nominal level under most settings while maintaining similar power compared to other MR methods for detecting sex differences in causal effects (Supplementary Figures 13-14). Sex-specific causal estimates of the effect of sleep phenotypes on cardiovascular-related outcomes We estimated sex-specific causal effects of sleep phenotypes on CVD-related outcomes. The sample sizes for MVP’s sex-specific sleep GWAS are reported in Supplementary Table 4. Specifically, we considered five binary sleep traits: OSA, insomnia, short sleep duration, long sleep duration, and excessive daytime sleepiness (sleepiness). The outcomes were six binary CVD-related phenotypes from All of Us: atrial fibrillation (AF), coronary artery disease (CAD), chronic kidney disease (CKD), heart failure (HF), hypertension (HTN), and type 2 diabetes mellitus (T2DM). Characteristics of AoU individuals in the variant-phenotype association analysis are summarized in Supplementary Table 5. The MVP sex-specific GWAS of sleep phenotypes were conducted using the same procedure described elsewhere ( 16 ). We only included individuals from the White Harmonized race/ethnicity and genetic ancestry (HARE) group, as the large HARE group. Based on these results, we selected variants and performed variant-outcome associations in the AoU dataset, focusing on the group of White individuals. For each trait, two types of analyses were performed: one adjusting for body mass index (BMI) and the other not adjusting for BMI. We only used common variants (minor allele frequency ≥ 0.01) with imputation quality score ≥ 0.8. More details are summarized in Method section. The results of sex-specific sleep GWASs are presented in Supplementary Figures 23-32. IV selection strategies We applied p-value thresholding and clumping procedures on exposure GWASs using the “clump_data” function from the “TwoSampleMR” R package (version 0.6.8). The clumping window was set to 10,000 kb, correlation threshold to 0.001, and the European population reference panel was used. Due to having a limited number of variants (or even no variants) with p-value<5×10 −8 and p-value<10 −7 in the female sleep GWASs, p-value threshold of 10 −5 was selected for IV selection in all analyses. The number of variants remaining after p-value thresholding (p-value<10 −5 ) and clumping is given in Supplementary Table 3. We then matched the list of variants selected as targeted IVs from exposure GWAS to the variants in AoU datasets. The exposure and outcome dataset were harmonized using the “harmonise_data” function from the TwoSampleMR R package (version 0.6.8) to remove variants with unmatched allele frequency and palindromic patterns. We employed two strategies for IV selection in the primary analyses: (i) sex-specific IVs were selected based on sex-specific GWAS results ,potentially resulting in different variants for male and female analyses; (ii) IVs were selected using APM shrinkage estimates ,where additional “potential” IVs that could not be selected using estimates (due to the smaller female sample size) were included by borrowing information from the male population. In the secondary analysis, we applied the estimate for IV selection in sex-combined analyses. Sex-specific causal estimates The primary results use the estimates with the MR-RAPS method, specifically developed for handling weak instruments and demonstrating superior performance in simulation studies. Figure 3 shows sex-specific causal effect estimates of sleep-related phenotypes on CVD-related outcomes, employing two different IV selection strategies: IVs selected based on sex-specific (primary, proposed) and sex-specific .Three pairs of causal effect estimates showed statistically significant associations (p < 0.05) when IVs were selected using :insomnia on CKD in females (OR: 1.23, 95% CI: 1.01–1.49), long sleep on HTN in females (OR: 1.03, 95% CI: 1.00–1.06), and short sleep on CAD in males (OR: 1.32, 95% CI: 1.03–1.69). In contrast, when was used for IV selection, enabling more potential IVs to be selected by borrowing information across sex groups, additional statistically significant causal estimates were identified. For instance, in females, we found a significant effect of OSA on T2DM (OR: 1.32, 95% CI: 1.05–1.66) in BMI-unadjusted analyses, and also an effect on HTN (OR: 1.14, 95% CI: 1.03– 1.25) in BMI-adjusted analyses. Notably, the causal relationships between insomnia and CKD (OR: 1.38, 95% CI: 1.07–1.79) and long sleep and HTN (OR: 1.04, 95% CI: 1.00–1.09) were replicated in the APM selection analyses (both from BMI-adjusted analyses). Among males, multiple causal associations were observed between OSA and CVD-related outcomes, including OSA on CKD (BMI-unadjusted: OR: 1.23, 95% CI: 1.03–1.47; BMI-adjusted: OR: 1.27, 95% CI: 1.02–1.59) and OSA on HF in the BMI-unadjusted analysis (OR: 1.30, 95% CI: 1.04–1.63). Download figure Open in new tab Figure 3: Results from sex-specific causal effect estimation Panel a provides sex-specific causal effect estimates with the corresponding 95% CIs based on IVs selected by using ,while panel b shows result using for IV selection. The estimated causal effects (from MR-RAPS) with variant-exposure effect estimates are displayed on an OR scale. In each panel, variant-phenotype estimates without BMI adjustment are displayed at the top, and those with BMI adjustment are shown at the bottom. Female-specific results are indicated in the color orange, while results from male-specific are shown in color green. Vertical dashed lines indicate the null causal effects. The exposure variables are shown as the titles of the sub-panels, while row names of the sub-panels provide the outcome variables. Statistically significant results (p-value < 0.05) for either the female or male-specific analysis are highlighted with yellow background. Abbreviations: CI: confidence interval; APM: adaptive posterior mean; IV: instrumental variable; MR-RAPS: MR using robust adjusted profile score method; MR: Mendelian randomization; OR: odds ratio; BMI: body mass index; OSA: obstructive sleep apnea; AF: atrial fibrillation; CAD: coronary artery disease; CKD: chronic kidney disease; HF: heart failure; HTN: hypertension; T2DM: type 2 diabetes mellitus. Sex-differences tests identified statistically significant difference in the causal effect of OSA on CKD, with a stronger effect in males ( Table 3 and Supplementary Figure 15). Also, there were sex differences in the causal effects of long sleep on several CVD-related outcomes: long sleep increased risk of CAD, CKD, HTN, and T2DM in females, but was protective in males. These sex differences were statistically significant in the APM IV selection analysis ( Table 3 and Supplementary Figures 17-18). The key findings from sex-specific causal effect estimation and sex-difference tests are summarized in Tables 2 and 3 . Full results, including the causal estimation using ,and ,as well as the sex-differences in causal estimates tests, are summarized in Supplementary Figures 15-18. Causal estimates from other considered MR methods are summarized in Supplementary Data 1 ( IV selection) and Supplementary Data2 ( IV selection). View this table: View inline View popup Download powerpoint Table 2: Top findings from sex-specific exposure-outcome causal effect estimation. View this table: View inline View popup Download powerpoint Table 3: Top findings from sex-differences test in causal effect estimation. View this table: View inline View popup Table 4: Two-sample MR methods used in both simulation studies and real data analyses. Results from secondary analyses We compared the causal effect estimates of MR-RAPS, which we used in the primary analyses, with those of MR-PRESSO ( 43 ), a widely used approach for detecting IVs with pleiotropic effects, and removing them, in MR analysis. The results from two methods are similar and are summarized in Supplementary Note 3 and Supplementary Figures 19-20. A comparison of causal effect estimates and for IV selection is presented in Supplementary Figure 21. In the male population, more consistent results were observed across different IV selection methods. For the analysis of OSA phenotype, the selection strategy identified more variants as IVs in both male and female populations. Several significant causal effects between OSA and CVD-related outcomes were only detected when was used for IV selection. These findings highlight an additional advantage of our proposed shrinkage estimate: it not only helps correct for weak IV bias in causal effect estimation but also enhances the IV selection process, increasing the potential for identifying novel causal effects. Lastly, we applied for IV selection in the sex-combined analysis. In this analysis, the variant-exposure effect estimates were based on ,and the variant-outcome effect estimates were computed from a sex-combined analysis in the AoU study. The results are shown in Supplementary Figure 22. Significant causal relationships between OSA and several CVD-related outcomes were identified, consistent with the findings from the primary analysis using for IV selection. However, some of the significant sex-specific findings, such as the female-specific causal effect of long sleep on HTN, were not identified in sex-combined analysis, likely because the is closed to due to the predominantly male sample size in MVP. Discussion We performed sex-specific analysis of the causal associations of sleep-related phenotypes on CVD-related outcomes. These are biologically important relationships with immediate clinical relevance. The primary rationale in our work was to ameliorate the weak instrumental bias in MR analysis. This can be done by incorporating information from auxiliary datasets. In our case, we used a male-specific dataset to improve female-specific statistics. Acknowledging likely sex differences between male and female individuals, this led to the need for an adaptive estimator, that will intelligently utilize information across the two sex groups. Female-specific causal estimation is limited by smaller sample sizes (relative to males), leading to large variability in γ F estimates, also known as weak IV bias. Thus, we introduced a framework to calibrate the estimates by borrowing information from the male group. We first demonstrated that the FE meta estimate is a special case of our proposed framework. We then proposed the APM estimate, which adaptively transfers information across sex groups by considering the strength of sex difference in γ , in a data-driven manner. Simulation studies demonstrated that: (i) employing the shrinkage estimates can substantially improve the efficiency of causal estimation for the population with smaller sample size; (ii) using estimate is less sensitive to the existence of sex differences in γ compared to the use of ;(iii) no estimation efficiency is lost by applying the to the population with larger sample sizes. In real data analyses, we identified several sex-difference patterns between the causal association of sleep phenotypes and CVD-related outcomes, including OSA on CKD as well as long sleep duration on several CVD-related outcomes, offering potential implications for research in sex-specific cardiovascular medicine. The method itself has broader applicability, and could be used to address sex differences for a large set of complex traits. We also applied the proposed framework to the male population, which had a larger sample size in our analysis. The results indicate that less information could be borrowed from the smaller female population. Although the shrinkage approaches did not improve causal effect estimation for the male population in our simulations, using them did not result in a loss of estimation efficiency compared to using the raw .This supports the usefulness of incorporating shrinkage approaches (especially the adaptive approach) into MR analysis when relevant summary statistics are available, regardless of the corresponding sample size. Due to the low number of IVs that could be used when applying a genome-wide significance threshold (5×10 −8 ) for selecting IVs, we considered a lower p-value threshold (p-value < 10 −5 ), as suggested by ( 27 ) as the minimal threshold value for selecting IVs in two-sample MR analysis. However, this strategy may increase the risk of including several weak instruments in MR analysis. Therefore, we applied advanced MR methods that address weak instrumental bias, including MR-RAPS ( 30 ), cML ( 42 ), the contaminated mixture model ( 41 ), and robust MR methods ( 40 ), in simulations and in real data analyses. We used MR-RAPS, which demonstrated superior performance in our simulation studies, as the primary method. The consistent findings between MR-RAPS and cML, the top two methods with the best estimation performance in simulation studies, also increase confidence in the data analyses findings. Additionally, to examine the influence of violating horizontal pleiotropy assumption due to using a lower p-value threshold to select IVs, we compared the causal effect estimated by MR-RAPS with MR-PRESSO ( 43 ), another widely used approach developed for detecting horizontal pleiotropy effects, in our primary and secondary analyses. The results from MR-RAPS and MR-PRESSO are highly consistent, increasing the expected reliability of our findings using MR-RAPS. Motivated by the need to address low female sample size in MVP data and the use of its summary statistics as exposure GWAS in MR analysis, our focus has been calibrating the “variant-exposure” effect estimates. However, we did not apply such calibrations to the outcome GWASs, as the balanced sample sizes across sex-specific GWAS in AoU suggest that further calibrations might not substantially improve the efficiency of sex-specific summary statistics, compared to the exposure GWASs. Simultaneously incorporating the proposed Bayesian framework into exposure and outcome GWASs will be of interest if both GWASs were performed with limited sample sizes. Some limitations of this work should be discussed. First, the lack of sex-specific sleep GWASs limits the selection of IVs using independent datasets. Instead, we directly utilized the MVP sleep GWASs for the IV selection. Various IV selection strategies were implemented, including sex-specific selection, sex-specific APM estimates selection in primary analysis, and sex-combined selection by using FE meta estimate in secondary analysis. The results, however, are only sometimes consistent across different selection methods, indicating that the estimation of causal effects is sensitive to the IV selection process. This sensitivity could be due to the strength of the IVs and differences in the underlying genetic architecture captured by the different IVs. The second limitation arises from the test of sex differences in exposure-outcome causal effects. We applied the conventional two-sample t-test, performed under the assumption of independence between the two compared samples. Therefore, we included only the causal estimates from the raw estimates ( and ) in our sex-differences test. We did not use any shrinkage estimates, because they may result in correlated causal effect estimates between sexes. A correlated version, which includes a covariance correction between estimates ( 32 ), can be exploited. However, quantifying the covariance between causal estimates across sexes using shrinkage approaches is challenging and a topic of future work. Lastly, we used only European ancestry individuals, due to the larger sample sizes available. Assessing the transferability of our findings to other populations, and potentially applying a similar shrinkage framework to improve IVs in non-European ancestry populations, is of interest for future research. Methods Sex-specific sleep GWAS in MVP We performed sex-specific GWAS for four sleep phenotypes (insomnia, long sleep duration, short sleep duration, and excessive daytime sleepiness), and used GWAS summary statistics from previously published GWAS of OSA ( 16 ), using MVP participants. Sleep phenotypes are described below. We only used the European (White) harmonized race/ethnicity and genetic ancestry (HARE) group to match the genetic ancestry of the available outcome sex-stratified GWAS from the AoU. We removed related individuals based on kinship coefficient ≥ 0.0884, where the kinship coefficients were estimated using KING v.2.0 ( 44 ), so that all analyses were conducted using an unrelated set of individuals (including, participants from across the female and male strata were unrelated). Sex chromosome checks confirmed biological sex. Variants were filtered based on imputation quality, requiring INFO score of at least R 2 ≥ 0.6, and minor allele frequency ≥ 0.01. The sample sizes of each sleep GWAS are summarized in Supplementary Table 4. Across analyses, sample sizes ranged from ∼15,000 to 30,000 (female stratum), and from ∼204,000 to 380,000 (male stratum). All analyses were adjusted for age and the first 10 PCs of genetic data. For each trait, two GWASs were performed: with and without BMI adjustment. BMI-adjusted models had BMI as a covariate using both linear and squared terms. Analyses were performed separately using PLINK v2.00a3LM ( 45 ) in each sex strata. Sleep phenotypes Sleep phenotypes were defined as previously reported ( 16 ). In detail, OSA was defined based on the VA electronic health record using a multimodal automated phenotyping (MAP) algorithm ( 46 ). The MAP algorithm was applied to predict general sleep apnea and OSA separately. For each trait, it resulted in a score, roughly mapping to a likelihood of having these condition. The final OSA definition combined both scores. Insomnia status was inferred using a MAP algorithm applied to relevant ICD codes and elements extracted using natural language processing. Insomnia status was inferred at the same age as was used in OSA analysis, i.e., a participant was considered to have insomnia if their age of first insomnia ICD code was on or before their age in the OSA analysis. All other sleep phenotypes were self-reported based on the baseline questionnaire administered to program participants. Long sleep was defined as sleep duration > 9, and short sleep as sleep duration < 6, where the sleep duration was defined based on the response to the question about hours of sleep in a typical day, with responses ranging from “5 or less” to “10 or more”, with increments of 0.5. The excessive daytime sleepiness phenotype was based on the questionnaire item “Feeling excessively sleepy during the day (does not include regular naps),” with yes or no answers. All five sleep phenotypes are binary. Association analysis with CVD-related outcomes in AoU We used short-read whole-genome sequencing (srWGS) data (version 7) from the AoU study to conduct associations analysis with CVD-related outcomes. To reduce memory storage requirements, we focused on genomics data pre-filtered by the following criteria: population-specific allele frequency ≥ 1% or population-specific allele count > 100 (data from: gs://fc-aou-datasets-controlled/v7/wgs/short_read/snpindel/acaf_threshold_v7.1). To align with summary statistics computed from the White HARE group in the MVP, we included only White individuals (as determined by self-reported race and ethnicity) in the AoU analysis. Related individuals were excluded based on information available in gs://fc-aou-datasets-controlled/v7/wgs/short_read/snpindel/aux/relatedness/relatedness_flagged_samples.tsv. We also restricted the analysis to adults aged 18 to 95, with BMI values ranging from 17 to 55. Following data preprocessing, approximately 114,000 White individuals were included, consisting of 67,600 females and 46,400 males. The exact sample size varied slightly depending on the phenotype analyzed. Six binary CVD-related phenotypes were considered: atrial fibrillation (AF), coronary artery disease (CAD), cardiovascular disease (CVD), heart failure (HF), hypertension (HTN), and type 2 diabetes mellitus (T2DM). Detailed selection criteria for these clinical outcomes and the corresponding SNOMED codes and OMOP Concept IDs in the AoU study, are provided in Supplementary Table 6. More details are summarized in Supplementary Note 4. After matching the variants identified as IVs according to the exposure dataset (MVP), we conducted single-variant association analyses with the six binary outcomes. We used logistic regression, adjusting for age and 16 genetic principal components (PCs) as covariates in the BMI-unadjusted analyses. For the BMI-adjusted analyses, the models were further adjusted for BMI, including both linear and quadratic terms. The effect sizes of the variants, i.e., log(OR) with the corresponding SD, were then extracted for use as summary statistics in the outcome GWAS. Simulation studies We performed simulation studies to evaluate the performance of exposure-outcome causal effect ( β F and β M ) estimation, using shrinkage variant-exposure effect estimates ( and ) as input to existing two-sample MR approaches. We then compared these to the use of un-calibrated (“raw”) effect estimates .We employed three settings to simulate sex-specific variant-exposure effect sizes ( γ F and γ M ) and considered different levels of sex difference in causal effect within each setting. Simulation data generation We simulated data via the structural model presented in Figure 1a , where we use S (for sleep phenotype) to represent the exposure variable and C (for CVD phenotype) to represent the outcome variable. We first generated individual-level genetic data for two datasets, corresponding to two populations used in an exposure GWAS (one population) and an outcome GWAS (second population). In each of the datasets, the data-generating process was carried out independently in the females and males. We set the female sample size of the exposure dataset to and the sample size of the male stratum to matching the sex sample size proportions in the MVP. In the outcome dataset, the female and male sample sizes were set to be the same: .For each genetic variant, allele counts were generated independently from Binomial (2,0.3) distribution, assuming that all variants are in linkage equilibrium with allele frequencies of 0.3. In the exposure dataset, we generated the exposure variable according to the following linear model: With p = 100 independent variants. U represents the unknown confounder, which we generated from Normal distribution with mean 0 but different variances in female and male populations: U Fi ∼ N (0,1) and U Mi ∼ N (0,0.5). The random errors ε Fi and ε Mi were generated from standard Normal distribution for both the female and male populations. The γ jF and γ Mj are the underlying sex-specific variant-exposure effect sizes. The effect size of the unknown confounder to the exposure association was set as α = 0.1 for both the female and male populations. Next, we use the same strategy to generate the genetic data, exposure variable, and unmeasured confounder for the outcome population, The superscript O is used to distinguish the exposure simulated in the outcome population from the exposure simulated in the exposure population. Based on the generated exposure variables and , we then generate the outcome variable through the following linear equation: where ϕ denotes the effect sizes of the unknown confounder on the outcome, which we set to ϕ = 0.1. The β F and β M are the underlying true female and male exposure-outcome causal effects, respectively. After generating the individual level data, we get the exposure GWAS and outcome GWAS summary statistics by fitting marginal linear regression models. The exposure data summary statistics and are obtained by fitting the following regression using the data generated from equations (6.1) and (6.2) : And the same procedure is applied for the outcome population The outcome GWAS summary statistics are and . We performed simulations in three simulation settings that differed in the way that variant-exposure effect sizes were simulated. In all settings, we considered a few models for the exposure-outcome effect sizes: no sex differences in exposure-outcome causal effects, with β F = β M = 0.1; and sex differences in exposure-outcome causal effect with β M = 0.1, and β F ∈ {0,0.05,0.15}. Simulation settings 1: Fixed sex differences in variant-exposure effects Here, we simulated from settings in which the sex-difference in γ F and γ M , when there were such differences, were weak and fixed across variants. Male-specific variant-exposure effect sizes were γ jM = 0.1, j = 1,2, …, 100 for all variants. For the female population, some variants had the same effect size as in the male population ( γ jF = 0.1), and a subset of variants (sized 10, 50, or 90 out of 100) had female-specific variant-exposure effect size γ jF = 0.05. For the sex differences in causal effect setting, we fixed 50% of variants to have sex differences in variant-exposure effect sizes. Simulation settings 2: Random sex differences in variant-exposure effects Here, we simulated from settings in which the sex-difference in variant-exposure effect sizes, when there were such differences, were strong. For each variant j = 1,2, …, 100, we generated a pair of female and male variant-exposure effect size from a bi-variate normal distribution , where we considered different covariance value ϕ to set the correlations between ( γ F , γ M ) equaling to either 0.5, 0.7, and 0.9 when the exposure-outcome causal effect was the same in males and females ( β F = β M = 0.1). When β M ≠ β F , the correlation between ( γ F , γ M ) was set to 0.7. Finally, the variant exposure effect sizes γ F , γ M were randomly generated in each simulation replicate. Simulation settings 3: Summary statistics from MVP OSA GWAS guide variant-exposure effect sizes Here, we used the summary statistics from the MVP OSA GWAS (without BMI adjustment) to guide the γ F and γ M values. First, we conducted p-value thresholding and clumping for the male-specific summary statistics. The p-value threshold was set at 10 −5 , with clumping window of 10,000 kb and a correlation threshold of 0.001, using the European population reference panel from 1000 genome. This resulted in a list of 110 variants. We randomly selected 100 variants used their male- and female-specific effect size estimates from the MVP GWAS in simulations. Two-sample MR methods We applied the following two-sample MR methods, described in Table 4 , over the raw and calibrated estimated variant effect sizes. Data availability Summary statistics from sex-specific sleep trait GWASs will become available on the dbGaP repository “Veterans Administration (VA) Million Veteran Program (MVP) Summary Results from Omics Studies”, study accession phs001672. Data from the NIH All of Us study are available via institutional data access for researchers who meet the criteria for access to confidential data. To register as a researcher with All of Us, researchers may use the following URL and complete the laid-out steps: https://www.researchallofus.org/register/ . The srWGS genomic data were available on: gs://fc-aou-datasets-controlled/v7/wgs/short_read/ . The R code used to implement the proposed semi-empirical Bayes framework, two-sample MR approaches, and simulation studies is available on the GitHub repository: https://github.com/Gene-Huang/sex-specific-MR . The harmonized summary statistics for both exposure and outcome GWASs used in our data analysis are also provided in the repository. Author contributions Conceptualization: TS. Methodology: Y-JH, TS. Formal analysis: Y-JH, NK. Resources, Project administration, Funding acquisition: KC, PWFW. Data Curation: KC, JEH. Writing – original draft: Y-JH, TS. Writing – review & editing: NK, DFL, JG, JEH, KC, PWFW, DJG, KMR. Visualization: Y-JH. Supervision: TS. Competing interests YJH, NK, DFL, JG, JEH, KC, PWFW, DJG, KMR, and TS declare no competing interests. Acknowledgements We are grateful to the Million Veteran Program participants and staff. This research is based on data from the Million Veteran Program, Office of Research and Development, Veterans Health Administration, and was supported by the Million Veteran Program-MVP000 and BX004821. This publication does not represent the views of the Department of Veteran Affairs or the United States Government. The Million Veteran Program core acknowledgements for publications are stated in Supplementary Note 5. We gratefully acknowledge All of Us participants for their contributions and also thank the National Institutes of Health’s All of Us Research Program for making available the participant data examined in this study. The All of Us Research Program is supported by the National Institutes of Health, Office of the Director: Regional Medical Centers: 1 OT2 OD026549; 1 OT2 OD026554; 1 OT2 OD026557; 1 OT2 OD026556; 1 OT2 OD026550; 1 OT2 OD 026552; 1 OT2 OD026553; 1 OT2 OD026548; 1 OT2 OD026551; 1 OT2 OD026555; IAA #: AOD 16037; Federally Qualified Health Centers: HHSN 263201600085U; Data and Research Center: 5 U2C OD023196; Biobank: 1 U24 OD023121; The Participant Center: U24 OD023176; Participant Technology Systems Center: 1 U24 OD023163; Communications and Engagement: 3 OT2 OD023205; 3 OT2 OD023206; and Community Partners: 1 OT2 OD025277; 3 OT2 OD025315; 1 OT2 OD025337; 1 OT2 OD025276. The of Us Research Program would not be possible without the partnership of its participants. This research was further supported by National Institute on Aging grant R01AG080598 and by the National Heart, Lung, and Blood Institute grant R01HL161012. References 1. ↵ Khramtsova EA , Davis LK , Stranger BE . The role of sex in the genomics of human complex traits . Nat Rev Genet . 2019 Mar ; 20 ( 3 ): 173 – 90 . OpenUrl CrossRef PubMed 2. Mauvais-Jarvis F , Bairey Merz N , Barnes PJ , Brinton RD , Carrero J-J , DeMeo DL , et al. Sex and gender: modifiers of health, disease, and medicine . Lancet . 2020 Aug 22 ; 396 ( 10250 ): 565 – 82 . OpenUrl CrossRef PubMed 3. ↵ Ji H , Niiranen TJ , Rader F , Henglin M , Kim A , Ebinger JE , et al. Sex differences in blood pressure associations with cardiovascular outcomes . Circulation . 2021 Feb 16 ; 143 ( 7 ): 761 – 3 . OpenUrl CrossRef PubMed 4. ↵ Zeng L-N , Zong Q-Q , Yang Y , Zhang L , Xiang Y-F , Ng CH , et al. Gender Difference in the Prevalence of Insomnia: A Meta-Analysis of Observational Studies . Front Psychiatry . 2020 Nov 20 ; 11 : 577429 . OpenUrl PubMed 5. ↵ Dib R , Gervais NJ , Mongrain V. A review of the current state of knowledge on sex differences in sleep and circadian phenotypes in rodents . Neurobiol Sleep Circadian Rhythms . 2021 Nov ; 11 : 100068 . OpenUrl PubMed 6. ↵ Bonsignore MR , Saaresranta T , Riha RL . Sex differences in obstructive sleep apnoea . Eur Respir Rev . 2019 Dec 31 ; 28 ( 154 ). 7. ↵ Won CHJ , Reid M , Sofer T , Azarbarzin A , Purcell S , White D , et al. Sex differences in obstructive sleep apnea phenotypes, the multi-ethnic study of atherosclerosis . Sleep . 2020 May 12 ; 43 ( 5 ). 8. ↵ Millett ERC , Peters SAE , Woodward M. Sex differences in risk factors for myocardial infarction: cohort study of UK Biobank participants . BMJ . 2018 Nov 7 ; 363 : k4247 . OpenUrl Abstract / FREE Full Text 9. Connelly PJ , Currie G , Delles C. Sex differences in the prevalence, outcomes and management of hypertension . Curr Hypertens Rep . 2022 Jun ; 24 ( 6 ): 185 – 92 . OpenUrl PubMed 10. ↵ Schulte KJ , Mayrovitz HN . Myocardial infarction signs and symptoms: females vs . males. Cureus . 2023 Apr 13 ; 15 ( 4 ): e37522 . OpenUrl PubMed 11. ↵ Kasimatis KR , Abraham A , Ralph PL , Kern AD , Capra JA , Phillips PC . Evaluating human autosomal loci for sexually antagonistic viability selection in two large biobanks . Genetics . 2021 Mar 3 ; 217 ( 1 ): 1 – 10 . OpenUrl CrossRef PubMed 12. Pirastu N , Cordioli M , Nandakumar P , Mignogna G , Abdellaoui A , Hollis B , et al. Genetic analyses identify widespread sex-differential participation bias . Nat Genet . 2021 May ; 53 ( 5 ): 663 – 71 . OpenUrl CrossRef PubMed 13. ↵ Huang Y , Shan Y , Zhang W , Lee AM , Li F , Stranger BE , et al. Deciphering genetic causes for sex differences in human health through drug metabolism and transporter genes . Nat Commun . 2023 Jan 12 ; 14 ( 1 ): 175 . OpenUrl PubMed 14. ↵ Pulit SL , Stoneman C , Morris AP , Wood AR , Glastonbury CA , Tyrrell J , et al. Meta-analysis of genome-wide association studies for body fat distribution in 694 649 individuals of European ancestry . Hum Mol Genet . 2019 Jan 1 ; 28 ( 1 ): 166 – 74 . OpenUrl CrossRef PubMed 15. Huang Y , Hui Q , Gwinn M , Hu Y-J , Quyyumi AA , Vaccarino V , et al. Sexual differences in genetic predisposition of coronary artery disease . Circ Genom Precis Med . 2021 Feb ; 14 ( 1 ): e003147 . OpenUrl 16. ↵ Sofer T , Kurniansyah N , Murray M , Ho Y-L , Abner E , Esko T , et al. Genome-wide association study of obstructive sleep apnoea in the Million Veteran Program uncovers genetic heterogeneity by sex . EBioMedicine . 2023 Apr ; 90 : 104536 . OpenUrl PubMed 17. ↵ Koch E , Nyberg L , Lundquist A , Pudas S , Adolfsson R , Kauppi K. Sex-specific effects of polygenic risk for schizophrenia on lifespan cognitive functioning in healthy individuals . Transl Psychiatry . 2021 Oct 11 ; 11 ( 1 ): 520 . OpenUrl PubMed 18. Gui Y , Zhou X , Wang Z , Zhang Y , Wang Z , Zhou G , et al. Sex-specific genetic association between psychiatric disorders and cognition, behavior and brain imaging in children and adults . Transl Psychiatry . 2022 Aug 26 ; 12 ( 1 ): 347 . OpenUrl PubMed 19. ↵ Zhang C , Ye Y , Zhao H. Comparison of Methods Utilizing Sex-Specific PRSs Derived From GWAS Summary Statistics . Front Genet . 2022 Jul 8 ; 13 : 892950 . OpenUrl CrossRef PubMed 20. ↵ Mayne BT , Bianco-Miotto T , Buckberry S , Breen J , Clifton V , Shoubridge C , et al. Large Scale Gene Expression Meta-Analysis Reveals Tissue-Specific, Sex-Biased Gene Expression in Humans . Front Genet . 2016 Oct 13 ; 7 : 183 . OpenUrl CrossRef PubMed 21. Oliva M , Muñoz-Aguirre M , Kim-Hellmuth S , Wucher V , Gewirtz ADH , Cotter DJ , et al. The impact of sex on gene expression across human tissues . Science . 2020 Sep 11 ; 369 ( 6509 ). 22. ↵ Wingo AP , Liu Y , Gerasimov ES , Vattathil SM , Liu J , Cutler DJ , et al. Sex differences in brain protein expression and disease . Nat Med . 2023 Sep ; 29 ( 9 ): 2224 – 32 . OpenUrl CrossRef PubMed 23. ↵ Smith GD , Ebrahim S. “Mendelian randomization”: can genetic epidemiology contribute to understanding environmental determinants of disease? Int J Epidemiol . 2003 Feb ; 32 ( 1 ): 1 – 22 . OpenUrl CrossRef PubMed Web of Science 24. ↵ Zhao JV , Luo S , Schooling CM . Sex-specific Mendelian randomization study of genetically predicted insulin and cardiovascular events in the UK Biobank . Commun Biol . 2019 Sep 5 ; 2 : 332 . OpenUrl PubMed 25. Mielke MM , Miller VM . Improving clinical outcomes through attention to sex and hormones in research . Nat Rev Endocrinol . 2021 Oct ; 17 ( 10 ): 625 – 35 . OpenUrl PubMed 26. ↵ Ardissino M , Slob EAW , Carter P , Rogne T , Girling J , Burgess S , et al. Sex-Specific Reproductive Factors Augment Cardiovascular Disease Risk in Women: A Mendelian Randomization Study . J Am Heart Assoc . 2023 Mar 7 ; 12 ( 5 ): e027933 . OpenUrl PubMed 27. ↵ Burgess S , Butterworth A , Thompson SG . Mendelian randomization analysis with multiple genetic variants using summarized data . Genet Epidemiol . 2013 Nov ; 37 ( 7 ): 658 – 65 . OpenUrl CrossRef PubMed 28. Holmes MV , Ala-Korpela M , Smith GD . Mendelian randomization in cardiometabolic disease: challenges in evaluating causality . Nat Rev Cardiol . 2017 Oct ; 14 ( 10 ): 577 – 90 . OpenUrl CrossRef PubMed 29. ↵ Slob EAW , Burgess S. A comparison of robust Mendelian randomization methods using summary data . Genet Epidemiol . 2020 Jun ; 44 ( 4 ): 313 – 29 . OpenUrl CrossRef PubMed 30. ↵ Zhao Q , Wang J , Hemani G , Bowden J , Small DS . Statistical inference in two-sample summary-data Mendelian randomization using robust adjusted profile score . Ann Statist . 2020 Jun ; 48 ( 3 ): 1742 – 69 . OpenUrl 31. ↵ Ye T , Shao J , Kang H. Debiased inverse-variance weighted estimator in two-sample summary-data Mendelian randomization . Ann Statist . 2021 Aug 1 ; 49 ( 4 ). 32. ↵ Bernabeu E , Canela-Xandri O , Rawlik K , Talenti A , Prendergast J , Tenesa A. Sex differences in genetic architecture in the UK Biobank . Nat Genet . 2021 Sep 7 ; 53 ( 9 ): 1283 – 9 . OpenUrl CrossRef PubMed 33. ↵ Zhu C , Ming MJ , Cole JM , Edge MD , Kirkpatrick M , Harpak A. Amplification is the primary mode of gene-by-sex interaction in complex human traits . Cell Genomics . 2023 May 10 ; 3 ( 5 ): 100297 . OpenUrl PubMed 34. ↵ Mukherjee B , Chatterjee N. Exploiting gene-environment independence for analysis of case-control studies: an empirical Bayes-type shrinkage estimator to trade-off between bias and efficiency . Biometrics . 2008 Sep ; 64 ( 3 ): 685 – 94 . OpenUrl CrossRef PubMed Web of Science 35. Li H , Gail MH , Berndt S , Chatterjee N. Using cases to strengthen inference on the association between single nucleotide polymorphisms and a secondary phenotype in genome-wide association studies . Genet Epidemiol . 2010 Jul ; 34 ( 5 ): 427 – 33 . OpenUrl CrossRef PubMed 36. ↵ Li H , Gail MH . Efficient adaptively weighted analysis of secondary phenotypes in case-control genome-wide association studies . Hum Hered . 2012 Jun 15 ; 73 ( 3 ): 159 – 73 . OpenUrl CrossRef PubMed 37. ↵ Domínguez Islas C , Rice KM . Bayesian approaches to fixed effects meta-analysis . Res Synth Methods . 2022 Jul ; 13 ( 4 ): 520 – 32 . OpenUrl PubMed 38. ↵ Lin DY , Zeng D. Meta-analysis of genome-wide association studies: no efficiency gain in using individual participant data . Genet Epidemiol . 2010 Jan ; 34 ( 1 ): 60 – 6 . OpenUrl CrossRef PubMed Web of Science 39. ↵ Bowden J , Davey Smith G , Burgess S. Mendelian randomization with invalid instruments: effect estimation and bias detection through Egger regression . Int J Epidemiol . 2015 Apr ; 44 ( 2 ): 512 – 25 . OpenUrl CrossRef PubMed 40. ↵ Rees JMB , Wood AM , Dudbridge F , Burgess S. Robust methods in Mendelian randomization via penalization of heterogeneous causal estimates . PLoS ONE . 2019 Sep 23 ; 14 ( 9 ): e0222362 . OpenUrl CrossRef PubMed 41. ↵ Burgess S , Foley CN , Allara E , Staley JR , Howson JMM . A robust and efficient method for Mendelian randomization with hundreds of genetic variants . Nat Commun . 2020 Jan 17 ; 11 ( 1 ): 376 . OpenUrl CrossRef PubMed 42. ↵ Xue H , Shen X , Pan W. Constrained maximum likelihood-based Mendelian randomization robust to both correlated and uncorrelated pleiotropic effects . Am J Hum Genet . 2021 Jul 1 ; 108 ( 7 ): 1251 – 69 . OpenUrl CrossRef PubMed 43. ↵ Verbanck M , Chen C-Y , Neale B , Do R. Detection of widespread horizontal pleiotropy in causal relationships inferred from Mendelian randomization between complex traits and diseases . Nat Genet . 2018 May ; 50 ( 5 ): 693 – 8 . OpenUrl CrossRef PubMed 44. ↵ Manichaikul A , Mychaleckyj JC , Rich SS , Daly K , Sale M , Chen W-M. Robust relationship inference in genome-wide association studies . Bioinformatics . 2010 Nov 15 ; 26 ( 22 ): 2867 – 73 . OpenUrl CrossRef PubMed Web of Science 45. ↵ Chang CC , Chow CC , Tellier LC , Vattikuti S , Purcell SM , Lee JJ . Second-generation PLINK: rising to the challenge of larger and richer datasets . Gigascience . 2015 Feb 25 ; 4 ( 1 ): s13742. #x2013;015–0047–8. OpenUrl CrossRef 46. ↵ Liao KP , Sun J , Cai TA , Link N , Hong C , Huang J , et al. High-throughput multimodal automated phenotyping (MAP) with application to PheWAS . J Am Med Inform Assoc . 2019 Nov 1 ; 26 ( 11 ): 1255 – 62 . OpenUrl CrossRef PubMed 47. Bowden J , Davey Smith G , Haycock PC , Burgess S. Consistent Estimation in Mendelian Randomization with Some Invalid Instruments Using a Weighted Median Estimator . Genet Epidemiol . 2016 May ; 40 ( 4 ): 304 – 14 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted January 02, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following A semi-empirical Bayes approach for calibrating weak instrumental bias in sex-specific Mendelian randomization studies Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share A semi-empirical Bayes approach for calibrating weak instrumental bias in sex-specific Mendelian randomization studies Yu-Jyun Huang , Nuzulul Kurniansyah , Daniel F. Levey , Joel Gelernter , Jennifer E. Huffman , Kelly Cho , Peter W.F. Wilson , Daniel J. Gottlieb , Kenneth M. Rice , Tamar Sofer medRxiv 2025.01.02.25319889; doi: https://doi.org/10.1101/2025.01.02.25319889 Share This Article: Copy Citation Tools A semi-empirical Bayes approach for calibrating weak instrumental bias in sex-specific Mendelian randomization studies Yu-Jyun Huang , Nuzulul Kurniansyah , Daniel F. Levey , Joel Gelernter , Jennifer E. Huffman , Kelly Cho , Peter W.F. Wilson , Daniel J. Gottlieb , Kenneth M. Rice , Tamar Sofer medRxiv 2025.01.02.25319889; doi: https://doi.org/10.1101/2025.01.02.25319889 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15228) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6599) Geriatric Medicine (668) Health Economics (997) Health Informatics (4536) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9231) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a0071ee6de090db4',t:'MTc3OTU3MTM5NQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00