HCDPD: A Heterogeneous Causal Framework for Disease Pattern Detection in Medical Imaging

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 64,560 characters · extracted from preprint-html · click to expand
HCDPD: A Heterogeneous Causal Framework for Disease Pattern Detection in Medical Imaging | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search HCDPD: A Heterogeneous Causal Framework for Disease Pattern Detection in Medical Imaging View ORCID Profile Rongjie Liu , Chengchun Shi , Rui Song , Marc Niethammer , Tengfei Li , Hongtu Zhu doi: https://doi.org/10.1101/2025.04.15.25325904 Rongjie Liu 1 Department of Statistics, University of Georgia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rongjie Liu Chengchun Shi 2 Department of Biostatistics, London School of 
Economics and Political Science Find this author on Google Scholar Find this author on PubMed Search for this author on this site Rui Song 3 Amazon Find this author on Google Scholar Find this author on PubMed Search for this author on this site Marc Niethammer 4 Department of Computer Science and Engineering and Department of Neurological Surgery, University of California San Diego Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tengfei Li 5 Departments of Radiology, Computer Science, and Biostatistics, University of North Carolina at Chapel Hill Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hongtu Zhu 5 Departments of Radiology, Computer Science, and Biostatistics, University of North Carolina at Chapel Hill Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: htzhu{at}email.unc.edu Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Understanding the causal effects of diseases on body organs through medical imaging is crucial for advancing research and improving clinical outcomes. This paper introduces a novel causal inference framework, Heterogeneous Causal Disease Pattern Detection (HCDPD), designed to map the complex causal pathways from early-stage diseases to latent disease patterns and their manifestation in organs as observed in later-stage medical images. HCDPD serves as a potential outcome framework for multivariate responses. It is particularly valuable in scenarios where patients exhibit significant heterogeneity, while normal controls remain relatively homogeneous. Through the application of advanced Bayesian inference techniques, our method effectively estimates both direct and indirect causal effects within the HCDPD framework. 
We applied HCDPD to the Osteoarthritis Initiative (OAI) dataset, successfully identifying and delineating diverse disease patterns across different patients. This capability provides critical insights that can inform early interventions and tailor personalized treatment strategies in clinical practice. 1 Introduction Medical imaging techniques, such as Magnetic Resonance Imaging (MRI), are essential tools for examining the anatomy, functionality, and molecular pathways of various organs. These imaging methods significantly enhance the detection and analysis of biomarkers in both clinical and research settings ( Oei et al. 2022 , Kavur et al. 2021 , Zhu et al. 2023 ). Imaging biomarkers are particularly critical for diagnosing conditions like dementia and for evaluating cardiovascular risks through cardiac imaging. This supports clinical decision-making, treatment planning, and the assessment of new therapies in clinical trials ( Therriault et al. 2024 ). Over time, the field has expanded to include a variety of methodologies aimed at detecting disease patterns. These range from functional regression and latent variable models to advanced image analysis techniques that leverage region-specific features to identify disease signatures ( Liu & Zhu 2021 , Huang et al. 2022 , 2015, Davatzikos et al. 2008 ). Furthermore, the advent of deep learning has substantially enhanced automated feature extraction, facilitating the accurate identification of complex conditions such as brain tumors ( Sharif et al. 2020 , Manhas et al. 2022 ). Despite these advances, integrating causal inference with disease pattern detection within a potential outcome framework remains under-explored, highlighting a significant research gap. The potential outcome framework evaluates outcomes under an intervention relative to hypothetical scenarios in which the intervention did not occur. 
Central to this framework are the concepts of potential or counterfactual outcomes, grounded in assumptions such as the Stable Unit Treatment Value Assumption (SUTVA), ignorability, and positivity. Comprehensive reviews of these concepts can be found in Rubin (2005) and Li, Ding & Mealli (2023) . Recently, this framework has been extended to accommodate complex data types, such as spatial data, offering innovative methodologies and insights for causal inference across diverse research fields ( Reich et al. 2021 , Luo et al. 2024 ). Our proposed method, the Heterogeneous Causal Disease Pattern Detection (HCDPD), introduces a novel potential outcome framework tailored for comparing high-dimensional medical images under different treatment conditions across the same subjects ( Figure 1(a) ). The data consists of observations {( x i , g i , Y i ) : i = 1, …, n } collected from n independent subjects. Here, x i denotes baseline covariates (e.g., age, gender), g i represents an early-stage clinical diagnosis (with g i = 0 for control subjects and g i > 0 indicating case subjects), and Y i = { Y i,s : s ∈ 𝒮} is the observed high-dimensional medical image for subject i , where s indicates a pixel within a common spatial domain 𝒮. Download figure Open in new tab Figure 1: (a) The HCDPD framework: x i represents baseline covariates (e.g., age, gender), g i denotes an early-stage clinical diagnosis ( g i = 0: control, g i > 0: case subjects), Y i is a subsequent medical image, and B i,s indicates the latent disease pattern. Causal inference challenges include: (b) spatial location heterogeneity among patients and (c) spatial spillover effects within each patient. Under the classical causal inference framework, each individual i possesses multiple potential outcomes: Y i (0) and Y i ( g i ) for cases where g i > 0, where Y i ( g ) = { Y i,s ( g ) : s ∈ 𝒮} represents the medical image under treatment condition g . 
The Individualized Treatment Effect (ITE) for individual i is defined as ITE i ( g i ) = Y i ( g i ) − Y i (0) for g i > 0, while the Conditional Average Treatment Effect (CATE) is given by CATE i ( g , x ) = 𝔼 [ Y i ( g ) − Y i (0)| x i = x ] = µ g ( x ) − µ 0 ( x ), with µ g ( x ) = 𝔼 [ Y i ( g )| x i = x ] for all g ≥ 0. In contrast to many existing approaches within the potential outcomes framework ( Imbens & Rubin 2015 , Rubin 2005 , Li, Ding & Mealli 2023 ), significant challenges arise when aggregating ITE and CATE across individuals. We highlight two primary challenges in detail below. The first challenge is spatial location heterogeneity , referring to the significant variability of disease-affected regions across different patients ( Huang et al. 2015 , Liu & Zhu 2021 , Banerjee et al. 2017 , Sharif et al. 2020 , Shaker et al. 2017 ). To characterize this heterogeneity, we define an individual latent disease map for subject i as B i = { B i,s : s ∈ 𝒮}, where B i,s = 1 indicates a disease-affected pixel and B i,s = 0 denotes a normal pixel. Disease-affected regions, denoted as , generally differ in size, shape, and spatial location among patients ( Huang et al. 2015 , Liu & Zhu 2021 ). Traditional approaches within the potential outcomes framework often neglect this individual-level spatial heterogeneity, failing to distinguish treatment effects between disease-affected pixels and unaffected pixels . To demonstrate this issue, averaging the individual disease maps B i across subjects results in an empirical population disease probability map (PDPM), as illustrated in Figure 1(b) . The PDPM from real data shows regions with disease probabilities significantly greater than zero yet consistently below or near 0.5, underscoring substantial spatial location heterogeneity across individuals. The second challenge is the spatial spillover effect , which emerges due to strong spatial dependencies among neighboring pixels. 
In disease detection, the presence of disease in one location can influence surrounding tissues or areas, violating SUTVA. Violation of SUTVA due to spatial spillover effects leads to biased and inconsistent estimates, potentially causing incorrect causal inferences ( Corrado & Fingleton 2012 ). Figure 1(c) demonstrates an estimated disease map , highlighting multiple detected disease-affected areas rather than isolated disease pixels. This observation suggests that a pixel’s disease status B i,s is influenced by its neighboring pixels’ status, denoted as , which we calculate by averaging the treatment statuses of adjacent pixels. Addressing this spatial spillover is critical for accurate and reliable causal assessments in spatial medical imaging analysis. The literature on spatial causal inference can broadly be categorized into two main groups: methods that adjust for unmeasured confounders and methods that address spatial interference ( Pollmann 2020 , Reich et al. 2021 ). Within the first group, structured models such as Conditional Autoregressive (CAR) and Spatial Autoregressive (SAR) models are popular approaches for mitigating or eliminating confounding biases ( Banerjee et al. 2003 , Lee 2007 ). The second group, focusing on spatial interference, can be further divided into four distinct subcategories: (i) Exposure Mapping : Methods that define general forms of interference using exposure mapping ( Aronow & Samii 2017 ). (ii) Partial Interference : Approaches that divide populations into non-overlapping blocks to manage intra-block interference ( Sobel 2006 , Tchetgen & VanderWeele 2012 , Pollmann 2020 ). (iii) Local or Network-based Interference : Techniques employing local or network-based assumptions to evaluate exposure effects among experimental units situated in geographical spaces or networks ( Bakshy et al. 2014 , Puelz et al. 2022 ). 
(iv) Congestion or Pricing Interference : Recent advancements that introduce novel estimators to address interference arising from congestion or pricing dynamics in two-sided markets ( Munro et al. 2021 , Johari et al. 2022 ). Although these methodologies constitute a robust toolkit tailored to handle the diverse challenges posed by spatial data and interference patterns, none are directly applicable to the specific problem of disease pattern detection described earlier. In this paper, we introduce HCDPD with four major contributions as follows: (i) We are among the first to apply the potential outcomes framework to detect disease patterns in medical imaging, effectively quantifying the heterogeneous causal effects of disease on the human body. (ii) We propose the HCDPD framework, which integrates a functional outcome model, a latent exposure model, and SAR priors. The functional outcome model accounts for the effects of baseline covariates and unobserved latent disease maps on imaging outcomes, while the latent exposure model estimates the probability of disease-affected pixels based on observed covariates. The SAR priors capture spatial spillover effects in both imaging outcomes and latent disease maps. (iii) We utilize Markov Chain Monte Carlo (MCMC) techniques to quantify uncertainties in disease pattern detection and parameter estimation. This approach allows us to estimate posterior distributions, which facilitate causal inference regarding the impact of treatments or exposures on outcomes. (iv) We perform sensitivity analyses to evaluate the impact of different prior distributions or model specifications on the causal estimates. Additionally, we assess counterfactuals using the estimated parameters to infer causal effects of various interventions on different outcomes. The code and data corresponding to our proposed method are also publicly available on GitHub. 
The paper is structured as follows: Section 2 outlines the motivation based on real Osteoarthritis Initiative (OAI) data and the key scientific questions. Section 3 introduces the HCDPD framework, including its model structures, Bayesian estimation methods, and a sensitivity analysis. Section 4 demonstrates the effectiveness of the proposed method through simulations and experimental validation. Section 5 presents an analysis of real OAI data, followed by the conclusions. Finally, Section 6 provides a concise summary of the study. 2 Data Description This paper analyzes data from the OAI, a comprehensive, multi-center observational study conducted over a span of more than ten years (2004-2019), involving both men and women aged 45 to 79 ( Attur et al. 2020 ). The study enrolled 4,796 participants between 2004 and 2006, consisting of three sub-cohorts: a progression sub-cohort (n=1,389), an incidence sub-cohort (n=3,285), and a normal control unexposed reference sub-cohort (n=122). Detailed information about the OAI is available at http://www.oai.ucsf.edu/ . This study focuses primarily on baseline observations to explore the causality between individual osteoarthritis (OA) patterns and corresponding knee cartilage thickness maps. Knee OA is the leading cause of chronic pain, affecting approximately 21.2% of the U.S. population ( Fallon 2023 ). It is a primary contributor to disability, often necessitating medical intervention. With an aging population and rising obesity rates, the prevalence of knee OA is projected to increase significantly ( Heidari 2011 ). Previous research has established correlations between clinical variables such as age, gender, and body mass index (BMI) and the severity of knee OA ( Schaefer et al. 2017 , Huang et al. 2022 ). Furthermore, comorbid conditions like diabetes mellitus and obesity have been associated with increased pain severity in knee OA patients ( Shin 2014 ). 
Thus, our study aims to identify individual-specific disease patterns of knee OA and evaluate their causal impacts. MRI is a valuable tool for assessing knee joint degeneration due to its ability to capture detailed characteristics of the joint, including cartilage morphology and biochemical composition ( Huang et al. 2022 ). This study mainly focuses on the left knee femoral cartilage (FC) thickness maps extracted from the MRI scans of 4,418 subjects (2,566 females and 1,852 males) at baseline in OAI. The detailed image preprocessing steps can be found in Section 5.1. The MR images, acquired using 3.0 Tesla Siemens Trio MRI scanners, have uniform dimensions of 384 × 384 × 160 with a resolution of 0.36 × 0.36 × 0.7 mm³ per voxel, and are labeled with Kellgren-Lawrence grades (KLG, Kellgren & Lawrence 1957 ) ranging from 0 (normal) to 4 (severe osteoarthritis). The detailed demographic and KLG information in our study is summarized in Table 1 . View this table: View inline View popup Download powerpoint Table 1: The detailed demographic (Gender, BMI, and Age) and KLG information in our study. The means and standard deviations for BMI and Age are reported for each KLG group as well. This paper seeks to address three key scientific questions as follows: (Q1) How can we estimate ITEs for all subjects with g i > 0? (Q2) How can we establish the causal pathway from KLG to individual OA-related abnormal patterns to corresponding knee cartilage thickness maps? (Q3) How can we evaluate the spatial location heterogeneity across patients? The subsequent sections of the paper present a causal inference framework designed to answer these questions. 3 Methodology To address (Q1)-(Q3) in Section 2 , we introduce the HCDPD framework, including causal estimands and identification assumptions ( Section 3.1 ), statistical modeling ( Section 3.2 ), and Bayesian inference (Section 3.3). 
3.1 Causal Estimands and Identification Assumptions To address (Q1) , our first set of causal estimands is all ITEs in the disease group. Recall that ITE i ( g i ) = (ITE i,s ( g i ) : s ∈ 𝒮) = Y i ( g i ) − Y i (0) for the i -th subject with g i > 0. Since we only have Y i ( g i ) in the disease group, it remains to estimate Y i (0). We estimate Y i (0) via regression to learn its conditional mean 𝔼 [ Y i (0)| x i ]. This leads to the following ITE estimator: where µ 0 ( x i ) = ( µ 0, s ( x i ) : s ∈ 𝒮) = ( E [ Y i,s (0)| x i ] : s ∈ 𝒮). We begin by recalling and introducing some notations. For each s ∈ 𝒮, we denote its set of spatial neighboring grids (e.g., the nearest neighboring grids) in 𝒮 as 𝒩 ( s ). For the i -th subject with g i > 0, recall that the disease region is defined by . We define the neighboring closure of as . Furthermore, we define as the normal region of the i -th subject, which is an open set of 𝒮. If the i -th subject belongs to the control group, then and . We next impose the following stability condition. Assumption 3.1. (Stability): The distribution of ( Y i,s ( g i ), x i ) for each in the i-th case subject is the same as that of {( Y i ′ , s (0), x i ′ )} for all i′ in the control group . This assumption requires that the organ within the normal region of a case subject remains unchanged. It is grounded in the relative stability of organ structure and function across normal subjects, given the covariates (e.g., age) in x . The stability assumption will play a key role in modeling treatments and imaging outcomes in Section 3.2 . We need an unconfounded assignment mechanism assumption such that the treatment assignment is independent of the potential outcomes conditional on x . Assumption 3.2. (Unconfoundedness): The treatment assignment g i is independent of the potential outcomes { Y i ( g ) : g ≥ 0} conditional on x i . Assumption 3.2 states that there is no unmeasured confounding. Assumption 3.3. 
(Positivity): 0 < c min < P ( x i | g = 0) /P ( x i | g +) < c max < ∞ hold for all x i in the g + group, where P ( x | g ) denotes the conditional distribution of x in the [ g ] group . Assumption 3.3 is related to overlap and balance , referring to the similarity in the distributions of the covariates between the g + and g = 0 groups. Under Assumptions 3.1 and 3.3, we can consistently estimate and then approximate the disease region through modeling the effect of B i = { B i,s : s ∈ 𝒮} on for each i in the [ g +] group. To address (Q2) and (Q3) , our first strategy is to construct an estimator of the underlying disease region B i across all pixels s for the i -th patient. In practice, the B i can be approximated by either frequentist or Bayesian approaches. More details will be discussed in Section 3.3 . Next, to explicitly establish the causal pathway g i → B i → Y i in Figure 1 (a) , we also need to model p ( Y i ( g i )| g i , x i ) as follows: We need the following assumption about p ( Y i ( g i )| B i , x i ). Assumption 3.4. (Conditional Independence and Spatial Interference): , where u i = ( u i,s : s ∈ 𝒮) represents the spatial random effects , is the average treatment from 𝒩 ( s ), and | 𝒩 ( s )| is the number of pixels within 𝒩 ( s ). In Assumption 3.4, we address spatial interference specifically through information contained within 𝒩 ( s ) and introduce u i to capture spatial correlations among Y i ( g ). These points will be elaborated in Section 3.2 . Furthermore, to characterize OA-related spatial spillover effects from neighborhood 𝒩 ( s ), we incorporate the average treatment indicator alongside B i,s into the conditional distribution of Y i,s ( g i ). Similar strategies are commonly employed in models such as Gaussian hidden Markov models ( Besag 1986 ) and Potts models (Krahenbühl & Koltun 2011). The primary function of is to quantify the extent of local disease prevalence around pixel s . 
Practically, other statistics derived from B i,s could also be employed to enhance modeling of spatial spillover effects. To ensure the validity of our assumptions, we verify them using real data, detailed in the Supplementary Material S4. Under Assumptions 3.1-3.4, we introduce another set of causal estimands at pixel s for the i -th patient in the [ g +] group. These estimands capture individual disease region patterns and potential spatial heterogeneity across patients. We define the total spatial effect (TSE) at pixel s as follows: Furthermore, the TSE at pixel s can be divided into the direct spatial effect (DSE) and the indirect spatial effect (ISE) as follows: Given B i , the TSE captures the overall causal relationship between B i and Y i . Importantly, the indirect spatial effect, ISE i,s ( b ), effectively models spatial spillover by quantifying how the surrounding disease region influences the cartilage thickness outcome at pixel s . 3.2 Modeling Treatment and Imaging Outcomes To explicitly model g i → B i → Y i in Figure 1 , we introduce a hierarchical image-on-scalar regression model, including (i) a functional outcome model for $p(Y_i(g_i) \mid B_i, x_i)$ in Assumption 3.4 and (ii) a latent exposure model for $p(B_i \mid g_i, x_i)$. (i) Functional outcome model Given the OA-related treatment map, to characterize the effects of baseline covariates and treatment on imaging outcomes while accounting for spatial carryover effect and individual random variation, we introduce a functional outcome model as follows, $Y_{i,s}(g_i) = x_i^{\top}\alpha_s + \gamma B_{i,s} + \tau \bar{B}_{i,\mathcal{N}(s)} + u_{i,s} + \epsilon_{i,s}$ (5), where $\bar{B}_{i,\mathcal{N}(s)} = |\mathcal{N}(s)|^{-1}\sum_{s' \in \mathcal{N}(s)} B_{i,s'}$ is the average treatment indicator over the neighborhood 𝒩 ( s ), and α s is a p x -dimensional vector representing the fixed effects from observed confounding factors at pixel s . Parameters γ and τ quantify the OA-related treatment effects directly at pixel s and from its spatial neighborhood 𝒩 ( s ), respectively. We assume uniform OA-related treatment effects across pixels within the disease region { s ∈ 𝒮 : B i,s = 1}. 
Additionally, the terms { u i,s , s ∈ 𝒮} represent individual stochastic imaging outcomes modeled as a zero-mean Gaussian process, and ϵ i,s denotes Gaussian measurement error with distribution . Thus, under model (5), the potential imaging outcome at pixel s primarily depends on the OA-related treatments at pixel s and its neighborhood, alongside effects from observed confounding factors. Figure 1(a) presents the Directed Acyclic Graph (DAG) illustrating this hierarchical image-on-scalar regression model. In order to infer the individual processes u i,s from the imaging outcomes, we employ the Bayesian functional principal component analysis approach (FPCA, Kowal & Bourgeois 2020 , Zeng et al. 2021 ), where the observed imaging outcomes are projected into lower-dimensional representations and then the first few dominant principal components are used as the predictors in model (5). Specifically, we assume the individual imaging outcome process u i,s has the following Karhunen-Loève decomposition $u_{i,s} = \sum_{j=1}^{\infty} \eta_{i,j}\psi_j(s)$, where $\{\psi_j(s)\}_{j \ge 1}$ and $\{\eta_{i,j}\}_{j \ge 1}$ are, respectively, the corresponding orthonormal eigenfunctions and principal component (PC) scores of the i -th subject. In practice, we usually posit a similar model that truncates to the first J PCs of the imaging outcome process, that is, model (5) with $u_{i,s}$ replaced by $\sum_{j=1}^{J} \eta_{i,j}\psi_j(s)$ (6), where the eigenfunctions are derived based on the subjects at normal stage ( Liu & Zhu 2021 , Huang et al. 2022 ). Furthermore, we assume that the PC scores come from the zero-mean Gaussian distribution with shrinking variances, i.e., $\eta_{i,j} \sim N(0, \lambda_j)$ and $\lambda_1 \ge \lambda_2 \ge \cdots \ge \lambda_J > 0$. We select the truncation term J based on the fraction of explained variance (FEV) being greater than 85%, namely $\sum_{j=1}^{J}\lambda_j \big/ \sum_{j \ge 1}\lambda_j > 0.85$. (ii) Latent exposure model To further characterize the latent OA-related treatment map, we propose a latent exposure model: where logit(·) denotes the logit function and κ (·) is a function parameterized by θ B,s , representing the log odds ratio for case subjects at pixel s . 
For control subjects ( g i = 0), the log odds ratio is zero, implying B i,s = 0 for all pixels, consistent with the assumption . Typically, the function κ (·) is specified using domain knowledge. A common linear form is , where and θ B,s = ( β s , ϱ ) captures effects related to baseline covariates, KLG levels, and neighborhood influences. More complex structures, including nonlinear functions or higher-order interactions, can also be considered. 3.3 Posterior Inference of Causal Estimands We first introduce a prior independence assumption, ensuring independent prior distributions for θ Y in model (5) and θ B = { θ B,s } s ∈𝒮 in model (7): Assumption 3.5. (Prior Independence): Parameters θ Y (imaging outcome model) and θ B (exposure assignment model) are distinct and independent a priori . Assumption 3.5, common in Bayesian causal inference, facilitates model specification and simplifies computational procedures ( Li, Ding & Mealli 2023 ). Under Assumptions 3.1– 3.5, the posterior distribution of θ Y is independent of θ B . Thus, given the latent disease regions and priors π ( θ Y ), we fit the functional outcome model (5), sample from the posterior of , and construct FPCA representations for { u i,s }. Similarly, with the latent disease regions and prior π ( θ B ), we fit the latent exposure model (7) and sample from the posterior distribution of θ B . We then derive the plug-in estimator µ 0 ( x i ) in (1) as . Consequently, we estimate the individual treatment effect (ITE) for each subject as and the spatial causal estimands: and . Note that the estimated DSE remains consistent across pixels and subjects. The central aspect of estimating causal estimands is identifying the disease region B i for each individual i ∈ [ g +]. In a frequentist framework, we compute the standardized Individual Treatment Effect (SITE) estimator , where . Under Assumptions 3.1–3.3, is consistently estimable without explicit distributional assumptions for Y i ( g i ). 
Thus, we approximate the disease region as: for each i ∈ [ g +], where C q,s is the q % quantile (commonly 0.5 or 0.1) of the reference distribution ℱ 0, s , consistently estimated from the control group per Assumption 3.1. In contrast, the Bayesian approach provides both point estimates and posterior distributions of B i . Given posterior samples of θ Y and θ B , we generate M samples $\{B_i^{(m)} : m = 1, \ldots, M\}$ from the conditional posterior distribution. Then, we estimate the disease region as: $\hat{B}_{i,s} = \mathbb{1}\{M^{-1}\sum_{m=1}^{M} B_{i,s}^{(m)} > p_0\}$ for each i ∈ [ g +], where p 0 ∈ (0, 1) is a predefined threshold. We also derive individual posterior disease probability maps as $\hat{P}_{i,s} = M^{-1}\sum_{m=1}^{M} B_{i,s}^{(m)}$ for each i ∈ [ g +]. Additionally, the overall Population Disease Probability Map (PDPM) and subgroup-specific PDPMs based on KLG levels ( k = 1, 2, 3, 4) are computed as $\mathrm{PDPM}_s = n_{+}^{-1}\sum_{i \in [g+]} \hat{B}_{i,s}$ and $\mathrm{PDPM}_s^{(k)} = n_k^{-1}\sum_{i : g_i = k} \hat{B}_{i,s}$, respectively, where $n_{+}$ and $n_k$ denote the numbers of subjects in the [ g +] group and with KLG level k . To address uncertainty, correlation, and sparsity in spatial effects on imaging outcomes and OA-related latent exposures, we adopt a Bayesian approach utilizing spatial autoregressive spike-and-slab priors for parameters α s and coefficients in θ B . Specifically, for the l -th element α l,s in α s , we impose the following prior: where Bernoulli( ν α,l,s ), and d (0) denotes the degenerate distribution concentrated at zero. The complete details on priors and posterior inference are derived via Gibbs sampling, with specifics provided in the Supplementary Material S1 and S2. A crucial aspect in disease region detection using MCMC sampling of B i is selecting the threshold p 0 . The value of p 0 significantly influences the extent of identified disease regions—smaller values result in larger detected regions, an ongoing challenge in anomaly detection research ( Muñoz-Ramírez et al. 2022 ). Additionally, this threshold affects the estimation of causal estimands. We set p 0 = 0.5 for simulations and real data analyses. 
To further examine the sensitivity of this choice, we will perform a detailed sensitivity analysis, exploring how various p 0 values influence the variability of causal estimands. 4 Simulation Studies We assessed the performance of the proposed framework through simulation studies using semi-realistic data derived from actual 2D femoral cartilage (FC) thickness maps of the left knee obtained from the OAI study. Detailed descriptions of the OAI dataset and image preprocessing procedures are provided in Section 5 . In our simulation design, we incorporated key demographic and clinical covariates, Gender (coded as 1 for females), Age, and BMI, as potential confounding factors. In addition, we included two interaction terms: Gender × BMI and Gender × Age, to capture possible interaction effects. Both Age and BMI were standardized via Z-transformation. For both simulation studies and real data analyses, we employed consistent Gibbs sampling settings. Each MCMC run consisted of 2,200 iterations, with the first 200 iterations discarded as burn-in. To mitigate autocorrelation among samples, we implemented a thinning interval of 20, yielding 100 posterior samples. Posterior means computed from these thinned samples were used as point estimates for all model parameters. The convergence of our MCMC sampling was checked using multiple trace plots (see the Supplementary Material S3). The true parameters for simulation settings were based on coefficients α s and estimated from normal controls in the real dataset. Disease regions for each simulated patient were randomly selected across cartilage pixels, varying in size, shape (square or circle), and number. The disease-related coefficients were set as γ = −0.17 and τ = −0.22. Additionally, the first three principal components (PCs) from the real data analysis were used to generate individual stochastic imaging outcomes u i,s , with PC scores drawn from η i,j ∼ N (0, 8 − 2 j ) for j = 1, 2, 3. 
Given the generated disease regions and confounding factors, we fitted our latent exposure model (7), where , consistent with our real data analysis. The estimated coefficients β s , φ s , and ϱ served as the true parameters for simulations. We generated 100 datasets, each containing 320 synthetic thickness maps (200 normal controls and 120 patients with disease). Example thickness maps and disease regions for five randomly selected subjects are displayed in Figure 2 (Columns 1 and 2). Download figure Open in new tab Figure 2: Simulated thickness maps (Column 1), true disease regions (Column 2), detected disease regions (Column 3), and estimated causal effects (ITE (Column 4), TSE (Column 5), and ISE (Column 6)) for five randomly selected subjects. We applied our proposed method to these simulated datasets. The detected disease regions for selected subjects are shown in Figure 2 (Column 3). The overall accuracy was assessed using five metrics: Rand Index (RI), Adjusted Rand Index (ARI), Homogeneity (HOM), Completeness (COM), and Normalized Mutual Information (NMI). Averaged metrics per simulation are summarized in Figure S1. Our findings indicated: (i) high consistency between true and detected regions, with average RI around 0.95 and ARI around 0.80; (ii) HOM, COM, and NMI metrics consistently above 0.65, suggesting effective capture of disease region patterns. Additionally, the small variability across simulations (standard deviation < 0.015) demonstrated robustness in detecting disease regions. We evaluated the estimation performance of the varying coefficients { α s , β s , φ s }, and causal estimands (ITE, TSE, DSE, and ISE). 
For each simulation dataset, we calculated accuracy (ACC) in detecting sparsity structures in { α s , β s , φ s } and mean squared errors (MSE) defined as: The results summarized in Figure S2 (left) showed average MSE and ACC values of approximately 0.6 and 0.7 for α s , and 1.0 and 0.85 for { β s , φ s }, respectively, indicating robust performance in sparsity detection and parameter estimation. Next, we examined causal estimands (ITE, TSE, DSE, ISE). Figure 2 (Columns 4-6) displays estimated ITE, TSE, and ISE for selected subjects, demonstrating causal effect patterns closely aligned with disease region patterns. The MSEs for TSE and ISE across all diseased patients were computed as: Figure S2 (right) summarizes these results, with DSE yielding an MSE of 0.0002, highlighting our method’s reliability in estimating causal effects. Finally, we conducted two sensitivity analyses to evaluate robustness: (i) varying the threshold p 0 in MCMC sampling and (ii) modifying Assumption 3.2 about unmeasured confounding. For the first analysis, we tested p 0 values ranging from 0.3 to 0.8 and found negligible impacts on TSE and ISE estimation (Figure S3, left). In the second analysis, we introduced a binary unmeasured confounder w i,s , extending model (6) as: where ξ y measures confounder influence. Testing ξ y values from 0 to 0.15, we observed minimal bias for ξ y ≤ 0.1, with biases increasing moderately thereafter (Figure S3, right). These analyses demonstrate our method’s robustness to moderate violations of Assumption 3.2. 5 Real Data Analysis 5.1 MRI data preprocessing In this study, we implemented the image preprocessing pipeline introduced by Huang et al. (2022) to derive two-dimensional (2D) FC thickness maps from MRI data obtained in the OAI study. This pipeline comprises four main steps: segmentation and meshing, computation of 3D thickness maps, registration, and projection onto a 2D plane. 
First, we segmented the FC region from 3D MRI scans using a U-Net-based neural network ( Huang et al. 2022 ). After segmentation, we constructed triangular meshes for each FC region using the marching cubes algorithm. Cartilage thickness at each mesh vertex was then calculated by measuring the shortest distance from the vertex to the opposite cartilage surface. Next, the cartilage meshes were spatially aligned to a common atlas space via a deep registration network. This step ensured consistency by mapping each MR image to an unbiased atlas, which was previously constructed using an atlas-building method ( Huang et al. 2022 ). In the final stage, we projected the 3D atlas-aligned points onto a 2D plane and interpolated the thickness measurements to generate spatially coherent, flattened 2D FC thickness maps. Ultimately, we obtained baseline left-knee FC thickness maps for 4,418 subjects (2,566 females and 1,852 males) from the OAI study. 5.2 Addressing questions (Q1)-(Q3) We fitted our proposed models (5) and (7) to the OAI data, in which . Besides the extracted left knee FC thickness maps, we also considered some baseline covariates (Age, Gender, BMI) and their interactions (Gender × BMI and Gender × Age) as possible confounders, where both BMI and Age were normalized through the Z-transformation. By applying the FPCA, we chose the top 30 PCs that can explain over 85% of variance in the imaging outcomes. The illustration of all the selected PCs can be found in Figure S5. The estimated varying coefficients { α s , β s , φ s } are presented in Figure S6. To more effectively assess the significance of the estimated effects and identify regions with notable impact, the t-test statistics for the varying coefficients across pixels are presented in Figure 3 . Download figure Open in new tab Figure 3: OAI data analysis: t test statistic maps of estimated α s in model (5) (top) and t test statistic maps of estimated β s and φ s in model (7) (bottom). 
The key findings are summarized as follows. First, with respect to overall average cartilage thickness, we observe high absolute t -statistics across multiple subregions of the left knee FC, reflecting the impact of various confounding factors. Notably, gender-related effects are not uniformly distributed. For example, in the subregion highlighted by the yellow circle in Figure 3 , there is no significant difference between males and females. In contrast, BMI and Age exhibit more consistent and widespread associations with cartilage thickness throughout the left knee FC. Interestingly, interaction terms such as Gender × BMI and Gender × Age show significant effects in the same subregion, suggesting that older females with higher BMI are more prone to reduced cartilage thickness and smaller FC volumes. These findings align well with existing clinical evidence in the literature ( Silverwood et al. 2015 , Szilagyi et al. 2023 ). Second, different confounders appear to drive consistent spatial patterns of disease associations across most subregions of the left FC. For instance, certain regions, indicated by the green arrow in the second row of Figure 3 , show no significant association with any of the covariates considered, indicating potential robustness or distinct biological behavior in those areas. Next, to address the scientific question (Q3), i.e., the spatial location heterogeneity across patients, we investigated the detected disease regions and their heterogeneity across subjects. We randomly selected one OA patient from each of the four KLG groups (KLG=1,2,3, and 4), and their 2D FC thickness maps along with the detected disease regions are presented in Figure 4 (Columns 1 and 2). It can be found that the detected disease regions vary across subjects in terms of their number, size, shape, and location, which is consistent with our assumption of spatial heterogeneity. 
Besides the disease region at the individual level, we are also interested in the disease pattern at the population level. First, the PDPMs for each KLG-based subpopulation are presented in Figure 5 . It can be found that the population-level disease pattern becomes more prevalent as the KLG level increases. In addition to the KLG-based population-level PDPM, we also derived the PDPMs for different potential subgroups with distinct disease patterns. Following the idea in Huang et al. (2022) , we adopted the non-negative matrix factorization (NMF) method and applied it to the posterior probabilities of disease regions, , for all OA patients. We detected three subgroups, where the demographic information and the PDPM for each subgroup are presented in Table 2 and Figure 6(a) , respectively. View this table: View inline View popup Download powerpoint Table 2: OAI data analysis: the detailed demographic information (Gender, BMI, Age, KLG) in each of the three detected subgroups. The mean and standard deviations for BMI and Age are reported for each subgroup as well. Download figure Open in new tab Figure 4: OAI data analysis: detected disease regions and estimated causal estimands for four patients randomly selected from different KLG groups (KLG=1,2,3,4 for row=1,2,3,4). Column 1: thickness map; Column 2: detected disease region; Column 3: estimated ITE; Column 4: estimated TSE; and Column 5: estimated ISE. Download figure Open in new tab Figure 5: OAI data analysis: PDPM for different KLG-based subpopulations. Column 1: PDPM for patients with KLG=1; Column 2: PDPM for patients with KLG=2; Column 3: PDPM for patients with KLG=3 or 4; Column 4: PDPM for all OA patients (KLG=1,2,3 or 4). Download figure Open in new tab Figure 6: OAI data analysis: (a) PDPMs for three detected latent subgroups; (b) Parcellations of the left knee FC (Li, Luo, Chen, Huang, Shen, Xu et al. 2023): 1. 
Lateral Posterior Femur (LPF), 2: Lateral Central Femur (LCF), 3: Lateral Anterior Femur (LAF), 4: Medial Anterior Femur (MAF), 5: Medial Central Femur (MCF), and 6: Medial Posterior Femur (MPF). Some key findings are summarized below. First, the three detected subgroups possess distinct disease patterns. According to the parcellations of the left knee FC (Li, Luo, Chen, Huang, Shen, Xu et al. 2023), the population disease patterns are located in three different subregions (see Figure 6(b) ), including Lateral Anterior Femur (LAF), Medial Central Femur (MCF), and Medial Anterior Femur (MAF). The cartilage thickness and volume in all of these subregions have been found commonly reduced among OA patients in existing literature ( Neogi et al. 2009 , Hayashi et al. 2014 , Roemer et al. 2022 ). In particular, in one study focusing on the heterogeneity of cartilage damage ( Roemer et al. 2022 ), most mild and severe OA patients had cartilage damage in MCF, which is consistent with the derived PDPM from Subgroup 2, in which 84% of patients were assessed with KLG≥ 2. Furthermore, compared to Subgroups 1 and 3, Subgroup 2 involves patients with higher BMI and higher percentage of severe OA patients (KLG=3 or 4), which is consistent with the disease pattern in the PDPM for patients with KLG=3 or 4 ( Figure 5 ). Similar disease patterns have also been revealed in existing literature ( Huang et al. 2022 , Li, Luo, Chen, Huang, Shen, Xu et al. 2023). To address the scientific question (Q1), we calculated the estimated ITEs for all OA patients. The ITEs for selected OA patients are presented in Figure 4 (Column 3). It can be found that the ITEs vary across patients and the OA-related effect (ITE with negative values) is highly consistent with the detected disease pattern. 
To further address the scientific question (Q2), i.e., the causal pathway g i → B i → Y i , we first estimated the KLG-related effect, , which is shown in Figure S6, and the corresponding t -statistic map shown in Figure 3 . Then, we calculated the estimated causal estimands, including TSE, ISE, and DSE, for all OA patients. The TSE and ISE for the four randomly selected OA patients are presented in Figure 4 (Columns 4 and 5), and the estimated DSE is -0.21, which is invariant across pixels and subjects. The results demonstrate that the causal estimands strongly depend on the heterogeneous disease regions, and the spillover effects exist in the spatial causal effects across different patients, highlighting both spatial location heterogeneity between patients and spatial spillover effects within each subject. Finally, similar to the simulation studies, we conducted two sensitivity analyses to evaluate robustness: (i) varying the threshold p 0 in MCMC sampling and (ii) modifying Assumption 3.2 about unmeasured confounding. We also checked whether Assumptions 3.1 and 3.3 are reasonably satisfied in our real data analysis. In the first sensitivity analysis, we tested p 0 at four different values, i.e., p 0 = 0.3, 0.4, 0.6 and 0.7. Since there was no ground truth in our real data analysis, we set the estimated causal estimands with p 0 = 0.5 as the gold standard. Then, under each setting of p 0 , posterior estimates of model parameters were obtained, and the MSEs of both TSE and ISE were calculated across diseased subjects. Results are summarized in Figure S7 (left). We observed that the ISE estimates remained stable across all p 0 settings, whereas the MSE of the TSE increased as p 0 grew larger. Given the relationship among TSE, ISE, and DSE, this result suggests that increasing p 0 leads to smaller detected disease regions, which in turn adversely affects the accuracy of DSE estimation, i.e., estimation of γ . 
In the second sensitivity analysis, we assessed the impact of unmeasured confounding by testing ξ y at three different values, i.e., ξ y ∈ {0, 0.1, 0.2}, where ξ y = 0 corresponds to the assumption of no unmeasured confounding. Thickness maps were simulated under model (8), using the previously estimated parameters from the real data as ground truth. We then refitted the proposed model to each simulated dataset and computed MSEs for the TSE and ISE across all subjects, treating the estimated TSE and ISE based on the original real data as the ground truth. The results, displayed in Figure S7 (right), indicate that the influence of unmeasured confounding becomes negligible when ξ y ≤ 0.1, suggesting that the proposed method is robust even under mild violations of Assumption 3.2. Next, to assess Assumption 3.1, which requires the consistency of pixel-wise cartilage thickness distributions between normal controls and non-diseased regions in OA patients, we estimated the density functions using pixel-level residuals from two sources: (i) all pixels in the normal control group and (ii) pixels located in radiographically normal regions within each KLG-based subpopulation. Estimated density functions of these residuals, shown in Figure S8, reveal substantial overlap across all KLG-based subpopulations and the control group. To verify Assumption 3.3, which posits similar distributions of covariates between normal controls and OA patients, we compared the distributions of interaction terms, i.e., Gender × Age and Gender × BMI, for normal controls and different KLG-based subpopulations. Estimated density functions for these covariates, shown in Figures S9 and S10, exhibit consistent patterns across all subpopulations. 6 Conclusion We have proposed a novel causal inference framework, HCDPD, designed to uncover complex causal pathways linking early-stage diseases to latent disease patterns and their manifestations in organs, as observed through later-stage medical imaging. 
By leveraging advanced Bayesian inference techniques, our method accurately estimates both direct and indirect causal effects within the HCDPD framework. We have validated our approach using the OAI dataset, where it has successfully identified and characterized diverse disease patterns across different patients. Notably, this innovative framework is versatile and can be adapted to analyze imaging datasets for various other diseases. Despite these advancements, several avenues for future research remain. First, it would be valuable to investigate how the clinical diagnosis g i and baseline covariates x i influence the size of the disease region. Given that can be interpreted as the relative size of the local disease region, we could develop a deep regression model (DRM) to examine the relationship between { g i , x i } and across pixels. To achieve this, we could identify a subset of pixels, 𝒮 0 ⊂ 𝒮, such that their corresponding neighborhoods {𝒩 ( s ), s ∈ 𝒮 0 } form a partition of 𝒮. The proposed DRM can then be formulated as: , where h (·) is an unknown function approximated by a deep learning architecture, such as a multilayer perceptron (MLP). Additionally, since the OAI study is a longitudinal cohort, it would be highly beneficial to extend our causal framework to a spatiotemporal version. This extension would allow for the comprehensive modeling of disease pattern heterogeneity across subjects, pixels, and time points, significantly enhancing the framework’s applicability in longitudinal studies. Data Availability Github Acknowledgment The authors used ChatGPT solely for language editing and proofreading. Footnotes Drs. Niethammer and Zhu are partially supported by the National Institutes of Health (NIH) grants 1R01AR082684 and 1OT2OD038045-01. Dr. 
Zhu is also partially supported by the Gillings Innovation Laboratory on generative AI and the National Institute on Aging (NIA) of the National Institutes of Health (NIH) grants U01AG079847, 1R01AG085581, RF1AG082938, and R01AR082684. The content is solely the responsibility of the authors and does not necessarily represent the official views of these institutes. References ↵ Aronow , P. M. & Samii , C. ( 2017 ), ‘ Estimating average causal effects under general interference, with application to a social network experiment ’, The Annals of Applied Statistics 11 ( 4 ), 1912 – 1947 . OpenUrl ↵ Attur , M. , Krasnokutsky , S. , Zhou , H. , Samuels , J. , Chang , G. , Bencardino , J. et al. ( 2020 ), ‘ The combination of an inflammatory peripheral blood gene expression and imaging biomarkers enhance prediction of radiographic progression in knee osteoarthritis ’, Arthritis Research & Therapy 22 , 208 . OpenUrl PubMed ↵ Bakshy , E. , Eckles , D. & Bernstein , M. S. ( 2014 ), Designing and deploying online field experiments , in ‘Proceedings of the 23rd International Conference on World Wide Web ’, pp. 283 – 292 . ↵ Banerjee , S. , Carlin , B. P. & Gelfand , A. E. ( 2003 ), Hierarchical Modeling and Analysis for Spatial Data , Chapman and Hall/CRC , New York . ↵ Banerjee , S. , Masulli , F. & Sushmita , M. ( 2017 ), ‘ Brain tumor detection and classification from multi-channel mris using deep learning and transfer learning ’, IEEE Access pp. 1 – 9 . ↵ Besag , J. ( 1986 ), ‘ On the statistical analysis of dirty pictures ’, Journal of the Royal Statistical Society Series B: Statistical Methodology 48 ( 3 ), 259 – 279 . OpenUrl CrossRef ↵ Corrado , L. & Fingleton , B. ( 2012 ), ‘ Where is the economics in spatial econometrics? ’, Journal of Regional Science 52 ( 2 ), 210 – 239 . OpenUrl CrossRef ↵ Davatzikos , C. , Fan , Y. , Wu , X. , Shen , D. & Resnick , S. M. 
( 2008 ), ‘ Detection of prodromal Alzheimer’s disease via pattern classification of magnetic resonance imaging ’, Neurobiology of Aging 29 ( 4 ), 514 – 523 . OpenUrl CrossRef PubMed Web of Science Durante , D. ( 2017 ), ‘ A note on the multiplicative gamma process ’, Statistics & Probability Letters 122 , 198 – 204 . OpenUrl ↵ Fallon , E. A. ( 2023 ), ‘ Prevalence of diagnosed arthritis—united states , 2019 – 2021 ’, Morbidity and Mortality Weekly Report 72 . OpenUrl ↵ Hayashi , D. , Felson , D. , Niu , J. , Hunter , D. , Roemer , F. , Aliabadi , P. et al. ( 2014 ), ‘ Preradiographic osteoarthritic changes are highly prevalent in the medial patella and medial posterior femur in older persons: Framingham oa study ’, Osteoarthritis and Cartilage 22 ( 1 ), 76 – 83 . OpenUrl CrossRef PubMed Web of Science ↵ Heidari , B. ( 2011 ), ‘ Knee osteoarthritis prevalence, risk factors, pathogenesis and features: Part I ’, Caspian Journal of Internal Medicine 2 ( 2 ), 205 . OpenUrl PubMed ↵ Huang , C. , Shan , L. , Charles , H. C. , Wirth , W. , Niethammer , M. & Zhu , H. ( 2015 ), ‘ Diseased region detection of longitudinal knee magnetic resonance imaging data ’, IEEE Transactions on Medical Imaging 34 ( 9 ), 1914 – 1927 . OpenUrl PubMed ↵ Huang , C. , Xu , Z. , Shen , Z. , Luo , T. , Li , T. , Nissman , D. , Nelson , A. , Golightly , Y. , Niethammer , M. & Zhu , H. ( 2022 ), ‘ DADP: dynamic abnormality detection and progression for longitudinal knee magnetic resonance images from the osteoarthritis initiative ’, Medical Image Analysis 77 , 102343 . OpenUrl PubMed ↵ Imbens , G. W. & Rubin , D. B. ( 2015 ), Causal inference in statistics, social, and biomedical sciences , Cambridge University Press , New York . ↵ Johari , R. , Li , H. , Liskovich , I. & Weintraub , G. Y. ( 2022 ), ‘ Experimental design in two-sided platforms: An analysis of bias ’, Management Science 68 ( 10 ), 7069 – 7089 . OpenUrl ↵ Kavur , A. E. , Gezer , N. S. , Barış , M. , Aslan , S. , Conze , P.-H. 
, Groza , V. et al. ( 2021 ), ‘ Chaos challenge-combined (CT-MR) healthy abdominal organ segmentation ’, Medical Image Analysis 69 , 101950 . OpenUrl CrossRef PubMed ↵ Kellgren , J. & Lawrence , J. ( 1957 ), ‘ Radiological assessment of osteo-arthrosis ’, Annals of the Rheumatic Diseases 16 ( 4 ), 494 . OpenUrl FREE Full Text ↵ Kowal , D. R. & Bourgeois , D. C. ( 2020 ), ‘ Bayesian function-on-scalars regression for high-dimensional data ’, Journal of Computational and Graphical Statistics 29 ( 3 ), 629 – 638 . OpenUrl Krähenbühl , P. & Koltun , V. ( 2011 ), Efficient inference in fully connected crfs with gaussian edge potentials , in ‘Proceedings of the 25th International Conference on Neural Information Processing Systems ’, pp. 109 – 117 . ↵ Lee , L.-F. ( 2007 ), ‘ Identification and estimation of econometric models with group interactions, contextual factors and fixed effects ’, Journal of Econometrics 140 ( 2 ), 333 – 374 . OpenUrl CrossRef Web of Science ↵ Li , F. , Ding , P. & Mealli , F. ( 2023 ), ‘ Bayesian causal inference: a critical review ’, Philosophical Transactions of the Royal Society A 381 ( 2247 ), 20220153 . OpenUrl PubMed Li , T. , Luo , T. , Chen , B. , Huang , C. , Shen , Z. , Xu , Z. et al. ( 2023 ), ‘ Charting aging trajectories of knee cartilage thickness for early osteoarthritis risk prediction: An MRI study from the osteoarthritis initiative cohort ’, medRxiv . ↵ Liu , R. & Zhu , H. ( 2021 ), ‘ Statistical disease mapping for heterogeneous neuroimaging studies ’, Canadian Journal of Statistics 49 ( 1 ), 10 – 34 . OpenUrl ↵ Luo , S. , Yang , Y. , Shi , C. , Yao , F. , Ye , J. & Zhu , H. ( 2024 ), ‘ Policy evaluation for temporal and/or spatial dependent experiments ’, Journal of the Royal Statistical Society Series B: Statistical Methodology 86 ( 3 ), 623 – 649 . OpenUrl ↵ Manhas , J. , Gupta , R. K. & Roy , P. P. 
( 2022 ), ‘ A review on automated cancer detection in medical images using machine learning and deep learning based computational techniques: challenges and opportunities ’, Archives of Computational Methods in Engineering 29 ( 5 ), 2893 – 2933 . OpenUrl Montagna , S. , Tokdar , S. T. , Neelon , B. & Dunson , D. B. ( 2012 ), ‘ Bayesian latent factor regression for functional and longitudinal data ’, Biometrics 68 ( 4 ), 1064 – 1073 . OpenUrl PubMed ↵ Muñoz-Ramírez , V. , Kmetzsch , V. , Forbes , F. , Meoni , S. , Moro , E. & Dojat , M. ( 2022 ), ‘ Subtle anomaly detection: Application to brain MRI analysis of de novo Parkinsonian patients ’, Artificial Intelligence in Medicine 125 , 102251 . OpenUrl PubMed ↵ Munro , E. , Wager , S. & Xu , K. ( 2021 ), ‘ Treatment effects in market equilibrium ’, arXiv preprint arXiv:2109.11647 . ↵ Neogi , T. , Felson , D. , Niu , J. , Lynch , J. , Nevitt , M. , Guermazi , A. et al. ( 2009 ), ‘ Cartilage loss occurs in the same subregions as subchondral bone attrition: a within-knee subregion-matched approach from the multicenter osteoarthritis study ’, Arthritis Care & Research: Official Journal of the American College of Rheumatology 61 ( 11 ), 1539 – 1544 . OpenUrl ↵ Oei , E. , Hirvasniemi , J. , van Zadelhoff , T. & van der Heijden , R. ( 2022 ), ‘ Osteoarthritis year in review 2021: imaging ’, Osteoarthritis and Cartilage 30 ( 2 ), 226 – 236 . OpenUrl PubMed ↵ Pollmann , M. ( 2020 ), ‘ Causal inference for spatial treatments ’, arXiv preprint arXiv:2011.00373 . ↵ Puelz , D. , Basse , G. , Feller , A. & Toulis , P. ( 2022 ), ‘ A graph-theoretic approach to randomization tests of causal effects under general interference ’, Journal of the Royal Statistical Society Series B: Statistical Methodology 84 ( 1 ), 174 – 204 . OpenUrl ↵ Reich , B. J. , Yang , S. , Guan , Y. , Giffin , A. B. , Miller , M. J. & Rappold , A. 
( 2021 ), ‘ A review of spatial causal inference methods for environmental and epidemiological applications ’, International Statistical Review 89 ( 3 ), 605 – 634 . OpenUrl PubMed ↵ Roemer , F. W. , Felson , D. T. , Stefanik , J. J. , Rabasa , G. , Wang , N. , Crema , M. D. et al. ( 2022 ), ‘ Heterogeneity of cartilage damage in kellgren and lawrence grade 2 and 3 knees: the most study ’, Osteoarthritis and Cartilage 30 ( 5 ), 714 – 723 . OpenUrl PubMed ↵ Rubin , D. B. ( 2005 ), ‘ Causal inference using potential outcomes: design, modeling, decisions ’, Journal of the American Statistical Association 100 ( 469 ), 322 – 331 . OpenUrl CrossRef Web of Science ↵ Schaefer , L. F. , Sury , M. , Yin , M. , Jamieson , S. , Donnell , I. , Smith , S. E. et al. ( 2017 ), ‘ Quantitative measurement of medial femoral knee cartilage volume–analysis of the OA Biomarkers Consortium FNIH Study cohort ’, Osteoarthritis and Cartilage 25 ( 7 ), 1107 – 1113 . OpenUrl PubMed ↵ Shaker , M. , Erdogmus , D. , Dy , J. & Bouix , S. ( 2017 ), ‘ Subject-specific abnormal region detection in traumatic brain injury using sparse model selection on high dimensional diffusion data ’, Medical Image Analysis 37 , 56 – 65 . OpenUrl PubMed ↵ Sharif , M. I. , Li , J. P. , Naz , J. & Rashid , I. ( 2020 ), ‘ A comprehensive review on multi-organs tumor detection based on machine learning ’, Pattern Recognition Letters 131 , 30 – 37 . OpenUrl ↵ Shin , D. ( 2014 ), ‘ Association between metabolic syndrome, radiographic knee osteoarthritis, and intensity of knee pain: results of a national survey ’, The Journal of Clinical Endocrinology & Metabolism 99 ( 9 ), 3177 – 3183 . OpenUrl PubMed ↵ Silverwood , V. , Blagojevic-Bucknall , M. , Jinks , C. , Jordan , J. , Protheroe , J. & Jordan , K. ( 2015 ), ‘ Current evidence on risk factors for knee osteoarthritis in older adults: a systematic review and meta-analysis ’, Osteoarthritis and Cartilage 23 ( 4 ), 507 – 515 . OpenUrl CrossRef PubMed ↵ Sobel , M. E. 
( 2006 ), ‘ What do randomized studies of housing mobility demonstrate? causal inference in the face of interference ’, Journal of the American Statistical Association 101 ( 476 ), 1398 – 1407 . OpenUrl CrossRef Web of Science ↵ Szilagyi , I. A. , Waarsing , J. H. , van Meurs , J. B. , Bierma-Zeinstra , S. M. & Schiphof , D. ( 2023 ), ‘ A systematic review of the sex differences in risk factors for knee osteoarthritis ’, Rheumatology 62 ( 6 ), 2037 – 2047 . OpenUrl PubMed ↵ Tchetgen , E. J. T. & VanderWeele , T. J. ( 2012 ), ‘ On causal inference in the presence of interference ’, Statistical Methods in Medical Research 21 ( 1 ), 55 – 75 . OpenUrl CrossRef PubMed ↵ Therriault , J. , Schindler , S. E. , Salvadó , G. , Pascoal , T. A. , Benedet , A. L. , Ashton , N. J. et al. ( 2024 ), ‘ Biomarker-based staging of alzheimer disease: rationale and clinical applications ’, Nature Reviews Neurology 20 ( 4 ), 232 – 244 . OpenUrl PubMed ↵ Zeng , S. , Rosenbaum , S. , Alberts , S. C. , Archie , E. A. & Li , F. ( 2021 ), ‘ Causal mediation analysis for sparse and irregular longitudinal data ’, The Annals of Applied Statistics 15 ( 2 ), 747 – 767 . OpenUrl Zhang , J.-T. & Chen , J. ( 2007 ), ‘ Statistical inferences for functional data ’, The Annals of Statistics 35 ( 3 ), 1052 – 1079 . OpenUrl Zhu , H. , Li , R. & Kong , L. ( 2012 ), ‘ Multivariate varying coefficient model for functional responses ’, The Annals of statistics 40 ( 5 ), 2634 . OpenUrl PubMed ↵ Zhu , H. , Li , T. & Zhao , B. ( 2023 ), ‘ Statistical learning methods for neuroimaging data analysis with applications ’, Annual Review of Biomedical Data Science 6 , 73 – 104 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted April 16, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. 
Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following HCDPD: A Heterogeneous Causal Framework for Disease Pattern Detection in Medical Imaging Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share HCDPD: A Heterogeneous Causal Framework for Disease Pattern Detection in Medical Imaging Rongjie Liu , Chengchun Shi , Rui Song , Marc Niethammer , Tengfei Li , Hongtu Zhu medRxiv 2025.04.15.25325904; doi: https://doi.org/10.1101/2025.04.15.25325904 Share This Article: Copy Citation Tools HCDPD: A Heterogeneous Causal Framework for Disease Pattern Detection in Medical Imaging Rongjie Liu , Chengchun Shi , Rui Song , Marc Niethammer , Tengfei Li , Hongtu Zhu medRxiv 2025.04.15.25325904; doi: https://doi.org/10.1101/2025.04.15.25325904 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Radiology and Imaging Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15227) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6597) Geriatric Medicine (668) Health Economics (997) Health Informatics (4534) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics 
(146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9230) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a0025f884921c13d',t:'MTc3OTUyMTYxNA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-06-16T06:25:30.133384+00:00