Hierarchical Bayesian Modelling of Interoceptive Psychophysics

doi:10.1101/2025.08.27.672360

Hierarchical Bayesian Modelling of Interoceptive Psychophysics

2025 · doi:10.1101/2025.08.27.672360

preprint OA: gold CC-BY-4.0

📄 Open PDF Full text JSON View at publisher

Full text 59,115 characters · extracted from preprint-html · click to expand

Hierarchical Bayesian Modelling of Interoceptive Psychophysics | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Hierarchical Bayesian Modelling of Interoceptive Psychophysics View ORCID Profile Arthur S. Courtin , Jesper Fischer Ehmsen , View ORCID Profile Leah Banellis , View ORCID Profile Francesca Fardo , View ORCID Profile Micah G. Allen doi: https://doi.org/10.1101/2025.08.27.672360 Arthur S. Courtin 1 Center for Functionally Integrative Neuroscience, Department of Clinical Medicine, Aarhus University , Aarhus ( Denmark ) 2 Institute of NeuroScience, Université catholique de Louvain , Brussels ( Belgium ) Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Arthur S. Courtin For correspondence: arthur.courtin{at}cfin.au.dk micah{at}cfin.au.dk Jesper Fischer Ehmsen 1 Center for Functionally Integrative Neuroscience, Department of Clinical Medicine, Aarhus University , Aarhus ( Denmark ) Find this author on Google Scholar Find this author on PubMed Search for this author on this site Leah Banellis 1 Center for Functionally Integrative Neuroscience, Department of Clinical Medicine, Aarhus University , Aarhus ( Denmark ) Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Leah Banellis Francesca Fardo 1 Center for Functionally Integrative Neuroscience, Department of Clinical Medicine, Aarhus University , Aarhus ( Denmark ) 3 Center Danish Pain Research Center, Department of Clinical Medicine, Aarhus University , Aarhus ( Denmark ) Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Francesca Fardo Micah G. Allen 1 Center for Functionally Integrative Neuroscience, Department of Clinical Medicine, Aarhus University , Aarhus ( Denmark ) 4 Cambridge Psychiatry Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Micah G. Allen For correspondence: arthur.courtin{at}cfin.au.dk micah{at}cfin.au.dk Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Interoception, the capacity to sense, perceive, and metacognitively appraise viscerosensory and homeostatic signals, is a growing focus in psychology and psychiatry. Adaptive psychophysical tasks now allow quantification of perceptual sensitivity, bias, and precision in cardiac and respiratory domains. However, accurately estimating these parameters often requires large numbers of trials or participants, posing practical challenges, especially in clinical research where participant availability and tolerance are limited. One approach to reduce participant burden while maintaining statistical rigour is to optimise data analysis. Here, we present hierarchical Bayesian models tailored for cardiac and respiratory interoceptive psychophysics that efficiently estimate sensitivity, bias, and precision at both individual and group levels. Using simulations and empirical data, we validate these models and demonstrate that they allow enhanced inference relative to conventional approaches. To support adoption, we provide openly-accessible resources, including a tutorial on how to implement and fit these models in R (written with researchers without modelling expertise in mind) and an app for sample-size justification. These tools facilitate robust, efficient, and generalisable modelling of interoceptive performance, enabling rigorous studies even with limited trials or participant availability. Introduction Interoception, the sensing and appraisal of internal bodily states, has emerged as a key focus in psychology and psychiatry, linking brain-body signalling to cognition, emotion, and self-awareness ( Cameron, 2001 ; Craig, 2002 ). For example, interoceptive monitoring of cardiac signals has been linked to emotional arousal and fear processing, and it is frequently investigated in relation to anxiety, panic, and psychosis ( Adams et al., 2022 ; Ehlers et al., 1995 ; Jeganathan et al., 2025 ). Despite this potential as a psychiatric biomarker, methods for quantifying and modelling interoception remain limited. To address this issue, we present a hierarchical Bayesian modelling approach for interoceptive psychophysics, spanning the respiratory and cardiac domains. Interoception research faces several methodological limitations, especially when considering clinical or developmental applications ( Desmedt et al., 2023 ). In particular, commonly used methods such as the heartbeat counting task or self-report questionnaires suffer from psychometric issues related to construct validity and other related issues ( Desmedt, Heeren, et al., 2022 ; Desmedt, Van Den Houte, et al., 2022 ; Zamariola et al., 2018 ). Classical psychophysical tasks address some of these limitations, but require a substantial number of trials for reliable estimates, resulting in long testing times and high cognitive burdens that are often impractical for paediatric or clinical populations. Even in healthy volunteers, extended testing sessions risk reducing data quality due to distraction, habituation, or fatigue. This trade-off between validity and feasibility indicates the need for methods that yield valid and reliable estimates while minimising burden on participants. Without such advances, rigorous investigation of the proposed link between interoception and mental health will remain substantially constrained. To this end, we recently developed and validated two new tasks: the Heart Rate Discrimination Task (HRDT) and the Respiratory Resistance Sensitivity Task (RRST). These tasks enable the estimation of individual participants’ cardiac and respiratory interoceptive psychometric functions, whose thresholds and slopes provide indices of perceptual sensitivity or bias and perceptual precision, respectively ( Legrand et al., 2022 ; Nikolova et al., 2022 ). Unlike classical tasks, the HRDT and RRST use an adaptive Bayesian algorithm to select stimulus intensity at each trial based on the participant’s previous responses, drastically reducing the number of trials required to obtain reliable estimates ( Kontsevich & Tyler, 1999 ). Adoption of these tasks partially helps resolve the trade-off, improving both theoretical validity and estimation efficiency. Beyond adaptive stimulus placement, the number of trials needed to reach a specific level of estimation precision can also be reduced by analysing the data more efficiently. In most psychophysical research (including in the exteroceptive domain), adaptive algorithms are used both to choose the stimuli and to calculate each participant’s threshold and slope. These values are then treated as single data points in the group analysis. This procedure is limited in two ways: it ignores the uncertainty associated with each individual estimate and it prevents partial pooling of information across participants. As a result, group-level parameters can be biased, variability estimates corrupted, and statistical power reduced, especially when only a few trials are used or when data is noisy. Following others ( Mezzetti et al., 2023 ; Prins, 2024 ), we use an alternative approach: rather than analysing the parameter estimates returned by the Bayesian adaptive algorithm, we fit a hierarchical Bayesian model directly to the raw trial-wise data (stimulus intensity and participant response). This model simultaneously estimates individual- and group-level psychometric function parameters, explicitly accounting for the nested structure of the data (e.g., trials are contained with participants, themselves within groups), propagating uncertainty, and enabling principled pooling of information across participants. Compared with conventional methods, this should yield more precise group-level estimates, offering equivalent statistical power with fewer trials. To evaluate the validity and practical benefits of this approach, we conducted a series of analyses on simulated and empirical data. First, we built hierarchical models tailored to our tasks and assessed their ability to recover true parameter values (parameter recovery) and to distinguish between alternative data-generating processes (model recovery). Second, we fit these models to a large existing dataset to derive empirically informed priors and establish a realistic generative basis for simulations. Third, we used simulations to systematically compare hierarchical and conventional analyses across a range of design choices (trial number, sample size, and inference on threshold or slope). This allowed us both to quantify the power gains afforded by hierarchical modelling and to provide empirically grounded guidance on the minimal testing burden required for rigorous inference, regardless of the analytical approach. Finally, we provide a documented toolkit of open resources to facilitate the adoption of hierarchical models by researchers working with the HRDT and RRST, regardless of their prior modelling experience. These resources include a Shiny app for exploring the impact of design choices on power to aid sample size selection and justification, ready-made Stan models for common experimental designs, and an R script demonstrating how to use the brms library to fit these hierarchical models to data, diagnose them, as well as interpret and report their results. Methods The simulations and analyses reported in this manuscript were conducted in MATLAB (The Mathworks, USA) and R ( R Core Team, 2021 ), using the Rstudio interface ( Allaire, 2012 ), and toolboxes Palamedes ( Prins & Kingdom, 2018 ), cmdstanr ( Gabry et al., 2024 ), loo ( Vehtari et al., 2024 ), and tidyverse ( Wickham et al., 2019 ). All simulation and analysis scripts, as well as the raw data used in this manuscript, are available in the project repository: https://github.com/embodied-computation-group/Hierarchical-Interoception . Tasks The Heart Rate Discrimination Task The Heart Rate Discrimination Task (HRDT) is a psychophysical paradigm designed to measure the bias and precision of individuals’ beliefs about their own cardiac signals (c.f. Legrand et al . for an in-depth description). Participants begin each trial by attending to their heart for a brief period, during which their actual heart rate is measured via pulse oximetry. This is followed by the presentation of auditory tones paced at a frequency slightly above or below their recorded heart rate. Participants are asked to judge whether the tone sequence was faster or slower than their perceived heart rate during the preceding interval, making a two-alternative forced choice. After making this binary decision, they provide a confidence rating on a visual analogue scale ranging from ‘guess’ to ‘certain’. To account for potential confounds such as general temporal estimation ability, the task includes an exteroceptive control condition in which participants make the same faster/slower judgement, but between two sequences of tones unrelated to their own heart rate. The Respiratory Resistance Sensitivity Task The Respiratory Resistance Sensitivity Task (RRST) is a psychophysical paradigm designed to measure sensitivity and precision of inspiratory resistance detection (c.f. Nikolova et al . for an in-depth description). On each trial, participants take two successive inhalations through a closed respiratory circuit. During one of these inhalations, a brief increase in resistance is introduced by partially compressing the circuit tubing using a computer-controlled motor. Following the pair of breaths, participants indicate which inhalation felt more difficult, making a two-interval forced-choice judgement. They then rate their confidence in this decision using a visual analogue scale ranging from ‘guess’ to ‘certain’. Adaptive stimulus selection Rather than using a pre-selected or random sequence of stimulus intensities, the HRDT and RRST rely on an adaptive Bayesian algorithm, known as the ψ (psi) method, to optimise stimulus placement based on the previous responses of the participant ( Kontsevich & Tyler, 1999 ). To do so, this method keeps track of an approximate probability distribution over the values of the slope and threshold of the psychometric function (PF), used at the beginning of each trial to select optimal stimulus intensity and updated at the end based on the participant’s response. Hierarchical modelling The ψ method also returns estimates for the slope and threshold of the PF, derived from the probability distribution it tracks across trials. However, using these point estimates as data for group-level inference is problematic because it ignores the uncertainty of each individual estimate and prevents information from being shared across participants. To address this, we propose to use hierarchical PF models to fit the trial-wise data generated by the tasks, enabling joint estimation of parameters across individuals and conditions. This approach preserves uncertainty, enables principled pooling of information, and improves the robustness of group-level inference. HRDT model parameterisation In the HRDT, the PF describes how the probability of judging a tone sequence as “faster” depends on the difference in beats per minute (ΔBPM) between the auditory feedback and either the participant’s own heart rate (interoceptive condition) or a reference tone (exteroceptive condition). This relationship is modelled as a sigmoid function with three parameters: the threshold (α), the slope (β), and the lapse rate (λ) ( Fig. 1 ). Download figure Open in new tab Figure 1. Psychometric function formulations Top panel: Psychometric functions (PF) used for the Heart Rate Discrimination Task (HRDT). The plot shows two possible PF formulations (i.e, the Gaussian and the Gumbel) relating ΔBPM to the probability of a “faster” response. The threshold (α) sets the midpoint of the function, the slope (β) controls its steepness, and the lapse rate (λ) adjusts the upper and lower asymptotes. The Gaussian assumes symmetry around the threshold, while the Gumbel allows for asymmetry. Bottom panel: PF used for the Respiratory Resistance Sensitivity Task (RRST). This Weibull function models the probability of a correct response as a function of relative obstruction. The lower asymptote is fixed at chance level (50%) and the lapse rate defines the upper asymptote. The equations for all PFs are shown on the right. The threshold α represents the ΔBPM value at which participants are equally likely to respond “faster” or “slower” (i.e., the point of subjective equality or bias). The slope β controls how quickly the probability of a “faster” response changes as the ΔBPM increases, providing an index of precision. The lapse rate λ accounts for incorrect responses due to inattentiveness or random responses and determines how much the lower and upper bounds of the function deviate from zero and one. Different mathematical formulations can be used for the PF ( Kingdom & Prins, 2010 ). Prior work with the HRDT used a Gaussian cumulative distribution function (CDF), which assumes symmetry around the threshold. However, if responses to positive versus negative ΔBPM differ in shape, a function that allows asymmetry, such as the Gumbel CDF, may provide a better fit. We therefore compare the Gaussian and the Gumbel formulations in our analyses. For each PF formulation, we implemented two hierarchical models in Stan ( Stan Development Team, 2019 ): one for single-condition data (interoceptive or exteroceptive only) and one for data including two conditions (e.g., the interoceptive and exteroceptive HRDT conditions). The models estimate participant-level threshold, slope, and lapse rate parameters, along with the mean and standard deviations of their group-level distributions ( Fig. 2 ). Download figure Open in new tab Figure 2. Structure of the hierarchical models for HRDT data Schematic of the hierarchical models used to estimate psychometric function parameters from trial-wise HRDT data, for either a single condition (left panel) or combined interoceptive and exteroceptive conditions (right panel). Each trial is modelled using stimulus intensity (x) and an observed binary response (y), which is predicted by a psychometric function with parameters specific to each participant and condition. The predicted response probability is denoted by θ. Participant-level parameters, threshold (α), slope (β), and lapse rate (λ), are drawn from group-level distributions with mean (μ) and standard deviation (σ). In the two-condition model, thresholds and slopes are allowed to vary by condition, while the lapse rate is held constant across conditions. We used weakly informative Gaussian priors for group-level parameters. In the single condition model, the group mean threshold prior (μ α ) was set so that 95% of its mass spanned from −100 to +100 ΔBPM. Because the impact of the slope parameter β depends on the specific PF formulation, we selected priors based on the implied spread of the function, defined as the ΔBPM range between 2.5% and 97.5% response probability, assuming no lapse. Group-level slope priors (μ β ) were chosen such that this spread would fall between 1 and 200 BPM. For the group mean lapse rate parameter (μ λ ), the prior covered values corresponding to rates ranging from zero to.25 with 95% of its probability mass. Group-level standard deviation parameters (σ α ,σ β ,σ λ ) were given half-Gaussian priors centred at zero, with variances matched to their corresponding group mean priors. In the two-condition model, both thresholds and slopes were allowed to vary between the interoceptive and exteroceptive conditions, while the lapse rate was constrained to be equal across conditions as it was treated as a nuisance parameter. This extension introduced four additional hyperparameters: the group-level difference in threshold and slope , and the corresponding between-subject standard deviations . Threshold and slope values for the reference condition were indexed with a subscript i. Priors for group differences were Gaussian with mean zero and matched variance to the reference condition priors. RRST model parameterisation In the RRST, the responses of the participants are transformed to reflect their accuracy (correct/incorrect) rather than the inhalation in which they indicated feeling a higher inspiratory load (first/second). We kept the same accuracy coding for the analysis. These binary responses are modelled using a PF that describes the probability of a correct response as a function of occlusion level. To model this data, we used the Weibull CDF as it is the only PF commonly used for strictly positive stimulus intensities ( Fig. 1 )( Kingdom & Prins, 2010 ). The threshold parameter α corresponds to the obstruction level at which participants respond correctly on ∼80% of trials in the absence of lapses, marking the Just Noticeable Difference. The slope parameter β determines how rapidly accuracy increases as inspiratory resistance increases, with higher values indicating faster transitions from chance to ceiling performance. The lower asymptote of the PF is fixed at 50% (chance level) and the upper asymptote is defined by the lapse rate λ, which accounts for errors due to inattention or reporting mistakes. We implemented a hierarchical model using the same structure as for the single-condition HRDT ( Fig. 2 , left panel) but using a Weibull CDF psychometric function. As with the HRDT, the model was implemented in Stan and fitted to trial-wise RRST data. We used weakly informative Gaussian priors for all group-level parameters. For the group mean threshold prior (μ α ), 95% of the prior mass was set to span the full stimulus range (zero to 17 in arbitrary obstruction units, corresponding to 0–100% relative obstruction). For the group mean slope prior (μ β ), we selected a prior corresponding to PF spreads ranging from one unit (5% obstruction) to 17 units (100% obstruction). The group mean lapse rate prior (μ λ ) was set to cover rates between zero and.25 with 95% probability. Group-level standard deviations (σ α ,σ β ,σ λ ) were assigned half-Gaussian priors centred at zero, with variances matched to the corresponding group mean priors. Parameter and model recovery We conducted simulation-based validation analyses to assess two aspects of model performance for the HRDT: whether our modelling framework can reliably identify the correct PF formulation (model recovery) and whether it accurately estimates group-level parameters (parameter recovery). To this end, we generated 100 synthetic datasets for each HRDT PF formulation (Gaussian and Gumbel). Each dataset included 50 simulated participants completing 80 trials of the HRDT (single-condition model). Hyperparameter values were drawn from the prior distributions defined in the corresponding hierarchical models. Each synthetic dataset was then fit using both hierarchical Stan models (one per PF formulation). For each model fit, we used four MCMC chains of 2,000 iterations each (1,000 warm-up, 1,000 sampling), with a target acceptance probability of.99 and a maximum tree depth of 12. To reduce computational cost, any model fit exhibiting divergent transitions after warm-up was excluded from further analysis. Model recovery was evaluated using approximate leave-one-out cross-validation (LOO-CV) with moment matching ( Vehtari et al., 2017 ). For each dataset, we identified the best-fitting model as the one with the highest expected log pointwise predictive density (ELPD). The proportion of correct recoveries (i.e., cases where the model used to generate the data was also selected as the best-fitting model) was tested against chance level (.5) by computing the recovery rate 90% credible intervals (CI) using a Beta(1,1) conjugate prior. Parameter recovery was evaluated for each group-level mean parameter (threshold, slope, and lapse rate) by computing the proportion of datasets for which the true generating value fell within the 90% posterior credible interval. This was done separately for each model, using only the datasets simulated with the corresponding PF formulation. As with model recovery, we computed 90% CIs for these recovery rates using a Beta(1,1) conjugate prior. Model fitting to experimental data To identify the optimal PF formulation for the HRDT and to derive empirically informed priors for future hierarchical modelling of HRDT and RRST data, we applied our models to the Visceral Mind Project (VMP) dataset. Dataset The VMP is a large-scale neuroimaging study that was conducted at the Centre of Functionally Integrative Neuroscience (Aarhus University), also described elsewhere ( Banellis et al., 2025 ; Böhme et al., 2025 ; Ehmsen et al., 2025 ; Nikolova et al., 2025 ). A total of 566 participants (360 females, 205 males, one other gender; median age = 24, age range = 18-56) took part in the study. All participants had normal or corrected-to-normal vision, were fluent in Danish or English, and met standard MRI eligibility criteria. Of these, 544 completed at least 60 trials of the HRDT. After excluding 31 participants due to technical errors, data from 513 participants were included in the analysis (324 females, 188 males, one other gender, median age = 24, age range = 18-56). Out of the total sample, 315 participants completed 120 trials of the RRST. Data from 48 were excluded due to suspected equipment malfunction, identified via unusually low task accuracy. The final RRST sample included 267 participants (184 females, 83 males, median age = 24, age range = 18-52). Model formulation, estimation, comparison, and interpretation We applied the hierarchical models described above to the VMP dataset. For the HRDT, each model (one two-condition model for each formulation) was fitted using four MCMC chains of 2,000 iterations per chain, with 1,000 warm-up and 1,000 sampling iterations. The RRST single-condition model was fitted using four chains of 6,000 iterations, with 3,000 warm-up and 3,000 sampling iterations. For both HRDT and RRST models, the average target acceptance probability was set to.90 and the maximum tree depth to 12. All model fits were evaluated using standard convergence diagnostics. We confirmed that no divergent transitions occurred during the sampling phase, that the sampler did not reach the maximum tree depth during that phase, and that posterior exploration was adequate as indicated by energy Bayesian fraction of missing information (E-BFMI) values greater than.1 for all chains. Additionally, all estimated R-hat values were below 1.01 for group-level parameters, consistent with well-mixed chains and reliable posterior estimates. To determine which PF provided the best fit for the HRDT data, we performed approximate LOO-CV using the loo package ( Vehtari et al., 2017 ). Model comparison was based on differences in expected log pointwise predictive density (ΔELPD). Differences smaller than four were interpreted as too minor to be of practical significance. For larger ΔELPD values, we considered a model to outperform another when the difference exceeded 1.64 times the standard error of the ΔELPD estimate, corresponding to a conventional one-sided test with a.05 significance threshold. To quantify the strength of evidence, we also calculated the probability that the lower-ranked model would outperform the best-fitting model, assuming Gaussian-distributed ΔELPD estimation errors. To evaluate condition effects in the HRDT, we examined the posterior distributions of the group-level differences in threshold and slope between interoceptive and exteroceptive conditions. We computed the posterior probability that each difference was less than zero and derived a two-sided pseudo p-value using the formula 2 * min ( P (θ < 0), 1 − P (θ < 0)). This pseudo-p-value was then compared to the conventional threshold of.05 to assess the presence of statistically meaningful condition effects. Power analysis To assess the impact of standardised effect size (Cohen’s d ) and key experimental design parameters (namely the number of participants and the number of trials) on statistical power, we conducted a simulation-based power analysis. The simulated experimental design was analogous to participants completing the interoceptive version of the HRDT under two conditions. Simulations For each combination of effect size, parameter of interest (slope or threshold), sample size, and trial count, we generated 100 synthetic datasets. We examined effect sizes of 0 (null), 0.2 (small), 0.5 (medium), and 0.8 (large); sample sizes of 15, 30, 60, and 120 participants; and trial counts of 30, 60, and 90 trials per condition. To build realistic simulated HRD datasets, we simulated synthetic participant responses using the PF shape identified as optimal in the VMP data (i.e., the Gaussian CDF) and we used the ψ method to adaptively select stimulus intensities based on these responses, mimicking the functioning of the HRDT toolbox. For the control condition, group-level hyperparameters were sampled from the posterior distribution estimated from the VMP dataset. Between-subject standard deviations of the treatment effect were set to half the standard deviation of the corresponding control condition parameter and the group mean difference was set to achieve the specified standardised effect size. Model parameterisation and estimation Each simulated dataset was fitted using the same hierarchical Bayesian model applied to the VMP dataset. Models were estimated using four MCMC chains with 2,000 iterations each (1,000 warm-up, 1,000 sampling), with a target acceptance probability of.90 and a maximum tree depth of 12. Model fits that failed to meet standard sampling quality criteria, defined as the absence of divergent transitions after warm-up, no iteration exceeding the maximum tree depth, and no chain exhibiting an E-BFMI below.1, were excluded from further analysis. Comparison of statistical power under different experimental and analysis designs Because the true effect size was known for each simulated dataset, we were able to directly assess statistical power, defined as the proportion of simulations in which the null hypothesis (H 0 ) was correctly rejected. We compared two analytical approaches. In the conventional approach, we used the t.test function from R on the estimates of the psychometric parameters (slope or threshold) returned by the ψ method. A p-value below.05 was considered sufficient to reject H 0 . In the hierarchical refitting approach, H 0 was rejected if the 95% posterior credible interval for the group mean condition difference did not include zero. To extrapolate power estimates beyond the specific combinations of effect size, sample size, and trial number used in the simulations, we fitted logistic regression models in Stan to predict the probability of H 0 rejection as a function of experimental parameters. The model was written as: where d is the standardised effect size (Cohen’s d ), s is the number of participants, t is the number of trials, and the κ terms are regression coefficients estimated from the data. Separate logistic regression models were fit for slope and threshold effects. Each was estimated using the same sampling parameters and diagnostic criteria used for the power analysis fits. Results Parameter and model recovery Model comparison successfully selected the generative psychometric function (PF) formulation at rates well above chance ( Fig. 3A ). Specifically, model recovery rates were 86% (90% CI [.76,.92]) for the Gaussian model and 97% (90% CI [.90,.99]) for the Gumbel model, with both significantly exceeding the 50% chance level (90% CI did not include.5). These results indicate that our modelling framework is capable of distinguishing between underlying generative functions with high reliability. Download figure Open in new tab Figure 3. Simulation-based model and parameter recovery analysis The upper left panel reports model recovery performance, showing the proportion of datasets in which the generating psychometric function (Gaussian or Gumbel) was correctly identified. Recovery rates significantly exceeded chance level (.5), with 90% credible interval (CI) shown in brackets. The remaining panels display parameter recovery results for the group-means: threshold, log(slope), and logit(lapse). Estimated values (y-axis) are plotted against the true generating values (x-axis), with 90% credible intervals represented by vertical bars (blue for Gaussian, yellow for Gumbel). The black identity line indicates perfect correspondence. Recovery rates for each model and parameter are indicated in the lower right of each panel. High recovery rates confirm the reliability of the hierarchical estimation procedure across psychometric function types. Posterior estimates for group-mean thresholds, slopes, and lapse rates closely matched the ground truth values used to simulate the data ( Fig. 3B-D ). Across all combinations of parameter types and function formulations, parameter recovery rates were high, ranging from 88% (90% CI [.79,.93]) to 97% (90% CI [.90,.99]), with most estimates tightly distributed around the identity line. The alignment between estimated and true hyperparameter values further supports the validity of the modelling approach. Model fitting to experimental data Heart Rate Discrimination Task Model comparison using the VMP HRDT dataset indicated that the Gaussian PF provided a significantly better fit than the Gumbel formulation (ΔELPD ± SE: ™95.8 ± 18.5, P(ΔELPD>0) <.001), consistent with a symmetrical mapping between stimulus intensity (ΔBPM) and response probability. The fitted Gaussian PF model revealed systematic differences between the interoceptive and exteroceptive conditions. Group-level posterior mean PFs and 95% CI are shown in Fig. 4 , together with a representation (shaded areas) of the posterior predictive distribution for individual participants’ PFs, illustrating inter-individual variability. Download figure Open in new tab Figure 4. HRDT model fit from the Visceral Mind Project dataset The top panels display the fitted psychometric functions for the interoceptive (blue) and exteroceptive (green) conditions of the Heart Rate Discrimination Task (N = 513). Solid lines show the group-level posterior mean PFs and dotted lines indicate the 95% credible intervals (CI). Shaded bands represent the expected range of psychometric functions for new participants at varying credible intervals (60%, 80%, 90%, and 95%). The bottom panels show posterior distributions of the group-level differences between conditions for the log-transformed slope (left) and the threshold (right). Distributions are centred well above zero, indicating that interoceptive judgements were both more biased (higher threshold) and less precise (lower slope) compared to exteroceptive judgements. The posterior probabilities for these differences being greater than zero exceed.999, confirming strong evidence for a condition effect. Posterior distributions of group-level differences between conditions provide clear evidence of these differences. Participants tended to underestimate their heart rate relative to an external auditory cue, as reflected by a positive threshold difference (posterior mean = 9.43, 95% CI [8.44, 10.41]). In addition, interoceptive judgements were less precise than exteroceptive ones, evidenced by a shallower slope (log-slope difference posterior mean = 0.47, 95% CI [0.42, 0.53]). These results indicate that interoceptive performance is characterised by greater bias and reduced reliability compared to exteroceptive comparison judgements. Respiratory Resistance Sensitivity Task The posterior mean RRST PF and corresponding 95% CI, displayed in Fig. 5 , indicate that participants needed high levels of obstruction (∼50%) to start reliably identifying the breath during which resistance was applied. However, most participants reached perfect discrimination at complete obstruction. As for the HRDT plot, inter-individual variability is illustrated by the shaded areas representing the posterior predictive distribution. Download figure Open in new tab Figure 5. RRST model fit from the Visceral Mind Project dataset The figure shows the fitted psychometric functions (PF) for the RRST (N = 267). The solid line indicates the group-level posterior mean PF, while the dashed line represents the 95% credible interval. Shaded regions show the posterior predictive range within which a new participant’s psychometric function is expected to fall, visualised across 60%, 80%, 90%, and 95% intervals. Recommended priors Based on the hierarchical fits to the VMP HRDT and RRST data, we derived empirically informed prior distributions for use in future applications of these models. These recommended priors are implemented in the Stan model files provided in the project repository. To facilitate broader accessibility, we also provide an R script demonstrating how to use the brms library to fit HRDT or RRST data using the recommended models and priors; diagnose the fits; and visualise, interpret, and report the results of these fits, without requiring expertise in Stan ( Bürkner, 2017 ). Simulation-based power analysis To evaluate how the number of trials and the number of participants influence our ability to estimate and detect differences in PF parameters (i.e., statistical power), we conducted a simulation-based power analysis. We also aimed to test the theoretical prediction that inference based on hierarchical models would have better power than the common practice of using conventional tests on point estimates returned by the ψ method. Simulation results ( Figs. 6 and 7 ) demonstrate that increasing either the number of participants or the number of task trials consistently increases statistical power. Power was generally higher for threshold than for slope comparisons under matched designs, with threshold comparison power approaching asymptotic levels at lower numbers of trials and participants. Download figure Open in new tab Figure 6. Simulation-based power analysis for a two-session within-participant design: effect on the HRDT threshold The top panels show simulated power curves for threshold comparisons under varying sample sizes (columns) and trial counts (rows) and across a range of standardised effect sizes (Cohen’s d). Points indicate the observed proportion of simulations in which the null hypothesis was rejected, with vertical lines showing 95% credible intervals. The orange and purple lines correspond to model fits from the hierarchical and paired t-test approaches, respectively. The bottom panels display the power surfaces predicted by the logistic regression model fit to the hierarchical approach simulations. Contour lines indicate combinations of sample size and trial count predicted to yield 80% power for each approach. Hierarchical modelling consistently outperforms the paired sample t-test on extracted ψ estimates. Download figure Open in new tab Figure 7. Simulation-based power analysis for a two-session within-participant design: effect on the HRDT slope The top panels show simulated power curves for slope comparisons under varying sample sizes (columns) and trial counts (rows) and across a range of standardised effect sizes (Cohen’s d). Points indicate the observed proportion of simulations in which the null hypothesis was rejected, with vertical lines showing 95% credible intervals. The orange and purple lines correspond to model fits from the hierarchical and paired t-test approaches, respectively. The bottom panels display the power surfaces predicted by the logistic regression model fit to the hierarchical approach simulations. Contour lines indicate combinations of sample size and trial count predicted to yield 80% power for each approach. Compared to threshold analyses, power increases more gradually, and hierarchical modelling again consistently provides improved sensitivity. Importantly, comparisons of analytical methods showed that hierarchical Bayesian modelling consistently outperformed the common approach of extracting point estimates from the ψ method and analysing them with paired-sample t-tests. This advantage was most pronounced under low-to-moderate sample sizes and effect sizes. For completeness, the Supplementary Materials also include a comparison between our hierarchical modelling approach and a principled use of the paired sample t-test relying on bootstrapping to propagate uncertainty. Posterior estimates from the logistic regression power models are reported in Table S1 (see Supplementary Materials) and can be used to guide future study design. To facilitate reuse, we provide an interactive Shiny application that allows users to compute statistical power as a function of effect size, number of participants, and number of trials. Discussion This study introduces hierarchical Bayesian modelling of interoceptive performance in the Heart Rate Discrimination Task (HRDT) and Respiratory Resistance Sensitivity Task (RRST). Our initial simulations confirmed that this approach can correctly identify the underlying psychometric function (PF) and accurately estimate group-level parameters. We then used it to fit HRDT and RRST data obtained from a large cohort of healthy volunteers. This allowed us to confirm that the Gaussian formulation of the PF provides a good fit to HRDT data and to derive empirically-informed prior distributions for future research. Finally, we used simulations to compare the statistical power achieved by inference based on hierarchical models compared to the common practice of running tests on the parameter estimates returned by the HRDT for each participant and condition. These simulations showed that hierarchical models consistently achieve superior or equal power to the conventional inference strategy. Our results demonstrate the practical advantages of hierarchical Bayesian modelling over conventional inference strategies. In addition to being more principled, hierarchical models often achieve comparable statistical power with less data. For example, 30 trials per participant across 20 participants are sufficient to detect a standardised threshold difference of 0.5 with 80% power when using hierarchical models, whereas the conventional approach would require 30 participants (a 50% increase). Similar benefits are observed for slope estimation: 50 trials per participant across 50 participants is sufficient to detect a 0.5 standardised slope difference with 80% power using hierarchical models, while 60 trials are needed with the conventional approach (a 20% increase). These gains in power allow for shorter testing sessions or smaller sample sizes without compromising statistical rigour. While these benefits apply to all researchers working with psychophysical interoception tasks, they are particularly valuable for clinical and paediatric studies, where participant availability and tolerance are often limited. Importantly, the statistical power gains achieved with hierarchical models could be further enhanced by incorporating more informative priors than those used in our simulations. Informative priors narrow the plausible range of parameter values, thereby improving estimation precision. We therefore recommend that future analyses make use of empirically-grounded priors, such as those we derived from the VMP dataset. Whereas adopting hierarchical modelling has clear benefits, we must also acknowledge some practical limitations. Fitting these models is more computationally intensive than conventional analyses. However, with current software and hardware, this is feasible on standard computers. Moreover, the reduction in data collection time afforded by hierarchical models far outweighs the additional analysis time (our demonstration models fit the data of 50 participants in approximately five minutes). Implementing these models also requires some expertise, particularly in selecting appropriate priors and diagnosing model fits. To lower this barrier, we provide an R Markdown tutorial showing how to implement these models, including guidance on model parameterisation, diagnosis, priors, visualisation, and reporting. We chose the brms library for this tutorial because it is well-documented, leverages the Stan software for fast and reliable sampling, and uses a syntax similar to popular R regression functions such as lm . We also provide a Shiny app that allows researchers to interactively explore the results of our power analysis. Users can examine how varying numbers of trials, participants, and effect sizes influence statistical power, helping to make informed decisions about study design. Importantly, the app requires no programming experience, making it accessible to a wide range of researchers. While the estimates depend on the specific choices made in our simulations, this represents the first tool for HRDT sample size justification and offers a practical resource for planning future studies. Beyond interoception, our results are relevant to psychophysics more broadly. This study is among the few to systematically examine how both trial number and participant count jointly influence estimation precision or statistical power in psychophysical designs ( Baker et al., 2021 ; Prins, 2024 ). We extend these previous studies by systematically assessing statistical power across different effect sizes, highlighting a general asymmetry in the estimation of threshold and slope parameters. Threshold effects are consistently easier to detect than slope effects, with power reaching an asymptote at lower trial and participant numbers. This asymmetry reflects the statistical structure of psychometric functions rather than specific properties of our tasks. In the Gaussian formulation, the threshold corresponds to a mean and the slope (β) to the inverse of a standard deviation. Because variance is defined in relation to the mean, its estimation cannot be more precise than that of the mean itself. Although the thresholds and slopes in other PF formulations, like the Weibull used for the RRST, do not map perfectly onto means and variances, they still capture analogous parameters, suggesting that this intuition generalises across PF types. This has direct consequences for experimental design: reliable detection of slope effects will almost invariably require larger samples or trial counts than threshold effects, whereas threshold differences can often be detected with relatively few observations. Moreover, while our analyses were developed specifically for HRDT and RRST, they can be readily adapted to other tasks, such as those in vision science or thermosensation research. The provided resources offer researchers a straightforward way to implement hierarchical Bayesian approaches in similar paradigms, providing a practical template for extending hierarchical modelling across a broad range of psychophysical experiments. In conclusion, this study presents hierarchical Bayesian models of interoceptive performance in the HRDT and RRST. The approach enables robust estimation of interoceptive bias, sensitivity, and precision directly from trial-wise data. Through simulations and empirical validation, we demonstrate that it improves statistical power over standard analyses, particularly when trial numbers or sample sizes are limited. Our results also provide empirically grounded priors to guide future studies. Accompanied by open-source tools and an interactive power estimation app, this analysis approach supports transparent, efficient, and generalisable model-based inference in interoception research and can readily be adapted to other psychophysical paradigms. Acknowledgement This research was supported by the Lundbeckfonden Fellowship R272-2017-4345 (MGA, LB), the European Research Council Grant ERC-2020-StG-948788 (MGA, ASC), the Lundbeckfonden Experiment Grant R436-2023-991 (FF, JFE), and the European Research Council Grant ERC-2020-StG-948838 (FF, ASC). Funder Information Declared European Research Council, https://ror.org/0472cxd90 , ERC-2020-StG-948788 , ERC-2020-StG-948838 Lundbeck Foundation , R436-2023-991 , R272-2017-4345 Footnotes https://github.com/embodied-computation-group/Hierarchical-Interoception References ↵ Adams , K. L. , Edwards , A. , Peart , C. , Ellett , L. , Mendes , I. , Bird , G. , & Murphy , J. ( 2022 ). The association between anxiety and cardiac interoceptive accuracy: A systematic review and meta-analysis . Neuroscience & Biobehavioral Reviews , 140 , 104754 . doi: 10.1016/j.neubiorev.2022.104754 OpenUrl CrossRef PubMed ↵ Allaire , J. ( 2012 ). RStudio: Integrated development environment for R . Boston, MA , 770 ( 394 ), 165 – 171 . OpenUrl ↵ Baker , D. H. , Vilidaite , G. , Lygo , F. A. , Smith , A. K. , Flack , T. R. , Gouws , A. D. , & Andrews , T. J. ( 2021 ). Power contours: Optimising sample size and precision in experimental psychology and human neuroscience . Psychological Methods , 26 ( 3 ), 295 – 314 . doi: 10.1037/met0000337 OpenUrl CrossRef PubMed ↵ Banellis , L. , Nikolova , N. , Ehmsen , J. , Courtin , A. , Vejlø , M. , Tyrer , A. , Böhme , R. , Fardo , F. , & Allen , M. ( 2025 ). Interoceptive ability is uncorrelated across respiratory and cardiac axes: A large scale psychophysical study . OSF . doi: 10.31234/osf.io/s56v4_v1 OpenUrl CrossRef ↵ Böhme , R. A. , Banellis , L. , Vejlø , M. , Allen , M. G. , & Fardo , F. ( 2025 ). No evidence for a link between mental health symptoms and pain thresholds . Anxiety, Stress, & Coping , 0 ( 0 ), 1 – 17 . doi: 10.1080/10615806.2025.2534858 OpenUrl CrossRef ↵ Bürkner , P.-C. ( 2017 ). brms: An R Package for Bayesian Multilevel Models Using Stan . Journal of Statistical Software , 80 , 1 – 28 . doi: 10.18637/jss.v080.i01 OpenUrl CrossRef PubMed ↵ Cameron , O. G. ( 2001 ). Interoception: The Inside Story—A Model for Psychosomatic Processes . Psychosomatic Medicine , 63 ( 5 ), 697 – 710 . OpenUrl Abstract / FREE Full Text ↵ Craig , A. D. ( 2002 ). How do you feel? Interoception: the sense of the physiological condition of the body . Nature Reviews Neuroscience , 3 ( 8 ), 655 – 666 . doi: 10.1038/nrn894 OpenUrl CrossRef PubMed Web of Science ↵ Desmedt , O. , Heeren , A. , Corneille , O. , & Luminet , O. ( 2022 ). What do measures of self-report interoception measure? Insights from a systematic review, latent factor analysis, and network approach . Biological Psychology , 169 , 108289 . doi: 10.1016/j.biopsycho.2022.108289 OpenUrl CrossRef ↵ Desmedt , O. , Luminet , O. , Walentynowicz , M. , & Corneille , O. ( 2023 ). The new measures of interoceptive accuracy: A systematic review and assessment . Neuroscience & Biobehavioral Reviews , 153 , 105388 . doi: 10.1016/j.neubiorev.2023.105388 OpenUrl CrossRef ↵ Desmedt , O. , Van Den Houte , M. , Walentynowicz , M. , Dekeyser , S. , Luminet , O. , & Corneille , O. ( 2022 ). How Does Heartbeat Counting Task Performance Relate to Theoretically-Relevant Mental Health Outcomes? A Meta-Analysis . Collabra: Psychology , 8 ( 1 ), 33271 . doi: 10.1525/collabra.33271 OpenUrl CrossRef ↵ Ehlers , A. , Breuer , P. , Dohn , D. , & Fiegenbaum , W. ( 1995 ). Heartbeat perception and panic disorder: Possible explanations for discrepant findings . Behaviour Research and Therapy , 33 ( 1 ), 69 – 76 . doi: 10.1016/0005-7967(94)E0002-Z OpenUrl CrossRef PubMed ↵ Ehmsen , J. F. , Nikolova , N. , Christensen , D. E. , Banellis , L. , Böhme , R. A. , Brændholt , M. , Courtin , A. S. , Krænge , C. E. , Mitchell , A. G. , Sardeto Deolindo , C. , Steenkjær , C. H. , Vejlø , M. , Mathys , C. , Allen , M. G. , & Fardo , F. ( 2025 ). Thermosensory predictive coding underpins an illusion of pain . Science Advances , 11 ( 11 ), eadq0261 . doi: 10.1126/sciadv.adq0261 OpenUrl CrossRef ↵ Gabry , J. , Češnovar , R. , Johnson , A. , & Bronder , S. ( 2024 ). cmdstanr: R Interface to ‘CmdStan’ . https://mc-stan.org/cmdstanr/ ↵ Jeganathan , J. , Campbell , M. E. J. , Legrand , N. , Allen , M. , & Breakspear , M. ( 2025 ). Aberrant Cardiac Interoception in Psychosis . Schizophrenia Bulletin , 51 ( 1 ), 208 – 216 . doi: 10.1093/schbul/sbae078 OpenUrl CrossRef ↵ Kingdom , F. A. , & Prins , N. ( 2010 ). Psychophysics: A practical introduction . Academic Press London . ↵ Kontsevich , L. L. , & Tyler , C. W. ( 1999 ). Bayesian adaptive estimation of psychometric slope and threshold . Vision Res , 39 ( 16 ), 2729 – 2737 . OpenUrl CrossRef PubMed Web of Science ↵ Legrand , N. , Nikolova , N. , Correa , C. , Brændholt , M. , Stuckert , A. , Kildahl , N. , Vejlø , M. , Fardo , F. , & Allen , M. ( 2022 ). The heart rate discrimination task: A psychophysical method to estimate the accuracy and precision of interoceptive beliefs . Biological Psychology , 168 , 108239 . doi: 10.1016/j.biopsycho.2021.108239 OpenUrl CrossRef PubMed ↵ Mezzetti , M. , Ryan , C. P. , Balestrucci , P. , Lacquaniti , F. , & Moscatelli , A. ( 2023 ). Bayesian hierarchical models and prior elicitation for fitting psychometric functions . Frontiers in Computational Neuroscience, 17 . doi: 10.3389/fncom.2023.1108311 OpenUrl CrossRef ↵ Nikolova , N. , Ehmsen , J. F. , Banellis , L. , Brændholt , M. , Steenkjær , C. , Vejlø , M. , Fardo , F. , & Allen , M. G. ( 2025 ). Microstructural Brain Correlates of Inter-individual Differences in Respiratory Interoception . Journal of Neuroscience . doi: 10.1523/JNEUROSCI.0787-24.2025 OpenUrl Abstract / FREE Full Text ↵ Nikolova , N. , Harrison , O. , Toohey , S. , Brændholt , M. , Legrand , N. , Correa , C. , Vejlø , M. , Jensen , M. S. , Fardo , F. , & Allen , M. ( 2022 ). The respiratory resistance sensitivity task: An automated method for quantifying respiratory interoception and metacognition . Biological Psychology , 170 , 108325 . doi: 10.1016/j.biopsycho.2022.108325 OpenUrl CrossRef PubMed ↵ Prins , N. ( 2024 ). Easy, bias-free Bayesian hierarchical modeling of the psychometric function using the Palamedes Toolbox . Behavior Research Methods , 56 ( 1 ), 485 – 499 . doi: 10.3758/s13428-023-02061-0 OpenUrl CrossRef PubMed ↵ Prins , N. , & Kingdom , F. A. A. ( 2018 ). Applying the Model-Comparison Approach to Test Specific Research Hypotheses in Psychophysical Research Using the Palamedes Toolbox . Frontiers in Psychology , 9 . doi: 10.3389/fpsyg.2018.01250 OpenUrl CrossRef PubMed ↵ R Core Team . ( 2021 ). R: A language and environment for statistical computing. [Computer software] . R Foundation for Statistical Computing . ↵ Stan Development Team . ( 2019 ). Stan Modeling Language Users Guide and Reference Manual 2.21.0 . ↵ Vehtari , A. , Gabry , J. , Magnusson , M. , Yao , Y. , Bürkner , P.-C. , Paananen , T. , & Gelman , A. ( 2024 ). loo: Efficient leave-one-out cross-validation and WAIC for Bayesian models . https://mc-stan.org/loo/ ↵ Vehtari , A. , Gelman , A. , & Gabry , J. ( 2017 ). Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC . Statistics and Computing , 27 ( 5 ), 1413 – 1432 . doi: 10.1007/s11222-016-9696-4 OpenUrl CrossRef ↵ Wickham , H. , Averick , M. , Bryan , J. , Chang , W. , McGowan , L. D. , François , R. , Grolemund , G. , Hayes , A. , Henry , L. , & Hester , J. ( 2019 ). Welcome to the Tidyverse . Journal of Open Source Software , 4 ( 43 ), 1686 . OpenUrl ↵ Zamariola , G. , Maurage , P. , Luminet , O. , & Corneille , O. ( 2018 ). Interoceptive accuracy scores from the heartbeat counting task are problematic: Evidence from simple bivariate correlations . Biological Psychology , 137 , 12 – 17 . doi: 10.1016/j.biopsycho.2018.06.006 OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted September 01, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Hierarchical Bayesian Modelling of Interoceptive Psychophysics Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Hierarchical Bayesian Modelling of Interoceptive Psychophysics Arthur S. Courtin , Jesper Fischer Ehmsen , Leah Banellis , Francesca Fardo , Micah G. Allen bioRxiv 2025.08.27.672360; doi: https://doi.org/10.1101/2025.08.27.672360 Share This Article: Copy Citation Tools Hierarchical Bayesian Modelling of Interoceptive Psychophysics Arthur S. Courtin , Jesper Fischer Ehmsen , Leah Banellis , Francesca Fardo , Micah G. Allen bioRxiv 2025.08.27.672360; doi: https://doi.org/10.1101/2025.08.27.672360 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Neuroscience Subject Areas All Articles Animal Behavior and Cognition (7622) Biochemistry (17648) Bioengineering (13871) Bioinformatics (41880) Biophysics (21423) Cancer Biology (18558) Cell Biology (25460) Clinical Trials (138) Developmental Biology (13364) Ecology (19866) Epidemiology (2067) Evolutionary Biology (24290) Genetics (15589) Genomics (22475) Immunology (17711) Microbiology (40327) Molecular Biology (17145) Neuroscience (88473) Paleontology (666) Pathology (2827) Pharmacology and Toxicology (4816) Physiology (7635) Plant Biology (15114) Scientific Communication and Education (2044) Synthetic Biology (4286) Systems Biology (9815) Zoology (2268)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-21T05:10:58.409756+00:00

License: CC-BY-4.0