Managing Data Uncertainty and Machine Learning for Adult ADHD Classification Using Accelerometry: OBF-Psychiatric Case Study

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 85,601 characters · extracted from preprint-html · click to expand
Managing Data Uncertainty and Machine Learning for Adult ADHD Classification Using Accelerometry: OBF-Psychiatric Case Study | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Managing Data Uncertainty and Machine Learning for Adult ADHD Classification Using Accelerometry: OBF-Psychiatric Case Study View ORCID Profile Paola A. Castillo-Gutierrez doi: https://doi.org/10.1101/2025.08.26.25332257 Paola A. Castillo-Gutierrez 1 School of Engineering and Sciences, Tecnologico de Monterrey Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Paola A. 
Castillo-Gutierrez For correspondence: a00843399{at}tec.mx Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract This study aims to enhance our understanding of ADHD individuals through accelerometer analysis while developing a framework for managing data uncertainty in digital biomarker research. Our primary emphasis is on identifying and mitigating biases within the OBF-Psychiatric dataset for ADHD and CONTROL groups, exploring how these biases influence machine learning model performance and generalizability. To balance patient inclusion with data quality, we applied innovative Pareto optimization and rigorous quality criteria, establishing optimal temporal windows, selecting 16:00 to 23:00 hours involving 53 patients from 77 available. Statistical analysis used robust Brunner-Munzel tests with False Discovery Rate correction, examining 34 comprehensive features spanning statistical, complexity, and frequency-based domains. Following thorough corrections, no significant differences in motor activity features emerged between the ADHD and CONTROL groups in quality-controlled data. Multidimensional scaling confirmed considerable overlap between groups. We assessed six traditional supervised machine learning algorithms through Leave-One-Patient-Out cross-validation: Logistic Regression, Random Forest, Support Vector Machine, Multilayer Perceptron, K-Nearest Neighbors, and XGBoost, plus baseline classifiers. Performance was evaluated across three data quality configurations to assess data processing consequences. Notably, performance systematically declined as data quality improved, with ROC AUC dropping from 75% (uncleaned) to 54% (quality-controlled). Our analysis suggests that the ADHD and CONTROL groups are indistinguishable using traditional feature-based motor activity patterns with these data collected in naturalistic conditions. 
However, the methodological contributions of this study provide foundations for future appropriately powered research and underscore key considerations for accelerometry utility in detecting adult ADHD and other psychiatric disorders in real-world scenarios. Introduction Attention Deficit Hyperactivity Disorder (ADHD) is a neurodevelopmental disorder that affects approximately 5% of children globally [ 1 ] and is among the most common psychiatric disorders in childhood [ 2 ]. According to a meta-analysis conducted in 2020 [ 3 ], the adult ADHD landscape revealed two distinct prevalence patterns: the proportion of persistent cases (individuals who maintained their childhood diagnosis into adulthood) stood at 2.58%, while the rate of adult-onset cases (those who exhibited symptoms in adulthood without a childhood diagnosis) reached 6.76%, corresponding to 139.84 million and 366.33 million affected individuals worldwide, respectively. However, these figures may be imprecise due to evolving diagnostic criteria in adults, the historical underrepresentation of females in research, and the persistent stigma surrounding the disorder [ 4 ] [ 5 ]. Norway presents an ideal research context due to its universal and free healthcare system, where the private sector is marginal [ 6 ]. This unified system effectively diminishes variations in healthcare access and insurance status as potential confounding factors in ADHD diagnosis patterns. An extensive registry-based analysis by Solberg et al., drawing data from four national Norwegian databases (The Medical Birth Registry, Prescription Database, Patient Registry, and National Educational Database), investigated gender-specific patterns of psychiatric comorbidity in a cohort of 40,000 adults with ADHD [ 7 ]. 
The results demonstrated marked gender differences in comorbidity patterns: women exhibited significantly higher prevalence rates for anxiety, depression, bipolar disorder, and personality disorders, while men showed elevated rates of schizophrenia and substance use disorder [ 7 ]. Traditionally, licensed clinicians diagnose ADHD in adults primarily through structured clinical interviews, retrospective assessment of childhood symptoms, and application of diagnostic guidelines, such as the DSM-5, which focuses on manifestations of inattention, hyperactivity, and impulsivity [ 8 ] [ 5 ]. While this approach has effectively identified many cases, it remains limited by its reliance on subjective reporting and recall bias [ 4 ]. Furthermore, the substantial symptom overlap between ADHD and other common psychiatric disorders in adulthood can significantly impair clinicians’ diagnostic accuracy, leading to misdiagnosis and delayed treatment interventions [ 9 ] [ 4 ] [ 6 ]. Recent scientific findings have enhanced the identification standards for ADHD, although the evaluation instruments and assessment methodologies have stayed fundamentally the same [ 10 ] [ 5 ]. Significantly, the evolved data-driven and theoretical models designed to elucidate how neurodevelopmental disorders present across multiple dimensions have yet to be integrated into experimental research and everyday clinical practice [ 11 ] [ 12 ]. Most studies rely on magnetic resonance imaging (MRI) data to characterize the brain structure of individuals with ADHD, making it the primary quantitative method for comparison in research literature, thus far [ 10 ] [ 13 ]. The MRI procedures require expensive equipment, specialized facilities, and considerable time investments, making them impractical for routine diagnostic procedures. 
Additionally, MRI environments can be particularly challenging for individuals with ADHD symptoms, as these settings demand extended periods of stillness and can provoke anxiety [ 14 ]. In contrast, physiological signals such as motion data from accelerometers and inter-beat intervals, gathered with electrocardiography or plethysmography, offer more accessible and cost-effective alternatives. Researchers collect these biomedical signals in naturalistic settings, with minimal patient cooperation required, while enabling continuous monitoring capabilities [ 15 ]. Despite the advantages, a considerable gap remains in understanding the potential and constraints of accelerometry as an alternative to biomedical signals, particularly in distinguishing ADHD within the adult population [ 16 , 13 ]. This knowledge gap is especially striking given the increasing demand for objective, scalable diagnostic and prognostic tools that healthcare professionals can readily integrate into diverse clinical environments. Successful machine learning studies require a clear clinical rationale, robust validation strategies, a detailed methodology that includes feature engineering and preprocessing, a considerable amount of data, and transparent reporting of limitations and generalizability constraints. This paper aims to enhance our understanding of accelerometry signals by characterizing the limitations researchers should thoroughly evaluate when using the ADHD and CONTROL data from the OBF-Psychiatric Dataset [ 17 ] to classify patients using machine learning. The principal research question that guides this work is: To what extent can traditional supervised machine learning models, using accelerometer-based feature extraction on an aggregated daily basis, differentiate ADHD patients from a control group of adults for the OBF-Psychiatric selected dataset? 
For this problem and data, the secondary research question is: How does the three-way trade-off between maximizing patient inclusion, maximizing recording duration, and minimizing confounding factors affect the performance of accelerometry-based ADHD classification models? This work presents the following contributions: Comprehensive analysis of the ADHD and CONTROL data incorporated within the OBF-Psychiatric dataset [ 17 ], including detailed characterization of the inherent properties of both accelerometer measurements and clinical variables. A rigorous quality assessment of the acquired accelerometry signals, alongside the development of customized exclusion criteria specifically designed to address data acquisition anomalies in the samples. To ensure analytical consistency across patients and diagnostic classifications, we identified time windows using a Pareto front approach, which determined the set of non-dominated solutions that balance patient inclusion and window duration. This application could be transferable to other accelerometry or time series cases where quality constraints restrict the generalizable window across several patients. The resulting curated dataset, containing records from both the CONTROL and ADHD groups, has been made publicly accessible through Zenodo to facilitate further research in this domain [ 18 ]. Comparison of basic and complex features extracted from accelerometer data across various time-frame selection approaches using inferential statistical modeling. Development and evaluation of binary machine learning classifiers for ADHD detection based on accelerometry data, with performance metrics systematically assessed at the patient level across three carefully selected temporal configurations: Analysis incorporating all available complete-day recordings (00:00-23:59) from all patients (n=77) without additional quality filtering. 
Analysis using the Pareto-optimal window (16:00-23:00) that maximizes patient inclusion (n=53) while maintaining data quality criteria and minimizing Euclidean distance to a “Utopia point” established at (77 patients, 24 hours). Full 24-hour analysis (00:00-23:59) using the same 53 patients identified through the Pareto-optimal window criteria. The structure of this paper is organized as follows: Section II provides a literature review of relevant studies related to this research. Section III outlines the methodology utilized. Section IV presents the results derived from the machine learning and statistical inference experiments, along with a discussion of the findings. Section V addresses several limitations and special considerations to be aware of when handling these data. Finally, Section VI summarizes the work and identifies potential future research directions and opportunities. Literature Review Neurobiology and Clinical Aspects of ADHD The presentation of ADHD varies considerably across individuals, reflecting diverse developmental trajectories and manifestation patterns; rather than being a binary manifestation of symptoms, it was suggested as a spectrum [ 19 ]. As a psychiatric condition, ADHD continues to be the subject of extensive research aimed at better defining its boundaries with other disorders and understanding its heterogeneous nature. However, despite these efforts, the fundamental causes and developmental pathways that give rise to ADHD continue to elude complete scientific explanation [ 20 ]. Various psychological frameworks have been proposed to explain the origins of symptomatic hyperactivity and motor activity in ADHD, building upon foundational neuropsychological theories that emphasize executive dysfunction and inhibitory control deficits [ 21 , 22 ]. 
These include: (1) the State Regulation Model, which examines arousal and activation mechanisms; (2) Multiple Pathway Theories, which consider diverse developmental routes to the disorder; and (3) the Dynamic Developmental Theory of ADHD, which focuses on altered reinforcement processes [ 14 ]. The Dynamic Developmental Theory of ADHD attributes hyperactivity and motor symptoms to a hypo-functioning mesolimbic dopamine branch, resulting in altered reinforcement of new behaviors and inadequate extinction of previously reinforced behaviors [ 23 ]. This theory predicts that hyperactivity develops gradually through a combination of disrupted positive reinforcement and deficient extinction processes, ultimately manifesting as excessive motor activity from an accumulation of behavioral responses that cannot be adequately regulated due to narrowed time windows for associating antecedent stimuli and behavior with consequences [ 14 , 23 , 24 ]. Both DSM-IV and DSM-5 explicitly note that adults may “feel restless” instead of running or climbing, and both require symptoms in two or more settings, underscoring that motor hyperactivity can be situational [ 25 ] [ 8 ]. Hyperactivity symptoms generally diminish with age [ 26 ]. Table 1 summarizes the main differences between DSM-IV and DSM-5, describing the presentation of hyperactivity and motor symptoms, along with other factors such as age of onset. The DSM-IV [ 25 ] and ICD-10 [ 27 ] are two systems that share similar lists of symptoms, with one key difference being that the DSM-IV explicitly recognizes distinct subtypes, including predominantly inattentive presentation, whereas ICD-10 employs a more unified approach to hyperkinetic disorders. The ICD-10 category of hyperkinetic disorder (HKD) does not formally recognize ADHD, but it resembles the combined presentation of ADHD because it involves symptoms of both inattention and hyperactivity/impulsivity. View this table: View inline View popup Download powerpoint Table 1. 
Comparison of ADHD Diagnostic Criteria Between DSM-IV (1994) [ 25 ] and DSM-5 (2013) [ 8 ] The symptom profile of ADHD extends beyond traditional conceptualizations of attention deficits and hyperactivity, encompassing complex patterns of cognitive and behavioral dysregulation. Individuals with ADHD often experience intense fluctuations in attention capacity, demonstrating the ability to hyperfocus on engaging tasks while struggling with sustained attention in other contexts. This pattern suggests an underlying dysregulation of attention mechanisms rather than a simple deficit [ 28 ]. Attention deficits in ADHD may result from insufficient dopamine activity in the prefrontal cortex, a hypothesis supported by the therapeutic efficacy of stimulant medications that enhance dopaminergic function [ 29 ]. Executive function challenges manifest in difficulties with time management, organization, and task completion. Clinical evidence suggests adults experience a broader range of impairments than children, particularly in executive functioning and emotional regulation [ 30 , 4 ]. Despite the challenges they face, individuals with ADHD often possess remarkable strengths that can contribute to their success in adulthood. They frequently demonstrate exceptional creativity, boundless curiosity, and natural problem-solving abilities. Their characteristic energy, nonconformist thinking, adventurous spirit, spontaneity, and keen sense of humor represent valuable attributes that, when properly channeled, can become significant advantages in appropriate contexts and career paths [ 19 ]. Accelerometry Signals Physical activity is a complex human behavior that requires multiple dimensions to be considered in order to reveal a comprehensive behavioral profile. The psychological factors of cognitive and emotional function reflect the desire to maintain productivity, independence, and an active interaction (movement) within their environment [ 31 ]. 
Accelerometry has emerged as the most commonly used and evaluated objective method in clinical and epidemiological research to capture movement, offering a solution to the limitations of subjective measurement approaches [ 32 ]. Modern accelerometers consist of small sensors that register acceleration along specific axes and can be worn at various body locations, with the hip, wrist, and thigh being the most common placements. Accelerometers provide a convenient and unobtrusive method for continuous data collection. Multiple accelerometers are used in research and consumer devices, including piezoelectric, piezoresistive, and capacitive accelerometers, each utilizing different mechanisms to detect acceleration forces [ 31 ]. Accelerometer signals can be obtained either as raw acceleration data measured in gravitational units (g) or as processed activity counts after applying algorithms to the raw signals. The medical-grade Actiwatch AW4 device, specifically, employs a piezoelectric element with a seismic mass that detects movement above a threshold of 0.05g in multiple directions (anteroposterior, mediolateral, and vertical planes), with a primary focus on the vertical axis (omniaxial type), converting mechanical acceleration into digital signals. Frequency filters are set to a range of 3 to 11 Hz. This device samples acceleration at a predefined 32 Hz frequency, selects the highest sample value in one second and ultimately transforms it into activity counts using an algorithm where 128 counts per second corresponds to approximately an acceleration of 5g (which is also considered as a maximum for human acceleration typical range in the device manual) [ 33 ]. These counts are then summed and stored in predefined epoch lengths (commonly 15, 30, or 60 seconds) to quantify movement intensity over time. The device manual indicates that it is not suitable to use underwater, as this could damage the accelerometer [ 33 ]. 
According to the Nyquist theorem, to accurately capture a signal, the sampling rate must be at least twice the frequency of the signal being measured. Human movement acceleration signals predominantly occur below 10 Hz, suggesting that even relatively modest sampling rates are adequate for capturing the full spectrum of human movement patterns [ 34 ]. This presents an important trade-off in accelerometer design, as higher sampling rates and resolutions provide more detailed data but require significantly more memory and power; however, this is increasingly becoming less of a concern with ongoing technological development. For typical human movement monitoring, an accelerometer with a 2G measurement capability is generally sufficient [ 34 ]. However, the accelerometry device domain presents substantial challenges for researchers attempting to compare results across studies or establish universal clinical findings. The commercial market features accelerometers with widely varying technical specifications, including hardware-level filtering mechanisms, sampling frequencies, output metrics (such as activity counts or raw acceleration), and proprietary analytical algorithms that manufacturers protect as intellectual property. These technical inconsistencies, along with differences in clinical protocols for device placement, epoch durations, and non-wear-time imputation algorithms, create significant confounding factors when comparing digital biomarkers derived from different accelerometry systems [ 35 ]. When utilizing the same accelerometer model across a study, researchers should still conduct precision and accuracy testing on each sensor unit before data collection to minimize device-specific variance and systematically document the calibration procedure or establish a criterion value [ 36 , 35 , 34 ]. 
It is recommended to adopt a seven-day analysis period in clinical protocols for accelerometry-based physical activity assessment, as this duration strikes a balance between data reliability and participant compliance while capturing sufficient behavioral variability across weekdays and weekends [ 32 ]. Designing the post-processing techniques for these signals requires knowledge of biomechanics, psychology, physiology, engineering, computer science, and statistics. These multidisciplinary considerations play a pivotal role in ensuring accurate measurement with accelerometers and valid interpretations. Related Works The HYPERAKTIV dataset [ 15 ], which is now part of the OBF-Psychiatric dataset, has established itself as a foundational resource for ADHD research utilizing wearable sensors and motor activity data from adults, representing one of the few publicly available repositories of this kind [ 15 ]. The analytical approaches applied to these actigraphy measurements have consistently employed binary classification frameworks to distinguish ADHD from defined groups, with multiple studies augmenting the control group by combining clinical subjects with control individuals from complementary datasets, such as PSYKOSE and DEPRESJON [ 15 , 37 , 38 , 17 , 39 ]. Feature extraction has employed diverse methodologies across studies, ranging from traditional statistical features to automated tools, with the Tsfresh Python library [ 40 ] emerging as the predominant tool for generating statistical features from time series data, followed by various dimensionality reduction and feature selection techniques including PCA [ 41 , 37 ], UMAP, and t-SNE [ 38 ]. Validation methodologies have varied across studies, with researchers employing K-fold cross-validation [ 41 ] and percentage splits of training/testing data [ 15 , 37 ]. In some cases, leave-one-out cross-validation [ 42 ] is used. 
Recent innovations by Thelagathoti and Ali [ 43 , 44 ] employing network-based approaches represent promising steps toward more interpretable models. The reported classification accuracies using these data to date range from 72% [ 15 ] to 99.2% [ 38 ]. In Fasmer’s earlier 2015 study [ 45 ], researchers reported experiencing logistical issues and patient compliance problems during the study. They utilized a more restricted sample of only 32 ADHD patients and 20 CONTROL subjects after data quality inspection for six days. Their analysis excluded recordings containing daytime (07:00-23:00) inactivity periods exceeding one hour from the six-day analysis, but found no significant differences between the ADHD, CONTROL, and CLINICAL groups using activity counts for one half hour as the unit of measurement. In Fasmer’s 2020 diurnal graph theory study of these ADHD patients [ 46 ], researchers employed a methodology that analyzed only the first continuous 24-hour period (00:00-23:59) without data gaps for each participant. Within this timeframe, they focused on 6-hour morning periods (8:00 AM to 2:00 PM) and evening periods (6:00 PM to midnight), explicitly excluding the midnight to 8:00 AM interval from their analysis. The number of subjects considered in the 2020 study varied across measurements, with 30 control subjects and 40-42 for ADHD patients. Both the 2015 and 2020 studies mention that participants were instructed to wear actigraphs at all times and only remove them during showering or bathing. While previous works demonstrate the potential of machine learning approaches for ADHD classification using accelerometry data, additional considerations regarding assumptions, data preprocessing steps, signal noise handling, and evaluation could further strengthen advancements in the field. 
To our knowledge, this study represents the first comprehensive approach to addressing these considerations, utilizing the ADHD and CONTROL data from the OBF-Psychiatric dataset and machine learning techniques. Methodology We implemented multi-objective Pareto optimization to identify optimal temporal windows that balance patient inclusion with data quality constraints. The analysis included rigorous data preprocessing with custom quality criteria, extraction of 34 statistical and complexity features from accelerometer signals, and evaluation of six traditional machine learning algorithms using leave-one-patient-out cross-validation. Statistical inference was conducted using non-parametric tests with False Discovery Rate correction, while model performance was assessed across three distinct data quality configurations to evaluate the impact of preprocessing decisions on classification outcomes. Data Collection The ADHD data encompasses data collected from 103 patients referred to a private psychiatric outpatient clinic in Norway who required diagnostic evaluation for various psychiatric conditions, specifically attention deficit/hyperactivity disorder (ADHD), mood disorders, or anxiety disorders. Data collection commenced on February 20, 2009. Diagnostic assessments for all participants were conducted by experienced and certified psychiatrists, utilizing the Mini-International Neuropsychiatric Interview (MINI Plus, version 5.0) [ 47 ], which was enhanced when possible with data from collateral sources, particularly relatives, concerning symptoms of ADHD in childhood [ 48 ]. A consensus final diagnosis was established using the DSM-IV and ICD-10 criteria, following thorough discussions of each case among various psychiatrists [ 48 ] [ 17 ]. Among these new referral patients, 51 were diagnosed with ADHD; however, motor activity data were recorded for only 45 of the ADHD patients (24 males and 21 females). 
This data collection occurred under naturalistic conditions, allowing participants to engage in their typical daily activities while wearing an accelerometer. The study incorporated inclusive criteria, accommodating comorbidities frequently observed in conjunction with substance abuse. Patients diagnosed with ADHD presented additional psychiatric conditions such as anxiety, bipolar disorder, or unipolar depression, resulting in substantial overlap with other psychiatric disorders. Additionally, the dataset features a category labeled “OTHER,” which denotes the presence of additional unspecified psychiatric disorders. The ages of participants ranged from 18 to 65 years. Concerning medication status, the majority of participants were unmedicated at the time of data collection, with only one individual within the ADHD cohort receiving a prescription for stimulants, but some receiving antidepressants, anxiolytics/benzodiazepines, and sleep medications (hypnotics) [ 45 ]. Participants maintained their medication regimen unchanged throughout the actigraphy monitoring period, continuing to take the same drugs they were using at the time of referral [ 48 ]. All collected data were anonymized to protect patient confidentiality, with each participant identified only by a unique numeric ID. The CONTROL group data comprised 32 individuals (20 females and 12 males) with an age range between 21 and 66 years, recruited as part of the original DEPRESJON and PSYKOSE datasets, with data collection for this group beginning on October 2, 2002. This cohort included 23 employees from Bergen University and a psychiatric nursing home, five medical students, and four general practitioner patients who had no psychiatric history. No individuals in the control group had any previous diagnosis of mood disorders or psychotic conditions. Similar to the HYPERAKTIV dataset, accelerometry for the control group was performed under naturalistic conditions. 
For this analysis, only patients diagnosed with ADHD and individuals from the CONTROL group from the OBF-Psychiatric dataset were selected for inclusion, despite the presence of patients in the CLINICAL group with accelerometry recorded in naturalistic settings. This decision was made to reduce heterogeneity and clarify the classification structure, as the CLINICAL group encompassed individuals with a variety of diagnoses and comorbidities. While the DEPRESSION and SCHIZOPHRENIA groups from the PSYKOSE and DEPRESJON datasets were available, they were not included due to their differing protocol descriptions. The SCHIZOPHRENIA group consisted entirely of hospitalized patients who were not working, predominantly male, which raises concerns about the underrepresentation of females and potential fallacious results when using this group. Meanwhile, the DEPRESSION group included both hospitalized and outpatient individuals. In parallel, this approach allows for a preliminary assessment of whether meaningful differences exist between individuals with ADHD and those without psychiatric conditions in naturalistic conditions. Accelerometer data were recorded for both groups using the Actiwatch AW4 on the right wrist. Exploratory visuals of medication distribution and demographics from these groups, such as gender, age, and psychiatric comorbidities, are included in the repository [ 18 ]. The original protocols for data collection from the studies received approval from the Norwegian Regional Medical Research Ethics Committee West, and their specific numbers are included in the OBF-Psychiatric data descriptor paper [ 17 ]. Data Cleaning and Preprocessing While the literature suggests analyzing seven consecutive days, this was not feasible due to the limitations of the available data. The number of patients in the ADHD group who met this criterion dramatically declined from 45 to just 6, significantly restricting our analytical capabilities. 
The activity data was structured into matrices for analytical clarity, with each matrix representing complete 24-hour periods (1,440 minutes from 00:00 to 23:59) as individual vectors. For the CONTROL group, there were even more days recorded than those reported in the clinical study, extending to weeks. We decided to consider only the maximum number of days documented in the original.csv file for this group, as established in the “days” column, by considering the full days available and skipping the first incomplete day. For the ADHD group, 45 patients were found, totaling 253 complete days, while the CONTROL group consisted of 32 patients, totaling 402 complete days. We graphed the motor activity for each patient across daily segments and visually identified several signal acquisition issues; examples of these are shown in Figure 1 . Since it remains unclear whether participants consistently wore the watch during both nighttime and daytime or periodically removed it, we searched for an interval between 7:00 and 23:00 hours that could be generalizable across patients and classes, considering the following quality criteria: Download figure Open in new tab Figure 1. Examples of signals with acquisition anomalies. No day segment should have 180+ consecutive minutes with zero values during presumed wake time (07:00-23:00). No day segment should have 120+ consecutive minutes with values between 0-10 during presumed wake time (07:00-23:00). No day segment should have 60+ consecutive minutes with the same value (if that value is > 10). No day segment should contain extreme values between 7680 and 8000. No day segment should contain more than 70% of zeros during the day segment. No day segment should contain only a maximum value of 20 counts or less. Each window must have at least 5 hours of data, and each patient should have six consecutive days available. 
We applied the principles of Pareto optimality to identify a set of non-dominated solutions that optimize both patient inclusion and window duration. An exhaustive search of the Pareto front examined all possible time windows from 7:00 to 23:00, with a minimum window size of 5 hours. For each potential time window, we evaluated the number of patients who met the quality criteria along with the corresponding window duration. A solution was considered Pareto-optimal if no other window could simultaneously include more patients and span a longer duration. Let $p(w)$ represent the number of patients meeting quality criteria for window $w$, and $D(w)$ represent the duration of window $w$ in hours. A window $w_1$ dominates another window $w_2$ if and only if: $$p(w_1) \geq p(w_2) \quad \text{and} \quad D(w_1) \geq D(w_2),$$ with at least one of the two inequalities being strict. Genetic Algorithms and other heuristic methods are commonly used to identify Pareto-optimal solutions within large search spaces. However, the specific dimensionality of our problem permitted a comprehensive exploration of the entire solution space, ensuring the accurate identification of the actual Pareto front rather than merely an approximation. We defined a “Utopia point”, specifically at (77 patients, 24 hours), as the ideal maximum in the absence of confounding factors. This point served as a reference for calculating Euclidean distance to identify the solution from the Pareto Front that minimizes this distance. Our exhaustive search identified six non-dominated solutions forming the Pareto front (see Figure 2 ). From this set, we selected only one key window for detailed analysis: Download figure Open in new tab Figure 2. Global Pareto Front for Time Window Optimization in Actigraphy Data Analysis. 1. 
Optimal Window (maximizing patient inclusion and minimizing Euclidean distance) 16:00-23:00 (7 hours), which included 53 patients (28 ADHD and 25 CONTROL) The ADHD group had no more than 7 days available, whereas the CONTROL group did; however, we only considered the first 6-day quality sequence for the CONTROL group. To ensure robust findings and evaluate the impact of different temporal selection criteria, we extended our analysis beyond the Pareto-optimal windows to include two additional temporal configurations. First, we conducted an analysis that incorporated all available complete-day recordings (00:00-23:59) without additional quality filtering, thereby maximizing cohort size at the expense of potentially including lower-quality segments. Second, we performed a complete 24-hour analysis (00:00-23:59) using the same 53 patients and six days identified through the Pareto-optimal window criteria (16:00-23:00). This multi-configuration approach facilitated the evaluation of classifier performance sensitivity to temporal window selection and quality filtering parameters. The processed actigraph recording files, adapted to each Pareto window configuration, are available in the Zenodo repository [ 18 ], including both “TRIMMED” (Pareto-window-specific) and “COMPLETE” (full 24-hour recordings for the same patients) versions. Feature Extraction We extracted nine core basic features per day segment for each patient: mean, standard deviation, median, percentage of zeros (minutes with value 0 for activity), quartile 25, quartile 75, percentile 95, skewness, and kurtosis. 
Additionally, an extended set of 25 complementary features was calculated to capture comprehensive activity patterns: Median absolute deviation (MAD), 10% trimmed mean, amplitude, percentage of minutes in ranges 1–100, 101–500, 501–1000, 1001–2000 counts, percentage of low activity values ≤ 40, M60 (mean of 60 highest activity values), intensity gradient, activity above mean percentage, activity variability ratio, Gini coefficient, activity fragmentation index, heavy tail ratio (P99/P75), extreme proportion (>P90 threshold), Detrended Fluctuation Analysis (DFA) scaling exponent, high frequency power (1/30 to 1/2 cycles per minute), low frequency power (1/210 to 1/60 cycles per minute), HF/LF ratio, sample entropy with embedding dimension m = 2 and tolerance r = 0.2× standard deviation, normalized Lempel-Ziv complexity using median-based binarization, permutation Lempel-Ziv complexity with embedding dimension m = 3 and autocorrelation at 30 and 90-minute lags. The files with the daily and patient-level extracted features are available in the repository [ 18 ]. The normality of each feature distribution was assessed using the Shapiro-Wilk test, while homogeneity of variance was evaluated through Levene’s test to determine the appropriate statistical inference methodology. For this part, we aggregated the features by calculating the mean across all six days for each patient, as independence cannot be assumed between daily observations from the same individual; however, it can be reasonably established between different patients. Based on these preliminary analyses, the Brunner-Munzel Test was selected as the most appropriate non-parametric method for between-group comparisons. For comparison with results obtained using the Tsfresh library, the Mann-Whitney U Test was additionally included in the analysis. 
Multiple comparison correction was performed using the Benjamini-Hochberg false discovery rate (FDR) procedure with α = 0.05 to control the expected proportion of false positives among rejected hypotheses. We considered 34 comparisons because of the number of features. Statistical significance was established at p < 0.05. We calculated the effect sizes (Cohen’s d) to quantify the magnitude of differences. The 95% confidence intervals for the mean values of each group and each feature were estimated using bootstrap resampling with 1000 iterations. Feature-to-feature relationships among motor activity metrics were analyzed using Spearman’s rank correlation, a non-parametric method suitable for these data. Demographic effects on features were assessed through Kruskal-Wallis tests with FDR corrections across gender (4 subgroups: ADHD-female, ADHD-male, CONTROL-female, CONTROL-male) and age subgroups (6 subgroups across three age ranges: 17-34, 35-51, and 52-69 years per diagnostic group). Multidimensional Scaling in three dimensions was used to see the classifying potential of the set of features at the daily level per class. We chose this dimension because it preserves the most information that could be lost through dimensionality reduction, while maintaining the visualization capabilities. Machine Learning Model Training and Validation The traditional machine learning models chosen for evaluation were Logistic Regression (LR), Random Forest (RF), Support Vector Machine (SVM), Multilayer Perceptron (MLP), K-Nearest Neighbors (KNN), and XGBoost. Fixed hyperparameters and Python notebook code are available in the repository for reproducibility. Baseline classifiers with Most Frequent, Stratified, and Uniform strategies were implemented for benchmark comparison against traditional machine learning models. 
These baseline classifiers were trained using the overall class distribution from the complete patient dataset rather than following the leave-one-patient-out cross-validation scheme. This approach ensures stable baseline references that reflect the inherent class proportions in the data, avoiding the distribution distortions that could arise from small sample scenarios. The baseline classifiers operate at the patient level and provide predictions based solely on learned class frequencies, without requiring majority voting aggregation. A leave-one-patient-out cross-validation approach was employed, wherein each patient’s data was systematically excluded in each fold to ensure robust model generalization. Within each fold, features were independently normalized using Min-Max scaling, applied on a per-feature basis to constrain values between 0 and 1 while preserving the relative distribution within each feature and transforming the test set at each iteration independently. Class imbalance was addressed through the application of the Synthetic Minority Over-sampling Technique (SMOTE) [ 49 ], applied exclusively to the training partition of each fold and with a random seed (42). Predictions were subsequently generated at the daily level (either complete period or segment), providing temporal granularity. The final label assigned for each patient is based on the majority voting of the daily predictions. Model performance was evaluated using a comprehensive set of metrics: precision, recall, F1 score, Receiver Operating Characteristic Area Under the Curve (ROC AUC), accuracy, and Matthews Correlation Coefficient (MCC). These metrics were selected to provide a balanced assessment of classification performance across different dimensions of model evaluation. 
Intra-patient and inter-patient analyses The intra-patient analyses examine behavioral dynamics within individual participants across time, encompassing two key dimensions: temporal correlations that quantify the relationship between each feature and time progression within each patient’s longitudinal data using Spearman correlation coefficients between time points and feature values; and variability analysis that captures the coefficient of variation to identify features with high intra-individual fluctuation. Complementing these within-subject analyses, the inter-patient methods investigate patterns across individuals within each diagnostic group, focusing on consistency using intraclass correlation coefficients (ICC; the defining equation is missing from this extracted text) to quantify how homogeneous patient groups are in their feature expressions. These analyses were made only for the Pareto optimal window. Results and Discussion Our implementation of multi-objective Pareto optimization for temporal window selection, shown in Figure 2 , revealed methodological trade-offs. The exhaustive evaluation of 78 window combinations (51,090 individual assessments) identified six non-dominated solutions, with the optimal window (16:00-23:00, 7 hours, 53 patients, 68.8% inclusion) representing the best compromise between temporal coverage and sample size while minimizing Euclidean distance to the utopian point (77 patients, 24 hours). This multi-objective methodology offers a transferable tool for managing competing priorities in wearable sensor data analysis, establishing a rigorous approach for multi-criteria decision-making that can be adapted across research domains. To our knowledge, no prior studies have applied multi-objective Pareto optimization specifically for temporal window selection with quality criteria in accelerometry/actigraphy data. 
While temporal window selection and Pareto optimization exist as a general concept, the systematic Pareto approach to balance competing objectives in psychiatric accelerometry is novel. In this study, Pareto optimization is employed as a search algorithm with two competing goals: maximizing patient inclusion and maximizing temporal window duration while maintaining data quality criteria. This multi-objective approach proves particularly advantageous in clinical research contexts compared to traditional single-objective search algorithms. Unlike conventional optimization methods that focus exclusively on identifying a single “best” solution, the Pareto framework systematically evaluates the entire solution space to identify non-dominated alternatives, providing clinical researchers with a comprehensive view of available trade-offs. This visualization of the complete decision landscape enables clinical personnel to make informed choices based on their specific research priorities, whether emphasizing larger sample sizes for statistical power or prioritizing data quality for signal integrity. Traditional search algorithms such as grid search, depth-first search (DFS), or breadth-first search (BFS) would converge to a single optimal point, obscuring alternative viable solutions that might better align with clinical constraints or research objectives. For larger search spaces involving more extensive datasets with increased patient populations and longer monitoring periods, incorporating advanced multi-objective algorithms such as NSGA-II (Non-dominated Sorting Genetic Algorithm II) could provide enhanced efficiency in exploring complex solution landscapes. 
However, in scenarios where only the single best approximate solution is required, classical approaches such as Genetic Algorithms or hyperheuristics might offer computational advantages while sacrificing the comprehensive decision-making transparency that makes Pareto optimization particularly valuable for clinical research applications. After implementing FDR correction to account for multiple testing, none of the extracted motor activity features yielded statistically significant group differences between participants with ADHD and typically developing controls using Mann-Whitney U and Brunner-Munzel non-parametric tests within the optimal Pareto window. When using the same patients from the optimal Pareto window but with complete day data, none of the features were found to be significant. Only when using the uncleaned data did we find a significant feature: activity fragmentation with p < 0.01 in the Brunner-Munzel test after FDR correction. The Kruskal-Wallis statistical analysis for gender and age did not reveal any differences between the subgroups using the Pareto optimal window. The three-dimensional Multidimensional Scaling (MDS) analysis provided compelling visual confirmation of our statistical findings. No clear clustering patterns emerged that distinguished the two diagnostic groups, as data points from both classes intermixed throughout the three-dimensional space in all temporal-quality configurations. Figure 3 corroborates the statistical inference results by demonstrating that when reducing the 34-feature space to its most informative three dimensions, the inherent structure of the data does not support class separation with the Pareto optimal window. The absence of distinct clusters or boundaries in the MDS plots (see repository) provides independent confirmation that the features lack sufficient discriminative power to distinguish ADHD from control patterns reliably. Download figure Open in new tab Figure 3. 
Multidimensional Scaling visualization showing the clustering capability of motor activity features in the optimal Pareto window configuration. Download figure Open in new tab Figure 4. Distribution of psychiatric comorbidities in ADHD patients within the Pareto-optimal window (n=53). Download figure Open in new tab Figure 5. Medication usage patterns among ADHD patients in the Pareto-optimal window (n=53). Download figure Open in new tab Figure 6. Gender distribution across ADHD and CONTROL groups in the Pareto-optimal window (n=53). Download figure Open in new tab Figure 7. Age distribution across ADHD and CONTROL groups in the Pareto-optimal window (n=53). The feature-to-feature correlation analysis, using Spearman’s rank correlation, revealed high multicollinearity among many extracted features, thereby reducing the effective dimensionality of the discriminative information. This redundancy explains, in part, why machine learning models struggled to identify unique discriminative patterns, as much of the apparent information content was duplicated across correlated features. Notably, the absence of significant differences persisted across all feature categories, from simple statistical measures to sophisticated complexity metrics and frequency domain analyses. This comprehensive result reinforces the conclusion that the lack of discriminative power is not due to inadequate feature engineering, but rather reflects the fundamental similarity between groups in their motor activity patterns when using a feature-based approach. This conclusion is consistent with Fasmer’s initial observations [ 45 ], which indicated no significant differences between the ADHD and CLINICAL groups over six days. However, it is crucial to recognize that the subjects and the criteria used to filter data in his study differ from those applied in this paper. 
Additionally, the observation that hyperactivity tends to decline with age [ 26 ] could provide a clinical rationale for the absence of significant differences when analyzing adults. However, the limitations of the dataset and the cross-sectional design inhibit definitive causal conclusions. Furthermore, among the ADHD patients in the original dataset, only seven presented with ADHD as an isolated condition without psychiatric comorbidities. This complicates the ability to confirm whether observed motor activity patterns are attributable to ADHD specifically rather than to other psychiatric conditions, such as unipolar depression, that may have predominant effects on motor behavior. The machine learning evaluation across six traditional algorithms (Logistic Regression, Random Forest, Support Vector Machine, Multilayer Perceptron, K-Nearest Neighbors, and XGBoost) revealed a systematic pattern. Our analysis across three data quality configurations demonstrates how performance metrics degrade as data quality improves. With uncleaned data, as presented in Table II , Logistic Regression achieved a seemingly acceptable performance: 69% accuracy, 75% ROC AUC, and 37% Matthews Correlation Coefficient, with an ADHD precision of 76%, recall of 69%, and F1-score of 72% ( Table III ). With the same patients as the Pareto window, but considering their full six days, Logistic Regression performance degrades: accuracy drops to 62%, ROC AUC to 59%, and MCC to 24% ( Table IV ). With rigorous Pareto-optimal filtering, as shown in Table VI , all models converge to random performance, with the best performer in Table VII (MLP) achieving only 58% accuracy and 54% ROC AUC, essentially equivalent to random classification. This systematic degradation (ROC AUC: 75% → 59% → 54%) provides empirical proof that the uncleaned models were detecting systematic technical artifacts. 
More detailed results of these models, visualizations, and feature importance analysis can be found in the Zenodo repository [ 18 ]. The machine learning models employed a leave-one-patient-out cross-validation strategy with majority voting at the daily level. This denotes that the patterns learned by the models were not generalizable across users, a finding that aligns with the high variability between subjects shown in the boxplots of the patient-level features. View this table: View inline View popup Download powerpoint Table 2. Results ML Complete Uncleaned ADHD CONTROL days Daily Features - Leave-One-Patient-Out Performance Summary View this table: View inline View popup Download powerpoint Table 3. Results ML Complete Uncleaned ADHD CONTROL days Daily Features - Logistic Regression Results (Best Model) View this table: View inline View popup Download powerpoint Table 4. Results ML Pareto Complete Day Daily Features - Leave-One-Patient-Out Performance Summary View this table: View inline View popup Download powerpoint Table 5. Results ML Pareto Complete Day Daily Features - Logistic Regression Results (Best Model) View this table: View inline View popup Download powerpoint Table 6. Results ML Pareto Trimmed Daily Features - Leave-One-Patient-Out Performance Summary View this table: View inline View popup Download powerpoint Table 7. Results ML Pareto Trimmed Daily Features - Multi-Layer Perceptron (MLP) Results (Best Model) A separate intra-patient variability analysis of the Pareto optimal window was conducted to assess the similarity between the features of different days from the same patient. The majority of the features remained stable, indicating that the days of the same patient were similar. The exceptions were [feature name lost in text extraction] (coefficient of variation ~ 20 in ADHD versus ~ 2.5 in controls), autocorrelation_90min (CV ~ 8 in ADHD versus ~ 4 in controls), and autocorrelation_30min (CV ~ 2.5 in ADHD versus ~ 5 in controls). 
While ADHD showed slightly higher variability in specific features, these differences were modest and insufficient for reliable discrimination. Differences in mean temporal correlations were minimal (< 0.05 − 0.1) for most features, while individual patient variability (error bars) completely dominated any between-group differences. Only dfa_scaling_exponent and sample_entropy showed slightly different temporal correlations, but these remained within the range of individual variability. For the inter-patient consistency analysis via Intraclass Correlation Coefficients (ICC) of the majority of features, groups were remarkably similar (differences < 0.1), pointing to equivalent levels of internal homogeneity. Basic features like mean and trimmed_mean showed practically identical ICCs between groups. The singular notable exception was activity_fragmentation , which showed clearly lower ICC in ADHD (~ 0.37) versus controls (~ 0.75), suggesting greater inter-patient heterogeneity in fragmentation patterns among ADHD patients. This indicates that a monitoring approach with an individualized focus could be better suited to these data. High intra-patient daily feature similarity but high inter-patient variability could explain the poor classification performance of the models. Additionally, it may reinforce the conclusion that employing a K-fold validation at the daily level is inadequate, as it will mix days from the same patient between test and training splits, potentially leading the model to learn which patient is by similarity rather than because a discernible pattern exists across the classes. This separate variability analysis can be performed before deciding whether a machine learning approach is suitable, as it can provide a reasonable estimate of the expected behavior of the machine learning models. 
The original protocol description for the ADHD study was obtained following communication with the Norwegian Regional Medical Research Ethics Committee West and consultation with the original principal investigator to facilitate a detailed analysis. The ADHD clinical protocol 251.08 underwent several modifications during its final implementation. A sleep diary was intended to be collected from each participant; however, due to incomplete data, it was ultimately excluded from the articles related to the dataset. The goal described in the protocol was to categorize ADHD participants into two distinct cohorts based on DSM-IV age criteria: one group comprised patients diagnosed according to standard age of onset, and the other included those diagnosed after the age of 7, alongside a third group of patients exhibiting minimal or no ADHD symptoms. The consensus diagnosis procedure involved four psychiatrists (KM, WF, OBF, and JØB) instead of the originally planned two (KM and WF). Although an inter-rater reliability assessment was attempted through videotaped interviews, it was deemed too time-consuming and consequently discontinued by the investigators. The protocol 150.01, which pertains to the PSYKOSE/DEPRESJON datasets from which the CONTROL group was derived, was not located by the committee or the original researchers and remains unavailable for analysis. From these changes, the principal limitation derived is the lack of information about each patient’s sleep patterns, which affects the interpretation of the accelerometry data. Additionally, the fact that the initial aim was to evaluate subgroups of the ADHD class suggests that variability in the activity patterns of the ADHD group was expected to be found depending on the age of onset, implying that a “one size fits all” approach may not accurately reflect motor activity in ADHD patients. 
Given the substantial limitations in data quality, our findings should be interpreted as a quality assessment rather than a definitive statement about the clinical utility of accelerometry for detecting adult ADHD. Our findings that machine learning models demonstrate limited discriminative performance when rigorous data quality criteria are applied to the OBF-Psychiatric dataset represent a significant methodological contribution to the field. This result challenges assumptions about the direct applicability of machine learning approaches to clinical psychiatric classification with these data and underscores the critical importance of data quality assessment frameworks in clinical AI development. The development of standardized protocols for accelerometry data collection, processing, and quality assessment would facilitate meaningful comparisons across studies and reduce the risk of artifact exploitation. The results presented should not discourage accelerometry research for ADHD but rather highlight the need for appropriately powered validation studies using innovative methodological approaches. Limitations Data collection was conducted using a device equipped with an omnidirectional accelerometer, which may not capture movement as accurately as tri-axis accelerometers can. The placement of the device on the wrist may have also constrained its ability to detect movements, such as continuous leg movement while sitting. The epoch used in this study is defined as a sum over a one-minute duration, which complicates direct comparisons with devices that utilize different settings, such as those that calculate epochs as means rather than sums. Additionally, there remains uncertainty regarding the extremely high acceleration values in the signals, as a count value between 7680 and 8000 per minute suggests an equivalent of 5g acceleration (128 counts) during all 60 seconds of the epoch. 
The metrological validation of the devices used to collect these accelerometry signals was not documented in the datasets descriptor articles or related previous works, suggesting this validation was not performed during the study. There is no record of sleep hours, making it difficult to determine with certainty which periods correspond to sleep time or to derive sleep metrics. Although patients received explicit instructions to wear the watch continuously and remove it only when bathing, we cannot verify which patients fully adhered to this protocol. Potential equipment damage, such as water exposure, might explain the unusual recordings observed during visual inspection of the actigraphy data. The signals contain extensive periods with zero count values. While this could indicate that the patient remained motionless or asleep during that time, it could also result from the watch being removed from their wrist. Furthermore, the device manual describes the sleep analysis functionality, where a value of at least 40 counts in a 1-minute epoch with medium sensitivity classifies a patient as awake [ 33 ]. This threshold increases the probability that extended periods with zero counts or values less than 40 may be due to not wearing the watch or sensor damage. In terms of study design limitations, the application of strict quality exclusion criteria in this paper resulted in the absence of data for a specific day of the week for each person, as well as the loss of data from ADHD patients and control individuals. From a clinical and diagnostic perspective, it is essential to note that the standards for diagnosing ADHD have evolved since the DSM-IV criteria were utilized in this study, and our understanding of the disorder continues to expand. Moreover, there is currently no definitive framework for the assessment of adult ADHD patients based on motor activity patterns. 
Notably, the dataset descriptor does not report the assessment of potential medical conditions that could influence motor activity patterns or the presentation of ADHD symptoms, such as endocrinological conditions. Regarding sample characteristics, there were demographic imbalances, particularly in age and gender distribution between groups. While we found no significant differences between demographic subgroups in the features extracted, the limited sample size prevents definitive conclusions about model fairness and generalizability across demographic groups. The heterogeneity of ADHD, particularly in terms of comorbidities in the sample, adds further complexity. Substance abuse disorders and different medications in certain ADHD patients may also have impacted their behavioral patterns, complicating causal relationships. Our final small sample size (n=53) was determined by data availability rather than a priori power calculations, which represents a common challenge when working with pre-existing clinical datasets and cross-sectional designed studies. Also, this sample size complicates the ability to obtain a separate validation set distinct from the training and testing sets for hyperparameter tuning. No patients or members of the general public were involved in the design, conduct, or interpretation of this study, as we utilized a previously collected secondary dataset. Conclusion Although the application of machine learning classifiers to accelerometry data has the potential to reveal patterns, the suboptimal data quality, small sample size, and confounding factors in these datasets limit the usefulness of classifiers in capturing the general characteristics of ADHD through accelerometry. 
Since model performance can vary significantly based on the training data used, preprocessing techniques, feature selection methods, validation strategies, and fixed hyperparameters, machine learning alone may introduce a layer of technical bias and subjectivity into the digital phenotyping of ADHD problems. Moreover, the substantial risk of overfitting in such small datasets requires a cautious interpretation of reported performance metrics. Importantly, despite having numerical observations, we cannot establish an objective view based on these data. Previous works published using the CONTROL or ADHD data from the OBF-Psychiatric dataset, or related ones (HYPERAKTIV, DEPRESJON, and PSYKOSE), require further evaluation. This approach is deliberately reductionist, condensing the complex, non-stationary, and stochastic characteristics of continuous accelerometer signals into simplified daily summary statistics. Although this aggregation reduces temporal resolution and may obscure important within-day variability patterns, it provides practical benefits for initial classification and exploratory analysis, including lower computational requirements and enhanced interpretability. It is important to note that even with more sophisticated time series approaches, the same data confounding factors would persist. While these data can support many advanced numerical models, poor data quality will severely limit the effectiveness of any model. Although we suggested a method for identifying a window that meets multiple criteria, signal quality should be prioritized from the outset of the data collection process. The quality filters applied in this paper are focused on this specific real-world case and are not ideal; they could be adapted or improved. Several technical constraints of the Actiwatch AW4, while present in our study, are less problematic in current accelerometry research. 
Modern devices, such as the GENEActiv [ 50 ], offer water resistance, circadian rhythm assessment, extended battery life, and enhanced data quality capabilities. The quality and viability of health machine learning models can also be addressed with TRIPOD+AI guidelines [ 51 ] and the recommendations for clinical predictive models discussed in Steyerberg’s book [ 52 ]. Fasmer’s visionary accelerometer research [ 45 , 46 ] opens opportunities with modern devices and public discussion of accelerometry in psychiatric research. Understanding the data is the most valuable step we can take before deciding if a machine-learning approach is necessary. As researchers, we must maintain our focus; the primary objective of studying acceleration is to develop functional tools that assist clinicians and patients in identifying areas for improvement and training, rather than showcasing the best classifying machine learning algorithm with the highest accuracy or other performance metrics. Psychiatric and psychological research are more likely to have confounding factors than an “absolute typical norm”, but still, we have to try to approach the truth with the most detail possible to ensure traceability and aim to standardize procedures to provide reliable tools with reproducible tests. Data Availability All data produced are available online at Zenodo. https://doi.org/10.5281/zenodo.16396176 Acknowledgment This preprint was created using the LaPreprint template ( https://github.com/roaldarbol/lapreprint ) by Mikkel Roald-Arbøl https://orcid.org/0000-0002-9998-0058 . AI assistance disclosure: Grammarly Pro was used for grammar and spelling corrections throughout the manuscript. Claude Sonnet 4 was used for language editing and sentence structure improvements. All scientific content, analysis, and conclusions remain the original work of the author. Footnotes Data availability: Repository containing curated data, code, statistical analysis and visualizations is available on Zenodo . 
Funding: P. Castillo-Gutierrez thanks SECIHTI and Tecnologico de Monterrey for the grant fund received during her graduate scientific studies. Competing interests: The author declares no competing interests. References [1]. ↵ Guilherme V. Polanczyk , Erik G. Willcutt , Giovanni A. Salum , Christian Kieling , and Luis A. Rohde . “ ADHD prevalence estimates across three decades: An updated systematic review and meta-regression analysis ”. In: International Journal of Epidemiology 43 . 2 ( 2014 ), pp. 434 – 442 . DOI: 10.1093/ije/dyt261 . OpenUrl CrossRef PubMed [2]. ↵ Kapil Sayal , Vibhore Prasad , David Daley , Tamsin Ford , and David Coghill . “ ADHD in children and young people: Prevalence, care pathways, and service provision ”. In: The Lancet Psychiatry 5 . 2 ( 2018 ), pp. 175 – 186 . DOI: 10.1016/S2215-0366(17)30167-0 . OpenUrl CrossRef PubMed [3]. ↵ Peige Song , Maoyuan Zha , Qingqing Yang , Yachen Zhang , Xue Li , and Igor Rudan . “ The prevalence of adult attention-deficit hyperactivity disorder: A global systematic review and meta-analysis ”. In: Journal of Global Health 11 ( 2021 ), p. 04009 . DOI: 10.7189/jogh.11.04009 . OpenUrl CrossRef [4]. ↵ L. Eugene Taylor , Emily A. Kaplan-Kahn , Rui A. Lighthall , and Kevin M. Antshel . “ Adult-Onset ADHD: A Critical Analysis and Alternative Explanations ”. In: Child Psychiatry & Human Development 53 . 4 ( 2022 ), pp. 635 – 653 . DOI: 10.1007/s10578-021-01159-w . OpenUrl CrossRef PubMed [5]. ↵ Stephen V. Faraone et al. “ The World Federation of ADHD International Consensus Statement: 208 Evidence-based conclusions about the disorder ”. In: Neuroscience & Biobehavioral Reviews 128 ( 2021 ), pp. 789 – 818 . DOI: 10.1016/j.neubiorev.2021.01.022 . OpenUrl CrossRef PubMed [6]. ↵ Arnstein Mykletun et al. “ Causal modelling of variation in clinical practice and long-term outcomes of ADHD using Norwegian registry data: The ADHD controversy project ”. In: BMJ Open 11 . 1 ( 2021 ), e041698 . DOI: 10.1136/bmjopen-2020-041698 . 
OpenUrl Abstract / FREE Full Text [7]. ↵ B. S. Solberg , A. Halmøy , A. Engeland , J. Igland , J. Haavik , and K. Klungsøyr . “ Gender differences in psychiatric comorbidity: a population-based study of 40 000 adults with attention deficit hyperactivity disorder ”. In: Acta Psychiatr Scand 137 . 3 ( 2018 ), pp. 176 – 186 . DOI: 10.1111/acps.12845 . OpenUrl CrossRef PubMed [8]. ↵ American Psychiatric Association . Diagnostic and statistical manual of mental disorders. 5th . Arlington, VA : American Psychiatric Publishing , 2013 . [9]. ↵ Samuele Cortese et al. “ Incidence, prevalence, and global burden of ADHD from 1990 to 2019 across 204 countries: Data, with critical re-analysis, from the Global Burden of Disease study ”. In: Molecular Psychiatry 28 . 11 ( 2023 ), pp. 4823 – 4830 . DOI: 10.1038/s41380-023-02228-3 . OpenUrl CrossRef PubMed [10]. ↵ Jorge Hernández-Capistran , Luis N. Sánchez-Morales , Giner Alor-Hernández , Maricela Bustos-López , and José L. Sánchez-Cervantes . “ Machine and Deep Learning Algorithms for ADHD Detection: A Review ”. In: Innovations in Machine and Deep Learning . Ed. by Gilberto Rivera , Alejandro Rosete , Bernabé Dorronsoro , and Nelson Rangel-Valdez. Vol. 134 . Springer Nature Switzerland , 2023 , pp. 163 – 191 . DOI: 10.1007/978-3-031-40688-1_8 . OpenUrl CrossRef [11]. ↵ Michelle Miller et al. “ Delineating early developmental pathways to ADHD: Setting an international research agenda ”. In: JCPP Advances 3 . 2 ( 2023 ), e12144 . DOI: 10.1002/jcv2.12144 . OpenUrl CrossRef [12]. ↵ Jan Buitelaar et al. “ Toward Precision Medicine in ADHD ”. In: Frontiers in Behavioral Neuroscience 16 ( 2022 ), p. 900981 . DOI: 10.3389/fnbeh.2022.900981 . OpenUrl CrossRef [13]. ↵ Hui Wen Loh , Chui Ping Ooi , Prabal Datta Barua , Elizabeth E. Palmer , Filippo Molinari , and U. Rajendra Acharya . “ Automated detection of ADHD: Current trends and future perspective ”. 
In: Computers in Biology and Medicine 146 ( 2022 ), p. 105525 . DOI: 10.1016/j.compbiomed.2022.105525 . OpenUrl CrossRef [14]. ↵ Caterina Gawrilow , Jan Kühnhausen , Juliane Schmid , and Gertraud Stadler . “ Hyperactivity and motoric activity in ADHD: Characterization, assessment, and intervention ”. In: Frontiers in Psychiatry 5 ( 2014 ). DOI: 10.3389/fpsyt.2014.00171 . OpenUrl CrossRef [15]. ↵ Steven A. Hicks et al. “ HYPERAKTIV: An Activity Dataset from Patients with Attention-Deficit/Hyperactivity Disorder (ADHD) ”. In: Proceedings of the 12th ACM Multimedia Systems Conference . 2021 , pp. 314 – 319 . DOI: 10.1145/3458305.3478454 . OpenUrl CrossRef [16]. ↵ Elham Ghasemi , Maryam Ebrahimi , and Esmaeil Ebrahimie . “ Machine learning models effectively distinguish attention-deficit/hyperactivity disorder using event-related potentials ”. In: Cognitive Neurodynamics 16 . 6 ( 2022 ), pp. 1335 – 1349 . DOI: 10.1007/s11571-021-09746-2 . OpenUrl CrossRef PubMed [17]. ↵ Enrique Garcia-Ceja et al. “ OBF-Psychiatric, a motor activity dataset of patients diagnosed with major depression, schizophrenia, and ADHD ”. In: Scientific Data 12 . 1 ( 2025 ), p. 32 . DOI: 10.1038/s41597-025-04384-3 . OpenUrl CrossRef [18]. ↵ Paola A. Castillo-Gutiérrez . Managing Data Uncertainty and Machine Learning for Adult ADHD Classification Using Accelerometry: OBF-Psychiatric Case Study . 2025 . DOI: 10.5281/zenodo.16396176 . OpenUrl CrossRef [19]. ↵ Jemma A. Sedgwick , Aleya Merwood , and Philip Asherson . “ The positive aspects of attention deficit hyperactivity disorder: a qualitative investigation of successful adults with ADHD ”. In: ADHD Attention Deficit and Hyperactivity Disorders 11 . 3 ( 2019 ), pp. 241 – 253 . DOI: 10.1007/s12402-018-0277-6 . OpenUrl CrossRef [20]. ↵ Sabin Koirala et al. “ Neurobiology of attention-deficit hyperactivity disorder: Historical challenges and emerging frontiers ”. In: Nature Reviews Neuroscience ( 2024 ). DOI: 10.1038/s41583-024-00869-z . 
OpenUrl CrossRef [21]. ↵ Russell A. Barkley . “ Behavioral inhibition, sustained attention, and executive functions: constructing a unifying theory of ADHD ”. In: Psychological Bulletin 121 . 1 ( 1997 ), pp. 65 – 94 . DOI: 10.1037/0033-2909.121.1.65 . OpenUrl CrossRef PubMed Web of Science [22]. ↵ Joseph Sergeant . “ The cognitive-energetic model: an empirical approach to attention-deficit hyperactivity disorder ”. In: Neuroscience & Biobehavioral Reviews 24 . 1 ( 2000 ), pp. 7 – 12 . DOI: 10.1016/S0149-7634(99)00060-3 . OpenUrl CrossRef PubMed Web of Science [23]. ↵ Terje Sagvolden , Espen Borgå Johansen , Heidi Aase , and Vivienne A. Russell . “ A dynamic developmental theory of attention-deficit/hyperactivity disorder (ADHD) predominantly hyperactive/impulsive and combined subtypes ”. In: Behavioral and Brain Sciences 28 . 3 ( 2005 ), pp. 397 – 419 . DOI: 10.1017/S0140525X05000075 . OpenUrl CrossRef PubMed Web of Science [24]. ↵ Espen Borgå Johansen , Heidi Aase , Anneke Meyer , and Terje Sagvolden . “ Attention-deficit/hyperactivity disorder (ADHD) behaviour explained by dysfunctioning reinforcement and extinction processes ”. In: Behavioural Brain Research 130 . 1-2 ( 2002 ), pp. 37 – 45 . DOI: 10.1016/S0166-4328(01)00434-X . OpenUrl CrossRef PubMed Web of Science [25]. ↵ American Psychiatric Association . Diagnostic and statistical manual of mental disorders. 4th . Washington, DC : American Psychiatric Association , 1994 . [26]. ↵ Stephen V. Faraone , Joseph Biederman , and Eric Mick . “ The age-dependent decline of attention deficit hyperactivity disorder: a meta-analysis of follow-up studies ”. In: Psychological Medicine 36 . 2 ( 2006 ), pp. 159 – 165 . DOI: 10.1017/S003329170500471X . OpenUrl CrossRef PubMed Web of Science [27]. ↵ World Health Organization . The ICD-10 Classification of Mental and Behavioural Disorders: Clinical Descriptions and Diagnostic Guidelines . Geneva : World Health Organization , 1992 . [28]. 
↵ Alessio Bellato , Iti Arora , Chris Hollis , and Maddie J. Groom . “ Is autonomic nervous system function atypical in attention deficit hyperactivity disorder (ADHD)? A systematic review of the evidence ”. In: Neuroscience & Biobehavioral Reviews 108 ( 2020 ), pp. 182 – 206 . DOI: 10.1016/j.neubiorev.2019.11.001 . OpenUrl CrossRef [29]. ↵ Mary V. Solanto . “ Dopamine dysfunction in AD/HD: integrating clinical and basic neuroscience research ”. In: Behavioural Brain Research 130 . 1-2 ( 2002 ), pp. 65 – 71 . DOI: 10.1016/S0166-4328(01)00431-4 . OpenUrl CrossRef PubMed Web of Science [30]. ↵ Matthew J. Silverstein , Stephen V. Faraone , T. Lindsey Leon , Joseph Biederman , Thomas J. Spencer , and Lenard A. Adler . “ The Relationship Between Executive Function Deficits and DSM-5-Defined ADHD Symptoms ”. In: Journal of Attention Disorders 24 . 1 ( 2020 ), pp. 41 – 51 . DOI: 10.1177/1087054718804347 . OpenUrl CrossRef PubMed [31]. ↵ Alan Godfrey , Roisin Conway , Deirdre Meagher , and Gearóid ÓLaighin . “ Direct measurement of human movement by accelerometry ”. In: Medical Engineering & Physics 30 . 10 ( 2008 ), pp. 1364 – 1386 . DOI: 10.1016/j.medengphy.2008.09.005 . OpenUrl CrossRef PubMed Web of Science [32]. ↵ Fangyu Liu , Amal A. Wanigatunga , and Jennifer A. Schrack . “ Assessment of Physical Activity in Adults Using Wrist Accelerometers ”. In: Epidemiologic Reviews 43 . 1 ( 2021 ), pp. 65 – 93 . DOI: 10.1093/epirev/mxab004 . OpenUrl CrossRef [33]. ↵ CamNtech Ltd . The Actiwatch User Manual. CamNtech Ltd . 2009 . [34]. ↵ Daniel Arvidsson , Jonatan Fridolfsson , and Mats Börjesson . “ Measurement of physical activity in clinical practice using accelerometers ”. In: Journal of Internal Medicine 286 . 2 ( 2019 ), pp. 137 – 153 . DOI: 10.1111/joim.12908 . OpenUrl CrossRef PubMed [35]. ↵ Emma Letts , J. S. Jakubowski , S. King-Dowling , K. Clevenger , D. Kobsar , and J. Obeid . 
“ Accelerometer techniques for capturing human movement validated against direct observation: a scoping review ”. In: Physiological Measurement 45 . 7 ( 2024 ), 07TR01 . DOI: 10.1088/1361-6579/ad45aa . OpenUrl CrossRef [36]. ↵ A. Routen , D. Upton , M. Edwards , and D. Peters . “ Intra- and inter-instrument reliability of the Actiwatch 4 accelerometer in a mechanical laboratory setting ”. In: Journal of Human Kinetics 31 ( 2012 ), pp. 17 – 24 . DOI: 10.2478/v10078-012-0002-z . OpenUrl CrossRef [37]. ↵ V. Shafna and S. D. Madhu Kumar . “ Optimal interval and feature selection in activity data for detecting attention deficit hyperactivity disorder ”. In: Computers in Biology and Medicine 179 ( 2024 ), p. 108909 . DOI: 10.1016/j.compbiomed.2024.108909 . OpenUrl CrossRef [38]. ↵ M. Misgar and M. Bhatia . “ Advancing ADHD diagnosis: using machine learning for unveiling ADHD patterns through dimensionality reduction on IoMT actigraphy signals ”. In: International Journal of Information Technology ( 2024 ). DOI: 10.1007/s41870-024-01895-x . OpenUrl CrossRef [39]. ↵ Peter Jakobsen et al. “ PSYKOSE: A Motor Activity Database of Patients with Schizophrenia ”. In: 2020 IEEE 33rd International Symposium on Computer-Based Medical Systems (CBMS) . 2020 , pp. 303 – 308 . DOI: 10.1109/CBMS49503.2020.00064 . OpenUrl CrossRef [40]. ↵ Maximilian Christ , Nils Braun , Julius Neuffer , and Andreas W. Kempa-Liehr . “ Time Series FeatuRe Extraction on basis of Scalable Hypothesis tests (tsfresh – A Python package) ”. In: Neurocomputing 307 ( 2018 ), pp. 72 – 77 . DOI: 10.1016/j.neucom.2018.03.067 . OpenUrl CrossRef [41]. ↵ Amandeep Kaur and Karanjeet Singh Kahlon . “ Accurate Identification of ADHD among Adults Using Real-Time Activity Data ”. In: Brain Sciences 12 . 7 ( 2022 ), p. 831 . DOI: 10.3390/brainsci12070831 . OpenUrl CrossRef [42]. ↵ Azhar Mohd , A. M. Ali , and S. A. Halim . “ Detecting ADHD Subjects Using Machine Learning Algorithm ”. 
In: 2022 IEEE International Conference on Computing (ICOCO) . 2022 , pp. 299 – 304 . DOI: 10.1109/ICOCO56118.2022.10031796 . OpenUrl CrossRef [43]. ↵ Ravi K. Thelagathoti and H. H. Ali . “ A Network Analysis Approach for the Classification of Psychiatric Disorders Using Multi-Modal Data ”. In: 2023 IEEE International Conference on Bioinformatics and Biomedicine (BIBM). Istanbul, Turkiye , 2023 , pp. 2831 – 2836 . DOI: 10.1109/BIBM58861.2023.10385720 . OpenUrl CrossRef [44]. ↵ Ravi K. Thelagathoti and H. H. Ali . “ Mobility-Based Community Analysis for Early Detection of Complex Psychiatric Disorders ”. In: 2024 IEEE 12th International Conference on Healthcare Informatics (ICHI). Orlando, FL, USA , 2024 , pp. 205 – 213 . DOI: 10.1109/ICHI61247.2024.00034 . OpenUrl CrossRef [45]. ↵ Ole Bernt Fasmer , Kristin Mjeldheim , Wenche Førland , Anita Lill Hansen , Steven Dilsaver , Ketil Joachim Oedegaard , and Jan Øystein Berle . “ Motor Activity in Adult Patients with Attention Deficit Hyperactivity Disorder ”. In: Psychiatry Investigation 12 . 4 ( 2015 ), p. 474 . DOI: 10.4306/pi.2015.12.4.474 . OpenUrl CrossRef [46]. ↵ Ole Bernt Fasmer et al. “ Diurnal variation of motor activity in adult ADHD patients analyzed with methods from graph theory ”. In: PLOS ONE 15 . 11 ( 2020 ), e0241991 . DOI: 10.1371/journal.pone.0241991 . OpenUrl CrossRef [47]. ↵ David V. Sheehan et al. “ The Mini-International Neuropsychiatric Interview (M.I.N.I.): the development and validation of a structured diagnostic psychiatric interview for DSM-IV and ICD-10 ”. In: Journal of Clinical Psychiatry 59 . Suppl. 20 ( 1998 ), pp. 22 – 33 . OpenUrl PubMed [48]. ↵ Ole Bernt Fasmer et al. “ Linear and non-linear analyses of Conner’s Continuous Performance Test-II discriminate adult patients with attention deficit hyperactivity disorder from patients with mood and anxiety disorders ”. In: BMC Psychiatry 16 . 1 ( 2016 ), p. 284 . DOI: 10.1186/s12888-016-0993-4 . OpenUrl CrossRef PubMed [49]. ↵ Nitesh V. 
Chawla , Kevin W. Bowyer , Lawrence O. Hall , and W. Philip Kegelmeyer . “ SMOTE: Synthetic Minority Oversampling Technique ”. In: Journal of Artificial Intelligence Research 16 ( 2002 ), pp. 321 – 357 . OpenUrl [50]. ↵ ActivInsights Ltd . GENEActiv: Raw Data Wearable for Clinical & Public Health Research . https://activinsights.com/technology/geneactiv/ . 2025 . [51]. ↵ Gary S. Collins et al. “ TRIPOD+AI statement: updated guidance for reporting clinical prediction models that use regression or machine learning methods ”. In: BMJ 385 ( 2024 ), e078378 . DOI: 10.1136/bmj-2023-078378 . OpenUrl FREE Full Text [52]. ↵ Ewout W. Steyerberg . Clinical Prediction Models: A Practical Approach to Development, Validation, and Updating . 2nd. Cham, Switzerland : Springer , 2019 . View the discussion thread. Back to top Previous Next Posted August 27, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Managing Data Uncertainty and Machine Learning for Adult ADHD Classification Using Accelerometry: OBF-Psychiatric Case Study Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Managing Data Uncertainty and Machine Learning for Adult ADHD Classification Using Accelerometry: OBF-Psychiatric Case Study Paola A. 
Castillo-Gutierrez medRxiv 2025.08.26.25332257; doi: https://doi.org/10.1101/2025.08.26.25332257 Share This Article: Copy Citation Tools Managing Data Uncertainty and Machine Learning for Adult ADHD Classification Using Accelerometry: OBF-Psychiatric Case Study Paola A. Castillo-Gutierrez medRxiv 2025.08.26.25332257; doi: https://doi.org/10.1101/2025.08.26.25332257 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (569) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4442) Dentistry and Oral Medicine (444) Dermatology (383) Emergency Medicine (609) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1510) Epidemiology (15230) Forensic Medicine (30) Gastroenterology (1126) Genetic and Genomic Medicine (6609) Geriatric Medicine (668) Health Economics (998) Health Informatics (4542) Health Policy (1370) Health Systems and Quality Improvement (1613) Hematology (543) HIV/AIDS (1266) Infectious Diseases (except HIV/AIDS) (15923) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (147) Nephrology (668) Neurology (6607) Nursing (346) Nutrition (999) Obstetrics and Gynecology (1146) Occupational and Environmental Health (957) Oncology (3337) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (664) Pediatrics (1693) Pharmacology and Therapeutics (692) Primary Care Research (712) Psychiatry and Clinical Psychology (5448) Public and Global Health (9237) Radiology and Imaging (2202) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (596) Sexual and Reproductive Health (714) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function 
c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a019c3966ef74193',t:'MTc3OTc2Njg4NA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00