Segmentation-Guided Development of Visual Classification Criteria for Alzheimer’s Disease

doi:10.1101/2025.09.29.25336750

Segmentation-Guided Development of Visual Classification Criteria for Alzheimer’s Disease

2025 · doi:10.1101/2025.09.29.25336750

preprint OA: gold CC-BY-4.0

📄 Open PDF Full text JSON View at publisher

Full text 44,833 characters · extracted from preprint-html · click to expand

Segmentation-Guided Development of Visual Classification Criteria for Alzheimer’s Disease | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Segmentation-Guided Development of Visual Classification Criteria for Alzheimer’s Disease Mirjam Peters , View ORCID Profile David Steinbart , View ORCID Profile Alexander Hammers , View ORCID Profile Rolf A. Heckemann , the Alzheimer’s Disease Neuroimaging Initiative doi: https://doi.org/10.1101/2025.09.29.25336750 Mirjam Peters 1 Department of Medical Radiation Sciences, University of Gothenburg , 41345 Gothenburg, Sweden Find this author on Google Scholar Find this author on PubMed Search for this author on this site David Steinbart 2 Charité – Universitätsmedizin Berlin, Dept. Neurology , 13353 Berlin, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for David Steinbart Alexander Hammers 3 King’s College London & Guy’s and St Thomas’ PET Centre , London SE1 7EH, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Alexander Hammers Rolf A. Heckemann 1 Department of Medical Radiation Sciences, University of Gothenburg , 41345 Gothenburg, Sweden Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rolf A. Heckemann For correspondence: rolf.heckemann{at}gu.se Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Alzheimer’s disease (AD) causes progressive structural brain changes that precede clinical symptoms by years. Detecting these changes using structural MRI remains challenging, especially in early stages and when relying on visual interpretation alone. Automated semantic segmentation methods offer anatomical precision and objective measurements, but their outputs are rarely used to support human visual assessment. In this study, we explored whether such segmentation outputs can be used to guide a non-expert investigator in developing and applying interpretable diagnostic criteria. We used images from the Alzheimer’s Disease Neuroimaging Initiative (ADNI) and implemented a structured, segmentation-informed workflow in which a novice with no prior training in radiology or neuroanatomy developed classification rules based on visual appearance and volumetric readouts through three guided pilot phases. In a fourth phase, the investigator applied these criteria to an independent subset of ADNI images while blinded to the respective ADNI participants’ diagnostic labels. Using an anatomical segmentation model (MAPER) with training data from a pre-release version of the Hammers Adult Brain Atlas Database (120 brain regions), the investigator focused on the piriform cortex (PC). The choice of PC was context-driven, reflecting an ongoing quantitative study of PC volume. A binary classification (AD-like versus CN-like) rule based on PC volume ( 430 mm 3 ), supported by assessments of PC shape and global atrophy, yielded an accuracy of 0.71 across 200 cases spanning four diagnostic groups. Accuracy increased to 0.77 when the analysis was restricted to CN and AD cases (with intermediate pathology (MCI) excluded). These results show that segmentation-guided visual workflows can enable non-experts to apply anatomically grounded classification criteria with moderate accuracy. Our framework can be expanded to other regions and promises to be useful for generating interpretable models, for supporting explainable AI, and for accelerating the acquisition of diagnostic skills. Introduction Alzheimer’s disease (AD) is a progressive neurodegenerative disorder in which brain structure and function deteriorate gradually, often years before clinical symptoms become evident. Detecting these early structural changes remains a diagnostic challenge. While biomarkers such as amyloid PET and cerebrospinal fluid assays have improved predictive accuracy, magnetic resonance imaging (MRI) remains one of the most widely used and accessible tools for evaluating brain structure in suspected AD cases [ 1 , 2 ]. The primary purpose of structural MRI is currently to exclude alternative, especially reversible, causes of cognitive decline (tumours, vascular lesions, normal-pressure hydrocephalus, and others), rather than to confirm early-stage AD [ 3 ]. Visual assessment of MR images relies on expert radiologists examining full image stacks using established criteria. While this process is robust and clinically trusted, it is optimized for detecting gross abnormalities or focal lesions, not for identifying diffuse, subtle atrophy patterns typical of early neurodegenerative disease. Additionally, improvements in spatial resolution increase the stack sizes and visible details, raising cognitive load and interpretive complexity. Even structured rating systems, such as the medial temporal atrophy (MTA) score [ 4 ], while reproducible, are limited in specificity and do not consistently detect early-stage disease [ 3 , 5 ]. Automated image analysis, particularly anatomical segmentation, offers a complementary approach (e.g. [ 6 , 7 ]). Such tools extract region-specific measurements with objectivity and reproducibility, and can support individual-level diagnostic reasoning [ 8 – 10 ]. However, segmentation output typically serves as input to statistical classifiers or group-level analyses, rather than being used to enhance visual interpretation. The result is a persistent disconnect: expert visual diagnosis is interpretive but subjective, while automated methods are objective but opaque [ 11 ]. We address this gap by proposing a hybrid diagnostic workflow in which automated segmentation using MAPER [ 12 ] serves as a scaffold for developing human-interpretable visual criteria. Rather than training a machine to classify cases, we guided a novice investigator (MP) with no prior radiological training through a segmentation-informed process for developing and applying visual classification rules. We focused on the piriform cortex (PC), a medial temporal lobe structure that is implicated in early AD pathology but is rarely featured in clinical assessments [ 13 , 14 ]. This focus was context-driven, reflecting the availability of the Hammers Adult Brain Atlas Database [ 15 – 20 ] in a pre-release version that included the PC as a distinct anatomical label. In previous work, we showed that MAPER produces accurate segmentations of the PC on the basis of this atlas database, even in cases with known temporal pathology [ 21 ]. The investigator refined diagnostic criteria in three pilot phases using images and segmentations from the ADNI2 cohort and then applied them to a test set while blinded to the diagnostic label of the participants in the test set. We report classification performance and examine whether this hybrid workflow enables non-experts to derive anatomically grounded, reproducible diagnostic insight. Data collection and sharing for this project was funded by the Alzheimer’s Disease Neuroimaging Initiative (ADNI) (National Institutes of Health Grant U01 AG024904) and DOD ADNI (Department of Defense award number W81XWH-12-2-0012). Methods Study Design We chose a cross-sectional design using structural MR brain images to develop and test human-interpretable diagnostic criteria for Alzheimer’s disease (AD). The study combined qualitative and quantitative elements and followed a structured, four-phase workflow. The core objective was to evaluate whether segmentation-informed visual criteria could be developed and applied by a novice investigator without prior radiological training (MP, an upper-secondary level student conducting her final-year project). No prior hypotheses were formulated, and the analysis was exploratory. Data Source MR images were obtained from the Alzheimer’s Disease Neuroimaging Initiative Phase 2 (ADNI2) database. All images were T1-weighted structural scans acquired at the screening visit (labelled “ADNI2 Screening MRI-New Pt”) and selected from cognitively normal (CN), early or late mild cognitive impairment (EMCI, LMCI), and AD diagnostic groups (the SMC (subjective memory complaint) was not considered). The EMCI and LMCI groups were pooled and reassigned, based on the most recent diagnostic label provided by ADNI: participants whose label had changed to AD were assigned to the progressive (pMCI) group; all others to the stable (sMCI) group. An initial set of 722 images served as the source pool, from which independent samples were selected for the three pilot phases using stratified randomization without replacement. A final subset of 200 images was selected for classification testing. The sample size was determined pragmatically: based on the time required to analyze cases in the pilot phases, the final cohort size was adjusted to fit within the available time frame for the project. Information provided to comply with the ADNI Data Use Agreement: The images were obtained from the ADNI database (adni.loni.usc.edu). The ADNI was launched in 2003 as a public-private partnership, led by Principal Investigator Michael W. Weiner, MD. The original goal of ADNI was to test whether serial magnetic resonance imaging (MRI), positron emission tomography (PET), other biological markers, and clinical and neuropsychological assessment can be combined to measure the progression of mild cognitive impairment (MCI) and early Alzheimer’s disease (AD). The current goals include validating biomarkers for clinical trials, improving the generalizability of ADNI data by increasing diversity in the participant cohort, and to provide data concerning the diagnosis and progression of Alzheimer’s disease to the scientific community. For up-to-date information, see adni.loni.usc.edu. Image Segmentation and Preprocessing All MR images were processed using MAPER (Multi-Atlas Propagation with Enhanced Registration), a machine-learning method for automatic anatomical segmentation [ 12 ]. MAPER uses prior anatomical labels derived from the Hammers Adult Brain Atlas Database, which is based on manual expert segmentations of high-resolution MR images [, @gousiasAutomaticSegmentationBrain 15 c]. A pre-release version distinguishing 120 regions (plus background) was used. This version included the piriform cortex (PC) as a distinct region. We chose this region to build on our previous work showing PC involvement in AD [ 21 ]. Furthermore, we had prevously shown that the neighbouring amygdala is important and involved early [ 7 , 22 ]. Segmentation was performed on the Tetralith high-performance computing cluster at the National Supercomputer Centre, Linköping University, Sweden. Each image was processed with MAPER to produce anatomical label maps. Volumetric estimates were derived and adjusted for intracranial volume as measured with Pincram [ 23 ]. Shell scripting was used to open each image in FSLeyes twice, centring the view on the right and left PC, and providing the relevant measures along with the region name and number. The investigator explored the images interactively in FSLeyes, with full access to segmentation overlays, region metadata, and three-plane navigation. As part of her induction, she became familiar with the tools for toggling label-contour map overlays and efficiently navigating the image volume. Segmentation was used strictly for anatomical guidance and volume estimation. In particular, no artificial neural network or machine-learning model was trained based on the label maps. Further technical details regarding image acquisition, preprocessing, and segmentation parameters have been reported previously [ 21 ]. Download figure Open in new tab Figure 1: Sample image and label-contour map overlay on a T1-weighted MR image in FSLeyes. The crosshair location is programmatically set to the centre of gravity of one of the PC regions (here: left). The default zoom setting provides a global view; the figure shows a zoomed view of the left medial temporal lobe. Label contours are displayed as hairlines by default; in the figure, the outline width is set to 3 for better visibility. Criteria Development Workflow The development of diagnostic criteria followed three pilot phases, each conducted by the same novice investigator. In the first phase, the investigator examined 14 images (balanced between CN and AD cases) with diagnostic labels disclosed. This initial stage served to familiarise the investigator with the anatomical appearance of cases across the disease spectrum and to identify candidate features for classification. Particular attention was directed toward the piriform cortex and adjacent medial temporal structures, reflecting the common practice of evaluating medial temporal lobe regions as an ensemble. The design of each subsequent pilot phase was informed by the outcome of the preceding one. Key decisions — including sample size, diagnostic group composition, degree of blinding, and classification objectives — were made by consensus between the investigator and the senior co-author, who served as methodological advisor. An important determinant of this process was the investigator’s self-assessed confidence: the pacing and scope of each pilot phase were adjusted to match her readiness to apply emerging criteria independently. The data used at each stage were removed from the source pool at each stage (sampling without replacement). During the first pilot phase, the investigator did not acquire the level of confidence she desired. We therefore selected six images (3 CN and 3 AD) to be reviewed in a second pilot phase according to the same approach and priorities as used for the first phase. In the third pilot phase, 40 images (10 from each diagnostic group: CN, sMCI, pMCI, and AD) were selected. Diagnostic labels were withheld. The investigator was instructed to classify each case as either AD-like or CN-like based on observed patterns. For classification purposes, sMCI was grouped with CN and pMCI with AD, reflecting expected clinical trajectories. The investigator had access to MAPER-derived volumetric data for all regions, but no other metadata. After classification, ADNI-assigned diagnostic labels were disclosed and used to refine a set of visual and quantitative criteria including (1) piriform cortex volume (scalar values separately for left and right, corrected for intracranial volume), (2) PC shape in sagittal and frontal planes as judged visually, and (3) global atrophy as judged visually. The contributions of the criteria were weighted and combined according to the investigator’s subjective impressions. This subjective weighting turned out to favour the volumetry results to a greater degree than anticipated. Accordingly, we decided to evaluate a purely quantitative classification in addition to the combined approach (cf. next Section “Final Classification Phase”). Final Classification Phase The final test phase involved 200 images (50 per diagnostic group), drawn randomly from the remaining ADNI2 pool, which consisted of 662 images after the pilot samples had been removed (722–14–6–40). The investigator, blinded to the ADNI diagnostic labels but aware of the proportions, applied the refined criteria from the pilot phases and classified each case as either AD-like or CN-like. As in the third pilot phase, sMCI and pMCI were grouped with CN and AD, respectively. For each image, PC volume, PC shape, and overall appearance of atrophy were recorded. As an additional reference point, we determined diagnostic class predictions (AD-like and CN-like) using the sum volume of the left and right PC, with a cut-off value set to the median of the testing group and treating participants with sub-threshold volumes as AD-like and those with supra-threshold volumes as CN-like. This is reported as the “numeric analysis” in Section Results. The age distribution in the test set, by diagnostic group, was as follows (min/ median/ max): AD (56.6/77.8/90.4 years), CN (56.3/72.2/84.5), pMCI (55.1/73.6/88.5), and sMCI (56.0/71.4/87.6). Following classification, diagnostic labels were disclosed. Confusion matrices were generated for two scenarios: the full cohort of 200 cases and a subset containing only the AD and CN groups (n = 100). Performance metrics were calculated, including accuracy, sensitivity, and specificity [ 24 ]. Results Classification Performance In Table 1 , four confusion matrices are summarized, based on analysing the full set and the subset; both using numeric analysis and visual analysis. The numeric analysis is based on the median sum volume (left and right piriform cortex) of the group (887 mm 3 ) as a cut-off. View this table: View inline View popup Download powerpoint Table 1: Confusion matrices and performance metrics for numeric (Num) and visual (Vis) analysis of all cases ( n = 200, “All”) and for CN and AD only ( n = 100, “cnad”). Read “ →“ as “predicted to be”. PPV: Positive Predictive Value. Full Cohort (n = 200) When all diagnostic groups were included, the classification yielded an overall accuracy of 0.71. This level of accuracy is significantly different from chance performance (binomial test; null hypothesis: accuracy 0.5; p = 6 × 10 -9 ), indicating that the criteria captured relevant anatomical differences. Key metrics are shown in Table 1 . CN vs AD Subgroup (n = 100) In the subset containing only CN and AD cases (50 each), the classification accuracy was higher at 0.77 (p = 6 × 10 -8 ). This higher performance reflects the greater anatomical separation between unambiguous CN and AD cases. It also underscores the challenge of applying visual criteria to intermediate disease stages. Classification Criteria Three features were identified as useful for classifying cases: Piriform cortex volume: a bilateral average volume threshold of 430 mm 3 (adjusted for total intracranial volume) was used. 430 mm 3 : classified as CN < 430 mm 3 : classified as AD Piriform cortex morphology Sagittal View CN: The PC shows a distinct, well-defined morphology. It typically resembles: – A horseshoe rotated 90° to the right or – A “V” or “Y” rotated 90° to the right AD: The PC appears blurred or amorphous. Some resemblance to the above shapes may persist, but boundaries are less sharp and overall definition is lost. Frontal View CN: The PC retains a sharply delineated shape. Commonly described as: – A horseshoe rotated 90° to the right/left – A rotated horseshoe with a longer and narrower caudal arm AD: The PC appears rounded or blob-like, with reduced structural clarity and less distinction from surrounding tissue. Overall Brain Appearance (Global Atrophy) CN (Cognitively Normal): Preserved global brain structure; no clear signs of diffuse atrophy. AD (Alzheimer’s Disease): Evident global atrophy, visible as cortical thinning and ventricular enlargement. The investigator gained the impression that PC volumes were the most decisive feature. Shape and global atrophy assessments contributed supporting evidence but were insufficient on their own to guide confident classification. Discussion Principal Findings A segmentation-guided workflow enabled a novice investigator without prior experience in radiology or neuroanatomy to classify structural MR brain images with moderate accuracy. Using visual criteria developed through structured guidance and informed by automated segmentation, the investigator achieved an overall accuracy of 0.71 across a cohort spanning cognitively normal (CN) participants as well as participants with stable mild cognitive impairment (sMCI), progressive MCI (pMCI), and Alzheimer’s disease (AD). When classification was restricted to CN and AD cases, accuracy increased to 0.76. These results demonstrate that non-experts, given appropriate tools and a structured workflow, can apply anatomically grounded rules with performance well above chance. Interpretation This performance level compares favourably with published interrater agreement for visual diagnosis of AD, particularly given the minimalist rule set, lack of feedback during the classification phase, and absence of prior domain training [ 3 , 5 ]. In fact, radiologists evaluating structural scans often reach only moderate concordance: early studies of medial temporal atrophy (MTA) ratings found interobserver Cohen’s kappa values around 0.6 at best [ 25 ]. The strongest predictor in our workflow was a bilateral piriform cortex (PC) volume threshold of 430 mm 3 , supported by secondary assessments of PC shape and global atrophy. These simple, interpretable criteria were sufficient to identify many cases accurately, especially at the extremes of the diagnostic spectrum. The piriform cortex is not commonly included in routine radiological workflows, nor is it standard in public segmentation atlases. Conventional neuroimaging analyses tend to focus on hippocampi, ventricles, and neocortical regions; by contrast, the primary olfactory cortex (of which the PC is a part) is often overlooked or merged with adjacent structures [ 13 , 21 ]. The present project was conducted alongside a quantitative study correlating PC volumes with cognitive test scores (Steinbart D et al., submitted); thus, the focus on PC was context-driven. Whether better classification performance could have been achieved by targeting other regions is not addressed here, and the PC’s neighbouring regions likely influenced the investigator’s impression. Nevertheless, the results underline the value of segmentation tools that provide access to a broad and detailed set of anatomical labels. Segmentation-informed analysis improves explainability by linking predictions to specific anatomical structures rather than opaque statistical features. Volumetric features such as piriform and hippocampal atrophy are inherently interpretable, and can be verified directly on the images by clinicians. This aligns with current goals in explainable AI, which prioritize transparency and region-level justification of outputs [ 11 ]. Limitations The classification task was binary: all cases were assigned to either CN or AD, including those with mild cognitive impairment. Grouping sMCI with CN and pMCI with AD was pragmatically motivated by expected clinical trajectories, but inevitably oversimplifies pathological heterogeneity. Diagnostic uncertainty in borderline cases likely contributed to the modest overall accuracy and the observed lower performance when MCI cases were included. Mild discrepancies in age distributions between diagnostic groups were evident in a manner that reflected the age distribution in the source pool. This could have influenced classification performance. As age is correlated with brain atrophy [ 26 ], the investigator may have had an unintended advantage when distinguishing AD from CN cases, as the latter were younger. Future studies should consider explicit age-matching or adjustment of volume thresholds to address this potential bias. Other limitations include the absence of intra-rater consistency checks or inter-rater validation, and the dependence on a specific segmentation toolchain. Although MAPER provided structural guidance, its performance depends on image quality, registration accuracy, and the chosen atlas. The piriform cortex label used here is not yet part of the standard Hammers atlas release, and reproducibility is therefore contingent on access to an equivalent alternative. Automated volumetric measures are also known to vary across software implementations and atlas definitions [ 27 ]. These variations may subtly influence regional estimates and could affect classification outcomes. Further validation using alternate pipelines and segmentation protocols will be needed to confirm generalization. Implications and Future Work These results support the idea that automated segmentation can serve as a scaffold for human visual diagnostics, not merely as an input to statistical or machine learning classifiers. This has implications for explainable AI, where transparency and anatomical interpretability remain major challenges, as well as for radiological education, where guided, region-based workflows may help shorten training curves. Given the global shortage of radiologists, the results point towards the possibility of engaging less highly trained staff to read images, for example in low-resource settings. The fact that the numeric classification based on a volume cutoff showed approximately the same accuracy as the combined analysis underlines the value of segmentation as a plausible adjunct to analysis by radiologists. If the stakes are lower than in the typical diagnostic situation, for example when enriching clinical trials with participants who are more likely to progress to AD from MCI, standalone use of automatic segmentation and volumetry can be contemplated. The approach warrants further investigation. Future studies could explore whether other novice analysts achieve similar performance, whether radiology trainees develop interpretive skills more quickly, whether expert refinement of the criteria yields higher accuracy, and whether the method generalizes to other brain structures or disease contexts. Other possible avenues are to construct a visually-informed severity scale and determine correlations with neuropsychological assessments (e.g. Montreal Cognitive Assessment (“MoCa Test”) available in ADNI2). With appropriate validation, segmentation-informed visual workflows may become a useful component of hybrid diagnostic protocols. Conclusion This study demonstrates that a segmentation-guided workflow can enable non-experts to develop and apply anatomically interpretable classification criteria for Alzheimer’s disease using structural MRI. With minimal prior knowledge, a brief training period, and no feedback during testing, a novice investigator achieved moderate accuracy by combining volumetric and visual features of the piriform cortex. These findings highlight the value of integrating automatic segmentation into visual workflows, both to enhance explainability and to support structured training. Segmentation-informed protocols may help bridge the gap between quantitative image analysis and human interpretability, making them a promising component of future hybrid diagnostic systems. Declarations Ethics The ADNI study is conducted under permissions from the respective institutional review boards at the participating medical centres. All participants give written informed consent. Author contributions MP contributed to the study design, developed and tested the classification criteria, calculated the result tables, and wrote a dissertation that served as the basis of the manuscript. DS contributed the manual segmentations of the piriform cortex that are part of the atlas database used for the study, approved the overall study design, and co-edited the manuscript. AH contributed the Hammers Atlas Database, approved the overall study design, and co-edited the manuscript. RAH developed the study concept and design, supervised the pilot studies, carried out the MAPER segmentations, and edited the manuscript. Use of generative AI Portions of this manuscript were developed with the assistance of a large language model (GPT-4o, OpenAI, San Francisco, USA), which was used to support drafting, editing, translation, statistical reasoning, and structuring of the text. The tool did not generate original scientific content or perform any data analysis autonomously. All intellectual contributions, interpretation of results, and final decisions regarding content and conclusions were made by the authors, who take full responsibility for the work. Funding disclosures The School of Biomedical Engineering and Imaging Sciences is supported by the Wellcome EPSRC Centre for Medical Engineering at King’s College London (WT203148 / Z / 16 / Z) and the Department of Health via the National Institute for Health Research (NIHR) comprehensive Biomedical Research Centre award to Guy’s & St Thomas’ NHS Foundation Trust in partnership with King’s College London and King’s College Hospital NHS Foundation Trust. For the purposes of open access, the authors have applied a Creative Commons Attribution (CC BY) licence to any Accepted Author Manuscript version arising, in accordance with King’s College London’s Rights Retention policy. The computations, data handling, and visualizations were enabled by resources provided by the National Academic Infrastructure for Supercomputing in Sweden (NAISS), partially funded by the Swedish Research Council through grant agreement no. \ 202206725. Competing interests The authors declare no competing interests. Data availability statement All relevant results are summarized in the manuscript. Individual-level information will not be made available as per the ADNI Data Use Agreement. The Hammers Adult Brain Atlas Database is available at https://brain-development.org/brain-atlases/adult-brain-atlases/ . References 1. ↵ van Oostveen WM , de Lange ECM. Imaging Techniques in Alzheimer’s Disease: A Review of Applications in Early Diagnosis and Longitudinal Monitoring . Int J Mol Sci . 2021 Feb 20 [cited 2025 Apr 4]; 22 ( 4 ): 2110 . Available from: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7924338/ OpenUrl CrossRef PubMed 2. ↵ Chouliaras L , O’Brien JT . The use of neuroimaging techniques in the early and differential diagnosis of dementia . Mol Psychiatry . 2023 Oct [cited 2025 Apr 4]; 28 ( 10 ): 4084 – 97 . Available from: https://www.nature.com/articles/s41380-023-02215-8 OpenUrl CrossRef PubMed 3. ↵ Rau A , Urbach H. The MTA score—simple and reliable, the best for now? Eur Radiol . 2021 [cited 2025 Apr 4]; 31 ( 12 ): 9057 – 9 . Available from: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8589815/ OpenUrl CrossRef PubMed 4. ↵ Scheltens P , Leys D , Barkhof F , Huglo D , Weinstein HC , Vermersch P , et al. At-rophy of medial temporal lobes on MRI in “probable” Alzheimer’s disease and normal ageing: Diagnostic value and neuropsychological correlates . J Neurol Neurosurg Psychiatry . 1992 Oct 1 [cited 2025 May 16]; 55 ( 10 ): 967 – 72 . Available from: https://jnnp.bmj.com/content/55/10/967 OpenUrl Abstract / FREE Full Text 5. ↵ Lombardi G , Crescioli G , Cavedo E , Lucenteforte E , Casazza G , Bellatorre AG , et al. Structural magnetic resonance imaging for the early diagnosis of dementia due to Alzheimer’s disease in people with mild cognitive impairment . Cochrane Database Syst Rev . 2020 [cited 2025 Apr 4];(3). Available from: https://www.cochranelibrary.com/cdsr/doi/10.1002/14651858.CD009628.pub2/full 6. ↵ Heckemann RA , Hajnal JV , Aljabar P , Rueckert D , Hammers A. Automatic anatomical brain MRI segmentation combining label propagation and decision fusion . Neuroimage . 2006 Oct 15; 33 ( 1 ): 115 – 26 . Available from: https://www.ncbi.nlm.nih.gov/pubmed/16860573 OpenUrl CrossRef PubMed Web of Science 7. ↵ Heckemann RA , Keihaninejad S , Aljabar P , Gray KR , Nielsen C , Rueckert D , et al. Automatic morphometry in Alzheimer’s disease and mild cognitive impairment . Neuroimage . 2011 Jun 15; 56 ( 4 ): 2024 – 37 . Available from: https://www.ncbi.nlm.nih.gov/pubmed/21397703 OpenUrl CrossRef PubMed 8. ↵ Heckemann RA , Hammers A , Rueckert D , Aviv RI , Harvey CJ , Hajnal JV . Automatic volumetry on MR brain images can support diagnostic decision making . BMC Med Imaging . 2008 May 23; 8 : 9 . Available from: https://www.ncbi.nlm.nih.gov/pubmed/18500985 OpenUrl PubMed 9. Yamanakkanavar N , Choi JY , Lee B. MRI Segmentation and Classification of Human Brain Using Deep Learning for Diagnosis of Alzheimer’s Disease: A Survey . Sensors (Basel) . 2020 Jun 7 [cited 2025 Apr 4]; 20 ( 11 ): 3243 . Available from: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7313699/ OpenUrl PubMed 10. ↵ Zhou K , Piao S , Liu X , Luo X , Chen H , Xiang R , et al. A novel cascade machine learning pipeline for Alzheimer’s disease identification and prediction . Front Aging Neurosci . 2023 Jan 16 [cited 2025 Apr 4]; 14 : 1073909 . Available from: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9884698/ OpenUrl PubMed 11. ↵ Saoud LS , AlMarzouqi H. Explainable early detection of Alzheimer’s disease using ROIs and an ensemble of 138 3D vision transformers . Sci Rep . 2024 Nov 12 [cited 2025 Apr 4]; 14 ( 1 ): 27756 . Available from: https://www.nature.com/articles/s41598-024-76313-0 OpenUrl PubMed 12. ↵ Heckemann RA , Keihaninejad S , Aljabar P , Rueckert D , Hajnal JV , Hammers A , et al. Improving intersubject image registration using tissue-class information benefits robustness and accuracy of multi-atlas based anatomical segmentation . Neuroimage . 2010 May 15; 51 ( 1 ): 221 – 7 . Available from: https://www.ncbi.nlm.nih.gov/pubmed/20114079 OpenUrl CrossRef PubMed Web of Science 13. ↵ Galovic M , Baudracco I , Wright-Goff E , Pillajo G , Nachev P , Wandschneider B , et al. Association of Piriform Cortex Resection With Surgical Outcomes in Patients With Temporal Lobe Epilepsy . JAMA Neurol . 2019 Jun 1 [cited 2025 Apr 4]; 76 ( 6 ): 690 – 700 . Available from: https://doi.org/10.1001/jamaneurol.2019.0204 OpenUrl PubMed 14. ↵ Moein Taghavi H , Karimpoor M , van Staalduinen EK , Young CB , Georgiadis M , Leventis S , et al. Elevated tau in the piriform cortex in Alzheimer’s but not Parkinson’s disease using PET‐MR . Alzheimers Dement (Amst) . 2024 Nov 23 [cited 2025 Apr 4]; 16 ( 4 ): e70040 . Available from: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC11585164/ OpenUrl 15. ↵ Hammers A , Allom R , Koepp MJ , Free SL , Myers R , Lemieux L , et al. Three‐dimensional maximum probability atlas of the human brain, with particular reference to the temporal lobe . Hum Brain Mapp . 2003 Aug [cited 2025 Apr 4]; 19 ( 4 ): 224 – 47 . Available from: https://onlinelibrary.wiley.com/doi/10.1002/hbm.10123 OpenUrl CrossRef PubMed Web of Science 16. Gousias IS , Rueckert D , Heckemann RA , Dyet LE , Boardman JP , Edwards AD , et al. Automatic segmentation of brain MRIs of 2-year-olds into 83 regions of interest . Neuroimage . 2008 Apr 1 [cited 2025 Apr 4]; 40 ( 2 ): 672 – 84 . Available from: https://www.sciencedirect.com/science/article/pii/S1053811907010634 OpenUrl CrossRef PubMed Web of Science 17. Faillenot I , Heckemann RA , Frot M , Hammers A. Macroanatomy and 3D probabilistic atlas of the human insula . Neuroimage . 2017 Apr 15 [cited 2025 Apr 4]; 150 : 88 – 98 . Available from: https://www.sciencedirect.com/science/article/pii/S1053811917300964 OpenUrl CrossRef PubMed 18. Wild HM , Heckemann RA , Studholme C , Hammers A. Gyri of the human parietal lobe: Volumes, spatial extents, automatic labelling, and probabilistic atlases . Plos One . 2017 Aug 28 [cited 2025 Apr 4]; 12 ( 8 ): e0180866 . Available from: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0180866 OpenUrl CrossRef PubMed 19. Ahsan RL , Allom R , Gousias IS , Habib H , Turkheimer FE , Free S , et al. Volumes, spatial extents and a probabilistic atlas of the human basal ganglia and thalamus . Neuroimage . 2007 Nov 1; 38 ( 2 ): 261 – 70 . Available from: https://www.ncbi.nlm.nih.gov/pubmed/17851093 OpenUrl CrossRef PubMed Web of Science 20. ↵ Hammers A , Chen CH , Lemieux L , Allom R , Vossos S , Free SL , et al. Statisti-cal neuroanatomy of the human inferior frontal gyrus and probabilistic atlas in astandard stereotaxic space . Hum Brain Mapp . 2007 Jan ; 28 ( 1 ): 34 – 48 . OpenUrl CrossRef PubMed 21. ↵ Steinbart D , Yaakub SN , Steinbrenner M , Guldin LS , Holtkamp M , Keller SS , et al. Automatic and manual segmentation of the piriform cortex: Method development and validation in patients with temporal lobe epilepsy and Alzheimer’s disease . Hum Brain Mapp . 2023 Apr 13 [cited 2025 Apr 4]; 44 ( 8 ): 3196 – 209 . Available from: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10171523/ OpenUrl CrossRef PubMed 22. ↵ Klein-Koerkamp Y , Heckemann RA , Ramdeen KT , Moreaud O , Keignart S , Krainik A , et al. Amygdalar atrophy in early Alzheimer’s disease . Curr Alzheimer Res . 2014 Mar ; 11 ( 3 ): 239 – 52 . Available from: https://www.ncbi.nlm.nih.gov/pubmed/24484275 OpenUrl CrossRef PubMed 23. ↵ Heckemann RA , Ledig C , Gray KR , Aljabar P , Rueckert D , Hajnal JV , et al. Brain Extraction Using Label Propagation and Group Agreement: Pincram . PLoS One . 2015 ; 10 ( 7 ): e0129211 . Available from: https://www.ncbi.nlm.nih.gov/pubmed/26161961 OpenUrl CrossRef PubMed 24. ↵ Powers DMW . Evaluation: From precision, recall and F-measure to ROC , informedness, markedness and correlation . 2020 [cited 2025 May 16]. Available from: http://arxiv.org/abs/2010.16061 25. ↵ Scheltens P , Launer LJ , Barkhof F , Weinstein HC , van Gool WA . Visual assessment of medial temporal lobe atrophy on magnetic resonance imaging: Interobserver reliability . J Neurol . 1995 Sep ; 242 ( 9 ): 557 – 60 . Available from: https://www.ncbi.nlm.nih.gov/pubmed/8551316 OpenUrl CrossRef PubMed Web of Science 26. ↵ Liu RSN , Lemieux L , Bell GS , Sisodiya SM , Shorvon SD , Sander JWAS , et al. A longitudinal study of brain morphometrics using quantitative magnetic resonance imaging and difference image analysis . Neuroimage . 2003 Sep 1 [cited 2025 May 19]; 20 ( 1 ): 22 – 33 . Available from: https://www.sciencedirect.com/science/article/pii/S1053811903002192 OpenUrl CrossRef PubMed Web of Science 27. ↵ Yaakub SN , Heckemann RA , Keller SS , McGinnity CJ , Weber B , Hammers A. On brain atlas choice and automatic segmentation methods: A comparison of MAPER & FreeSurfer using three atlas databases . Sci Rep . 2020 Feb 18 [cited 2025 Apr 4]; 10 ( 1 ): 2837 . Available from: https://www.nature.com/articles/s41598-020-57951-6 OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted September 30, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Segmentation-Guided Development of Visual Classification Criteria for Alzheimer’s Disease Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Segmentation-Guided Development of Visual Classification Criteria for Alzheimer’s Disease Mirjam Peters , David Steinbart , Alexander Hammers , Rolf A. Heckemann , the Alzheimer’s Disease Neuroimaging Initiative medRxiv 2025.09.29.25336750; doi: https://doi.org/10.1101/2025.09.29.25336750 Share This Article: Copy Citation Tools Segmentation-Guided Development of Visual Classification Criteria for Alzheimer’s Disease Mirjam Peters , David Steinbart , Alexander Hammers , Rolf A. Heckemann , the Alzheimer’s Disease Neuroimaging Initiative medRxiv 2025.09.29.25336750; doi: https://doi.org/10.1101/2025.09.29.25336750 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Radiology and Imaging Subject Areas All Articles Addiction Medicine (570) Allergy and Immunology (864) Anesthesia (302) Cardiovascular Medicine (4445) Dentistry and Oral Medicine (444) Dermatology (383) Emergency Medicine (609) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1515) Epidemiology (15236) Forensic Medicine (30) Gastroenterology (1127) Genetic and Genomic Medicine (6610) Geriatric Medicine (669) Health Economics (1000) Health Informatics (4549) Health Policy (1370) Health Systems and Quality Improvement (1613) Hematology (543) HIV/AIDS (1266) Infectious Diseases (except HIV/AIDS) (15926) Intensive Care and Critical Care Medicine (1104) Medical Education (623) Medical Ethics (147) Nephrology (668) Neurology (6613) Nursing (346) Nutrition (999) Obstetrics and Gynecology (1147) Occupational and Environmental Health (957) Oncology (3341) Ophthalmology (975) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (665) Pediatrics (1694) Pharmacology and Therapeutics (693) Primary Care Research (714) Psychiatry and Clinical Psychology (5458) Public and Global Health (9244) Radiology and Imaging (2205) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1197) Rheumatology (596) Sexual and Reproductive Health (715) Sports Medicine (530) Surgery (713) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a0234f5ebf074193',t:'MTc3OTg2Njk4Mg=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-21T05:10:58.409756+00:00

License: CC-BY-4.0