The Cognitive Safety Net: Comparing Human and AI Diagnostic Reasoning during Complex Clinical Situations

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 54,757 characters · extracted from preprint-html · click to expand
The Cognitive Safety Net: Comparing Human and AI Diagnostic Reasoning during Complex Clinical Situations | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search The Cognitive Safety Net: Comparing Human and AI Diagnostic Reasoning during Complex Clinical Situations Antonin Audion , Mathieu Henkeme , View ORCID Profile Baptiste Balanca , View ORCID Profile Marc Lilot , Thomas Rimmelé , Ismail Abaakil , View ORCID Profile Jean-Christophe Cejka doi: https://doi.org/10.1101/2025.10.06.25335641 Antonin Audion 1 Centre for Medical Simulation, University Claude Bernard Lyon 1, CLESS (‘Centre Lyonnais d’Enseignement par Simulation en Santé’) , 
SimuLyon, Lyon, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mathieu Henkeme 1 Centre for Medical Simulation, University Claude Bernard Lyon 1, CLESS (‘Centre Lyonnais d’Enseignement par Simulation en Santé’) , SimuLyon, Lyon, France 3 Hospices Civils of Lyon, Department of Anaesthesia and Intensive Care, Edouard Herriot University Hospital , Lyon, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site Baptiste Balanca 1 Centre for Medical Simulation, University Claude Bernard Lyon 1, CLESS (‘Centre Lyonnais d’Enseignement par Simulation en Santé’) , SimuLyon, Lyon, France 2 University Claude Bernard Lyon 1, INSERM U1290, Research on Healthcare Performance (RESHAPE) , Lyon, France 5 Hospices Civils of Lyon, Neuro Anaesthesiology and Intensive Care Department, Wertheimer University Hospital , Bron, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Baptiste Balanca Marc Lilot 1 Centre for Medical Simulation, University Claude Bernard Lyon 1, CLESS (‘Centre Lyonnais d’Enseignement par Simulation en Santé’) , SimuLyon, Lyon, France 2 University Claude Bernard Lyon 1, INSERM U1290, Research on Healthcare Performance (RESHAPE) , Lyon, France 4 Hospices Civils of Lyon, Paediatric Cardiac, Thoracic, and Vascular Anaesthesia and Intensive Care, Unit 11, Medical-Surgical Department of Congenital Cardiology of the Fetus, Child, and Adult, Louis Pradel University Hospital , Bron, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Marc Lilot Thomas Rimmelé 1 Centre for Medical Simulation, University Claude Bernard Lyon 1, CLESS (‘Centre Lyonnais d’Enseignement par Simulation en Santé’) , SimuLyon, Lyon, France 2 University Claude Bernard Lyon 1, INSERM U1290, Research on Healthcare Performance (RESHAPE) , Lyon, France 3 Hospices 
Civils of Lyon, Department of Anaesthesia and Intensive Care, Edouard Herriot University Hospital , Lyon, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ismail Abaakil 1 Centre for Medical Simulation, University Claude Bernard Lyon 1, CLESS (‘Centre Lyonnais d’Enseignement par Simulation en Santé’) , SimuLyon, Lyon, France 3 Hospices Civils of Lyon, Department of Anaesthesia and Intensive Care, Edouard Herriot University Hospital , Lyon, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jean-Christophe Cejka 1 Centre for Medical Simulation, University Claude Bernard Lyon 1, CLESS (‘Centre Lyonnais d’Enseignement par Simulation en Santé’) , SimuLyon, Lyon, France 2 University Claude Bernard Lyon 1, INSERM U1290, Research on Healthcare Performance (RESHAPE) , Lyon, France 3 Hospices Civils of Lyon, Department of Anaesthesia and Intensive Care, Edouard Herriot University Hospital , Lyon, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jean-Christophe Cejka For correspondence: jean-christophe.cejka{at}inserm.fr Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Background Diagnostic error in high-stakes clinical environments remains a significant cause of preventable harm. While a new generation of customisable digital cognitive aids (cDCAs) has shown a capacity to improve performance, achieve robust competence, and double learning retention, the potential for artificial intelligence (AI) to augment the foundational, anticipatory reasoning that precedes action is not well understood. This study aims to compare the diagnostic reasoning strategies of experienced anaesthesiology residents with those of a large language model (LLM) during a simulated, complex and realistic anaesthesiology scenario. 
Methods We conducted a comparative analysis within a high-fidelity simulation randomised controlled trial (Anticipamax, NCT06487208). Thirty-four experienced anaesthesiology residents and a conversational LLM (ChatGPT-4) managed a perioperative shock of deliberately multifactorial aetiology. Diagnostic lotteries—sets of hypotheses with assigned plausibility scores—were collected before and after the simulation. We implemented a novel analytical framework based on the social choice Condorcet method, not only to rank individual hypotheses but also to compare the complete diagnostic strategies as the case evolved. Results The AI and residents demonstrated distinct reasoning profiles. Initially, the AI produced an exhaustive, non-hierarchical analysis, correctly identifying septic shock among its top, similarly-scored hypotheses. Residents, in contrast, employed a pragmatic, focused strategy, prioritising immediate surgical risks and unanimously identifying an experience-based risk (gas embolism) that the AI systematically overlooked, and consistently reserved a portion of their reasoning for uncertainty, termed ‘Place for Doubt’. After the clinical evolution, both converged on septic shock. A ‘complex scrutiny’ analysis of the overall strategies revealed that the residents’ focused and adaptive reasoning was consistently ranked as strategically superior to the AI’s exhaustive but diluted approach. Conclusions AI demonstrates a powerful capacity for broad diagnostic anticipation, acting as a potential safeguard against premature diagnostic closure. Experienced residents exhibit a strategically superior reasoning process in its focus and adaptation. Our findings support a powerful synergy where the AI serves as a ‘Cognitive Safety Net’ to augment, not replace, the contextualised judgment of the human practitioner. What is already known on this topic Human error in healthcare is a prominent global cause of death. 
‘Traditional’ cognitive support tools (e.g., paper checklists) have been shown to improve technical skills during medical crises, but their impact on non-technical skills is limited and their clinical adoption remains low. A new generation of customisable digital cognitive aids (cDCAs) can significantly improve both technical and non-technical performance, fostering better team management and crisis resolution. Information on how clinicians deliver the best anticipatory clinical reasoning is scarce. Recent work comparing machine-learning models to clinicians in trauma triage found comparable accuracy but only moderate agreement, suggesting a collaborative paradigm and motivating deeper analyses of the reasoning process itself. However, a critical gap remains in understanding the underlying nature of the diagnostic reasoning strategies that lead to these outcomes. The ‘how’ of human and AI reasoning, especially in dynamic, anticipatory clinical tasks, is not well understood. What this study adds This is the first study to directly compare in action the diagnostic reasoning strategies of clinicians and a large language model (AI). It introduces a novel analytical framework based on the Condorcet social choice method to move beyond simple performance scores and rigorously model and rank the overall quality of diagnostic strategies in a simulated daily complex situation. The findings support a model of human-AI complementarity, where the AI excels at broad, exhaustive analysis, while clinicians demonstrate superior, focused, and adaptive strategic reasoning, suggesting the human’s role as a meta-cognitive supervisor of AI-driven exhaustive but ‘diluted’ insights. Introduction Medical error in complex, high-pressure environments like the operating theatre remains a leading cause of preventable harm and mortality 1 , 2 . 
Many such errors stem not from a fundamental lack of knowledge, but from failures in cognition under duress, where cognitive overload can lead to diagnostic fixation and critical omissions. Inspired by high-reliability industries, healthcare has adopted cognitive support tools to mitigate these risks. The first generation of these tools, primarily paper-based checklists and static algorithms, proved effective at improving adherence to technical protocols but showed limited impact on crucial non-technical skills such as team communication and dynamic decision-making 3 . More recently, customisable digital cognitive aids (cDCAs) like MAX have overcome many of these limitations, with multiple randomised controlled trials demonstrating their capacity to double learning retention 4 , significantly enhance both technical and non-technical performance during simulated high-stakes scenarios 5 – 9 , and cut total error—defined as the sum of systematic deviation from standards (bias²) and inter-individual variability (variance)—by 75% 10 . This progress raises a new scientific question: now that we have tools to better execute tasks, can we augment the very foundation of clinical action—the anticipatory reasoning process itself? The emergence of powerful artificial intelligence (AI) offers a potential pathway. Recent prospective evidence in pre-hospital trauma care showed that a machine-learning model and clinicians achieved similar predictive performance but only moderate agreement, with complementary error profiles; crucially, combining human and algorithm yielded a net sensitivity gain 11 . This strengthens the rationale for our strategy-level comparison. Rather than testing ‘who is more accurate’, our study models ‘how each system reasons’ under uncertainty, using a unique analytical framework derived from social choice theory—the Condorcet method 12 . 
By comparing not just static lists, but the evolution of ranked priorities and the overall strategic quality of diagnostic ‘lotteries’ , we can directly compare the judgments of clinicians and of a generalist LLM (ChatGPT-4). Dissecting the strengths, weaknesses, and potential synergies of these two forms of intelligence is a critical step towards a future where clinical reasoning is not replaced, but robustly augmented. Methods Context This comparative analysis was conducted as an ancillary study using data from the ‘Anticipamax’ randomised controlled trial ( ClinicalTrials.gov ID NCT06487208 ) 13 . The parent trial’s protocol included the systematic collection of diagnostic lotteries from participants. The main trial included 34 residents, and the present human-AI comparative analysis focused on the diagnostic lotteries collected from the first 25 consecutively enrolled participants, a sample size defined by the logistical constraints of the parallel master’s degree project by AA. The standardised, high-fidelity simulation scenario is detailed in ‘Supplementary Appendix 1’. It involved the management of a poly-pathological patient who subsequently developed a state of shock intraoperatively. The aetiology of the shock was deliberately designed to be multifactorial, with no single ‘correct’ answer revealed at the end of the scenario. Ethics statement The Anticipamax study used high-fidelity simulation with volunteer anaesthesiology residents. No patient data were used. Participants provided informed consent to participate and to the anonymised use of their responses for research. According to local regulation, the project was accepted by the Ethical Committee of the French Society of Anaesthesiology-Intensive Care (CERAR, ‘Comité d’Éthique pour la Recherche en Anesthésie-Réanimation’ ) as educational research without patient involvement (IRB 00010254- 2024– 019). 
Study Design Participants were briefed on the patient’s clinical history as part of a pre-interventional handover from a departing ‘colleague’ (the same simulation instructor throughout the trial). Immediately following this briefing, they were instructed to compile a list of foreseeable complications, assigning a plausibility score to each. This process was repeated upon completion of the simulated scenario, yielding two diagnostic lotteries—lists of diagnoses, each assigned an estimated plausibility—for each participant, designated ‘Before’ and ‘After’. Participant Groups The study included two participant groups. The human cohort comprised 34 experienced anaesthesiology residents aged 26 to 30 (second to fifth year of medical specialty training in the French system (DES 2–5), roughly corresponding to specialty training years ST3–ST6/7 in the UK system). This specific population was chosen deliberately. Being in an advanced stage of their curriculum, they possess a comprehensive and, critically, an up-to-date knowledge of current guidelines and pathophysiology. This minimises the risk of knowledge deficit or simple forgetting being a confounding factor, allowing the study to focus specifically on the structure and strategy of their reasoning process when faced with uncertainty. The AI counterpart was the ChatGPT-4 large language model (OpenAI, California, USA). For the ‘Before’ analysis, it was prompted with the identical text-based case information provided to the residents. For the ‘After’ analysis, to ensure a fair comparison, the key chronological events of each unique human simulation session were transcribed and used to prompt the AI for its revised diagnostic lottery. Analytical Framework: The Condorcet Method To move beyond simple performance scores, we developed a novel framework based on the Condorcet method, a robust tool from social choice theory 12 . This approach allowed us to model and rank not just individual hypotheses but entire diagnostic strategies. 
Each participant’s list of ranked diagnoses was treated as a ‘diagnostic lottery’—a portfolio of plausibilities reflecting their reasoning under uncertainty. The Condorcet method was deliberately chosen as it allows for the evaluation of holistic diagnostic strategies rather than isolated variables. By analyzing pairwise preferences within each diagnostic lottery, this method preserves the relational structure of clinical reasoning, enabling a comparison of the overall strategic quality and adaptability of human versus AI judgment under uncertainty. Following data standardisation, where raw diagnostic terms were algorithmically harmonised using a predefined lexicon, our analysis proceeded in two stages. First, a ‘Simple Scrutiny’ ranked individual hypotheses by conducting pairwise ‘duels’ to identify which diagnoses were most consistently preferred across all lotteries. Second, a ‘Complex Scrutiny’ assessed the overall quality of each complete diagnostic strategy (lottery) by comparing it against every other strategy, yielding a robust ranking of the most strategically sound reasoning approaches. This dual-level analysis reveals the structure, focus, and adaptability of reasoning for both clinicians and the AI. The detailed computational protocol, theoretical justification, and mathematical underpinnings of this method are provided in Supplementary Appendices 2 and 3. Results Ranking of Diagnostic Hypotheses The Simple Condorcet Scrutiny revealed a clear evolution in diagnostic reasoning. Before the case management, clinicians ranked haemorrhagic shock highest, whilst the AI prioritised septic shock. After the crisis, both groups converged on septic shock as the dominant hypothesis. Tables 1.1 and 1.2 show the top four most dominant hypotheses for each group before (post-briefing) and at the very end of the simulation. 
View this table: View inline View popup Download powerpoint Table 1.1: Ranking of Diagnostic Hypotheses (Simple Condorcet Scrutiny) before the simulation. View this table: View inline View popup Download powerpoint Table 1.2: Ranking of Diagnostic Hypotheses (Simple Condorcet Scrutiny) after the simulation. View this table: View inline View popup Download powerpoint Table 2: Mean Strategic Performance Scores (POPU & GRANU) Ranking of Overall Diagnostic Strategies The Complex Scrutiny, evaluating each entire diagnostic lottery, showed that the clinicians’ strategies were, on average, ranked higher than the AI’s. The clinicians demonstrated a marked improvement in their strategic popularity score after the crisis (80.7 to 99.0), indicating a highly effective adaptation to the clinical data. Visualisation of Diagnostic Reasoning Structures - Chord Graphic representation To visualise the underlying structure and dynamics of the diagnostic reasoning, the pairwise dominance relationships from the Simple Condorcet Scrutiny were rendered as Chord diagrams ( Figure 1 ). These diagrams function as maps of the collective diagnostic mind of each group. In these maps, a dominant hypothesis appears as a node with numerous, thick outgoing arrows, indicating it consistently won its pairwise duels against other hypotheses. A highly contested hypothesis , in contrast, shows both significant incoming and outgoing arrows, marking it as a central but debated topic in the reasoning process. Finally, the overall structure reveals the nature of the group’s reasoning: a simple, hierarchical diagram with one clear dominant node suggests strong consensus and diagnostic convergence, whereas a complex, web-like diagram indicates a broader and less hierarchical differential. Download figure Open in new tab Figure 1: Comparative Analysis of Diagnostic Reasoning Structures via Condorcet Scrutiny. 
The figure presents a 2x2 grid of Chord diagrams, visualising the results of the Simple Condorcet Scrutiny for the four experimental groups. Each main arc on the circumference represents a single diagnostic hypothesis, with colours kept consistent across all four diagrams for direct comparison. The directed arrows between arcs illustrate the dominance relationship from a source (dominant) to a target (dominated) hypothesis. The thickness of each arrow is proportional to the total number of pairwise ‘duels’ won by the source hypothesis against the target hypothesis across all diagnostic lotteries within that group. The panels show: (A) Experienced Residents - Before: The baseline reasoning structure of the anaesthesiology residents prior to the simulated event. (B) AI - Before: The baseline reasoning structure of the AI based on the same initial case information. (C) Experienced Residents - After: The adapted reasoning structure of the residents following the management of the simulated event. (D) AI - After: The adapted reasoning structure of the AI after being prompted with the evolving clinical data. Discussion The fundamental nature of clinical reasoning is not a simple dichotomy between right and wrong, but a dynamic interplay of strategy and uncertainty 14 , 15 . This study reveals that human and artificial intelligences do not simply compete on accuracy but operate as distinct, complementary reasoning engines . This discovery moves the conversation beyond a simplistic human-versus-machine narrative to a more sophisticated understanding of human-AI collaboration in medicine, where the goal is not replacement, but the creation of a more robust, resilient form of augmented clinical intelligence. Unveiling Two Minds at Work: The Pragmatic Filter vs. the Exhaustive Map The core of our study lies in the dissection of two fundamentally different approaches to resolving diagnostic uncertainty. 
The AI’s approach is one of exhaustive, high-fidelity transmission. Its initial diagnostic lottery, which assigned nearly equal plausibility scores to its top hypotheses (septic shock at 54, acute hepatic failure at 52, haemorrhagic shock at 48 and respiratory decompensation at 44), exemplifies a systematic, probabilistic mapping of the data. This lack of a strong initial hierarchy, while strategically less decisive, provides a powerful breadth, reducing the initial ‘universal’ (i.e. all existing diagnoses) uncertainty to a manageable set of possibilities, thereby acting as a robust safeguard against cognitive fixation and premature diagnostic closure. In stark contrast, the residents’ reasoning is a process of aggressive, context-sensitive filtering. Their lotteries were sharply hierarchical, with haemorrhagic shock (48) clearly dominating septic shock (38). Crucially, this focused strategy included the unanimous identification of gas embolism (22)—a context-specific risk entirely absent from the AI’s initial analysis. This finding exemplifies a classical Naturalistic Decision Making (NDM) model 15 , where experts recognize familiar patterns associated with the specific context—liver surgery—rather than analytically deducing solutions. This intuitive pattern-matching, born from embodied experience, represents a first, distinct advantage of human cognition. The most profound distinction, however, resides in the fourth diagnostic choice. While the AI proposes another specific clinical entity with a similar plausibility (Respiratory Decompensation, score 44), the residents formalize a core principle of expert cognition: the ‘Place for Doubt’ (score 5). This is not a failure to hypothesize, but rather an efficient metacognitive strategy—the explicit allocation of cognitive resources to residual uncertainty. 
After the crisis, even with a near-certain primary diagnosis of septic shock, the residents maintained this ‘Place for Doubt’ in their final lottery (tied for 4th rank, score 8), demonstrating that this cognitive safety net persists even as certainty increases. This concept of complementary reasoning profiles is further substantiated by recent work in trauma care 11 , 16 , which found only moderate agreement and distinct error patterns between machine learning models and clinicians, with combined performance exceeding that of either alone. An Information-Theoretic Perspective: Managing Clinical Entropy These contrasting strategies can be powerfully framed using the lens of information theory 17 . A complex clinical situation represents a system of high entropy—a measure of disorder and uncertainty. Effective clinical reasoning is a battle to reduce this entropy and find the ‘signals’ (i.e. the correct diagnoses) within a sea of noise 18 . Humans and AI employ radically different, yet complementary, strategies to win this battle. The AI’s multi-hypothesis lottery represents a state of relatively high entropy, acting as a ‘ broadband communication channel’ that faithfully transmits all potential signals. Conversely, the residents’ focused lottery represents a low-entropy state; the human expert acts as a ‘ highly selective adaptive filter’ , using the non-linearities of experience to amplify the perceived signal and suppress the noise, thus making a decision actionable. Interestingly, the ‘Place for Doubt’ is the human’s unique method for managing irreducible entropy —acknowledging the noise that always remains in a complex biological system. The Chord diagrams offer a phenomenological visualization of these entropic states . The complex, web-like structure of the AI’s ‘Before’ diagram ( Fig. 1B ), with its numerous, fine, balanced arrows, is the graphical signature of a high-entropy , non-hierarchical state. 
Conversely, the simpler, more radial structure of the residents’ ‘Before’ diagram ( Fig. 1A ), dominated by a few thick arrows, visually represents a low-entropy state of strong consensus and clear strategic focus . The Synthesis: A New Model for Augmented Reasoning —The Cognitive Safety Net Recognizing these distinct strategies allows us to propose a new model of collaboration: the ‘Cognitive Safety Net’ . In this model, the true value of AI is not to provide a ‘better’ answer, but to serve as an exhaustive knowledge base that prevents the primary failure mode of expert cognition: fixation bias. Its broad analysis acts as the cognitive safety net, prompting the clinician to consider possibilities outside their immediate focus. The human practitioner, however, remains the ultimate strategist and meta-cognitive supervisor . They are responsible for applying ‘contextual wisdom’ from experience-based heuristics—context, experience, and an understanding of the patient’s trajectory— to the probabilistic outputs of the AI and managing uncertainty—a process exemplified by their ‘Place for Doubt’. The clinician’s role evolves from being a repository of knowledge to being an expert arbiter of it . This supervisory capacity is critical, as a paradox of AI in medicine is that its very logical coherence can induce anchoring bias in the user. The clinician’s role is therefore to step back and maintain a global perspective. This model also clarifies the distinction between knowledge and competence . The AI provides structured information (‘knowledge’), but it cannot manage a team, perform a procedure, or handle the immense cognitive load of a real high-stakes situation (‘competence’). As Kassirer articulated, true clinical competence is not merely the possession of knowledge, but its effective application under conditions of uncertainty 14 —a process that remains fundamentally human. 
This transition from knowledge to competence is precisely where the clinician is most vulnerable to cognitive overload, a challenge that makes the supervisory role over an AI untenable without cognitive support. This is where efficient procedural cognitive aids become essential. In an optimal workflow, the AI assists in the diagnostic phase (‘ What is happening’ ), after which a proven, action-oriented cDCA guides the team through validated procedures (‘ How to manage it ’). By offloading the procedural burden, these tools free the clinician’s cognitive bandwidth to effectively supervise all necessary data—which include the AI’s insights— and manage the evolving clinical situation. Current evidence already indicates that tailored cDCAs can considerably decrease human errors, improve both technical and non-technical skills across a wide range of situations 3 , 5 – 9 , 13 , and double learning memorization 4 . Thus, further enhancing human capacity with this dual-support system—AI for enriching reasoning, cDCAs for contextualized action—is a pragmatic, ethical, and highly efficient path to reducing error in healthcare. Implications and Future Horizons: Towards a Risk-Weighted Analysis The ‘Cognitive Safety Net’ model suggests that AI integration should prioritize tools that broaden the diagnostic horizon and highlight not just the most likely events, but also the most critical ‘black swan’ scenarios ( ‘What if’) —those that are highly unlikely but would be catastrophic. This would elevate the AI from an analytical tool to a true partner in navigating high-stakes uncertainty. In practice, this synergy could be channelled through cDCAs, presenting context-specific workflows while embedding AI-derived insights among other contextualized data, in a usable form at the bedside. Strengths and Limitations The primary strength of this study is its novel methodology. 
By using a realistic scenario with deliberate ambiguity and applying the Condorcet method to ‘diagnostic lotteries’, we shifted the analytical focus from the diagnostic result to the reasoning process . Our multi-layered evaluation—ranking individual hypotheses via ‘Simple Scrutiny’ and entire strategies via ‘Complex Scrutiny’—provided a granular framework for modeling the how of clinical reasoning, not just the what . A potential limitation is the use of a generalist LLM (ChatGPT-4). However, this was a deliberate choice to compare the specialized, experience-driven reasoning of clinicians against a powerful, non-specialist form of general logical inference, revealing their foundational differences. Future research should compare these findings with medically fine-tuned models. Similarly, we chose experienced residents to focus on the structure of reasoning in a cohort with a highly current knowledge base, neutralizing the variable of knowledge obsolescence. Our research widely confirms that when equipped with our cDCAs, this population’s performance is considerably enhanced 5 – 10 , 13 , mitigating a potential performance gap with senior clinicians, who remain subject to human error in action. Another potential limitation is the sample size discrepancy between the full Anticipamax trial cohort (N=34) and the cohort included in this specific human-AI reasoning analysis (N=25). This difference is due to the nature of our work as a planned ancillary study, with a timeline constrained by a parallel master’s degree project by AA. However, we argue that this does not invalidate our conclusions for two reasons. First, a sample of 25 experienced residents is substantial for a study focused on modeling the process and structure of reasoning, rather than achieving statistical power for a clinical outcome. 
Second, the reasoning profiles we identified—the AI’s exhaustive mapping versus the residents’ pragmatic filtering—emerged with remarkable consistency and clarity within this cohort. It is therefore unlikely that the inclusion of the final nine participants would have fundamentally altered these distinct strategic signatures. Conclusion This work reveals that AI is not poised to replace the clinical mind, but to augment it. While the focused, adaptive strategy of clinicians remains superior in a dynamic, high-stakes situation, AI provides a powerful, broad-based anticipatory analysis that complements human reasoning. By integrating AI as a ‘Cognitive Safety Net’ within a human-centred workflow, we can leverage the strengths of both intelligences to create a more robust and safer system of care. Ensuring future generations of AI are designed with this synergistic, human-in-the-loop principle is not only the best choice for quality of care, but also a profound ethical imperative. Data Availability All data produced in the present study are available upon reasonable request to the authors. Supplementary Information accompanies this paper: Supplementary Appendix 1. Case Scenario Briefing. Supplementary Appendix 2. Methodological Framework and Rationale. Supplementary Appendix 3. Detailed Analytical Protocol – Condorcet scores. Supplementary Appendix 1: Case Scenario Briefing Patient: Mrs. M., 51-year-old female. Procedure: Right hepatectomy via laparotomy. Clinical Background: You are the anaesthetist taking over the care of Mrs. M. The patient was recently diagnosed with colonic adenocarcinoma with two synchronous hepatic metastases. Given her age and otherwise limited comorbidities, the multidisciplinary team (MDT) meeting approved a surgical approach with curative intent. Consequently, one month ago, she underwent a right colectomy plus a radiofrequency ablation (RFA) of the hepatic dome lesion. 
The postoperative course was complicated by haemorrhagic shock from the RFA site, requiring embolization of the right hepatic artery. This subsequently led to purulent necrosis of the right lobe of the liver. Following the failure of both radiological drainage and empirical antibiotic therapy (cefotaxime and metronidazole), and in the context of persistent, low-grade sepsis, an indication for today’s right hepatectomy was confirmed. Past Medical History Medical Syncope secondary to second-degree atrioventricular block (AVBII) in 2022, leading to the insertion of a pacemaker (last checked in 2023). Familial osteoporosis. Post-smoking chronic obstructive pulmonary disease (COPD), with significant winter wheezing. Surgical Appendicular peritonitis at age 38. Femoral fracture from a fall (secondary to syncope) in 2022. Received a blood transfusion in 2022. Social & Allergies: 30-pack-year smoking history; quit 2 years ago following the diagnosis of her heart block. No significant alcohol intake. No known drug allergies (NKDA). Supplementary Appendix 2: Methodological Framework and Rationale 1. The Rationale for a Condorcet-Based Framework: A Practical Example To illustrate the robustness of the ‘Condorcet method’ compared to ‘simpler voting systems’, consider this clinical example. After a patient evaluation, three main plausible diagnoses arise (say A, B and C), and five clinicians are asked to provide their own ‘diagnostic lotteries’ ( Table A1 ): View this table: View inline View popup Download powerpoint Table A1: Example Diagnostic Lotteries from Five Clinicians The Failure of Simple Metrics A simple analysis based on total plausibility score would declare Diagnosis A the winner (Total Score: 190). Similarly, a plurality vote based on first preferences results in a tie between Diagnosis A and C (2 votes each), failing to identify a clear winner and ignoring the nuances of the other preferences. 
Both results are heavily skewed by the strong conviction of a minority (Clinicians 1 & 2). The Simple Condorcet Scrutiny: A Cyclical Paradox When we analyse the pairwise ‘duels’ based on how many clinicians prefer one option over another, a paradox emerges: Duel A vs. B: Clinicians 1, 2, 4, and 5 prefer A to B. Clinician 3 prefers B to A. -> A wins 4-1. Duel B vs. C: Clinicians 1, 2, and 3 prefer B to C. Clinicians 4 and 5 prefer C to B. -> B wins 3-2. Duel C vs. A: Clinicians 3, 4, and 5 prefer C to A. Clinicians 1 and 2 prefer A to C. -> C wins 3-2. This creates a cycle of preferences (A > B > C > A), known as a Condorcet Paradox. There is no single winning diagnosis that can defeat all others, leaving the result undecided. The Complex Scrutiny: Identifying the Winning Strategy This is where the power of comparing entire lotteries becomes evident. When we run the Complex Scrutiny (POPU and GRANU scores) on the five lotteries: View this table: View inline View popup Download powerpoint Table A2: Complex Scrutiny Results for the Example Lotteries The winning strategy is clearly that of Clinicians 1 and 2. Their lottery—{A: 60, B: 30, C: 10}—is identified as the most robust because it represents the best strategic compromise: it shows strong conviction for the most likely diagnosis (A) while still assigning a reasonable plausibility to a key differential (B), making it the most resilient option in pairwise comparisons against more polarised strategies. This example demonstrates how our method moves beyond simple votes to identify the most coherent and strategically sound reasoning process. 2. Theoretical Foundation: Randomized Condorcet Voting and the Lottery Space The analytical framework of this study is grounded in the principles of Randomized Condorcet Voting, a mathematically elegant solution to the paradoxes inherent in collective decision-making. 
The fundamental problem of standard voting systems, including the simple Condorcet method, is the potential for intransitivity of collective preferences . As shown in the example above, it is possible for a group to prefer A over B, B over C, yet prefer C over A, resulting in a cycle with no clear winner. This paradox has been a central challenge in social choice theory for centuries. The theoretical innovation, formalised in recent mathematical literature, is to resolve this paradox by changing the ‘solution space’. Instead of seeking a winner from the set of discrete candidates (our diagnoses), the method seeks a winner from the infinite set of lotteries . A lottery is a probability distribution over the set of candidates. In our study, each participant’s diagnostic list, with its assigned plausibilities, is a lottery. To apply the Condorcet principle to this new space, a method for comparing two lotteries, P and Q, is required. The preference of the electorate for P over Q is determined by the probability that a candidate randomly drawn from lottery P is preferred by a majority to a candidate randomly drawn from lottery Q. This elevates the comparison from the level of single options to the level of overall strategies. The central theorem underpinning this method is profound: under general conditions, there is always a unique Condorcet-winning lottery , even when a winning candidate does not exist. The paradox vanishes. By moving to a probabilistic solution space, the existence and uniqueness of a ‘best choice’ are guaranteed. This is the justification for our two-level analysis. The Simple Scrutiny seeks the winning diagnosis but may fail due to paradoxes. The Complex Scrutiny operates on the higher level of lotteries, providing a more stable and robust assessment that identifies the superior overall reasoning strategy. 3. 
Connecting Theory to Clinical Practice: A Bayesian and Decision-Theoretic Framework The application of this mathematical framework [19] to clinical reasoning [20] is justified by two core principles: 3.1 The Bayesian Clinician: Clinical reasoning is an inherently Bayesian process. A clinician does not operate in a world of certainties, but of probabilities. The plausibility scores assigned in a ‘diagnostic lottery’ can be seen as subjective probabilities—Bayesian priors—that are continuously updated as new information becomes available (a change in vital signs, a lab result). A clinician’s expertise lies in their ability to effectively update these probabilities in real time. Our plausibility-weighted Condorcet method is therefore a formal means of aggregating these evolving, probabilistic beliefs to find a robust group consensus. 3.2 Decision-Making as a Bet: As decision scientist and former professional poker player Annie Duke argues [21], expert decision-making under uncertainty is not about finding the ‘truth’, but about making the ‘best possible bet’ given incomplete information. Seeking a single winning diagnosis is akin to going ‘all-in’ on a single hand. It is a brittle strategy that ignores the spectrum of possibilities. Reasoning in ‘lotteries,’ by contrast, is analogous to managing a portfolio of possibilities. The clinician is not betting on one outcome but is distributing their ‘capital of belief’ across a range of potential futures. Our Complex Scrutiny, therefore, does not reward the best guess; it rewards the best betting strategy. It identifies the reasoning process that is the most robust, balanced, and likely to succeed across the full range of potential outcomes. This aligns perfectly with the reality of high-stakes clinical work, where the goal is not to be right once, but to be consistently resilient in the face of uncertainty. 
Supplementary Appendix 3: Detailed Analytical Protocol – Condorcet scores This document details the step-by-step computational protocol used to analyse the diagnostic lotteries and generate the Condorcet-based rankings. Condorcet scores were originally designed by Nicolas de Condorcet (1743–1794, scientist, mathematician, philosopher, politician) to identify the candidate who best reflects the collective preference of an electorate, i.e. the one who would defeat every other rival in a head-to-head contest [12]. 1. Data Standardisation The initial step consisted of standardising all diagnostic terms to ensure semantic consistency. Raw diagnostic lotteries from all participants (human and AI) were compiled. Each diagnostic term was compared against a master reference file of approved standardised terms. If a term did not have an exact match, it was compared against a second file containing known variants. If a match was found, the term was replaced by its corresponding standardised term. Any remaining non-standardised terms were flagged for manual review and categorisation by an experienced clinician (JCC). A final verification of the standardisation for each term was automatically performed by a purpose-configured GPT (ChatGPT-4, OpenAI, California). 2. Simple Condorcet Scrutiny: Hypothesis Ranking This analysis aimed to rank individual diagnostic hypotheses based on their dominance. Creation of the Normalised Plausibility Matrix ([PN]): A matrix was constructed where each row i represents a participant’s lottery and each column j represents a standardised hypothesis. The cell PN(i,j) contains the plausibility score for that hypothesis in that lottery (or 0 if absent). Creation of the Pairwise Wins Matrix ([ARROWS]): An N x N matrix (where N is the number of unique hypotheses) was created. For each lottery, a ‘duel’ was conducted between every pair of hypotheses (j, k). If PN(i,j) > PN(i,k), the cell ARROWS(j,k) was incremented by 1. 
The final matrix thus contains the total number of times each hypothesis ‘won’ a direct comparison against another in terms of quoted plausibility. In the spirit of the Condorcet score, this is equivalent to counting the number of ‘arrows’ originating from a ‘dominant’ hypothesis and pointing towards ‘dominated’ hypotheses. Calculation of Final Hypothesis Ranking : The total Condorcet score for each hypothesis j was calculated by summing all values in its corresponding row in the [ARROWS] matrix. This final score represents its overall dominance, and hypotheses were ranked accordingly. 3. Complex Scrutiny: Strategy Ranking This analysis aimed to rank the overall quality of each complete diagnostic strategy (lottery). Creation of the Granularity Matrix ([GRANU]) : Objective: To capture the ‘granularity’ of comparisons between lotteries by counting the number of hypotheses for which one lottery dominates another. An M x M matrix (where M is the number of lotteries) was created. For each pair of lotteries (i, l) , the cell GRANU(i,l) was populated with the number of hypotheses for which lottery i had a strictly greater plausibility score than lottery l . This measures the ‘depth’ of dominance. Creation of the Popularity Matrix ([POPU]) : Objective: To simplify the ordering relationship into a binary (win/loss) outcome between lotteries, without regard to the granularity of the comparisons. An M x M matrix was created from the [GRANU] matrix. The cell POPU(i,l) was set to 1 if GRANU(i,l) > 0 , and 0 otherwise. This represents a simple binary ‘win/loss’ outcome for each lottery duel. Calculation of Final Strategy Ranking : The final Popularity Score for each lottery was the sum of its row in the [POPU] matrix. The final Granularity Score was the sum of its row in the [GRANU] matrix. These scores were then used to calculate the average strategic performance of each of the four experimental groups. 4. 
Graphical Visualisation Chord Diagram Preparation : The results of the scrutinies are formatted into a standardised JavaScript matrix. Each object in the matrix represents an ordering relationship as a connection between a ‘source’ and a ‘target’ object (e.g., ‘hypotheses’ for the simple Condorcet scrutiny, or ‘lotteries’ for the complex scrutinies), with an associated ‘value’. In each Chord diagram, an arrow with a thickness corresponding to its ‘value’ is drawn from the ‘source’ to the ‘target’. The matrix is then inserted into an HTML template file to generate a Chord diagram for each subgroup. Footnotes Competing interests: JCC is the president of a university-affiliated start-up (MEDAE) developing the cDCA used in this research. During the study, AA received salary support from MEDAE for a master’s degree internship. The other authors declare no competing interests. Funding: No specific funding was received for this work. References v2 1. ↵ Makary , M. A. & Daniel , M. Medical error—the third leading cause of death in the US . BMJ 353 , i2139 ( 2016 ). OpenUrl FREE Full Text 2. ↵ To Err Is Human: Building a Safer Health System . ( National Academies Press , Washington, D.C ., 2000 ). doi: 10.17226/9728 . OpenUrl CrossRef 3. ↵ Marshall , S. D. Lost in translation? Comparing the effectiveness of electronic-based and paper-based cognitive aids . Br. J. Anaesth . 119 , 869 – 871 ( 2017 ). OpenUrl PubMed 4. ↵ Paraschiv , A.-P. et al. Use of a Digital Cognitive Aid Improves Memorization of Military Caregivers After High-Fidelity Simulations of Combat Casualty Care . Mil. Med . 188 , e295 – e300 ( 2023 ). OpenUrl PubMed 5. ↵ Lelaidier , R. et al. Use of a hand-held digital cognitive aid in simulated crises: the MAX randomized controlled trial . Br. J. Anaesth . 119 , 1015 – 1021 ( 2017 ). OpenUrl PubMed 6. Truchot , M. et al. Use of a Digital Cognitive Aid in the Early Management of Simulated War Wounds in a Combat Environment, a Randomized Trial . Mil. Med . 
185 , e1077 – e1082 ( 2020 ). OpenUrl PubMed 7. Paraschiv , A.-P. et al. Impact of a Digital Cognitive Aid on the Performance of Military Healthcare Teams During Critical Care Management in a Warfront Injury Situation: A Simulation Randomized Controlled Study . Simul. Healthc. J. Soc. Simul. Healthc . 17 , 163 – 169 ( 2022 ). OpenUrl 8. Benguigui , L. et al. A Customizable Digital Cognitive Aid for Neonatal Resuscitation: A Simulation-Based Randomized Controlled Trial . Simul. Healthc. J. Soc. Simul. Healthc . 19 , 302 – 308 ( 2024 ). OpenUrl 9. ↵ Donzé , P. et al. ‘Read-and-do’ response to a digital cognitive aid in simulated cardiac arrest: the Medical Assistance eXpert 2 randomised controlled trial . Br. J. Anaesth . 123 , e160 – e163 ( 2019 ). OpenUrl PubMed 10. ↵ Cejka et al. Customisable digital cognitive aids cut total error by 75% defining a threshold for irreducible human error in clinical care: a pooled analysis of five randomised trials . Preprint at https://submit.medrxiv.org/submission/download?msid=MEDRXIV/2025/337246&roleName=author&filetype=source ( 2025 ). 11. ↵ Gauss , T. et al. Comparison of machine learning and human prediction to identify trauma patients in need of hemorrhage control resuscitation (ShockMatrix study): a prospective observational study . Lancet Reg. Health - Eur . 55 , 101340 ( 2025 ). OpenUrl PubMed 12. ↵ Condorcet , J. A. N. de C. de . Essai sur l’application de l’analyse à la probabilité des décisions rendues à la pluralité des voix. (de l’Imprimerie royale , 1785 ). doi: 10.3931/E-RARA-3791 . OpenUrl CrossRef 13. ↵ Henkeme et al. Fostering Adaptive Safety Behaviours through Proactive Anticipation: A randomised trial of a digital cognitive aid in high-stakes clinical situations. In preparation . 14. ↵ Kassirer , J. P. Diagnostic Reasoning . Ann. Intern. Med . 110 , 893 – 900 ( 1989 ). OpenUrl CrossRef PubMed Web of Science 15. ↵ Klein , G. A. Sources of Power: How People Make Decisions . 
( MIT Press , Cambridge, Mass , 1998 ). 16. ↵ Leone , M. , Martin-Loeches , I. & Pirracchio , R. Humans versus machine learning models: improving the outcome of trauma patients . Lancet Reg. Health - Eur . 55 , 101356 ( 2025 ). OpenUrl PubMed 17. ↵ Shannon , C. E. A Mathematical Theory of Communication . Bell Syst. Tech. J . 27 , 379 – 423 ( 1948 ). OpenUrl CrossRef 18. ↵ Krause Paul . Information Theory and Medical Decision Making. in Studies in Health Technology and Informatics (IOS Press , 2019 ). doi: 10.3233/SHTI190108 . OpenUrl CrossRef 19. ↵ Jaynes , E. T. Probability Theory: The Logic of Science . ( Cambridge university press, Cambridge , 2003 ). 20. ↵ Pauker , S. G. & Kassirer , J. P. Decision Analysis . N. Engl. J. Med . 316 , 250 – 258 ( 1987 ). OpenUrl CrossRef PubMed Web of Science 21. ↵ Duke , A. Thinking in Bets: Making Smarter Decisions When You Don’t Have All the Facts . ( Portfolio/Penguin, New York , 2018 ). View the discussion thread. Back to top Previous Next Posted October 07, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following The Cognitive Safety Net: Comparing Human and AI Diagnostic Reasoning during Complex Clinical Situations Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share The Cognitive Safety Net: Comparing Human and AI Diagnostic Reasoning during Complex Clinical Situations Antonin Audion , Mathieu Henkeme , Baptiste Balanca , Marc Lilot , Thomas Rimmelé , Ismail Abaakil , Jean-Christophe Cejka medRxiv 2025.10.06.25335641; doi: https://doi.org/10.1101/2025.10.06.25335641 Share This Article: Copy Citation Tools The Cognitive Safety Net: Comparing Human and AI Diagnostic Reasoning during Complex Clinical Situations Antonin Audion , Mathieu Henkeme , Baptiste Balanca , Marc Lilot , Thomas Rimmelé , Ismail Abaakil , Jean-Christophe Cejka medRxiv 2025.10.06.25335641; doi: https://doi.org/10.1101/2025.10.06.25335641 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Systems and Quality Improvement Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) 
Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffb5577fbeef047',t:'MTc3OTQ0NzgwMQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00