Validation of Synthesa AI, a Large Language Model-Based Screening Tool for Systematic Reviews: Results from Nine Studies

doi:10.1101/2025.07.16.25331632

Validation of Synthesa AI, a Large Language Model-Based Screening Tool for Systematic Reviews: Results from Nine Studies

2025 · doi:10.1101/2025.07.16.25331632

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 51,079 characters · extracted from preprint-html · click to expand

Validation of Synthesa AI, a Large Language Model-Based Screening Tool for Systematic Reviews: Results from Nine Studies | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Validation of Synthesa AI, a Large Language Model-Based Screening Tool for Systematic Reviews: Results from Nine Studies Lefteris Teperikidis PharmD , Christos Trampoukis , Kyiakos Polymenakos doi: https://doi.org/10.1101/2025.07.16.25331632 Lefteris Teperikidis PharmD 1 Synthesa, Inc. 
, 19 West 24th St., New York, NY 10010, USA 2 Clinical Research Unit, Special Unit for Biomedical Research and Education (SUBRE), School of Medicine, Aristotle University of Thessaloniki , Thessaloniki, Greece 3 Third Department of Cardiology, Ippokratio General Hospital, Aristotle University of Thessaloniki , Thessaloniki, Greece Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: lefteris{at}synthesa.ai Christos Trampoukis 1 Synthesa, Inc. , 19 West 24th St., New York, NY 10010, USA MSc Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kyriakos Polymenakos 1 Synthesa, Inc. , 19 West 24th St., New York, NY 10010, USA PhD Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Systematic review screening is often burdensome, prone to human error, and requires significant manual effort. Synthesa AI, a large language model (LLM)-based tool, was developed to address these challenges by offering a transparent and prompt-driven approach to abstract screening. In this validation study, Synthesa AI was evaluated across 17 benchmark meta-analyses encompassing nine clinical domains. Using user-defined PICOS criteria, the tool screened a total of 270,626 abstracts retrieved from PubMed and Scopus. Synthesa AI accurately identified all 163 benchmark-included studies, yielding a sensitivity of 100% and a pooled specificity of 99.4%. Remarkably, it reduced reviewer workload by 91.7%, flagging only 1,797 abstracts for manual review. Furthermore, the tool identified 32 relevant studies that had been missed in the original reviews, representing a 19.6% increase in evidence yield. These findings demonstrate that Synthesa AI delivers high precision, efficiency, and reproducibility in systematic review workflows. 
Its auditable and deterministic architecture adheres to Good Machine Learning Practice (GMLP) guidelines, making it suitable for both academic and regulatory applications. Synthesa AI represents a promising solution for living systematic reviews and large-scale evidence synthesis initiatives, offering a transformative alternative to traditional human-led screening. Introduction Systematic reviews and meta-analyses constitute the methodological cornerstone of evidence-based medicine, serving as essential instruments for aggregating clinical evidence, informing practice guidelines, and underpinning regulatory and policy decisions ( 1 – 3 ). Their value in synthesizing data from randomized controlled trials and observational studies is well-established across practically every domain in Medicine. However, the initial stages of conducting a systematic review—particularly the screening of titles and abstracts—remain notoriously inefficient. These phases are not only labor-intensive and time-consuming but also susceptible to inter-reviewer variability, leading to inconsistencies in the selection of eligible studies ( 4 – 6 ). The limitations of current human-centric workflows are especially pronounced in high-throughput or rapidly evolving research landscapes, where evidence accumulates at a pace that outstrips the practical capacities of human teams ( 7 – 9 ). The growing volume of biomedical literature has created a methodological bottleneck that threatens the timeliness, reproducibility, and scalability of evidence synthesis ( 10 , 11 ). In fields such as cardiovascular medicine, obesity, and diabetes—where systematic reviews may necessitate the appraisal of thousands of citations—screening processes often consume disproportionate resources while risking the inadvertent exclusion of relevant studies. 
Conventional machine learning approaches to semi-automated screening have demonstrated incremental improvements but are constrained by rigid training pipelines and limited generalizability across clinical contexts ( 5 , 12 , 13 ). The recent advent of large language models (LLMs) introduces a transformative opportunity to reconfigure the screening paradigm. These models, grounded in transformer architectures and pretrained on vast corpora of biomedical and general-domain text, have demonstrated remarkable aptitude for natural language understanding, contextual reasoning, and decision support—attributes that are well-aligned with the interpretive demands of systematic review screening tasks ( 14 , 15 ). Synthesa AI is a novel abstract screening platform that operationalizes these capabilities within a reproducible and customizable framework for evidence synthesis. Built upon state-of-the-art LLMs, Synthesa AI emulates the inferential logic applied by human reviewers by evaluating biomedical abstracts against user-defined inclusion criteria structured according to the PICOS framework (Population, Intervention, Comparator, Outcome, and Study Design). The system is designed to produce transparent, auditable, and reproducible decisions, flagging abstracts as “Included,” “Excluded,” or “Potentially Relevant,” with accompanying justifications. Importantly, the tool does not rely on pre-labeled training data and instead uses deterministic prompts to drive decision-making, ensuring consistent outputs across screening runs for a given input and prompt configuration. The present report introduces Synthesa AI to the academic and regulatory communities by providing results from nine independent validation studies conducted across a diverse array of clinical domains. These studies were conceived to evaluate the tool’s operating characteristics—specifically sensitivity, specificity, speed, reproducibility, and scalability—relative to human review benchmarks. 
In each validation, Synthesa AI was assessed for its ability to accurately identify all studies included in published systematic reviews or meta-analyses, while also evaluating its capacity to uncover additional relevant studies that had been overlooked by prior human screening efforts. The validation design also considered regulatory acceptability, aligning the development of Synthesa AI with emerging frameworks such as the U.S. Food and Drug Administration’s Good Machine Learning Practice (GMLP) principles. Methods Study Overview This study evaluated the performance of Synthesa AI, a large language model-based abstract screening tool, across nine independent validation exercises. Each validation study was designed to assess the tool’s sensitivity, specificity, and overall screening efficiency in identifying eligible studies for systematic reviews. The validation paradigm was constructed to simulate real-world evidence synthesis workflows and to test the model’s generalizability across a wide range of clinical questions, therapeutic domains, and study populations. Eight of the nine validation studies were retrospective in design and benchmarked against published meta-analyses from high-impact, peer-reviewed journals, including one Cochrane systematic review. These benchmark reviews were selected on the basis of methodological rigor, publication recency, and relevance to high-throughput areas of clinical research, including cardiovascular medicine, critical care, psychiatry, respiratory diseases, infectious diseases, and immunization. The remaining validation study—the ninth—combined evidence from several previously published systematic reviews with a prospective, double-screened, blinded human adjudication process, enabling a hybrid benchmark that incorporates both historical and real-time human gold standards. 
Synthesa AI Architecture and Screening Protocol Synthesa AI is a deterministic LLM-powered tool engineered for binary relevance classification of biomedical abstracts based on a structured PICOS (Population, Intervention, Comparator, Outcome, Study Design) framework. For each validation study, the user (i.e., the analyst team) provided a PICOS prompt derived from the eligibility criteria of the original meta-analysis. Once provided with the user-defined PICOS, the tool applies a two-stage screening logic: an initial filter to exclude studies based on publication type and study design (e.g., case reports, editorials, or irrelevant study types), followed by PICO element extraction and comparison against user-defined criteria. Each abstract is processed independently and classified as “Included,” “Excluded,” or “Potentially Relevant.” These classifications are supplemented with accompanying rationales and flagged terms to facilitate downstream auditability and human review. Synthesa AI operates in a stateless fashion, ensuring reproducibility of outputs for a given set of inputs and prompts. Validation Workflow The benchmark studies were used to calibrate the tool prior to full-scale screening. Calibration involved the extraction of all abstracts from studies included in each benchmark meta-analysis. These abstracts were run through Synthesa AI using preliminary PICOS prompts. In cases where the tool failed to identify one or more of the included studies, the prompt was iteratively refined until all included studies were successfully flagged as “Included” or “Potentially Relevant.” This calibration process was essential to ensure that the tool was sufficiently sensitive to detect all known eligible studies, and it mirrors the pragmatic tuning step employed in actual systematic review workflows. Following calibration, a new literature search, broader than that used in the benchmark reports, was conducted for each clinical topic using PubMed and Scopus. 
The goal was to execute a more inclusive and sensitivity-maximizing search strategy than that employed in the original meta-analyses. Synthesa AI was then used to screen the entire corpus of retrieved abstracts. In all cases, flagged abstracts underwent blinded human review to adjudicate final inclusion status. Human Adjudication and Gold Standard Comparison For the eight benchmark-driven validation studies, the reference standard was the list of studies included in the published meta-analyses. Sensitivity was defined as the proportion of these benchmark-included studies that were correctly flagged as “Included” or “Potentially Relevant” by Synthesa AI. Specificity was calculated as the proportion of excluded abstracts that were correctly classified as “Excluded.” The ninth validation study used a hybrid gold standard. In this case, all flagged abstracts were subjected to prospective double human review, with discrepancies resolved by consensus. This allowed a direct comparison between Synthesa AI’s screening decisions and those of blinded human reviewers. The inclusion of this prospective adjudication process allowed us to evaluate the tool’s performance in a setting without reliance on prior publications, thereby addressing potential biases introduced by calibration to known included studies. Data Synthesis and Statistical Analysis Across all nine studies, pooled sensitivity and specificity estimates were calculated. Descriptive statistics were used to quantify the number of abstracts flagged, the number of relevant studies identified, and the proportion of newly identified studies not captured by benchmark reports. Screening burden reduction was estimated as the percentage decrease in the number of abstracts requiring human review, relative to full manual screening. 
Results Study 1: STEMI and Multivessel Percutaneous Coronary Intervention (PCI) Strategies The first validation study focused on the identification of RCTs comparing revascularization strategies in patients with ST-elevation myocardial infarction (STEMI) and multivessel coronary artery disease. We included a prospective double human screening, while the benchmark comprised six published meta-analyses, including a total of 24 unique included studies ( 16 – 21 ). Synthesa AI and the two human reviewers screened a total of 5,460 abstracts. The tool flagged 188 abstracts for human review. Within this flagged subset, all 24 benchmark-included RCTs were correctly classified as relevant, yielding a sensitivity of 100%. Moreover, Synthesa AI identified two additional eligible RCTs that had not been captured by the benchmark reviews—one that had been published after the benchmarks’ respective search windows, and one that had been published prior ( 22 , 23 ). These two studies were also flagged as relevant by both human reviewers. Of the 188 flagged citations, 162 were ultimately judged not to meet inclusion criteria, resulting in a specificity of 97.2%. Study 2: Inhaled Reliever Therapies for Asthma The second validation study evaluated Synthesa AI in the context of reliever inhaler therapies for asthma. The benchmark for this analysis was the network meta-analysis conducted by Rayner et al. ( 24 ), which had included 23 RCTs indexed in PubMed. In contrast to the benchmark study’s screening of 3,179 abstracts, a broader PubMed query conducted for this validation yielded 30,719 abstracts. Synthesa AI screened the entire corpus and flagged 247 abstracts for manual review. The tool successfully identified all 23 PubMed-indexed RCTs reported in the Rayner et al. meta-analysis, achieving a sensitivity of 100%. 
In addition, Synthesa AI identified 15 further eligible RCTs that were not included in the benchmark analysis ( 25 – 39 ), expanding the total number of included studies to 38 and increasing the overall sample size from 45,117 to 64,036 participants. Of the 247 flagged citations, 209 were adjudicated as false positives, yielding a specificity of 99.2%. Study 3: Antidepressants for Irritable Bowel Syndrome (IBS) The third validation study aimed to replicate and extend the findings of a recent meta-analysis conducted by Temido et al. ( 40 ), which evaluated the efficacy of antidepressants in the treatment of irritable bowel syndrome (IBS). A literature search using both PubMed and Scopus yielded a total of 28,645 abstracts. Synthesa AI screened this full dataset and flagged 86 abstracts for human adjudication. The tool successfully identified all 20 RCTs included in the benchmark meta-analysis, resulting in a sensitivity of 100%. Furthermore, it identified six additional RCTs that reported binary outcomes suitable for meta-analysis but had not been included in the benchmark publication ( 41 – 45 ). An additional two RCTs were also detected that, while relevant, did not contain extractable outcome data compatible with quantitative synthesis ( 46 , 47 ). These findings brought the total number of eligible studies to 28. Of the 86 abstracts flagged by Synthesa AI, 58 were ultimately excluded, resulting in a specificity of 99.38%. Study 4: Antiplatelet versus Anticoagulation Therapy in Heart Failure with Sinus Rhythm The fourth validation study evaluated Synthesa AI’s ability to replicate the findings of a Cochrane systematic review that compared the efficacy and safety of antiplatelet agents versus anticoagulants in patients with heart failure who maintained sinus rhythm ( 48 ). A PubMed search yielded a total of 48,250 abstracts, which were then screened by Synthesa AI. 
The tool flagged 103 abstracts for human review and correctly identified all four RCTs included in the benchmark Cochrane review, resulting in a sensitivity of 100%. No additional eligible studies were identified. Of the 103 flagged abstracts, 99 were false positives, corresponding to a specificity of 99.8%. Study 5: APOC3 Antisense Oligonucleotides in Hypertriglyceridemia The fifth validation study assessed APOC3-targeting antisense oligonucleotides for the treatment of hypertriglyceridemia. Four recently published meta-analyses served as benchmarks for this evaluation ( 49 – 52 ). A PubMed search yielded a total of 1,226 abstracts. Synthesa AI screened this corpus and flagged 37 abstracts for human review. Among these, the tool correctly identified all 10 RCTs previously included in the benchmark meta-analyses, corresponding to a sensitivity of 100%. Additionally, the tool identified one further eligible RCT that was published after the search cutoffs of the reference meta-analyses and thus not included in their final datasets ( 53 ). Of the 37 flagged abstracts, 24 were ultimately classified as false positives, resulting in a specificity of 98%. Study 6: Dexmedetomidine and Postoperative Delirium in Cardiac Surgery The sixth validation study assessed Synthesa AI’s performance in replicating a meta-analysis by Hunt et al., which investigated the impact of perioperative dexmedetomidine administration on postoperative delirium in patients undergoing cardiac surgery ( 54 ). A combined search of PubMed and Scopus yielded 23,667 abstracts. Synthesa AI processed the entire corpus and flagged 235 abstracts for further review. All 12 benchmark RCTs were correctly identified by the tool, achieving a sensitivity of 100%. Importantly, no additional eligible studies were identified beyond those already included in the benchmark meta-analysis. Of the 235 flagged abstracts, 223 were classified as false positives, resulting in a specificity of 99.06%. 
Study 7: Coadministration of Pneumococcal Vaccines with Influenza or SARS-CoV-2 Vaccines The seventh validation study examined the safety and immunogenicity of coadministration strategies involving pneumococcal vaccines with either influenza or SARS-CoV-2 vaccines. The benchmark reference was a meta-analysis by Rahimi et al. ( 55 ), which included 17 relevant studies. A highly sensitive PubMed search was conducted for this study, yielding a total of 69,207 abstracts. Synthesa AI screened the entire corpus and flagged 726 abstracts for manual review. All 17 benchmark studies were successfully identified by the tool, corresponding to a sensitivity of 100%. Additionally, the tool identified two eligible studies published after the cutoff date of the reference meta-analysis ( 56 , 57 ), as well as one relevant study that had been published earlier but was not included in the benchmark dataset ( 58 ). These findings increased the total number of relevant studies to 20. Of the 726 flagged abstracts, 703 were adjudicated as false positives, yielding a specificity of 99%. Study 8: Nebulized Antibiotics for Prevention of Ventilator-Associated Pneumonia (VAP) The eighth validation study assessed Synthesa AI’s performance in replicating the results of a meta-analysis that examined the efficacy of nebulized antibiotics for preventing ventilator-associated pneumonia in critically ill patients ( 59 ). A total of 19,303 abstracts were retrieved from PubMed using a broad search strategy. Synthesa AI flagged 31 abstracts for manual adjudication. The tool correctly identified all four benchmark studies, achieving a sensitivity of 100%. No additional eligible studies were identified beyond those previously reported in the reference meta-analysis. Of the 31 flagged abstracts, 27 were deemed false positives, resulting in a specificity of 99.9%. 
Study 9: Vitamin D Supplementation for Prevention of Acute Respiratory Infections The ninth validation study addressed the use of vitamin D supplementation to prevent acute respiratory infections, using a recently published meta-analysis by Jolliffe et al. as the benchmark reference ( 60 ). A total of 29,307 abstracts were retrieved via PubMed. Synthesa AI screened the entire dataset and flagged 269 abstracts for adjudication. All 46 benchmark studies were correctly identified by the tool, resulting in a sensitivity of 100%. In addition, Synthesa AI identified three additional RCTs that met inclusion criteria ( 61 – 63 ). These findings increased the total number of relevant studies to 49. Of the 269 flagged abstracts, 223 were ultimately excluded, resulting in a specificity of 99.3%. Cross-Study Performance Metrics Across the nine validation studies conducted, Synthesa AI screened a total of 270,626 abstracts, a screening volume that represents a 1,380% increase over the 19,595 abstracts collectively reviewed in the 17 published benchmark meta-analyses used as reference standards. Despite the vast increase in scale, the tool flagged only 1,797 abstracts for manual review—representing a 91.7% reduction in screening burden— while maintaining perfect sensitivity (100%) across all studies. That is, Synthesa AI successfully identified every study included in the respective benchmark analyses, regardless of clinical domain, study design heterogeneity, or corpus size ( Table 1 ). View this table: View inline View popup Download powerpoint Table 1: Summary of Synthesa AI’s performance metrics Specificity across all validation exercises remained consistently high. The pooled specificity, defined as the proportion of correctly excluded abstracts among all those not included in benchmark reviews, was calculated at 99.4% ( Figure 1 ). 
This estimate was derived from individual study-specific specificity rates, which ranged from 97.2% to 99.9%, and reflects the tool’s ability to minimize false positive rates even in extremely large and diverse literature corpora. Importantly, Synthesa AI identified a total of 32 relevant studies that were not included in the original benchmark publications. These additional studies were identified within the same pool of 1,797 abstracts flagged for human review, resulting in a 19.6% increase in relevant study yield relative to the 163 benchmark-included trials. Download figure Open in new tab Figure 1: Synthesa AI’s pooled specificity across the nine validation studies. Discussion This validation study provides compelling evidence that Synthesa AI, an LLM-based abstract screening tool, achieves performance metrics that meet or exceed those of traditional human screening in systematic reviews. Across nine independent validation exercises, Synthesa AI achieved perfect sensitivity (100%) in identifying all relevant studies included in benchmark meta-analyses, while maintaining a pooled specificity of 99.4%. These findings demonstrate not only the tool’s capacity to emulate expert-level screening behavior but also its operational feasibility for large-scale deployment in evidence synthesis workflows. A particularly salient finding of this evaluation is Synthesa AI’s capacity to substantially reduce the manual workload associated with abstract screening. Across a cumulative corpus of over 270,000 abstracts, the tool flagged only 1,797 abstracts for human review, corresponding to a 91.7% reduction in reviewer burden compared to full manual screening. Importantly, this screening efficiency did not come at the cost of reduced sensitivity or precision. On the contrary, the tool identified 32 additional relevant studies beyond those included in the 17 benchmark meta-analyses, a 19.6% increase in yield. 
The clinical and methodological implications of these findings are substantial. First, the study indirectly quantifies the limitations of human-led abstract screening, which is inherently constrained by fatigue, inter-reviewer variability, and practical resource limitations. The observation that approximately one in five relevant studies were absent from benchmark reviews—despite their adherence to stated inclusion criteria—raises important concerns regarding the completeness and reproducibility of traditional systematic reviews. Synthesa AI, in contrast, demonstrated perfect reproducibility across all validation runs; its deterministic prompt architecture ensures that identical inputs yield identical outputs, a property not shared by either human reviewers or probabilistic machine learning classifiers. Second, the findings support the viability of integrating LLM-based screening tools into regulatory and academic settings where methodological rigor, auditability, and transparency are non-negotiable. Synthesa AI’s architecture enables full traceability of every inclusion and exclusion decision, with rationales and flagged terms available for documentation and adjudication. This feature aligns well with emerging frameworks such as the U.S. Food and Drug Administration’s Good Machine Learning Practice (GMLP) guidelines, which emphasize transparency, reproducibility, and human oversight in AI-assisted decision-making ( 64 ). As such, Synthesa AI holds promise not only as an academic research instrument but also as a tool for regulatory-grade literature reviews in contexts such as pharmacovigilance, clinical evaluation reports, and health technology assessments. Third, the results of this study reinvigorate the long-standing vision of living systematic reviews. 
By enabling full corpus screening in near-real time, Synthesa AI renders feasible what had previously been considered operationally prohibitive—namely, the continuous updating of systematic reviews in response to newly published evidence. This capability could transform the responsiveness of evidence synthesis to fast-moving therapeutic domains such as infectious disease, oncology, and cardiovascular medicine, where the evidence base evolves rapidly and policy decisions must keep pace. Despite its strengths, the present study is not without limitations. The calibration of Synthesa AI was based on known included studies in the benchmark reviews, which may introduce bias by optimizing the tool for sensitivity against those specific inclusion patterns. This means that generalizability to entirely novel topics without an existing gold standard cannot be inferred directly from the current results. Future work will expand on this approach to further evaluate the tool’s de novo screening performance under real-world, prospective conditions. In conclusion, Synthesa AI delivers a transformative improvement in the accuracy, completeness, and efficiency of abstract screening for systematic reviews. By substantially reducing reviewer burden, identifying previously missed studies, and enabling reproducible decision-making at scale, Synthesa AI addresses longstanding challenges in evidence synthesis. These findings set a new benchmark for what can be achieved in systematic review methodology and point toward a future in which LLM-based tools play a central role in the curation of clinical evidence. Figure Legend: Forest plot of pooled specificity across the 9 topics Disclosures Lefteris Teperikidis, Christos Trampoukis, and Kyriakos Polymenakos are co-founders of Synthesa, Inc., the company that develops the tools used in this validation study. 
Lefteris Teperikidis has consulted for SCRIPPS Research, Callibr BV, Parexel, Bruker GmbH, IVDeology, Pharmassist, Accuscript, Remedica, and PARI GmbH, outside the present work. Data Availability All data produced in the present study are available upon reasonable request to the authors. References 1. ↵ Chalmers I , Hedges LV , Cooper H . A brief history of research synthesis . Eval Health Prof . 2002 ; 25 ( 1 ): 12 – 37 . OpenUrl CrossRef PubMed 2. Guyatt G . Users’ Guides to the Medical Literature: A Manual for Evidence-Based Clinical Practice, 3E: McGraw Hill LLC ; 2014 . 3. ↵ Institute of Medicine Committee on Standards for Systematic Reviews of Comparative Effectiveness R . In: Eden J , Levit L , Berg A , Morton S , editors. Finding What Works in Health Care: Standards for Systematic Reviews . Washington (DC) : National Academies Press (US) Copyright 2011 by the National Academy of Sciences. All rights reserved.; 2011 . 4. ↵ Borah R , Brown AW , Capers PL , Kaiser KA . Analysis of the time and workers needed to conduct systematic reviews of medical interventions using data from the PROSPERO registry . BMJ Open . 2017 ; 7 ( 2 ): e012545 . OpenUrl Abstract / FREE Full Text 5. ↵ O’Mara-Eves A , Thomas J , McNaught J , Miwa M , Ananiadou S . Erratum to: Using text mining for study identification in systematic reviews: a systematic review of current approaches . Syst Rev . 2015 ; 4 : 59 . 6. ↵ Waffenschmidt S , Knelangen M , Sieben W , Bühn S , Pieper D . Single screening versus conventional double screening for study selection in systematic reviews: a methodological systematic review . BMC Med Res Methodol . 2019 ; 19 ( 1 ): 132 . OpenUrl CrossRef PubMed 7. ↵ Ioannidis JP . The Mass Production of Redundant, Misleading, and Conflicted Systematic Reviews and Meta-analyses . Milbank Q . 2016 ; 94 ( 3 ): 485 – 514 . OpenUrl CrossRef PubMed 8. Elliott JH , Turner T , Clavisi O , Thomas J , Higgins JP , Mavergames C , et al. 
Living systematic reviews: an emerging opportunity to narrow the evidence-practice gap . PLoS Med . 2014 ; 11 ( 2 ): e1001603 . OpenUrl CrossRef PubMed 9. ↵ Marshall IJ , Wallace BC . Toward systematic review automation: a practical guide to using machine learning tools in research synthesis . Syst Rev . 2019 ; 8 ( 1 ): 163 . OpenUrl CrossRef PubMed 10. ↵ Bastian H , Glasziou P , Chalmers I . Seventy-five trials and eleven systematic reviews a day: how will we ever keep up? PLoS Med . 2010 ; 7 ( 9 ): e1000326 . OpenUrl CrossRef PubMed 11. ↵ Shojania KG , Sampson M , Ansari MT , Ji J , Doucette S , Moher D . How quickly do systematic reviews go out of date? A survival analysis . Ann Intern Med . 2007 ; 147 ( 4 ): 224 – 33 . OpenUrl CrossRef PubMed Web of Science 12. ↵ Cohen AM , Hersh WR , Peterson K , Yen PY . Reducing workload in systematic review preparation using automated citation classification . J Am Med Inform Assoc . 2006 ; 13 ( 2 ): 206 – 19 . OpenUrl CrossRef PubMed 13. ↵ Marshall IJ , Noel-Storr A , Kuiper J , Thomas J , Wallace BC . Machine learning for identifying Randomized Controlled Trials: An evaluation and practitioner’s guide . Res Synth Methods . 2018 ; 9 ( 4 ): 602 – 14 . OpenUrl PubMed 14. ↵ Vaswani A , Shazeer N , Parmar N , Uszkoreit J , Jones L , Gomez AN , et al. Attention Is All You Need . 2023 . 15. ↵ Lee J , Yoon W , Kim S , Kim D , Kim S , So CH , et al. BioBERT: a pre-trained biomedical language representation model for biomedical text mining . Bioinformatics . 2019 ; 36 ( 4 ): 1234 – 40 . OpenUrl CrossRef 16. ↵ Ueyama HA , Akita K , Kiyohara Y , Takagi H , Briasoulis A , Wiley J , et al. Optimal Strategy for Complete Revascularization in ST-Segment Elevation Myocardial Infarction and Multivessel Disease: A Network Meta-Analysis . J Am Coll Cardiol . 2025 ; 85 ( 1 ): 19 – 38 . OpenUrl PubMed 17. Martino G , Quarta R , Greco F , Spaccarotella C , Indolfi C , Curcio A , et al. 
Physiology-Versus Angiography-Guided Complete Coronary Revascularization in STEMI Patients with Multivessel Disease: A Network Meta-Analysis . J Clin Med . 2025 ; 14 ( 2 ). 18. Almizel AM , Levett JY , Zolotarova T , Eisenberg MJ . Meta-Analysis Comparing Immediate Versus Staged Complete Revascularization for ST-Elevation Myocardial Infarction With Multivessel Disease . Am J Cardiol . 2025 ; 239 : 75 – 81 . OpenUrl PubMed 19. Franco AJ , Krishna MM , Joseph M , Ezenna C , Bakir ZE , Sudo RYU , et al. Complete versus culprit-only percutaneous coronary intervention in elderly patients with acute coronary syndrome and multivessel coronary artery disease: A systematic review and meta-analysis . Cardiovasc Revasc Med . 2025 ; 70 : 1 – 9 . OpenUrl PubMed 20. Gonnah AR , Awad AK , Helmy AE , Elsnhory AB , Shazly O , Abousalima SA , et al. Comparing FFR-Guided Complete Revascularization and Conservative Management for Non-Culprit Lesions in STEMI Patients With Multivessel Disease: A Systematic Review and Meta-Analysis . Catheter Cardiovasc Interv . 2025 ; 105 ( 3 ): 633 – 42 . OpenUrl PubMed 21. ↵ Singh S , Tantry US , Bliden K , Saad M , Gurbel PA , Abbott JD , et al. Meta-Analysis of Physiology-Guided Complete or Culprit Lesion-Only Percutaneous Coronary Interventions in Myocardial Infarction . Am J Cardiol . 2024 ; 232 : 49 – 56 . OpenUrl PubMed 22. ↵ Rumiz E , Valero E , Fernandez C , Vilar JV , Pellicer M , Cubillos A , et al. In-hospital versus after-discharge complete revascularization in patients with ST segment elevation myocardial infarction and multivessel disease. REVIVA-ST trial . PLoS One . 2024 ; 19 ( 5 ): e0303284 . OpenUrl PubMed 23. ↵ Dambrink JH , Debrauwere JP , van ’t Hof AW , Ottervanger JP , Gosselink AT , Hoorntje JC , et al. Non-culprit lesions detected during primary PCI: treat invasively or follow the guidelines? EuroIntervention . 2010 ; 5 ( 8 ): 968 – 75 . OpenUrl CrossRef PubMed 24. 
↵ Rayner DG , Ferri DM , Guyatt GH , O’Byrne PM , Brignardello-Petersen R , Foroutan F , et al. Inhaled Reliever Therapies for Asthma: A Systematic Review and Meta-Analysis . Jama . 2025 ; 333 ( 2 ): 143 – 52 . OpenUrl PubMed 25. ↵ Berggren F , Ekström T . A cost-effectiveness study comparing the as-needed use of formoterol (Oxis) and terbutaline (Bricanyl) in patients with moderate to severe asthma . Respir Med . 2001 ; 95 ( 9 ): 753 – 8 . OpenUrl PubMed 26. Bisgaard H , Le Roux P , Bjåmer D , Dymek A , Vermeulen JH , Hultquist C . Budesonide/formoterol maintenance plus reliever therapy: a new strategy in pediatric asthma . Chest . 2006 ; 130 ( 6 ): 1733 – 43 . OpenUrl CrossRef PubMed Web of Science 27. Goossens LM , Riemersma RA , Postma DS , van der Molen T , Rutten-van Mölken MP . An economic evaluation of budesonide/formoterol for maintenance and reliever treatment in asthma in general practice . Adv Ther . 2009 ; 26 ( 9 ): 872 – 85 . OpenUrl PubMed 28. Hardy J , Baggott C , Fingleton J , Reddel HK , Hancox RJ , Harwood M , et al. Budesonide-formoterol reliever therapy versus maintenance budesonide plus terbutaline reliever therapy in adults with mild to moderate asthma (PRACTICAL): a 52-week, open-label, multicentre, superiority, randomised controlled trial . Lancet . 2019 ; 394 ( 10202 ): 919 – 28 . OpenUrl CrossRef PubMed 29. Johansson G , Andreasson EB , Larsson PE , Vogelmeier CF . Cost effectiveness of budesonide/formoterol for maintenance and reliever therapy versus salmeterol/fluticasone plus salbutamol in the treatment of asthma . Pharmacoeconomics . 2006 ; 24 ( 7 ): 695 – 708 . OpenUrl CrossRef PubMed 30. Louis R , Joos G , Michils A , Vandenhoven G . A comparison of budesonide/formoterol maintenance and reliever therapy vs. conventional best practice in asthma management . Int J Clin Pract . 2009 ; 63 ( 10 ): 1479 – 88 . OpenUrl CrossRef PubMed 31. Papi A , Marku B , Scichilone N , Maestrelli P , Paggiaro P , Saetta M , et al. 
Regular versus as-needed budesonide and formoterol combination treatment for moderate asthma: a non-inferiority, randomised, double-blind clinical trial . Lancet Respir Med . 2015 ; 3 ( 2 ): 109 – 19 . OpenUrl PubMed 32. Rabe KF , Pizzichini E , Ställberg B , Romero S , Balanzat AM , Atienza T , et al. Budesonide/formoterol in a single inhaler for maintenance and relief in mild-to-moderate asthma: a randomized, double-blind trial . Chest . 2006 ; 129 ( 2 ): 246 – 56 . OpenUrl CrossRef PubMed Web of Science 33. Richter K , Hartmann U , Metzenauer P , Magnussen H . Randomised trial comparing as-needed versus regular treatment with formoterol in patients with persistent asthma . Respir Med . 2007 ; 101 ( 3 ): 467 – 75 . OpenUrl PubMed 34. Scicchitano R , Aalbers R , Ukena D , Manjra A , Fouquert L , Centanni S , et al. Efficacy and safety of budesonide/formoterol single inhaler therapy versus a higher dose of budesonide in moderate to severe asthma . Curr Med Res Opin . 2004 ; 20 ( 9 ): 1403 – 18 . OpenUrl CrossRef PubMed Web of Science 35. Sears MR , Boulet LP , Laviolette M , Fitzgerald JM , Bai TR , Kaplan A , et al. Budesonide/formoterol maintenance and reliever therapy: impact on airway inflammation in asthma . Eur Respir J . 2008 ; 31 ( 5 ): 982 – 9 . OpenUrl Abstract / FREE Full Text 36. Papi A , Nicolini G , Baraldi E , Boner AL , Cutrera R , Rossi GA , et al. Regular vs prn nebulized treatment in wheeze preschool children . Allergy . 2009 ; 64 ( 10 ): 1463 – 71 . OpenUrl CrossRef PubMed Web of Science 37. Pilcher J , Patel M , Smith A , Davies C , Pritchard A , Travers J , et al. Combination budesonide/formoterol inhaler as maintenance and reliever therapy in Māori with asthma . Respirology . 2014 ; 19 ( 6 ): 842 – 51 . OpenUrl PubMed 38. Price D , Wirén A , Kuna P . Cost-effectiveness of budesonide/formoterol for maintenance and reliever asthma therapy . Allergy . 2007 ; 62 ( 10 ): 1189 – 98 . OpenUrl CrossRef PubMed Web of Science 39. 
↵ Quirce S , Barcina C , Plaza V , Calvo E , Muñoz M , Ampudia R , et al. A comparison of budesonide/formoterol maintenance and reliever therapy versus conventional best practice in asthma management in Spain . J Asthma . 2011 ; 48 ( 8 ): 839 – 47 . OpenUrl CrossRef PubMed Web of Science 40. ↵ Temido MJ , Cristiano M , Gouveia C , Mesquita B , Figueiredo P , Portela F . Antidepressants in irritable bowel syndrome: a systematic review and meta-analysis of randomized controlled trials . Ann Gastroenterol . 2025 ; 38 ( 3 ): 284 – 93 . OpenUrl PubMed 41. ↵ Myren J , Groth H , Larssen SE , Larsen S . The effect of trimipramine in patients with the irritable bowel syndrome. A double-blind study . Scand J Gastroenterol . 1982 ; 17 ( 7 ): 871 – 5 . OpenUrl PubMed Web of Science 42. Tabas G , Beaves M , Wang J , Friday P , Mardini H , Arnold G . Paroxetine to treat irritable bowel syndrome not responding to high-fiber diet: a double-blind, placebo-controlled trial . Am J Gastroenterol . 2004 ; 99 ( 5 ): 914 – 20 . OpenUrl CrossRef PubMed Web of Science 43. Wright-Hughes A , Ford AC , Alderson SL , Ow PL , Ridd MJ , Foy R , et al. Low-dose titrated amitriptyline as second-line treatment for adults with irritable bowel syndrome in primary care: the ATLANTIS RCT . Health Technol Assess . 2024 ; 28 ( 66 ): 1 – 161 . OpenUrl CrossRef 44. Drossman DA , Toner BB , Whitehead WE , Diamant NE , Dalton CB , Duncan S , et al. Cognitive-behavioral therapy versus education and desipramine versus placebo for moderate to severe functional bowel disorders . Gastroenterology . 2003 ; 125 ( 1 ): 19 – 31 . OpenUrl CrossRef PubMed Web of Science 45. ↵ Tack J , Broekaert D , Fischler B , Van Oudenhove L , Gevers AM , Janssens J . A controlled crossover study of the selective serotonin reuptake inhibitor citalopram in irritable bowel syndrome . Gut . 2006 ; 55 ( 8 ): 1095 – 103 . OpenUrl Abstract / FREE Full Text 46. ↵ Myren J , Løvland B , Larssen SE , Larsen S . 
A double-blind study of the effect of trimipramine in patients with the irritable bowel syndrome . Scand J Gastroenterol . 1984 ; 19 ( 6 ): 835 – 43 . OpenUrl PubMed Web of Science 47. ↵ Sharbafchi MR , Afshar Zanjani H , Saneian Z , Feizi A , Daghaghzadeh H , Adibi P . Effects of Duloxetine on Gastrointestinal Symptoms, Depression, Anxiety, Stress, and Quality of Life in Patients with the Moderate-to-Severe Irritable Bowel Syndrome . Adv Biomed Res . 2023 ; 12 : 249 . 48. ↵ Kozieł-Siołkowska M , Shantsila E , Shantsila A , Lip GY . Antiplatelet versus anticoagulation treatment for people with heart failure in sinus rhythm . Cochrane Database Syst Rev . 2025 ; 6 ( 6 ): Cd003333 . OpenUrl PubMed 49. ↵ Mahmoud A , Abdelsayed K , Mohamed AA , Najah Q , Abdulkader A , Ali K , et al. Safety and efficacy of antisense oligonucleotides on triglyceride, apolipoprotein C-III, and other lipid parameters levels in hypertriglyceridemia; a network meta-analysis of randomized controlled trials . Lipids Health Dis . 2025 ; 24 ( 1 ): 109 . OpenUrl PubMed 50. de Moura de Souza M , Mendes BX , Defante MLR , de Athayde de Hollanda Morais BA , Martins OC , Prizão VM , et al. Apolipoprotein C-III inhibitors for the treatment of hypertriglyceridemia: a meta-analysis of randomized controlled trials . Metabolism . 2025 ; 167 : 156187 . 51. Masson W , Lobo M , Nogueira JP , Corral P , Barbagelata L , Siniawski D . Inhibitors of apolipoprotein C3, triglyceride levels, and risk of pancreatitis: a systematic review and meta-analysis . Rev Endocr Metab Disord . 2024 ; 25 ( 5 ): 817 – 25 . OpenUrl PubMed 52. ↵ Kamrul-Hasan ABM , Dutta D , Nagendra L , Mondal S , Bhattacharya S , Kalra S . Safety and Efficacy of the Novel RNA Interference Therapies for Hypertriglyceridemia and Mixed Hyperlipidemia Management: A Systematic Review and Meta-analysis . Endocr Pract . 2024 ; 30 ( 11 ): 1103 – 12 . OpenUrl PubMed 53. ↵ Watts GF , Rosenson RS , Hegele RA , Goldberg IJ , Gallo A , Mertens A , et al. 
Plozasiran for Managing Persistent Chylomicronemia and Pancreatitis Risk . N Engl J Med . 2025 ; 392 ( 2 ): 127 – 37 . OpenUrl PubMed 54. ↵ Hunt T , Payne T , Brophy JM , Irons J , Wang AY , Cartwright C , et al. Perioperative dexmedetomidine for the prevention of postoperative delirium after cardiac surgery: a systematic review, Bayesian meta-analysis, and Bayesian re-analysis of the DECADE trial . Br J Anaesth . 2025 ; 134 ( 6 ): 1671 – 82 . OpenUrl PubMed 55. ↵ Rahimi HK , Jasim AA , Rezahosseini O , Harboe ZB . Immunogenicity and adverse effects of pneumococcal vaccines co-administered with influenza or SARS-CoV-2 vaccines in adults: A systematic review and Meta-analysis . Vaccine . 2025 ; 59 : 127293 . 56. ↵ Jorda A , Prager M , Pracher L , Haselwanter P , Jackwerth M , Al Jalali V , et al. Immunogenicity, safety, and reactogenicity of concomitant administration of the novavax vaccine against Omicron XBB.1.5 (NVX-CoV2601) and a 20-valent pneumococcal conjugate vaccine in adults aged ≥60 years: A randomised, double-blind, placebo-controlled, non-inferiority trial . J Infect . 2025 ; 90 ( 2 ): 106405 . OpenUrl PubMed 57. ↵ Omole T , Pelayo E , Weinberg AS , Chalkias S , Endale Z , Tamms G , et al. Safety, Tolerability, and Immunogenicity of the Pneumococcal Vaccines PPSV23 or PCV15 Co-Administered with a Booster Dose of mRNA-1273 SARS-CoV-2 Vaccine in Healthy Adults ≥50 Years of Age . Vaccines (Basel ). 2025 ; 13 ( 2 ). 58. ↵ Song JY , Cheong HJ , Tsai TF , Chang HA , Choi MJ , Jeon JH , et al. Immunogenicity and safety of concomitant MF59-adjuvanted influenza vaccine and 23-valent pneumococcal polysaccharide vaccine administration in older adults . Vaccine . 2015 ; 33 ( 36 ): 4647 – 52 . OpenUrl PubMed 59. ↵ Yuan X , Feng Y , Wan J , Zhang S , Bao Q , Liu X , et al. Effect of nebulised inhalation of antibiotics on preventing ventilator-associated pneumonia in critically ill patients: a systematic review and meta-analysis . BMJ Open . 2025 ; 15 ( 5 ): e093868 . 
OpenUrl Abstract / FREE Full Text 60. ↵ Jolliffe DA , Camargo CA , Jr. , Sluyter JD , Aglipay M , Aloia JF , Bergman P , et al. Vitamin D supplementation to prevent acute respiratory infections: systematic review and meta-analysis of stratified aggregate data . Lancet Diabetes Endocrinol . 2025 ; 13 ( 4 ): 307 – 20 . OpenUrl PubMed 61. ↵ Harrison SE , Oliver SJ , Kashi DS , Carswell AT , Edwards JP , Wentz LM , et al. Influence of Vitamin D Supplementation by Simulated Sunlight or Oral D3 on Respiratory Infection during Military Training . Med Sci Sports Exerc . 2021 ; 53 ( 7 ): 1505 – 16 . OpenUrl 62. Mayan I , Somech R , Lev A , Cohen AH , Constantini NW , Dubnov-Raz G . Thymus Activity, Vitamin D, and Respiratory Infections in Adolescent Swimmers . Isr Med Assoc J . 2015 ; 17 ( 9 ): 571 – 5 . OpenUrl PubMed 63. ↵ Zurita-Cruz J , Fonseca-Tenorio J , Villasís-Keever M , López-Alarcón M , Parra-Ortega I , López-Martínez B , et al. Efficacy and safety of vitamin D supplementation in hospitalized COVID-19 pediatric patients: A randomized controlled trial . Front Pediatr . 2022 ; 10 : 943529 . 64. ↵ U.S. Food and Drug Administration , Health Canada , Medicines and Healthcare products Regulatory Agency . Good Machine Learning Practice for Medical Device Development: Guiding Principles. FDA, Health Canada , MHRA ; 2021 (published 2021/10/01). View the discussion thread. Back to top Previous Next Posted July 17, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following Validation of Synthesa AI, a Large Language Model-Based Screening Tool for Systematic Reviews: Results from Nine Studies Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Validation of Synthesa AI, a Large Language Model-Based Screening Tool for Systematic Reviews: Results from Nine Studies Lefteris Teperikidis PharmD , Christos Trampoukis , Kyriakos Polymenakos medRxiv 2025.07.16.25331632; doi: https://doi.org/10.1101/2025.07.16.25331632 Share This Article: Copy Citation Tools Validation of Synthesa AI, a Large Language Model-Based Screening Tool for Systematic Reviews: Results from Nine Studies Lefteris Teperikidis PharmD , Christos Trampoukis , Kyriakos Polymenakos medRxiv 2025.07.16.25331632; doi: https://doi.org/10.1101/2025.07.16.25331632 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) 
Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffbedefdffde2c5',t:'MTc3OTQ1NDA0Ng=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00