Full text
32,994 characters
· extracted from
preprint-html
· click to expand
Evaluation of Large Language Model-Generated Patient Information for Communicating Radiation Risk | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Evaluation of Large Language Model-Generated Patient Information for Communicating Radiation Risk View ORCID Profile Alice Gutowski , View ORCID Profile Daniel Carrion , View ORCID Profile Mohamed Khaldoun Badawy doi: https://doi.org/10.1101/2025.07.23.25332093 Alice Gutowski 1 Monash Imaging, Monash Health , Clayton, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Alice Gutowski For correspondence: 
alice.gutowski{at}monashhealth.org Daniel Carrion 1 Monash Imaging, Monash Health , Clayton, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Daniel Carrion Mohamed Khaldoun Badawy 1 Monash Imaging, Monash Health , Clayton, Australia 2 Department of Medical Imaging and Radiation Sciences, Monash University , Melbourne, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Mohamed Khaldoun Badawy Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Background Large language models are increasingly used to generate patient information in healthcare. However, their ability to communicate complex topics, such as the risks associated with radiation from medical imaging, remains unclear. Purpose This study evaluated the quality, relevance, and readability of patient information generated by large language models for communicating radiation risks associated with computed tomography and interventional cardiology procedures. Methods Five large language models were prompted to generate patient information for two clinical scenarios: computed tomography and interventional cardiology. The information was assessed by medical physicists, radiographers, and health literacy specialists using a structured survey containing rating scales and free-text feedback. Statistical analyses included tests for normality, group comparisons using non-parametric methods, and thematic analysis of qualitative responses. Results Twelve healthcare professionals participated. Significant differences were identified among professional groups in their scoring of readability, language suitability, and tone, particularly for higher-risk procedures. Health literacy specialists reported significant differences between large language models across most criteria, while medical physicists and radiographers identified fewer differences. 
Qualitative feedback revealed variability in how well the models balanced technical accuracy with accessible language, with some including inaccurate or irrelevant information. Conclusion Large language models show potential in supporting the development of patient information for radiation risk communication; however, substantial variability remains in the quality and appropriateness of the content. Multidisciplinary review is essential, and sole reliance on large language model-generated materials is not recommended. Further research involving patient evaluation is required to assess the real-world impact of these tools in clinical settings. Introduction The integration of artificial intelligence (AI) into healthcare is progressing rapidly, with large language models (LLMs) increasingly used to generate patient-facing information [ 1 ]. These tools offer the potential to enhance understanding, reduce health literacy barriers, and support shared decision-making [ 2 ]. While many studies have assessed the accuracy, readability, and clinical relevance of LLM-generated information, this research has predominantly focused on general medical conditions and patient concerns, treatment overviews, or clinical correspondence [ 3 ] [ 4 ] [ 5 ] [ 6 ] [ 7 ] [ 8 ] [ 9 ] [ 10 ] [ 11 ]. Radiology and medical imaging present a unique context for patient communication, particularly regarding exposure to ionising radiation. Patient anxiety surrounding diagnostic or interventional imaging is well documented and may influence decision-making or compliance with recommended procedures [ 12 ] [ 13 ]. Effective risk communication is, therefore, crucial to support informed consent; however, existing approaches vary considerably [ 14 ]. In typical clinical practice, for lower-risk examinations, such as computed tomography (CT) scans, radiation risks are often communicated through posters or leaflets displayed in clinical areas. 
In contrast, higher-risk procedures, such as interventional cardiology, require more detailed verbal discussions and written information to explain potential deterministic effects. Despite the growing use of LLMs to generate patient education materials [ 9 ] [ 10 ] [ 11 ], no studies to date have specifically evaluated their ability to communicate radiation risks to patients. Radiation risk communication is inherently complex, requiring accurate, accessible, and appropriately balanced information to support informed decision-making [ 15 ] [ 16 ]. This study aimed to assess the quality, relevance, and readability of LLM-generated patient information on radiation risks for both low- and high-risk imaging procedures. The findings will inform the potential role of LLMs in supporting radiation risk communication within clinical practice. Methods Study Design This study evaluated the quality and relevance of patient information generated by LLMs for communicating radiation risks associated with medical imaging procedures. Five LLMs were selected for assessment based on their widespread use in healthcare research. These included: Copilot (GPT-4-turbo, accessed 2025-03-26), ChatGPT (4o-2025-01-29), Claude (3.5 Sonnet-2024-10-22), Llama (3.3-70b-instruct), and Mistral (small-24b-instruct-2501). These were chosen based on common models chosen in existing research in this area [ 3 ] [ 4 ] [ 5 ] [ 6 ] [ 7 ] [ 8 ]. Two clinical scenarios were selected to represent procedures with differing levels of radiation risk and corresponding information requirements: a CT scan and an interventional cardiology procedure. The CT scan represents a standard, lower-risk examination where brief patient information is typically provided. In contrast, interventional cardiology procedures carry a higher risk of deterministic effects, requiring more detailed risk communication. To ensure consistency, a standardised prompt was developed and piloted for each scenario. 
These prompts were refined through iterative testing with two medical physicists to optimise clarity and relevance. The final prompts were: Prompt 1: Generate educational information to provide to patients before undergoing an interventional cardiology procedure. Include a brief, accessible overview of the radiation risks to support informed decision-making. The information should be appropriate for patients with diverse health literacy, age, and cultural backgrounds. Prompt 2: Generate educational information to provide to patients before undergoing a CT scan. Include a brief, accessible overview of the radiation risks to support informed decision-making. The information should be appropriate for patients with diverse health literacy, age, and cultural backgrounds. All LLMs, except Copilot, were accessed simultaneously using the Chatbot Arena platform (https://chat.lmsys.org/), which allows for unbiased comparison of responses from multiple models within the same interface and under identical conditions. Copilot was accessed separately through the institutional licence, as this model is integrated into the organisation’s IT environment. This ensured the local, deployed version was evaluated alongside publicly accessible models. Survey Design and Data Collection The generated information sheets were anonymised and compiled for assessment. A web-based survey was developed using Google Forms to evaluate the quality, relevance, and readability of each information sheet. The survey was reviewed for content validity by the two authors, both medical physicists with over 10 and 5 years of professional experience, respectively, before distribution. Three groups of healthcare professionals were invited to participate: medical physicists, radiographers, and health literacy specialists. These groups were selected to provide complementary perspectives on the technical accuracy, clinical relevance, and readability of the generated information. 
Participants were recruited through internal departmental emails at the study site, with voluntary and anonymous participation. The survey contained the following components: Demographic Information: Job role (medical physicist, radiographer, health literacy specialist). Evaluation Criteria: For each information sheet, participants rated on a 5-point Likert scale (1 = Poor, 5 = Excellent): Accuracy of procedural information. Accuracy of radiation risk information. Relevance of content to the examination and associated risks. Comprehensiveness of radiation risk discussion. Balance of risk and benefit conveyed. Readability and ease of understanding. Language appropriateness for diverse patient groups. Tone suitability for risk communication. Qualitative Feedback: Free-text fields for anticipated patient response and additional comments. Data analysis All data were analysed using Python version 3.11.7 with the following libraries: pandas (v2.1.4), numpy (v1.26.4), scipy (v1.11.4), and matplotlib (v3.8.0). Statistical significance was set at p < 0.05. Data normality was assessed using the Shapiro-Wilk test. As the data were not normally distributed, non-parametric tests were applied. Median scores and interquartile ranges were calculated for each model and professional group. The Kruskal-Wallis test was used to assess differences in responses between professional groups for each evaluation criterion. Within-group comparisons of LLM performance were conducted using the Friedman test for related samples. Where significant differences were identified, post hoc pairwise comparisons with Bonferroni correction were applied. Free-text responses underwent thematic analysis following the Braun and Clarke framework [ 17 ]. Themes were not pre-defined but emerged through inductive analysis of the qualitative data, consistent with the Braun and Clarke framework. 
The two authors independently coded the data, with discrepancies resolved through discussion to enhance the reliability of the results. Results A total of 12 respondents completed the survey, comprising four medical physicists, four radiographers, and four health literacy specialists. Table 1 presents the median overall score and interquartile range for each LLM, combining results from both clinical scenarios. Overall, ChatGPT, Llama, and Mistral achieved higher median scores compared to Claude and Copilot. View this table: View inline View popup Download powerpoint Table 1. Median (interquartile range) overall score for each LLM, by professional group and combined. Figures 1a–1f illustrate the distribution of scores for each evaluation criterion, stratified by professional group and clinical scenario. Download figure Open in new tab Download figure Open in new tab Download figure Open in new tab Fig 1(a)-(f). Heatmaps of the average score across all participants across all numerical evaluation criteria, for medical physicists, radiographers, and health literacy specialists, split by information sheet modality. The Kruskal–Wallis test was conducted to examine differences in evaluation scores between professional groups. For the CT scenario, significant differences were observed in readability, p = .049, and language appropriateness, p = .002. In the interventional cardiology scenario, significant differences were identified in the accuracy of information, p = .012; comprehensiveness of the radiation risk discussion, p = .043; readability, p = .001; language appropriateness, p < .001; and tone suitability, p = .002. The Friedman test was used to assess whether significant differences existed in LLM performance within each professional group. Among medical physicists, no statistically significant differences were found between the LLMs for any evaluation criterion in either clinical scenario. 
Similarly, no significant differences were observed among radiographers for the interventional cardiology scenario. However, for the CT scenario, a significant difference was found in the “Relevance of information” criterion, p = .023. Among health literacy specialists, significant differences between LLMs were observed across most evaluation criteria for both scenarios, p = .050. Evaluation criteria without a significant difference included “Radiation risk information” for both the interventional cardiology scenario, p = .128, and CT, p = .161, and “Readability” for interventional cardiology, p = .114. When responses from all professional groups were combined, the Friedman test revealed statistically significant differences between LLMs for several evaluation criteria. These included accuracy of information, p = .013; relevance of information, p = .001; comprehensiveness of the radiation risk discussion, p = .001; balance of risk and benefit conveyed, p = .007; readability, p = .020; and language appropriateness, p < .001. Thematic analysis of free-text responses identified recurring strengths and weaknesses for each LLM, summarised in Table 2. The identified themes were developed through inductive coding of participant responses, reflecting recurring issues related to accuracy and relevance of information, risk communication and emotional impact, language complexity and clarity, and patient engagement. View this table: View inline View popup Download powerpoint Table 2. Summary of the common themes and phrases identified in the free-text questions, categorised by model and job role. Each category is named, and the associated theme given, where T1 represents accuracy and relevance of information, T2 represents risk communication and emotional impact, T3 represents language complexity and clarity and T4 represents patient engagement. 
Discussion This study evaluated the quality, relevance, and readability of LLM-generated patient information for communicating radiation risks in medical imaging. The findings demonstrate that while LLMs can produce generally acceptable information, significant variability exists between models and across different professional perspectives. Notably, no single LLM consistently outperformed others across all evaluation criteria or professional groups. Differences in scoring between professional groups were most pronounced in areas relating to readability, language appropriateness, and tone. Although there are no existing studies directly comparing these groups, previous research does demonstrate that those with expertise in or high levels of health literacy may apply more stringent standards when evaluating patient information [ 18 ]. The health literacy specialists in this study identified significant differences between LLMs across most criteria, suggesting that LLM-generated information may not yet consistently meet best practice standards for accessible, patient-centred communication. In contrast, medical physicists and radiographers demonstrated fewer significant differences between LLMs, with their assessments appearing to focus primarily on the presence of key technical information. This difference in evaluation emphasis highlights the need for a multidisciplinary approach when assessing or implementing LLM-generated patient materials. While technical accuracy is essential, appropriate language, readability, and tone are equally important to ensure information is accessible, reduce patient anxiety, and support informed decision-making [ 19 ]. Thematic analysis of qualitative feedback further supported these findings. LLMs varied in their ability to balance technical content with accessible language. 
For example, ChatGPT was generally considered easy to understand but lacked sufficient detail in some areas, while Llama provided more detailed information but used complex or potentially alarming language. Several models included inaccurate or irrelevant information, such as references to protective equipment or genetic risks, which may confuse or unnecessarily concern patients. These results are consistent with recent studies assessing LLMs in other healthcare domains, which have similarly reported variability in accuracy, relevance, and appropriateness of LLM-generated patient information [ 3 ] [ 4 ] [ 5 ] [ 6 ] [ 7 ] [ 8 ] [ 9 ] [ 10 ] [ 11 ]. Importantly, this study extends those findings to the underexplored area of radiation risk communication, where the complexity of the topic and potential for patient anxiety underscore the need for clear, accurate, and carefully worded information. This study has several limitations. First, the small sample size (n = 12 participants) limits the statistical power of the study, particularly in detecting more subtle differences between models. As such, these results should be interpreted with caution and may not be fully generalisable to broader clinical or patient populations. Second, LLMs are evolving rapidly; the specific model versions assessed may have evolved by the time of publication. Third, only healthcare professionals assessed the generated information. While these experts are well-positioned to evaluate the accuracy and appropriateness of LLM-generated materials, the actual impact on patient understanding, anxiety, and decision-making can only be determined through direct patient feedback. These findings suggest that while LLMs may assist in drafting or refining radiation risk information, sole reliance on LLM-generated content is not yet recommended. 
A multidisciplinary review process, involving technical experts and health literacy specialists, remains essential to ensure accuracy, accessibility, and appropriateness. Future research should include patient-centred evaluations of LLM-generated materials to assess their real-world impact on understanding, satisfaction, and informed consent. Additionally, as LLM technology continues to evolve, ongoing assessment will be required to monitor improvements in model performance and suitability for use in radiation risk communication. Conclusions This study evaluated the ability of large language models (LLMs) to generate patient information for communicating radiation risks associated with medical imaging procedures. While LLMs demonstrated potential in producing relevant and generally understandable content, considerable variability was observed in the quality, accuracy, and readability of the information generated. No single LLM consistently performed well across all evaluation criteria or professional groups, and significant differences were identified in how technical experts, such as medical physicists and radiographers, and health literacy specialists assessed the materials. These findings underscore the importance of a multidisciplinary approach to evaluating LLM-generated patient information, particularly in the context of complex topics such as radiation risk. Although LLMs may serve as a useful tool to support the development of patient-facing materials, their use in isolation is not recommended. Careful human review, with input from both technical and health literacy experts, remains essential to ensure that information provided to patients is accurate, balanced, and accessible. As LLM technology continues to evolve, ongoing evaluation will be necessary to monitor improvements in model performance and suitability for clinical communication. 
Future research should incorporate direct patient feedback through usability testing, comprehension assessments, or anxiety measures to determine the real-world effectiveness of these materials. Data Availability All data produced in the present study are available upon reasonable request to the authors Statements and Declarations Funding The authors declare that no funds, grants, or other support were received during the preparation of this manuscript. Competing Interests The authors have no relevant financial or non-financial interests to disclose. Author Contributions All authors contributed to the study conception and design. AG completed survey design, data collection and analysis. MB and AG made significant contributions to the draft writeup. All authors read and approved the final manuscript. Ethics Approval No patient or sensitive data was collected for this project; hence an ethics review was not required. Informed Consent Not applicable. Consent to Participate Not applicable. Consent to Publish Not applicable. References [1]. ↵ Madabushi HT , Jones MD . Large language models in healthcare information research: making progress in an emerging field . BMJ Quality & Safety . 2025 ; 34 : 73 – 76 . doi: 10.1136/bmjqs-2024-017896 OpenUrl FREE Full Text [2]. ↵ Miao J , Thongprayoon C , Kashani KB , Cheungpasitporn W. Artificial intelligence as a tool for improving health literacy in kidney care . PLOS Digital Health . 2025 ; 4 : e0000746 . doi: 10.1371/journal.pdig.0000746 OpenUrl CrossRef [3]. ↵ Lim B , Seth I , Cuomo R , Kenney PS , Ross RJ , Sofiadellis F , Pentangelo P , Ceccaroni A , Alfano C , Rozen WM . Can AI answer my questions? Utilizing artificial intelligence in the perioperative assessment for abdominoplasty patients . Aesthetic Plastic Surgery . 2024 ; 48 : 4712 – 4724 . doi: 10.1007/s00266-024-04157-0 OpenUrl CrossRef [4]. ↵ Monroe CL , Abdelhafez YG , Atsina K , Aman E , Nardo L , Madani MH . 
Evaluation of responses to cardiac imaging questions by the artificial intelligence large language model ChatGPT . Clinical Imaging . 2024 ; 112 : 110193 . doi: 10.1016/j.clinimag.2024.110193 OpenUrl CrossRef [5]. ↵ Alasker A , Alsalamah S , Alshathri N , Almansour N , Alsalamah F , Alghafees M , AlKhamees M , Alsaikhan B. Performance of large language models (LLMs) in providing prostate cancer information . BMC Urology . 2024 ; 24 : 177 . doi: 10.1186/s12894-024-01570-0 OpenUrl CrossRef [6]. ↵ Su Z , Jin K , Wu H , Luo Z , Grzybowski A , Ye J. Assessment of large language models in cataract care information provision: a quantitative comparison . Ophthalmology and Therapy 2025 ; 14 : 103 – 116 . doi: 10.1007/s40123-024-01066-y OpenUrl CrossRef [7]. ↵ Khaldi A , Machayekhi S , Salvagno M , Maniaci A , Vaira LA , La Via L , Taccone FS , Lechien JR . Accuracy of ChatGPT responses on tracheotomy for patient education . European Archives of Oto-Rhino-Laryngology . 2024 ; 281 : 6167 – 6172 . doi: 10.1007/s00405-024-08859-8 OpenUrl CrossRef [8]. ↵ Ali SR , Dobbs TD , Hutchings HA , Whitaker IS . Using ChatGPT to write patient clinic letters . The Lancet Digital Health . 2023 ; 5 : E179 – E181 . doi: 10.1016/S2589-7500(23)00048-1 OpenUrl CrossRef [9]. ↵ Sridharan K , Sivaramakrishnan G. Investigating the capabilities of advanced large language models in generating patient instructions and patient educational material . European Journal of Hospital Pharmacy . 2024 ;Published Online First. doi: 10.1136/ejhpharm-2024-004245 OpenUrl Abstract [10]. ↵ Pompili D , Richa Y , Collins P , Richards H , Hennessey DB . Using artificial intelligence to generate medical literature for urology patients: a comparison of three different large language models . World Journal of Urology . 2024 ; 42 : 455 . doi: 10.1007/s00345-024-05146-3 OpenUrl CrossRef [11]. ↵ McCarthy CJ , Berkowitz S , Ramalingam V , Ahmed M. 
Evaluation of an artificial intelligence chatbot for delivery of IR patient education material: a comparison with societal website content . Journal of Vascular and Interventional Radiology . 2023 ; 34 : 1760 - 1768.e32 . doi: 10.1016/j.jvir.2023.05.037 OpenUrl CrossRef [12]. ↵ Lo Re G , De Luca R , Muscarneri F , Dorangricchia P , Picone D , Vernuccio F , Salerno S , La Tona G , Pinto A , Midiri M , Russo A , Lagalla R , Cicero G. Relationship between anxiety level and radiological investigation. Comparison among different diagnostic imaging exams in a prospective single-center study . La radiologia medica 2016 ; 121 : 763 – 768 . doi: 10.1007/s11547-016-0664-z OpenUrl CrossRef [13]. ↵ Heyer CM , Thuring J , Lemburg SP , Kreddig N , Hasenbring M , Dohna M , Nicolas V. Anxiety of patients undergoing CT imaging—an underestimated problem? Academic Radiology . 2015 ; 22 : 105 – 112 . doi: 10.1016/j.acra.2014.07.014 OpenUrl CrossRef [14]. ↵ Nyathi M , Thabane N. Evaluation of Doctor–Patient Communication Regarding Medical Examinations Involving Ionizing Radiation: A Cross-Sectional Study . Advances in Public Health . 2025 ; 5000212 . doi: 10.1155/adph/5000212 OpenUrl CrossRef [15]. ↵ Badawy MK , Khamwan K , Carrion D. A pilot study of generative AI video for patient communication in radiology and nuclear medicine . Health and Technology . 2025 ; 15 : 395 – 404 . doi: 10.1007/s12553-025-00945-z OpenUrl CrossRef [16]. ↵ Alawad S , Abujamea A. Awareness of radiation hazards in patients attending radiology departments . Radiation and Environmental Biophysics . 2021 ; 60 : 453 – 4583 . doi: 10.1007/s00411-021-00919-5 OpenUrl CrossRef PubMed [17]. ↵ Braun V , Clarke V. Using thematic analysis in psychology . Qualitative Research in Psychology . 2006 ; 3 : 77 – 101 . doi: 10.1191/1478088706qp063oa OpenUrl CrossRef [18]. ↵ Diviani N , van den Putte B , Meppelink CS , van Weert JCM. 
Exploring the role of health literacy in the evaluation of online health information: insights from a mixed-methods study . Patient Education and Counseling . 2016 ; 99 : 1017 – 1025 . doi: 10.1016/j.pec.2016.01.007 OpenUrl CrossRef [19]. ↵ Younger C , Wagner M , Douglas C , Warren-Forward H. Describing ionising radiation risk in the clinical setting: a systematic review . Radiography . 2019 ; 25 : 83 – 90 . doi: 10.1016/j.radi.2018.11.002 OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted July 24, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Evaluation of Large Language Model-Generated Patient Information for Communicating Radiation Risk Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Evaluation of Large Language Model-Generated Patient Information for Communicating Radiation Risk Alice Gutowski , Daniel Carrion , Mohamed Khaldoun Badawy medRxiv 2025.07.23.25332093; doi: https://doi.org/10.1101/2025.07.23.25332093 Share This Article: Copy Citation Tools Evaluation of Large Language Model-Generated Patient Information for Communicating Radiation Risk Alice Gutowski , Daniel Carrion , Mohamed Khaldoun Badawy medRxiv 2025.07.23.25332093; doi: https://doi.org/10.1101/2025.07.23.25332093 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Radiology and Imaging Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4426) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15222) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6589) Geriatric Medicine (667) Health Economics (997) Health Informatics (4525) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (971) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9221) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) 
Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (711) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9fff1681fa338650',t:'MTc3OTQ4NzE2NQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.