The Promise and Peril of Large Language Models in Digital Health: GPT-4 Personalizes Cardiovascular Patient Education but Amplifies Gender Biases

doi:10.1101/2025.11.19.25340616

The Promise and Peril of Large Language Models in Digital Health: GPT-4 Personalizes Cardiovascular Patient Education but Amplifies Gender Biases

2025 · doi:10.1101/2025.11.19.25340616

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 30,109 characters · extracted from preprint-html · click to expand

The Promise and Peril of Large Language Models in Digital Health: GPT-4 Personalizes Cardiovascular Patient Education but Amplifies Gender Biases | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search The Promise and Peril of Large Language Models in Digital Health: GPT-4 Personalizes Cardiovascular Patient Education but Amplifies Gender Biases View ORCID Profile Samah Khan , Georgeta Vaidean doi: https://doi.org/10.1101/2025.11.19.25340616 Samah Khan 1 New York University School of Global Public Health Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Samah Khan For correspondence: sk10596{at}nyu.edu Georgeta Vaidean 2 
FIU Herbert Wertheim College of Medicine M.D., M.P.H., Ph.D. Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Background Gender-neutral patient education materials often overlook critical sex-based differences in cardiovascular disease (CVD). Large Language Models (LLMs) like GPT-4 offer a potential tool for personalizing health communication, but their ability to correct gender gaps without introducing new biases is unknown. Methods We identified seven publicly available English-language CVD prevention handouts from major health organizations. Using GPT-4 API in August 2025, we generated gender-specific revisions for a 55-year-old male and female audience via standardized prompts. We provided structured prompts instructing the model to include evidence-based, sex-specific risk factors and symptoms. Original and revised materials were evaluated using Flesch-Kincaid Reading Ease, a novel 10-point gender-inclusivity checklist, and qualitative thematic analysis. Findings GPT-4 revisions substantially improved gender-inclusivity scores (Original median: 3.0/10, IQR=0.0-3.0; Male-tailored median: 8.0, IQR=7.0-9.0; Female-tailored median: 10.0, IQR=10.0-10.0). Readability was maintained. However, qualitative analysis revealed that while female-tailored versions excelled at incorporating biological facts (e.g., menopause), male-tailored versions often missed key clinical factors. For instance, 4 of 7 revisions failed to mention erectile dysfunction as a CVD risk marker. Revisions also occasionally relied on social stereotypes (e.g., “bottling up emotions”). Furthermore, both versions showed evidence of linguistic bias (framing female symptoms as ‘atypical,’ thereby reinforcing the male-centric clinical paradigm) and gendered assumptions in recommended activities. 
Conclusion LLMs can rapidly improve gender-specificity in patient education but can also perpetuate harmful stereotypes and linguistic biases likely absorbed from their training data. Their use requires careful, critical oversight to avoid undermining health equity and, potentially, to advance it. Why was this study done? Heart disease affects men and women differently, but most patient education materials tend to be the same for everyone. We wanted to see if Artificial Intelligence, specifically large language models like GPT-4, could help rewrite these materials to be more accurate and helpful for each gender. What did the researchers do and find? We took seven heart disease prevention online handouts from major public health and clinical organizations. We used GPT-4 to create new versions specifically for men and for women, including male-specific risk markers (e.g., erectile dysfunction) and female-specific risk enhancers (e.g., menopause). We found that the original handouts were not very gender-specific. The AI-revised versions were dramatically better, especially for women, providing more relevant information without making the text harder to read. However, we also found the AI sometimes made mistakes, like using stereotypes about how men handle emotions. What do these findings mean? This means AI can be a powerful assistant for creating drafts, but it’s not a replacement for human expertise. To be used safely, a healthcare professional must always check the AI’s work to ensure it is medically accurate and free from harmful stereotypes. Introduction Cardiovascular disease (CVD) remains the foremost cause of mortality globally, yet its manifestation, risk profiles, and optimal prevention strategies exhibit significant variation between sexes [1, 2]. For instance, women are more likely to present with “atypical” symptoms such as fatigue, nausea, or back pain during a myocardial infarction, often leading to delays in diagnosis and treatment [3, 4]. 
Biological factors, including the cardioprotective role of endogenous estrogen pre-menopause and its decline thereafter, fundamentally alter women’s CVD risk trajectory [5]. Conversely, men often develop CVD at a younger age and face distinct risk markers, such as erectile dysfunction [6]. Despite these well-documented differences, a substantial portion of patient-facing educational materials on CVD prevention remains gender-neutral [7]. This “one-size-fits-all” approach, while simplifying content creation, risks flattening critical nuances, potentially leaving patients ill-informed about their specific risks and symptoms. The field of health literacy has long advocated for tailored communication to improve comprehension and self-efficacy [8], yet creating multiple versions of materials is a resource-intensive process. The emergence of Large Language Models (LLMs) like GPT-4 offers a potential paradigm shift. These models can generate human-like text and follow complex instructions, presenting an unprecedented opportunity to automate the personalization of health information at scale [9]. Preliminary research has explored LLMs for improving readability and simplifying medical jargon [10], but their ability to address nuanced issues of gender representation and equity in clinical content remains largely unexplored. We hypothesized that GPT-4 can be strategically prompted to revise existing CVD prevention materials into gender-specific versions that are both highly readable and significantly more inclusive of evidence-based sex differences. Specifically, we aimed to: (1) quantify the extent of gender-specific content in widely used patient education handouts, and (2) evaluate the efficacy of GPT-4 in enhancing gender representation while preserving or improving readability. 
Our study therefore aims to serve as a critical, real-world evaluation of a prominent AI tool for a core digital health application: the scalable personalization of patient-facing communication, with further potential to support more equitable health education materials. Results 1. Quantitative Improvements in Gender-Specificity and Readability The seven original patient education handouts demonstrated moderate readability (Median Flesch Reading Ease = 64.0, IQR = 60.7-71.2). However, their performance on the gender-inclusivity checklist was poor (Median Score = 3.0, IQR = 0.0-3.0). Notably, materials from the CDC and NHS scored 0/10, containing no sex-specific content. MedLine Plus scored highest (6/10) but still omitted key differentiators like symptom variation. No original material explicitly discussed hormonal influences like testosterone’s role in male cardiovascular health or pregnancy-related risks for women. GPT-4 revision led to a dramatic and consistent improvement in gender-inclusivity scores for both male- and female-tailored versions ( Figure 1 ). The increase was most pronounced for female-tailored materials, which achieved a perfect median score of 10.0 (IQR = 10.0-10.0). All seven female-tailored versions addressed female-predominant symptoms, menopause-related risk, and hormonal influences. Male-tailored versions also showed significant improvement (Median Score = 8.0, IQR = 7.0-9.0), though they occasionally omitted specific factors like erectile dysfunction. View this table: View inline View popup Download powerpoint Table 1. Readability and Gender-Inclusivity Scores for Original and GPT-4 Revised Patient Education Materials. Download figure Open in new tab Figure 1. Boxplot of gender-inclusivity checklist scores (0-10 scale) for original, male-tailored, and female-tailored patient education materials. 
The female-tailored condition demonstrates near-perfect performance, with a median score of 10 and a tight interquartile range (IQR) at the maximum value. A single score of 9 falls below the IQR, indicated by the whisker. A Friedman test revealed a statistically significant difference in gender-inclusivity scores across the three conditions ($\chi^2(2) = 14.0$, p < 0.001). Post-hoc Nemenyi tests confirmed that both male-tailored (p = 0.020) and female-tailored (p < 0.001) scores were significantly higher than original scores. The difference between male and female-tailored scores was not statistically significant (p = 0.47). GPT-4 revisions not only enhanced gender-specificity but also maintained high readability. The median Flesch Reading Ease score was 73.2 (IQR = 68.8-75.6) for male-tailored and 70.3 (IQR = 67.8-73.8) for female-tailored versions. A separate Friedman test found no statistically significant difference in readability scores across the three conditions ($\chi^2(2) = 5.43$, p = 0.066), indicating that the process of adding complex, specific information did not compromise textual accessibility. 2. Qualitative Analysis Reveals Persistent Stereotypes and Linguistic Biases A qualitative analysis of the GPT-4 outputs uncovered instances where the model amplified societal biases. Linguistic Framing of Male-as-Default: In describing symptoms, the NHS female revision noted women experience “‘less common’ heart attack symptoms.” This framing positions the male clinical presentation as the norm and the female presentation as a deviation, potentially reinforcing diagnostic disparities. Omission of Male-Specific Biology: Despite explicit prompting, most male-tailored revisions failed to mention erectile dysfunction as a CVD risk marker or discuss the role of testosterone. Reinforcement of Social Stereotypes: The model occasionally substituted clinical facts with social tropes. 
For example, a male revision generated by GPT-4 stated, “Men are more likely to bottle up emotions, which can lead to stress-related health problems,” a non-evidence-based generalization. Gendered Assumptions in Preventive Actions : Recommendations for physical activity and overcoming barriers often reflected traditional gender roles. For example, when a gender-neutral original handout listed ‘housekeeping’ as an example of physical activity, this was retained only in its female-tailored GPT-4 revision. In contrast, a male-tailored revision of a different source suggested ‘affordable gyms,’ demonstrating how the model imposed gendered assumptions on neutral content. Discussion 4.1. Principal Findings Our study suggests that GPT-4 was highly effective at integrating evidence-based, sex-specific information into CVD prevention materials, with a particularly strong performance in creating content for women. This likely reflects a more substantial corpus of literature and public health messaging addressing the historical neglect of women’s cardiovascular health. The improvement in male-tailored content, while significant, was less complete, suggesting that nuanced men’s health topics are less prominently featured in the model’s training data. 4.2. Implications of Bias and Stereotype Perpetuation Our most critical finding is that the quantitative success of LLMs in adding content can mask profound qualitative failures in equity. GPT-4 did not merely reflect biases present in its training data; in several instances, it actively generated and amplified them. The consistent omission of erectile dysfunction as a CVD risk marker, despite explicit male-context prompting, constitutes a critical clinical oversight and reveals a gap in the model’s assimilation of men’s health literature. Phrases like “bottling up emotions” replace biomedical fact with social stereotypes. 
The description of female symptoms as “less common” is not merely a semantic issue but a manifestation of the androcentric bias historically prevalent in medicine, which can have tangible consequences for women’s care [ 11 ]. These biases likely stem from the model’s training on vast datasets that contain societal and medical biases. Without explicit correction, the model replicates these patterns. This indicates that using LLMs for health equity purposes should not be a passive process. It requires active, critical prompting and oversight. 4.3. Limitations and Future Directions This study has several limitations. The sample size of seven handouts, while diverse, is small. The evaluation relied on an ad hoc checklist developed for this study, informed by established literature. Our evaluation, while incorporating a structured checklist and qualitative analysis, did not include patient perspectives on the perceived empathy, trustworthiness, or usefulness of the AI-generated materials. We did not test the materials with actual patients to assess comprehension or behavior change, a vital next step. Future research should explore LLM customization for non-binary individuals, adaptation into other languages, and application to other disease areas. A key direction for future work is to develop and test “bias-aware” prompting strategies that explicitly instruct the model to avoid stereotypes and use patient-centric language. Furthermore, evaluation should include feedback from diverse patient groups to assess not just accuracy but also perceived empathy and relevance. 4.4. Conclusion In conclusion, LLMs like GPT-4 represent a powerful but double-edged sword for creating equitable health education. They can rapidly generate more personalized content but can also codify existing biases. 
Therefore, their implementation must be guided by a framework of critical digital health literacy, where AI assists in drafting, but human expertise (particularly in ethics and health equity) remains essential for editing, validation, and ensuring the promotion of authentic patient-centered care. Materials and Methods 1. Material Selection To simulate a patient’s information-seeking behavior, we conducted a systematic search using the Google search engine on August 17, 2025. The search query ‘how to prevent a heart attack’ was used. We included the top 7 unique English-language patient education handouts from the first page of results, representing major public health and clinical organizations (e.g., CDC, NHS, Mayo Clinic). This approach was chosen to capture the materials a typical patient is most likely to encounter during a common information-seeking behavior, thereby enhancing the external validity of our study. All original handouts are available in the OSF repository. 2. LLM Revision Process Each handout was processed using the OpenAI GPT-4 API on August 17, 2025 with the specified prompts. The chat completion API was used in a temporary (stateless) session to ensure that each prompt was processed independently without influence from previous interactions. The phrase ‘where clinically established’ was included in the prompts to constrain the model’s output to only those sex-specific differences supported by robust clinical evidence, thereby minimizing the generation of unverified or speculative content. We used the GPT-4 model with a temperature setting of 0 to maximize determinism and reproducibility. Prompts were refined through an iterative piloting process with three handouts to establish reliability. These pilot handouts were not included in the final analysis of the seven primary handouts. All outputs were saved verbatim. “Revise this heart disease prevention material for a 55-year-old male audience at a 6th-grade reading level. 
Where clinically established, explicitly include: Male-predominant symptom presentations Biological risk factors more common in men Prevention strategies with different efficacy/considerations for men Maintain strict medical accuracy and cite only evidence-based differences.” “Revise this heart disease prevention material for a 55-year-old female audience at a 6th-grade reading level. Where clinically established, explicitly include: Female-predominant symptom presentations Biological risk factors more common in women Prevention strategies with different efficacy/considerations for women Maintain strict medical accuracy and cite only evidence-based differences.” 3. Evaluation Metrics A novel 10-point gender-inclusivity checklist was developed de novo based on a comprehensive review of literature on sex-based differences in CVD [12, 13, 14, 15, 16, 17]. It consisted of 5 domains: (1) sex-specific symptoms, (2) biological risk factors, (3) hormonal influences, (4) sex-specific risk markers (e.g., erectile dysfunction), and (5) considerations for prevention strategies. Each domain was scored 0 (absent), 1 (mentioned but incomplete), or 2 (clearly and accurately described), for a total of 10 points. All materials were scored by a single reviewer using this structured, criteria-based checklist to ensure consistency in application. The objective nature of the checklist items (e.g., presence/absence of a specific risk factor) was designed to minimize subjective interpretation. 4. Data Analysis Descriptive statistics (medians and interquartile ranges (IQR)) were used to summarize readability and gender-inclusivity scores. Given the small sample size (N=7 handouts) and the ordinal nature of the checklist scores, the non-parametric Friedman test was used to compare gender-inclusivity scores across the three revision types (Original, Male-tailored, Female-tailored). A Friedman test was also performed on the Flesch Reading Ease scores. 
A statistically significant Friedman test was followed by post-hoc pairwise comparisons. Statistical analysis was performed using StatsKingdom.com. A p-value of < 0.05 was considered statistically significant. Data Availability Statement All data and materials are available in the OSF repository: https://doi.org/10.17605/OSF.IO/YNA6K References 1. ↵ Keteepe-Arachi T , Sharma S. Cardiovascular disease in Women: Understanding symptoms and risk factors . European Cardiology Review [Internet] . 2017 Jan 1; 12 ( 1 ): 10 . Available from: https://pmc.ncbi.nlm.nih.gov/articles/PMC6206467/ OpenUrl 2. ↵ Garcia M , Mulvagh SL , Merz CNB , Buring JE , Manson JE . Cardiovascular disease in women . Circulation Research [Internet] . 2016 Apr 14; 118 ( 8 ): 1273 – 93 . Available from: https://pmc.ncbi.nlm.nih.gov/articles/PMC4834856/ OpenUrl 3. ↵ DeVon HA , Mirzaei S , Zègre-Hemsey J. Typical and atypical symptoms of acute coronary syndrome: Time to retire the terms? Journal of the American Heart Association [Internet] . 2020 Mar 25; 9 ( 7 ): e015539 . Available from : doi: 10.1161/jaha.119.015539 OpenUrl CrossRef 4. ↵ Schulte KJ , Mayrovitz HN . Myocardial Infarction Signs and Symptoms: Females vs . Males. Cureus [Internet] . 2023 Apr 13; Available from: https://pmc.ncbi.nlm.nih.gov/articles/PMC10182740/ 5. ↵ Ryczkowska K , Adach W , Janikowski K , Banach M , Bielecka-Dabrowa A. Menopause and women’s cardiovascular health: is it really an obvious relationship? Archives of Medical Science [Internet] . 2022 Dec 10; 19 ( 2 ): 458 – 66 . Available from: https://pmc.ncbi.nlm.nih.gov/articles/PMC10074318/ OpenUrl 6. ↵ Yannas D , Frizza F , Vignozzi L , Corona G , Maggi M , Rastrelli G. Erectile dysfunction is a hallmark of cardiovascular disease: unavoidable matter of fact or opportunity to improve men’s health? Journal of Clinical Medicine [Internet] . 2021 May 20; 10 ( 10 ): 2221 . Available from: https://pmc.ncbi.nlm.nih.gov/articles/PMC8161068/ OpenUrl 7. 
↵ Hilleary RS , Jabusch SM , Zheng B , Jiroutek MR , Carter CA . Gender disparities in patient education provided during patient visits with a diagnosis of coronary heart disease . Women S Health [Internet] . 2019 Jan 1; 15 : 1745506519845591 . Available from: https://pmc.ncbi.nlm.nih.gov/articles/PMC6535750/ OpenUrl 8. ↵ Bhattad PB , Pacifico L. Empowering patients: Promoting patient education and health literacy . Cureus [Internet] . 2022 Jul 27; 14 ( 7 ): e27336 . Available from: https://pmc.ncbi.nlm.nih.gov/articles/PMC9411825/ OpenUrl 9. ↵ Aydin S , Karabacak M , Vlachos V , Margetis K. Large language models in patient education: a scoping review of applications in medicine . Frontiers in Medicine [Internet] . 2024 Oct 29; 11 : 1477898 . Available from: https://pmc.ncbi.nlm.nih.gov/articles/PMC11554522/ OpenUrl 10. ↵ Pal A , Wangmo T , Bharadia T , Ahmed-Richards M , Bhanderi M , Kachhadiya R , et al. Generative AI/LLMS for plain language medical information for patients, caregivers and general public: Opportunities, risks and ethics . Patient Preference and Adherence [Internet] . 2025 Jul 1 ;Volume 19 : 2227 – 49 . Available from: https://pmc.ncbi.nlm.nih.gov/articles/PMC12325106/ OpenUrl 11. ↵ Ferry AV , Anand A , Strachan FE , Mooney L , Stewart SD , Marshall L , et al. Presenting symptoms in men and women diagnosed with myocardial infarction using Sex-Specific criteria . Journal of the American Heart Association [Internet] . 2019 Aug 21; 8 ( 17 ): e012307 . Available from: https://pubmed.ncbi.nlm.nih.gov/31431112/ OpenUrl 12. Hamid AA , Beckett R , Wilson M , Jalal Z , Cheema E , Obe DAJ , et al. Gender Bias in Diagnosis, Prevention, and Treatment of Cardiovascular Diseases: A Systematic review . Cureus [Internet] . 2024 Feb 15; 16 ( 2 ): e54264 . Available from : doi: 10.7759/cureus.54264 OpenUrl CrossRef 13. Gao Z , Chen Z , Sun A , Deng X. Gender differences in cardiovascular disease . Medicine in Novel Technology and Devices [Internet] . 
2019 Dec 1; 4 : 100025 . Available from : doi: 10.1016/j.medntd.2019.100025 OpenUrl CrossRef 14. Rajendran A , Minhas AS , Kazzi B , Varma B , Choi E , Thakkar A , et al. Sex-specific differences in cardiovascular risk factors and implications for cardiovascular disease prevention in women . Atherosclerosis [Internet] . 2023 Sep 4; 384 : 117269 . Available from : doi: 10.1016/j.atherosclerosis.2023.117269 OpenUrl CrossRef PubMed 15. Rodgers JL , Jones J , Bolleddu SI , Vanthenapalli S , Rodgers LE , Shah K , et al. Cardiovascular Risks Associated with Gender and Aging . Journal of Cardiovascular Development and Disease [Internet] . 2019 Apr 27; 6 ( 2 ): 19 . Available from : doi: 10.3390/jcdd6020019 OpenUrl CrossRef PubMed 16. Saeed A , Kampangkaew J , Nambi V. Prevention of cardiovascular disease in women . Methodist DeBakey Cardiovascular Journal [Internet] . 2017 Oct 1; 13 ( 4 ): 185 . Available from : doi: 10.14797/mdcj-13-4-185 OpenUrl CrossRef 17. Suman S , Pravalika J , Manjula P , Farooq U. Gender and CVD-does it really matters? Current Problems in Cardiology [Internet] . 2023 Jan 21; 48 ( 5 ): 101604 . Available from : doi: 10.1016/j.cpcardiol.2023.101604 OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted November 20, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following The Promise and Peril of Large Language Models in Digital Health: GPT-4 Personalizes Cardiovascular Patient Education but Amplifies Gender Biases Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. 
Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share The Promise and Peril of Large Language Models in Digital Health: GPT-4 Personalizes Cardiovascular Patient Education but Amplifies Gender Biases Samah Khan , Georgeta Vaidean medRxiv 2025.11.19.25340616; doi: https://doi.org/10.1101/2025.11.19.25340616 Share This Article: Copy Citation Tools The Promise and Peril of Large Language Models in Digital Health: GPT-4 Personalizes Cardiovascular Patient Education but Amplifies Gender Biases Samah Khan , Georgeta Vaidean medRxiv 2025.11.19.25340616; doi: https://doi.org/10.1101/2025.11.19.25340616 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4536) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (541) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical 
Psychology (5447) Public and Global Health (9232) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00c8c268844e748',t:'MTc3OTYyODI5OQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00