Sociodemographic Bias in Large Language Model Clinical Trial Screening

doi:10.1101/2025.11.15.25340177

Sociodemographic Bias in Large Language Model Clinical Trial Screening

2025 · doi:10.1101/2025.11.15.25340177

preprint OA: closed CC-BY-NC-ND-4.0

📄 Open PDF Full text JSON View at publisher

Full text 27,145 characters · extracted from preprint-html · click to expand

Sociodemographic Bias in Large Language Model Clinical Trial Screening | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Sociodemographic Bias in Large Language Model Clinical Trial Screening View ORCID Profile Shelly Soffer , View ORCID Profile Mahmud Omar , View ORCID Profile Orly Efros , View ORCID Profile Donald U. 
Apakama , View ORCID Profile Aya Mudrik , Robert Freeman , View ORCID Profile Girish N Nadkarni , View ORCID Profile Eyal Klang doi: https://doi.org/10.1101/2025.11.15.25340177 Shelly Soffer 1 Institute of Hematology, Davidoff Cancer Center, Rabin Medical Center ; Petah Tikva, Israel 2 Gray School of Medicine, Tel Aviv University , Tel Aviv, Israel MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Shelly Soffer For correspondence: Soffer.shelly{at}gmail.com Eyal.Klang{at}mountsinai.org Mahmud Omar 3 Windreich Department of AI and Human Health, Icahn School of Medicine at Mount Sinai , 1 Gustave L. Levy Place; New York, NY 10029, United States 4 The Hasso Plattner Institute for Digital Health at Mount Sinai, Department of Medicine, Icahn School of Medicine at Mount Sinai ; New York, NY 10019, United States MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Mahmud Omar Orly Efros 2 Gray School of Medicine, Tel Aviv University , Tel Aviv, Israel 5 National Hemophilia Center and Thrombosis & Hemostasis Institute, Sheba Medical Center , Ramat Gan, Israel MD, MHA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Orly Efros Donald U. Apakama 3 Windreich Department of AI and Human Health, Icahn School of Medicine at Mount Sinai , 1 Gustave L. 
Levy Place; New York, NY 10029, United States 4 The Hasso Plattner Institute for Digital Health at Mount Sinai, Department of Medicine, Icahn School of Medicine at Mount Sinai ; New York, NY 10019, United States 6 Institute for Health Equity Research, Icahn School of Medicine at Mount Sinai New York ; NY, United States 7 Department of Emergency Medicine, Icahn School of Medicine at Mount Sinai New York ; NY, United States MD, MS Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Donald U. Apakama Aya Mudrik 8 Ben-Gurion University of the Negev ; Be’er Sheva, Israel MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Aya Mudrik Robert Freeman 3 Windreich Department of AI and Human Health, Icahn School of Medicine at Mount Sinai , 1 Gustave L. Levy Place; New York, NY 10029, United States 4 The Hasso Plattner Institute for Digital Health at Mount Sinai, Department of Medicine, Icahn School of Medicine at Mount Sinai ; New York, NY 10019, United States RN Find this author on Google Scholar Find this author on PubMed Search for this author on this site Girish N Nadkarni 3 Windreich Department of AI and Human Health, Icahn School of Medicine at Mount Sinai , 1 Gustave L. Levy Place; New York, NY 10029, United States 4 The Hasso Plattner Institute for Digital Health at Mount Sinai, Department of Medicine, Icahn School of Medicine at Mount Sinai ; New York, NY 10019, United States MD, MPH Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Girish N Nadkarni Eyal Klang 3 Windreich Department of AI and Human Health, Icahn School of Medicine at Mount Sinai , 1 Gustave L. 
Levy Place; New York, NY 10029, United States 4 The Hasso Plattner Institute for Digital Health at Mount Sinai, Department of Medicine, Icahn School of Medicine at Mount Sinai ; New York, NY 10019, United States MD Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Eyal Klang For correspondence: Soffer.shelly{at}gmail.com Eyal.Klang{at}mountsinai.org Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Background Large language models (LLMs) are increasingly used in randomized clinical trial (RCT) screening, but their potential for sociodemographic bias remains unclear. Objective To determine whether LLM-based trial screening judgments vary with patient sociodemographic characteristics when clinical details and eligibility criteria are held constant. Design, Setting, and Participants Cross-sectional evaluation of Phase II–III RCT protocols from ClinicalTrials.gov (U.S. adult populations; 2023–2024). For each protocol, we created 15 physician-validated clinical vignettes rendered in 34 versions: one control (no identifiers) and 33 identity variants spanning gender, race/ethnicity, socioeconomic status, homelessness, unemployment, and sexual orientation. Exposures Identity labels applied to otherwise identical vignettes, evaluated by nine contemporary LLMs. Main Outcomes and Measures Primary eligibility domain score (1–5 Likert scale) comparing identity variants versus control. Secondary: adherence, resources, risk–benefit, and trust/attitude domains. Mixed-effects models estimated adjusted mean differences with multiplicity-corrected P values; differences <.10 considered trivial. Results Of 69 protocols, 58 met inclusion criteria. Analysis of 5,324,400 model evaluations showed eligibility judgments were largely stable: most identity-related differences fell within ±0.05 (transgender woman −.008 [95% CI −.04 to .02]; White male .036 [.01 to .07]). 
Only homelessness exceeded the trivial threshold (−.121 [−.15 to −.09], P<.001). Secondary domains revealed socioeconomic gradients, particularly for adherence (homeless −.595, P<.001) and resources (homeless −.715, P<.001), with smaller trust/attitude effects and negligible risk–benefit differences. Conclusions and Relevance Bias in LLM–assisted trial screening is conditional. Within fixed criteria, models reason consistently; outside them, they echo the inequities of their data. Responsible deployment in clinical research depends on preserving that boundary so that automation strengthens fairness in trial access rather than inheriting distortion. Introduction LLMs are increasingly used for randomized controlled trial (RCT) workflows, from cohort selection to eligibility screening 1 – 4 . Early systems have achieved high accuracy on cohort-selection benchmarks and can generate clinician-interpretable rationales 3 . Yet, parallel studies in clinical decision support and patient-facing tools report systematic sociodemographic bias in LLM outputs 5 – 9 . These two trends point in opposite directions: rising performance and rising concern. If trial automation inherits bias, it could quietly distort who gains access to experimental therapies. We therefore asked whether LLM-based screening decisions change with patient sociodemographic descriptors when clinical details and trial criteria remain identical. We explored this by applying multiple LLM systems to Phase II–III trial protocols from ClinicalTrials.gov . Methods Trial Protocols Study design is shown in Figure 1 . Download figure Open in new tab Figure 1. Study Design. (a) Generation and validation. We selected 58 U.S. adult Phase II–III RCT protocols and generated 15 standardized vignettes per trial (5 eligible, 5 ineligible, 5 borderline), then created 34 identity variations per vignette; two physicians independently validated the materials. (b) Prompted runs. 
Each vignette–identity combination was evaluated by an ensemble of 9 LLMs, yielding more than 5 million prompted runs. (c) Analysis. Models answered 20 structured screening questions across five domains (primary outcome: Eligibility; secondary: Risk–Benefit, Adherence & Retention, Resource Sufficiency, Trust & Attitude). Outputs were parsed programmatically, and linear mixed-effects models estimated adjusted mean differences versus the matched control vignette per domain and identity. Abbreviations: LLM, large language model; RCT, randomized clinical trial. We sampled Phase II–III trials in adult populations (≥ 18 years) registered in the United States on ClinicalTrials.gov . Trials were eligible if they were initiated or updated during 2023–2024 and included a publicly available study protocol. Case Generation For each trial protocol, we used Claude 3.5 Sonnet to generate 15 standardized clinical vignettes: five clearly eligible, five clearly ineligible, and five borderline cases requiring clinical judgment without explicit exclusion violations. All vignettes were strictly clinical, omitted sociodemographic cues, and used gender-neutral language. Prompt design, including vignette classes, formatting rules, and three exemplar cases is detailed in Supplementary Section 1 . Two board-certified physicians ( SS and MO ) independently reviewed all vignettes for medical accuracy and neutrality before model testing. Sociodemographic Perturbations For each vignette, we first created a control version without sociodemographic identifiers (retaining age as a clinical variable). We then generated 33 identity variations covering gender, race or ethnicity, socioeconomic status indicators (high-income, low-income, unemployed, homelessness), and sexual orientation. The complete identifier list and selection logic are in Supplementary Section 2 . 
Questions Posed to LLMs Models answered 20 questions grouped into five domains: eligibility likelihood; risk–benefit perception; adherence; resource sufficiency; and trust (the complete list of questions appears in Supplementary Section 3 ). Each model received (a) the full protocol, (b) the vignette (control or identity-perturbed), and (c) the question ( Supplementary Section 4 ). Models We evaluated 9 contemporary LLMs ( Supplementary Section 5 ). Because Claude 3.5 Sonnet was used in the vignette-generation pipeline, it was excluded from the evaluation phase to avoid circularity. Primary Outcome Our primary endpoint was the Eligibility domain score, comparing each sociodemographic variation against the control vignette for the same case and protocol. Secondary domains captured Adherence, Resources, Risk–Benefit, and Trust/Attitude. Statistical analysis We estimated group-wise differences with linear mixed-effects models, treating identity label as a fixed effect and including random effects to account for repeated observations by vignette and by model. For each domain, we computed the adjusted mean difference for each identity variant versus the control vignette (identity-free), with 95% Wald confidence intervals and Benjamini–Hochberg–adjusted P values for multiplicity within domain. For interpretation tied to screening relevance, absolute differences <0.10 on the 1–5 scale were considered trivially small. Results Overview Of 69 RCT protocols retrieved from ClinicalTrials.gov , 58 met inclusion criteria and were analyzed. For each protocol we generated 15 vignettes and queried 9 LLMs across 34 sociodemographic identity labels (including a control), yielding 5,324,400 model–question evaluations. The most common disease areas were oncology (n=13), infectious diseases (n=12), and neurology/pain (n=7) ( Supplement section 6 ). Primary Outcome: Eligibility Across identity groups, eligibility differences were small ( Figure 2A ). Most adjusted contrasts fell within ±0.05. 
Examples include transgender woman (–0.008; 95% CI –0.04 to 0.02; p=1.00) and a modest positive shift for a White male (0.036; 95% CI 0.01 to 0.07; p=.024). Race and ethnicity labels alone were close to zero once socioeconomic status (SES) was accounted for [e.g., Black (0.008; 95% CI –0.02 to 0.04; p=1.00), Asian (–0.004; 95% CI –0.04 to 0.03; p=1.00), White (0.024; 95% CI –0.01 to 0.06; p=.559)]. The largest deviation was observed for homelessness (–0.121; 95% CI –0.15 to –0.09; p<.001). Using a ±0.10 margin as the threshold for practical relevance, eligibility differences were trivial for nearly all identity labels. Download figure Open in new tab Download figure Open in new tab Download figure Open in new tab Download figure Open in new tab Download figure Open in new tab Figure 2. Identity-level effects across 5 domains. A, Eligibility; B, Adherence & Retention; C, Resource Sufficiency/Burden; D, Risk–Benefit; E, Trust & Attitude. Points show the mixed-effects model–adjusted mean difference on a 1–5 scale comparing identity-perturbed with control vignettes for each identity. Secondary Outcomes Adherence. An SES gradient was evident: Homeless (–0.595; 95% CI –0.61 to –0.58; p<.001), Black Homeless (–0.542; 95% CI –0.56 to –0.53; p<.001), Low-income (–0.138; 95% CI –0.16 to –0.12; p<.001), and High-income (0.087; 95% CI 0.07 to 0.11; p<.001) ( Figure 2B ). Item effects clustered in adherence/retention and resource items (homeless −0.54 to −1.04; high income +0.11 to +0.29). Resources. This domain showed the largest SES dispersion: Homeless (–0.715; 95% CI –0.73 to –0.70; p<.001), Black Homeless (–0.689; 95% CI –0.71 to –0.67; p<.001), Low-income (–0.207; 95% CI –0.22 to –0.19; p<.001), High-income (0.129; 95% CI 0.11 to 0.15; p<.001). ( Figure 2C ). Trust/Attitude. 
Differences were smaller but SES-linked: High-income (0.053; 95% CI 0.04 to 0.07; p<.001); Low-income (–0.103; 95% CI –0.12 to –0.09; p<.001); Homeless (–0.337; 95% CI –0.35 to –0.33; p<.001); Black Homeless (–0.372; 95% CI –0.38 to –0.36; p<.001) ( Figure 2D ). Risk–Benefit. Effects centered near zero (range, –0.086 to 0.023); no label reached an absolute difference of ≥0.10. Homeless was most negative (–0.086; 95% CI –0.11 to –0.06; p<.001) and High-income slightly positive (0.023; 95% CI 0.00 to 0.05; p=.058) ( Figure 2E ). Aggregated, domain-level results are in the Supplement Table 1, Supplement Table 2 , and Supplement Figure 1 . Discussion Early evidence suggested that language models inherit human bias across medical tasks 5 – 9 . We expected similar distortion in trial screening, where eligibility decisions can quietly determine who gains access to therapy. Yet, under controlled conditions, eligibility judgments proved largely stable. When the clinical content and protocol were fixed, identity alone rarely changed the outcome. This finding contrasts with prior studies showing demographic disparities in diagnostic reasoning, triage recommendations, and note generation 5 , 7 , 8 . Omar et al. reported higher odds of urgent triage and inpatient referral for marginalized groups, and Zack et al. observed racial and gender differentials in GPT-class reasoning tasks 7 , 8 . In contrast, our results suggest that when models are confined to explicit trial criteria, outputs remain consistent across identities. Bias reappeared only when prompts extended beyond formal rules. In adherence, resources, and trust, socioeconomic cues, especially homelessness and low income, lowered scores, echoing societal hierarchies. These domains invite inference about behavior and capacity, and there the models mirror the patterns they have learned. The implication is clear: the risk lies not in eligibility logic but in its periphery. 
If these “soft” judgments influence recruitment or resource allocation, disparities may re-enter through operational pathways rather than inclusion criteria. The appropriate safeguard is separation: keep eligibility tethered to protocol language and treat auxiliary domains as planning variables, not filters. Our scope differs from recent equity work in clinical trial matching and medical question answering 10 . That study introduced sociodemographic cues into prompts and found group-dependent changes in ranking and QA performance, motivating mitigation at those stages. Here, we held protocols constant, stripped prompts of social determinants of health (SDOH) content, and examined eligibility judgments as the endpoint. Several limitations qualify these findings. We analyzed U.S. adult Phase II–III trials from 2023–2024, which may not generalize to pediatric or international contexts. Socioeconomic descriptors were simplified proxies, and standardized vignettes cannot fully capture clinical nuance. Models were tested at single time points using default configurations, and newer iterations could behave differently. In conclusion, bias in large language model–assisted trial screening is conditional. Within fixed criteria, models reason consistently; outside them, they echo the inequities of their data. Responsible deployment in clinical research depends on preserving that boundary so that automation strengthens fairness in trial access rather than inheriting distortion. Data Availability All data analyzed in this study were obtained from publicly accessible sources (ClinicalTrials.gov) and are available online. Authors’ Contributions Dr Soffer and Dr Omar had full access to all the data in the study and take responsibility for the integrity of the data and the accuracy of the data analysis. Concept and design: Soffer, Omar, Nadkarni, Klang. Acquisition, analysis, or interpretation of data: Soffer, Omar, Klang. Drafting of the manuscript: Soffer, Omar, Klang. 
Critical revision of the manuscript for important intellectual content: All authors. Statistical analysis: Soffer, Omar. Supervision: Klang, Nadkarni. Conflict of Interest Disclosures None reported. Funding/Support This work was partly supported by the Clinical and Translational Science Awards (CTSA) grant UL1TR004419 from the National Center for Advancing Translational Sciences. Data Sharing Statement All data analyzed in this study were obtained from publicly accessible sources ( ClinicalTrials.gov ) and are available online. Acknowledgment References 1. ↵ Gupta S , Basu A , Nievas M , et al. PRISM: Patient Records Interpretation for Semantic clinical trial Matching system using large language models . NPJ Digit Med . 2024 ; 7 ( 1 ): 305 . doi: 10.1038/s41746-024-01274-7 OpenUrl CrossRef PubMed 2. Jin Q , Wang Z , Floudas CS , et al. Matching patients to clinical trials with large language models . Nat Commun . 2024 ; 15 ( 1 ): 9074 . doi: 10.1038/s41467-024-53081-z OpenUrl CrossRef PubMed 3. ↵ Wornow M , Lozano A , Dash D , Jindal J , Mahaffey KW , Shah NH . Zero-Shot Clinical Trial Patient Matching with LLMs . NEJM AI . 2025 ; 2 ( 1 ). doi: 10.1056/AIcs2400360 OpenUrl CrossRef 4. ↵ Srinivasan A , Berkowitz J , Friedrich NA , Kivelson S , Tatonetti NP . Large Language Model Analysis of Reporting Quality of Randomized Clinical Trial Articles: A Systematic Review . JAMA Netw Open . 2025 ; 8 ( 8 ): e2529418 . doi: 10.1001/jamanetworkopen.2025.29418 OpenUrl CrossRef 5. ↵ Omar M , Sorin V , Agbareia R , et al. Evaluating and addressing demographic disparities in medical large language models: a systematic review . Int J Equity Health . 2025 ; 24 ( 1 ): 57 . doi: 10.1186/s12939-025-02419-0 OpenUrl CrossRef PubMed 6. Levartovsky A , Omar M , Nadkarni GN , Kopylov U , Klang E. Sociodemographic Bias in Large Language Model–Assisted Gastroenterology . JAMA Netw Open . 2025 ; 8 ( 9 ): e2532692 . doi: 10.1001/jamanetworkopen.2025.32692 OpenUrl CrossRef PubMed 7. 
↵ Omar M , Soffer S , Agbareia R , et al. Sociodemographic biases in medical decision making by large language models . Nat Med . 2025 ; 31 ( 6 ): 1873 – 1881 . doi: 10.1038/s41591-025-03626-6 OpenUrl CrossRef PubMed 8. ↵ Zack T , Lehman E , Suzgun M , et al. Assessing the potential of GPT-4 to perpetuate racial and gender biases in health care: a model evaluation study . Lancet Digit Health . 2024 ; 6 ( 1 ): e12 – e22 . doi: 10.1016/S2589-7500(23)00225-X OpenUrl CrossRef 9. ↵ Chin MH , Afsar-Manesh N , Bierman AS , et al. Guiding Principles to Address the Impact of Algorithm Bias on Racial and Ethnic Disparities in Health and Health Care . JAMA Netw Open . 2023 ; 6 ( 12 ): e2345050 . doi: 10.1001/jamanetworkopen.2023.45050 OpenUrl CrossRef 10. ↵ Ji Y , Ma W , Sivarajkumar S , et al. Mitigating the risk of health inequity exacerbated by large language models . NPJ Digit Med . 2025 ; 8 ( 1 ): 246 . doi: 10.1038/s41746-025-01576-4 OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted November 17, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Sociodemographic Bias in Large Language Model Clinical Trial Screening Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Sociodemographic Bias in Large Language Model Clinical Trial Screening Shelly Soffer , Mahmud Omar , Orly Efros , Donald U. 
Apakama , Aya Mudrik , Robert Freeman , Girish N Nadkarni , Eyal Klang medRxiv 2025.11.15.25340177; doi: https://doi.org/10.1101/2025.11.15.25340177 Share This Article: Copy Citation Tools Sociodemographic Bias in Large Language Model Clinical Trial Screening Shelly Soffer , Mahmud Omar , Orly Efros , Donald U. Apakama , Aya Mudrik , Robert Freeman , Girish N Nadkarni , Eyal Klang medRxiv 2025.11.15.25340177; doi: https://doi.org/10.1101/2025.11.15.25340177 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4536) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (541) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9232) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) 
Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00b0db7aecd8e2e',t:'MTc3OTYxMjYzNA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-24T02:00:01.246996+00:00

License: CC-BY-NC-ND-4.0