Patient-Reported Challenges in Lymphoma Diagnosis: Analysis of Online Forum Narratives Using Artificial Intelligence

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 46,806 characters · extracted from preprint-html · click to expand
Patient-Reported Challenges in Lymphoma Diagnosis: Analysis of Online Forum Narratives Using Artificial Intelligence | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Patient-Reported Challenges in Lymphoma Diagnosis: Analysis of Online Forum Narratives Using Artificial Intelligence View ORCID Profile Feng He , View ORCID Profile Jose M Valderas doi: https://doi.org/10.1101/2025.09.07.25335273 Feng He a Centre for Research in Health Systems Performance (CRiHSP), National University of Singapore , Singapore Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Feng He For correspondence: 
feng.he{at}u.nus.edu Jose M Valderas a Centre for Research in Health Systems Performance (CRiHSP), National University of Singapore , Singapore Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jose M Valderas Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF ABSTRACT Background Lymphoma diagnosis remains challenging due to diverse subtypes and nonspecific presentations. While prior research focused primarily on clinical accuracy, how patients experience and describe these challenges remains understudied. This study systematically analyzed online patient narratives to investigate their perspectives on diagnostic difficulties. Methods We developed an artificial intelligence (AI) pipeline integrating DeepSeek large language model, optical character recognition, and transformer-embedded keyword clustering to analyse online narratives reporting diagnostic discrepancies or misdiagnosis from China’s largest lymphoma forum for patients and caregivers (house086.com). The pipeline extracted patient demographics, timelines, diagnostic barriers, facilitators, outcomes, and AI-graded severity. External validation (n=400) against manually-derived labels assessed pipeline reliability. Multivariable logistic regression examined associations between barriers, facilitators, and the binarized severity scores. Results Over the study period (2011-2025), patients reporting diagnostic difficulties doubled, while the rate of severe outcomes declined. From 2016 narratives (median patient age 47; 59% family-authored), AI-assisted keyword taxonomy identified 7 diagnostic facilitators, 11 barriers, and 5 consequences. Psychological distress was the most common consequence of diagnostic challenges (90%). Clinician-related issues (91%) and case complexity (77%) were the most prevalent barriers, but inappropriate initial treatment conferred the greatest risk (OR 19.06, 11.30-32.17). 
Among facilitators, specialist input reduced severe outcomes by 40% (OR 0.60, 0.44-0.81), while peer networks (OR 0.62) and clinician expertise (OR 0.65) provided additional protection. Conclusion This large-scale analysis of patient narratives identified factors underlying patient-perceived diagnostic difficulties in lymphoma. AI enables scalable analysis of patient-generated data, offering insights into targeted quality improvement and digital health interventions in patient safety. INTRODUCTION Lymphoma accounts for 3-4% of all cancers worldwide, with more than half a million new cases reported annually [ 1 ]. Accurate diagnosis of lymphoma is a major challenge in clinical practice, mainly because of its diverse subtypes and nonspecific symptoms in early stages [ 2 ]. Expert reviews in the United States, United Kingdom, and Europe have reported diagnostic change in 5.8-27.3% of the referred cases [ 2 – 4 ], while studies in China documented rates as high as 39.8% [ 5 ]. These diagnostic uncertainties can delay appropriate care, expose patients to harmful treatment, and may contribute to long-term health disparities [ 3 , 6 ]. Prior research has focused on accuracy improvement and error prevention through clinician training, advanced imaging, or pathology support [ 3 – 8 ], but little is known about how patients themselves cope with diagnostic difficulties and uncertainties. Treatment navigation often requires patients and families to advocate for second opinions, pursue tertiary care, and endure financial and psychological strain [ 6 , 9 ], as overturning an initial diagnosis can be difficult for clinicians and institutional support is often lacking. 
The Global Patient Survey (GPS, 2024) of 11,170 lymphoma patients and caregivers by the Lymphoma Coalition further highlighted ongoing gaps in communication, shared decision-making, and psychosocial support across countries, underscoring the need to better understand patient experiences during the diagnostic process [citation]. Patient narratives provide critical insights into these processes, but conventional surveys and registries capture them only at small scale and limited depth [ 10 – 12 ]. Recent advances in artificial intelligence (AI), particularly large language models (LLMs), enable scalable and systematic analyses of patient-generated data from social media [ 13 – 15 ]. While these self-reported narratives inherently carry limitations regarding clinical verification [ 10 ], their vast scale and richness provide a complementary, patient-centered perspective to existing clinical literature [ 10 , 16 ]. Applying LLMs to these narratives may therefore help identify factors associated with patient-perceived diagnostic difficulties, providing actionable insights into service delivery gaps [ 13 – 16 ]. Building on this, the present study aimed to (1) systematically identify patient-reported barriers and facilitators in lymphoma diagnosis, and (2) quantify the influence of specific barriers and facilitators on the severity of patient-perceived consequences, using an AI-driven analysis of online forum narratives. METHODS Study Design and Data Collection We conducted a retrospective cross-sectional study of patient narratives from House086.com, China’s largest online forum dedicated to lymphoma patients and caregivers [ 13 ]. To capture diverse patient terminology related to diagnostic difficulties (e.g., misdiagnosis, diagnostic error, pathological discrepancy), a keyword list was iteratively developed through exploratory manual review ( Supplementary Table S1 ). 
All relevant posts published between April 2011 and October 2025 were then identified through keyword searches and manually reviewed to generate a curated list of candidate webpage links (URLs) at the time of data collection (June-October, 2025). To retrieve and process raw webpage content from URLs, we developed an AI-based extraction pipeline ( Figure 1 ), with optical character recognition (easyOCR [ 17 ]) to extract texts from attached medical report images. Text and OCR-derived content were analysed by DeepSeek, an LLM with strong performance in Chinese text comprehension [ 18 ]. From multi-user discussion threads, DeepSeek identified users with potential diagnostic difficulties, then standardized their narratives into structured case profiles for quantitative analysis. Download figure Open in new tab Figure 1. AI Pipeline for Extracting and Structuring Patient Forum Narratives. Forum posts from House086.com were curated through keyword searches and manual review into a list of webpage links. Text contents were parsed from the HTML source, and text appearing within attached medical images was transcribed using optical character recognition. All textual information was analysed by a large language model (DeepSeek), which identified individual users reporting potential diagnostic challenges and converted their narratives into structured case profiles capturing demographics, diagnostic history, and patient-reported barriers, facilitators, and consequences. A predefined keyword taxonomy, refined through embedding-based semantic clustering, guided the classification of keywords. Quality scores (1–5) were assigned to each case; a stratified 20% subsample underwent independent human validation for both case inclusion and outcome severity. 
Keyword Extraction and Taxonomy Given the heterogeneity in content and style of patient narratives, the LLM was initially allowed to freely propose salient keywords for patient-reported diagnostic facilitators, barriers, and consequences during pilot analysis. The resulting keywords were mostly Chinese colloquial expressions and, occasionally, English medical abbreviations. Hence, we generated word embeddings using a multilingual sentence-transformer model (paraphrase-multilingual-MiniLM-L12-v2) to capture contextual and cross-lingual semantic similarity. Agglomerative hierarchical clustering with Ward linkage was then applied to group semantically related terms. Resulting clusters were manually reviewed and consolidated further into 7 facilitators (e.g., specialist input, family support, peer network), 11 barriers (e.g., case complexity, specimen issues, clinician-related issues), and 5 consequences (disease progression, treatment-related harm, psychological distress, loss of time, and financial strain) based on clinical literature and pilot analysis. In formal analyses, the LLM with our predefined keyword taxonomy as prompt was allowed to choose up to three keywords per category (barriers, facilitators, and consequences) for each patient narrative. Variable Definitions and Outcome Patient-reported diagnostic challenges were defined as narratives that directly described misdiagnosis or diagnostic errors, or that provided sufficient evidence of a diagnosis that was later contradicted or revised, as determined by the LLM from context. Username, user type (patient, family member, or others), and posting year were directly parsed from the HTML content, whereas patient demographics (age, sex), diagnostic encounters (date, hospital, diagnosis), and patient experience (barriers, facilitators, and consequences) were inferred directly from post contents by the LLM. For text quality control, a confidence score (1-5) was assigned by the LLM to each narrative. 
For the primary outcome, an overall severity of patient-reported consequences was graded by LLM on a scale of 1 to 5, with 5 indicating the worst consequences. Due to score concentration, we binarized the outcome into severe consequences (score ≥4) versus moderate-or-lower severity (≤3). A secondary outcome was defined as reporting clinically relevant consequences (disease progression or treatment-related harm) in those having a severity score ≥4. Data Preprocessing The inclusion and exclusion criteria workflow was summarized in Figure 2 . From over three million forum posts, 15776 were returned by keyword search. After manual review, 13220 posts irrelevant to the study scope or duplicates of already selected posts were excluded. The remaining 2613 URLs for AI processing contributed 2865 narratives related to diagnostic difficulties. Of these, 849 narratives were excluded as they lacked meaningful description of the diagnostic history (n=1), flagged by DeepSeek as borderline case profiles (n=355), or had low content quality (confidence score ≤2, n=16). For multiple narratives from the same user, only the most recent and highest-quality version was retained (excluding n=423). Cases reporting single diagnostic encounter were manually reviewed and dropped if no evidence of diagnostic discrepancy was retrieved from the post (n=50). The final analytic dataset comprised 2016 unique profiles for analyses. For the secondary outcome, a refined subset of 917 high-quality profiles (confidence score ≥4, with known patient sex, known delay duration, and multiple encounters) was used. Download figure Open in new tab Figure 2. Data Cleaning Workflow for Patient Forum Posts. From more than three million posts, irrelevant entries were excluded using keyword screening and manual review. Candidate posts were processed through the AI pipeline to extract diagnosis-related details. After quality control and de-duplication, 2016 unique case profiles were retained for analysis. 
Download figure Open in new tab Figure 3. Barriers and Facilitators Associated With Patient-Reported Outcomes Left: Forest plot from multivariable logistic regression showing odds ratios (95% CI) for each barrier and facilitator in relation to unfavorable outcome (defined as severity ≥4 in the primary model, additionally with the presence of clinically relevant consequences in the secondary model). Filled dots indicate p ≤ 0.05; open dots indicate p > 0.05. Right: Frequency of barriers and facilitators across the analytic cohort (n = 2016). Bars show the proportion of patient narratives mentioning each factor, with barriers in red and facilitators in blue, ordered by descending frequency within each category. Due to the anonymous nature of forum discussions, missing data was found in patient age, sex, delay duration, and the number of encounters. To preserve the sample size and possible information, all missing values were coded as a separate category (“Not Disclosed”), with continuous variables converted to categorical ones for quantitative analysis. Human Validation To validate the reliability of AI-assisted data extraction, an external human validation was performed by three healthcare professionals fluent in Chinese. A stratified random subsample of 400 cases (approximately 20% of study population) was independently reviewed by coders with access to the original websites for discussion texts and uploaded medical records (including pathology reports, clinical notes, and treatment records where available). For case inclusion validation, each reviewer independently assessed whether each case genuinely reported diagnostic difficulties, defined as patient-reported misdiagnosis, or discrepancies between initial diagnosis and subsequent opinions. Final human labels were determined by majority vote. Positive predictive value (PPV) was computed by comparing human labels against AI classifications. 
For severity validation, human coders, blind to AI outputs, independently graded consequence severity on the same 1-5 scale. A median human score was computed for each case. Agreement between median human ratings and AI-generated scores was assessed using Cohen’s κ for binarized outcome (severe vs moderate-or-lower severity). Statistical Analyses For population characteristics, continuous variables were summarized as mean with standard deviation (SD) or medians with interquartile ranges (IQRs) as appropriate for the variable, while categorical variables were summarized as counts (row %). For keyword frequencies, counts with column % were used. Inter-group differences were tested with Student’s t-Test, Mann-Whitney U test, or chi-square as appropriate. Associations between AI-identified barriers, facilitators, and binarized consequence severity were estimated using multivariable logistic regression, adjusting for patient age, sex, user type, posting year, delay duration, and the number of diagnostic encounters. All numeric variables were categorized to account for potential non-linear effects (see Table 1 for details). Odds ratios (ORs) with 95% confidence intervals (CIs), and corresponding p-values were reported. Multicollinearity was assessed using the Cramér’s V [ 19 ]. View this table: View inline View popup Download powerpoint Table 1. Population characteristics by the patient-reported consequence severity All analyses were done using R version 4.2.0 and Python 3.11.13. All codes used for data collection, preprocessing, and statistical analysis will be made publicly available upon publication. Ethics This study analysed patient narratives from an open-access online forum in China (House086.com). All data were publicly accessible without registration or login requirements, and the platform terms of use permit analysis of forum contents without further consent. No identifiable personal information was collected or analysed. 
Only de-identified and aggregate results are reported. Use of publicly available, legally obtained, and de-identified data that do not harm individuals or involve sensitive personal information is consistent with relevant data-use regulations in China. The protocol has also been submitted to the National University of Singapore Institutional Review Board and is currently under review. The study adhered to the principles of the Declaration of Helsinki. RESULTS Population Characteristics Of the 2016 analyzed cases, 929 (46.1%) were classified as severe consequences (score ≥4), whereas 1087 (53.9%) were classified as moderate-or-lower (≤3) ( Table 1 ) . A high proportion did not disclose patient age (65.7%) or sex (31.1%). The majority of posts were authored by family members (59.2%), followed by patients themselves (38.6%). Self-authored posts were significantly more likely to report moderate-or-lower consequences compared to family-authored posts (60.3% vs 50.3%). From 2011-2025, the number of users reporting diagnostic challenges increased by over 100%. Concurrently, the proportion of moderate-or-lower severity outcomes significantly improved, rising from 41.9% in 2011-2015, to 53.6% in 2016-2020, and 59.1% in 2021-2025. Patients who reported three or more diagnostic encounters were more likely to have severe consequences (58.1% vs 43.5% in those reporting only two encounters). Furthermore, patients with moderate-or-lower severity reported a significantly shorter median diagnostic delay (1.0 month, IQR 1.0-3.0) than those with severe consequences (4.0 months, 2.0-11.0). Keyword Taxonomy and Frequencies AI-assisted taxonomy development yielded 11 patient-reported barriers, 7 facilitators, and 5 consequences. These categories, spanning clinical, systemic, and personal domains, are detailed with their corresponding definitions in Table 2 . View this table: View inline View popup Download powerpoint Table 2. 
Embedding-based mapping of barriers, facilitators, and consequences. The most frequently described consequence of diagnostic difficulties was psychological distress, reported in 90.1% of the narratives ( Table S2 ). Analysis by consequence severity revealed significant concentration of clinically relevant consequences in the severe group, with disease progression four times more prevalent (66.8% vs 15.5%), and treatment-related harm twice as prevalent (52.2% vs 24.5%). Conversely, loss of time was significantly more prevalent in the moderate-or-lower severity group (69.8% vs 61.6%). Among barriers, clinician-related issues (90.6%) and case complexity (77.3%) were overwhelmingly reported ( Table S3 ). In contrast, patient-level barriers (e.g., patient psychological resistance, limited health literacy, and physical limitations) and family/financial constraints were mentioned less frequently (all <7%). The most common facilitators were specialist input (66.7%), patient self-advocacy (61.1%), and tertiary-hospital care (60.3%). Multivariable Regression Analysis In the primary model (outcome: severe consequences, score ≥4), all 11 AI-identified barriers were significantly associated with an increased likelihood of severe consequences ( Figure 4 , Table S3 ). The strongest predictor was misguided prior treatment (OR 19.06, 95% CI 11.30-32.17). Other factors with exceptionally high risk were system-level inefficiency (OR 8.93, 6.00-13.29), family and financial constraints (OR 8.47, 4.05-17.73), and patient physical limitations (OR 7.40, 3.37-16.26). Notably, the most frequently reported barrier, clinician-related issues, was also a major risk factor (OR 6.00, 3.74-9.65). Among facilitators, specialist input (OR 0.60, 0.44-0.81), peer network engagement (OR 0.62, 0.45-0.87) and clinician expertise (OR 0.65, 0.46-0.90) were significantly associated with a decreased likelihood of severe consequences. 
A notable finding was the association of family support with increased odds of severe consequences in the primary model (OR 1.49, 1.02-2.17), though this effect disappeared in the secondary model (outcome: clinically critical consequences). The secondary model focusing on disease progression or treatment harm, generally showed consistent directionality but larger effect sizes for all barriers, and highlighted specialized interventions as a significant protective factor (OR 0.40, 0.20-0.81), which was non-significant in the primary model. Collinearity testing showed moderate correlation between two pairs of facilitators: tertiary hospitals and peer networks (phi=0.51), and specialized interventions and specialist input (0.41), but both were retained due to their distinct clinical implications. Human validation of 400 randomly sampled cases yielded PPV of [%] for case identification and inter-rater agreement of κ=[to be added] for severity grading. DISCUSSION Key Findings This large-scale AI-driven study of online patient narratives yielded several key insights for diagnostic challenges in lymphoma. First, while the number of narratives describing diagnostic difficulties more than doubled from 2011 to 2025, the overall trend suggested a significant improvement in patient-reported outcomes over the same period. Our analyses revealed clinician-related issues (90.6%) and case complexity (77.3%) as the most frequently reported barriers, yet the strongest predictors of severe consequences included misguided prior treatment (OR 19.06), system-level inefficiency (OR 8.93), and family or financial constraints (OR 8.47). Specialist input (66.7%) and patient self-advocacy emerged as the leading facilitators in treatment navigation, with specialist input, peer network, and clinician expertise serving as protective factors against severe outcomes. 
Together, these results provide a patient-centered taxonomy and a quantitative risk map that complements existing literature on diagnostic safety. Comparison with Existing Evidence Our barrier-facilitator framework echoes prior work on cancer care quality, where delays, fragmentation, and inadequate psychosocial support were key barriers, and multidisciplinary coordination acted as facilitators [ 11 ]. The recent global survey by Lymphoma Coalition similarly revealed significant information gaps and treatment regret, with Chinese respondents reporting particularly severe challenges: only 15-18% understood their diagnostic process (vs. 59% globally), and 53% expressed treatment regret (vs. 11% globally) (citation). However, that survey, while valuable, could not identify the specific risk factors or mechanisms underlying these disparities. Our study extends this literature by providing a quantitative risk map derived directly from the patient experience in a Chinese context. For example, while the GPS found that 91% of Chinese patients considered treatment cost a major concern, our regression analysis demonstrates that financial constraints were associated with an 8.5-fold increase in odds of severe consequences (OR 8.47). Similarly, the “information gaps” highlighted in the GPS correspond in our data to clinician-related issues (OR 6.00) and inadequate specialist access. The high prevalence of case complexity (77.3%) and specimen issues (23.8%) as AI-identified barriers is consistent with long-standing recognition that pathology and histopathological interpretation in lymphoma are inherently challenging [ 2 – 4 ]. From the patient perspective, clinician-related issues were the most common barrier (90.6%). While this high frequency does not necessarily imply malpractice, it highlights prevalent gaps in training and communication that compromise quality care [ 6 , 20 ]. 
More importantly, our analysis distinguished between these high-frequency issues and high-impact risk factors. Inappropriate initial treatment, although reported in a minority of cases (7.8%), manifested as the strongest barrier in logistic regression (OR 19.06), exemplifying the risks of initiating therapy without diagnostic certainty. This is aligned with evidence by Proctor et al. that approximately 11% of lymphoma treatment plans were revised following discordant diagnoses [ 3 ]. Similarly, the high OR for system level inefficiency (OR 8.93), together with specialist access barriers and medical resources shortages, points to the structural and systemic deficiencies as critical drivers of severe consequences, often surpassing individual clinician errors. Beyond the hospital domain, patient and family-level barriers, including psychological resistance, health illiteracy, financial pressure, and physical limitations, also pose substantial, though less frequent risks. Among the facilitators, specialist input and clinician expertise represented positive forces from within the healthcare system, while peer networks, formed by patients and caregivers, also emerged as an important source of support. According to Engler et al., cancer patients actively seek out stories from peers to better understand treatment choices, manage emotional impact, and reduce feelings of isolation [ 16 ]. Such experience-sharing may complement clinical information and foster emotional coping, particularly in the face of diagnostic uncertainty or difficulty. A key finding was the counterintuitive association of family support with increased odds of severe consequences in the primary model. The predominance of family-authored narratives (59.2%) highlights the central role of family advocacy in treatment navigation. 
Rather than suggesting family support is inherently harmful, this association likely reflects reverse causality or confounding by indication, where family support may only become intensely necessary and reported in narratives when a case is already complicated, delayed, or progressing poorly, emerging as a “last resort” strategy after prolonged uncertainty [ 6 , 21 ]. These findings suggest that individual-level efforts are insufficient without systemic support, in contrast to specialist input that represents structured intervention. Finally, psychological distress was the most frequently mentioned consequence (90.1%) regardless of consequence severity. While not directly predictive of clinical harm, such distress may evolve into resistance to further diagnostic testing and follow-up compliance, potentially undermining long-term health status. Clinically relevant harms (disease progression and treatment-related harm) and financial strain were markedly more common in those reporting severe consequences. Similar to our taxonomy, prior work also described psychological, family, and financial consequences of problematic care events, revealing the multifaceted nature of patient-perceived harm [ 6 , 9 ]. Implication for Practice and Digital Health Applications Our findings suggest that targeted interventions are needed to systematically address barriers to patient-centred lymphoma care. International initiatives, such as the Nebraska Lymphoma Study Group, the North Central London Lymphoma network, and the French Lymphopath Network show how centralized expert review can reduce diagnostic errors [ 2 – 4 ]. However, such frameworks remain difficult to implement in countries with limited resources and large patient populations, where infrastructural gaps often leave patients responsible for complex tasks like arranging the transfer of pathology slides for second opinions. 
Digital health tools may offer complementary and scalable solutions to address diagnostic challenges at multiple levels. First, to combat the frequent barriers of case complexity and specimen issues, AI-assisted pathology systems have achieved high accuracy in lymphoma detection and hold promise for reducing time and man-power costs [ 7 , 8 ]. Second, patient-facing LLM applications, such as chatbots, may enhance health literacy, provide immediate psychological support, and facilitate shared decision-making. By providing clear, accessible information, these tools could potentially mitigate friction associated with clinician-related issues and foster trust in the diagnostic process [ 20 , 22 , 23 ]. Third, system-level tools such as decision-support platforms, triage algorithms, telemedicine, and outpatient robotics could help address financial, logistical, and physical barriers, particularly in under-resourced regions. Critically, we observed that patients are already adopting AI tools independently to interpret pathology reports and disease information. Recent work has raised concerns that patients may over-trust AI-generated advice even when inaccurate [ 24 ]; our findings complement this perspective by examining a population already exposed to potential diagnostic inaccuracies, often in settings where specialist access is limited. In such contexts, patients described turning to AI for its perceived neutrality and convenience, underscoring the urgent need for health systems to integrate safe, evidence-based digital tools with appropriate medical oversight. Strengths and Limitations This study has several strengths. To our knowledge, it is the first large-scale, AI-driven analysis focusing on patients who experienced diagnostic difficulties in oncology, an important but often overlooked population. 
By systematically analysing over 2000 online patient narratives, we were able to capture authentic, patient-centred perspectives that are less constrained by traditional clinical or institutional framing, highlighting the value of patient voices in chronic disease management [ 14 ]. The AI pipeline provided a consistent, scalable, and reproducible method for structuring patient-generated text, thereby offering a transferable framework to digital health research across different health systems and disease areas. The external human validation sub-study, using independent clinical records, enhances the overall credibility of the AI-extracted data. Several limitations must be acknowledged, primarily related to the nature of patient-reported data. First, the data reflects patient-perceived diagnostic discordance rather than clinically verified error rates. Given the retrospective account, potential for recall bias or limited medical terminology, patient narratives are inherently framed from their perspectives rather than representing objective clinical truth. Nevertheless, these narratives carry unique value by capturing the patient’s lived experience of uncertainty and providing direct insights into specific healthcare delivery gaps. Second, the anonymous nature of the online forum resulted in substantial missing demographic data (e.g., 65.7% missing age), which limited the scope of subgroup analyses. While we mitigated this by coding "Not Disclosed" as a separate category, this approach cannot fully eliminate potential confounding effects. Third, methodological limitations in data extraction included occasional OCR failure on low-resolution images and the necessity of limiting AI analysis to the initial text and up to 10 images per post, which may have introduced some information loss. This was partly mitigated by the LLM’s contextual inference capability and the design of a case confidence score to exclude low-quality narratives from the final analysis. 
Moreover, the LLM-graded consequence severity score clustered at 3 (49.4%) and 4 (42.3%), requiring us to binarize the outcome (score ≥4 vs ≤3) for a balanced and stable regression model. Finally, as all narratives were extracted from a single, Chinese-language platform, the results may not be generalizable to other populations. Future Research Future work will focus on longitudinal analyses of patient narratives to examine the dynamic interplay of barriers and facilitators over time, distinguishing short- from long-term consequences perceived by patients and families. Item response theory [ 25 ] may be incorporated to improve the precision and reliability of patient-reported outcome measurements. Health economics methods could help provide a more comprehensive assessment of the societal and financial burden imposed by diagnostic challenges. Beyond lymphoma, the AI-driven analytic framework developed in this study could be extended to patient-authored texts across other diseases and healthcare systems, enabling systematic incorporation of patient perspectives into clinical research and practice. CONCLUSION This large-scale, AI-based analysis of more than 2000 online forum narratives identified patient-perceived barriers and facilitators in lymphoma diagnosis. Our findings reveal that diagnostic safety requires not only hospital-based interventions but also active engagement of patients, families, peer networks, and health systems. More broadly, this study demonstrates the feasibility of leveraging patient-generated social media data to inform clinical practice and digital health innovation, underscoring the potential of AI tools to systematically evaluate health services and improve patient-perceived quality of care. FUNDING This research received no external funding. CONFLICT OF INTEREST All authors declare no competing interests. 
AUTHOR CONTRIBUTIONS (Provisional, may be updated in subsequent revisions) Feng He: Conceptualization, Methodology, Software, Data Curation, Formal Analysis, Writing, Visualization, Project Administration. José M. Valderas: Supervision, Methodology, Writing. Data Availability The de-identified dataset may be shared upon reasonable request. DATA & CODE AVAILABILITY All codes used for data collection, preprocessing, and statistical analysis will be made publicly available upon publication. The de-identified dataset may be shared upon reasonable request. ACKNOWLEDGMENTS We thank participants of online patient forums for sharing their experiences, which made this research possible. ChatGPT was used for language polishing, and Claude was used for code development. All scientific content and analyses were conducted and verified by the author. Footnotes 1. Added an external human validation study (n=400) involving three independent coders fluent in Chinese with biomedical or health research backgrounds; relevant statistics will be reported in the next update. 2. Incorporated methodological and editorial feedback from Prof. Jose M. Valderas and added him as senior author. 3. Corrected the Methods section to reflect the actual use of transformer-based semantic clustering (instead of LDA) for developing the keyword taxonomy; the taxonomy itself remains unchanged. 4. Expanded the dataset to 2,016 cases with improved data quality through updated prompt engineering and pipeline refinement. 5. Added a secondary outcome (clinically relevant consequences) and updated regression analyses accordingly. 6. Revised the title, refined figures, and improved caption clarity. REFERENCES 1. ↵ Sung , H. , et al. , Global cancer statistics 2020: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries . CA : a cancer journal for clinicians , 2021 . 71 ( 3 ): p. 209 – 249 . OpenUrl 2. ↵ Laurent , C. , et al. 
, Impact of expert pathologic review of lymphoma diagnosis: study of patients from the French Lymphopath Network . Journal of clinical Oncology , 2017 . 35 ( 18 ): p. 2008 – 2017 . OpenUrl PubMed 3. ↵ Proctor , I.E. , et al. , Importance of expert central review in the diagnosis of lymphoid malignancies in a regional cancer network . Journal of Clinical Oncology , 2011 . 29 ( 11 ): p. 1431 – 1435 . OpenUrl Abstract / FREE Full Text 4. ↵ Bowen , J.M. , et al. , Lymphoma diagnosis at an academic centre: rate of revision and impact on patient care . Br J Haematol , 2014 . 166 ( 2 ): p. 202 – 8 . OpenUrl PubMed 5. ↵ Deng , J. , et al. , Misdiagnosis analysis of 2291 cases of haematolymphoid neoplasms . Frontiers in Oncology , 2023 . 13 : p. 1128636 . OpenUrl PubMed 6. ↵ Singh , H. , et al. , The global burden of diagnostic errors in primary care . BMJ Qual Saf , 2017 . 26 ( 6 ): p. 484 – 494 . OpenUrl Abstract / FREE Full Text 7. ↵ Syrykh , C. , et al. , Accurate diagnosis of lymphoma on whole-slide histopathology images using deep learning . NPJ digital medicine , 2020 . 3 ( 1 ): p. 63 . OpenUrl PubMed 8. ↵ Li , D. , et al. , A deep learning diagnostic platform for diffuse large B-cell lymphoma with high accuracy across multiple hospitals . Nature communications , 2020 . 11 ( 1 ): p. 6004 . OpenUrl PubMed 9. ↵ Mazor , K.M. , et al. , Toward patient-centered cancer care: patient perceptions of problematic events, impact, and response . Journal of Clinical Oncology , 2012 . 30 ( 15 ): p. 1784 – 1790 . OpenUrl Abstract / FREE Full Text 10. ↵ Tsianakas , V. , et al. , Using patients’ experiences to identify priorities for quality improvement in breast cancer care: patient narratives, surveys or both? BMC Health Services Research , 2012 . 12 ( 1 ): p. 271 . OpenUrl PubMed 11. ↵ Wagner , E.H. , et al. , The quality of cancer patient experience: perspectives of patients, family members, providers and experts . Quality and Safety in Health Care , 2010 . 19 ( 6 ): p. 484 . 
OpenUrl Abstract / FREE Full Text 12. ↵ Nartey , Y. , et al. , Is the English Cancer Patient Experience Survey representative? A comparative analysis with the National Lung Cancer Audit . Lung Cancer , 2020 . 140 : p. 27 – 34 . OpenUrl PubMed 13. ↵ Ning , K.G. , H ; Franklin , M ; Yang , X ; Wei , R ; Song , Z ; Xu , H ; Leng , L ; Liu , M ; Dai , J ; Zhang , J ; Zeng , R ; Hou , Y ; Wang , R ; Liu , Z ; Huang , C ; Cai , R ; Liu , H ; Xia , LC, Analysis of China’s Largest Blood Cancer Patient Forum: Regional Inequities in Health-Seeking Behavior and Need for Enhanced Online Patient Support . J Med Internet Res , 2025 . 14. ↵ Dreisbach , C. , et al. , A systematic review of natural language processing and text mining of symptoms from electronic patient-authored text data . Int J Med Inform , 2019 . 125 : p. 37 – 46 . OpenUrl CrossRef PubMed 15. ↵ Siddiqui , Z.A. , et al. , Leveraging social media data to study disease and treatment characteristics of Hodgkin’s lymphoma Using Natural Language Processing methods . PLOS Digit Health , 2025 . 4 ( 3 ): p. e0000765 . OpenUrl 16. ↵ Engler , J. , et al. , Using others’ experiences. Cancer patients’ expectations and navigation of a website providing narratives on prostate, breast and colorectal cancer . Patient Education and Counseling , 2016 . 99 ( 8 ): p. 1325 – 1332 . OpenUrl PubMed 17. ↵ JaidedAI . EasyOCR: Ready-to-use Optical Character Recognition with 80+ Supported Languages . 2020 2025-10-27 [cited 2025 2025-09-02]; Available from: https://github.com/JaidedAI/EasyOCR . 18. ↵ Gibney , E ., China’s cheap, open AI model DeepSeek thrills scientists . Nature , 2025 . 638 ( 8049 ): p. 13 - 14 . OpenUrl PubMed 19. ↵ Rea , L.M. and R.A. Parker , Designing and conducting survey research: A comprehensive guide . 2014 : John Wiley & Sons . 20. ↵ Salwei , M.E. , J.S. Ancker , and M.B. Weinger , The decision aid is the easy part: workflow challenges of shared decision making in cancer care . 
JNCI: Journal of the National Cancer Institute , 2023 . 115 ( 11 ): p. 1271 – 1277 . OpenUrl PubMed 21. ↵ Rezaei , M. , et al. , Caregiving consequences in cancer family caregivers: a narrative review of qualitative studies . Frontiers in Public Health , 2024 . 12 : p. 1334842 . OpenUrl PubMed 22. ↵ Thirunavukarasu , A.J. , et al. , Large language models in medicine . Nature Medicine , 2023 . 29 ( 8 ): p. 1930 – 1940 . OpenUrl CrossRef PubMed 23. ↵ Arora , A. and A. Arora , The promise of large language models in health care . The Lancet , 2023 . 401 ( 10377 ): p. 641 . OpenUrl 24. ↵ Shekar , S. , et al. , People Overtrust AI-Generated Medical Advice despite Low Accuracy . NEJM AI , 2025 . 2 ( 6 ): p. AIoa2300015 . OpenUrl 25. ↵ Chang , C.-H. and B.B. Reeve , Item response theory and its applications to patient-reported outcomes measurement . Evaluation & the health professions , 2005 . 28 ( 3 ):p. 264 - 282 . OpenUrl CrossRef PubMed Web of Science View the discussion thread. Back to top Previous Next Posted November 19, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Patient-Reported Challenges in Lymphoma Diagnosis: Analysis of Online Forum Narratives Using Artificial Intelligence Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Patient-Reported Challenges in Lymphoma Diagnosis: Analysis of Online Forum Narratives Using Artificial Intelligence Feng He , Jose M Valderas medRxiv 2025.09.07.25335273; doi: https://doi.org/10.1101/2025.09.07.25335273 Share This Article: Copy Citation Tools Patient-Reported Challenges in Lymphoma Diagnosis: Analysis of Online Forum Narratives Using Artificial Intelligence Feng He , Jose M Valderas medRxiv 2025.09.07.25335273; doi: https://doi.org/10.1101/2025.09.07.25335273 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Systems and Quality Improvement Subject Areas All Articles Addiction Medicine (567) Allergy and Immunology (863) Anesthesia (297) Cardiovascular Medicine (4411) Dentistry and Oral Medicine (443) Dermatology (380) Emergency Medicine (606) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1505) Epidemiology (15205) Forensic Medicine (30) Gastroenterology (1119) Genetic and Genomic Medicine (6575) Geriatric Medicine (666) Health Economics (994) Health Informatics (4511) Health Policy (1365) Health Systems and Quality Improvement (1608) Hematology (537) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15903) Intensive Care and Critical Care Medicine (1103) Medical Education (620) Medical Ethics (144) Nephrology (666) Neurology (6573) Nursing (345) Nutrition (998) Obstetrics and Gynecology (1139) Occupational and Environmental Health (954) Oncology (3319) Ophthalmology (968) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (662) Pediatrics (1689) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5423) Public and Global Health (9205) Radiology and Imaging (2191) Rehabilitation Medicine and Physical Therapy (1367) Respiratory Medicine (1191) Rheumatology (593) Sexual and 
Reproductive Health (709) Sports Medicine (529) Surgery (709) Toxicology (99) Transplantation (288) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9fec6d14deeb1b23',t:'MTc3OTI5MTQ4Mg=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00