Identifying Psychiatric Manifestations in Outpatients with Depression and Anxiety: A Large Language Model-Based Approach

doi:10.1101/2025.01.03.24318117

Identifying Psychiatric Manifestations in Outpatients with Depression and Anxiety: A Large Language Model-Based Approach

2025 · doi:10.1101/2025.01.03.24318117

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 82,293 characters · extracted from preprint-html · click to expand

Identifying Psychiatric Manifestations in Outpatients with Depression and Anxiety: A Large Language Model-Based Approach | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Identifying Psychiatric Manifestations in Outpatients with Depression and Anxiety: A Large Language Model-Based Approach View ORCID Profile Shihao Xu , View ORCID Profile Yiming Yan , Yanli Ding , Feng Li , Shu Zhang , Haoyun Tang , Chao Luo , Yan Li , Hao Liu , Yu Mei , Wenjie Gu , Hong Qiu , Yong Wang , Jianyin Qiu , Tao Yang , Zike Wang , Qing Zhang , Haiyang Geng , Yunyun Han , Jun Shao , Nils Opel , Lidong Bing , Min Zhao , Yifeng Xu , Xun Jiang , View ORCID Profile Jianhua Chen 
doi: https://doi.org/10.1101/2025.01.03.24318117 Shihao Xu 2 Theta Health Inc. , California, United States 3 Tianqiao and Chrissy Chen Institute , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Shihao Xu Yiming Yan 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yiming Yan Yanli Ding 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Feng Li 2 Theta Health Inc. , California, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site Shu Zhang 2 Theta Health Inc. 
, California, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site Haoyun Tang 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Chao Luo 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yan Li 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hao Liu 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yu Mei 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Wenjie Gu 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hong Qiu 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yong Wang 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google 
Scholar Find this author on PubMed Search for this author on this site Jianyin Qiu 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tao Yang 3 Tianqiao and Chrissy Chen Institute , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Zike Wang 2 Theta Health Inc. , California, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site Qing Zhang 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 4 Shanghai Key Laboratory of Psychotic Disorder , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Haiyang Geng 3 Tianqiao and Chrissy Chen Institute , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yunyun Han 3 Tianqiao and Chrissy Chen Institute , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jun Shao 2 Theta Health Inc. 
, California, United States Find this author on Google Scholar Find this author on PubMed Search for this author on this site Nils Opel 6 University Hospital Jena Department of Psychiatry and Psychotherapy , Jena, Germany 7 German Centre for Mental Health (DZPG) , Berlin, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Lidong Bing 3 Tianqiao and Chrissy Chen Institute , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Min Zhao 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 4 Shanghai Key Laboratory of Psychotic Disorder , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yifeng Xu 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 4 Shanghai Key Laboratory of Psychotic Disorder , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Xun Jiang 2 Theta Health Inc. 
, California, United States 3 Tianqiao and Chrissy Chen Institute , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: xun.jiang{at}thetahealth.ai jianhua.chen{at}smhc.org.cn Jianhua Chen 1 Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine , Shanghai, China 3 Tianqiao and Chrissy Chen Institute , Shanghai, China 4 Shanghai Key Laboratory of Psychotic Disorder , Shanghai, China 5 Shanghai Clinical Research Center for Mental Health , Shanghai, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jianhua Chen For correspondence: xun.jiang{at}thetahealth.ai jianhua.chen{at}smhc.org.cn Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Purpose Accurate psychiatric diagnosis and assessment are crucial for effective treatment. However, while current data-driven approaches emphasize diagnostic outcomes, the process of decoding the underlying symptom expressions in patients’ language and mapping them to well-defined psychiatric terminology has received relatively little attention. This study investigates the potential of Large Language Models (LLMs) to automate the identification of diagnostic categories and symptoms from psychiatrist-patient dialogues, to provide interpretable insights and support automatic diagnosis. Methods We analyzed audio recordings from 1160 psychiatric diagnostic interviews, primarily involving patients with depressive disorder and anxiety disorder. A clinical entities corpus was formed by leveraging clinical annotations in EMRs (e.g., chief complaints, mental status, elements in assessment scales) and widely used assessment scales. LLMs were utilized to identify clinical symptoms, rate assessment scales, and an ensemble learning pipeline was designed to classify diagnostic results and symptoms with 10-fold cross-validation. 
Results The system achieved 86.9% accuracy for identifying the appearance of clinical annotations and 74.7% (77.2%) accuracy for identifying anxiety (depression) symptoms. Patients with depression and anxiety, diagnosed using ICD-10 codes, were differentiated with an accuracy of 75.5%. Analysis of LLM-generated features shows that depression cases exhibited prominent markers of anhedonia and decreased volition, whereas anxiety disorders were characterized by tension and an inability to relax. Conclusion This study demonstrates the potential of integrating LLM technology with linguistic and acoustic features to enhance psychiatric diagnostics. The developed pipeline effectively predicts psychiatric diagnoses and provides interpretable insights, showcasing a valuable tool for clinicians in mental health assessment. 1 Introduction Depression and anxiety disorders represent two of the most prevalent mental health conditions globally. It is estimated that over 300 million people suffer from major depressive disorders, which is equivalent to 4.4% of the world’s population. A similar number of people suffer from anxiety disorders, often with co-occurring depression [ 7 ]. The emerging field of digital phenotyping, which involves the continuous and nuanced quantification of human phenotypic expression at the individual level by leveraging digital device data, offers a quantitative approach to longitudinal observation [ 2 ]. Researchers have demonstrated that social signals (e.g., linguistics and speech) play a crucial role in the diagnosis and assessment of patients with depression and anxiety [ 37 , 21 ]. 
In particular, the content of a patient’s speech provides rich information about their mental state, cognitive patterns, and emotional experiences [ 29 , 46 ]. The linguistic features, topic choices, and narrative structures employed by individuals can offer valuable insights into their psychological well-being [ 46 ]. Recent advances in NLP, particularly in LLMs such as GPT [ 32 ], Gemini [ 39 ], and Qwen [ 40 ], demonstrate diverse capabilities in clinical reasoning, social media analysis, and psychiatric education [ 31 ], which could potentially provide objective, data-driven insights in psychiatry. Moreover, LLMs are able to process, generate, and respond to natural language inputs, a capability that fits naturally into the NIMH’s Research Domain Criteria (RDoC) framework, which suggests new ways of classifying mental disorders based on dimensions of observable behaviors [ 30 ]. In recent psychiatric studies, these LLMs excel at understanding and generating complex linguistic patterns with human-like performance, making them widely explored for social media content analysis [ 22 , 42 ], treatment performance enhancement [ 41 , 1 , 6 ], chat-based counseling [ 27 , 25 ], and supporting clinical decision-making [ 44 , 11 ] from an evidence-based practice perspective. Although LLMs demonstrate linguistic understanding and generation, their use for producing objective digital biomarkers in psychiatry remains relatively scarce [ 4 ]. Studies have shown that the speech of patients with depression and anxiety contains distinctive quantitative verbal and nonverbal digital markers compared to healthy controls [ 46 , 21 ], but these characteristics often remain too subtle for humans to perceive and translate into actionable insights, making their practical application and improvement challenging [ 13 ]. LLMs are able to generate diagnostic results and provide reasoning steps, benefiting from a large amount of pre-training data. 
However, the interpretation and alignment of answers or decisions generated by LLM remain challenging [ 23 ]. Moreover, most studies on depression and anxiety rely primarily on two data sources: social media and structured clinical reports, and are often constrained by limited data availability [ 37 ]. Distinguishing between depression and anxiety in clinical settings remains challenging due to the overlap of symptoms and the high comorbidity rate, with limited research on the discovery of objective biomarkers for both conditions [ 4 ]. In addition, during clinical interviews, psychiatrists translate patients’ informal symptom descriptions into professional diagnostic terminology; however, there remains a lack of approaches to automatically and effectively bridge this “semantic gap” between patients and clinicians. To address these gaps in existing research, we collected a comprehensive dataset of psychiatric interviews at the Shanghai Mental Health Center (SMHC) in China, comprising over 15,000 minutes of speech recordings from 1,160 individual outpatients with 25 different diagnoses. These recordings, primarily featuring patients diagnosed with depression and anxiety disorders, were collected in unstructured real-world environments to ensure ecological validity. To mimic the characteristics of clinical diagnosis, we designed a corpus of clinical indicators that incorporates diagnostic criteria, main complaints, mental status evaluations, and components from assessment scales using the Electronic Medical Records (EMRs) in the SMHC and widely-used assessment scales. Subsequently, we employed the pre-trained LLM to indicate the appearance of a corpus of clinical-related symptoms, rate the components of several assessment scales, and further fine-tuned the LLM with clinical annotations from professional psychiatrists to enhance its understanding of clinical-related concepts. 
In parallel, we extracted linguistic usage patterns and acoustic features to broaden the spectrum of biomarkers. Through the fusion of these modalities, we constructed an ensemble machine-learning pipeline capable of predicting both outpatient diagnostic groups and symptoms with moderately high accuracies. Moreover, we conducted an in-depth analysis of salient patterns between different diagnostic groups to enhance clinical interpretability. Our results demonstrate that objective cues extracted by the LLM, combined with other behavioral markers, can serve as valuable features for differentiating diagnostic groups and identifying symptom disclosure, potentially enhancing both the efficiency and effectiveness of psychiatric diagnosis and assessment in clinical practice. 2 Method This study collected audio recordings from 1,160 participants between August 2023 and January 2024, in collaboration with the SMHC. The overall pipeline is shown in Figure 1 . Firstly, we preprocessed and anonymized the recordings before transcribing them automatically into text and performing manual corrections. Secondly, we collaborated with professional psychiatrists to design a set of clinical entities and leveraged the LLM to identify these concepts using the transcripts as input, enhancing the LLM based on the psychiatrists’ annotations through supervised fine-tuning (SFT). Linguistic and acoustic features were extracted from both the transcripts and the speech. Finally, we utilized different modalities to train an ensemble machine learning pipeline to differentiate diagnostic groups and the major symptoms. Download figure Open in new tab Fig. 1: Diagram of the analysis pipeline. The audio recordings were collected during diagnostic interviews with outpatients. We extracted four types of feature sets from the recordings, two of which utilized an LLM. 
These feature sets were utilized to classify different groups of participants and predict the appearance of depression and anxiety symptoms. 2.1 Participants The study sample comprised outpatients from the SMHC who attended daily clinical diagnostic interviews. Participants were aged 12 to 80 years and were fluent in Mandarin. Inclusion criteria required individuals to be capable of providing informed consent and to be free from physical illnesses that could affect their participation. All diagnoses were established using the Chinese version of the WHO International Classification of Diseases, Tenth Revision (ICD-10) [ 20 ]. The study protocol was approved by the Ethics Committee of the SMHC institutional review board (IRB) to ensure compliance with ethical research standards. Specifically, the recording setup consisted of a microphone placed between the psychiatrist and the participant, connected to a computer. At the beginning of each interview, participants were asked to read a standardized 30-second text passage, followed by the standard diagnostic procedure. All clinical information was documented in the EMR system by the psychiatrists. To protect the privacy of participants, all audio recordings and associated meta-information underwent a thorough manual de-identification process. 2.2 Feature Extraction We extracted a comprehensive clinical entity set to cover the intermediate features that assist psychiatrists in the diagnosis and assessment process: clinical observations and standardized assessment scales, which we designate as clinical-related and assessment-related feature sets. As a complement, we measured linguistic usage and acoustic characteristics and formed them into separate feature sets. In the following paragraphs, we will introduce how we build and extract these feature sets in detail. 
2.2.1 Clinical-related features The clinical-related feature set encompasses essential depression and anxiety indicators extracted from EMRs with comprehensive descriptions (shown in Appendix Table A1 ). This feature set was developed through a collaborative approach involving both psychiatrists and LLM analysis. Firstly, the process began with extracting 218 clinical entities from three sections in the EMR system: chief complaint, personal medical history, and psychiatric examination. These entities represent predefined features within the documentation framework of the SMHC EMR system based on psychiatric diagnostic systems, textbooks, and experts’ opinions. Then, we included a supplementary set of 44 additional symptoms identified through clinical expertise and diagnostic criteria (e.g., DSM-5 and ICD-10) suggested by psychiatrists. We then utilized Gemini 1.5 Pro [ 39 ] to generate descriptions for all clinical entities, using the Chinese version of the DSM-5 guidance [ 3 ] as a reference, leveraging the model’s strong extended context window capability. Through iterative psychiatric review, redundant and irrelevant items specific to depression and anxiety were eliminated, resulting in a refined set of 138 validated clinical-related features. After rigorously defining the clinical-related features, we leveraged the LLM to judge whether these symptoms occur in the diagnostic conversation. In this study, we employed Qwen2-72B-Instruct [ 47 ] as the base model to extract clinical and assessment-related features from the clinical dialogue, due to its proficiency in processing the Chinese language and suitability for offline deployment in hospital settings. SFT is a technique that adapts large language models to downstream tasks through supervised learning on domain-specific data. To better adapt to the specific medical terminology and clinical reasoning patterns in our healthcare context, we fine-tuned the model using psychiatrists’ annotations in EMRs. 
We present the prompt used to generate clinical-related features and fine-tune LLM in Table 1 . The fine-tuning was implemented using LLaMA-Factory 1 , and the inference process utilized vLLM 2 . The experiment was conducted on 4 A100 GPUs. View this table: View inline View popup Download powerpoint Table 1: Prompt template for clinical-related feature generation. The content within the curly braces is the demographic, symptom descriptions, and dialogue information that form the prompt. We first began with structuring EMR data to create reliable training labels for the SFT. Since EMRs contain unstructured text fields where psychiatrists document patient information, we employed the LLM to analyze these 1,160 EMRs. For each EMR, we leveraged LLM to evaluate the presence of above mentioned 138 predefined clinical features, including similar expressions and synonyms, generating a boolean value list (yes/no) for each record. The prompt for querying the LLM to generate labels from EMRs is shown in Appendix Table A3 . Secondly, we implemented a rigorous filtering process for choosing high-quality data for SFT. We first leverage LLM to verify whether the information recorded in EMRs was adequately reflected in the interview dialogue transcripts, yielding 877 valid examples. Then, we collaborated with specialist psychiatrists to establish comprehensive evaluation criteria, encompassing five standards for psychiatric examination, one for chief complaints, and five for present illness history assessment. By using these criteria as the prompt (shown in Appendix Table A3 ), we employed the LLM to evaluate each case and select the top 60% (527 examples) as high-quality cases based on the total score. From these high-quality cases, we allocated 477 cases for the SFT and 50 cases for the high-quality test set. The 50 high-quality test cases and 633 lower-quality cases are combined as a completed test set to evaluate the accuracy of clinical-related feature extraction. 
Subsequently, we fine-tuned the Qwen2-72B-Instruct model with Low-Rank Adaptation (LoRA) [ 19 ]. The LLM SFT involves training a pre-trained model on datasets with explicit input-output pairs to optimize the model’s performance on specific down-stream tasks. LoRA is a parameter-efficient fine-tuning technique that adds small, trainable rank decomposition matrices to the LLM’s existing weights, allowing for efficient model adaptation while keeping most of the original model parameters frozen. The model was trained using the following hyperparameters: LoRA rank of 8, LoRA alpha of 16, batch size of 8, and an initial learning rate of 1e-4 for 7,000 steps. During inference using the vLLM framework, we restricted the model’s output to a single token “Yes” or “No” as the binary output, while we also extracted the probability output for these two tokens from the whole vocabulary. After normalization of the probabilities, along with the binary outputs, we formed 276 features in the clinical-related feature set. 2.2.2 Assessment-related features The assessment-related feature set incorporates data from six validated psychiatric rating scales, combining self-rating and peer-rating instruments. Self-rating scales include SCL-90 [ 9 ], SDS [ 49 ], and SAS [ 48 ], while peer-rating scales comprise HAMD [ 15 ], HAMA [ 16 ], and MADRS [ 43 ], totaling 177 items in all. These scales were selected for their proven reliability in clinical practice and research, offering comprehensive symptom coverage. We designed two meta-prompts to enable the LLM to mimic both psychiatrists and patients in rating assessment scales in a zero-shot manner, as illustrated in Appendix Table A2 . The scales’ content and rating guidelines were integrated into the prompts for LLM to generate the features. 
For instance, when extracting features related to the first item of the HAMD, which measures depressed mood, we use the peer-rating meta-prompt to instruct the LLM to evaluate the severity of the patient’s depressed mood on a 0-4 scale based on age, gender, and conversation content, where 0 indicates the absence of depression and 4 represents severe depression. When the conversation lacks sufficient information about the depressed mood, the LLM is prompted to return “NULL”. Similar to the clinical-related feature extraction, we extracted and normalized the logits of related tokens from the last layer of the LLM and used them as the features for classification and prediction tasks, resulting in a total of 1,199 features. We did not apply SFT to the LLM for assessment-related feature extraction, since we did not have sufficient assessment scale labels. 2.2.3 Linguistic features In addition to the features generated by LLM, we extracted verbal features through two bag-of-words approaches: LIWC [ 33 ] and TF-IDF [ 36 ], both of which measure the frequency of word occurrence within a document. The LIWC tool is specifically designed to provide rich insights into psychological states, including emotions, thinking styles, and social concerns. It comprises word counts for 63 categories, including 52 categories related to linguistic counts (e.g., function words, common verbs, numbers, etc.), psychological processes (e.g., affect, sociality, cognition, perception, drive, etc.), and personal concern (e.g., work, home, religion, etc.), as well as 7 emotional categories (e.g., happy, sad, fear, etc.) and 4 general text metrics (e.g., the number of unique words, words in LIWC dictionary, etc.). We normalized the LIWC category counts by the total number of words. The TF-IDF algorithm, which stands for Term Frequency-Inverse Document Frequency, is a popular technique used in text analysis to determine the importance of words within a document or collection of documents. 
Unlike simple word counting, TF-IDF considers both how often a word appears in a specific document and how common or rare that word is across all documents. This approach helps identify words that are particularly characteristic or important to specific documents. In this study, TF-IDF was applied alongside LIWC to provide a more comprehensive analysis of the verbal features in the documents, offering insights into both the frequency and relevance of words used by the subjects. We applied Jieba 3 for Chinese character segmentation, resulting in a total of 27,000 features. 2.2.4 Acoustic features In addition to examining the verbal aspects of participants’ speech, we preprocessed the audio and extracted low-level acoustic and prosodic features using the OpenSMILE toolkits [ 12 ]. The audio recordings were manually edited to obscure names, addresses, and personally identifiable information before analysis. To reduce the impact of environmental noise and the varying distance from the microphone to the participant on recording quality, we used the pyAudacity toolkit 4 and the FFmpeg-normalized toolkit 5 to reduce the noise with a parameter of 12dB and normalize the volume to -23dB respectively. OpenSMILE is a versatile, customizable suite of acoustic features for signal processing and machine learning applications. We utilized OpenSMILE’s emobase_live4 configuration to extract the following LLDs: intensity, loudness, 12 MFCCs, pitch (F0), voicing probability (VoiceProb), F0 envelope (F0env), 8 line spectral frequencies (LSF), and Zero-Crossing Rate (ZCR). Next, we applied various functions to these LLDs and their delta coefficients, including minimum and maximum values with their relative positions (minPos and maxPos), range, mean, linear regression coefficients (linregc1–2), linear and quadratic error, standard deviation (STD), skewness, kurtosis, quartile values (quartile1–3), and interquartile ranges (iqr1-2, iqr2-3, iqr1–3). 
This process yielded 988 features to represent each speech utterance. Before LLD computation, pauses and silences were eliminated from the speech to create a continuous signal. We then extracted 988 emotion-based prosodic features using a 100 ms sliding window over the entire speech sample. Lastly, we calculated these emotion-based features’ maximum, minimum, mean, and standard deviation to compose the final set of OpenSMILE features, totaling 3,952 features. 2.3 Classification method As explained in previous sections, we extracted five feature sets using LLM and existing toolkits: clinical-related, assessment-related, LIWC, TF-IDF, and OpenSMILE features. Subsequently, we built a machine learning pipeline to fuse the outputs from multiple feature sets to predict the appearance of the symptom and classify diagnostic groups, which was implemented using Scikit-learn 1.2.0 in Python 3.10. Notably, as detailed in Section 2.2.1 , we fine-tuned the LLM using 138 high-quality clinical annotations to improve its ability to identify clinical concepts. We excluded diagnostic labels from this process to prevent data leakage. To ensure robust validation, we employed 10-fold cross-validation (10-fold CV). This method involves dividing the data into 10 subsets, iteratively training the model on 9 subsets, and testing it on the remaining subset. The process is repeated 10 times, with each subset serving as the test set once, and the model’s performance is averaged across all iterations. To address the challenge of class imbalance, we applied the Synthetic Minority Oversampling Technique (SMOTE) [ 5 ], which generates synthetic data for minority classes. Furthermore, we performed z-score standardization on all features, resulting in standardized features with a mean of 0 and a standard deviation of 1. This step ensures that all features are on a comparable scale, preventing any single feature from dominating the analysis due to its magnitude. 
We also implemented probability calibration to standardize predictions from each feature set. This process involved an internal CV on the training set of the outer CV to obtain the probability distribution on training data, which was then used to calibrate test set predictions [ 46 ]. Moreover, based on the feature importance ranked by the classifier on training data, we filtered out features whose importance values fell below the mean to reduce unimportant features. For the final prediction, we employed a late fusion technique, a multi-modal machine learning approach that involved averaging the standardized prediction outputs from all feature sets to produce the final output. This method allows for the integration of diverse information sources while maintaining the integrity of each feature set’s contribution to the final prediction. 2.4 Performance metrics To evaluate the performance of the LLM in extracting clinical features from interview dialogues, we employed standard information extraction metrics: precision and recall. Precision measures the proportion of correctly identified symptoms among all symptoms extracted by the LLM, while recall measures the proportion of symptoms in the EMR annotations that were correctly extracted by the LLM. Given that psychiatrists may not document every symptom mentioned during interviews in the EMRs, recall serves as a particularly valuable metric in our evaluation framework. Precision and recall are calculated as follows: Precision = TP / (TP + FP); Recall = TP / (TP + FN), where TP (True Positives) represents symptoms identified by both the LLM and the EMR annotations, FP (False Positives) represents symptoms incorrectly extracted by the LLM, and FN (False Negatives) represents symptoms present in the EMR but missed by the LLM. For classification and prediction tasks, we utilize a comprehensive set of standard metrics. 
Our analysis primarily focuses on balanced accuracy (BAC), which is particularly effective for imbalanced datasets by averaging sensitivity (SEN) and specificity (SPE). This metric provides a robust measure of overall performance, accounting for both true positive and true negative rates. In addition to BAC, we employ several other metrics to ensure a thorough assessment: positive predictive value (PPV), negative predictive value (NPV), and area under the precision-recall curve (AUPRC). The AUPRC, like BAC, is well-suited for machine learning tasks involving imbalanced data and offers valuable insights into model performance across various classification thresholds. Understanding the key distinguishing features among various mental health conditions is crucial for improving diagnostic accuracy, developing targeted interventions, and enhancing our overall comprehension of these disorders. To address this critical need, we employed a comprehensive approach to identify the most important features distinguishing between different mental health conditions. We utilized various feature sets, including LLM-generated clinical and assessment-related features, LIWC categories, and TF-IDF terms, and applied the Mann-Whitney U test with FDR correction across all feature sets to calculate p-values and measure feature importance. Features are ranked by their p-values, with those below 0.05 indicating a statistically significant difference between the two groups. 3 Results 3.1 Sample The study included 1,160 individuals, yielding about 15,000 minutes of speech data. All participants received diagnoses based on the ICD-10 [ 20 ]. The sample comprised 553 participants diagnosed with “Depressive Episode” or “Depressive Disorder” (DP), 426 diagnosed with “Anxiety Disorder” or “Anxiety State” (ANX), and 181 classified as “Others” (patients not diagnosed with DP or ANX). Table 2 presents the demographic characteristics of the participants. 
Moreover, based on the clinical annotations of symptom episodes in the EMRs, we categorized the participants into four groups: participants who experienced/presented anxiety symptoms (A), participants who experienced/presented depressive symptoms (D), participants who experienced/presented mixed depressive and anxiety symptoms (M), and participants who experienced/presented neither depressive nor anxiety symptoms (N). View this table: View inline View popup Download powerpoint Table 2: Demographics of all participants. 3.2 LLM-generated clinical-related features evaluation We evaluated the performance of LLM-generated clinical symptoms on the entire test samples and those with high-quality EMR, as shown in Table 3 . Our evaluation of LLM-based clinical symptom extraction demonstrated a significant performance improvement after the SFT, with accuracy increasing from 81.2% to 86.9% on the test set and from 83.7% to 89.1% on the high-quality test set. The recall metric showed substantial improvements, increasing from 66.1% to 81.1% on the whole test set and from 74.0% to 86.1% on the high-quality test set, indicating enhanced capability in identifying symptoms documented by psychiatrists in the EMR. Meanwhile, precision improved from 81.2% to 87.4% on the test set and from 84.2% to 89.5% on the high-quality test set. This precision increase, coupled with recall improvement, suggests that the fine-tuned model became more comprehensive in detecting symptoms from clinical dialogues. View this table: View inline View popup Download powerpoint Table 3: Performance comparison of LLM-generated clinical-related features between Zero-shot and SFT approaches. We present a comparative analysis of classification performance using clinical-related features extracted by the LLM in Figure 2 , comparing three feature sets: features extracted in a zero-shot manner, features extracted from the fine-tuned LLM, and psychiatrists’ annotations derived from EMRs. 
Across all classification tasks, features from the fine-tuned LLM consistently demonstrate superior performance. For instance, in distinguishing between depression and anxiety diagnoses (A vs. D), the fine-tuned LLM achieves a BAC of 74.8%. In identifying depression (D vs. N) and anxiety symptoms (A vs. N), the BAC reaches 79.8% and 72.2% respectively. These results underscore the potential of fine-tuned LLMs for accurate and automated clinical manifestation extraction. Download figure Open in new tab Fig. 2: Comparative analysis of classification performance using the clinical-related features extracted by the LLM in the zero-shot and SFT settings, and the annotations from EMRs, across different classification tasks. 3.3 Classification of diagnostic groups The results of automated classification tasks for distinguishing between ANX, DP, and Others groups (not diagnosed with ANX or DP) using various linguistic and LLM-generated features are shown in Table 4 . For the binary classification task (ANX vs. DP), the model achieved a BAC of 75.5%, an F1 score of 0.762, and an AUPRC of 0.824, indicating good overall performance (permutation test p-value < 0.01, same for other tasks). In the three-way classification task (ANX vs. DP vs. Others), the model achieved a BAC of 65.6% and an F1 score of 0.656, presenting a significant gain compared to the majority baseline (47.7%). View this table: View inline View popup Download powerpoint Table 4: Results for classification of ANX, DP, and Others groups. 3.4 Prediction of depression and anxiety symptoms In addition to identifying diagnostic results by ICD-10 code, we predicted whether participants exhibited symptoms of depression, anxiety, mixed depression/anxiety, or no symptoms at all, as shown in Table 5 . In the anxiety vs. no anxiety (A vs. N) classification task, the model achieved a sensitivity of 0.683 and specificity of 0.810 for detecting anxiety, with an overall F1 score of 0.754 and BAC of 74.7%. 
For the depression vs. no depression (D vs. N) task, the model performed slightly better, with a sensitivity of 0.806 and specificity of 0.737 for detecting depression, resulting in an F1 score of 0.783 and a BAC of 77.2%. When distinguishing between anxiety, depression, mixed symptoms, and no depression and anxiety symptoms (A vs. D vs. M vs. N), we achieved an AUPRC of 0.606 and a BAC of 60.7%, which achieved a significant improvement of about 30% compared to the majority baseline. View this table: View inline View popup Download powerpoint Table 5: Results for classification of participants with depression (D), anxiety (A), mixed depression and anxiety (M), and no depression and anxiety symptoms (N). 3.5 Interpretability The analysis revealed distinctive patterns across different mental health conditions and feature sets ( Table 6 ). In differentiating ANX from DP, clinical-related features emphasized anxiety-specific symptoms such as “Unable to relax”, “Uncontrollable restlessness”, and “Anxiety”, contrasting with depressive symptoms like “Sadness” and “Anhedonia”. Assessment measures showed a mixed profile, with both anxiety indicators (HAMD_Somatic anxiety) and depression markers (HAMD_Depressed mood). LIWC analysis revealed heightened use of anxiety and fear-related language, and TF-IDF identified anxiety-related terms. For depression detection, clinical-related features highlighted core depressive symptoms, with “Depressed mood”, “Loss of interest”, and “Anhedonia” emerging as primary indicators of depression. The assessment-related features showed strong signals from SCL-90 scales, particularly in items related to feelings of sadness and loss of interest. LIWC analysis identified significant usage patterns in sadness-related words and negative emotions, while TF-IDF analysis captured depression-specific terms and notably, negation patterns (e.g., “Don’t want”, “No”, etc.). 
For anxiety identification, clinical-related features strongly centered on anxiety manifestations, such as “Unable to relax”, “Anxiety”, and “Worry.” The assessment-related features prominently featured inner tension and somatic anxiety, along with various SCL-90 anxiety-related items. Both LIWC and TF-IDF analyses consistently identified anxiety-specific language patterns, with LIWC showing “Anxiety” and “Fear” as top features, and TF-IDF highlighting terms related to physical symptoms (e.g., “Palpitations”, “Heartbeat”, etc.) and worry. View this table: View inline View popup Download powerpoint Table 6: Top ten salient features for each feature set in paired classification tasks. 4 Discussion Inspired by promising early research on digital phenotypes for diagnosing and classifying symptoms in psychiatric patients, we investigated using signal processing and state-of-the-art LLM to capture symptom-related expression cues in outpatient conversations. Subsequently, we developed an ensemble classification pipeline to automatically differentiate between clinical diagnostic outcomes and the presence of symptoms. Although recent studies have demonstrated promising capabilities of utilizing LLMs in medical diagnosis [ 14 ], applications in mental health have predominantly centered on developing conversational agents [ 38 ], while the potential of extracting precise symptoms from psychiatric conversations for evidence-based diagnosis has not been fully explored. In this study, we investigated the efficacy of LLM in detecting clinical and assessment-related symptoms. Our investigation revealed that without any additional training, the model achieved a recall rate of 77.3% on high-quality dialogue-case pairs, and increased to 86.1% by fine-tuning the LLM using clinical annotations. This aligns with recent observations regarding LLMs’ strong zero-shot performance in healthcare domains and the fine-tuning could further boost LLM performance [ 24 ]. 
Furthermore, this enhanced base capability led to substantial improvements across all downstream classification and prediction tasks (e.g., the classification accuracy for ANX and DP increased from 72% to 75%). Current approaches to automated symptom detection predominantly rely on traditional natural language processing methods with predefined linguistic categories or rule-based systems [ 45 , 46 ], which often struggle to capture the complex presentation of psychiatric symptoms in natural conversation. Some researchers have explored the use of LLMs to assist in medical information retrieval [ 26 ]. We further investigated the information extraction capabilities in clinical dialogues and enhanced them through SFT. Our study demonstrated moderate to high performance in anxiety symptom detection (BAC=74.7%, AUPRC=0.813), depression symptom detection (BAC=77.2%, AUPRC=0.866), and a four-class classification of patients with anxiety, depression, mixed, or no symptoms (BAC=60.7%, AUPRC=0.606). As shown in the summary of existing literature (Appendix Table A4), while anxiety detection in social media text has demonstrated promising results with high accuracy [ 35 ], the performance of similar methods on spoken language data, such as interview transcripts and therapy dialogues, remains limited with accuracy rates below 65%. Recent advances combining LLM embedding with acoustic features have shown improved results, reaching 75% accuracy in a small cohort of 65 patients [ 21 ]. While depression detection studies have reported wide-ranging accuracy rates (65%-95%), some results should be interpreted with caution due to several methodological limitations: small sample sizes [ 45 ], reliance on PHQ screening tools rather than clinical diagnoses [ 17 ], data collection in structured experimental settings [ 10 ], and a lack of studies on first-episode outpatients in real-world, unstructured clinical environments. 
Our study leverages clinical diagnoses from psychiatrists and achieved a moderate to high accuracy in this more challenging and unstructured clinical setting, demonstrating the robustness of our approach. This success particularly highlights the potential of LLMs in extracting and analyzing clinical symptoms for predicting anxiety and depression in outpatient populations, offering a more ecologically valid and scalable solution for mental health screening and monitoring. DP and ANX present significant diagnostic challenges due to their high prevalence, frequent comorbidity, and overlapping symptomatology [ 18 ]. By leveraging LLM-generated features, our approach achieved robust performance in distinguishing these disorders, with a BAC of 75.5% and AUPRC of 0.824 for binary classification between DP and ANX, outperforming the direct use of LLMs as classifiers (see Appendix Table A5). In the more challenging multi-class scenario (ANX vs. DP vs. Others), the model maintained reasonable performance with a BAC of 65.6% and AUPRC of 0.715. Prior approaches to differentiating depression and anxiety disorders, such as cognitive tasks [ 34 ] and structured questionnaires [ 28 ], have achieved accuracy rates of 70-80%. In addition, we tested the classification performance of each assessment scale as the feature set; the results are presented in Appendix Fig. A2 . We observed that assessment-related features, particularly from scales like SCL-90, HAMD, and MADRS, showed strong discriminatory power across all comparisons, and early fusion and late fusion present similar classification performance. A potential reason is that these scales contain sufficient depression-related symptoms, which are key components for differentiating different groups. To our knowledge, no study has explored the objective diagnosis of DP and ANX using speech data from clinical interviews, potentially due to a lack of data and inherent subjectivity. 
Our study addresses a critical gap by analyzing the linguistic and symptom-related markers in various participant groups, providing objective cues to assist psychiatrists. The feature analysis provides several key insights into the differential characteristics of different groups of participants, as shown in Table 6 . We also illustrate the distribution of clinical and assessment-related features for each group of participants in Appendix Fig. A3 and Fig. A4 . The clinical-related features demonstrate clear condition-specific patterns: features that show more importance in patients with depression cluster around mood (sadness and disappointment) and motivational disturbances (anhedonia, reduced volition), while anxiety features predominantly reflect an inability to relax and worry. The observation for depression is in line with previous studies, which also observed that patients with depression presented blunted facial affect and increased sadness in language [ 21 , 42 ] and that anhedonia is specific to depression [ 8 ]. For anxiety recognition, the consistency of findings across different feature sets strengthens the reliability of these discriminators. For instance, the prominence of somatic symptoms in anxiety, captured in both assessment-related features and TF-IDF terms, suggests this could be a robust marker. Similarly, the persistent appearance of mood-related terms in depression across multiple feature sets reinforces their diagnostic utility. In summary, this study demonstrates the potential of using LLM to analyze digital biomarkers in speech for automatic assistance in psychiatric diagnosis and assessment. Our model achieved promising accuracy in identifying individuals with anxiety and depression symptoms, as well as differentiating between DP and ANX groups. 
Using LLMs to extract clinically relevant features and rate assessment scales improved the interpretability of the results, offering a novel approach to bridging the gap between automated analysis and clinical practice. While further research is needed, our findings suggest that well-developed LLMs could potentially serve as valuable tools in standardizing psychiatric evaluation and decision-making. 5 Limitations Our study has several limitations that should be addressed in future research. The absence of detailed symptom severity measures during the experiment limits our ability to correlate speech patterns with specific symptom intensities. Additionally, the study’s focus on specific disorders and potential biases in data collection may affect the generalizability of the results. Future work should prioritize the inclusion of comprehensive symptom severity assessments and explore the application of this approach to a broader range of mental health conditions. Besides, in the future, we will collect more data to perform longitudinal analysis, as it could provide insights into how linguistic patterns evolve with symptom progression or treatment response. Furthermore, expanding the use of more advanced LLMs in this context could potentially enhance the extraction of nuanced clinical concepts and provide even more detailed, interpretable insights for clinicians. Validating the model’s performance in diverse clinical settings and with larger, more diverse patient populations will be crucial to ensure its practical utility and generalizability. These advancements could significantly contribute to improving the efficiency and objectivity of consultations for depression, anxiety, and potentially other mental health disorders. Data Availability The data and features processed in the intermediate process in the present study are available upon reasonable request to the authors Author contributions S.X. 
conceptualized the study, developed the large language model methodology, designed the machine learning pipeline, conducted the experiments, and wrote the original draft. Y.Y. and Y.D. performed clinical concept verification and data analysis. F.L. and S.Z. developed the large language model methodology, performed data processing, and conducted the experiments. T.Y. and H.G. provided technical support and data resources. J.S., X.J., and J.C. supervised the project, acquired funding, and reviewed the final manuscript. N.O. reviewed the final manuscript. Other authors contributed to the clinical data collection. All authors contributed to the manuscript revision and approved the submitted version. Competing interests The authors declare no competing interests. Data availability The code and data can be found at https://github.com/Shanda-Group-Ltd/SMHC_llm_psychiatry_study A Descriptions of clinical-related features View this table: View inline View popup Table A1: The description of clinical-related features extracted from EMRs and DSM/ICD. B Additional classification results Download figure Open in new tab Fig. A1: The classification results for different feature sets. Download figure Open in new tab Fig. A2: The classification results for different assessment scales. Early fusion combines features before classification, and late fusion merges individual classifier outputs by simply averaging the output probabilities. C Prompt Templates View this table: View inline View popup Download powerpoint Table A2: Prompt templates for assessment-related feature generation. View this table: View inline View popup Download powerpoint Table A3: Prompt templates used for the clinical-related annotations generation, dialogue quality evaluation, and EMR quality evaluation. View this table: View inline View popup Table A4: Literature Review of related data-driven studies on identifying depression (DP) and general anxiety disorders (ANX). 
D LLMs as classifiers We conducted a systematic investigation of diagnostic efficacy in clinical dialogue classification utilizing the Qwen2-72B-Instruct LLM, implementing two distinct methodological approaches: direct classification and intermediate feature extraction. We designed a suitable prompt, added the dialogue content to the prompt to ask the LLM whether it is depression or anxiety, and let the LLM output only one of the two, and then we captured the probabilities of the two judgment output tokens to calculate the classification metrics. To ensure robust evaluation, we employed a stratified data partitioning strategy, allocating 60% of both DP and ANX samples for training, 20% for validation, and 20% for testing. In the SFT paradigm, we fine-tuned the model on the training set with LoRA, employed validation loss as the stopping criterion, and evaluated performance on the held-out test set. For comparative assessment, both zero-shot prompting and intermediate feature extraction approaches were evaluated on the same test set, maintaining consistency across all methods. We observed that using the approach in this paper provides better performance than directly using an LLM as the classifier, even after fine-tuning the model. View this table: View inline View popup Download powerpoint Table A5: Comparison of classification performance using Qwen2-72B-Instruct (Zero-shot and SFT) and Ours (LLM-generated feature sets with ensemble random forest classifiers) for DP/ANX classification. E Distribution illustration of salient features Download figure Open in new tab Fig. A3: Violin plots of the distribution of top four salient clinical-related features (p-value < 0.01) across diagnostic groups (Anxious [A], Depressed [D], Mixed Anxiety/Depression [M], and No Anxiety/Depression [N]). 
Each feature, identified in the plot titles, was selected based on statistical significance (lowest p-values) within its respective diagnostic group and exhibited higher median values than other groups. Download figure Open in new tab Fig. A4: Violin plots of the distribution of four salient assessment-related features (p-value < 0.01) across diagnostic groups (Anxious [A], Depressed [D], Mixed Anxiety/Depression [M], and No Anxiety/Depression [N]). Each feature, identified in the plot titles, was selected based on statistical significance (lowest p-values) within its respective diagnostic group and exhibited higher median values than other groups. Feature nomenclature follows the format: Scale_SymptomName_Rating, where a ‘NULL’ rating indicates that the symptom was not identified by the LLM. Acknowledgements This study was funded by 2023-TX-018 from Tianqiao and Chrissy Chen Institute (TCCI) with the Program of Chen Frontier Lab for AI and Mental Health (TCCI) - Shanghai Mental Health Center (SMHC). J.C. was supported by 82071500 from the National Natural Science Foundation of China and 21XD1423300 from the Program of Shanghai Academic/Technology Research Leader. We deeply appreciate every participant involved in this study and all the efforts made by TCCI and SMHC colleagues who are not on the author list. Footnotes ↵ # Contributing authors: shihao.xu{at}thetahealth.ai ; ↵ 1 https://github.com/hiyouga/LLaMA-Factory ↵ 2 https://github.com/vllm-project/vllm ↵ 3 https://github.com/fxsjy/jieba ↵ 4 https://github.com/asweigart/pyaudacity ↵ 5 https://github.com/slhck/ffmpeg-normalize References [1]. ↵ Aryan Agrawal . “Illuminate: A novel approach for depression detection with explainable analysis and proactive therapy using prompt engineering” . In: arXiv e-prints , arXiv:2402.05127 (Feb. 2024 ), arXiv:2402.05127. doi: 10.48550/arXiv.2402.05127 . arXiv: 2402.05127 [cs.CL]. OpenUrl CrossRef [2]. ↵ Alaa Althubaiti . 
“ Information Bias in Health Research: Definition, Pitfalls, and Adjustment Methods ”. In: Journal of Multidisciplinary Healthcare 9 ( May 2016 ), pp. 211 – 217 . issn: 1178-2390. doi: 10.2147/JMDH.S104807 . OpenUrl CrossRef PubMed [3]. ↵ American Psychiatric Association . Understanding Mental Disorders: Your Guide to DSM-5® . Washington, DC London, England , 2015 . isbn: 978-1-58562-491-1. [4]. ↵ Prabal Datta Barua et al. “ Artificial Intelligence Assisted Tools for the Detection of Anxiety and Depression Leading to Suicidal Ideation in Adolescents: A Review ”. In: Cognitive Neurodynamics 18 . 1 ( Feb. 2024 ), pp. 1 – 22 . issn: 1871-4080, 1871-4099. doi: 10.1007/s11571-022-09904-0 . OpenUrl CrossRef [5]. ↵ N. V. Chawla et al. “ SMOTE: Synthetic Minority Over-sampling Technique ”. In: Journal of Artificial Intelligence Research 16 (June 2002 ), pp. 321 – 357 . issn: 1076-9757. doi: 10.1613/jair.953 . OpenUrl CrossRef PubMed [6]. ↵ Zhiyu Chen , Yujie Lu , and William Yang Wang . Empowering Psychotherapy with Large Language Models: Cognitive Distortion Detection through Diagnosis of Thought Prompting . Oct. 2023. doi: 10.48550/arXiv.2310.07146 . arXiv : 2310.07146 [cs]. OpenUrl CrossRef [7]. ↵ Parth Chodavadia et al. “ Prevalence and Economic Burden of Depression and Anxiety Symptoms among Singaporean Adults: Results from a 2022 Web Panel ”. In: BMC Psychiatry 23 ( Feb. 2023 ), p. 104 . issn: 1471-244X. doi: 10.1186/s12888-023-04581-7 . OpenUrl CrossRef [8]. ↵ Lee Anna Clark and David Watson . “ Tripartite Model of Anxiety and Depression: Psychometric Evidence and Taxonomic Implications ”. In: Journal of Abnormal Psychology 100 . 3 ( 1991 ), pp. 316 – 336 . issn: 1939-1846. doi: 10.1037/0021-843X.100.3.316 . OpenUrl CrossRef PubMed Web of Science [9]. ↵ Leonard R. Derogatis and Rachael Unger . “Symptom Checklist-90-Revised”. en . In : ( 2010 ), pp. 1 – 2 . doi: 10.1002/9780470479216.corpsy0970 . OpenUrl CrossRef [10]. ↵ Hamdi Dibeklioğlu et al. 
“ Multimodal Detection of Depression in Clinical Interviews ”. In: Proceedings of the … ACM International Conference on Multimodal Interaction. ICMI (Conference ) 2015 ( Nov. 2015 ), pp. 307 – 310 . doi: 10.1145/2818346.2820776 . OpenUrl CrossRef [11]. ↵ Zohar Elyoseph , Inbar Levkovich , and Shiri Shinan-Altman . “ Assessing Prognosis in Depression: Comparing Perspectives of AI Models, Mental Health Professionals and the General Public ”. In: Family Medicine and Community Health 12 . Suppl 1 ( Jan. 2024 ), e002583 . issn: 2305-6983. doi: 10.1136/fmch-2023-002583 . OpenUrl Abstract / FREE Full Text [12]. ↵ Florian Eyben , Martin Wöllmer , and Björn Schuller . “Opensmile: the munich versatile and fast open-source audio feature extractor” . In: Proceedings of the 18th ACM international conference on Multimedia . MM ’10. New York, NY, USA : Association for Computing Machinery , Oct. 25, 2010 , pp. 1459 – 1462 . isbn: 978-1-60558-933-6. doi: 10.1145/1873951.1874246 . url: 10.1145/1873951.1874246. OpenUrl CrossRef [13]. ↵ Sharon Ferguson et al. “ The Explanation That Hits Home: The Characteristics of Verbal Explanations That Affect Human Perception in Subjective Decision-Making ”. In: Proc. ACM Hum.-Comput. Interact . 8 (CSCW2 Nov. 8, 2024 ), 517:1–517:37. doi: 10.1145/3687056 . url: https://dl.acm.org/doi/10.1145/3687056 (visited on 11/21/2024). OpenUrl CrossRef [14]. ↵ Ethan Goh et al. “ Large Language Model Influence on Diagnostic Reasoning: A Randomized Clinical Trial ”. In: JAMA Network Open 7 . 10 ( Oct. 2024 ), e2440969 . issn: 2574-3805. doi: 10.1001/jamanetworkopen.2024.40969 . (Visited on 11/28/2024). OpenUrl CrossRef [15]. ↵ M. Hamilton . “The Hamilton Rating Scale for Depression”. en. In: Assessment of Depression . Ed. by Norman Sartorius and Thomas A. Ban. Berlin , Heidelberg: Springer Berlin Heidelberg , 1986 , pp. 143 – 152 . isbn: 978-3-642-70486-4. doi: 10.1007/978-3-642-70486-4_14 . url: 10.1007/978-3-642-70486-4_14. OpenUrl CrossRef [16]. 
↵ Max Hamilton . “The Assessment of Anxiety States by Rating”. en . In: British Journal of Medical Psychology 32 . 1 ( 1959 ). _eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1111/j.2044-8341.1959.tb00467.x , pp. 50 – 55 . issn: 2044-8341. doi: 10.1111/j.2044-8341.1959.tb00467.x . url: https://onlinelibrary.wiley.com/doi/abs/10.1111/j.2044-8341.1959.tb00467.x . OpenUrl CrossRef PubMed Web of Science [17]. ↵ Amir Harati et al. “ Speech-Based Depression Prediction Using Encoder-Weight-Only Transfer Learning and a Large Corpus ”. In: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) . June 2021 , pp. 7273 – 7277 . doi: 10.1109/ICASSP39728.2021.9414208 . OpenUrl CrossRef [18]. ↵ J. M. Hettema et al. “ Identification and Validation of Mixed Anxiety– Depression ”. In: Psychological Medicine 45 .14 ( Oct. 2015 ), pp. 3075 – 3084 . issn: 0033-2917, 1469–8978. doi: 10.1017/S0033291715001038 . OpenUrl CrossRef [19]. ↵ Edward J. Hu , et al. LoRA: Low-Rank Adaptation of Large Language Models . Oct. 2021 . doi: 10.48550/arXiv.2106.09685 . arXiv : 2106.09685. OpenUrl CrossRef [20]. ↵ ICD-10 Version : 2016 . https://icd.who.int/browse10/2016/en . url: https://iris.who.int/handle/10665/42980 . [21]. ↵ Zifan Jiang et al. “ Multimodal Mental Health Digital Biomarker Analysis From Remote Interviews Using Facial, Vocal, Linguistic, and Cardiovascular Patterns ”. In: IEEE journal of biomedical and health informatics 28 . 3 ( Mar. 2024 ), pp. 1680 – 1691 . issn: 2168-2208. doi: 10.1109/JBHI.2024.3352075 . OpenUrl CrossRef [22]. ↵ Xiaochong Lan et al. Depression Detection on Social Media with Large Language Models . Mar. 2024 . arXiv: 2403.10750 [cs]. [23]. ↵ Hannah R. Lawrence et al. “ The Opportunities and Risks of Large Language Models in Mental Health ”. In: JMIR Mental Health 11 . 1 (July 2024 ), e59479 . doi: 10.2196/59479 . OpenUrl CrossRef [24]. ↵ Hannah R. Lawrence et al. 
“ The Opportunities and Risks of Large Language Models in Mental Health ”. In: JMIR Mental Health 11 . 1 (July 29, 2024 ), e59479. doi: 10.2196/59479 . url: https://mental.jmir.org/2024/1/e59479 . OpenUrl CrossRef [25]. ↵ Junkai Li et al. Agent Hospital: A Simulacrum of Hospital with Evolvable Medical Agents . May 2024 . doi: 10.48550/arXiv.2405.02957 . arXiv: 2405.02957 [cs]. OpenUrl CrossRef [26]. ↵ Lei Li et al. AutoMIR: Effective Zero-Shot Medical Information Retrieval without Relevance Labels . Oct. 26, 2024 . doi: 10.48550/arXiv.2410.20050 . arXiv: 2410. 20050. url: http://arxiv.org/abs/2410.20050 . OpenUrl CrossRef [27]. ↵ June M. Liu , et al. ChatCounselor: A Large Language Models for Mental Health Support . Sept. 2023 . doi: 10.48550/arXiv.2309.15461 . arXiv: 2309.15461 [cs]. OpenUrl CrossRef [28]. ↵ Kevin Liu , Brian Droncheff , and Stacie L. Warren . “ Predictive Utility of Symptom Measures in Classifying Anxiety and Depression: A Machine-Learning Approach ”. In: Psychiatry Research 312 (June 2022 ), p. 114534 . issn: 0165-1781. doi: 10.1016/j.psychres.2022.114534 . OpenUrl CrossRef [29]. ↵ Daniel M. Low , Kate H. Bentley , and Satrajit S. Ghosh . “ Automated Assessment of Psychiatric Disorders Using Speech: A Systematic Review ”. In: Laryngoscope Investigative Otolaryngology 5 . 1 ( Jan. 2020 ), pp. 96 – 116 . issn: 2378-8038. doi: 10.1002/lio2.354 . OpenUrl CrossRef PubMed [30]. ↵ Lisa Marzano et al. “ The Application of mHealth to Mental Health: Opportunities and Challenges”. In: The Lancet . Psychiatry 2 . 10 ( Oct. 2015 ), pp. 942 – 948 . issn: 2215-0374. doi: 10.1016/S2215-0366(15)00268-0 . OpenUrl CrossRef PubMed [31]. ↵ Mahmud Omar et al. “ Applications of Large Language Models in Psychiatry: A Systematic Review ”. In: Frontiers in Psychiatry 15 (June 2024 ). issn: 1664-0640. doi: 10.3389/fpsyt.2024.1422807 . OpenUrl CrossRef [32]. ↵ Openai . ChatGPT . https://chatgpt.com/chat . 2024 . (Visited on 12/16/2024). [33]. ↵ James W. Pennebaker et al. 
“The development and psychometric properties of LIWC2015” . In: ( 2015 ). url: https://repositories.lib.utexas.edu/items/705e81ca-940d-4c46-94ec-a52ffdc3b51f . [34]. ↵ Thalia Richter et al. “ Using Machine Learning-Based Analysis for Behavioral Differentiation between Anxiety and Depression ”. In: Scientific Reports 10 . 1 ( Oct. 2020 ), p. 16381 . issn: 2045-2322. doi: 10.1038/s41598-020-72289-9 . OpenUrl CrossRef [35]. ↵ Tisha Sadariya and Shanti Verma . “ Early Prediction and Detection of Anxiety Level Using Support Vector Machine ”. In: Proceedings of Data Analytics and Management. Ed. by Abhishek Swaroop et al. Singapore: Springer Nature , 2023 , pp. 279 – 291 . isbn: 978-981-9965-50-2. doi: 10.1007/978-981-99-6550-2_22 . OpenUrl CrossRef [36]. ↵ Claude Sammut and Geoffrey I. Webb . “TF–IDF” . In: Encyclopedia of Machine Learning . Ed. by Claude Sammut and Geoffrey I. Webb. Boston, MA : Springer US , 2010 , pp. 986 – 987 . isbn: 978-0-387-30164-8. doi: 10.1007/978-0-387-30164-8_832 . url: 10.1007/978-0-387-30164-8_832. OpenUrl CrossRef [37]. ↵ Chandra Mani Sharma , Darsh Damani , and Vijayaraghavan M. Chariar . “ Review and Content Analysis of Textual Expressions as a Marker for Depressive and Anxiety Disorders (DAD) Detection Using Machine Learning ”. In: Discover Artificial Intelligence 3 . 1 ( Nov. 2023 ), p. 38 . issn: 2731-0809. doi: 10.1007/s44163-023-00090-4 . OpenUrl CrossRef [38]. ↵ Elizabeth C. Stade et al. “ Large language models could change the future of behavioral healthcare: a proposal for responsible development and evaluation ”. In: npj Mental Health Research 3 . 1 (Apr. 2, 2024 ), pp. 1–12. issn: 2731-4251. doi: 10.1038/s44184-024-00056-z . url: https://www.nature.com/articles/s44184-024-00056-z . OpenUrl CrossRef [39]. ↵ Gemini Team et al. Gemini 1.5: Unlocking Multimodal Understanding across Millions of Tokens of Context . Aug. 2024 . doi: 10.48550/arXiv.2403.05530 . arXiv: 2403.05530. (Visited on 12/16/2024). OpenUrl CrossRef [40]. ↵ Qwen Team .
Qwen2.5: A Party of Foundation Models . Sept. 2024 . url: https://qwenlm.github.io/blog/qwen2.5/ . [41]. ↵ Xiao Wang , Kai Liu , and Chunlei Wang . “ Knowledge-Enhanced Pre-training Large Language Model for Depression Diagnosis and Treatment ”. In: 2023 IEEE 9th International Conference on Cloud Computing and Intelligent Systems (CCIS) . Aug. 2023 , pp. 532 – 536 . doi: 10.1109/CCIS59572.2023.10263217 . OpenUrl CrossRef [42]. ↵ Yuxi Wang , Diana Inkpen , and Prasadith Kirinde Gamaarachchige . “Explainable Depression Detection Using Large Language Models on Social Media Data” . In: Proceedings of the 9th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2024) . Ed. by Andrew Yates et al. St. Julians, Malta : Association for Computational Linguistics , Mar. 2024 , pp. 108 – 126 . [43]. ↵ Janet B. W. Williams and Kenneth A. Kobak . “Development and reliability of a structured interview guide for the Montgomery Asberg Depression Rating Scale (SIGMA)”. eng . In: The British Journal of Psychiatry: The Journal of Mental Science 192 . 1 ( Jan. 2008 ), pp. 52 – 58 . issn: 0007-1250. doi: 10.1192/bjp.bp.106.032532 . OpenUrl Abstract / FREE Full Text [44]. ↵ Alison W. Xin et al. “Using Large Language Models to Detect Outcomes in Qualitative Studies on Adolescent Depression” . In: Artificial Intelligence and Data Science for Healthcare: Bridging Data-Centric AI and People-Centric Healthcare . June 2024 . [45]. ↵ Shihao Xu et al. “ Automated Verbal and Non-verbal Speech Analysis of Interviews of Individuals with Schizophrenia and Depression ”. In: 2019 41st Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC) . July 2019 , pp. 225 – 228 . doi: 10.1109/EMBC.2019.8857071 . OpenUrl CrossRef [46]. ↵ Shihao Xu et al. “ Identifying Psychiatric Manifestations in Schizophrenia and Depression from Audio-Visual Behavioural Indicators through a Machine-Learning Approach ”. In: Schizophrenia 8 . 1 ( Nov.
2022 ), pp. 1 – 13 . issn: 2754-6993. doi: 10.1038/s41537-022-00287-z . OpenUrl CrossRef [47]. ↵ An Yang , et al. Qwen2 Technical Report . Sept. 2024 . doi: 10.48550/arXiv.2407.10671 . arXiv: 2407.10671. OpenUrl CrossRef [48]. ↵ W. W. Zung . “A rating instrument for anxiety disorders”. eng . In: Psychosomatics 12 . 6 ( 1971 ), pp. 371 – 379 . issn: 0033-3182. doi: 10.1016/S0033-3182(71)71479-0 . OpenUrl CrossRef [49]. ↵ William W. K. Zung . “ A Self-Rating Depression Scale ”. In: Archives of General Psychiatry 12 . 1 ( Jan. 1965 ), pp. 63 – 70 . issn: 0003-990X. doi: 10.1001/archpsyc.1965.01720310065008 . url: 10.1001/archpsyc.1965.01720310065008. OpenUrl CrossRef PubMed Web of Science Appendix References [1]. Sharifa Alghowinem et al. “ Cross-cultural detection of depression from nonverbal behaviour ”. In: 2015 11th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG) . Vol. 1 . May 2015 , pp. 1 – 8 . doi: 10.1109/FG.2015.7163113 . OpenUrl CrossRef [2]. Sharifa Alghowinem et al. “ Multimodal Depression Detection: Fusion Analysis of Paralinguistic, Head Pose and Eye Gaze Behaviors ”. In: IEEE Transactions on Affective Computing 9 . 4 ( Oct. 2018 ), pp. 478 – 490 . issn: 1949-3045. doi: 10.1109/TAFFC.2016.2634527 . OpenUrl CrossRef [3]. Hannah Burkhardt et al. “Comparing Emotion Feature Extraction Approaches for Predicting Depression and Anxiety” . In: Proceedings of the Eighth Workshop on Computational Linguistics and Clinical Psychology . Ed. by Ayah Zirikly et al. Seattle, USA : Association for Computational Linguistics , July 2022 , pp. 105 – 115 . doi: 10.18653/v1/2022.clpsych-1.9 . OpenUrl CrossRef [4]. Hamdi Dibeklioğlu et al. “ Multimodal Detection of Depression in Clinical Interviews ”. In: Proceedings of the 2015 ACM on International Conference on Multimodal Interaction . ICMI ’15 . New York, NY, USA: ACM , 2015 , pp. 307 – 310 . isbn: 978-1-4503-3912-4. doi: 10.1145/2818346.2820776 .
OpenUrl CrossRef [5]. Caroline Wanderley Espinola et al. “Detection of major depressive disorder using vocal acoustic analysis and machine learning—an exploratory study”. en . In: Research on Biomedical Engineering 37 . 1 ( Mar. 2021 ), pp. 53 – 64 . issn: 2446-4740. doi: 10.1007/s42600-020-00100-9 . OpenUrl CrossRef [6]. Amir Harati et al. “ Speech-Based Depression Prediction Using Encoder-Weight-Only Transfer Learning and a Large Corpus ”. In: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) . June 2021 , pp. 7273 – 7277 . doi: 10.1109/ICASSP39728.2021.9414208 . OpenUrl CrossRef [7]. Kevin Hilbert et al. “ Separating Generalized Anxiety Disorder from Major Depression Using Clinical, Hormonal, and Structural MRI Data: A Multimodal Machine Learning Study ”. In: Brain and Behavior 7 . 3 ( Feb. 2017 ), e00633 . doi: 10.1002/brb3.633 . OpenUrl CrossRef [8]. Zhaocheng Huang et al. “Domain Adaptation for Enhancing Speech-Based Depression Detection in Natural Environmental Conditions Using Dilated CNNs”. en . In: Interspeech 2020. ISCA, Oct . 2020 , pp. 4561 – 4565 . doi: 10.21437/Interspeech.2020-3135 . OpenUrl CrossRef [9]. Zifan Jiang et al. “ Multimodal Mental Health Digital Biomarker Analysis From Remote Interviews Using Facial, Vocal, Linguistic, and Cardiovascular Patterns ”. In: IEEE journal of biomedical and health informatics 28 . 3 ( Mar. 2024 ), pp. 1680 – 1691 . issn: 2168-2208. doi: 10.1109/JBHI.2024.3352075 . OpenUrl CrossRef [10]. Jyoti Joshi et al. “Can body expressions contribute to automatic depression analysis?” In: 2013 10th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG) . IEEE . 2013 , pp. 1 – 7 . [11]. Jina Kim et al. “ A Deep Learning Model for Detecting Mental Illness from User Content on Social Media ”. In: Scientific Reports 10 . 1 (July 2020 ), p. 11846 . issn: 2045-2322. doi: 10.1038/s41598-020-68764-y . OpenUrl CrossRef [12]. Namhee Kwon et al. 
“ Detecting Anxiety and Depression from Phone Conversations Using X-Vectors ”. In: SMM22, Workshop on Speech, Music and Mind 2022 . ISCA , Sept. 2022 , pp. 1 – 5 . doi: 10.21437/SMM.2022-1 . OpenUrl CrossRef [13]. Kevin Liu , Brian Droncheff , and Stacie L. Warren . “ Predictive Utility of Symptom Measures in Classifying Anxiety and Depression: A Machine-Learning Approach ”. In: Psychiatry Research 312 (June 2022 ), p. 114534 . issn: 0165-1781. doi: 10.1016/j.psychres.2022.114534 . OpenUrl CrossRef [14]. Lu-Shih Alex Low et al. “Detection of Clinical Depression in Adolescents’ Speech During Family Interactions”. en . In: IEEE Transactions on Biomedical Engineering 58 . 3 ( Mar. 2011 ), pp. 574 – 586 . issn: 0018-9294, 1558-2531. doi: 10.1109/TBME.2010.2091640 . OpenUrl CrossRef [15]. Y. Lu et al. “ Robust Speech and Natural Language Processing Models for Depression Screening ”. In: 2020 IEEE Signal Processing in Medicine and Biology Symposium (SPMB) . Dec. 2020 , pp. 1 – 5 . doi: 10.1109/SPMB50085.2020.9353611 . OpenUrl CrossRef [16]. Kuan Ee Brian Ooi , Margaret Lech , and Nicholas B. Allen . “Multichannel Weighted Speech Classification System for Prediction of Major Depression in Adolescents” . en. In: IEEE Transactions on Biomedical Engineering 60.2 (Feb. 2013 ), pp. 497 – 506 . issn: 0018-9294, 1558-2531. doi: 10.1109/TBME.2012.2228646 . OpenUrl CrossRef [17]. Syed Arbaaz Qureshi et al. “Improving Depression Level Estimation by Concurrently Learning Emotion Intensity”. en . In: IEEE Computational Intelligence Magazine 15 . 3 ( Aug. 2020 ), pp. 47 – 59 . issn: 1556-603X, 1556-6048. doi: 10.1109/MCI.2020.2998234 . OpenUrl CrossRef [18]. Thalia Richter et al. “ Machine Learning-Based Diagnosis Support System for Differentiating between Clinical Anxiety and Depression Disorders ”. In: Journal of Psychiatric Research 141 (Sept. 2021 ), pp. 199 – 205 . issn: 0022-3956. doi: 10.1016/j.jpsychires.2021.06.044 . OpenUrl CrossRef [19]. Thalia Richter et al.
“ Using Machine Learning-Based Analysis for Behavioral Differentiation between Anxiety and Depression ”. In: Scientific Reports 10 . 1 ( Oct. 2020 ), p. 16381 . issn: 2045-2322. doi: 10.1038/s41598-020-72289-9 . OpenUrl CrossRef [20]. Michelle Hewlett Sanchez et al. “Using prosodic and spectral features in detecting depression in elderly males” . In: 12th Annual Conference of the International Speech Communication Association . Aug. 2011 , pp. 3001 – 3004 . [21]. Stefan Scherer et al. “Investigating Voice Quality as a Speaker-Independent Indicator of Depression and PTSD” . en. In: INTERSPEECH-2013 (Aug. 2013 ), pp. 847 – 851 . [22]. Takaya Taguchi . “Major depressive disorder discrimination using vocal acoustic features”. en . In: Journal of Affective Disorders ( 2018 ), p. 7 . [23]. Bazen Gashaw Teferra and Jonathan Rose . “ Predicting Generalized Anxiety Disorder From Impromptu Speech Transcripts Using Context-Aware Transformer-Based Neural Networks: Model Evaluation Study ”. In: JMIR Mental Health 10 . 1 ( Mar. 2023 ), e44325 . doi: 10.2196/44325 . OpenUrl CrossRef [24]. Lana G. Tennenhouse et al. “ Machine-Learning Models for Depression and Anxiety in Individuals with Immune-Mediated Inflammatory Disease ”. In: Journal of Psychosomatic Research 134 (July 2020 ), p. 110126 . issn: 00223999. doi: 10.1016/j.jpsychores.2020.110126 . OpenUrl CrossRef [25]. Michel Valstar et al. “AVEC 2016 - Depression, Mood, and Emotion Recognition Workshop and Challenge” . In: arXiv:1605.01600 [cs] (Nov. 2016 ). [26]. Shihao Xu et al. “ Identifying Psychiatric Manifestations in Schizophrenia and Depression from Audio-Visual Behavioural Indicators through a Machine-Learning Approach ”. In: Schizophrenia 8 . 1 ( Nov. 2022 ), pp. 1 – 13 . issn: 2754-6993. doi: 10.1038/s41537-022-00287-z . OpenUrl CrossRef [27]. Le Yang, et al. “ Decision Tree Based Depression Classification from Audio Video and Language Information ”. 
In: Proceedings of the 6th International Workshop on Audio/Visual Emotion Challenge . AVEC ’16 . New York, NY, USA : Association for Computing Machinery , Oct. 2016 , pp. 89 – 96 . isbn: 978-1-4503-4516-3. doi: 10.1145/2988257.2988269 . OpenUrl CrossRef [28]. Yang Yu , Qi Li , and Xiaoqian Liu . “ Automatic Anxiety Recognition Method Based on Microblog Text Analysis ”. In: Frontiers in Public Health 11 ( Mar. 2023 ). issn: 2296-2565. doi: 10.3389/fpubh.2023.1080013 . OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted January 03, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Identifying Psychiatric Manifestations in Outpatients with Depression and Anxiety: A Large Language Model-Based Approach Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Identifying Psychiatric Manifestations in Outpatients with Depression and Anxiety: A Large Language Model-Based Approach Shihao Xu , Yiming Yan , Yanli Ding , Feng Li , Shu Zhang , Haoyun Tang , Chao Luo , Yan Li , Hao Liu , Yu Mei , Wenjie Gu , Hong Qiu , Yong Wang , Jianyin Qiu , Tao Yang , Zike Wang , Qing Zhang , Haiyang Geng , Yunyun Han , Jun Shao , Nils Opel , Lidong Bing , Min Zhao , Yifeng Xu , Xun Jiang , Jianhua Chen medRxiv 2025.01.03.24318117; doi: https://doi.org/10.1101/2025.01.03.24318117 Share This Article: Copy Citation Tools Identifying Psychiatric Manifestations in Outpatients with Depression and Anxiety: A Large Language Model-Based Approach Shihao Xu , Yiming Yan , Yanli Ding , Feng Li , Shu Zhang , Haoyun Tang , Chao Luo , Yan Li , Hao Liu , Yu Mei , Wenjie Gu , Hong Qiu , Yong Wang , Jianyin Qiu , Tao Yang , Zike Wang , Qing Zhang , Haiyang Geng , Yunyun Han , Jun Shao , Nils Opel , Lidong Bing , Min Zhao , Yifeng Xu , Xun Jiang , Jianhua Chen medRxiv 2025.01.03.24318117; doi: https://doi.org/10.1101/2025.01.03.24318117 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Psychiatry and Clinical Psychology Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15227) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6597) Geriatric Medicine (668) Health Economics (997) Health Informatics (4534) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) 
Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9230) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a003e2b9bc15300f',t:'MTc3OTUzNzQ3Mw=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-06-17T06:32:23.968882+00:00