Leveraging Language Embeddings from EMA Surveys to Predict Perceived Social Isolation among Stroke Survivors

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 53,160 characters · extracted from preprint-html · click to expand
Leveraging Language Embeddings from EMA Surveys to Predict Perceived Social Isolation among Stroke Survivors | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Leveraging Language Embeddings from EMA Surveys to Predict Perceived Social Isolation among Stroke Survivors Yunda Liu , Alex W. K. Wong , Mandy W. M. Fong , Christopher L. 
Metts , Yun Shi , View ORCID Profile Sunghoon Ivan Lee doi: https://doi.org/10.1101/2025.07.17.25331714 Yunda Liu 1 Manning College of Information and Computer Sciences, University of Massachusetts Amherst , Amherst, MA 01003 USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alex W. K. Wong 1 Manning College of Information and Computer Sciences, University of Massachusetts Amherst , Amherst, MA 01003 USA 2 Center for Rehabilitation Outcomes Research , Shirley Ryan AbilityLab, IL 60611 USA 3 Department of Physical Medicine and Rehabilitation, Northwestern University Feinberg School of Medicine , IL 60611 USA 4 Department of Medical Social Sciences, Northwestern University Feinberg School of Medicine , IL 60611 USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: wwong{at}sralab.org silee{at}cs.umass.edu Mandy W. M. Fong 5 Department of Physical Medicine and Rehabilitation, Northwestern University Feinberg School of Medicine , IL 60611 USA 6 Michigan Avenue Neuropsychologists , IL 60601 USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Christopher L. 
Metts 7 Department of Pathology and Laboratory Medicine, Medical University of South Carolina , SC 29425 USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yun Shi 8 Department of Neurology, New York University Grossman School of Medicine , NY 10017 USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sunghoon Ivan Lee 1 Manning College of Information and Computer Sciences, University of Massachusetts Amherst , Amherst, MA 01003 USA Roles: Senior Member, IEEE Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sunghoon Ivan Lee For correspondence: wwong{at}sralab.org silee{at}cs.umass.edu Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Perceived social isolation (PSI) significantly affects the emotional well-being of stroke survivors, necessitating effective monitoring and prediction for timely, targeted interventions. While Ecological Momentary Assessment (EMA) has been increasingly used to identify precursor characteristics of PSI, existing prediction methods rely on handcrafted features, which often fail to capture the semantic richness and contextual relationships among survey questions. In this study, we propose a novel approach to predict PSI by processing structured EMA data with language embeddings. A total of 11,802 EMA surveys were collected from 218 stroke survivors, the largest dataset of its kind in social isolation research for this population. Language embeddings were extracted from the structured EMA surveys using a pre-trained language model. These embeddings were then processed by training an autoencoder to generate compact latent representations, which were used for the downstream PSI prediction. 
Our findings show that the proposed approach achieves accurate PSI prediction, with a weighted $F_1$ score of 0.84 and a weighted AUPRC of 0.92, outperforming traditional handcrafted features. Furthermore, by leveraging only three carefully selected questions, our method can optimize a trade-off between validity and usability. This study demonstrates an efficient method for real-time monitoring of psychosocial outcomes in stroke survivors, with potential implications for early intervention and personalized care. I. INTRODUCTION STROKE can lead to physical and cognitive impairments that significantly disrupt survivors’ daily functioning [ 1 ]. These disruptions often result in a profound disconnection from pre-stroke self-identity and environments, driven by challenges such as job loss and role changes, leading to reduced social and community engagement [ 2 ]. While rehabilitation seeks to improve motor and cognitive functioning, many stroke survivors face persistent psychosocial challenges [ 1 ], which often intensify during the later stages of recovery [ 3 ]. Consequently, social isolation is common, affecting nearly half of stroke survivors 12 months post-stroke [ 4 ], [ 5 ]. Beyond causing emotional distress [ 6 ], [ 7 ], social isolation is a risk factor for adverse clinical outcomes, including poorer functional recovery, greater functional decline, and an increased risk of recurrent stroke [ 8 ]. Thus, there is a critical need to monitor and predict perceived social isolation (PSI) for timely, targeted interventions. To enable continuous monitoring, Ecological Momentary Assessment (EMA) has been widely used as an effective tool for collecting behavioral and psychosocial data in naturalistic environments [ 9 ]. EMA involves repeated real-time assessments, aiming to reduce recall bias and enhance ecological validity by capturing fluctuations in individuals’ experiences and behaviors in daily life [ 9 ], [ 10 ]. 
Moreover, with advancements in mobile technologies, EMA has become increasingly flexible and scalable, making it practical for longitudinal monitoring of psychosocial outcomes [ 11 ]. Prior research has demonstrated that smartphone-based EMA can accurately measure mood states, post-stroke symptoms, and daily behaviors among stroke survivors, offering valuable insights into their real-world experiences and recovery trajectories [ 12 ]–[ 14 ]. To predict PSI using EMA data, existing approaches typically employ machine learning models trained on manually crafted, discrete features from patients’ responses [ 15 ]–[ 17 ]. These features, often derived from simple statistical metrics (e.g., mean, standard deviation) [ 18 ] and categorical encoding [ 19 ], have shown promise in capturing basic response patterns. However, such approaches often overlook the nuanced semantic meanings embedded in natural language and fail to model complex inter-question relationships within survey data. To address these limitations, recent studies—though limited— have begun applying natural language processing (NLP) to EMA data analysis [ 20 ], [ 21 ]. Yet, these emerging approaches either sacrifice semantic precision or require free-text inputs, which can be burdensome for patients during frequent, daily assessments, ultimately limiting their near real-time predictive utility [ 22 ]. In this work, we present a data analytic pipeline for predicting PSI among stroke survivors by leveraging NLP techniques to extract contextualized language embeddings from structured EMA data. Building on the recent success in language models, we hypothesized that the use of language embeddings enables richer, more comprehensive representations of stroke survivors’ momentary experiences and mental states, thereby enhancing PSI prediction accuracy. 
To test this, we utilized a dataset of 11,802 EMA surveys collected from 218 stroke survivors, the largest dataset of its kind in social isolation research for this population. We extracted embeddings using a pre-trained language model and trained an autoencoder to generate compact latent representations, which were then used as input for a downstream PSI prediction model. Furthermore, we hypothesized that information across multiple EMA surveys per participant can capture temporal patterns and improve predictive performance. Our promising results highlight the potential for developing low-burden, scalable tools for continuous monitoring of psychosocial well-being in stroke survivors, paving the way for personalized and timely interventions in real-world settings. II. METHOD A. Study Participants We utilized a dataset of 218 stroke survivors (95 female, 59.79 ± 11.74 years old; mean ± standard deviation) in this study [ 23 ]. Study participants were recruited from a single hospital database between October 2018 and January 2021. To be eligible, participants must 1) be at least 18 years old, 2) be fluent in English, 3) exhibit mild-to-moderate stroke severity with the National Institutes of Health Stroke Scale (NIHSS) score of 13 or lower, 4) have experienced an ischemic or hemorrhagic stroke at least 3 months prior to the study, and 5) have no or minor pre-stroke disabilities. The study was approved by the ethics committees of Washington University in Saint Louis (#201704024) and Northwestern University (#STU00215308). B. Data Collection During the study, participants were instructed to complete 5 EMA surveys per day for 14 consecutive days, totaling up to 70 surveys per participant. However, due to non-compliance and other barriers, each participant completed an average of 54.14 ± 15.79 surveys throughout the study period. 
Participants completed EMA surveys using a mobile application installed on either 1) an iPod Touch (sixth generation, iOS 11) provided by the research team or 2) their own iPhone. During the study, the mobile application prompted participants to complete daily surveys at random intervals of approximately 2.5 hours within specific time slots: 8:00 AM to 10:00 AM, 11:00 AM to 1:00 PM, 2:00 PM to 4:00 PM, 5:00 PM to 7:00 PM, and 8:00 PM to 10:00 PM. After receiving the notification, participants had a one-hour window to complete the survey and received up to three reminders before the survey expired. Before starting the EMA protocol, participants underwent training on using the mobile application and completed a practice survey in person under the supervision of the research team. They also received follow-up calls on the first two days and periodically thereafter to address any potential questions and concerns. C. EMA Design The EMA survey used in this study was adapted from a previously validated survey designed to measure daily functioning and symptoms in individuals with HIV [ 24 ], [ 25 ]. The adapted version has demonstrated ecological validity and sensitivity in assessing mental and physical symptoms post-stroke [ 17 ], [ 23 ]. The EMA survey consisted of 30 questions designed to capture PSI, secondary conditions, and daily activity patterns of study participants (see Supplementary Material for the complete list of questions). More specifically, PSI was a subjective measure reflecting the extent to which participants felt isolated from others. Secondary conditions captured somatic and mood-related symptoms, including pain, fatigue, anxiety, and depression. Responses to these questions were rated on a 5-point Likert scale ranging from 1 (not at all) to 5 (very much). 
Daily activity patterns captured real-time information about 1) participants’ locations and transportation methods, 2) their social companions, and 3) activities they were engaged in, with responses selected from pre-defined options. In addition, participants were asked to rate their social interactions on a 7-point scale from 1 (not confident/not satisfied/not successful) to 7 (very confident/satisfied/successful) and activities on a 7-point scale from 1 (no help/not well/not satisfied/not engaged) to 7 (a lot of help/very well/very satisfied/very engaged). D. Overview of PSI Prediction Fig. 1 illustrates the data analytic pipeline designed for PSI prediction. In the pre-training phase, language embeddings were first extracted from individual survey questions using a pre-trained language model. The question-level embeddings were concatenated to form the input to an autoencoder. The autoencoder was trained to reconstruct the original input, thereby learning a compact latent representation of the entire survey. In the downstream phase, the pre-trained encoder was leveraged to generate latent representations, which were used as inputs to a supervised classification model for predicting PSI. Download figure Open in new tab Fig. 1. The plots illustrate the data analytic pipeline designed for PSI prediction. (a) Language embeddings were extracted from individual survey responses and concatenated to form the input to an autoencoder. The autoencoder was trained to learn compact latent representations by reconstructing the original input. (b) The pre-trained language model and encoder were then used to generate latent representations of new survey data, which served as inputs to a supervised classification model for predicting social isolation status. E. Embedding Extraction and Autoencoder Pre-Training We utilized pre-trained DistilBERT [ 26 ], a lightweight transformer-based model, to extract language embeddings from the EMA data. 
DistilBERT retains much of the representational capacity of the original BERT model while enabling faster inference and requiring fewer computational resources. To preprocess the input data, we combined each question prompt with its corresponding patient’s response into a single textual input. Then, we grouped conceptually related questions, such as those associated with social interactions and activity performance, to provide a more coherent contextual representation, as shown in the example below. In total, 18 textual inputs were generated from the 30 EMA survey questions, and each input was encoded into a 768-dimensional embedding vector. I am at work right now. I drove myself . None of these things have gotten in the way . I have 4 or more interactions with someone else since the last alarm . I am with co-worker(s), friend(s), unknown people . The confidence level in my interaction with the person(s) I am with is 5. The satisfaction level in my interaction with the person(s) I am with is 6. The success level in my interaction with the person(s) I am with is 6 . I am doing working (paid). The help I am getting from someone else while doing this activity is 6 . The activity performance is 7. The satisfaction in the activity is 7. The engagement in the activity is 7 . My pain level is mild . I do not feel nervous or stressed at all . I feel a little bit tired . I do not feel depressed at all . I do not feel worthless at all . I have very much trouble concentrating . I do not have difficulty learning new tasks or instructions at all . I do not feel uneasy at all . It is a little bit easy for me to keep track of my thoughts and feelings . I have somewhat little interest in doing things . I do not have a poor appetite at all . My moving/speaking is so slow or restless that other people have noticed not at all . I feel very much cheerful . 
To merge the embeddings from different questions and represent each EMA survey as a single fixed-length vector, we designed an autoencoder to encode the sequence of embeddings. The autoencoder aimed to reconstruct the original input at its output [ 27 ], thereby learning a compact latent representation that reflects the underlying semantic structure and interrelationships among the individual survey questions. To prepare the input to the autoencoder, the language embeddings were concatenated and represented as a vector of size 18 × 768, corresponding to 18 textual inputs encoded into 768-dimensional DistilBERT embeddings. The encoder consisted of three fully connected layers with sizes of 9 × 768, 4 × 768, and 2 × 768, respectively. The output of the encoder was a latent representation with 768 dimensions. The decoder mirrored the encoder architecture, consisting of three fully connected layers that reconstructed the original input from the latent representation. During the training of the autoencoder, data from ten subjects were randomly assigned to the test set, another ten subjects were selected for the validation set, and the remaining 198 subjects were used for training. We employed cross-validation to ensure that each subject appeared in the test set exactly once. The autoencoder was trained to minimize the reconstruction error between the DistilBERT embeddings and the autoencoder output, using mean squared error (MSE) as the loss function. The optimal learning rate for the Adam optimizer was chosen using a logarithmically-spaced grid search over $[10^{-4}, 10^{-2}]$ based on the performance on the validation set. The autoencoder was trained for up to 50 epochs with early stopping to prevent overfitting. After training, only the encoder was used to generate latent representations for the subsequent PSI prediction. F. Social Isolation Prediction To train a classification model for predicting PSI, we leveraged multiple previous EMA surveys. 
This approach was motivated by the time-varying nature of PSI, which requires observing participants over time to improve prediction accuracy. Specifically, to predict the PSI level reported at time point $t_i$, surveys collected at time points $(t_{i-1}, t_{i-2}, \ldots, t_{i-N})$ were used as input, where $1 \le N \le 5$. A maximum of five previous surveys was used to ensure that predictions could be generated within a day, minimizing delays in potential interventions. Participants rated their PSI level on a 5-point Likert scale, where 1 indicated “not at all” and 5 indicated “very much”. To simplify the prediction task, responses were binarized: a rating of 1 was labeled as negative, indicating “not isolated”, while ratings from 2 to 5 were labeled as positive, indicating “isolated”. We employed a nested cross-validation strategy to predict whether a participant would experience PSI. In the outer loop of cross-validation, the ten participants designated as the test set during autoencoder pretraining (as described in Section II-E) were kept in the test set, while the remaining participants were put in the training set. This setup ensured that the test participants were not involved in any part of the model training process, thereby preserving generalizability. The input data were the latent representations generated by the autoencoder. These representations were normalized by subtracting the mean and dividing by the standard deviation. To identify informative dimensions within the latent space, we computed the Area Under the Precision-Recall Curve (AUPRC) for each dimension on the training set and selected those with AUPRC greater than a threshold $\delta$. For classification, we employed Logistic Regression with L1 penalty to reduce overfitting. Hyperparameters for feature selection and classification were tuned in an inner three-fold cross-validation. 
Specifically, the AUPRC threshold $\delta$ was searched uniformly within the range [0.70, 0.85] [ 28 ], [ 29 ], and the regularization parameter $C$ for Logistic Regression was searched logarithmically within the range $[10^{-3}, 10^{3}]$. Model performance was evaluated using $F_1$ score. To address class imbalance, $F_1$ score was computed with sample weights assigned inversely proportional to class distribution. After identifying the optimal hyperparameters, the final model was evaluated on the held-out test participants in the outer loop. G. Comparative Analysis To establish a baseline for comparison, we designed hand-crafted features based on the responses to the EMA surveys. Specifically, ordinal responses were encoded using their original numerical values, with larger values indicating a greater intensity or frequency of the measuring construct. For nominal questions, responses were one-hot encoded into binary features, where a value of one indicated the selected response and zero indicated otherwise. The question “What activity were you doing?” included a large number of possible responses. We reduced complexity by grouping the predefined activities into seven broader categories: activities of daily living (ADL), instrumental activities of daily living (IADL), passive leisure activity, cognitively stimulating activity, physical activity, social activity, and vocational activity [ 23 ]. Each category was then represented as a separate binary feature, indicating whether the participant engaged in an activity from that category. Because multiple EMA surveys could be used for PSI prediction, we aggregated features across up to five surveys to capture temporal patterns, consistent with the proposed method. We computed the mean, standard deviation, maximum, and minimum values for each feature across the available surveys, as suggested in prior work [ 18 ]. 
Additionally, previous studies have shown that temporal dynamics vary across individuals and can provide insight into intra-individual changes. Therefore, we also included variability-related measures, such as the root mean square of successive differences (RMSSD) [ 30 ], coefficient of variation [ 31 ], and mean successive variability [ 31 ]. In total, 567 features were extracted. The model trained on hand-crafted features followed the same procedure described in Section II-F. III. RESULTS A. Results of PSI Prediction A total of 11,802 EMA surveys were collected, with 1,707 (14.46%) indicating the presence of PSI. At the participant level, 124 stroke survivors (56.88%) reported experiencing PSI at least once. Although PSI occurred intermittently across individual surveys, its occurrence among more than half of the participants highlights its prevalence and the importance of detecting and addressing social isolation in stroke survivors. Table I summarizes the performance of our language embedding-based PSI prediction using varying numbers of EMA surveys as input. To address class imbalance, precision score, $F_1$ score, and AUPRC were computed with sample weights inversely proportional to the class distribution. Model performance generally improved with the inclusion of more surveys, indicating that aggregating information across multiple time points contributes to higher prediction accuracy. Performance stabilized around four surveys, with an $F_1$ score of 0.84 and an AUPRC of 0.92, as summarized in Fig. 2 . View this table: View inline View popup Download powerpoint TABLE I: Results of Social Isolation Prediction Based on Language Embeddings Download figure Open in new tab Fig. 2. (a) The confusion matrix and (b) Precision-Recall curve of the classification model trained with four EMA surveys as input. B. Results of Comparative Analysis Fig. 
3 compares the prediction performances of our proposed language embedding-based model with a model trained on hand-crafted features. Across different numbers of surveys used for training, our model consistently outperformed the hand-crafted feature model. Specifically, with four surveys, our approach achieved a 7% improvement in recall score, 4% improvement in $F_1$ score, and 4% improvement in AUPRC. These results highlight the effectiveness of representation learning in capturing complex and latent structures within EMA responses that manually crafted features may overlook. Download figure Open in new tab Fig. 3. The figures compare the models trained on hand-crafted features and latent representations learned from the autoencoder in terms of (a) recall score, (b) precision score, (c) $F_1$ score, and (d) AUPRC. C. Identify Important Questions in EMA Surveys We employed an empirical forward selection approach to identify the most informative EMA questions contributing to PSI prediction. Specifically, for each question, we kept only its language embedding and set the embeddings from all other questions to zero. Because the encoder only consisted of fully connected layers, zero-valued embeddings for unselected questions did not contribute to the computation of the latent representations [ 32 ]. The modified embeddings were concatenated and passed through the pre-trained encoder to generate latent representation, which was used as input to the downstream classification pipeline. Questions were ranked based on the classification performance in the validation set within the inner cross-validation loop. The question that yielded the best performance was selected first. Subsequently, the question that—when combined with the previously selected question(s)—yielded the most significant improvement was added. This iterative process continued until all questions had been evaluated. Fig. 
4 presents the classification performance achieved by incrementally adding questions through the forward selection approach across different numbers of EMA surveys. Overall, performance improved with both the inclusion of more surveys and the addition of more questions. Notably, substantial improvements in AUPRC were observed with the first few selected questions. For example, using five surveys and only three questions yielded an AUPRC of 0.91, comparable to the performance obtained using all questions (0.92). These results suggest that a small subset of carefully selected questions can provide comparable predictive performance, especially when multiple surveys are available. Download figure Open in new tab Fig. 4. The heatmap illustrates how the classification performance changes with varying numbers of EMA surveys and questions included. When using five surveys and only three questions, the most frequently selected questions in sequential order were 1) levels of anxiety, 2) feelings of worthlessness, and 3) the number of social interactions since the last EMA prompt. Due to the limited interpretability of raw language embeddings, we examined the average self-reported values for these questions across the five surveys, as illustrated in Fig. 5 . The Mann–Whitney U test indicated statistically significant differences in the distributions of all three questions between the socially isolated and non-isolated groups ( p < 0.05). Specifically, participants in the socially isolated group reported higher levels of anxiety, worse feelings of worthlessness, and fewer social interactions compared to their non-isolated counterparts. Although the average number of social interactions showed a statistically significant difference between groups, handcrafted features based on this question did not contribute to the prediction of PSI. 
However, incorporating the language embedding from this question improved model performance, suggesting that the proposed language embedding-based approach can capture subtle contextual inter-question dependencies that are not reflected in the handcrafted feature values. Download figure Open in new tab Fig. 5. Boxplots of the most important questions for social isolation prediction. The asterisks indicate a significant statistical difference ( p < 0.05 ) in the distribution between the socially isolated and non-isolated groups. Abbreviation: avg.—average IV. DISCUSSION In this paper, we propose a data analytic pipeline for predicting PSI based on language embeddings derived from EMA surveys. An autoencoder was trained to integrate embeddings from participants’ survey responses and produce a compact latent representation of the entire survey. Our results demonstrate that these latent representations captured rich and meaningful information, leading to an improved prediction performance compared to baseline models based on handcrafted features. These findings highlight the effectiveness of representation learning in capturing nuanced patterns from participants’ self-reported data. Previous studies on social isolation have primarily focused on identifying factors associated with loneliness and social well-being [ 33 ]–[ 38 ]. While these studies offer promising insights, their primary objective was not to develop machine learning models to predict social isolation in dynamic, real-world settings. Without validation in predictive contexts, the practical utility of their findings remains limited. A few studies have explored predictive modeling for the automatic detection of social isolation. For instance, Martinez et al . used features related to communication, mobility, and demographics to predict social isolation among older adults, reporting an accuracy of 100% [ 39 ]. 
However, this result was based on a small sample of only seven participants, which may offer limited generalizability. Doryab et al . examined the use of passive sensing data to infer loneliness levels among college students, achieving an $F_1$ score of 0.80 [ 40 ], notably lower than our approach. Moreover, both studies were conducted in healthy populations whose physical, cognitive, and psychosocial characteristics differ significantly from those of stroke survivors. As a result, the findings may not generalize well to post-stroke populations. We identified a small subset of EMA survey questions that significantly contributed to predicting social isolation, which is consistent with previous findings. The first important question was related to anxiety. Santini et al . reported a bidirectional relationship between social isolation and anxiety [ 41 ]. More specifically, individuals who lack social support are at increased risk of depression and anxiety, while elevated anxiety can lead to social withdrawal, further deepening feelings of loneliness and PSI. This dynamic is particularly relevant for stroke survivors, who often face a combination of physical limitations and insufficient social support. Notably, approximately 45% of individuals with stroke report feeling abandoned after hospital discharge [ 17 ]. In addition, feelings of worthlessness have also been linked to PSI. A qualitative study by Freeman et al . examined individuals with multiple sclerosis and found that limited physical ability contributed to feelings of guilt and worthlessness, which in turn reinforced social isolation [ 42 ]. These psychological and social dynamics may extend to stroke survivors, who often face similar physical limitations and emotional challenges. A recent post-stroke EMA study, which utilized network analysis to examine the original EMA survey responses, found that feelings of worthlessness, both temporally and contemporaneously, predict PSI [ 17 ]. 
This study, along with our project, indicates that stroke survivors are more likely to report feelings of isolation in follow-up surveys when they have previously reported feeling worthless. Experiencing worthlessness can hinder individuals from forming and maintaining meaningful social relationships, which can lead to a decline in social interactions and an increased feeling of isolation [ 43 ]. Finally, the number of recent social interactions was also identified as an important factor. This factor is straightforward, as limited social participation is a common consequence of a stroke. Research shows that stroke survivors often develop very small social networks, with their most significant relationships primarily involving immediate family members [ 44 ]. Fewer social interactions suggest higher levels of social disconnectedness, which research has identified as a distinct form of isolation that negatively impacts psychosocial outcomes [ 45 ]. We envision a scenario where EMA surveys are deployed on mobile devices with only a small subset of informative questions. We aim to optimize the trade-off between predictive accuracy and user engagement by limiting the survey to as few as three items. Once responses are submitted, the predicted PSI status will be identified and displayed immediately. If the system detects a risk of social isolation, it can trigger timely interventions for participants to reduce this isolation. This work has several limitations. First, participants answer the EMA survey based on predefined response options, which may not fully capture personal feelings and experiences. Future work could explore the opportunity for patients to type about their current emotional state in short free-text or voice-based natural language formats. This approach could provide richer contextual data, capturing nuances that might be missing in structured surveys. 
Second, although our model achieved strong predictive performance, it relies on self-reported data, which may be subject to individual biases or inconsistencies. Future work could incorporate more objective, multi-modal sensor data, such as inertial, heart rate, skin temperature, and bio-impedance, to complement subjective responses and provide a more comprehensive understanding of the individual’s condition. V. CONCLUSION This paper demonstrates that language embeddings extracted from structured EMA surveys contain valuable information for predicting PSI. By leveraging only a small subset of questions, a trade-off between validity and usability can be achieved. These findings highlight the potential of scalable, low-burden digital tools for real-world deployment, supporting continuous mental health monitoring and enabling more personalized rehabilitation strategies for stroke survivors. Data Availability All data produced in the present study are not available ACKNOWLEDGMENT We would like to thank Dr. Jie Xiong and Dr. VP Nguyen for their valuable feedback on this work. Footnotes (email: { yundaliu{at}cs.umass.edu }). (email: mfong{at}michaveneuropsych.org ). (email: mettsch{at}musc.edu ). (email: ys1433{at}nyu.edu ). The work was partially supported by a grant from the Eunice Kennedy Shriver National Institute of Child Health and Human Development/National Center for Medical Rehabilitation Research (NICHD/NCMRR) (K01HD095388) References [1]. ↵ L. M. O’Keefe , S. J. Doran , L. Mwilambwe-Tshilobo , L. H. Conti , V. R. Venna , and L. D. McCullough , “Social isolation after stroke leads to depressive-like behavior and decreased bdnf levels in mice ,” Behavioural brain research , vol. 260 , pp. 162 – 170 , 2014 . OpenUrl PubMed [2]. ↵ J. Haun , M. Rittman , and M. Sberna , “The continuum of connectedness and social isolation during post stroke recovery ,” Journal of Aging Studies , vol. 22 , no. 1 , pp. 54 – 64 , 2008 . OpenUrl CrossRef PubMed [3]. ↵ L. Harrick , L. 
Krefting , J. Johnston , P. Carlson , and P. Minnes , “Stability of functional outcomes following transitional living programme participation: 3-year follow-up ,” Brain Injury , vol. 8 , no. 5 , pp. 439 – 447 , 1994 . OpenUrl CrossRef PubMed [4]. ↵ C. Byrne , C. W. Saville , R. Coetzer , and R. Ramsey , “Stroke survivors experience elevated levels of loneliness: a multi-year analysis of the national survey for wales ,” Archives of Clinical Neuropsychology , vol. 37 , no. 2 , pp. 390 – 407 , 2022 . OpenUrl PubMed [5]. ↵ L. Skolarus , R. Bi , C. C. Lin , S. Hassani , Y. Curran , and J. Burke , “Abstract wmp50: Stroke survivors face greater social risks than adults who have not had a stroke: results from a nationwide study ,” Stroke , vol. 56 , no. Suppl 1, pp. AWMP50 – AWMP50 , 2025 . OpenUrl [6]. ↵ R. Freak-Poli , J. Hu , A. Z. Z. Phyo , and F. Barker , “Does social isolation, social support or loneliness influence health or well-being after a cardiovascular disease event? a narrative thematic systematic review ,” Health & Social Care in the Community , vol. 30 , no. 1 , pp. e16 – e38 , 2022 . OpenUrl PubMed [7]. ↵ H. O. Taylor , R. J. Taylor , A. W. Nguyen , and L. Chatters , “Social isolation, depression, and psychological distress among older adults ,” Journal of aging and health , vol. 30 , no. 2 , pp. 229 – 246 , 2018 . OpenUrl CrossRef PubMed [8]. ↵ V. R. Venna and L. D. McCullough , “Role of social factors on cell death, cerebral plasticity and recovery after stroke ,” Metabolic Brain Disease , vol. 30 , pp. 497 – 506 , 2015 . OpenUrl CrossRef PubMed [9]. ↵ S. Shiffman , A. A. Stone , and M. R. Hufford , “Ecological momentary assessment ,” Annu. Rev. Clin. Psychol ., vol. 4 , no. 1 , pp. 1 – 32 , 2008 . OpenUrl CrossRef PubMed Web of Science [10]. ↵ T. B. Wray , J. E. Merrill , and P. M. 
Monti , “Using ecological momentary assessment (ema) to assess situation-level predictors of alcohol use and alcohol-related consequences ,” Alcohol research: current reviews , vol. 36 , no. 1 , p. 19 , 2014 . OpenUrl PubMed [11]. ↵ K. E. Heron , R. S. Everhart , S. M. McHale , and J. M. Smyth , “Using mobile-technology-based ecological momentary assessment (ema) methods with youth: A systematic review and recommendations ,” Journal of pediatric psychology , vol. 42 , no. 10 , pp. 1087 – 1107 , 2017 . OpenUrl CrossRef PubMed [12]. ↵ E. Johnson , I. Sibon , P. Renou , F. Rouanet , M. Allard , and J. Swendsen , “Feasibility and validity of computerized ambulatory monitoring in stroke patients ,” Neurology , vol. 73 , no. 19 , pp. 1579 – 1583 , 2009 . OpenUrl CrossRef PubMed [13]. C. M. Mazure , A. H. Weinberger , B. Pittman , I. Sibon , and J. Swendsen , “Gender and stress in predicting depressive symptoms following stroke ,” Cerebrovascular diseases , vol. 38 , no. 4 , pp. 240 – 246 , 2014 . OpenUrl CrossRef PubMed [14]. ↵ A. J. Neff , Y. Lee , C. L. Metts , and A. W. Wong , “Ecological momentary assessment of social interactions: associations with depression, anxiety, pain, and fatigue in individuals with mild stroke ,” Archives of physical medicine and rehabilitation , vol. 102 , no. 3 , pp. 395 – 405 , 2021 . OpenUrl PubMed [15]. ↵ G. Spanakis , G. Weiss , and A. Roefs , “Bagged boosted trees for classification of ecological momentary assessment data ,” in ECAI 2016 . IOS Press , 2016 , pp. 1612 – 1613 . [16]. C. Lei , D. Qu , K. Liu , and R. Chen , “Ecological momentary assessment and machine learning for predicting suicidal ideation among sexual and gender minority individuals ,” JAMA network open , vol. 6 , no. 9 , p. e2333164 , 2023 . OpenUrl [17]. ↵ Y. Shi , M. W. Fong , C. L. Metts , S. L. LaVela , C. Bombardier , L. Hu , and A. W. 
Wong , “Dynamics of perceived social isolation, secondary conditions, and daily activity patterns among individuals with stroke: a network analysis of ecological momentary assessment data ,” Archives of physical medicine and rehabilitation , vol. 105 , no. 7 , pp. 1314 – 1321 , 2024 . OpenUrl PubMed [18]. ↵ E. M. Kleiman , B. J. Turner , S. Fedor , E. E. Beale , R. W. Picard , J. C. Huffman , and M. K. Nock , “Digital phenotyping of suicidal thoughts ,” Depression and anxiety , vol. 35 , no. 7 , pp. 601 – 608 , 2018 . OpenUrl CrossRef PubMed [19]. ↵ J. J. Oleson , M. A. Jones , E. J. Jorgensen , and Y.-H. Wu , “Statistical considerations for analyzing ecological momentary assessment data ,” Journal of Speech, Language, and Hearing Research , vol. 65 , no. 1 , pp. 344 – 360 , 2022 . OpenUrl PubMed [20]. ↵ D. Shin , H. Kim , S. Lee , Y. Cho , and W. Jung , “Using large language models to detect depression from user-generated diary text data as a novel approach in digital mental health screening: Instrument validation study ,” Journal of Medical Internet Research , vol. 26 , p. e54617 , 2024 . OpenUrl PubMed [21]. ↵ H. Fisher , N. Jaffe , K. Pidvirny , A. Tierney , D. Pizzagalli , and C. Webb , “Using natural language processing to track negative emotions in the daily lives of adolescents ,” Research Square , pp. rs – 3 , 2025 . [22]. ↵ J. Sedlakova , P. Daniore , A. Horn Wintsch , M. Wolf , M. Stanikic , C. Haag , C. Sieber , G. Schneider , K. Staub , D. Alois Ettlin et al. , “Challenges and best practices for digital unstructured data enrichment in health research: A systematic narrative review ,” PLOS Digital Health , vol. 2 , no. 10 , p. e0000347 , 2023 . OpenUrl CrossRef [23]. ↵ Q. Bui , K. J. Kaufman , V. Pham , E. J. Lenze , J.-M. Lee , D. C. Mohr , M. W. Fong , C. L. Metts , S. E. Tomazin , and A. W. 
Wong , “Ecological momentary assessment of real-world functional behaviors in individuals with stroke: a longitudinal observational study ,” Archives of physical medicine and rehabilitation , vol. 103 , no. 7 , pp. 1327 – 1337 , 2022 . OpenUrl PubMed [24]. ↵ R. C. Moore , C. N. Kaufmann , A. S. Rooney , D. J. Moore , L. T. Eyler , E. Granholm , S. P. Woods , J. Swendsen , R. K. Heaton , J. Scott et al. , “Feasibility and acceptability of ecological momentary assessment of daily functioning among older adults with hiv ,” The American Journal of Geriatric Psychiatry , vol. 25 , no. 8 , pp. 829 – 840 , 2017 . OpenUrl PubMed [25]. ↵ E. W. Paolillo , B. Tang , C. A. Depp , A. S. Rooney , F. Vaida , C. N. Kaufmann , B. T. Mausbach , D. J. Moore , and R. C. Moore , “Temporal associations between social activity and mood, fatigue, and pain in older adults with hiv: An ecological momentary assessment study ,” JMIR mental health , vol. 5 , no. 2 , p. e9802 , 2018 . OpenUrl [26]. ↵ V. Sanh , L. Debut , J. Chaumond , and T. Wolf , “Distilbert, a distilled version of bert: smaller, faster, cheaper and lighter ,” arXiv preprint arXiv:1910.01108, 2019 . [27]. ↵ P. Vincent , H. Larochelle , I. Lajoie , Y. Bengio , P.-A. Manzagol , and L. Bottou , “Stacked denoising autoencoders: Learning useful representations in a deep network with a local denoising criterion .” Journal of machine learning research , vol. 11 , no. 12 , 2010 . [28]. ↵ J. Albuquerque , A. M. Medeiros , A. C. Alves , M. Bourbon , and M. Antunes , “Comparative study on the performance of different classification algorithms, combined with pre-and post-processing techniques to handle imbalanced data, in the diagnosis of adult patients with familial hypercholesterolemia ,” PLoS One , vol. 17 , no. 6 , p. e0269713 , 2022 . OpenUrl PubMed [29]. ↵ B.-S. Jang , S. H. Jeon , I. H. Kim , and I. A. 
Kim , “Prediction of pseudoprogression versus progression using machine learning algorithm in glioblastoma ,” Scientific reports , vol. 8 , no. 1 , p. 12516 , 2018 . OpenUrl PubMed [30]. ↵ J. Von Neumann , R. H. Kent , H. Bellinson , and B. Hart , “The mean square successive difference ,” The Annals of Mathematical Statistics , vol. 12 , no. 2 , pp. 153 – 162 , 1941 . OpenUrl [31]. ↵ F. Shahabi , S. L. Battalio , A. F. Pfammatter , D. Hedeker , B. Spring , and N. Alshurafa , “A machine-learned model for predicting weight loss success using weight change features early in treatment ,” npj Digital Medicine , vol. 7 , no. 1 , p. 344 , 2024 . OpenUrl PubMed [32]. ↵ Z. Zhou and X. Liu , “Masked autoencoders in computer vision: A comprehensive survey ,” IEEE Access , vol. 11 , pp. 113560 – 113579 , 2023 . OpenUrl [33]. ↵ N. Goonawardene , X. Toh , and H.-P. Tan , “Sensor-driven detection of social isolation in community-dwelling elderly ,” in Human Aspects of IT for the Aged Population. Applications, Services and Contexts: Third International Conference, ITAP 2017, Held as Part of HCI International 2017, Vancouver, BC, Canada, July 9-14, 2017, Proceedings, Part II 3 . Springer , 2017 , pp. 378 – 392 . [34]. Y. Gao , A. Li , T. Zhu , X. Liu , and X. Liu , “How smartphone usage correlates with social anxiety and loneliness ,” PeerJ , vol. 4 , p. e2197 , 2016 . OpenUrl PubMed [35]. J. Petersen , S. Thielke , D. Austin , and J. Kaye , “Phone behaviour and its relationship to loneliness in older adults ,” Aging & Mental Health , vol. 20 , no. 10 , pp. 1084 – 1091 , 2016 . OpenUrl PubMed [36]. B. Wetzel , R. Pryss , H. Baumeister , J.-S. Edler , A. S. O. Gonçalves , and C. Cohrdes , ““how come you don’t call me?” smartphone communication app usage as an indicator of loneliness and social well-being across the adult lifespan during the covid-19 pandemic ,” International Journal of Environmental Research and Public Health , vol. 18 , no. 12 , p. 6212 , 2021 . OpenUrl [37]. D. 
Fulford , J. Mote , R. Gonzalez , S. Abplanalp , Y. Zhang , J. Luckenbaugh , J.-P. Onnela , C. Busso , and D. E. Gard , “Smartphone sensing of social interactions in people with and without schizophrenia ,” Journal of Psychiatric Research , vol. 137 , pp. 613 – 620 , 2021 . OpenUrl CrossRef PubMed [38]. ↵ P. I. Chow , K. Fua , Y. Huang , W. Bonelli , H. Xiong , L. E. Barnes , and B. A. Teachman , “Using mobile sensing to test clinical models of depression, social anxiety, state affect, and social isolation among college students ,” Journal of medical Internet research , vol. 19 , no. 3 , p. e62 , 2017 . OpenUrl CrossRef PubMed [39]. ↵ A. Martinez , V. Ortiz , H. Estrada , and M. Gonzalez , “A predictive model for automatic detection of social isolation in older adults ,” in 2017 International Conference on Intelligent Environments (IE) . IEEE , 2017 , pp. 68 – 75 . [40]. ↵ A. Doryab , D. K. Villalba , P. Chikersal , J. M. Dutcher , M. Tumminia , X. Liu , S. Cohen , K. Creswell , J. Mankoff , J. D. Creswell et al. , “Identifying behavioral phenotypes of loneliness and social isolation with passive sensing: statistical analysis, data mining and machine learning of smartphone and fitbit data ,” JMIR mHealth and uHealth , vol. 7 , no. 7 , p. e13209 , 2019 . OpenUrl [41]. ↵ Z. I. Santini , P. E. Jose , E. Y. Cornwell , A. Koyanagi , L. Nielsen , C. Hinrichsen , C. Meilstrup , K. R. Madsen , and V. Koushede , “Social disconnectedness, perceived isolation, and symptoms of depression and anxiety among older americans (nshap): a longitudinal mediation analysis ,” The Lancet Public Health , vol. 5 , no. 1 , pp. e62 – e70 , 2020 . OpenUrl [42]. ↵ J. Freeman , T. Gorst , H. Gunn , and S. Robens , “ “a non-person to the rest of the world”: experiences of social isolation amongst severely impaired people with multiple sclerosis ,” Disability and rehabilitation , vol. 42 , no. 16 , pp. 2295 – 2303 , 2020 . OpenUrl PubMed [43]. ↵ A. Pragholapati , N. Y. Wiratama , and R. 
Muliani , “Description of socialization ability in isolation patients in psychiatric hospital province, west java, indonesia ,” Journal of Health Science Community , vol. 2 , no. 2 , pp. 1 – 8 , 2021 . OpenUrl [44]. ↵ C. Ellis , R. Phillips , T. Hill , and P. M. Briley , “Social network structure in young stroke survivors with aphasia: A case series report ,” in Seminars in speech and language , vol. 40 , no. 05 . Thieme Medical Publishers , 2019 , pp. 359 – 369 . OpenUrl PubMed [45]. ↵ K. Li , F. Tang , S. Kwon , and Y. Jiang , “Social isolation and depressive symptoms of older chinese immigrants: The mediating effect of activity engagement ,” The International Journal of Aging and Human Development , p. 00914150241297414 , 2024 . View the discussion thread. Back to top Previous Next Posted July 17, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Leveraging Language Embeddings from EMA Surveys to Predict Perceived Social Isolation among Stroke Survivors Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Leveraging Language Embeddings from EMA Surveys to Predict Perceived Social Isolation among Stroke Survivors Yunda Liu , Alex W. K. Wong , Mandy W. M. Fong , Christopher L. 
Metts , Yun Shi , Sunghoon Ivan Lee medRxiv 2025.07.17.25331714; doi: https://doi.org/10.1101/2025.07.17.25331714 Share This Article: Copy Citation Tools Leveraging Language Embeddings from EMA Surveys to Predict Perceived Social Isolation among Stroke Survivors Yunda Liu , Alex W. K. Wong , Mandy W. M. Fong , Christopher L. Metts , Yun Shi , Sunghoon Ivan Lee medRxiv 2025.07.17.25331714; doi: https://doi.org/10.1101/2025.07.17.25331714 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) 
Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffbd75969f51b23',t:'MTc3OTQ1MzEyMQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00