DeepSeq: High-Throughput Single-Cell RNA Sequencing Data Labeling via Web Search-Augmented Agentic Generative AI Foundation Models

doi:10.1101/2025.06.17.660107

DeepSeq: High-Throughput Single-Cell RNA Sequencing Data Labeling via Web Search-Augmented Agentic Generative AI Foundation Models

2025 · doi:10.1101/2025.06.17.660107

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 25,794 characters · extracted from preprint-html · click to expand

DeepSeq: High-Throughput Single-Cell RNA Sequencing Data Labeling via Web Search-Augmented Agentic Generative AI Foundation Models | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results DeepSeq: High-Throughput Single-Cell RNA Sequencing Data Labeling via Web Search-Augmented Agentic Generative AI Foundation Models View ORCID Profile Saleem A. Al Dajani , Abel Sanchez , John R. Williams doi: https://doi.org/10.1101/2025.06.17.660107 Saleem A. 
Al Dajani 1 Department of Civil and Environmental Engineering, Massachusetts Institute of Technology , 77 Massachusetts Avenue, Cambridge, MA, 02139, United States of America Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Saleem A. Al Dajani For correspondence: sdajani{at}mit.edu Abel Sanchez 1 Department of Civil and Environmental Engineering, Massachusetts Institute of Technology , 77 Massachusetts Avenue, Cambridge, MA, 02139, United States of America Find this author on Google Scholar Find this author on PubMed Search for this author on this site John R. Williams 1 Department of Civil and Environmental Engineering, Massachusetts Institute of Technology , 77 Massachusetts Avenue, Cambridge, MA, 02139, United States of America Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Generative AI foundation models offer transformative potential for processing structured biological data, particularly in single-cell RNA sequencing, where datasets are rapidly scaling toward billions of cells. We propose the use of agentic generative AI foundation models with real-time web search to automate the labeling of experimental data, achieving up to 82.5% accuracy. This addresses a key bottleneck in supervised learning for structured omics data by increasing annotation throughput without manual curation and human error. Our approach enables the development of virtual cell foundation models capable of downstream tasks such as cell-typing and perturbation prediction. As data volume grows, these models may surpass human performance in labeling, paving the way for reliable inference in large-scale perturbation screens. This application demonstrates domain-specific innovation in health monitoring and diagnostics, aligned with efforts like the Human Cell Atlas and Human Tumor Atlas Network. 1. 
Introduction & Background Single-cell RNA sequencing (scRNA-seq) has transformed our ability to understand biological systems at cellular resolution, enabling the decomposition of heterogeneous tissues into interpretable cellular subpopulations ( Hicks, 2018 ; Shalek et al., 2013 ). Unlike bulk sequencing, which averages gene expression across thousands of cells, single-cell approaches preserve cellular diversity and support downstream analyses such as lineage tracing, perturbation inference, and cell-type identification. A key challenge emerging from these advances is scale. With improved protocols and barcoding methods, scRNA-seq datasets have grown from thousands to millions of cells per experiment, opening the door to system-level modeling of gene regulation and cellular behavior. However, the complexity and dimensionality of these datasets far outpace manual annotation methods, particularly as the number of clusters grows with data volume. This challenge becomes even more pressing when considering tasks like supervised learning, pseudotime ordering, and perturbation modeling, which rely on accurate and interpretable cell-type labels. In this work, we introduce DeepSeq (https://github.com/saleemaldajani/deepseq), a pipeline that applies large language models (LLMs) to automate labeling of structured single-cell data using top marker genes from unsupervised clustering. DeepSeq supports both local inference using lightweight models and agentic web-enhanced querying via GPT-4o. The system is designed for reproducibility and scalability, incorporating filtering, dimensionality reduction, structured prompt generation, and accuracy benchmarking. In the following sections, we describe the DeepSeq architecture and algorithms in detail, demonstrate its annotation accuracy across multiple LLM configurations, and discuss implications for high-throughput cell atlas construction and virtual cell modeling. 2. 
Methods The DeepSeq pipeline integrates single-cell RNA-seq preprocessing with foundation model–driven cell-type annotation using large language models (LLMs). The full workflow spans filtering, clustering, marker gene extraction, prompting, and structured evaluation. All core analysis and evaluation scripts are provided in the public repository. Algorithm 1 LLM-Based Cell-Type Labeling with DeepSeq Download figure Open in new tab 2.1. Preprocessing and Filtering Raw single-cell data is processed into gene-by-cell matrices and converted into the AnnData format. Filtering is performed using three strategies: (1) standard thresholding (e.g., ≥ 200 genes per cell), (2) automated knee-point detection using KneeLocator , and (3) smoothed inflection-based filtering. These methods produce cleaned datasets with visual diagnostics for quality control. 2.2. Clustering and Marker Gene Extraction Dimensionality reduction is performed using PCA, and cells are clustered using the Leiden algorithm based on neighborhood graphs. UMAP is used to embed cells in 2D for visualization. For each cluster, the top marker genes are identified using Scanpy’s ranking functions and are used to construct structured prompts for LLMs. 2.3. LLM-Based Annotation LLMs are prompted with top-ranked marker genes per cluster to generate candidate cell-type labels. DeepSeq supports both local inference (via Ollama ) and agentic inference (via gpt-4o with web search). Prompt orchestration and postprocessing are handled by LangChain. Prompts are designed following the format described by Hou and Ji ( Hou & Ji, 2024 ), adapted to structured transcriptomic data. 2.4. Label Evaluation and Ground Truth Assessment To evaluate the precision of LLM-based labeling, we implement a two-stage validation protocol: Marker Gene Verification: We confirm that the top marker genes per cluster sufficiently match known canonical markers for each predicted label, ensuring that the evaluation is biologically meaningful. 
Label Accuracy Assessment: We compute the accuracy of LLM-generated labels by comparing them to manually curated ground truth labels. The comparison accounts for fuzzy string matching and synonym resolution to robustly assess agreement at the cluster level. This framework ensures reproducible, interpretable evaluation of foundation models in structured single-cell data domains. 3. Results We evaluated DeepSeq’s ability to automate structured single-cell annotation using foundation models prompted with top marker genes per cluster. As shown in Figure 4 , our two-stage evaluation assesses both the biological plausibility of marker gene matches and the accuracy of resulting cell-type predictions relative to ground truth annotations. Download figure Open in new tab Figure 1. Exponential growth of single-cell sequencing enables foundation model–scale datasets. The number of single cells profiled per study has followed an exponential trend since 2009, resembling Moore’s law and enabling projections exceeding $10^9$ cells by 2030. Historical data points (black dots) are shown alongside a log-scale projection (red dashed line), adapted from ( Svensson et al., 2018 ). This scaling trend motivates the development of foundation models tailored to structured single-cell data, which require billion-scale inputs for training on tasks such as annotation, perturbation modeling, and virtual cell simulation. Download figure Open in new tab Figure 2. DeepSeq system architecture. DeepSeq is a foundation model–powered web application designed for automated labeling of structured single-cell RNA sequencing data. The pipeline integrates large language models (OpenAI, Ollama), orchestration frameworks (LangChain), and differential expression analysis tools (Scanpy, TRADE) to process high-throughput omics data. It spans ETL, analytics, and deployment layers using cloud-native platforms (Colab, Codespaces) and standardized biological formats (CRAM, H5AD). 
DeepSeq demonstrates a domain-specific application of foundation models for scalable biomedical data annotation and virtual cell modeling. Download figure Open in new tab Figure 3. Dual inference workflows in DeepSeq for structured data annotation. The top panel illustrates local inference via the Ollama client, enabling efficient deployment of domain-specialized LLMs (e.g., LLaMA3) for on-device cell type labeling. The bottom panel depicts a live agentic inference pipeline using GPT-4o with web search capabilities, where an OpenAI agent autonomously retrieves and summarizes external content to augment biological annotations. Together, these workflows demonstrate the versatility of foundation models in structured biomedical pipelines under both offline and online settings. Download figure Open in new tab Figure 4. Evaluation of foundation model accuracy in cell-type labeling from structured single-cell data. Top panel: Agreement between top marker genes per cluster and a ground truth reference ensures the validity of downstream label evaluations. Bottom panel: Accuracy comparison across LLMs using cluster-level marker gene inputs. The evaluation method was automated through prompting strategies inspired by ( Hou & Ji, 2024 ). The agentic gpt-4o model achieves the highest labeling accuracy (82.5%), demonstrating its ability to interpret structured gene expression signatures, and showcasing the feasibility of foundation models for high-throughput annotation in single-cell transcriptomics. 3.1. Marker Match Validation The top panel in Figure 4 confirms that marker genes extracted for each cluster match canonical gene sets for known cell types, validating the biological grounding of the prompts used for LLM querying. This step ensures that model outputs reflect meaningful transcriptional signatures rather than spurious correlations. 3.2. LLM Label Accuracy We then compared the predicted labels from each LLM against manually curated ground truth. 
As shown in the bottom panel, the agentic GPT-4o model achieved the highest accuracy (82.5%), outperforming both earlier GPT-3.5 variants and smaller local models like LLaMA3-1B. These results demonstrate that foundation models, when structured with domain-informed prompts, can approach expert-level annotation performance in high-throughput settings. 3.3. Reproducibility and Benchmarking The full set of results—including per-cluster marker genes, predicted labels, ground truth matches, and evaluation scores—is reproducibly generated via scripts provided in the DeepSeq repository. Each step of the pipeline—from filtering and dimensionality reduction to LLM prompting and evaluation—outputs interpretable logs, enabling precise traceability of every decision made during annotation. This framework supports extensibility to larger datasets, alternative LLM configurations, or modified evaluation strategies. 4. Discussion Our results show that foundation models, particularly agentic variants like gpt-4o , can achieve strong performance in structured biological tasks such as cell-type annotation. As illustrated in Figure 5 , GPT-4o achieved 82.5% agreement with ground truth labels when prompted with top-ranked marker genes per cluster. This level of accuracy, attained without fine-tuning or task-specific supervision, underscores the potential of foundation models for high-throughput biological interpretation. Download figure Open in new tab Figure 5. Total parameter count versus labeling accuracy for foundation models on structured single-cell data. Accuracy reflects automated cell-type labeling using top marker genes per cluster and prompting strategies derived from ( Hou & Ji, 2024 ). Despite similar active parameter counts, gpt-4o achieves the highest labeling accuracy (82.5%) while operating within a 1.8T parameter architecture. 
The logarithmic y-axis reveals how model scale influences annotation performance, highlighting diminishing returns beyond 100B parameters without more domain-specific single-cell data. These trends underscore the need for continued data scaling—toward billions or even trillions of single cells—to approach human-level labeling accuracy. Interestingly, model performance did not scale linearly with size. The leap in accuracy from LLaMA3-2-1B to GPT-3.5-turbo was larger than the improvement from GPT-3.5-turbo to GPT-4o, despite the latter having significantly more parameters. Since both GPT-3.5 and GPT-4o leverage agentic web search while LLaMA3-2-1B does not, these results suggest that agentic capabilities offer a baseline improvement, but architectural refinements and scaling yield diminishing returns in structured reasoning tasks without more domain-specific data — highlighting the need for experimental generation of such data from high-throughput experiments that will in turn require high-throughput labeling, as demonstrated in this paper. Local models like LLaMA3-1B also performed competitively given their size, reinforcing lightweight deployments in constrained environments. A key insight from our findings is that, similar to how language models improve with larger parameter counts and more diverse training data, cell-type annotation accuracy also depends on the scale and diversity of experimental input. As shown in Figure 1 , the number of cells profiled in single-cell studies has followed an exponential trajectory, with projections suggesting that datasets containing over $10^9$ cells will become feasible within the decade. This scale is likely necessary to train robust, domain-specific models capable of resolving subtle transcriptional differences across tissues, conditions, and perturbations. We are now at a turning point where the volume of biological data is sufficient to support foundation model–level training and evaluation in structured omics. 
Nonetheless, the observed gap between model predictions and perfect label accuracy highlights current limitations in both model capabilities and marker gene distinctiveness. Marker-based prompts are only as informative as the signal contained within each cluster, and foundation models still exhibit brittleness in biologically ambiguous cases. Evaluation scripts in our repository provide insight into these edge cases for reproducible future benchmarking. These findings validate the use of LLMs for structured omics annotation, while motivating design of prompting protocols, marker gene selection, and evaluation pipelines. 5. Conclusion We introduced DeepSeq, a modular pipeline that applies foundation models to the structured domain of single-cell transcriptomics. By using top-ranked marker genes as prompts, DeepSeq enables large language models to perform scalable, automated cell-type labeling with strong agreement to expert-curated ground truth. Our evaluation shows that agentic models equipped with real-time retrieval capabilities outperform static or smaller models, highlighting the importance of model architecture and inference context in structured annotation tasks. Future work will extend this approach beyond cell-type classification to dynamic biological modeling, including transcriptional perturbation prediction and temporal inference. As single-cell datasets continue to scale, structured prompting combined with model-guided annotation offers a promising foundation for building interpretable, data-driven systems capable of capturing complex biological processes. These results also suggest that the scaling laws of language models—where performance improves with model size and data—extend to biological annotation. As illustrated in our scaling projections, the exponential growth in single-cell sequencing puts billion-cell datasets within reach. 
This volume of training data opens the door to training virtual cell foundation models that operate at scale and can generalize across tissue types, organisms, and experimental conditions. Unlike traditional pipelines constrained by human curation, DeepSeq leverages the compositional reasoning and retrieval capabilities of LLMs to automate annotation with high reproducibility and throughput. As more high-quality single-cell data becomes available, these models will continue to improve. Ultimately, foundation models applied to structured biological data will not only match—but are likely to surpass—human-level annotation performance in both speed and accuracy. Looking ahead, integrating DeepSeq with multi-omic datasets—such as single-cell ATAC-seq or spatial transcriptomics ( Wang et al., 2025 )—could further enhance the resolution of cell identity and state. By extending the prompting framework to handle diverse molecular modalities, DeepSeq can evolve into a general-purpose interface for querying structured biological systems using natural language. Impact Statement This paper presents work whose goal is to advance the application of generative AI foundation models for structured biological data, specifically in the context of single-cell transcriptomics. By automating cell-type labeling using agentic prompting strategies, our approach increases annotation throughput and enables scalable deployment in biomedical pipelines. These capabilities have potential implications for diagnostics, perturbation screening, and biological discovery at scale. The methodology developed reflects a broader trend toward integrating generative models with structured biological data, offering a pathway for more versatile and data-driven approaches to life science applications. As the underlying datasets grow toward billions or trillions of cells, ethical considerations emerge around privacy, model transparency, and equitable generalization. 
We emphasize the importance of responsible deployment and alignment with expert oversight in real-world health contexts. Acknowledgements S.A.A. acknowledges financial support from the Friesecke (1961) Fellowship through the Department of Civil and Environmental Engineering (CEE) at the Massachusetts Institute of Technology (MIT), and is grateful for the advising and mentorship of Professors Heidi Nepf and Ali Jadbabaie. The author thanks members of the AbuGoot Laboratory for insightful discussions, including—but not limited to—Professors Omar Abudayyeh and Jonathan Gootenberg, Thomas Kesheshian, Dr. Juhyung Jung, Elvira Kinzina, Oscar Pitcho, Dan Lesman, Nic Fishman, Jason Lequeyer, Tanush Kumar, and others for their valuable feedback and encouragement. The author also gratefully acknowledges members of the Gladyshev Laboratory, including—but not limited to—Professor Vadim Gladyshev, Dr. Jesse Poganik, Dmitrii Glubokov, and others, for their support. Footnotes Proceedings of the 42nd International Conference on Machine Learning , Vancouver, Canada. PMLR 267, 2025. Copyright 2025 by the author(s). https://github.com/saleemaldajani/deepseq/tree/agentic References ↵ Hicks , S. C. Introduction to single-cell RNA-seq . https://www.stephaniehicks.com/2018-bioinfosummer-scrnaseq/ , 2018. Presented at BioInfoSummer 2018 , Accessed: 2025-04-23 . ↵ Hou , W. and Ji , Z. Assessing GPT-4 for cell type annotation in single-cell RNA-seq analysis . Nature Methods , 21 ( 8 ): 1462 – 1465 , 2024 . OpenUrl CrossRef PubMed ↵ Shalek , A. K. , Satija , R. , Adiconis , X. , Gertner , R. S. , Gaublomme , J. T. , Raychowdhury , R. , Schwartz , S. , Yosef , N. , Malboeuf , C. , Lu , D. , et al. Single-cell transcriptomics reveals bimodality in expression and splicing in immune cells . Nature , 498 ( 7453 ): 236 – 240 , 2013 . OpenUrl CrossRef PubMed Web of Science ↵ Svensson , V. , Vento-Tormo , R. , and Teichmann , S. A. Exponential scaling of single-cell RNA-seq in the past decade . 
Nature Protocols , 13 ( 4 ): 599 – 604 , 2018 . OpenUrl CrossRef PubMed ↵ Wang , H. , He , Y. , Coelho , P. P. , Bucci , M. , Nazir , A. , Chen , B. , Trinh , L. , Zhang , S. , Huang , K. , Chandrasekar , V. , et al. SpatialAgent: An Autonomous AI Agent for Spatial Biology . bioRxiv , pp. 2025 – 04 , 2025 . View the discussion thread. Back to top Previous Next Posted June 23, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following DeepSeq: High-Throughput Single-Cell RNA Sequencing Data Labeling via Web Search-Augmented Agentic Generative AI Foundation Models Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share DeepSeq: High-Throughput Single-Cell RNA Sequencing Data Labeling via Web Search-Augmented Agentic Generative AI Foundation Models Saleem A. Al Dajani , Abel Sanchez , John R. Williams bioRxiv 2025.06.17.660107; doi: https://doi.org/10.1101/2025.06.17.660107 Share This Article: Copy Citation Tools DeepSeq: High-Throughput Single-Cell RNA Sequencing Data Labeling via Web Search-Augmented Agentic Generative AI Foundation Models Saleem A. Al Dajani , Abel Sanchez , John R. 
Williams bioRxiv 2025.06.17.660107; doi: https://doi.org/10.1101/2025.06.17.660107 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7633) Biochemistry (17680) Bioengineering (13889) Bioinformatics (41927) Biophysics (21445) Cancer Biology (18585) Cell Biology (25491) Clinical Trials (138) Developmental Biology (13373) Ecology (19897) Epidemiology (2067) Evolutionary Biology (24308) Genetics (15606) Genomics (22494) Immunology (17736) Microbiology (40385) Molecular Biology (17175) Neuroscience (88583) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4822) Physiology (7641) Plant Biology (15149) Scientific Communication and Education (2045) Synthetic Biology (4293) Systems Biology (9822) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00