SMART: an approach for accurate formula assignment in spatially-resolved metabolomics

doi:10.1101/2025.03.12.642824

SMART: an approach for accurate formula assignment in spatially-resolved metabolomics

2025 · doi:10.1101/2025.03.12.642824

preprint OA: closed CC-BY-NC-ND-4.0

📄 Open PDF Full text JSON View at publisher

Full text 50,101 characters · extracted from preprint-html · click to expand

SMART: an approach for accurate formula assignment in spatially-resolved metabolomics | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results SMART: an approach for accurate formula assignment in spatially-resolved metabolomics Yinghao Cao , Shengxi Li , Zhaoyi Liu , Zixian Jia , Weidong Zhuang , Xu Pan , Jinyu Zhou , Lifeng Yang , Lin Wang doi: https://doi.org/10.1101/2025.03.12.642824 Yinghao Cao 1 State Key Laboratory of Common Mechanism Research for Major Disease, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences and Peking Union Medical College , Beijing 100730, China Find this author 
on Google Scholar Find this author on PubMed Search for this author on this site Shengxi Li 1 State Key Laboratory of Common Mechanism Research for Major Disease, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences and Peking Union Medical College , Beijing 100730, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Zhaoyi Liu 1 State Key Laboratory of Common Mechanism Research for Major Disease, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences and Peking Union Medical College , Beijing 100730, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Zixian Jia 1 State Key Laboratory of Common Mechanism Research for Major Disease, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences and Peking Union Medical College , Beijing 100730, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Weidong Zhuang 2 Shanghai Institute of Nutrition and Health, University of Chinese Academy of Sciences , Shanghai 200031, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Xu Pan 3 Institute of Blood Transfusion, Chinese Academy of Medical Sciences and Peking Union Medical College , Chengdu 610052, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jinyu Zhou 1 State Key Laboratory of Common Mechanism Research for Major Disease, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences and Peking Union Medical College , Beijing 100730, China 4 Department of Pharmacology, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences and Peking Union Medical College , Beijing 100005, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site Lifeng Yang 2 Shanghai Institute of Nutrition and Health, 
University of Chinese Academy of Sciences , Shanghai 200031, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: lfyang{at}sinh.ac.cn linwangZJU{at}hotmail.com Lin Wang 1 State Key Laboratory of Common Mechanism Research for Major Disease, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences and Peking Union Medical College , Beijing 100730, China 4 Department of Pharmacology, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences and Peking Union Medical College , Beijing 100005, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: lfyang{at}sinh.ac.cn linwangZJU{at}hotmail.com Abstract Full Text Info/History Metrics Preview PDF Abstract Spatially-resolved metabolomics plays a critical role in unraveling tissue-specific metabolic complexities. Despite its significance, this profound technology generates thousands of features, yet accurate annotation significantly lags behind LC-MS-based approaches. To bridge this gap, we introduce SMART, an open-source platform designed for precise formula assignment in mass spectrometry imaging. SMART constructs a KnownSet database containing 2.8 million formulas linked by DBEdges derived from repositories such as HMDB, ChEMBL, PubChem, and BioEdges from KEGG biological reactant pairs. Using a multiple linear regression model, SMART extracts formula networks associated with the m/z of interest and scores potential candidates based on several criteria, including linked formulas, DBEdges/BioEdges, and ppm values. Benchmarking against reference datasets demonstrates that SMART achieves prediction accuracy rates of up to 92.4%. Applied to mass spectrometry imaging, SMART successfully annotated 986 formulas in developing mouse embryos. 
This robust platform enables systematic formula annotation within tissues, enhancing our understanding of metabolic heterogeneity. Introduction Metabolomics is a powerful tool for mapping the landscape of metabolites in biological systems, offering insights into cellular functions, disease progression, drug responses 1 - 3 , and bridging the genotype-phenotype gap 4 , 5 . Liquid chromatography-mass spectrometry (LC-MS)-based metabolomics allows simultaneous detection and quantification of thousands of metabolite features and is widely used in plant, microbial and mammalian studies. Advances in tools like XCMS 6 , MetLin 7 , MetDNA 8 , NetID 9 , SIRIUS 10 , BUDDY 11 and KGMN 12 have enabled the discovery of numerous biologically active metabolites. Recent progress in mass spectrometry imaging (MSI) has propelled spatially-resolved metabolomics to the forefront, allowing visualization of metabolic processes within biological systems 13 - 21 and probing metabolic fluxes, such as TCA carbon flux 22 , amino nitrogen flux 23 , and lipogenic flux 24 . However, challenges persist in untargeted spatially-resolved metabolomics, due to the chemical diversity and abundance variability of metabolites, and limitations like the lack of chromatographic separation, retention time, and sufficient ion abundance for robust MS/MS spectra. Although ion mobility and post-ionization techniques offer potential solutions, high cost and complexity limit their broad application 25 . Consequently, LC-MS or LC-MS/MS-based metabolomics tools cannot be seamlessly adapted for MSI data analysis. For metabolite annotation in MSI, formula assignment typically starts by matching m/z signals to metabolomic databases like HMDB 26 and KEGG 27 , or by identifying isotopic patterns based on ion intensity and spatial localization pattern. Multiple tools have been developed to improve confidence in formula annotation. 
For instance, pySM employs a false discovery rate (FDR)-controlled framework for metabolite annotation 28 , rMSIannotation integrates isotopic pattern coherence, image correlation and mass error 29 , and MSIannotator compares MSI data against reference metabolites from LC-MS data 30 . However, these methods have limitations in providing reliable annotations for MSI data due to two potential flaws: (i) many compounds are either poorly characterized or absent in current databases, hindering accurate formula assignment for detected m/z values; (ii) instrumental errors, along with limitations in mass accuracy, resolution and sensitivity, introduce uncertainties that further challenge annotation reliability. To address these issues, we introduce SMART (Spatial Metabolite Formula Predictor, Figure 1a ), a novel tool designed for precise formula annotation in MSI. SMART builds a comprehensive KnownSet formula database by integrating formulas from repositories including HMDB, ChEMBL 31 , and PubChem 32 . By analyzing chemical shifts (DBEdges) from random metabolite pairs in KnownSet (Supplementary Table 1) and biological formula shifts (BioEdges) from the KEGG REACTION 27 database, SMART connects all possible formulas in the KnownSet database by DBEdges/BioEdges. Based on 118 manually validated metabolites from NetID, SMART extracted several features from the KnownSet database to create a multiple linear regression (MLR) model for formula annotation. Validation on large datasets shows that SMART achieves high prediction accuracy, reaching up to 92.4%. Applying SMART to MSI data enabled systematic annotation of 986 formulas in developing embryos, providing new insights into spatial metabolic compartmentalization and its dynamic alteration during embryogenesis. Download figure Open in new tab Fig 1. The framework of SMART and ppm matching for formula assignment. a. The framework of SMART. b. Formulas from the MoNA and GNPS datasets that exist in public databases. 
Gray color represents missing formulas. c. Formula assignment for MoNA and GNPS datasets based on ppm matching against HMDB or PubChem. Gray color represents false assignment. For each bar, the dark color represents Top 1 accuracy, while the light color represents Top 3 accuracy. Black lines indicate the boundary for a prediction accuracy of 50%. Results Evaluation of Metabolite Formula Matching Across Databases Using ppm HMDB and PubChem are prominent databases for metabolite matching and novel compound searches. To assess their coverage, we analyzed two datasets: MoNA (9,010 metabolites; https://mona.fiehnlab.ucdavis.edu/ ) and GNPS 33 (3,280 metabolites). We compared their unique formulas with those in HMDB, PubChem and ChEMBL. Results showed that 40.3% of MoNA and 32.9% of GNPS formulas were missing in HMDB, while PubChem covered most formulas with only 0.4%-1.5% missing ( Figure 1b ). Next, we extracted the measured m/z values from MoNA and GNPS, grouping them into six ppm error intervals: 0-1, 1-2, 2-3, 3-5, 5-10, 10-20. Fewer than 50% of the m/z values in most intervals matched in HMDB and PubChem accurately, with larger ppm intervals leading to more incorrect matches ( Figure 1c , Figure S1a-b). For instance, within the 0-1 ppm range, approximately 3000 m/z from MoNA and 1000 m/z from GNPS were found, but only around 200 and 500 matched in HMDB. Notably, using PubChem reduced matching accuracy significantly. These findings highlight the need for a more comprehensive database and a new pipeline to enhance formula assignment accuracy, minimizing reliance on ppm matching alone. SMART Database Construction Figure 1a illustrates the workflow of the Spatial Metabolite Formula Predictor (SMART) approach, highlighting two key features: building a comprehensive database of unique formulas, and machine learning-assisted candidate ranking. 
Formulas with molecular weight exceeding 1000 and those not following IUPAC nomenclature (Supplementary Table 2) were excluded, leaving 20,154 (9.25%) from HMDB, 346,214 (15.02%) from ChEMBL, and 2,844,720 (41.68%) from PubChem (Figure S1c). Overlaps among these yielded 2,852,349 unique formulas, refined into KnownSet (Figure S1d). Chemical differences between KnownSet formulas produced over one billion shifts, termed DBEdges ( Figure 2a ). Download figure Open in new tab Fig 2. SMART database construction and dataset generation. a. Construction of the SMART database. b. The occurrence of DBEdges, BioEdges with direct reaction shifts or indirect reaction shifts, and the reported reaction rules in NetID and Buddy. To filter nonsensical connections due to synthetic compounds in PubChem and ChEMBL, we used a biological reaction-guided process. KEGG provided 7,905 reaction pairs: 5,882 direct reaction pairs (DR), and 2,023 indirect reaction pairs (IDR). Here DR represents direct metabolite links, while IDR refers to unresolved R-group or chemical repeats. We calculated the biological formula shifts both from DR and IDR pairs to form BioEdges (Supplementary Table 3). When ranking BioEdges by DBEdges frequency, 510 (73.8%) shifts from DR and 442 (37.3%) from IDR were in the top 50,000 DBEdges (Figure S1e). Using a slope cutoff of 0.001, DBEdges beyond rank 9,508 were excluded ( Figure 2b ). Consequently, our formula shift database includes 1,787 BioEdges and 8,814 ChemEdges (other DBEdges except BioEdges) (Figure S1f). BioEdges were enriched in C-H-O shifts, while ChemEdges contained more C-H-N-O shifts (Figure S1g). Ultimately, the SMART database interconnected all 2,852,349 formulas via 8,814 ChemEdges and 1,787 BioEdges ( Figure 2a ). 
Formula Annotation and Evaluation To annotate measured m/z using the SMART database, we built a multiple linear regression (MLR) model incorporating four crucial features: Sn (number of nodes linked to each candidate), BioEdges (biological shift connected to each candidate), ChemEdges (chemical shift edges connected to each candidate), and ppm (mass error). The model was trained using the NetID dataset of 118 metabolites with m/z errors under 3 ppm (Supplementary Table 4). The results indicated that lower ppm errors, and higher Sn, BioEdge and DBEdge yielded more accurate formula predictions (Figure S2a). We further tested the method on the GNPS dataset with 3280 metabolites, showing similar trends ( Figure 3a ). Download figure Open in new tab Fig 3. Formula assignment in SMART. a. Feature distribution of formula candidates in the SMART database within 5 ppm for the GNPS dataset. For each m/z , features were normalized by dividing by the maximum value. The blue color represents the true formula and yellow color represents the mean value of false formulas. b. Formula assignment for m/z 185.9934. The network for m/z 185.9934 was constructed by matching the m/z value with formula candidates within a defined ppm tolerance in SMART database (b). In the network, large circles filled with colors represent candidates from different datasets (HMDB: blue, Other candidates: orange). All these candidates were linked to the formula in SMART database (small green circles) via BioEdges (green line) or ChemEdges (brown line). Four features of formula candidates were extracted and normalized from the network including ppm, Sn, BioEdge, and ChemEdge. Scores were calculated by SMART MLR model. 
c. ROC analysis on GNPS datasets using models that randomly combined the four features including: 1, Combined; 2, BioEdge_DBEdge_PPM; 3, Sn_DBEdge_PPM; 4, Sn_BioEdge_PPM; 5, Sn_BioEdge_DBEdge; 6, Sn_BioEdge; 7, Sn_DBEdge; 8, Sn_PPM; 9, BioEdge_DBEdge; 10, BioEdge_PPM; 11, DBEdge_PPM; 12, Sn; 13, BioEdge; 14, DBEdge; 15, PPM. The values in parentheses are the average AUC values. To illustrate how the MLR model works, we consider m/z 185.9934 as an example. In a kidney extract sample, m/z 185.9934 was identified as 3-phosphoglyceric acid, with an error of 2.6 ppm using LC-MS (Figure S2b and c). SMART linked candidate formulas through edges forming a network that expanded as ppm tolerance increased ( Figure 3b ). At 5 ppm, 13 candidates, including the correct one (C3H7O7P), were identified (Figure S2b). While some candidates (e.g., C4H11O2PS2) offered closer ppm tolerances, the MLR model ranked C3H7O7P highest (Figure S2b), highlighting the limitations of ppm matching alone. To assess the importance of the four features in the MLR model, we performed ROC analysis on both GNPS and MoNA datasets. Results showed that the combination of all four features achieved the highest AUC at 0.939 for GNPS and 0.915 for MoNA, respectively ( Figure 3c and Figure S2d). Metrics like accuracy, precision, sensitivity and specificity (Figure S2e and f) further confirmed the performance with all features combined. SMART’s accuracy was further validated against SIRIUS 10 , BUDDY 11 (without MS/MS information), and ppm matching across four published datasets (American Gut, Tomato, Chagas Diseases and NIST human feces projects) 11 . Our method achieved high Top 1 accuracy rates, with an average of 83.8% across all datasets, and an average Top 3 accuracy rate of 92.4%, surpassing the average performance of SIRIUS (71.5%) and BUDDY (80.4%) ( Figure 4a , Supplementary Table 5-9). 
In high ppm tolerance datasets (e.g., GNPS and MoNA), SMART maintained high accuracy (Top1 accuracy of 64.2% and a Top3 accuracy of 79.5%) compared to BUDDY (Top1 accuracy<50%) ( Figure 4b , Supplementary Table 10-12). These results confirm SMART’s reliability for formula assignment with only m/z value, even without RT and MS/MS. Download figure Open in new tab Fig 4. Accuracy evaluation of SMART. a. Top1/Top3 accuracy of SMART compared to other tools for four published datasets (American Gut Project, Tomato, Chagas Diseases and NIST human feces). PPM_KnownSet refers to using ppm matching against the KnownSet database. b. Top 1/Top 3 accuracy of SMART (using 5 ppm) and BUDDY (without MS/MS) against GNPS and MoNA datasets. Metabolomics Changes During Mouse Embryo Development Embryonic development involves complex metabolic reprogramming within each organ, highlighting the need for a detailed study of metabolic regulation throughout embryogenesis 34 . Using MSI-based analysis, we examined mouse embryos at day 14.5 (E14.5) and day 18 (E18) ( Figure 5a ), detecting 1,357 and 1,431 m/z peaks, respectively, with 1,023 shared ( Figure 5b ). Isotope pattern matching and manual review revealed that over 30% (347 of 1,023) of the shared peaks lacked isotope information due to low signal intensity or overlapping signals (Figure S3a). As a result, tools like pySM and rMSIannotation were insufficient for annotating all m/z peaks, whereas SMART successfully annotated 986 m/z values (96.4% of the 1,023 peaks). Download figure Open in new tab Fig 5. Metabolomics changes during mouse embryo development. a. H&E staining of mouse embryos from E14.5 and E18. Colored outlines designate nine regions of the embryo. b. Statistics of peaks in MSI results for E14.5 and E18 of mouse embryos. Formula assignment and validation was performed for shared 1,023 candidate peaks. 
The validation results were classified into five types according to the peak complexity (single, multiple, complex, not enough peak and missing). c. An upset plot illustrates the intersection size with more than five candidates across nine organs at E14.5 and E18, with the number of peaks detected in each organ indicated in the left bar. To confirm the uniqueness of these annotated formulas, we performed LC-MS-based metabolomics on extracted mouse embryo samples. Among the formula candidates, 163 (21.63%) yielded singular peaks in LC-MS within 5 ppm, indicating their unique identities without interference from isomers ( Figure 5b , Figure S3b). Additionally, 311 (34.01%) of these formulas generated multiple peaks in LC-MS, suggesting the presence of multiple metabolites corresponding to a single formula. For instance, C6H12O6 could represent glucose, fructose or inositol. These findings indicate that additional analyses, such as MS/MS, derivatization, or ion mobility, are necessary to reliably identify metabolites and differentiate isomers. Of the formulas detected in both MSI and LC-MS, 38 were further validated and identified by matching m/z values and retention time with purchased standards (Supplementary Table 13). For instance, C4H9N3O2 eluting at 14.49 min was confirmed as creatine, and C6H14N2O2 eluting at 20.11 min was identified as lysine (Figure S3c-d). It has been found that glucose levels in the brain are higher than in other tissues, and glycolytic intermediates such as glucose-6-phosphate (G6P) are elevated in both the liver and brain after E12 35 . We found high signal intensity of glucose and G6P via examining their spatial distribution in our MSI results (Figure S3e). 
To further explore metabolic changes during embryonic development from a spatial perspective, we aligned Hematoxylin and Eosin (H&E) images of embryos with MSI data to identify key organs (e.g., brain, gut, heart, liver) ( Figure 5a ) and reveal organ-specific metabolic profiles. The brain had the most assigned formulas, while the liver showed the fewest (Figure S3f). 176 formulas were detected across all organs with varying intensities, and others were specific to particular organs, such as 31 formulas in the brain and 6 in the liver ( Figure 5c ). Among all organs, the liver displayed the greatest number of formulas with signal levels rising at E18 compared to E14.5 (Figure S3g). Conversely, brain signals decreased significantly (Figure S3g). For example, C10H15N3O5, the top 1 candidate for m/z 257.1021, was enriched in the brain, liver and thymus primordium at E18 (Figure S3h). LC-MS analysis confirmed it as 5-methylcytidine, which plays a critical role in embryonic stem cell self-renewal and differentiation 36 . Similarly, C7H15NO3 (carnitine) was enriched in liver, bladder, and lung at E18 (Figure S3i), facilitating fatty acid beta-oxidation and thereby supporting embryonic development 37 . Thus, SMART can accurately annotate formulas solely based on m/z information, enhancing MSI metabolite identification and related biological studies. Discussion Organs are composed of distinct spatial regions and cell types, varying in proximity to blood vessels for nutrient acquisition. Variations in transporter and enzyme expression further amplify metabolic differences, even among neighboring cells. Current MSI techniques such as MALDI or DESI generate peaks with m/z information but lack robust MS/MS fragmentation due to the low sensitivity and miss RT because of the absence of pre-separation steps. Therefore, traditional pipelines that rely on RT or MS/MS data are unsuitable for MSI data analysis. 
Typically, m/z values are matched with metabolite databases based on natural abundance, but mass spectrometry accuracy, ppm variation and machine sensitivity limit these approaches to 35% accuracy against established databases ( Figure 4a ). While high abundance metabolites can sometimes be annotated, a substantial gap remains in annotating metabolites with precision. The development of SMART represents an advancement in spatially-resolved metabolomics, addressing the challenges of formula annotation in MSI. Its extensive database and multiple linear regression model enable accurate annotation, as demonstrated by its annotation of 986 formulas in developing mouse embryos. ChemEdges and BioEdges offer insights into possible formula extension. Beyond spatially-resolved analysis, SMART also shows great promise for application in single cell metabolomics, which similarly lacks RT information and faces challenges in obtaining robust MS/MS spectra. Moreover, SMART has potential to predict the fragment formula in MS/MS, which is crucial for high-throughput metabolite structure inference. While SMART has demonstrated considerable progress, there are still opportunities for further improvement. A single formula can represent multiple metabolites (e.g., glucose, fructose, inositol all give formula as C6H12O6), and issues like adducts and in-source fragmentation complicate detection. Parallel LC-MS runs often validate only a fraction of MSI results. In our study, 38 validations of formulas in mouse embryo MSI covered less than 10% compared to LC-MS runs using an in-house standard library. Therefore, enhancements, such as incorporating additional metabolite data, refining machine learning algorithms, on-tissue derivatization, or ion-mobility mass spectrometry, are necessary to improve annotation accuracy. In conclusion, SMART’s systematic annotation capability makes it a transformative tool in the field of spatially-resolved metabolomics. 
Its comprehensive database, well-established multiple linear regression model and high accuracy make it indispensable for researchers to unravel the complexities of tissue-specific metabolism. By providing a detailed spatial map of metabolites, SMART can advance our understanding of metabolic processes in two dimensions. Methods Generation of KnownSet and the datasets for evaluation To construct the KnownSet database of chemical formulas, data were sourced from three databases: HMDB, ChEMBL, PubChem. The following three filtration steps were applied: (i) chemical structures with charges and a molecular weight exceeding 1000 were excluded from the dataset; (ii) formulas containing atoms not recognized by IUPAC were excluded; (iii) duplicates were merged, ensuring that each formula was unique across the three databases. The NetID dataset was manually curated and downloaded from supplemental table 2 of the NetID 9 . All artifacts and isotopes were removed during processing. For the GNPS and MoNA datasets, experimental spectra in json format were downloaded from their official websites (GNPS, https://gnps.ucsd.edu/ProteoSAFe/static/gnps-splash.jsp ; MoNA, https://mona.fiehnlab.ucdavis.edu/ ). Only candidates with m/z values below 1000 and a ppm error less than 20 were retained. Four other datasets with annotation results, American gut, Tomato, Chagas Diseases, and NIST human feces datasets, were downloaded from supplemental table 4-7 of BUDDY 11 . Definition of ppm intervals in evaluation datasets For each formula in the GNPS and MoNA datasets, the ppm error ($P$) was calculated based on the theoretical values ($M_0$) and measured m/z values ($M_c$) according to the following formula: $P = \frac{|M_0 - M_c|}{M_0} \times 10^6$, where $|\cdot|$ denotes the absolute value function. For ppm matching evaluation, each dataset was divided into six intervals based on ppm error, including 0-1, 1-2, 2-3, 3-5, 5-10, 10-20. 
For the SMART MLR model evaluation, a cumulative interval was defined with ppm error located in $P \le 5$. DBEdges formula shift calculation The chemical difference, named as formula shift, was calculated between two seed formulas based on differences in their atomic composition. A shift was defined as illegal when both positive and negative atom counts existed simultaneously; such shifts were excluded. The remaining formula shifts were then tallied and ranked according to their frequency of occurrence in KnownSet database. To maintain a stringent cutoff, any formula shift occurring fewer than 5,000 times was excluded from consideration. These filtered shifts exceeding 1 billion are named as DBEdges. BioEdges formula shift filtration from biological reaction database Bio-reaction pairs were analyzed from the KEGG REACTION database. Initially, 7,905 reactant pairs with 7,042 compounds were extracted to generate 1,699 reaction classes. Some compound formulas contained an undetermined functional group represented by the symbol R, or lacked a specified number of functional groups, represented by the symbol N. We included all possible formulas extended with different R and N, while excluding any whose molecular weight exceeds 1000 (Supplementary Table 3). As a result, the number of reactant pairs increased to 9,691. Then all reaction pairs were categorized into two groups: (i) formula pairs without R and N symbols, referred to as direct reaction pairs (DR) and (ii) formula pairs involving extended formulas with different R or N symbols, referred to as indirect reaction pairs (IDR). Formula shifts were then calculated and illegal pairs from each group were excluded only if an illegal shift was present. Formula shifts from group (i) were named as direct reaction shifts and were used to determine the DBEdges threshold in the following paragraph. Formula shifts from group (ii) were named as indirect reaction shifts and were used solely for formula connection. 
All formula shifts are referred to as BioEdges. We computed the cumulative occurrence of direct reaction shifts based on the rank list in DBEdges. By determining the growth rate of occurrence versus the rank of each shift, we chose a cutoff slope of less than 0.001 to remove all subsequent DBEdges. Eventually, our final shifts database is composed of DBEdges preceding this cutoff and all BioEdges. Feature definition and multiple linear regression model (MLR model) construction in SMART For each m/z value, all candidate formulas will be retrieved from formula database with a defined ppm error (5 ppm, in this work). All shifts linking formulas in the SMART database and candidate formulas will also be obtained. These nodes (formulas linking to candidates) and edges (formula shifts) will be used for feature calculation including Sn, BioEdge, DBEdge, and ppm. For each candidate, Sn is defined as the sum of the linked nodes, represented by the following formula: $S_n = \sum_{k=1}^{3} k N_k$, where $k \in \{1, 2, 3\}$ denotes the number of databases containing the nodes and $N_k$ represents the number of linked nodes for each $k$. The BioEdge feature stands for the number of BioEdges linked to the candidate and DBEdge indicates the number of other DBEdges linked to the candidate. ppm refers to the ppm error between the candidate ($M_c$) and input m/z value ($M_0$) according to the following formula: $\mathrm{ppm} = \frac{|M_0 - M_c|}{M_0} \times 10^6$. For each m/z value, each feature of a candidate was normalized by dividing it by the maximum value of that feature in all candidates. To build the SMART model, normalized features were calculated for all the 118 metabolites in NetID dataset. Since each reference metabolite could generate one true formula and many false formulas, the normalized features between true and false formulas are imbalanced. To address this phenomenon, an undersampling strategy was employed using RandomUnderSampler, a module from a third-party python package called imblearn. 
A multiple linear regression model was built based on the 118 metabolites with 4 features, and the equation was obtained as: Y=Xβ + ε, where X=(Sn, BioEdge, DBEdge, ppm), β = (1.6025, 1.5975, -2.2289, -0.2211), ε = 0.1074. Meanwhile, models based on single features were also constructed to compare with the integration of the 4 features. Evaluation of MLR model construction in SMART To evaluate the performance of models constructed from the NetID dataset, a large collection from the GNPS dataset was used, which contains 25 times more metabolites. Prediction was performed for all metabolites in the GNPS dataset. To calculate evaluation attributes such as the AUC scores, the undersampling process was performed 500 times for each model. The accuracy of the MLR model was evaluated by ranking the formulas according to the values generated by the model, considering the Top 1/Top 3 predicted formulas. All analyses were performed using the scikit-learn Python package. Since only the m/z lists from the GNPS and MoNA datasets were used with no spectra provided, the evaluation of Buddy 11 was performed using its ‘mz_to_formula’ function from the Python package msbuddy ( https://github.com/HuanLab/BUDDY ). IACUC statement and animal models All animal studies were approved by the Institute of Basic Medical Sciences (IBMS)/Peking Union Medical College (PUMC) Animal Care and Use Committee (ACUC-A01-2022-003). C57BL/6J pregnant mice were purchased (Vital River, Beijing) and allowed two days of acclimation to the animal facilities before the experiment. Mice were housed on a standard light cycle (from 8:00 to 20:00) and fed with a standard rodent chow (Jiangsu-Xietong, XT101FZ-002). Statistics and reproducibility All statistical analyses were conducted in Python (3.8). Method evaluation and data analyses were performed computationally and can be reproduced by preprocessing pipelines. Code availability SMART is written in the Python language. 
Source code with a small formula database extended from HMDB can be freely downloaded from https://github.com/bioinfo-ibms-pumc/SMART . For the entire database construction, please contact the authors for further help. Author contributions statement Y. C., L. Y. and L. W. conceived the project. Y. C. developed the method and wrote the SMART code. S. L. performed mass spectrometry imaging and LC-MS experiments. S. L. and Z. L. performed the mouse experiments. Y. C., Z. J., X. P. and J. Z. analyzed the data and drew the figures. Y. C., L. Y. and L. W. wrote the manuscript. All authors discussed the results and commented on the manuscript. Competing Interest Statement The authors declare no conflicts of interest. Acknowledgments This work was supported by the Beijing Natural Science Foundation (Z240014, L. W.), the Fundamental Research Funds for the Central Universities (3332023039, J. Z.), the Non-profit Central Research Institute Fund of the Chinese Academy of Medical Sciences (2021-RC350-008, L. W.), National Key R&D Program of China (2022YFA1106300, L. W.), State Key Laboratory Special Fund (2060204, L. W.), National Science Foundation of China Grants (32271354, L. Y.), Talent Plan of Shanghai Branch-Chinese Academy of Sciences (CASSHB-QNPD-2023-011, L. Y.), National Natural Science Foundation of China (92353302, J. Z.). We acknowledge the use of High-performance Computing Platform at the Center for Bioinformatics, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences, School of Basic Medicine Peking Union Medical College. We also thank Prof. Jia Yu, Prof. Xiaoyue Wang and all members of the Wang laboratories for the discussion. References (1). ↵ Ghosh-Choudhary , S. ; Liu , J. ; Finkel , T. Trends Cell Biol 2020 , 30 ( 3 ), 201 – 212 . DOI: 10.1016/j.tcb.2019.12.005 . OpenUrl CrossRef PubMed (2). Karjalainen , M. K. ; Karthikeyan , S. ; Oliver-Williams , C. ; Sliz , E. ; Allara , E. ; Fung , W. T. ; Surendran , P. ; Zhang , W. ; Jousilahti , P. 
; Kristiansson , K. ; Salomaa , V. ; Goodwin , M. ; Hughes , D. A. ; Boehnke , M. ; Fernandes Silva , L. ; Yin , X. ; Mahajan , A. ; Neville , M. J. ; van Zuydam , N. R. ; de Mutsert , R. ; Li-Gao , R. ; Mook-Kanamori , D. O. ; Demirkan , A. ; Liu , J. ; Noordam , R. ; Trompet , S. ; Chen , Z. ; Kartsonaki , C. ; Li , L. ; Lin , K. ; Hagenbeek , F. A. ; Hottenga , J. J. ; Pool , R. ; Ikram , M. A. ; van Meurs , J. ; Haller , T. ; Milaneschi , Y. ; Kahonen , M. ; Mishra , P. P. ; Joshi , P. K. ; Macdonald-Dunlop , E. ; Mangino , M. ; Zierer , J. ; Acar , I. E. ; Hoyng , C. B. ; Lechanteur , Y. T. E. ; Franke , L. ; Kurilshikov , A. ; Zhernakova , A. ; Beekman , M. ; van den Akker , E. B. ; Kolcic , I. ; Polasek , O. ; Rudan , I. ; Gieger , C. ; Waldenberger , M. ; Asselbergs , F. W. ; China Kadoorie Biobank Collaborative, G.; Estonian Biobank Research, T.; FinnGen; Hayward, C .; Fu , J. ; den Hollander , A. I. ; Menni , C. ; Spector , T. D. ; Wilson , J. F. ; Lehtimaki , T. ; Raitakari , O. T. ; Penninx , B. ; Esko , T. ; Walters , R. G. ; Jukema , J. W. ; Sattar , N. ; Ghanbari , M. ; Willems van Dijk , K. ; Karpe , F. ; McCarthy , M. I. ; Laakso , M. ; Jarvelin , M. R. ; Timpson , N. J. ; Perola , M. ; Kooner , J. S. ; Chambers , J. C. ; van Duijn , C. ; Slagboom , P. E. ; Boomsma , D. I. ; Danesh , J. ; Ala-Korpela , M. ; Butterworth , A. S. ; Kettunen , J. Nature 2024 , 628 ( 8006 ), 130 – 138 . DOI: 10.1038/s41586-024-07148-y . OpenUrl CrossRef PubMed (3). ↵ Wishart , D. S. Nat Rev Drug Discov 2016 , 15 ( 7 ), 473 – 484 . DOI: 10.1038/nrd.2016.32 . OpenUrl CrossRef PubMed (4). ↵ Johnson , C. H. ; Ivanisevic , J. ; Siuzdak , G. Nat Rev Mol Cell Biol 2016 , 17 ( 7 ), 451 – 459 . DOI: 10.1038/nrm.2016.25 . OpenUrl CrossRef PubMed (5). ↵ Rinschen , M. M. ; Ivanisevic , J. ; Giera , M. ; Siuzdak , G. Nat Rev Mol Cell Biol 2019 , 20 ( 6 ), 353 – 367 . DOI: 10.1038/s41580-019-0108-4 . OpenUrl CrossRef PubMed (6). ↵ Domingo-Almenara , X. ; Siuzdak , G. 
Methods Mol Biol 2020 , 2104 , 11 – 24 . DOI: 10.1007/978-1-0716-0239-3_2 . OpenUrl CrossRef (7). ↵ Guijas , C. ; Montenegro-Burke , J. R. ; Domingo-Almenara , X. ; Palermo , A. ; Warth , B. ; Hermann , G. ; Koellensperger , G. ; Huan , T. ; Uritboonthai , W. ; Aisporna , A. E. ; Wolan , D. W. ; Spilker , M. E. ; Benton , H. P. ; Siuzdak , G. Anal Chem 2018 , 90 ( 5 ), 3156 – 3164 . DOI: 10.1021/acs.analchem.7b04424 . OpenUrl CrossRef (8). ↵ Shen , X. ; Wang , R. ; Xiong , X. ; Yin , Y. ; Cai , Y. ; Ma , Z. ; Liu , N. ; Zhu , Z. J. Nat Commun 2019 , 10 ( 1 ), 1516 . DOI: 10.1038/s41467-019-09550-x . OpenUrl CrossRef PubMed (9). ↵ Chen , L. ; Lu , W. ; Wang , L. ; Xing , X. ; Chen , Z. ; Teng , X. ; Zeng , X. ; Muscarella , A. D. ; Shen , Y. ; Cowan , A. ; McReynolds , M. R. ; Kennedy , B. J. ; Lato , A. M. ; Campagna , S. R. ; Singh , M. ; Rabinowitz , J. D. Nat Methods 2021 , 18 ( 11 ), 1377 – 1385 . DOI: 10.1038/s41592-021-01303-3 . OpenUrl CrossRef PubMed (10). ↵ Bocker , S. ; Letzel , M. C. ; Liptak , Z. ; Pervukhin , A. Bioinformatics 2009 , 25 ( 2 ), 218 – 224 . DOI: 10.1093/bioinformatics/btn603 . OpenUrl CrossRef PubMed Web of Science (11). ↵ Xing , S. ; Shen , S. ; Xu , B. ; Li , X. ; Huan , T. Nat Methods 2023 , 20 ( 6 ), 881 – 890 . DOI: 10.1038/s41592-023-01850-x . OpenUrl CrossRef PubMed (12). ↵ Zhou , Z. ; Luo , M. ; Zhang , H. ; Yin , Y. ; Cai , Y. ; Zhu , Z. J. Nat Commun 2022 , 13 ( 1 ), 6656 . DOI: 10.1038/s41467-022-34537-6 . OpenUrl CrossRef PubMed (13). ↵ Xie , Y. R. ; Castro , D. C. ; Rubakhin , S. S. ; Trinklein , T. J. ; Sweedler , J. V. ; Lam , F. Nat Methods 2024 , 21 ( 3 ), 521 – 530 . DOI: 10.1038/s41592-024-02171-3 . OpenUrl CrossRef PubMed (14). Good , C. J. ; Butrico , C. E. ; Colley , M. E. ; Emmerson , L. N. ; Gibson-Corley , K. N. ; Cassat , J. E. ; Spraggins , J. M. ; Caprioli , R. M. Cell Chem Biol 2024 , 31 ( 10 ), 1852 – 1868 e1855 . DOI: 10.1016/j.chembiol.2024.09.005 . OpenUrl CrossRef PubMed (15). Shen , Y. ; Wang , Y. 
; Wang , J. ; Xie , P. ; Xie , C. ; Chen , Y. ; Banaei , N. ; Ren , K. ; Cai , Z. J Adv Res 2024 . DOI: 10.1016/j.jare.2024.08.031 . OpenUrl CrossRef (16). Alexandrov , T. Nat Metab 2023 , 5 ( 9 ), 1443 – 1445 . DOI: 10.1038/s42255-023-00881-0 . OpenUrl CrossRef (17). Conroy , L. R. ; Clarke , H. A. ; Allison , D. B. ; Valenca , S. S. ; Sun , Q. ; Hawkinson , T. R. ; Young , L. E. A. ; Ferreira , J. E. ; Hammonds , A. V. ; Dunne , J. B. ; McDonald , R. J. ; Absher , K. J. ; Dong , B. E. ; Bruntz , R. C. ; Markussen , K. H. ; Juras , J. A. ; Alilain , W. J. ; Liu , J. ; Gentry , M. S. ; Angel , P. M. ; Waters , C. M. ; Sun , R. C. Nat Commun 2023 , 14 ( 1 ), 2759 . DOI: 10.1038/s41467-023-38437-1 . OpenUrl CrossRef PubMed (18). Sun , C. ; Wang , A. ; Zhou , Y. ; Chen , P. ; Wang , X. ; Huang , J. ; Gao , J. ; Wang , X. ; Shu , L. ; Lu , J. ; Dai , W. ; Bu , Z. ; Ji , J. ; He , J. Nat Commun 2023 , 14 ( 1 ), 2692 . DOI: 10.1038/s41467-023-38360-5 . OpenUrl CrossRef PubMed (19). Castro , D. C. ; Xie , Y. R. ; Rubakhin , S. S. ; Romanova , E. V. ; Sweedler , J. V. Nat Methods 2021 , 18 ( 10 ), 1233 – 1238 . DOI: 10.1038/s41592-021-01277-2 . OpenUrl CrossRef PubMed (20). Pareek , V. ; Tian , H. ; Winograd , N. ; Benkovic , S. J. Science 2020 , 368 ( 6488 ), 283 – 290 . DOI: 10.1126/science.aaz6465 . OpenUrl Abstract / FREE Full Text (21). ↵ Petras , D. ; Jarmusch , A. K. ; Dorrestein , P. C. Curr Opin Chem Biol 2017 , 36 , 24 – 31 . DOI: 10.1016/j.cbpa.2016.12.018 . OpenUrl CrossRef PubMed (22). ↵ Hui , S. ; Ghergurovich , J. M. ; Morscher , R. J. ; Jang , C. ; Teng , X. ; Lu , W. ; Esparza , L. A. ; Reya , T. ; Le , Z. ; Yanxiang Guo , J. ; White , E. ; Rabinowitz , J. D. Nature 2017 , 551 ( 7678 ), 115 – 118 . DOI: 10.1038/nature24057 . OpenUrl CrossRef PubMed (23). ↵ Wang , L. ; Xing , X. ; Zeng , X. ; Jackson , S. R. ; TeSlaa , T. ; Al-Dalahmah , O. ; Samarah , L. Z. ; Goodwin , K. ; Yang , L. ; McReynolds , M. R. ; Li , X. ; Wolff , J. J. ; Rabinowitz , J. D. 
; Davidson , S. M. Nat Methods 2022 , 19 ( 2 ), 223 – 230 . DOI: 10.1038/s41592-021-01378-y . OpenUrl CrossRef PubMed (24). ↵ Fu , X. ; Deja , S. ; Fletcher , J. A. ; Anderson , N. N. ; Mizerska , M. ; Vale , G. ; Browning , J. D. ; Horton , J. D. ; McDonald , J. G. ; Mitsche , M. A. ; Burgess , S. C. Nat Commun 2021 , 12 ( 1 ), 3756 . DOI: 10.1038/s41467-021-23958-4 . OpenUrl CrossRef PubMed (25). ↵ May , J. C. ; McLean , J. A. Metabolomics 2022 , 18 ( 12 ), 104 . DOI: 10.1007/s11306-022-01961-0 . OpenUrl CrossRef PubMed (26). ↵ Wishart , D. S. ; Guo , A. ; Oler , E. ; Wang , F. ; Anjum , A. ; Peters , H. ; Dizon , R. ; Sayeeda , Z. ; Tian , S. ; Lee , B. L. ; Berjanskii , M. ; Mah , R. ; Yamamoto , M. ; Jovel , J. ; Torres-Calzada , C. ; Hiebert-Giesbrecht , M. ; Lui , V. W. ; Varshavi , D. ; Varshavi , D. ; Allen , D. ; Arndt , D. ; Khetarpal , N. ; Sivakumaran , A. ; Harford , K. ; Sanford , S. ; Yee , K. ; Cao , X. ; Budinski , Z. ; Liigand , J. ; Zhang , L. ; Zheng , J. ; Mandal , R. ; Karu , N. ; Dambrova , M. ; Schioth , H. B. ; Greiner , R. ; Gautam , V. Nucleic Acids Res 2022 , 50 ( D1 ), D622 – D631 . DOI: 10.1093/nar/gkab1062 . OpenUrl CrossRef PubMed (27). ↵ Kanehisa , M. ; Goto , S. Nucleic Acids Res 2000 , 28 ( 1 ), 27 – 30 . DOI: 10.1093/nar/28.1.27 . OpenUrl CrossRef PubMed Web of Science (28). ↵ Palmer , A. ; Phapale , P. ; Chernyavsky , I. ; Lavigne , R. ; Fay , D. ; Tarasov , A. ; Kovalev , V. ; Fuchser , J. ; Nikolenko , S. ; Pineau , C. ; Becker , M. ; Alexandrov , T. Nat Methods 2017 , 14 ( 1 ), 57 – 60 . DOI: 10.1038/nmeth.4072 . OpenUrl CrossRef PubMed (29). ↵ Semente , L. ; Baquer , G. ; Garcia-Altares , M. ; Correig-Blanchar , X. ; Rafols , P. Anal Chim Acta 2021 , 1171 , 338669 . DOI: 10.1016/j.aca.2021.338669 . OpenUrl CrossRef PubMed (30). ↵ Zhu , Y. ; Zang , Q. ; Luo , Z. ; He , J. ; Zhang , R. ; Abliz , Z. Anal Chem 2022 , 94 ( 20 ), 7286 – 7294 . DOI: 10.1021/acs.analchem.2c00557 . OpenUrl CrossRef (31). ↵ Gaulton , A. ; Bellis , L. 
J. ; Bento , A. P. ; Chambers , J. ; Davies , M. ; Hersey , A. ; Light , Y. ; McGlinchey , S. ; Michalovich , D. ; Al-Lazikani , B. ; Overington , J. P. Nucleic Acids Res 2012 , 40 ( Database issue ), D1100 – 1107 . DOI: 10.1093/nar/gkr777 . OpenUrl CrossRef PubMed Web of Science (32). ↵ Kim , S. ; Chen , J. ; Cheng , T. ; Gindulyte , A. ; He , J. ; He , S. ; Li , Q. ; Shoemaker , B. A. ; Thiessen , P. A. ; Yu , B. ; Zaslavsky , L. ; Zhang , J. ; Bolton , E. E. Nucleic Acids Res 2023 , 51 ( D1 ), D1373 – D1380 . DOI: 10.1093/nar/gkac956 . OpenUrl CrossRef PubMed (33). ↵ Wang , M. ; Carver , J. J. ; Phelan , V. V. ; Sanchez , L. M. ; Garg , N. ; Peng , Y. ; Nguyen , D. D. ; Watrous , J. ; Kapono , C. A. ; Luzzatto-Knaan , T. ; Porto , C. ; Bouslimani , A. ; Melnik , A. V. ; Meehan , M. J. ; Liu , W. T. ; Crusemann , M. ; Boudreau , P. D. ; Esquenazi , E. ; Sandoval-Calderon , M. ; Kersten , R. D. ; Pace , L. A. ; Quinn , R. A. ; Duncan , K. R. ; Hsu , C. C. ; Floros , D. J. ; Gavilan , R. G. ; Kleigrewe , K. ; Northen , T. ; Dutton , R. J. ; Parrot , D. ; Carlson , E. E. ; Aigle , B. ; Michelsen , C. F. ; Jelsbak , L. ; Sohlenkamp , C. ; Pevzner , P. ; Edlund , A. ; McLean , J. ; Piel , J. ; Murphy , B. T. ; Gerwick , L. ; Liaw , C. C. ; Yang , Y. L. ; Humpf , H. U. ; Maansson , M. ; Keyzers , R. A. ; Sims , A. C. ; Johnson , A. R. ; Sidebottom , A. M. ; Sedio , B. E. ; Klitgaard , A. ; Larson , C. B. ; P, C. A. B.; Torres-Mendoza , D. ; Gonzalez , D. J. ; Silva , D. B. ; Marques , L. M. ; Demarque , D. P. ; Pociute , E. ; O’Neill , E. C. ; Briand , E. ; Helfrich , E. J. N. ; Granatosky , E. A. ; Glukhov , E. ; Ryffel , F. ; Houson , H. ; Mohimani , H. ; Kharbush , J. J. ; Zeng , Y. ; Vorholt , J. A. ; Kurita , K. L. ; Charusanti , P. ; McPhail , K. L. ; Nielsen , K. F. ; Vuong , L. ; Elfeki , M. ; Traxler , M. F. ; Engene , N. ; Koyama , N. ; Vining , O. B. ; Baric , R. ; Silva , R. R. ; Mascuch , S. J. ; Tomasi , S. ; Jenkins , S. ; Macherla , V. ; Hoffman , T. 
; Agarwal , V. ; Williams , P. G. ; Dai , J. ; Neupane , R. ; Gurr , J. ; Rodriguez , A. M. C. ; Lamsa , A. ; Zhang , C. ; Dorrestein , K. ; Duggan , B. M. ; Almaliti , J. ; Allard , P. M. ; Phapale , P. ; Nothias , L. F. ; Alexandrov , T. ; Litaudon , M. ; Wolfender , J. L. ; Kyle , J. E. ; Metz , T. O. ; Peryea , T. ; Nguyen , D. T. ; VanLeer , D. ; Shinn , P. ; Jadhav , A. ; Muller , R. ; Waters , K. M. ; Shi , W. ; Liu , X. ; Zhang , L. ; Knight , R. ; Jensen , P. R. ; Palsson , B. O. ; Pogliano , K. ; Linington , R. G. ; Gutierrez , M. ; Lopes , N. P. ; Gerwick , W. H. ; Moore , B. S. ; Dorrestein , P. C. ; Bandeira , N. Nat Biotechnol 2016 , 34 ( 8 ), 828 – 837 . DOI: 10.1038/nbt.3597 . OpenUrl CrossRef PubMed (34). ↵ Zhao , J. ; Yao , K. ; Yu , H. ; Zhang , L. ; Xu , Y. ; Chen , L. ; Sun , Z. ; Zhu , Y. ; Zhang , C. ; Qian , Y. ; Ji , S. ; Pan , H. ; Zhang , M. ; Chen , J. ; Correia , C. ; Weiskittel , T. ; Lin , D. W. ; Zhao , Y. ; Chandrasekaran , S. ; Fu , X. ; Zhang , D. ; Fan , H. Y. ; Xie , W. ; Li , H. ; Hu , Z. ; Zhang , J. Nat Metab 2021 , 3 ( 10 ), 1372 – 1384 . DOI: 10.1038/s42255-021-00464-x . OpenUrl CrossRef PubMed (35). ↵ Perez-Ramirez , C. A. ; Nakano , H. ; Law , R. C. ; Matulionis , N. ; Thompson , J. ; Pfeiffer , A. ; Park , J. O. ; Nakano , A. ; Christofk , H. R. Cell 2024 , 187 ( 1 ), 204 – 215 e214 . DOI: 10.1016/j.cell.2023.11.011 . OpenUrl CrossRef PubMed (36). ↵ Frye , M. ; Blanco , S. Development 2016 , 143 ( 21 ), 3871 – 3881 . DOI: 10.1242/dev.136556 . OpenUrl Abstract / FREE Full Text (37). ↵ Placidi , M. ; Di Emidio , G. ; Virmani , A. ; D’Alfonso , A. ; Artini , P. G. ; D’Alessandro , A. M. ; Tatone , C. Antioxidants (Basel) 2022 , 11 ( 4 ). DOI: 10.3390/antiox11040745 . OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted March 14, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. 
NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following SMART: an approach for accurate formula assignment in spatially-resolved metabolomics Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share SMART: an approach for accurate formula assignment in spatially-resolved metabolomics Yinghao Cao , Shengxi Li , Zhaoyi Liu , Zixian Jia , Weidong Zhuang , Xu Pan , Jinyu Zhou , Lifeng Yang , Lin Wang bioRxiv 2025.03.12.642824; doi: https://doi.org/10.1101/2025.03.12.642824 Share This Article: Copy Citation Tools SMART: an approach for accurate formula assignment in spatially-resolved metabolomics Yinghao Cao , Shengxi Li , Zhaoyi Liu , Zixian Jia , Weidong Zhuang , Xu Pan , Jinyu Zhou , Lifeng Yang , Lin Wang bioRxiv 2025.03.12.642824; doi: https://doi.org/10.1101/2025.03.12.642824 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7642) Biochemistry (17715) Bioengineering (13907) Bioinformatics (42005) Biophysics (21472) Cancer Biology (18624) Cell Biology (25534) Clinical Trials (138) Developmental Biology (13390) Ecology (19935) Epidemiology (2067) Evolutionary Biology (24356) Genetics (15617) Genomics (22529) Immunology (17753) Microbiology (40437) Molecular Biology (17200) Neuroscience (88697) Paleontology (667) Pathology (2840) Pharmacology and Toxicology (4829) Physiology (7653) Plant Biology (15171) Scientific 
Communication and Education (2046) Synthetic Biology (4304) Systems Biology (9827) Zoology (2272)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-23T02:00:01.238055+00:00

License: CC-BY-NC-ND-4.0