An automated software-assisted approach for exploring metabolic susceptibility and degradation products in macromolecules using High-Resolution Mass Spectrometry

doi:10.1101/2025.05.01.651614

An automated software-assisted approach for exploring metabolic susceptibility and degradation products in macromolecules using High-Resolution Mass Spectrometry

2025 · doi:10.1101/2025.05.01.651614

preprint OA: closed CC-BY-4.0

📄 Open PDF Full text JSON View at publisher

Full text 70,163 characters · extracted from preprint-html · click to expand

An automated software-assisted approach for exploring metabolic susceptibility and degradation products in macromolecules using High-Resolution Mass Spectrometry | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results An automated software-assisted approach for exploring metabolic susceptibility and degradation products in macromolecules using High-Resolution Mass Spectrometry View ORCID Profile Paula Cifuentes , Ismael Zamora , Tatiana Radchenko , Fabien Fontaine , Albert Garriga , Luca Morettoni , Jesper Kammersgaard Christensen , Hans Helleberg , Bridget A. 
Becker doi: https://doi.org/10.1101/2025.05.01.651614 Paula Cifuentes 1 Pompeu Fabra University , Barcelona Spain 2 Lead Molecular Design, S.L., Sant Cugat del Valles , Spain 3 Mass Analytica, S.L., Sant Cugat Del Valles , Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Paula Cifuentes For correspondence: paula.cifuentes01{at}estudiant.upf.edu Ismael Zamora 3 Mass Analytica, S.L., Sant Cugat Del Valles , Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tatiana Radchenko 2 Lead Molecular Design, S.L., Sant Cugat del Valles , Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site Fabien Fontaine 3 Mass Analytica, S.L., Sant Cugat Del Valles , Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site Albert Garriga 3 Mass Analytica, S.L., Sant Cugat Del Valles , Spain Find this author on Google Scholar Find this author on PubMed Search for this author on this site Luca Morettoni 4 Mass Analytica, S.L., Bettona , Italy Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jesper Kammersgaard Christensen 5 Development ADME, Novo Nordisk , Måløv, Denmark Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hans Helleberg 5 Development ADME, Novo Nordisk , Måløv, Denmark Find this author on Google Scholar Find this author on PubMed Search for this author on this site Bridget A. Becker 6 Labcorp , Madison, WI, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Preview PDF Abstract A comprehensive understanding of drug metabolism is crucial for advancements in drug development. 
Automation has improved various stages of this process, from compound procurement to data analysis, supporting small molecules, peptides, and oligonucleotides. However, challenges remain, particularly in the time-consuming analysis of samples for metabolite identification. This article introduces new algorithms for automated Liquid Chromatography-High-Resolution Mass Spectrometry (LC-HRMS) data applicable to both small and macromolecules. While methodologies for small molecules are well established, adapting them for macromolecules presents challenges, including computational demands, peak detection complexities, and visualization issues. A data analysis employing diverse algorithms in the data preprocessing step was conducted across six datasets, ranging from small/medium linear or macrocyclic peptides to oligonucleotides with natural and unnatural monomers. Two peak detection approaches were evaluated: using the monoisotopic mass versus the most abundant isotope for mass calculation. Additionally, an exploration of two distinct structure visualization options was conducted for one of the datasets. Furthermore, data obtained through two different acquisition modes was processed. The computational time required for data processing was recorded throughout, ranging from 5 minutes to 2 hours per experiment. The results have been compared against prior studies, revealing substantial reductions in processing time, consistent identification of degradation products, and improved visualization techniques, thereby enhancing result interpretation. A comprehensive identification of 970 metabolites was achieved under varied incubation conditions across the six datasets, showcasing the workflow’s efficiency in managing experimental data within a molecular range from 700 to 7630 Daltons (Da). 
Particularly in larger molecules, the most abundant mass algorithm demonstrated higher scores and a greater number of matches, instilling greater confidence in the accurate prediction of metabolite structures. It has been illustrated how the visualization algorithm for macromolecules allows the combination of monomer and atom/bond notation, facilitating a clear depiction of metabolic changes in the molecular structure. Introduction An essential aspect of the drug development process is the comprehensive identification and characterization of the major metabolites of the drug candidate and the enzymes responsible for its metabolic transformation, commonly known as drug metabolism. These studies are crucial for uncovering optimal therapeutic properties (given that metabolites play a role in efficacy, toxicity, and drug-drug interactions) [ 1 ], identifying strategies for structural improvement, and discovering novel effective compounds by examining the structures of active metabolites, among other key objectives [ 2 , 3 ]. Recently, there has been a strong emphasis on developing more efficient systems and highly sensitive tools, such as in silico models, in vitro assays, and innovative hybrid approaches, aimed at identifying drug metabolites [ 4 ]. Numerous stages within the design process have been automated, starting from compound order and shipping to acquisition and data analysis. While the acquisition process can be automated and offers a wide dynamic range suitable for analysing various compounds, including small molecules and larger biomolecules like peptides and oligonucleotides, the analysis of metabolites can become time-consuming. [ 5 ] With the aim of automating data analysis, we developed a set of software solutions as discussed in previous publications [ 6 , 7 ]. 
These applications have helped to create faster systems for the data processing step and the results review/visualization as they perform the following steps automatically: select the chromatographic peaks that are related to the compound of interest, find the mass spectral information for each extracted peak, assign potential structures by comparing the theoretical fragmentation that can be predicted with the actual mass-to-charge ratio (m/z) values obtained with the experimental spectra, and score potential solutions depending on the fragments assigned to the spectra alone or by the comparison with the parent fragmentation. After clustering the results from different experimental conditions and consolidating them into a single experimental entity, the results are stored in the database. Subsequently, upon the conclusion of the review process, a report is generated. These methods have been mainly applied to small molecules and to a certain extent to macromolecules. Nonetheless, there are specific challenges inherent in the data analysis and results review processes with respect to macromolecules, particularly concerning result visualization and processing time. The intricate structural features and complex fragmentation patterns associated with such molecules pose difficulties in presenting clear and interpretable results. Notably, as compounds may involve structures with hundreds of atoms, it becomes essential to address challenges related to identifying the specific location where the biotransformation has occurred. Thus, a crucial aspect in overcoming these challenges lies in developing suitable result visualization techniques that facilitate a more accessible interpretation of the outcomes. The conventional approach employed for visualizing and reporting degradation products and fragments in small molecules may not be suitable for larger and more intricate molecules, including peptides, oligonucleotides, and even bigger entities like antibodies. 
A substantial challenge emerges with the size of the molecule, as it leads to an exponential increase in the number of signals in the spectra that need interpretation and fragments to be computed and compared. The exponential growth in data increases the demands on software processing time and memory consumption. Finally, depending on the fragmentation data, it might be possible to assign a single structure, or several structural possibilities which may increase the time needed for the expert to review. The aim of this article is to describe new algorithms/approaches for automated LC-HRMS data analysis that addresses the mentioned challenges encountered in the processing of macromolecules. These challenges encompass optimizing the visualization of chemical structures and degradation products. Additionally, the described approach has successfully optimized the reduction of processing memory and time consumption in the execution of algorithms for potential structure generation and fragmentation. Furthermore, the proposed methods seek to provide a workflow capable of interpreting results across various data acquisition formats and modes. Analysis was conducted on six datasets spanning a molecular range from 700 to 7630 Da. These datasets consist of both linear and cyclic peptides, incorporating natural and unnatural amino acids, as well as oligonucleotides. Specifically, dataset-1 comprises 9 commercially available peptides, dataset-2 includes one commercially available peptide and 4 synthetic analogues, dataset-3 involves a natural peptide hormone and 7 synthetic analogues, dataset-4 features an antisense oligonucleotide, dataset-5 contains 28 commercially available peptides, and dataset-6 is composed of a peptide hormone. 
Comparisons of the results obtained for certain compounds with those of prior studies have enabled an evaluation of several factors, such as the number and structure of identified metabolites, along with a consideration of the time consumed during the data processing step. Materials and methods Experimental data For this study, six different experimental data sets (linear/cyclic, natural/unnatural amino acids, and an oligonucleotide dataset) have been used for the metabolite identification, as shown in Table 1 . View this table: View inline View popup Download powerpoint Table 1: Summary of the number of compounds of each dataset, along with the molecular weight range of the compounds and the corresponding data acquisition mode. (DDA= data-dependent acquisition, DIA= data-independent acquisition) The first set (dataset-1) is composed of nine commercially available peptides (secretin, calcitonin, oxytocin, octreotide, deslorelin, histrelin, goserelin, buserelin, and leuprolide), each of which was separately incubated with four selected protease enzymes – trypsin, chymotrypsin, pancreatic elastase, and pepsin. Data acquisition was performed using a Thermo Orbitrap® instrument in full scan mode with data-dependent tandem mass spectrometry (MS/MS). The detailed experimental conditions for this dataset are documented in the referenced bibliography [ 8 ]. Three of the compounds are cyclic peptides (octreotide, oxytocin, and calcitonin) and five contain unnatural amino acids (secretin, calcitonin, octreotide, deslorelin, and histrelin). Molecular weight ranges from 1282 to 3429 Da, as illustrated in Table 2 . View this table: View inline View popup Download powerpoint Table 2: Dataset-1 sequence structures and their molecular weights. 
Dataset-2 consists of a commercially available peptide glucagon-like peptide-1 (GLP-1), a 30 amino acid compound, and four synthetic analogues, designed to have a reduced susceptibility to enzymatic degradation, taspoglutide, exenatide, liraglutide and semaglutide, all of them linear peptides. Metabolite identification has been conducted under the presence of DPP-4 and NEP, as both enzymes are known to be involved in native GLP-1 degradation. Data acquisition was performed using a Thermo Orbitrap® instrument in full scan mode with data-dependent MS/MS. The detailed experimental conditions for this dataset are documented in the referenced bibliography [ 8 ]. Except for semaglutide, which was incubated in dog plasma - with the two metabolites first synthesized and then spiked into the plasma - the data were collected using a Waters® ACQUITY® Ultra-Performance Liquid Chromatography with Vion Ion Mobility Spectrometry Quadrupole Time-of-Flight (IMS-QToF) Mass Spectrometer operated by UNIFI in a data-independent mode, in collaboration with Zealand Pharma. Taspoglutide peptide has non-natural amino acids and liraglutide has a C-16 fatty acid side chain (palmitic acid). Molecular weights range from 3297 to 4184 Da, as presented in Table 3 , with exenatide being the largest. View this table: View inline View popup Download powerpoint Table 3: Glucagon-like 1 protein and analogues (dataset-2) sequence structures and their molecular weights. Dataset-3 includes somatostatin, a natural growth-inhibiting peptide hormone, along with seven 14-amino acid cyclic analogues. Data were collected in two data acquisition modes: the first was conducted on a Thermo Q-Exactive® instrument employing full scan mode with data-dependent MS/MS, and in the second, High Definition MS E (HDMS E ) data were collected using a Vion IMS QTof Mass Spectrometer. The detailed experimental conditions for this dataset are documented in the referenced bibliography [ 8 , 9 ]. 
In the synthesis of these analogues, a common approach is employed, which entails substituting some of the natural amino acids with non-natural or modified ones ( Fig 1 ). Notably, these analogues feature the substitution of Phe(7) by Msa, enhancing the rigidity due to the ortho substitution, and Trp(8) by D-Trp [ 12 ]. Additionally, various permutations involve substituting Ala(1), Cys(3), and Cys(14) with their D-amino acid equivalents, along with the substitution of Lys(4) by ornithine. [ 9 ] Molecular weight ranges from 1636 to 1678 Da ( Table 4 ). Given the inherent low stability of somatostatin, a critical consideration for its pharmaceutical utility, there is a great interest in evaluating whether these novel analogs ( Table 4 ) exhibit prolonged lifetimes in human serum. Download figure Open in new tab Fig 1. Structure of somatostatin and its seven modified analogues including unnatural amino acids. All eight peptides exhibit a cyclic structure, closing through the disulfide bond (between monomer 3 and 14). View this table: View inline View popup Download powerpoint Table 4: Dataset-3 is composed of somatostatin and its seven modified analogues, with the corresponding molecular formulas and molecular weights. Dataset-4 includes an antisense oligonucleotide (ASO) with the formula C 242 H 307 N 91 O 150 P 94 (molecular weight of 7633 Da) containing 25 monomers. ASOs are synthetic, small-sized single-stranded nucleic acids. Data was collected using a Thermo Orbitrap® instrument in DDA mode. This dataset pertains to the incubation of ASOs in human liver tissue, a commonly studied experimental condition. [ 11 ] It enables researchers to evaluate the efficacy and selectivity of the ASOs in targeting specific messenger RNA molecules within the complex environment of the liver. 
In this study, dataset-5 comprises a collection of 25 structurally diverse linear and cyclic peptides, with molecular weights ranging from 708 to 1900 Da (atosiban, BIO-11006, BIO-1211, carbetocin, CSP7, deslorelin, desmopressin, felypressin, gonadorelin, iseganan, lanreotide, LDTRYLEQLHKLY, leuprolide, lypressin, M10 peptide, MMI-0100, NAS-911, octreotide, peptide T, salmon calcitonin, somatostatin, SPX-101, triptorelin, vasopressin, and vapreotide), as depicted in Table 5 . These compounds have been incubated with four pulmonary proteases (human cathepsin G, human neutrophil elastase, human MMP-12 catalytic domain, and bovine pancreatic trypsin). The exceptions are felypressin, iseganan, LDTRYKEQLHKLY, lypressin, MMI-0100, and vasopressin, for which data are unavailable for the bovine pancreatic trypsin incubation, and atosiban, lanreotide, and leuprolide, for which data are also unavailable for the human cathepsin G protease incubation. Data acquisition was performed using a Waters® Q-TOF instrument in a data-independent mode. The data was used to develop an assay workflow aimed at guiding the initial chemical modifications of peptide hits in early respiratory drug discovery projects. The detailed experimental conditions for this dataset are documented in the referenced bibliography [ 13 ]. This workflow utilizes WebMetabase to effectively detect and elucidate the structures of metabolites formed through enzymatic proteolysis. This data has been used in this study for a comprehensive comparison of results obtained through this new approach. Furthermore, its utilization serves to underscore the noteworthy advancements in data processing time realized through the implementation of this workflow. View this table: View inline View popup Download powerpoint Table 5: Dataset-5, composed of 28 peptides, with the corresponding molecular formulas and molecular weights. 
Dataset-6 comprises human insulin, a peptide hormone containing three disulfide bridges, one of which is internally located within Chain A, while the other two covalently connect Chain A to Chain B ( Fig 2 ). Data was collected with QTOF from a Waters® instrument. Insulin has been subjected to analysis following incubation with IDE, a protease widely recognized for its pivotal role in degrading and inactivating insulin. The detailed experimental conditions for this dataset are documented in the referenced bibliography [ 19 ]. Download figure Open in new tab Fig 2. Insulin structure with the linear visualization The structure of insulin consists of two peptide chains known as Chain A, comprising 21 amino acids (numbered 1 to 21), and Chain B, comprising 30 amino acids (numbered 22 to 51). The A and B chains are interconnected by two disulfide bonds (highlighted in pink and light blue), and an additional disulfide bond is formed within the A Chain (highlighted in purple). Data preprocessing The MassMetaSite procedure consists of three steps: (a) data reading, (b) automatic detection of the chromatographic peaks related to the parent compound and its metabolites, and (c) structure elucidation by proposing a potential metabolite structure based on the fragmentation pattern for each peak detected in the previous step. a) Data Reading. Three different acquisition files need to be defined, depending on the data. Firstly, a blank file is employed to distinguish relevant signals from background peaks. This file is crucial for investigating whether a detected peak in the incubation file is attributable to the compound of interest or if it was already present in the incubation matrix (blank sample). Secondly, a substrate file is utilized to analyze the fragmentation pattern of the substrate. 
This step is essential in the structure elucidation process, involving the comparison of fragments assigned to the spectra of the parent compound with the spectra of potential metabolites. Lastly, the incubation file contains all the products after incubation, either in vitro or in vivo . It serves for investigating and identifying metabolites formed during the incubation process. b) Automatic detection of the chromatographic peaks. During the automated chromatographic peak detection stage, an initial spectral noise analysis is conducted. For each full scan (intensity vs. m/z), a noise level is computed by calculating the change in slope between two consecutive shortlists of ions present in the full scan, and ions below this threshold are systematically eliminated. Subsequently, the list of ions is examined across chromatographic retention times. Ions are selected based on specific m/z values to precisely determine the presence or absence of peak formation. Following the identification of a potential peak in the incubation sample, a background analysis is performed. Specifically, for the selected m/z and retention time of the potential peak, a search is conducted to verify the presence of the peak in the blank sample. If the peak is detected in the blank, a peak alignment optimization is initiated using a combination of Hodgkin and Pearson similarity indexes computation, which allows a comprehensive comparison of both shape and peak intensity. The sample peak is excluded from the analysis whenever it exhibits similar shape and equivalent (or lower) intensity to the blank peak. The Negative Control Area Ratio is then computed, representing the quantitative ratio between the peak area in the incubation sample and the corresponding peak area in the blank. Subsequently, a filtered spectrum is computed by merging all the scans within the peak retention time range. This involves the selection of m/z values that exhibit correlation within the chromatographic peak shape. 
Each m/z value of each filtered spectrum is compared with any of the m/z values for the metabolites of the parent compound. There are two potential options to represent the theoretical m/z of the compound of the peak under consideration: the monoisotopic or the most abundant isotope species. Additionally, the isotope pattern derived from the metabolite formula is compared to the one from the experimental spectra and a filter may be set to consider the similarity between the observed and predicted intensity for each potential isotope. In addition, m/z values from multiple charge states were also used in the analysis. For each selected m/z value extracted from the filtered spectra, a comprehensive metabolite classification is conducted. This classification categorizes metabolites into distinct groups, including first-generation metabolites, second or higher generation metabolites, metabolites stemming from biotransformations unrecognized by the software (referred to as “red peaks” denoting unknowns), and cases where the fragment ion may arise from ion adduct formation or in-source neutral loss. Ultimately, a MS/MS evaluation is conducted, examining the presence of m/z values observed in the parent spectrum within the potential peak. The evaluation considers the shift based on the obtained formula, classifying a non-shifted scenario when the same m/z observed in the parent spectra is also observed in the metabolite, and identifying a shift when a change in the m/z of the considered value relative to the parent is observed between a peak in the parent spectra and a peak in the filtered spectra. The m/z values are scored according to multiple criteria: isotope similarity, retention time, MS/MS comparison and calculated m/z. Among all the values above the score threshold, the m/z that will represent the peak in the chromatogram is the one with the highest m/z value. 
This process results in a compiled list of peaks, each associated with an assigned m/z, retention time range, area, full scan filtered spectra, and MS/MS spectra. c) Structure Elucidation . The third stage of data processing is structure elucidation ( Fig 3 ), during which the fragment ions obtained from the parent and those from the metabolite are compared. Download figure Open in new tab Fig 3. Illustrates the third step, Structure Elucidation, of MassMetaSite procedure. This process has two starting points: the parent structure, or the metabolite structure which is obtained by virtual synthesis: 1. Identification of metabolite fragments from fragmentation of the parent 1.1 Parent fragmentation: During this process, the parent molecule is fragmented, and the m/z of the fragments are computed. There could be more than one m/z value for a single fragment due to potential hydrogen rearrangements. Fragment structures are then associated to the spectra m/z values considering a user-specified tolerance. 1.2 Generation of metabolite fragments: Metabolite fragments are built from parent fragments using metabolite and parent atom map. The metabolite resulting fragments m/z may be shifted or equal to the parent m/z depending on whether the fragment contains sites of metabolism or not. [ 14 ] 1.3 Association between parent peaks and metabolite peaks: For each parent spectrum, whether MS or MS/MS, the software checks if there are peaks with the same or shifted m/z in the associated metabolite spectrum. A shifted m/z is equal to the m/z of the parent plus the change of m/z due to the chemical modifications introduced during metabolism. Resulting in Substrate-Metabolite peak pairs that could be used for structural identification. a) Matches: When substrate and metabolite fragments are identical and both peaks of the Substrate-Metabolite fragment pair have the same m/z value, the observed and calculated interpretation match. 
Likewise, when the metabolite fragment is different from the substrate fragment and the Substrate-Metabolite fragment pair have a shifted mass, the interpretations also match. [ 15 ] b) Mismatches: The fragments that are mismatching are those ones where the m/z is observed as non-shifted between the parent and metabolite spectra, but the atom set of the fragment corresponds to a chemical modification that would change the m/z. Similarly, a mismatch is detected when the m/z is observed as shifted between the parent and the metabolite spectra, but the atom set of the fragment corresponds to a modification that would not change the m/z of the fragment. [ 15 ] 2. Identification of metabolite fragment from the structure of the metabolite: Virtual fragments of the metabolites are generated based on a predefined list of metabolic biotransformation reactions. [ 16 ] a) Fragmentation of the metabolite: This is the same as the parent fragmentation but the number of bonds that can be cut is usually lower since breaking all the possible metabolites has a greater computational cost. b) Metmatches: The fragments that are obtained in this way and assigned to the metabolite spectra are called metmatches. This fragmentation strategy is particularly beneficial for cyclic peptides, where the metabolite might be a linear peptide due to amide hydrolysis-induced ring opening, leading to a markedly different fragmentation pattern compared to the parent. Scoring is done by summing the intensity for the matching peaks plus the sum of the intensity for the metmatching peaks minus the sum of the intensity for the mismatching peaks. The solutions with the highest score are auto selected by the system and reported as potential structural candidates. [ 15 ] Each experiment consisted of a set of samples, i.e. one sample per incubation time point per matrix. 
MassMetaSite processes each sample as a separate entity, and thus generates three main pieces of information for each sample: metabolic scheme, spectrometry data (product ion assignment) and outcomes (retention time, MS area, MS relative area, collision cross section, and parts per million (ppm) mass error) for each found component. WebMetabase then consolidates all these data from the individual files into a single interpretation for the entire experiment (time/matrix) and analyses which metabolite peaks from each sample can be clustered based on their retention time and m/z. Settings/Structure Visualization In this study, data have been processed with distinct algorithms, establishing the groundwork for a comprehensive comparison among them. This research is focused on three crucial dimensions: -Peak detection (Monoisotopic Mass and Most Abundant Mass) Various algorithms for peak detection are employed based on the molecular size. The monoisotopic mass peak (here referred to as MiM) represents the peak of the ion with the lowest mass-to-charge (m/z) ratio and it is calculated using the lightest isotope mass of each element present in the molecule. It is particularly useful for accurately determining the molecular formula, especially for smaller molecules. [ 17 ] Conversely, the most abundant mass peak (here referred to as MaM) represents the molecule’s most common isotopic distribution, considering the natural abundance of all isotopes in the molecule, not just the lightest ones. For larger molecules or when the monoisotopic ion is undetectable, the MaM is employed for peak detection. This choice is made because, with increasing molecular size, the heightened probability of the entire molecule containing at least one heavy isotope atom (mainly 13 C) becomes more pronounced. Consequently, the MiM peak may be much more difficult to detect than the MaM peak. 
In addition, MaM peaks are typically the ones which are selected for triggering MSMS scans in DDA when no preferred list is provided to the acquisition software. In this study all datasets have been processed with both the MiM and MaM algorithms, except dataset-5 that has been exclusively subjected to processing with the MaM settings. -Acquisition Modes (Data-dependent acquisition and Data-independent acquisition) LC-HRMS stands as the preferred method for metabolite identification, with DDA being a commonly used strategy in MS data acquisition. In DDA, precursor ions selected based on their abundances are often employed to drive MS/MS. In contrast, DIA methods, such as MS E and HDMS E , eliminate the risk of overlooking metabolites by avoiding precursor ion selection. [ 18 ] The DIA HDMS E is a method that combines ion mobility separation with MS E data acquisition. It alternates between low and high collision energy ion mobility spectrometry-mass spectrometry scans, enabling accurate mass measurements of both precursor and product ions simultaneously. In contrast to DDA, where a specific m/z must be isolated before fragmentation, DIA provides more complex but more complete datasets. Data from dataset-3 was acquired employing the two predetermined strategies, DIA and DDA, facilitating a comparison of outcomes obtained from both acquisition modes. Settings used for the processing of DIA (MS E /HDMS E ) and DDA data for somatostatin synthetic analogues are presented in S6-S9 Files. -Structure visualization (Expanded and non-expanded) Two visualization options are available for representing the structure of polymeric compounds like peptides or oligonucleotides during data analysis. Monomers of the compound can be depicted either in an expanded form, revealing all atoms and intermonomer bonds, or in a non-expanded form, where the structure is represented by linking the monomer acronyms. 
In this study, dataset-4 was processed using both visualization options, enabling a comparative analysis of processing time and providing an illustrative example of how metabolite structures are visualized after metabolic reactions using both approaches. The selection to work on expanded or non-expanded monomers has an impact on structure visualization. The non-expanded mode shows the monomer symbol, making it simpler for the user to identify the structure and the place where the biotransformation takes place, and therefore it is recommended to be used. Nevertheless, it also has implications in the computation process. The structure that is represented as monomers does not undergo a virtual structure metabolite generation; the biotransformation is applied at monomer level and not at atomic level, and therefore the resulting compound is not a valid chemical structure, since there is no information on the exact chemical structure that is obtained after the reaction. The part of the structure that is represented as atoms/bonds undergoes a typical virtual reaction and a defined chemical structure is obtained for each potential metabolite. In the monomer-represented part of the molecule, fewer chemical structures need to be constructed during the calculation process, resulting in reduced computation time. A further saving applies to the part of the molecule that is treated as monomers: for this part only the typical a, b, c and x, y, z fragmentation is considered, which reduces the number of potential fragments generated and thereby decreases the time and memory consumption. For the rest of the molecule, treated as atoms/bonds, all the bonds are disconnected, which generates an increased number of fragments. Furthermore, there exists the option to work with a combination of both visualizations within the molecular structure. This can be achieved by selectively choosing which segments of the molecule to expand or maintain in a non-expanded state. 
Data Analysis Following data consolidation, manual data interpretation by the user is conducted for peak selection and structure elucidation steps, applying diverse data analysis criteria to systematically eliminate any potential false positive metabolites. These criteria are: Peak Selection MS area (%): Reporting peaks with a relative area above 0.5%. Difference between observed and calculated m/z (amu, ppm): For the MS signal the system computes the difference between the observed and the computed m/z. The observed m/z considers the m/z values found at the different scans and derives a value which is compared to the vendor software package to consider effects like peak saturation and loss of accuracy at the top of the peak. Maintaining a difference of less than 10 ppm between observed and computed values [ 20 ]. Value of Isotopic All Similarity: Quantifying the match between observed and expected isotopic patterns for peaks, where a low value suggests pattern variability. Negative control area ratio: Establishing the ratio between peak areas in the incubation sample and the blank, with a signal observed in both considered non-specific. Kinetics: Reflects changes in metabolite abundance over time. At time 0 (t = 0), when the incubation begins, the cluster chart would initially show the presence of ions solely related to the parent compound. There should be no signals corresponding to metabolites at this point, as no biotransformation has occurred yet. First-generation metabolites usually have an exponential shape, as they are starting to be formed. If the metabolites are further metabolized, the signal of the metabolite will decrease since the metabolite has been consumed to generate a second-generation one. Typically, the second-generation metabolite has a sigmoidal shape since it needs the first-generation metabolite to form and then be further metabolized [ 8 ]. 
Shape of the metabolite peak: Ideally, metabolite peaks should exhibit a Gaussian shape; however, in practice, peak tails may occasionally occur. It is important to distinguish these from peaks that resemble background noise or exhibit irregular shapes, such as broad or asymmetric profiles, which may suggest contamination or interference rather than the presence of a true metabolite. [ 21 ] Structure Elucidation The second step of the algorithm proposes potential metabolite structure based on the fragmentation pattern for each peak detected in the peak selection step. Figure 4 illustrates the MS Spectra data interpretation window, highlighting the analysis of fragment structures used to generate the score, including the count of matches and mismatches. Download figure Open in new tab Fig 4. Fragmentation pattern for the M2-38 metabolite of oxytocin in incubation with chymotrypsin in 120 min. On the left, full scan/data-dependent MS/MS spectras for oxytocin and M2-38 are presented, while on the right, a subset of fragment structures derived from the selected matched peaks is displayed. This window allows for a comparison to determine if the metabolites exhibit a similar fragmentation pattern compared to the substrate fragmentation. Metabolite fragment ions may either share the same m/z as a parent fragment ion (non-shifted ion) or exhibit a defined mass shift (shifted ion). The MS and MS/MS spectra contain 5 types of fragments: Black Peaks: These peaks lack fragment assignments in the parent, they have no effect on the interpretation of the metabolite under consideration. Red Peaks: Represent matching peaks, and their structural interpretation aligns with the proposed metabolite structure. Clicking on red peaks reveals the assigned structure in the right panel. Cyan Peaks: Indicate mismatching peaks, and their structural interpretation contradicts the proposed metabolite structure. 
Coral Peaks: Correspond to metabolite matching peaks with structural information consistent with the proposed metabolite structure. However, they lack a substrate fragment match, resulting from manual editing or MassMetaSite if metabolite fragmentation is selected in the settings. Light Green Peaks: Denote metabolite mismatching peaks, providing structural information contrary to the proposed structure under study. These peaks lack substrate peak matches and stem from the propagation of a manually edited peak. It is essential to consider the isotope pattern and ensure that it aligns with the expected charge state of the metabolite. The charge of the ion significantly influences the spacing between isotopic peaks, and deviations in the observed pattern may serve as indicators of errors in charge assignment or other issues. Furthermore, the structural assignment of the isotope pattern peaks is checked manually. If the structure assignment of a match or mismatch peak is not the expected one, it can be removed from the analysis and therefore the score will be re-calculated. In addition, black peaks can be examined, and structural information can be added by using the fragment structure editor if it is considered. Processing Time In this study, the data processing time has also been collected, encompassing the duration required for importing data into WebMetabase. Notably, dataset-5 facilitated a comparison with previously reported processing times in the bibliography [ 13 ], utilizing the same software with an outdated version (2021). A comparison of the processing time has also been conducted between the different algorithms and settings outlined in the Data Preprocessing section. Since the processing time may vary depending on the peak algorithm employed, as well as the choice of visualization for the compound representation, including expanded, non-expanded, or mixed options. 
Results and Discussion This section presents the experimental results obtained through the application of our approach and algorithms to perform the metabolite identification of the five distinct peptide datasets and an oligonucleotide dataset. All these metabolite structural assignments have been checked manually and considered as reliable because the fragmentation was adequate, isotope pattern was as expected, the m/z small differences between the m/z of observed and theoretical (<10 ppm), and the score was high. Monoisotopic Mass and Most Abundant Mass One of the primary objectives of this study is to conduct a comprehensive comparison between the two algorithms, MiM and MaM. To achieve this goal, datasets 1, 2, 3, 4 and 6 as previously outlined, have undergone processing with both algorithm configurations. Table 6 presents the number of identified metabolites corresponding to each dataset, based on the employed algorithm. View this table: View inline View popup Download powerpoint Table 6: Number of identified metabolites for each dataset, considering the algorithm, incubation conditions, and acquisition mode (in case of dataset-3). Notable differences between MiM and MaM algorithms are observed in compounds such as calcitonin from dataset-1 or taspoglutide from dataset-2. These variations are attributed to the larger peptide structures of these compounds. As molecular size increases, the relative intensity of the MiM tends to decrease. In such cases, the use of the MaM algorithm provides a more precise metabolite identification in larger peptides. The analysis of dataset-1 resulted in the identification of 150 metabolites through the MiM algorithm, while 161 metabolites were identified using the MaM algorithm. 
Calcitonin, a cyclic peptide, is one of the largest peptides of this dataset (3429.71 Da), yielding the identification of the same 6 metabolites with both settings, M1-2178, M2-2309, M3-1981, M4-1852, M5-499, and M6-1739 with respective retention times of 1.86, 1.91, 2.45, 2.47, 2.52, and 2.99 minutes. However, there is a noticeable difference between them in the score values ( Table 7 ). A higher score indicates a better match between the theoretical product ion m/z value and the observed m/z value in the MS/MS spectrum and therefore a more confident structure prediction. This scoring system helps in distinguishing reliable matches from potential false positives. View this table: View inline View popup Download powerpoint Table 7. Retention times of the identified Calcitonin metabolites along with their corresponding values for score, matches, mismatches, and metmatches obtained using both algorithms. The dataset-2, consisting of GLP-1 and four synthetic analogues, comprises linear peptides with a molecular weight exceeding 3000 Da, thereby accentuating the significant differences when utilizing MaM or MiM algorithms. This contrast is evident in the case of taspoglutide, as illustrated below. Taspoglutide (3338.71 Da) incubated with DPP-4 has yielded 15 metabolite peaks with the MaM settings (M1-2175, M2-2163, M3-1966, M4-2223, M5-1925, M6-1895, M7-2222, M8-2154, M9-2255, M10-2094, M11-2147, M12-1977, M13-1396, M14-1146 and M15-407) with retention times of 2.69, 3.54, 3.74, 3.88, 3.96, 4.00, 4.26, 4.47, 4.48, 4.54, 4.54, 4.92, 5.27, 5.90, and 6.80 minutes, respectively. In contrast, using MiM settings, 14 metabolites have been identified, the same as with MaM, but missing M6-1895 (at a retention time of 4.00). Eight of the metabolites correspond to first-generation products (from a single reaction) and are indicated by the green color of the peak, as shown in Fig 5 . The other seven brown colored metabolites are indicative of multiple enzymatic reactions. 
A score is calculated and reported for each metabolite. It can be highlighted that the increased number of matches in the MaM analysis contributes to higher maximum score values. This increase in score values conveys a greater level of confidence in the results obtained. For example, with MaM the score for the metabolite M4-2223 is 1302.1 with 28 matching fragments, while with MiM the same metabolite results in a score of 807.1 with 17 matching fragments. Other results are shown in supporting information. Download figure Open in new tab Fig 5. Extracted ion chromatograms of Taspoglutide after 24 hours of incubation with DPP-4, using both algorithms (Blue peak: represents the parent peptide compound, green peaks: first generation of metabolites, and brown peaks: second generation or higher). The peptide GLP-1 (3297.68 Da) exhibits a brief half-life, primarily attributed to its swift degradation by proteases DPP-4 and NEP. Metabolite identification of GLP-1, incubated with DPP-4, revealed the presence of three metabolites: M1-137, M2-394, and M3-208, with respective retention times of 6.53, 6.58, and 6.66 minutes. Notably, M3-208 exhibits the common cleavage site reported in bibliography [ 22 ] and attributed to DPP-4, occurring between Ala(8) and Glu(9). A discernible distinction between the two algorithms lies in the appearance of false positives, as shown in Fig 6 , with a notable increase observed when employing the MiM settings. Download figure Open in new tab Fig 6. False positives of the GLP-1 compound using both the MaM and MiM algorithms. It is noteworthy that the number obtained with the MiM algorithm is significantly higher. Semaglutide, a GLP-1 analogue, underwent data collection using the HDMS E acquisition mode on a Waters® QToF instrument. 
Structural assignments for two degradation products, namely M1-3446 and M2-3418, have been achieved with both algorithms (MiM and MaM) with high mass accuracy, featuring retention times of 2.77 and 3.07 minutes, respectively ( Fig 7 ). Consistent with prior bibliography, these metabolites arise from three distinct metabolic modifications, specifically induced by amide hydrolysis and sequential beta-oxidation in the fatty acid part [ 23 ]. Download figure Open in new tab Fig 7. Metabolites identified and extracted ion chromatogram of Semaglutide using MiM algorithm. Dataset-3 (comprising somatostatin and seven synthetic analogs incubated with human serum) allows the analysis with different acquisition modes in order to illustrate the workflow for metabolite identification employing data coming from distinct structural mass spectrometry techniques such as DIA and DDA. DDA data was collected with Thermo Scientific Q-Exactive Hybrid Quadrupole-Orbitrap Mass Spectrometer (Q-Exactive) instrument employing full scan mode and DIA HDMS E data were acquired using a Vion IMS QTof Mass Spectrometer. Both types of data were processed through Mass-MetaSite, and subsequently uploaded to WebMetabase for visualization via the Mass-MetaSite Batch Processor. DDA and DIA data underwent processing with both algorithms (MiM and MaM). The results obtained show no distinctions. The identified metabolites, score values, and various parameters such as the numbers of matches, mismatches, and metmatches remain consistent across both algorithms. Considering the minimal chemical or monomer modifications within the peptide structure of these compounds, no substantial shift in molecular size was observed in this dataset. The analysis of this dataset collected with DDA led to the identification of 17 metabolites for each of the algorithms. All the metabolites identified have been produced from amide hydrolysis reactions. 
The principal metabolite formations observed include the generation of -Ala (−71 Da) and -AlaGly (−128 Da) from the linear segment of the structure ( Fig 8 ). The incorporation of D-Trp at the eighth position showed an improved stability over the parent compound somatostatin, due to the differences in the appearance of metabolism as synthetic analogs avoid the ring opening observed between D-Trp(8) and Lys(9). This observation aligns with findings from previous bibliography, which highlighted that the introduction of Msa residues, coupled with the presence of D-Trp8, contributes to the augmentation of aromatic side-chains interactions in Somatostatin, providing a greater stability [ 12 ]. Download figure Open in new tab Fig 8. Somatostatin (Parent compound) and major metabolites identified using both algorithms. Metabolites M5-128 and M6-71 indicate cleavages from the tail portion of somatostatin. Additionally, M3+18 represents a ring-opening product occurring between DTrp(8) and Lys(9). Similarly, for DIA data, the identification of key metabolites, specifically -Ala and - AlaGly, is consistent. As previously documented in bibliography [ 9 ], the analog labeled as 95 demonstrates superior stability, characterized by delayed and reduced metabolic transformations compared to other analogs. This stability is further elucidated in Fig 9 , which delineates the time/response profiles of the substrate, illustrating the gradual disappearance of the peptide. Download figure Open in new tab Fig 9. Substrate profiles employing In-In scaling for somatostatin, Analogue 31, 65, and 95. Dataset-5 contains 16 linear and 12 cyclic peptides, incubated with cathepsin G, neutrophil elastase, trypsin and MMP-12. The data was collected using LC-HRMS, with analysis performed on a Synapt G2® high-definition quadrupole time-of-flight mass spectrometer (Waters®), operating in positive electrospray ionization mode. 
The data processing time, employing the settings outlined in the referenced research study [ 13 ] and utilizing the non-expanded structure visualization, has undergone a substantial reduction. As an illustration, the compound salmon calcitonin, which conventionally needed two hours for processing, now requires only 25 minutes with the implementation of the new methodology. As an illustrative example of this dataset, the following compound and its analogs will be described, while the metabolite identification for the other compounds can be found in the Supplementary Information. Specifically, the dataset includes somatostatin, and analogs that have been synthesized over the past few decades introducing modifications such as exchange and deletion of amino acids, ring size reduction, or disulfide bridge modification, among others [ 12 ]. These analogs, namely octreotide, lanreotide, and vapreotide, are octapeptides characterized by a shorter and consequently less flexible ring structure compared to somatostatin. Previous bibliography reports that the ring opening from somatostatin and its analogs was only observed in the case of somatostatin, as also observed in this study [ 13 ]. Despite somatostatin being rapidly degraded by proteases, its analogs exhibit stability, as illustrated in Fig 10 , which presents extracted ion chromatograms after 60 minutes of incubation with neutrophil elastase. The processing time for these compounds was 15 minutes. Download figure Open in new tab Fig 10. Extracted ion chromatograms using MaM algorithms. A) somatostatin, B) lanreotide, C) octreotide, and D) vapreotide after 60 minutes of incubation with neutrophil elastase. Fig 11 presents a detailed metabolite identification of somatostatin incubated with neutrophil elastase. 
The analysis identified the same metabolites as reported in the previous bibliography [ 13 ]: M1-1371, M2-1204, M3-230, M4+18, M5-909, M6+18, and M7-661, with respective retention times (RT) of 0.73, 1.60, 1.60, 1.71, 1.93, 1.93, and 2.21. Dataset-6 contains data of human insulin (5808 Da), a cyclic peptide with three disulfide bridges, after incubation with IDE for 2 minutes. Computing using the MaM algorithm led to the identification of 12 metabolites, designated as M1-2965, M2-3315, M3-3145, M4-2973, M5-2902, M6-3452, M7-3151, M8-3032, M9-2961, M10-3289, M11-2869, and M12-2798, with respective retention times of 2.06, 7.78, 8.08, 9.14, 9.65, 9.80, 10.38, 11.17, 11.43, 11.69, and 12.39 minutes ( Fig 11 ). These metabolites have been previously documented in the bibliography and are generated through two cleavages, one within Chain A and the other within Chain B. Notably, four of them have been reported previously as major IDE-degraded insulin fragments ( Fig 12 ) [ 13 ]. The formation of these metabolites results from cleavage occurring either within the A chain, specifically at positions A13-14 or A14-15, and in the middle of the B chain, either at positions B9-10 or B14-15. Download figure Open in new tab Fig 11. Summarized MetID reports with each retention time (RT) from incubation of somatostatin with neutrophil elastase. Download figure Open in new tab Fig 12. Extracted ion chromatograms of Insulin after 2 min of incubation with IDE. Blue peak: substrate/parent peptide, green peaks: first generation metabolites, brown peaks: second generation or higher metabolites. In contrast, MiM identified 8 metabolites, M1-3306, M2-2971, M3-3450, M4-3150, M5-2959, M6-3287, M7-2867, and M8-2618, with respective retention times of 7.74, 9.14, 9.65, 9.78, 11.15, 11.43, 11.67, and 15.65 ( Fig 13 ). Notably, two of the major products previously reported in the bibliography are absent [ 13 ]. 
Moreover, consistent with previous observations, there is a significant difference in score values between the two algorithms, with MaM scores consistently higher due to the higher number of matches and the absence of mismatches ( Table 8 ). Download figure Open in new tab Fig 13. Four of the major products corresponding to Insulin fragments, using MaM algorithm, after incubation with IDE. These metabolites, resulting from two distinct cleavages—one within Chain A and the other within Chain B—have been previously identified in the bibliography. View this table: View inline View popup Download powerpoint Table 8: Retention times of the identified Insulin metabolites along with their corresponding values for score, matches, mismatches, and metmatches obtained using both algorithms. NI = Non-identified metabolites Structure visualization – Atoms/bonds vs monomer The analysis of biotransformation products for therapeutic oligonucleotides using LC-HRMS presents a significant challenge, primarily attributed to the high molecular weight of these compounds. Given that these oligonucleotides consist of multiple monomers susceptible to metabolic reactions, constructing a virtual set containing all potential metabolites becomes a resource-intensive task in terms of time and computational requirements. Furthermore, the extensive number of cleavable bonds amplifies the complexity of the fragmentation analysis, demanding additional time and computing resources. This study shows two fragmentation algorithms: one that allows the analysis at the monomer level (non-expanded) and the other at the atom/bond level (expanded). In this section, three experiments involving the incubation of ASOs in Human Liver at various timepoints are presented, comprising two sets incubated with distinct oligonucleotide strains (dataset-4). The data was acquired in DDA mode on a Thermo Q-Exactive® spectrometer. 
A total of 11 metabolites have been identified in both experiments (expanded and non-expanded) using the MaM algorithm, M1-5473, M2-5473, M3-3282, M4-2567, M5-930, M6-617, M7-616, M8-313, M9-312, M10-304, M11-304, with respective retention times of 7.17, 8.62, 14.42, 16.36, 17.11, 17.59, 17.84, 17.90, 17.92, 17.95, and 18.26 ( Fig 14 ). The identified structures of the metabolites can be attributed to specific biotransformation reactions, including o-dealkylation, phosphoester hydrolysis, aromatic deamination, and nucleobase loss. Download figure Open in new tab Fig 14. Extracted ion chromatograms of ASOs after 72 hours of incubation with the modified strain. In contrast, using the MiM algorithm, a total of 7 metabolites have been identified (using non-expanded visualization), M1-5470, M2-5470, M3-2566, M4-617, M5-313, M6-313, and M7-304, with respective retention times of 7.17, 8.62, 16.36, 17.55, 17.89, 17.93, and 18.24. Table 9 illustrates the score value differences between the two algorithms. View this table: View inline View popup Download powerpoint Table 9: Retention times of the identified ASO metabolites along with their corresponding values for score, matches, mismatches, and metmatches obtained using both algorithms. NI = Non-identified metabolites In Fig 15 , the two distinct structure visualizations are presented for the same identified metabolite, showcasing a nucleobase loss from the parent compound and two phosphoester hydrolyses. The depiction at the bond level provides a clearer understanding of the biotransformation pathways and chemical alterations experienced by the compound. It is noteworthy to consider the processing time, which, in this specific example, is 40 minutes for the non-expanded representation and extends to 70 minutes when three of the monomers are expanded. Download figure Open in new tab Fig 15. Illustration of nucleobase loss in both expanded and non-expanded structural representations of ASO. 
This visualization algorithm allows monomer and atom/bond notation to be combined, making it easy to see the metabolic changes in the structure. As a result, the need to expand all monomers individually is avoided, alleviating the associated high processing time. The constrained structure alignment between the substrate and the metabolite, maintaining the same orientation, allows for the interpretation of the occurred biotransformations. Conclusions A new automated workflow for LC-HRMS data analysis has been described and developed, addressing challenges associated with result visualization and computational time in processing incubated data of macromolecules. This approach has proved effective for the analysis of both linear and cyclic peptides containing natural or unnatural amino acids. A total of 970 metabolites have been identified across different incubation conditions and peak detection algorithms. Furthermore, its applicability extends beyond peptides, as demonstrated by successful processing of oligonucleotide data. The results have shown that the workflow can efficiently manage experimental data within a molecular range spanning 700 to 7630 Da. Importantly, its effectiveness has been validated across multiple acquisition modes, as data coming from different acquisition modes (DDA and DIA) has been processed. WebMetabase was employed for the processing and visualization of data derived from six datasets using different algorithms in the data preprocessing step. Two different algorithms have been used for peak detection, MiM and MaM. Particularly in larger molecules, the MaM algorithm demonstrated higher scores and a greater number of matches, instilling greater confidence in the accurate prediction of metabolite structures. Furthermore, the study highlighted one of the datasets, elucidating the two available visualization options for representing macromolecules during data analysis. 
This visualization algorithm allows the combination of monomer and atom/bond notation, facilitating a clear depiction of metabolic changes in the molecular structure. Supporting information S1 File. Metabolite identification reports exported from WebMetabase for each compound incubated in each protease using both algorithms. S2 File. Dataset-1 and Dataset-2 MaM Settings. S3 File. Dataset-1 and Dataset-2 MiM Settings. S4 File. Dataset-2 Semaglutide MaM Settings. S5 File. Dataset-2 Semaglutide MiM Settings. S6 File. Dataset-3 MaM Settings DDA. S7 File. Dataset-3 MiM Settings DDA. S8 File. Dataset-3 MaM Settings DIA. S9 File. Dataset-3 MiM Settings DIA. S10 File. Dataset-4 MaM Settings S11 File. Dataset-4 MiM Settings S12 File. Dataset-5 MaM Settings S13 File. Dataset-6 MaM Settings S14 File. Dataset-6 MiM Settings Acknowledgments This work was supported by the Generalitat de Catalunya and Lead Molecular Design S.L through the Industrial Doctorate grant 00002/2023. References 1. ↵ Wu Y , Pan L , Chen Z , Zheng Y , Diao X , Zhong D . Metabolite Identification in the Preclinical and Clinical Phase of Drug Development . Curr Drug Metab . 2021 ; 22 ( 11 ): 838 – 57 . doi: 10.2174/1389200222666211006104502 . OpenUrl CrossRef PubMed 2. ↵ Wishart DS . Emerging Applications of Metabolomics in Drug Discovery and Precision Medicine . Nat Rev Drug Discov . 2016 Jun 30 ; 473 – 84 . doi: 10.1038/nrd.2016.32 . OpenUrl CrossRef PubMed 3. ↵ Pang H , Hu , Z. Metabolomics in Drug Research and Development: The Recent Advances in Technologies and Applications . Acta Pharm Sin B . 2023 Aug 1 ; 3238 – 51 . doi: 10.1016/j.apsb.2023.05.021 . OpenUrl CrossRef 4. ↵ Misra BB . New Software Tools, Databases, and Resources in Metabolomics: Updates from 2020 . Metabolomics . 2021 May 1 . doi: 10.1007/s11306-021-01796-1 . OpenUrl CrossRef PubMed 5. ↵ Xu C , Ma B . Software for computational peptide identification from MS–MS data . Drug Discov Today . 2006 ; 11 ( 13–14 ): 595 – 600 . 
doi: 10.1016/j.drudis.2006.05.011 PMID: 16793527 . OpenUrl CrossRef PubMed 6. ↵ Mass Analytica . WebMetabase . Feb 2023. 7. ↵ Mass Analytica . MassMetaSite . Feb 2023. 8. ↵ Radchenko T , Brink A , Siegrist Y , Kochansky C , Bateman A , Fontaine F , Morettoni , L. ; Zamora I . Software-Aided Approach to Investigate Peptide Structure and Metabolic Susceptibility of Amide Bonds in Peptide Drugs Based on High Resolution Mass Spectrometry . PLoS One . 2017 ; 12 ( 11 ). doi: 10.1371/journal.pone.0186461 . OpenUrl CrossRef 9. ↵ Radchenko T , Escolà A , Riera A , Valeri A , Zamora I . Software assisted analysis for peptide drug metabolism. Unpublished manuscript in: Radchenko T. New advances in metabolism prediction: Biotransformation of peptides and its implications in drug discovery [PhD thesis]. Barcelona : Universitat Pompeu Fabra ; 2018 . Available from: https://www.tdx.cat/handle/10803/665008 . 10. Waters Corporation . Metabolite Identification of Complex Cyclic Peptides Using WebMetabase, Ion Mobility-Enabled DIA, and Product Ion Confirmation. [Internet] . Waters Corporation; 2020 [cited 2024 Jan 30]. Available from: https://www.waters.com/nextgen/fr/fr/library/application-notes/2020/metabolite-identification-of-complex-cyclic-peptides-using-webmetabase,-ion-mobility-enabled-dia,-and-product-ion-confirmation.html?srsltid=AfmBOoq-dBDE8QYBzfq8UZqcrUChEe3jzflBm6tYPCgbrJtpgyki24sR 11. ↵ Basiri B , Xie F , Wu B , Humphreys SC , Lade JM , Thayer MB , Yamaguchi P , Florio M , Rock B . Introducing an in vitro liver stability assay capable of predicting the in vivo pharmacodynamic efficacy of siRNAs for IVIVC . Mol Ther Nucleic Acids . 2020 ; 21 : 725 – 36 . OpenUrl CrossRef PubMed 12. ↵ Martín-Gago P , Aragón E , Gomez-Caminals M , Fernández-Carneado J , Ramón R. , Martin-Malpartida P , Verdaguer X , López-Ruiz P , Colás B , Cortes M. A , Ponsati B , Macias MJ , Riera A . Insights into Structure-Activity Relationships of Somatostatin Analogs Containing Mesitylalanine . 
Molecules . 2013 ; 18 ( 12 ): 14564 – 84 . doi: 10.3390/molecules181214564 . OpenUrl CrossRef PubMed 13. ↵ Wesche F , De Maria L , Leek T , Narjes F , Bird J , Su W , Czechtizky W . Automated High-Throughput in Vitro Assays to Identify Metabolic Hotspots and Protease Stability of Structurally Diverse, Pharmacologically Active Peptides for Inhalation . J Pharm Biomed Anal . 2022 ; 211 . doi: 10.1016/j.jpba.2021.114518 . OpenUrl CrossRef 14. ↵ Bonn B , Leandersson C , Fontaine F , Zamora I . Enhanced metabolite identification with MS(E) and a semi-automated software for structural elucidation . Rapid Commun Mass Spectrom . 2010 ; 24 ( 21 ): 3127 – 3138 . doi: 10.1002/rcm.4753 PMID: 20941759 . OpenUrl CrossRef PubMed Web of Science 15. ↵ Cece-Esencan EN , Fontaine F , Plasencia G , Teppner M , Brink A , Pahler A , et al. Software-aided cytochrome P450 reaction phenotyping and kinetic analysis in early drug discovery . Rapid Commun Mass Spectrom . 2016 ; 30 ( 2 ): 301 – 310 . doi: 10.1002/rcm.7429 PMID: 26689160 . OpenUrl CrossRef PubMed 16. ↵ Zelesky V , Schneider R , Janiszewski J , Zamora I , Ferguson J , Troutman M . Software automation tools for increased throughput metabolic soft-spot identification in early drug discovery . Bioanalysis . 2013 May ; 5 ( 10 ): 1165 – 79 . doi: 10.4155/bio.13.89 PMID: 23721441 . OpenUrl CrossRef PubMed 17. ↵ Soares R , Franco C , Pires E , Ventosa M , Palhinhas R , Koci K , Martinho de Almeida A , Varela Coelho A . Mass spectrometry and animal science: protein identification strategies and particularities of farm animal species . J Proteomics . 2012 Jul 19 ; 75 ( 14 ): 4190 – 206 . doi: 10.1016/j.jprot.2012.04.009 PMID: 22543184 . OpenUrl CrossRef PubMed 18. ↵ Radchenko T , Kochansky CJ , Cancilla M , Wrona MD , Mortishire-Smith RJ , Kirk J , Murray G , Fontaine F , Zamora I . Metabolite identification using an ion mobility enhanced data-independent acquisition strategy and automated data processing . Rapid Commun Mass Spectrom . 
2020 Jun 30 ; 34 ( 12 ): e8792 . doi: 10.1002/rcm.8792 PMID: 32208529 . OpenUrl CrossRef PubMed 19. ↵ Manolopoulou M , Guo Q , Malito E , Schilling AB , Tang WJ . Molecular Basis of Catalytic Chamber-Assisted Unfolding and Cleavage of Human Insulin by Human Insulin-Degrading Enzyme . J Biol Chem . 2009 ; 284 ( 21 ): 14177 – 88 . doi: 10.1074/jbc.M900068200 . OpenUrl Abstract / FREE Full Text 20. ↵ Mass Analytica . Available from: https://mass-analytica.com/products/webchembase/chromatography-quality-and-multiple-signal-detection/ [cited 2024 Feb 26]. 21. ↵ Wahab M , Patel D , Armstrong D . Peak shapes and their measurements: the need and the concept behind total peak shape analysis . LC GC North Am . 2017 Dec ; 12 : 846 – 53 . OpenUrl 22. ↵ Manandhar B , Ahn JM . Glucagon-like Peptide-1 (GLP-1) Analogs: Recent Advances, New Possibilities, and Therapeutic Implications . J Med Chem . 2015 ; 58 ( 3 ): 1020 – 37 . doi: 10.1021/jm500810s . OpenUrl CrossRef PubMed 23. ↵ Jensen L , Helleberg H , Roffel A , van Lier JJ , Bjørnsdottir I , Pedersen PJ , Rowe E , Derving Karsbøl J , Pedersen ML . Absorption, Metabolism and Excretion of the GLP-1 Analogue Semaglutide in Humans and Nonclinical Species . Eur J Pharm Sci . 2017 ; 104 : 31 – 41 . doi: 10.1016/j.ejps.2017.03.020 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted May 07, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following An automated software-assisted approach for exploring metabolic susceptibility and degradation products in macromolecules using High-Resolution Mass Spectrometry Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share An automated software-assisted approach for exploring metabolic susceptibility and degradation products in macromolecules using High-Resolution Mass Spectrometry Paula Cifuentes , Ismael Zamora , Tatiana Radchenko , Fabien Fontaine , Albert Garriga , Luca Morettoni , Jesper Kammersgaard Christensen , Hans Helleberg , Bridget A. Becker bioRxiv 2025.05.01.651614; doi: https://doi.org/10.1101/2025.05.01.651614 Share This Article: Copy Citation Tools An automated software-assisted approach for exploring metabolic susceptibility and degradation products in macromolecules using High-Resolution Mass Spectrometry Paula Cifuentes , Ismael Zamora , Tatiana Radchenko , Fabien Fontaine , Albert Garriga , Luca Morettoni , Jesper Kammersgaard Christensen , Hans Helleberg , Bridget A. 
Becker bioRxiv 2025.05.01.651614; doi: https://doi.org/10.1101/2025.05.01.651614 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41913) Biophysics (21436) Cancer Biology (18578) Cell Biology (25482) Clinical Trials (138) Developmental Biology (13372) Ecology (19889) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15599) Genomics (22483) Immunology (17728) Microbiology (40365) Molecular Biology (17163) Neuroscience (88540) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15130) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9818) Zoology (2269)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-23T02:00:01.238055+00:00

License: CC-BY-4.0