Full text
86,254 characters
· extracted from
preprint-html
· click to expand
Clonal Evolution of Pediatric Acute Myeloid Leukemia and Its Contribution to Disease Relapse | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Clonal Evolution of Pediatric Acute Myeloid Leukemia and Its Contribution to Disease Relapse View ORCID Profile Masayuki Umeda , View ORCID Profile Pandurang Kolekar , View ORCID Profile Anna LW Huskey , View ORCID Profile Shruthi Suryaprakash , View ORCID Profile Jing Ma , View ORCID Profile Hanxia Li , View ORCID Profile Tamara Westover , Michael P. Walsh , Guangchun Song , Yanling Liu , View ORCID Profile Quang Tran , View ORCID Profile Vidya Balagopal , View ORCID Profile Lu Wang , View ORCID Profile Samuel W. Brady , View ORCID Profile Tanja A Gruber , View ORCID Profile Stan Pounds , Paul E. Mead , View ORCID Profile Seth E. Karol , View ORCID Profile Hiroto Inaba , View ORCID Profile Jeffrey E. Rubnitz , View ORCID Profile Raul Ribeiro , View ORCID Profile Xiaotu Ma , View ORCID Profile Jeffery M. Klco doi: https://doi.org/10.1101/2025.11.04.25339472 Masayuki Umeda 1 Department of Pathology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Masayuki Umeda Pandurang Kolekar 2 Department of Computational Biology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Pandurang Kolekar Anna LW Huskey 1 Department of Pathology, St. Jude Children’s Research Hospital , TN 3 Department of Pharmacology, DeBusk College of Osteopathic Medicine, Lincoln Memorial University , Harrogate, TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Anna LW Huskey Shruthi Suryaprakash 4 Department of Oncology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Shruthi Suryaprakash Jing Ma 1 Department of Pathology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jing Ma Hanxia Li 2 Department of Computational Biology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Hanxia Li Tamara Westover 1 Department of Pathology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tamara Westover Michael P. Walsh 1 Department of Pathology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site Guangchun Song 1 Department of Pathology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yanling Liu 2 Department of Computational Biology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site Quang Tran 2 Department of Computational Biology, St. Jude Children’s Research Hospital , TN 5 Department of Bioinformatics and Computational Biology, The University of Texas MD Anderson Cancer Center , Houston, TX Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Quang Tran Vidya Balagopal 1 Department of Pathology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Vidya Balagopal Lu Wang 1 Department of Pathology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Lu Wang Samuel W. Brady 2 Department of Computational Biology, St. Jude Children’s Research Hospital , TN 6 Department of Pharmacy and Pharmaceutical Sciences, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Samuel W. Brady Tanja A Gruber 7 Department of Pediatrics, Stanford University School of Medicine , Stanford, CA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tanja A Gruber Stan Pounds 8 Department of Biostatistics, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Stan Pounds Paul E. Mead 1 Department of Pathology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site Seth E. Karol 4 Department of Oncology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Seth E. Karol Hiroto Inaba 4 Department of Oncology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Hiroto Inaba Jeffrey E. Rubnitz 4 Department of Oncology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jeffrey E. Rubnitz Raul Ribeiro 4 Department of Oncology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Raul Ribeiro Xiaotu Ma 2 Department of Computational Biology, St. Jude Children’s Research Hospital , TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Xiaotu Ma For correspondence: Jeffery.Klco{at}STJUDE.ORG Xiaotu.Ma{at}STJUDE.ORG Jeffery M. Klco 1 Department of Pathology, St. Jude Children’s Research Hospital , TN 9 Center of Excellence for Leukemia Studies (CELS), St. Jude Children’s Research Hospital , Memphis, TN Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jeffery M. Klco For correspondence: Jeffery.Klco{at}STJUDE.ORG Xiaotu.Ma{at}STJUDE.ORG Abstract Full Text Info/History Metrics Supplementary material Preview PDF ABSTRACT Relapse remains the leading cause of mortality in pediatric acute myeloid leukemia (AML), yet the genetic changes contributing to relapse remain incompletely defined. To address this gap, we performed whole-genome sequencing and targeted-capture sequencing on 39 diagnosis–relapse and 2 relapse–relapse pairs of pediatric AML. Mutational burden increased at relapse, largely reflecting spontaneous mutagenesis, whereas therapy-related signatures were rarely observed and only occasionally associated with pathogenic mutations. Although recurrently enriched mutations at relapse included those in FLT3 , WT1 , and TP53 , relapse-fated subclones were frequently marked only by non-pathogenic or non-coding variants. Longitudinal deep sequencing in eight patients showed rapid depletion of major clones after induction therapy, whereas subclones often displayed variable chemosensitivity. Relapse-specific mutations emerged only late or remained undetectable during remission, suggesting that clonal selection of pre-existing clones is the predominant mechanism of relapse. Transcriptome analysis of paired RNA sequencing data revealed no differentially expressed genes, but gene set enrichment analysis and CIBERSORT deconvolution in each pair uncovered heterogeneous trajectories to relapse. Although relapse is often attributed to the emergence of stem-like phenotypes, our data demonstrate that transcriptional evolution is more diverse: some cases acquired stem-like features, whereas others showed partial differentiation, which was confirmed by re-analysis of a public single cell RNA sequence dataset. These changes were largely constrained by baseline differentiation states at diagnosis. Together, our data indicate that pediatric AML relapse arises through selection of pre-existing clones with diverse trajectories, underscoring the need to target both stem-like and differentiated populations to achieve durable cures. INTRODUCTION Acute myeloid leukemia (AML) makes up approximately 20% of acute pediatric leukemias and has a long-term survival rate of approximately 70% 1 , which only marginally increased over the past few decades, largely due to improved supportive care and the intensification of treatments. 2 Although many patients achieve remission during induction therapies, the major cause of mortality is attributed to relapse, which often shows aggressive and chemo-resistant phenotypes. Thus, understanding the biology behind disease relapse will be a fundamental step toward improving clinical outcomes of pediatric AML. Relapses of acute leukemia can be attributed to multiple mechanisms. We have shown that initiating driver alterations are a major determinant of clinical outcomes of pediatric AML, with relapsed pediatric AML cases enriching for specific driver alterations such as UBTF tandem duplications or NUP98 rearrangements, which show primary resistance or high measurable/minimal residual disease (MRD) positivity. 3 , 4 These findings indicate that driver alterations are intrinsically associated with chemoresistance and relapse, and targeting the functions of driver alterations can be an effective approach to eliminate all leukemic clones and cure the disease. Also, the enrichment of specific cooperating alterations at relapse, such as WT1 or FLT3 -ITD mutations, indicates that these somatic alterations render cells with chemo-resistance or growth advantage over cells without mutations. Similarly, a genomic study of longitudinal diagnosis, relapse, and primary resistant specimens from 48 adult and 25 pediatric patients with AML also highlighted gene alterations frequently found at relapse. 5 However, these alterations are often found clonally or sub-clonally at diagnosis, contrasting to the acquisition of chemotherapy-related somatic mutations during chemotherapy for B-ALL, such as NT5C2 and NR3C1 mutations with thiopurine treatment 6 , or limited cases of AML treated with FLT3 inhibitors 7 or menin inhibitors. 8 , 9 Despite the known genotoxicity of cytotoxic agents used in AML induction therapy, whether these drugs can promote gene mutations related to specific chemoresistance mechanisms leading to relapse is to be studied. Additionally, expression profiles resembling hematopoietic stem and progenitor cells (HSPC) at the bulk RNA expression level are associated with relapse 10 , and recent advances in single cell technologies showed the enrichment of stem-like populations at relapse 11 , 12 , indicating that stem-like populations are a major source of recurring diseases. Several somatic alterations at diagnosis, such as GATA2/MECOM or WT1 mutations, were associated with more primitive transcriptional states 9 , whereas how the acquisition of somatic alterations can affect the transcriptional states and contribute to the evolution of mutated clones in individual AML cases needs further investigation. In this study, we profiled 39 diagnosis-relapse (D-R) pairs and two relapse-relapse (R-R) pairs of pediatric AML using whole-genome sequencing (WGS) and target-capture sequencing (TCS), showing that the majority of pathogenic mutations are found at diagnosis, except for those in a subset of genes such as FLT3 , WT1 , or CREBBP. Comparison of genetic and transcriptional profiles at diagnosis and relapse showed the expansion of genetic clones at relapse associated with variable stem-like or differentiated phenotypes, whereas relapse-fated subclones were not always marked by pathogenic mutations. Longitudinal deep sequencing of eight cases showed that genetic clones rapidly decline after induction therapy at variable rates, indicating differential chemosensitivity, whereas the emergence of clones with new mutations was a late event, possibly contributing to clonal competition within residual diseases. METHODS Patient cohort Samples from patients with relapsed acute myeloid leukemia were obtained with written informed consent using a protocol approved by the institutional review board (IRB) (Supplementary Table 1). Samples for WGS (n=16), TCS (n=97), RNA sequencing (RNAseq, n=7), and single cell RNA sequencing (scRNAseq, n=2) were newly sequenced in this study, and the rest of the data were obtained from previous publications (Supplementary Tables 1-2). Mutational profiling with WGS and TCS We newly subjected DNA from 16 samples to WGS targeting 40X coverage, followed by mapping to the reference human genome assembly GRCh37-lite with bwa. 13 Combined with previously published data, all 41 pairs had WGS data with a median coverage of 49X, which underwent paired analyses with germline samples. Specifically, single-nucleotide variants (SNVs) and insertions/deletions (indels) were called by Bambino 14 , copy number variations (CNVs) by CONSERTING 15 , and structural variations (SVs) by CREST. 16 Detected coding variants were confirmed by manual inspection. Labels of the pathogenicity of mutations (pathogenic/likely pathogenic: P/LP) were assigned based on the literature and known function. A custom capture panel covering 1.3 Mb genomic regions, including 5,979 SNVs/InDels and 395 SVs identified in WGS data in this study, was used for resequencing. Libraries were prepared from 37 patients using TWIST Biosciences NGS Target Enrichment Library Preparations with enzymatic fragmentation according to the manufacturer’s protocol. The remaining four patients did not have sufficient material for additional sequencing. Pooled samples were sequenced using a Novaseq SP 300 cycle kit (paired-end, 150bp) targeting 500X coverages and mapped to the reference human genome assembly GRCh37-lite with bwa 13 , with a median coverage of 514X. SNVs were called using SequencErr 17 , and Indels and SVs were called using SVIndelGenotyper. 18 Alterations with supporting reads more than background levels were regarded as real ( P <0.05 by binomial tests). Remission samples for 8 cases were also sequenced using the same pipeline, except for sequencing targeting 5000x with a NovaSeq S1 200-cycle kit, with a median coverage of 4890X. Alterations with supporting reads more than background levels were regarded as real ( P <0.05 by binomial tests) as described previously 6 (see Methods). Mutations in relapse samples suspected to be contamination of normal bone marrow cells from allogenic hematopoietic stem cell transplant (allo-HSCT) graft were removed manually. Diagnosis or relapse-enriched alterations were defined as variants with significantly decreased or increased allele frequencies from diagnosis to relapse using WGS data (false discovery rate: FDR-adjusted P -value<0.05 with the Fisher’s exact test followed by the Benjamini-Hochberg adjustment). Relapse-specific mutations were defined as mutations undetectable at diagnosis with both WGS and TCS, whereas the possibility that mutated clones pre-existed at low frequencies below the detection limit cannot be excluded. RESULTS Study cohort of relapsed pediatric AML We established a cohort of 41 pediatric patients with relapsed AML from clinical trials sponsored by St. Jude Children’s Research Hospital 19 – 24 ( Fig.1A , Supplementary Table 1). The cohort represented major molecular subtypes of pediatric AML ( Fig.1B ), such as KMT2A -rearrangements ( KMT2A r: n=13, 31.7%) and NUP98 rearrangements ( NUP98 r, n=6, 14.6%), as well as NPM1 mutations (n=3, 7.4%), RUNX1::RUNX1T1 (n=2, 4.9%), or UBTF tandem duplications (TD) (n=2). Among 39 D-R pairs, the median age at diagnosis was 10.6 years (range: 0.7-20.2), and the days between diagnosis and relapse for D-R pairs were 335 days (175-1773). All diagnosis cases received cytotoxic chemotherapy based on anthracycline, cytarabine, and etoposide ( Fig.1C ), whereas treatment protocols, which included immunotherapy or molecularly targeted agents, varied according to their assigned clinical trials ( e.g., gemtuzumab ozogamicin: GO in AML02 20 or sorafenib in AML08 21 ). Twelve patients (29.3%) received allo-HSCT, and seven patients received NK-cell transfusion (17.1%) between two timepoints. Download figure Open in new tab Figure 1. Study cohort of diagnosis-relapse paired pediatric AML. A. Study design showing patient sources, numbers, and timing of sample collection of 41 patient pairs. B. Patient characteristics (age, left and molecular categories, right). C. Clinical characteristics of individual patients within the cohort, including treatment protocols, off-protocol treatment, and days to relapse. Lines of the box plots for patient age represent the 25% quantile, median, and 75% quantile. The upper whisker represents the higher value of maxima or 1.5 x interquartile range (IQR), and the lower whisker represents the lower value of minima or 1.5 x IQR. Abbreviations. WGS: whole-genome sequencing, TCS: target-capture sequencing, UBTF -TD: UBTF -tandem duplication, allo-HSCT: allogeneic hematopoietic stem cell transplant. Abbreviations of treatment protocols are shown within the figure. Genomic profiling of pediatric AML at diagnosis and relapse Paired WGS analysis of 39 D-R and two R-R paired samples identified 22,245 SNVs, 156 indels, 377 CNVs, and 395 SVs (Supplementary Tables 3-6). TCS from 37 patients validated 98.8% (7656/7752) of SNV/indel calls and 95.0% (477/502) SV calls at diagnosis and relapse (Supplementary Figure 1). Among 139 pathogenic or likely pathogenic (P/LP) co-occurring alterations ( Fig.2A ), WT1 (n=19) and NRAS (n=17) were most commonly mutated in the cohort, followed by FLT3 (internal tandem duplication-ITD: n=6, tyrosine kinase domain-TKD: n=7, juxtamembrane domain-JMD: n=3) and SETD2 (n=8). The total number of co-occurring P/LP mutations was generally small and did not significantly differ between time points, with median (range) P/LP mutations of 3 (0-9) at diagnosis and of 3 (0-6) at relapse ( P =0.48). Among them, 38 mutations in 22 genes were enriched (with significantly higher variant allele frequency: VAF) at relapse. FLT3 (n=6), WT1 (n=5), and TP53 (n=3) were most frequently observed ( Fig.2B ), suggesting that clones harboring these mutations may have a competitive advantage. Notably, 37 alterations, including FLT3 (n=6), WT1 (n=6), TP53 , CREBBP , NRAS , IKZF1 , and CBF alterations (n=2 each), were detected only at relapse, despite targeted-capture sequencing at 500x coverage at diagnosis, indicating that these mutations were either present in minor clones below the limit of detection (∼1.0% VAF) at diagnosis or were acquired during or after chemotherapy. Interestingly, clones with these mutations do not always expand at relapse, with WT1 and FLT3 alterations showing various patterns of depletion and enrichment ( Fig.2C ). In contrast, NRAS mutations at diagnosis were frequently decreased to undetectable levels at relapse (7/15: 46.7%), suggesting that clones with NRAS mutations are more sensitive to chemotherapy or less competitive during disease progression. These data suggest that AML cells acquire somatic mutations during disease progression, whereas clones with pathogenic somatic mutations frequently exist at diagnosis and selectively expand during disease progression. Somatic mutations acquired during disease progression The mutational burden (SNV/indel) showed a significant increase at relapse compared to diagnosis; median mutations per sample were 216 (range: 10-838) at diagnosis and 376 (71-1893) at relapse ( P =9.7x10 -6 , Wilcoxon signed-rank test, Fig.3A ). SV and CNV were also more frequently observed at relapse (median SV: 4 vs 8, P =6.2x10 -5 , median CNV: 3 vs 5, P =7.8x10 -3 ), although the detection of subclonal CNV (< 20%) can be underestimated by WGS. 25 The mutational burden at diagnosis had a strong correlation with age (Spearman’s coefficient: R =0.68, P =1.8x10 -6 , Fig.3B ), whereas one case with a TP53 mutation (SJAML016581) showed an outlier high mutation burden. Contrarily, mutations newly detected at relapse had only a weak, non-significant correlation ( R =0.18, P =0.28) with time to relapse, indicating that differences in treatment or tumor biology affected the mutagenesis. We then investigated the mutational processes underlying somatic mutations found at diagnosis (“ de novo mutations”) and those detected only at relapse (“relapse-specific mutations”) by analyzing their mutational signatures 26 ( Fig.3C , Supplementary Table 7). De novo mutations were typically enriched for Single Base Substitution (SBS) Signatures SBS1 and SBS5, associated with spontaneous mutation accumulation and consistent with the correlation between mutation number and age at diagnosis. An exception is the presence of SBS18 (reactive oxygen species-related) or SBS2/SBS100 (APOBEC-like) found in six cases. Relapse-specific mutations also exhibited predominantly SBS1/SBS5 signatures coupled with the enrichment of distinct treatment-related signatures in some cases. For example, SBS87 (thiopurine-associated) was found in a patient (SJAML001409) treated with 6-mercaptopurine (6-MP) for off-protocol therapy, and a ganciclovir-related signature was observed in two patients (SJAML016582, SJAML061532) who received ganciclovir for treatment or prophylaxis of cytomegalovirus infection. 27 Notably, samples with these treatment-associated signatures harbored P/LP mutations that can be explained by the underlying mutagenic mechanisms ( Fig.3D ); however, the probabilities were overall low, indicating that while therapy-related mutagenesis may cause pathogenic mutations in rare cases ( e.g. , CEBPA in SJAML016582), its overall contribution to relapse appears modest, and additional non-genetic mechanisms are likely involved. Download figure Open in new tab Figure 2. Genomic profiling of pediatric AML at diagnosis and relapse. A. Heatmap showing mutational profiles of pathogenic or likely pathogenic (P/LP) somatic alterations in 39 diagnosis-relapse pairs and two relapse-relapse pairs. Each pair is shown side-by-side. The colors of the heatmap show types of alterations, and arrows indicate enrichment or depletion at relapse. Patient annotations on the top show sex, time points, age, and time to relapse of each patient pair. B. Bar graphs showing numbers of significantly enriched or depleted mutations between two time points of 41 patient pairs (left: red-enrichment, gray-stable, and blue-depletion), and mutations detected at the two time points (right: dark red-relapse only, dark gray-both time points, dark blue-diagnosis only). C. Mutational patterns of FLT3 (left-top), WT1 (left-middle), and NRAS (left-bottom), and changes of variant allele frequencies between two time points. The colors of dots indicate types of mutations (left), and the colors of lines indicate patients with these mutations. Gray lines show mutations found in individual patients. Lines of the box plots represent the 25% quantile, median, and 75% quantile. The upper and lower whisker represents the higher value of maxima or 1.5 x IQR and the lower value of minima or 1.5 x IQR, respectively. Download figure Open in new tab Figure 3. Somatic mutations acquired during disease progression. A. Comparison of mutational burdens at diagnosis and relapse. P -value was calculated by the Wilcoxon signed-rank test. B. Scatter plots of age at diagnosis and mutational burden at diagnosis (left) and time to relapse (days) and newly detected mutations at relapse (right). Spearman’s rank correlation was used to assess the coefficient and its statistical significance. C. Bar plots showing the estimated contribution of known mutational signatures to somatic mutation found at diagnosis (left) and newly detected at relapse (middle). The colors of bars indicate the types of known mutational signatures. The right panel shows representative cases compared with known mutational signatures mainly contributing to the case (top-SJAML016582 and SBSA-ganciclovir, mid: SJAML001409 and SBS87 (thiopurine-associated), bottom: SJAML040277 and SBS5-clock-like). D. Contribution of mutational signatures to pathogenic somatic mutations found in patients with drug-associated mutational signatures. E. Odds ratio (dots), 95% confidence interval (lines), and statistical significance between drugs used for treatment and newly detected somatic mutations ( FLT3 and WT1 ) assessed by the Fisher’s exact test. Multiple testing adjustment is not applied due to the limited sample size for this explorative analysis. We also examined associations between recurrently enriched or acquired P/LP mutations (n=21) as resistance mechanisms for treatment, acknowledging the limited statistical power due to the small sample size for multiple testing adjustment. FLT3 mutations were associated with sorafenib treatment ( P =0.017), and three of four acquired FLT3- TKD mutations were found in sorafenib-treated patients ( P =7.1x10 -3 , Fig.3E ), consistent with the well-known association between sorafenib resistance and FLT3 -TKD mutations. 28 Additionally, we observed new FLT3 -ITDs in two of these patients ( P =0.018), although the newly observed ITD had similar length and insertion sites as the original, differing from FLT3 -ITDs associated with FLT3 inhibitor resistance. 29 As additional findings, WT1 mutations were enriched in two patients treated with vorinostat ( P =0.021), which might imply a relationship between WT1 mutations and vorinostat-induced downregulation of WT1 expression. 30 Clonal evolution during disease progression Given the scarcity of new pathogenic mutations at relapse, we investigated how existing clones contribute to relapse by comparing VAFs of somatic mutations at diagnosis and relapse in each case ( Fig.4A -B, Supplementary Table 8, Supplementary Figure 2). Clustering of mutations according to VAFs at diagnosis and relapse showed specific patterns in each patient. Initiating and category-defining alterations, such as RUNX1::RUNX1T1 or KMT2A r, were identified with high VAFs in both time points ( Fig.4A ). However, in the majority of cases (n=39, 95.1%), we observed that mutations in dominant clones or subclones (>10% VAF at diagnosis) were depleted to undetectable levels in the later time points ( Fig.4B ). Contrarily, subclones marked by somatic mutations with low VAFs at diagnosis became dominant in 36 cases (87.8%), indicating that leukemic cells were frequently depleted down to only a few genetic clones during treatment. Among 37 cases with sufficient tumor purity to infer clonality, relapses of 21 cases (56.8%) were inferred to have evolved from single genetic clones ( e.g ., SJAML060227 in Fig.4A ), whereas 16 cases (43.2%) were estimated to have a few genetic clones contributing to relapse, with each clone supported by at least two mutations. Relapses that evolved with multiple clones showed significantly shorter remission ( P =1.2x10 -3 ); multiple: median 266 days (85-370) and single: 547 days (175-1773). These subclones fated for relapse were recurrently marked by SETD2 (n=3), WT1 (n=2), or NRAS (n=2) mutations, but 27 cases (73%) had only non-pathogenic or non-coding mutations marking subclones at diagnosis. Twelve cases acquired P/LP mutations ( e.g. , WT1 or FLT3 -TKD, n=3 for each) at relapse, but the remaining 15 cases either had no P/LP cooperating mutations present at diagnosis or acquired at relapse ( e.g ., SJAML001444, Fig.4B ). These data align with experimental models postulating that genetic and non-genetic mechanisms likely confer a competitive advantage to certain clones 31 , enabling them to survive therapy and ultimately drive relapse. Serial sequencing reveals clonal selection patterns To gain more insights into how these clones are selected and expand during therapy, we applied deep sequencing (4890X median coverage) on remission samples from eight cases (Supplementary Table 9). Dominant mutations (VAF>40%), including driver alterations, were frequently reduced to 5% or lower after induction, except for one case (SJAML016582) with high VAF (>10%) of dominant mutations during treatment ( Fig.5A -H, Supplementary Figure 3). Clonal alterations were consistently detected with low VAF (0.1∼10%, two or more mutations in each sample) in the other six cases throughout the time course, indicating persistent measurable residual disease. Relapse-specific mutations in WGS and TCS data were detected only in the final remission samples before relapse in three cases, along with residual dominant mutations (55-217 days before relapse). For example, SJAML016582 showed multiple NRAS + subclones at diagnosis, which persisted to day 93, when a new NRAS mutation emerged ( Fig.5B -C). By relapse at day 310 after allo-HSCT, this clone expanded while another clone obtained WT1 mutations, both contributing to relapse ( Fig.5C ). In the remaining five samples, relapse-specific mutations were not consistently detected before clinical relapse, even with deep sequencing. These data suggest that the acquisition or outgrowth of these mutations is a late event during disease progression. Download figure Open in new tab Figure 4. Clonal evolution during disease progression. A. Clonal evolution patterns of three representative cases from the cohort (top: SJAML060277, bottom-left: SJAML001444, and bottom-right: SJAML003321). On 2D plots of variant allele frequencies, the colors of dots show the pathogenicity of somatic alterations, and the shapes of dots show enrichment at each time point. Undetected calls are shown away from the axes. On the schematics of clonal patterns, colored dots show mutations marking clones. B. Schematic (left) and statistical summaries (right) of somatic alterations. Inference of subclones required at least two supporting somatic alterations. The association with the clonality and time to relapse was assessed with the Wilcoxon rank-sum test. Lines of the box plots represent the 25% quantile, median, and 75% quantile. The upper and lower whisker represents the higher value of maxima or 1.5 x IQR and the lower value of minima or 1.5 x IQR, respectively. Download figure Open in new tab Figure 5. Serial sequencing reveals clonal selection patterns. A. Mutational patterns at diagnosis and relapse of SJAML016582 (left) and clinical courses and changes in VAFs along time points (right). Clusters were defined by hierarchical clustering (Ward method) using VAFs at diagnosis and relapse. B. Changes in VAFs of pathogenic mutations (left: NRAS and right: WT1 ). C. Inferred clonal structure from mutational patterns. D. Comparison of log10 fold changes of VAFs of mutations in each cluster at diagnosis and immediately after induction therapy. E. Flow cytometric profiling of HLA-DR and CD11b expression on blast population (CD45 + CD34 + CD117:KIT + ) from bone marrow aspirate. Gating strategies are shown in Supplementary Figure 3. F. Mutational patterns at diagnosis and relapse of SJAML001444 (left) and clinical courses and changes in VAFs along time points (right). G. Inferred clonal structure from mutational patterns. H. Comparison of log10 fold changes of VAFs of mutations in each cluster at diagnosis and immediately after induction therapy. For plots and fold changes of calls with no mutant read count, a VAF of 0.0005 (half of the expected limit of detection of deep sequencing with 5000x coverage) was used as a value. Statistical significance of the reduction of VAFs was assessed by the Wilcoxon rank-sum test, followed by adjustment by the Benjamini-Hochberg procedure. Lines of the box plots represent the 25% quantile, median, and 75% quantile. The upper and lower whisker represents the higher value of maxima or 1.5 x IQR and the lower value of minima or 1.5 x IQR, respectively. We also compared VAFs at diagnosis and immediately after induction therapy in four cases with somatic alterations detected after therapy to infer chemosensitivity of each genetic subclone. Compared with dominant mutations, subclonal mutations showed variable, cluster-specific reduction patterns in all cases (adjusted P -value<0.05, Fig.5D , Fig.5H , Supplementary Figure 3). Mutations depleted at relapse often showed greater reduction than dominant mutations, whereas subclones selected at relapse were relatively maintained. These less sensitive clusters did not necessarily harbor P/LP coding mutations. We also performed flow cytometric analysis of two representative cases. AML with NUP98 :: NSD1 (SJAML016582) showed the enrichment of an HLA-DR + population among CD117 + CD34 + blast population after chemotherapy, which was maintained at relapse, while reducing CD11b + population ( Fig. 5E ). AML with TP53 and PHF6 mutations (SJAML016581) showed transient enrichment of CD56 - population among blasts after chemotherapy, whereas populations at relapse were comparable to those at diagnosis (Supplementary Table 3). These data suggest that relapse often originates from subclones that survive chemotherapy, potentially through both genetic and non-genetic mechanisms, with cooperating alterations mainly contributing to their competitive advantage later during progression of the disease. Transcriptional evolution during disease progression We further investigated how clonal evolution contributes to disease phenotypes as reflected in expression profiles, given previous reports of the enrichment of primitive populations in relapsed pediatric AML. 11 , 32 By integrating expression data with a pediatric AML cohort from our previous study 4 , we performed UMAP and clustering analyses, which showed that most diagnosis–relapse pairs clustered closely according to their molecular categories, forming distinct transcriptional groups, such as those with HOXA or HOXB expression ( HOXA/B groups), CBF leukemias, and immature AML, as we have shown previously 4 (Supplementary Figure 4A, Supplementary Tables 10). Paired differentially expressed gene (DEG) analyses across the entire cohort revealed no significant DEGs ( Fig.6A -B, Supplementary Tables 11). Gene set enrichment analysis 33 (GSEA) between diagnosis–relapse pairs in the cohort identified recurrent positive enrichments such as E2F targets and T cell–related signatures ( Fig.6C ), whereas enrichment for hematopoietic or leukemia stem/progenitor cell (HSPC/LSPC) gene sets was inconsistent. We then applied GSEA to each diagnosis–relapse pair, which revealed patient-specific patterns of enrichment for gene sets related to stemness, differentiation, or translation, underscoring heterogeneous modes of relapse ( Fig.6E , Supplementary Figure 4B, Supplementary Table 12). We also applied CIBERSORT deconvolution 10 , 34 to bulk RNAseq, which showed that increases in stem cell populations were mostly consistent with GSEA results ( e.g., LSC47 35 , R =0.51, P =8.5x10 -4 , Fig.6F -G). However, the abundance of each cellular population was associated with the molecular groups of the sample ( Fig.6H ). Diagnosis–relapse changes of the LSPC scores were not observed in the entire cohort or each molecular group either (all FDR-adjusted P -value>0.05, Fig.6H , Supplementary Figure 4C), whereas the changes negatively correlated with the baseline LSPC scores at diagnosis (Spearman’s correlation: R =-0.46, P =3.4x10 -3 , Fig.6I ). The duration of remission was not correlated with baseline LSPC scores ( R =-0.13, P =0.41), but showed a weak correlation with changes in LSPC fractions: a trend that late relapses increased LSPC populations ( R =0.32, P =0.046). Cases with relapse-specific or enriched FLT3 or WT1 mutations showed increased LSPC scores ( Fig.6J ). These transcriptional data suggest at least two relapse trajectories: an early relapse driven by partial differentiation, and a late relapse that emerged from smoldering clones by acquiring stemness-related phenotypes. Download figure Open in new tab Figure 6. Transcriptional evolution during disease progression. A. Schematic showing comparison of diagnosis and relapse samples in this cohort. B. Volcano plot showing fold changes and log 10 P -values of differentially expressed gene (DEG) analysis by limma. After the false discovery rate (FDR) adjustment, no gene had FDR-adjusted P -values <0.05, and log10 P -values are shown instead. C. Normalized enrichment scores (NES) of top 5 and representative 5 gene sets enriched at diagnosis (dark blue) or relapse (dark red) with paired Gene Set Enrichment Analysis (GSEA) using pre-ranked gene sets according to t-statistics from limma. Gene sets associated with hematopoiesis or leukemia are highlighted in red. D. Schematic showing pair-wise comparison of diagnosis and relapse samples. E. Heatmap showing NES of gene sets related to stemness or hematopoietic differentiation. The colors represent NES values. Samples and gene sets were clustered using Euclidean distance and Ward clustering in the pheatmap function. F. Heatmap showing stemness and differentiation scores derived from CIBERSORT deconvolution. The colors represent relative scores normalized to sum to 1. G. Scatter plot of NES of LSC47 gene set and changes in LSPC scores in each patient pair. H. Boxplots showing changes of LSPC scores (left), GMP scores (middle), and differentiation scores (right) between diagnosis and relapse. Each sample was grouped according to molecular groups of driver alterations. I. Scatter plots of LSPC scores and changes in LSPC scores (left), LSPC scores at diagnosis and time to relapse (middle), and changes in LSPC scores and time to relapse (right). J. Comparison of changes in LSPC scores in cases with or without FLT3 (left) or WT1 (right) mutations enriched or newly detected at relapse. In F and H, Spearman’s rank correlation was used to assess the coefficient and its statistical significance. In G, the Kruskal-Wallis test and Dunn’s test were used for multiple non-parametric tests. In I, P -values were calculated by the Wilcoxon rank-sum test. Lines of the box plots represent the 25% quantile, median, and 75% quantile. The upper and lower whisker represents the higher value of maxima or 1.5 x IQR and the lower value of minima or 1.5 x IQR, respectively. Abbreviations. LSPC: leukemia stem and progenitor cell, GMP: granulocyte-monocyte progenitor, cDC: classic dendritic cell, CBF: core binding factor. Single cell profiling confirms the heterogeneity of relapse of pediatric AML To investigate cellular changes between diagnosis and relapse, we performed scRNAseq on D-R paired samples with a BCL11B alteration (SJAML045737). Leukemia clusters at diagnosis were characterized by LGALS1 and TYROBP expression, whereas leukemia clusters at relapse were characterized by JUN / FOS families (Supplementary Figure 5A), indicating distinct transcriptional profiles. Next, we projected each pair onto BoneMarrowMap 36 , a comprehensive and granular normal bone marrow reference dataset, to infer differentiation stages in detail. Whereas inferred pseudotime along differentiation showed significant but modest enrichment of immature cells at relapse (median: 5.10 at diagnosis and 5.05 at relapse, P =4.6x10 -3 , r =0.050), both diagnosis and relapse samples were enriched for progenitor populations, and the overall distribution was comparable (Supplementary Figure 5B). These data suggest that leukemia cells changed transcriptional status while maintaining cellular hierarchy during disease progression in this case. We then expanded our analysis to a public single-cell RNA sequencing dataset of 22 diagnosis-relapse paired pediatric AML 11 ( Fig.7A , Supplementary Table 13). Annotated cell types from the original publication showed that the progenitor population significantly increased, whereas the GMP population decreased at relapse ( Fig.7B , both FDR-adjusted P =0.031). The HSC and differentiated populations did not change significantly, and D-R pairs showed higher similarities of cellular components than with others. Inferred pseudotime on BoneMarrowMap showed that each molecular group changed differentiation status between diagnosis and relapse, with KMT2A r, RUNX1 :: RUNX1T1 , and FLT3 groups having shorter pseudotime at relapse (Supplementary Figure 5C), indicating increased stemness at relapse, consistent with previous findings. 11 , 32 However, patterns in each patient pair were not necessarily toward undifferentiated stages (differentiated: n=8, undifferentiated: n=14, P =0.19, r =0.23, Fig.7D ). Similar to our bulk dataset, changes in differentiation statuses (median pseudotime) showed a modest negative correlation to those at diagnosis but not with statistical significance ( R =-0.41, P =0.053), which require validation with larger datasets. Inferred cell types of each D-R pair were similar, with differences among patients being greater than changes between diagnosis and relapse ( Fig.7E -F). Download figure Open in new tab Figure 7. Single cell profiling confirms the heterogeneity of relapse of pediatric AML. A. Analytic approaches for single cell RNA sequencing (scRNAseq) data of pediatric AML from a publication. 11 B. Comparison of individual cell types annotated in the original publication (top) and pairwise comparison of annotated cell proportion of D-R pairs with non-paired sample correlation (bottom). C. Pseudotime on the BoneMarrowMap (left) and projection of scRNAseq data of representative two cases and comparison of pseudotime (right). D. Comparison of median pseudotime between diagnosis and relapse (top) and correlation of changes in pseudotime and median pseudotime at diagnosis (bottom). E. Relative proportion of annotated cell types by BoneMarrowMap. F. Pairwise comparisons of annotated cell proportion of D-R pairs with non-paired sample correlation within the entire cohort (top) and within each molecular group (bottom). G. Schematic showing patterns of relapse of pediatric AML. Lines of the box plots represent the 25% quantile, median, and 75% quantile. The upper and lower whisker represents the higher value of maxima or 1.5 x IQR and the lower value of minima or 1.5 x IQR, respectively. Abbreviations. HSC: hematopoietic stem cell, MPP: multipotent progenitor cell, LMPP: lympho-myeloid primed progenitor, pDC: plasmacytoid dendritic cell, ASDC: AXL + SIGLEC6 + dendritic cell, MEP: megakaryo-erythroid progenitor, B/CFU−E: Burst/Colony forming unit, MLP: multi-lymphoid progenitors, CLP: common lymphoid progenitor. Collectively, these findings show that relapse of pediatric AML occurs through various patterns ( Fig.7G ), represented by those characterized by increased stemness from differentiated populations and others showing differentiation from immature cells, both of which may be driven by both genetic and non-genetic mechanisms. DISCUSSION Relapse remains a major barrier to improving outcomes in pediatric AML. Although initiating and driver alterations at diagnosis are associated with survival, the genome-wide changes between diagnosis and relapse are poorly characterized. To fill the gap, we performed high-resolution mutational profiling of 39 diagnosis–relapse pairs and two relapse-relapse pairs using WGS, complemented by TCS to map clonal architecture. Despite frequent mutations in WT1 and SETD2 in this relapsed disease cohort, these alterations were often detectable in either major or minor subclones at diagnosis. Comparative profiling between diagnosis and relapse revealed a significant increase in mutational burden at relapse, whereas most of these newly acquired mutations were non-pathogenic or located in non-coding regions, with notable exceptions such as a subset of FLT3 , WT1 , and TP53 mutations. Drug-mutation association analyses confirmed a well-established mechanism of sorafenib resistance mediated by FLT3 -TKD mutations, whereas the other associations ( FLT3 -ITD-sorafenib and WT1- vorinostat) require validation in a larger cohort due to the limitation of statistical power of this study. Given these mutational patterns, we next examined the mutational processes underlying both mutations at diagnosis and those at relapse by mutational signature profiling. These analyses showed that the majority of the newly acquired somatic mutations were attributable to spontaneous mutagenesis (SBS1/SBS5). Whereas some cases treated with 6-MP or ganciclovir had mutational signatures associated with treatment, these rarely contributed to the acquisition of pathogenic mutations. Given that 6-MP is not routinely used in standard AML therapy and ganciclovir is being replaced by valganciclovir or letermovir for cytomegalovirus prevention and treatment, our data suggest that therapy-related mutagenesis may not be a primary driver of pediatric AML progression. This pattern contrasts with pediatric ALL, where acquisition of mutations linked to specific protocol-defined drugs is commonly observed during therapy. 5 Further comparisons of mutational profiles in each patient revealed a common pattern; major clones at diagnosis were depleted, while minor subclones marked by somatic alterations expanded toward relapse. These alterations were often non-pathogenic or non-coding mutations. Longitudinal deep-targeted sequencing of remission samples in eight patients further showed that genetic subclones differ in their sensitivity to induction therapy based on anthracyclines and cytarabine and often persist throughout the clinical course. Expansion of clones with newly acquired mutations was observed only at later stages. These observations suggest that relapse frequently arises through the non-deterministic survival of subclones, rather than through the persistence of a single population defined by specific pathogenic mutations. As genetic clearance 37 or detection of MRD after induction therapies 38 are strong predictors of relapse, following clonal driver alterations by deep sequencing may aid in prompt intervention before clinical relapse. As a non-genetic mechanism, enrichment of stem-like populations at relapse has been proposed as a common feature in major subtypes of pediatric AML 11 , and our data confirmed stem-like expression patterns in a subset of relapsed cases. In contrast, we also identified differentiated relapses, particularly in AML harboring driver alterations linked to immature phenotypes ( e.g ., FUS :: ERG and MNX1 SV), which were associated with early relapse. Together with the observed differences in clonal architecture—multiple clones in early relapse versus single or few clones in late relapse—these findings indicate that relapse can arise through multiple, distinct mechanisms. Stem-like disease resistant to conventional chemotherapy enables multiple surviving clones to rapidly expand with differentiation. Conversely, differentiated diseases are sensitive to chemotherapy, but a few persistent (and possibly dormant) clones can emerge as a relatively immature, stem-like disease at late relapse. These findings complicate a unified treatment strategy for pediatric AML aimed at reducing relapse rates. In addition to the multiple modes of relapse, differentiation stage–specific vulnerabilities are increasingly recognized—for example, venetoclax resistance in monocytic 39 or erythromegakaryocytic 40 populations. Inflammation and immune responses 41 – 43 also contribute to chemoresistance and relapse, possibly altering differentiation states and modulating sensitivity to targeted agents. Furthermore, early acquisition of resistance mutations to molecular therapies is well documented, such as MEN1 mutations driving resistance to menin inhibitors. 8 , 9 Given that current cytotoxic regimens have reached near-maximal intensity, future strategies will need to combine molecularly targeted drugs with induction chemotherapy, addressing both genetic and phenotypic vulnerabilities to eliminate the entire leukemic population at an early stage, and ultimately, achieve durable cures for children with AML. AUTHOR CONTRIBUTIONS J.M.K and X. M conceptualized the study. J.M.K, M.U, P.K, A.L.W.H, S.S, H.L, M.P.W, G.S, J.M, T.W, Y.L, Q.T,P.M, and X.M collected data. J.M.K, M.U, P.K, H.L, M.P.W, G.S, J.M, T.W, and X.M curated data. M.U, and J.M conducted formal analyses. M.U, J.M, H.L, and X.M performed statistical tests. MU made figures. S.K, H.I, J.E.R, and R.R provided resources. M.U and J.M.K wrote the original draft, and all the co-authors reviewed it and were involved in the revision process. SUPPLEMENTARY METHOD Subject cohorts and sample details No patient received compensation for enrollment in this study. Patient names were de-identified and replaced with unique research IDs, which are maintained using an honest broker, in accordance with institutional protocols and ethical guidelines. This unique research ID cannot be linked to patient identification outside of the research team. Samples for RNA sequencing (RNAseq: n=8), whole genome sequencing (WGS: n=16), and target capture sequencing (TCS: n=96) were newly sequenced in this study, and the rest of the data were obtained from previous publications 3 , 44 , 45 (Supplementary Table 1-2), European Genome-Phenome Archive ( EGAS00001004701 ) 46 , or St. Jude Cloud 44 (see details in DATA AVAILABILITY and Table S1). Sample processing, library preparation, and sequencing For newly sequenced samples with insufficient tumor purity ( e.g. , below 60%), we enriched the leukemic cell population either by flow cytometric sorting or T cell depletion by magnetic beads (EasySep Human CD3 Positive Selection Kit II, StemCell Technologies, 17851) as we have previously reported. 38 CD45 dim CD33 dim positive population was sorted using anti-CD45 PerCP-Cyanine5.5 (eBioscience, cat# 8045-9459-120, Clone:2D1, RRID: AB_1907397, 1:20 dilution), anti-CD33 APC (eBioscience, cat# 17-0338-42,clone:WM53, RRID:AB_10667893, 1:20 dilution), and DAPI (BD Biosciences, cat# 564907, RRID:AB_2869624) using FACSAria III instrument and FACS Diva v9.0 (both BD Biosciences) as we have implemented. 38 CD34 gating using anti-CD34 PE (Beckman cat# IM1459U, Clones:QBEnd10; Immu133; Immu409, RRID:AB_131210, 1:5 dilution) was included depending on each patient sample. Enrichment of the tumor population was confirmed by flow cytometric analysis of the post-sorting samples, generally achieving 90%. Libraries were constructed using the TruSeq Stranded Total RNA Kit, with Ribozero Gold (Illumina, 20020598) for RNAseq. The TruSeq DNA PCR-Free Library Prep Kit (Illumina, 20015963) was used for WGS according to the manufacturer’s instructions. After library quality and quantity assessment, samples were sequenced on HiSeq2000 or 2500 (Illumina, RRID:SCR_020132, RRID:SCR_016383) instruments with paired-end (2 x 101 bp, 2 x 126 bp, or 2 x 151 bp) sequencing using TruSeq SBS Kit v3-HS (Illumina, FC-401-3001) or TruSeq Rapid SBS Kit (Illumina, FC-402-4023) and HiSeq Control Software of the latest version at the time of sequencing. Details of DNA data mapping and detection of alterations Both WGS and TCS sequencing data were mapped to the reference human genome assembly GRCh37-lite with bwa 13 (WGS: v0.7.15-r1140 and v0.5.9-r26-dev, RRID:SCR_010910). For WGS, candidate SNVs and Indels were called by Bambino 14 (no version) and classified for putative pathogenicity with PeCanPie/MedalCeremony 47 . SV were analyzed using CREST 16 (v1.0), and CNVs were analyzed using CONSERTING 15 (no version). SNVs were called using SequencErr 17 (v2.09), and Indels and SVs were called using SVIndelGenotyper 18 , 48 (v1.0, https://github.com/stjude/SVIndelGenotyper ) Verification of somatic alterations detected by capture sequencing data To verify somatic mutations using capture sequencing data, we first constructed a background error profile for each marker, described previously as “rotating control” 6 . Briefly, for a given marker, the background was established by genotyping the corresponding position across the germline samples of all patients excluding the index patient(s) in whom the marker was initially identified. This yielded a reference set of mutant (nM) and total (nT) read counts representing background noise. The observed nM and nT values for the marker in the index sample were then compared against this background using a binomial test to calculate a p-value. Multiple testing correction was performed using the false discovery rate (FDR) method in Python module “statsmodels”. Markers with a q-value <0.05 were classified as somatic mutations, whereas markers with a q-value ≥ 0.05 were considered wild-type. Mutation Signature Analysis Mutational signature analyses on WGS tier 1-3 SNV calls performed by MutationalPatterns 26 (v3.6.0) on samples with 10 or more SNVs. Mutations found at diagnosis and mutations specific to relapse were analysed separately to dissect de novo somatic mutations and mutations possibly acquired after treatment. De novo mutational signature extraction using NMF (non-negative matrix factorization) was initially performed for each WGS sample, followed by testing for the presence of 238 publicly available signatures (COSMIC 49 : n=60, Signal 50 : n=174, the experimental thiopurine signatures 5 (n=2), the thio_dMMR signature 51 , and the ganciclovir signature 27 : each n=1). Based on these data, the finalized WGS signature data were obtained by testing for the presence of only the following signatures in each sample: COSMIC signatures 1 and 5 (clock-like), COSMIC signature 2 (APOBEC), COSMIC signature 87 (Thiopurine), COSMIC signature 18 (reactive oxygen species), the ganciclovir signature 27 , and the published SBS100. 50 A required cosine similarity increase of 0.03 or more was used for a new signature to be considered detected in a single sample. The probability that individual SNVs were caused by a signature was calculated as carried out in Li et al. 6 RNAseq mapping, fusion detection, and transcriptome analysis RNA reads from newly sequenced samples and from publications were mapped to the reference human genome assembly GRCh37-lite with bwa 13 (0.7.12-r1039; 0.7.15-r1140, RRID:SCR_010910) using the StrongARM pipeline. 52 Chimeric fusion detection was carried out using CICERO 53 (v0.3.0) and Neo-Versioner 54 (v1.0) followed by a manual review. Reads from aligned BAM files were assigned to genes and counted using Reads from aligned BAM files were assigned to genes and counted using HTSeq 55 (v0.11.2, RRID: SCR_005514) with the GRCh37/hg19 GTF file. Expression data of reference AML cases from a previous studiy 38 was obtained in a gene x sample matrix and combined with D-R paired AML data to establish RNAseq data of 933 samples. For a gene to be considered expressed, we required that at least 4 samples should have ≥ 20 read counts per million (CPM) reads sequenced. The count data were transformed to log2CPM using Voom 56 available from the R package limma 57 (v3.44.3, RRID: SCR_010943). The R package Seurat 58 – 60 (v4.0.2, RRID:SCR_016341) was used for dimension reduction and sample clustering. Briefly, the top 200 variable protein-coding genes were selected using the “vst” method. The expression data were then scaled and used for PCA (Principal Component Analysis), and the top 25 principal components were used for dimension reduction using UMAP 90,91 (Uniform Manifold Approximation and Projection, RRID:SCR_018217) (n_neighbors=15 and min_dist=0.2). Samples were clustered using the top 25 principal components by first constructing a K nearest-neighbor graph and then iteratively optimizing the modularity using the Louvain algorithm (resolution=0.6). Annotations of each cluster are based on clustering patterns and enrichment of molecular categories. One diagnosis-relapse paired sample from patient SJAML060228 was excluded from analyses for the estimated tumor purity of 20% at relapse and inconsistent clustering patterns with other leukemia with CBFB::MYH11 .Differential expressed gene (DEG) analyses of diagnosis-relapse paired samples were performed by limma 57 , and we set Log2 CPM = 0 if it is < 0 based on the Log2 CPM data distribution. Molecular category groups were incorporated into the model, and patient diagnosis–relapse pairs were accounted for using the duplicateCorrelation function in limma with the “block” argument. P -values were adjusted by the Benjamini-Hochberg method to calculate the adjusted P -values using the R function p.adjust. With a definition of differentially expressed genes (DEGs) of absolute fold change > 1 and adjusted P -values < 0.05, no gene was regarded as a DEG. To assess the similarity between paired diagnosis and relapse samples, we calculated the Spearman correlation coefficients across all expressed genes after voom transformation. Gene-level t-statistics obtained from the paired model used in limma were then used to rank genes as an input for pre-ranked Gene Set Enrichment Analysis (GSEA) 33 to account for patient-intrinsic expression patterns. GSEA was performed by locally installed GSEA (v4.4.0, RRID: SCR_003199) using MSigDB gene sets c2.all and c5 (v7.2.1, RRID:SCR_016863) with gene counts between 15-1,500. FDR<0.05 was used as a threshold of significant enrichment, resulting in 2413 enriched gene sets. GSEA between a diagnosis-relapse pair of each patient was performed similarly using a gene list ranked according to fold changes. Gene sets with normalized enrichment score (NES) and FDR for all pairs and ranked at 30th or higher among gene sets at least in one sample were extracted, and a total of 314 gene sets were shown in a heatmap. Inference of cellular hierarchy by CIBERSORT 34 (RRID:SCR_016955) was performed by the web interface of CIBERSORTx 61 using “Malignant Signature Matrix” and “Malignant Single Cell Reference Sample” data from a previous study 10 as references. Deconvolution of bulk RNAseq data was performed in absolute mode with S-mode batch correction with 100 permutations using normalized CPM values as input. The similarity of each D-R pair was assessed by Spearman’s correlation for gene-level expression patterns. For the downstream analysis, LSPC-Quiescent, LSPC-Primed, and LSPC-Cycling scores were combined as LSPC scores, and ProMono-like, Mono-like, and cDC-like scores were combined as differentiation scores. Single cell RNA sequencing (scRNAseq) analysis ScRNAseq was performed on SJAML045737. Frozen patient samples were thawed followed by dead cell removal using EasySep Dead Cell Removal Kit (StemCell Technologies, 17899) and resuspended in DPBS (Dulbecco’s Phosphate Buffered Saline, Gibco, 14190144) with 0.04%BSA (Bovine Serum Albumin, Sigma-Aldrich, A7030-50G). Cells were prepared and quantified following the 10x Genomics cell thawing protocol for single cell assays (CG000447, rev B). Cells were isolated for single-cell barcoding and 3 ’ GEX library preparation using the 10x Genomics Chromium Controller Genetic Analyzer (RRID:SCR_019326) and Next-GEM Single Cell V(D)J Reagent Kit (PN-1000128, PN-1000167) following the manufacturer’s protocol. Libraries were sequenced on an Illumina NovaSeq 6000 (RRID:SCR_016387) using SP Reagent kit v1.5 (Illumina, 20028401). The feature-barcode count data matrix for each sample was generated from raw sequence FASTQ files using Cell Ranger (v3.0, RRID:SCR_017344) and the GRCh37/hg37 human genome assembly. The output data from Cell Ranger was further analyzed in R environment (v4.0.2, RRID: SCR_001905) using R package Seurat (v4.1.0, RRID: SCR_007322). Count data matrices from diagnosis and relapse samples were combined using the merge function, followed by excluding cells with fewer than 200 detected genes or greater than 20% mitochondrial transcripts. Feature counts for each cell were normalized to 1 × 10 4 counts/cell and natural log-transformed using the NormalizeData function. The dimensional reduction was performed by principal component analysis (PCA) using the top 750 variable genes identified by vst analysis using the Find Variable Features function, followed by Jack Straw analysis and UMAP (uniform manifold approximation and projection, RRID: SCR_018217) using the following parameters (dims=1:20, n.neighbors=15L, min.dist=0.1). Clusters were identified by the Find Neighbors and FindClusters functions (resolution = 0.05) and annotated by enrichment of leukemic samples or normal hematopoietic cells. Marker genes were identified by the FindAllMarkers function in Seurat (min.pct = 0.25), and DEGs were identified by FindMarker gene function in Seurat (v4.1.0), both of which calculate adjusted P -values with limma implementation of the Wilcoxon rank-sum test followed by Bonferroni correction. Projection onto BoneMarrowMap, cell type inference, and pseudotime inference were performed using the BoneMarrowMap library (0.1.0). Public scRNAseq data from a publication 11 was obtained from the Gene Expression Omnibus (GEO) under accession number GSE235063 , and the patient data was obtained from the publication. Diagnosis and relapse data for each patient were imported using the Read10X function from the Seurat library along with cell metadata. After projection onto BoneMarrowMap, these metadata and variables for leukemia cells were extracted from the original publication according to “Malignant” labels from metadata for statistical tests and visualization. Patient 7 (PAWNPU) was excluded from the analyses due to the limited number of malignant cells at diagnosis (n=1). Flow cytometric analysis of patient samples Approximately 1 x 10 6 ficoll-purified mononuclear cells were washed with DPBS supplemented with 0.1% (w/v) bovine serum albumin, blocked with non-immune rabbit serum, and stained with the following combination of antibodies; CD45-Alexa Fluor 700 (BD, cat#570762, clone 2D1, RRID:AB_3686024), CD34-PercP (BD, cat#340666, clone 8G10, RRID:AB_40007), CD117-APC (BD, cat#341106, clone 104D2, RRID:AB_400565), CD33-PE-Cy7 (BD, cat#333949, clone P67.6, RRID:AB_399964), CD56-PE (BD, cat#340724, clone NCAM16.2, , RRID:AB_400122). Cells were incubated at room temperature for 15 minutes in the dark, then washed twice with PBS/BSA. Data was collected on a BD FACS CantoII flow cytometer. Data was analyzed using BD FACS Diva software. Statistics One-sided binomial tests for mutation calls were performed using the binom.test function from the stats library (SCR_025968) on the R statistical environment using variant and total allele counts from tumor samples and germline controls as input. Two-sided Fisher’s tests for the enrichment or depletion of somatic mutations were performed with the fisher.test from the stats library. Spearman correlations among clinical parameters, mutational burden, and transcriptional profiles were calculated with the cor.test function from the stats library, assuming the non-normal distribution and non-linear correlation. Wilcoxon rank sum test for unpaired samples and signed rank test for paired samples were performed using the wilcox.test function from the stat library for comparison of data with non-normal distributions. Kruskal-Wallis test (the kruskal.test function from the stat library) followed by Dunn’s test (the dunn.test library) for multiple comparison of data with non-normal distribution. Benjamini-Hochberg procedure was performed using the p.adjust function when the adjustment for multiple testing was appropriate. For comparison of similarities of expression or cellular proportion of each diagnosis–relapse pair, the Spearman correlation coefficient between the two samples was compared with the distribution of correlations between each sample and all other non-paired samples. We defined delta as the difference between the D–R correlation and the median of the corresponding non-paired correlations. One-sample Wilcoxon signed-rank test was applied to test whether the median delta was greater than zero, which indicates that each paired sample is significantly more similar to its match than to unrelated samples. DATA AVAILABILITY Genomic analyses in this study were based on the GENCODE GRCh37/hg19, and gnomAD (version 2.1.1, RRID: SCR_014964) was used for classification of germline and somatic mutations. The sequencing data from patient samples newly generated in this study (RNAseq: n=8, WGS: n=16, TCS: n=96, scRNAseq: n=2) have been deposited in the European Genome-Phenome Archive (EGA, RRID:SCR_004944), which is hosted by the European Bioinformatics Institute (EBI), under accession EGAS00001005760 . For the remaining RNAseq data for 74 samples, 64 samples are sequenced in St. Jude and available either on EGA or St. Jude Cloud. 3 , 44 , 45 For 10 published cases, we downloaded the BAM files from EGA ( EGAS00001004701 ). Of the remaining WGS data for 68 samples, 58 samples were sequenced at St. Jude and are available on either EGA or St. Jude Cloud 3 , 44 , 45 , and for the other 10 published cases 46 , we downloaded the BAM files from EGA ( EGAS00001004701 ). Cell Ranger output files for count data of newly sequenced scRNAseq data will be available on GEO upon publication. Public scRNAseq data 11 was obtained from the GEO under accession number GSE235063 . Other data generated in this study are available in Supplemental tables or upon request to the corresponding author. We did not use any custom code in this study. COMPETING INTERESTS We have nothing to disclose regarding the content of this manuscript. ACKNOWLEDGEMENTS We thank all the patients and their families at St. Jude Children’s Research Hospital (SJCRH) for their contribution of the biological specimens used in this study. We also thank the Biorepository, the Flow Cytometry and Cell Sorting Core, and the Hartwell Center for Bioinformatics and Biotechnology at SJCRH for their essential services. This work was funded by the American Lebanese and Syrian Associated Charities of St. Jude Children’s Research Hospital and grants from the NIH (P30 CA021765, Cancer Center Support Grant, and a Developmental Fund Award to J.M. Klco and X. Ma, and R01 CA276079 to J.M. Klco). The content, however, does not necessarily represent the official views of the NIH and is solely the responsibility of the authors. This work was also supported in part by the Fund for Innovation in Cancer Informatics ( www.the-ici-fund.org , to X. Ma and J.M. Klco). J.M. Klco is a previous recipient of the V Foundation Scholar Award (Pediatric) and a Career Award for Medical Scientists from the Burroughs Wellcome Fund. The authors thank Ben Huang, MD (UCSF), for a critical review of this manuscript. REFERENCES 1. ↵ Rubnitz , J.E. & Kaspers , G.J.L . How I treat pediatric acute myeloid leukemia . Blood 138 , 1009 – 1018 ( 2021 ). OpenUrl PubMed 2. ↵ Bolouri , H. et al. The molecular landscape of pediatric acute myeloid leukemia reveals recurrent structural alterations and age-specific mutational interactions . Nat Med 24 , 103 – 112 ( 2018 ). OpenUrl CrossRef PubMed 3. ↵ Umeda , M. et al. Integrated Genomic Analysis Identifies UBTF Tandem Duplications as a Recurrent Lesion in Pediatric Acute Myeloid Leukemia . Blood Cancer Discov 3 , 194 – 207 ( 2022 ). OpenUrl CrossRef PubMed 4. ↵ Umeda , M. et al. A new genomic framework to categorize pediatric acute myeloid leukemia . Nat Genet ( 2024 ). 5. ↵ Stratmann , S. et al. Genomic characterization of relapsed acute myeloid leukemia reveals novel putative therapeutic targets . Blood Adv 5 , 900 – 912 ( 2021 ). OpenUrl PubMed 6. ↵ Li , B. et al. Therapy-induced mutations drive the genomic landscape of relapsed acute lymphoblastic leukemia . Blood 135 , 41 – 55 ( 2020 ). OpenUrl CrossRef PubMed 7. ↵ Kiyoi , H. , Kawashima , N. & Ishikawa , Y . FLT3 mutations in acute myeloid leukemia: Therapeutic paradigm beyond inhibitor development . Cancer Sci 111 , 312 – 322 ( 2020 ). OpenUrl CrossRef PubMed 8. ↵ Perner , F. et al. MEN1 mutations mediate clinical resistance to menin inhibition . Nature 615 , 913 – 919 ( 2023 ). OpenUrl CrossRef PubMed 9. ↵ Tiong , I.S. , Ritchie , D.S. & Blombery , P . Response and Resistance to Menin Inhibitor in UBTF-Tandem Duplication AML . N Engl J Med 390 , 2323 – 2325 ( 2024 ). OpenUrl PubMed 10. ↵ Zeng , A.G.X. et al. A cellular hierarchy framework for understanding heterogeneity and predicting drug response in acute myeloid leukemia . Nat Med 28 , 1212 – 1223 ( 2022 ). OpenUrl CrossRef PubMed 11. ↵ Lambo , S. et al. A longitudinal single-cell atlas of treatment response in pediatric AML . Cancer Cell 41 , 2117 – 2135 e12 ( 2023 ). OpenUrl CrossRef PubMed 12. ↵ Petti , A.A. et al. Genetic and Transcriptional Contributions to Relapse in Normal Karyotype Acute Myeloid Leukemia . Blood Cancer Discov 3 , 32 – 49 ( 2022 ). OpenUrl Abstract / FREE Full Text 13. ↵ Li , H. & Durbin , R . Fast and accurate short read alignment with Burrows-Wheeler transform . Bioinformatics 25 , 1754 – 60 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 14. ↵ Edmonson , M.N. et al. Bambino: a variant detector and alignment viewer for next-generation sequencing data in the SAM/BAM format . Bioinformatics 27 , 865 – 6 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 15. ↵ Chen , X. et al. CONSERTING: integrating copy-number analysis with structural-variation detection . Nat Methods 12 , 527 – 30 ( 2015 ). OpenUrl PubMed 16. ↵ Wang , J. et al. CREST maps somatic structural variation in cancer genomes with base-pair resolution . Nat Methods 8 , 652 – 4 ( 2011 ). OpenUrl CrossRef PubMed Web of Science 17. ↵ Davis , E.M. et al. SequencErr: measuring and suppressing sequencer errors in next-generation sequencing data . Genome Biol 22 , 37 ( 2021 ). 18. ↵ Kolekar , P. et al. SJPedPanel: A Pan-Cancer Gene Panel for Childhood Malignancies to Enhance Cancer Monitoring and Early Detection . Clin Cancer Res 30 , 4100 – 4114 ( 2024 ). OpenUrl PubMed 19. ↵ Rubnitz , J.E. et al. Combination of cladribine and cytarabine is effective for childhood acute myeloid leukemia: results of the St Jude AML97 trial . Leukemia 23 , 1410 – 6 ( 2009 ). OpenUrl CrossRef PubMed 20. ↵ Rubnitz , J.E. et al. Minimal residual disease-directed therapy for childhood acute myeloid leukaemia: results of the AML02 multicentre trial . Lancet Oncol 11 , 543 – 52 ( 2010 ). OpenUrl CrossRef PubMed Web of Science 21. ↵ Rubnitz , J.E. et al. Clofarabine Can Replace Anthracyclines and Etoposide in Remission Induction Therapy for Childhood Acute Myeloid Leukemia: The AML08 Multicenter, Randomized Phase III Trial . J Clin Oncol 37 , 2072 – 2081 ( 2019 ). OpenUrl PubMed 22. Alexander , T.B. et al. Phase I Study of Selinexor, a Selective Inhibitor of Nuclear Export, in Combination With Fludarabine and Cytarabine, in Pediatric Relapsed or Refractory Acute Leukemia . J Clin Oncol 34 , 4094 – 4101 ( 2016 ). OpenUrl CrossRef PubMed 23. Karol , S.E. et al. Venetoclax in combination with cytarabine with or without idarubicin in children with relapsed or refractory acute myeloid leukaemia: a phase 1, dose-escalation study . Lancet Oncol 21 , 551 – 560 ( 2020 ). OpenUrl CrossRef PubMed 24. ↵ Inaba , H. et al. Phase I pharmacokinetic and pharmacodynamic study of the multikinase inhibitor sorafenib in combination with clofarabine and cytarabine in pediatric relapsed/refractory leukemia . J Clin Oncol 29 , 3293 – 300 ( 2011 ). OpenUrl Abstract / FREE Full Text 25. ↵ Wang , L. , et al. Integrated Whole Genome and Transcriptome Sequencing as a Framework for Pediatric and Adolescent AML Diagnosis and Risk Assessment . Res Sq ( 2025 ). 26. ↵ Manders , F. et al. MutationalPatterns: the one stop shop for the analysis of mutational processes . BMC Genomics 23 , 134 ( 2022 ). 27. ↵ de Kanter , J.K. et al. Antiviral treatment causes a unique mutational signature in cancers of transplantation recipients . Cell Stem Cell 28 , 1726 – 1739 e6 ( 2021 ). OpenUrl CrossRef PubMed 28. ↵ Man , C.H. et al. Sorafenib treatment of FLT3-ITD(+) acute myeloid leukemia: favorable initial outcome and mechanisms of subsequent nonresponsiveness associated with the emergence of a D835 mutation . Blood 119 , 5133 – 43 ( 2012 ). OpenUrl Abstract / FREE Full Text 29. ↵ Kayser , S. et al. Insertion of FLT3 internal tandem duplication in the tyrosine kinase domain-1 is associated with resistance to chemotherapy and inferior outcome . Blood 114 , 2386 – 92 ( 2009 ). OpenUrl Abstract / FREE Full Text 30. ↵ Galimberti , S. et al. Vorinostat and bortezomib significantly inhibit WT1 gene expression in MO7-e and P39 cell lines . Leukemia 22 , 628 – 31 ( 2008 ). OpenUrl CrossRef PubMed 31. ↵ Fennell , K.A. et al. Non-genetic determinants of malignant clonal fitness at single-cell resolution . Nature 601 , 125 – 131 ( 2022 ). OpenUrl CrossRef PubMed 32. ↵ Cui , B. et al. Single-cell epigenetic and clonal analysis decodes disease progression in pediatric acute myeloid leukemia . Blood 145 , 1211 – 1224 ( 2025 ). OpenUrl PubMed 33. ↵ Subramanian , A. et al. Gene set enrichment analysis: a knowledge-based approach for interpreting genome-wide expression profiles . Proc Natl Acad Sci U S A 102 , 15545 – 50 ( 2005 ). OpenUrl Abstract / FREE Full Text 34. ↵ Newman , A.M. et al. Determining cell type abundance and expression from bulk tissues with digital cytometry . Nat Biotechnol 37 , 773 – 782 ( 2019 ). OpenUrl CrossRef PubMed 35. ↵ Huang , B.J. et al. Integrated stem cell signature and cytomolecular risk determination in pediatric acute myeloid leukemia . Nat Commun 13 , 5487 ( 2022 ). OpenUrl CrossRef PubMed 36. ↵ Zeng , A.G.X. et al. Single-cell Transcriptional Atlas of Human Hematopoiesis Reveals Genetic and Hierarchy-Based Determinants of Aberrant AML Differentiation . Blood Cancer Discov 6 , 307 – 324 ( 2025 ). OpenUrl CrossRef PubMed 37. ↵ Klco , J.M. et al. Association Between Mutation Clearance After Induction Therapy and Outcomes in Acute Myeloid Leukemia . JAMA 314 , 811 – 22 ( 2015 ). OpenUrl CrossRef PubMed 38. ↵ Umeda , M. et al. A new genomic framework to categorize pediatric acute myeloid leukemia . Nat Genet 56 , 281 – 293 ( 2024 ). OpenUrl CrossRef PubMed 39. ↵ Pei , S. et al. Monocytic Subclones Confer Resistance to Venetoclax-Based Therapy in Patients with Acute Myeloid Leukemia . Cancer Discov 10 , 536 – 551 ( 2020 ). OpenUrl Abstract / FREE Full Text 40. ↵ Kuusanmaki , H. et al. Erythroid/megakaryocytic differentiation confers BCL-XL dependency and venetoclax resistance in acute myeloid leukemia . Blood 141 , 1610 – 1625 ( 2023 ). OpenUrl CrossRef PubMed 41. ↵ Lasry , A. et al. An inflammatory state remodels the immune microenvironment and improves risk stratification in acute myeloid leukemia . Nat Cancer 4 , 27 – 42 ( 2023 ). OpenUrl PubMed 42. Mumme , H. et al. Single-cell analysis reveals altered tumor microenvironments of relapse- and remission-associated pediatric acute myeloid leukemia . Nat Commun 14 , 6209 ( 2023 ). OpenUrl CrossRef PubMed 43. ↵ Morodomi , Y. et al. Inflammatory platelet production stimulated by tyrosyl-tRNA synthetase mimicking viral infection . Proc Natl Acad Sci U S A 119 , e2212659119 ( 2022 ). OpenUrl PubMed SUPPLEMENTARY REFERENCES 44. ↵ Newman , S. et al. Genomes for Kids: The scope of pathogenic mutations in pediatric cancer revealed by comprehensive DNA and RNA sequencing . Cancer Discov ( 2021 ). 45. ↵ Umeda , M . et al. Proposal of a new genomic framework for categorization of pediatric acute myeloid leukemia associated with prognosis . (Research Square, 2023 ). 46. ↵ Fornerod , M. et al. Integrative Genomic Analysis of Pediatric Myeloid-Related Acute Leukemias Identifies Novel Subtypes and Prognostic Indicators . Blood Cancer Discov 2 , 586 – 599 ( 2021 ). OpenUrl Abstract / FREE Full Text 47. ↵ Edmonson , M.N. et al. Pediatric Cancer Variant Pathogenicity Information Exchange (PeCanPIE): a cloud-based platform for curating and classifying germline variants . Genome Res 29 , 1555 – 1565 ( 2019 ). OpenUrl Abstract / FREE Full Text 48. ↵ Shao , Y. et al. Analysis of Error Profiles of Indels and Structural Variants in Deep Sequencing Data . SSRN Electronic Journal ( 2025 ). 49. ↵ Alexandrov , L.B. et al. The repertoire of mutational signatures in human cancer . Nature 578 , 94 – 101 ( 2020 ). OpenUrl CrossRef PubMed 50. ↵ Degasperi , A. et al. Substitution mutational signatures in whole-genome-sequenced cancers in the UK population . Science 376 ( 2022 ). 51. ↵ Yang , F. et al. Chemotherapy and mismatch repair deficiency cooperate to fuel TP53 mutagenesis and ALL relapse . Nat Cancer 2 , 819 – 834 ( 2021 ). OpenUrl PubMed 52. ↵ Wu , G. et al. The genomic landscape of diffuse intrinsic pontine glioma and pediatric non-brainstem high-grade glioma . Nat Genet 46 , 444 – 450 ( 2014 ). OpenUrl CrossRef PubMed 53. ↵ Tian , L. , et al. CICERO: a versatile method for detecting complex and diverse driver fusions using cancer RNA sequencing data . Genome Biol 21 , 126 ( 2020 ). OpenUrl CrossRef PubMed 54. ↵ Liu , Y. et al. Etiology of oncogenic fusions in 5,190 childhood cancers and its clinical and therapeutic implication . Nat Commun 14 , 1739 ( 2023 ). OpenUrl CrossRef PubMed 55. ↵ Anders , S. , Pyl , P.T. & Huber , W . HTSeq--a Python framework to work with high-throughput sequencing data . Bioinformatics 31 , 166 – 9 ( 2015 ). OpenUrl CrossRef PubMed Web of Science 56. ↵ Law , C.W. , Chen , Y. , Shi , W. & Smyth , G . K. voom: Precision weights unlock linear model analysis tools for RNA-seq read counts . Genome Biol 15 , R29 ( 2014 ). 57. ↵ Ritchie , M.E. et al. limma powers differential expression analyses for RNA-sequencing and microarray studies . Nucleic Acids Res 43 , e47 ( 2015 ). OpenUrl CrossRef PubMed 58. ↵ Satija , R. , Farrell , J.A. , Gennert , D. , Schier , A.F. & Regev , A . Spatial reconstruction of single-cell gene expression data . Nat Biotechnol 33 , 495 – 502 ( 2015 ). OpenUrl CrossRef PubMed 59. Butler , A. , Hoffman , P. , Smibert , P. , Papalexi , E. & Satija , R . Integrating single-cell transcriptomic data across different conditions, technologies, and species . Nat Biotechnol 36 , 411 – 420 ( 2018 ). OpenUrl CrossRef PubMed 60. ↵ Stuart , T. , Srivastava , A. , Madad , S. , Lareau , C.A. & Satija , R . Single-cell chromatin state analysis with Signac . Nat Methods 18 , 1333 – 1341 ( 2021 ). OpenUrl CrossRef PubMed 61. ↵ Steen , C.B. , Liu , C.L. , Alizadeh , A.A. & Newman , A.M . Profiling Cell Type Abundance and Expression in Bulk Tissues with CIBERSORTx . Methods Mol Biol 2117, 135 – 157 ( 2020 ). View the discussion thread. Back to top Previous Next Posted November 06, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Clonal Evolution of Pediatric Acute Myeloid Leukemia and Its Contribution to Disease Relapse Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Clonal Evolution of Pediatric Acute Myeloid Leukemia and Its Contribution to Disease Relapse Masayuki Umeda , Pandurang Kolekar , Anna LW Huskey , Shruthi Suryaprakash , Jing Ma , Hanxia Li , Tamara Westover , Michael P. Walsh , Guangchun Song , Yanling Liu , Quang Tran , Vidya Balagopal , Lu Wang , Samuel W. Brady , Tanja A Gruber , Stan Pounds , Paul E. Mead , Seth E. Karol , Hiroto Inaba , Jeffrey E. Rubnitz , Raul Ribeiro , Xiaotu Ma , Jeffery M. Klco medRxiv 2025.11.04.25339472; doi: https://doi.org/10.1101/2025.11.04.25339472 Share This Article: Copy Citation Tools Clonal Evolution of Pediatric Acute Myeloid Leukemia and Its Contribution to Disease Relapse Masayuki Umeda , Pandurang Kolekar , Anna LW Huskey , Shruthi Suryaprakash , Jing Ma , Hanxia Li , Tamara Westover , Michael P. Walsh , Guangchun Song , Yanling Liu , Quang Tran , Vidya Balagopal , Lu Wang , Samuel W. Brady , Tanja A Gruber , Stan Pounds , Paul E. Mead , Seth E. Karol , Hiroto Inaba , Jeffrey E. Rubnitz , Raul Ribeiro , Xiaotu Ma , Jeffery M. Klco medRxiv 2025.11.04.25339472; doi: https://doi.org/10.1101/2025.11.04.25339472 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (570) Allergy and Immunology (864) Anesthesia (302) Cardiovascular Medicine (4445) Dentistry and Oral Medicine (444) Dermatology (383) Emergency Medicine (609) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1515) Epidemiology (15234) Forensic Medicine (30) Gastroenterology (1127) Genetic and Genomic Medicine (6610) Geriatric Medicine (669) Health Economics (1000) Health Informatics (4548) Health Policy (1370) Health Systems and Quality Improvement (1613) Hematology (543) HIV/AIDS (1266) Infectious Diseases (except HIV/AIDS) (15925) Intensive Care and Critical Care Medicine (1104) Medical Education (623) Medical Ethics (147) Nephrology (668) Neurology (6612) Nursing (346) Nutrition (999) Obstetrics and Gynecology (1147) Occupational and Environmental Health (957) Oncology (3340) Ophthalmology (975) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (665) Pediatrics (1694) Pharmacology and Therapeutics (693) Primary Care Research (714) Psychiatry and Clinical Psychology (5458) Public and Global Health (9243) Radiology and Imaging (2204) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1197) Rheumatology (596) Sexual and Reproductive Health (715) Sports Medicine (530) Surgery (713) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a021872dad393fe2',t:'MTc3OTg0ODI5Ng=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.