SVCFit: Inferring structural variant cellular fraction in tumors

doi:10.1101/2025.02.01.636056

SVCFit: Inferring structural variant cellular fraction in tumors

2025 · doi:10.1101/2025.02.01.636056

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 101,734 characters · extracted from preprint-html · click to expand

Longitudinal structural variant phylogenies define tumor evolution under therapeutic selection pressure in metastatic prostate cancer | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Longitudinal structural variant phylogenies define tumor evolution under therapeutic selection pressure in metastatic prostate cancer Yunzhou Liu , Jiaying Lai , Yang Yi , Mark C. Markowski , Emmanuel S. Antonarakis , View ORCID Profile Angelo M. De Marzo , View ORCID Profile Srinivasan Yegnasubramanian , Laura D. Wood , Laura A. Sena , Rachel Karchin doi: https://doi.org/10.1101/2025.02.01.636056 Yunzhou Liu 1 Department of Biomedical Engineering, Institute of Computational Medicine, Johns Hopkins University , Baltimore, MD, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jiaying Lai 1 Department of Biomedical Engineering, Institute of Computational Medicine, Johns Hopkins University , Baltimore, MD, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yang Yi 1 Department of Biomedical Engineering, Institute of Computational Medicine, Johns Hopkins University , Baltimore, MD, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mark C. Markowski 2 Department of Oncology, Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine , Baltimore, MD, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Emmanuel S. Antonarakis 2 Department of Oncology, Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine , Baltimore, MD, USA 6 Department of Medicine, University of Minnesota, Masonic Cancer Center , Minneapolis, MN, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Angelo M. De Marzo 2 Department of Oncology, Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine , Baltimore, MD, USA 7 Department of Pathology, Johns Hopkins School of Medicine , Baltimore, MD, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Angelo M. De Marzo Srinivasan Yegnasubramanian 2 Department of Oncology, Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine , Baltimore, MD, USA 7 Department of Pathology, Johns Hopkins School of Medicine , Baltimore, MD, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Srinivasan Yegnasubramanian Laura D. Wood 2 Department of Oncology, Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine , Baltimore, MD, USA 3 Department of Pathology, Sol Goldman Pancreatic Cancer Research Center, Johns Hopkins University School of Medicine , Baltimore, MD, USA 4 Department of Medicine, Division of Gastroenterology and Hepatology, Johns Hopkins University , Baltimore, MD, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Laura A. Sena 2 Department of Oncology, Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine , Baltimore, MD, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Rachel Karchin 1 Department of Biomedical Engineering, Institute of Computational Medicine, Johns Hopkins University , Baltimore, MD, USA 2 Department of Oncology, Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine , Baltimore, MD, USA 5 Department of Computer Science, Johns Hopkins University , Baltimore, MD, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: karchin{at}jhu.edu laura.sena{at}jhmi.edu Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Therapeutic pressure shapes tumor evolution by selecting for subclones with distinct genomic architectures that contribute to resistance. In metastatic castration-resistant prostate cancer (mCRPC), structural variants (SVs) are central to this process but are rarely incorporated into clonal reconstruction from bulk DNA sequencing data. Here we describe SVCFit, a computational framework for estimating the cellular fraction of diverse classes of structural variants from whole-genome sequencing and using these estimates to reconstruct clonal evolutionary relationships. SVCFit explicitly models SV-type-specific breakpoint and breakend patterns and accounts for local copy-number context, enabling accurate estimation of SV cellular fraction across heterogeneous tumor genomes. Using simulated datasets and in silico mixtures of metastatic prostate cancer samples, we show that SVCFit achieves lower estimation error than SVclone, the current state-of-the-art approach for bulk-sequencing SV cellular-fraction estimation. We then apply SVCFit to longitudinal whole-genome sequencing data from patients with mCRPC treated with bipolar androgen therapy (supraphysiologic testosterone). Structural variant-based clonal phylogenies reveal marked treatment-associated clonal reconfiguration, including contraction of highly rearranged subclones and expansion of resistant populations defined by distinct structural alterations. Together, these results demonstrate that integrating structural variants into clonal evolutionary analysis provides critical insight into tumor evolution under therapy. SVCFit enables reconstruction of SV-defined clonal architecture from routine whole-genome sequencing, expanding the toolkit for studying cancer evolution in precision oncology. Introduction Intratumor heterogeneity encompasses a broad spectrum of molecular alterations within a single tumor, including single nucleotide variants (SNVs), structural variants (SVs), copy number variants (CNVs), protein expression differences, and epigenetic modifications. Genomic instability and selective pressures can shape the emergence of diverse subpopulations, or subclones, that differ in their aggressiveness, metastatic potential, and resistance to therapy 1 , 2 . Reconstructing clonal architecture can reveal critical events that shape cellular phenotypes and underlying mechanisms 3 . Early research into cancer evolution focused primarily on aneuploidy 4 , and modern bulk-sequencing methods continue to leverage copy-number and allelic-imbalance signals to infer tumor purity, ploidy, and subclonal mixture proportions. The advent of Next Generation Sequencing (NGS) enabled scientists to investigate genetic alterations at the single-base level, using SNVs as markers for cancer progression 5 . More recently, paired-end sequencing has made the detection of structural variants more feasible and cost-effective, allowing their integration into studies of cancer evolution 6 , 7 . For both SNVs and SVs, estimating the cellular fraction, the proportion of cells that harbor a specific alteration is key to inferring their relative evolutionary ordering during tumor development. Structural variants contribute significantly to genomic diversity and genome instability 8 . SVs come in various forms, such as insertions, deletions, tandem duplications, inversions, and translocations. Throughout, we use “duplication” to mean a tandem duplication unless otherwise specified. These large-scale changes can disrupt gene function by shifting or removing nucleotide sequences, which can lead to dysfunction in gene regulation or abnormalities in protein structure 9 . Because of their significance in shaping cancer genomes, SVs are critical to studying cancer genomics and metastasis, and they have direct relevance to therapeutic outcomes. Prostate cancer is increasingly recognized as a disease driven not only by small DNA sequence alterations but also by the presence and evolution of SVs. In primary prostate tumors, hallmark events such as the TMPRSS2-ERG fusion and losses of tumor suppressors like PTEN may arise through chromosomal rearrangements 10 – 13 . In metastatic castration-resistant prostate cancer (mCRPC), whole-genome sequencing has revealed that these structural alterations become even more pervasive, with complex events and noncoding rearrangements at loci such as AR , MYC , and FOXA1 dominating the genomic landscape 14 , 15 . Under the selective pressure of androgen deprivation and modern AR -targeted therapies, clones carrying AR amplification, enhancer co-amplification, or other AR locus rearrangements are positively selected, while alternative trajectories involving SV-mediated loss of TP53 / RB1 and lineage plasticity give rise to AR -indifferent phenotypes. Understanding and monitoring this SV evolution is therefore important to predicting treatment response and prognosis, designing rational therapeutic combinations, and ultimately improving outcomes for men with metastatic prostate cancer. Somatic alterations like SNVs and SVs in the human genome can be studied through bulk DNA sequencing, which is widely used due to its affordability. Incorporating somatic SVs detected from bulk sequencing into tumor evolution analysis can be done by first estimating variant allele frequency (VAF) at a specific locus. VAF is calculated as the ratio of altered reads to total reads at a genomic position, which works well for SNVs but does not transfer well to SVs: SNVs are detected at a single genomic position, while SVs are detected across multiple genomic positions and span multiple SV classes that each affect the genome differently, producing distinct patterns of altered and reference reads. While VAF-based approaches are widely used in clonal reconstruction, the process is noisy, due to confounding factors like aneuploidy and tumor impurity, both common in cancer samples 16 , 17 . A variant-centric strategy is to convert read support into cellular fraction, explicitly accounting for tumor purity and local copy-number state. While this method has been widely applied to SNVs 18 – 20 , to our knowledge, only one published method, SVclone 21 , estimates the cellular fraction of SVs from bulk sequencing data. Despite their biological importance, SVs are typically excluded from tumor phylogenies inferred from bulk sequencing, largely due to challenges in estimating their cellular prevalence across diverse SV classes and copy-number contexts. Addressing this gap is critical for reconstructing tumor evolution under therapy. Here, we introduce SVCFit, a computational framework for estimating the cellular fraction of structural variants and use these estimates to reconstruct clonal evolutionary relationships from longitudinal whole-genome sequenced samples 22 . Results The SVCFit framework Defining SV cellular fraction SVCFit estimates the SV cellular fraction (SVCF), defined as the proportion of cells in a bulk tumor sample that harbor a given structural variant. The framework rests on a single relationship between SVCF and observable read counts: where R̅ is the average number of break intervals per cell (the genomic regions flanked by an SV’s break sites) and VAF is the structural-variant allele frequency. Two features of SVs require this departure from SNV-style cellular fraction estimation. First, an SNV is observed at a single genomic position, whereas an SV is observed across multiple positions: a breakpoint , where rearranged DNA is joined and which generates SV-supporting reads, and one or two breakends , the original genomic locations flanking the rearrangement, which generate reference-supporting reads ( Fig. 1a ). Second, the per-cell breakpoint multiplicity, and the relationship between breakpoint and breakend reads, depends on the SV class. Treating SVs with the position-naive SNV formulation systematically biases cellular fraction estimates, particularly for tandem duplications and translocations. Download figure Open in new tab Figure 1. Read-evidence patterns of structural variant classes and haplotype-aware zygosity inference. ( A-F ) Schematics of the paired-end short-read evidence used by SVCFit for each supported structural variant (SV) class. Each chromosome is shown as two horizontal bars representing the two homologous haplotypes; black-outlined boxes mark the SV-affected segment, and dashed lines indicate the alignment of rearranged DNA segments relative to their normal genomic counterparts. Read pile-ups above and below each haplotype show the supporting short reads: green pile-ups are breakend (reference-supporting) reads spanning unrearranged junctions, and red pile-ups are breakpoint (SV-supporting) reads spanning the rearrangement. (A) Deletion. (B) Tandem duplication. (C) Inversion. (D) Cut-paste translocation: a segment of chr1 is excised and inserted into chr2. (E) Copy-paste translocation: a segment of chr1 is copied to chr2, leaving the chr1 source intact. (F) Reciprocal translocation: segments are exchanged between chr1 and chr2. ( G-I ) Haplotype-aware SV phasing and zygosity inference. The two horizontal bars represent the two haplotypes of one chromosome; the yellow lightning symbol marks a germline heterozygous SNP position. Below each pair, the green arrows represent paired-end breakend (reference) reads and the red arrows represent paired-end breakpoint (SV-supporting) reads; the SNP symbol on an arrow indicates that the captured SNP allele is observed on that read pair. (G) Heterozygous SV in which the SV is carried on the haplotype that does not carry the captured SNP allele: the SNP is observed only on breakend reads. (H) Heterozygous SV in which the SV is carried on the haplotype that does carry the captured SNP allele: the SNP is observed on breakpoint reads but not on breakend reads. (I) Homozygous SV present on both haplotypes: breakpoint reads are observed from both haplotypes, with both SNP alleles appearing on breakpoint reads. SV-type-specific cellular fraction estimation SVCFit derives SVCF for each SV class from the observable breakpoint count (BPC) and breakend count (BEC), where BPC and BEC are read counts scaled by mean read depth at their respective genomic positions. Because the per-cell breakpoint multiplicity differs by SV class ( Fig. 1a ), the closed-form estimator differs accordingly ( Table 1 ). View this table: View inline View popup Download powerpoint Table 1. Closed-form SVCF for each SV class. N = number of cells in the sample; r = local copy number for tandem duplications; R̅ = average number of break intervals per cell. Full derivations are provided in Supplementary Note S1. For deletions and tandem duplications, breakend reads are counted on both flanks of the break interval while breakpoint reads are counted at one breakpoint, requiring a factor-of-two correction relative to the breakpoint when computing VAF. Across all SV classes, these expressions reduce to a single multiplicative identity from which SVCF can be recovered using only observable VAF, R̅ , and r (for tandem duplications). Step-by-step derivations and induction proofs of the multiplicative identities are provided in Supplementary Notes S1 and S4. Estimation in non-diploid genomic regions SVs frequently arise within copy-number altered regions, where naive read-count ratios systematically bias SVCF. SVCFit corrects for this by integrating allele-specific copy number (ASCN) with germline heterozygous SNPs and CNV estimated by FACETS 23 ( Fig. 1b ). When breakpoint-supporting reads carry only one heterozygous SNP allele, the SV is inferred to be heterozygous and assigned to the corresponding haplotype; presence of both alleles indicates a homozygous event. Depending on which allele of germline heterozygous SNPs is present, an SV is phased accordingly. ASCN is recovered from the VAF of nearby heterozygous SNPs as ASCN = VAF het /(1 − VAF het ), with directional ambiguity between alternate-allele amplification and reference-allele loss resolved by the FACETS gain/loss classification. Given the inferred zygosity, phase, and CNV class, SVCFit determines whether the overlapping CNV alters the breakpoint or breakend read count and adjusts the corresponding observable: These adjustments are coupled through SVCF itself. SVCFit therefore applies an iterative procedure that we prove (Supplementary Note S5) converges to closed-form fixed points. When the breakpoint count is altered (e.g., a CNV in cis occurring after the SV), the fixed point is When the breakend count is altered (CNV preceding an in-cis SV, or CNV in trans), SVCF is the feasible root of the resulting quadratic, A property of Eq. 4 worth noting is that its sign is informative about the temporal order of an overlapping in-cis SV and CNV: under the algebraic constraint that an SV occurring after a CNV can affect at most the CNV-bearing alleles, the numerator is non-positive when the SV postdates the CNV and strictly positive when it predates (Supplementary Note S5.3; Supplementary Fig. S4). In practice, distinguishing these regimes from observed read counts requires sufficient breakpoint and breakend coverage to separate small numerator differences from sampling noise, so SVCFit reports the sign as a directional indicator of relative SV-CNV ordering rather than a hard ordering call. SVCFit improves SV cellular fraction estimation compared to state-of-the-art methods Evaluation on simulated data We benchmarked SVCFit against SVclone 21 , the current state-of-the-art for bulk-sequencing SV cellular fraction estimation, using synthetic tumor genomes generated by the haplotype-aware simulator VISOR 24 . We tested 75 scenarios spanning five SV-CNV overlap configurations, five SV tumor-purity levels (10%, 20%, 40%, 60%, and 80%), and three subclonal mixture settings, with replicate sampling and per-condition statistical comparisons described in Methods. Accuracy was quantified as the signed error between true and estimated SVCFs ( Eq. 1 1; positive values indicate underestimation, negative values indicate overestimation). SVCFit outperformed SVclone for inversions in 20 of 25 conditions and for translocations in 21 of 25 conditions across the full five-purity range (Supplementary Fig. S2); these two SV classes showed the largest number of conditions favoring SVCFit, although clonal translocations at 40% and 80% purity remained notable failure cases. Translocations refer to inter-chromosomal rearrangements unless otherwise specified. For clonal deletions and tandem duplications in baseline and pre-CNV scenarios (exp1-3), SVCFit also achieved significantly lower signed bias across most purity levels, with the advantage for tandem duplications restricted to ≥ 40% purity; performance was comparable between methods for most subclonal conditions. The failure cases for SVCFit were confined to three conditions across the full benchmark: clonal inversions at 80% purity, and clonal translocations at 80% and 40% purity, where SVclone showed a statistically significant advantage. For deletions in scenarios where a copy-number event on the same locus preceded the SV (exp4-5), performance between the two methods was comparable, consistent with SVclone’s copy-number-adjusted estimation approach having a structural advantage when the CN landscape directly modifies the SV-bearing locus. At 10% purity, performance differences narrowed for most SV types, consistent with the expected degradation of signal at extreme low tumor content. Fig. 2A shows three representative purities (10%, 40%, and 80%); the complete five-purity sweep is in Supplementary Fig. S2. Download figure Open in new tab Figure 2. Benchmarking of SVCFit versus SVclone on simulated and prostate cancer mixture datasets. Throughout, SVCFit is shown in blue and SVclone in orange; clonal variants are drawn as solid lines and subclonal variants as dashed lines. The signed-error convention is identical across panels: error is defined as truth minus estimate, so positive values indicate underestimation and negative values indicate overestimation. Panels A and B report error in structural variant cellular fraction (SVCF), the fraction of all cells in the sample (tumor plus non-tumor) carrying the variant; panel C reports error in cancer cell fraction (CCF), the fraction of tumor cells carrying the variant. The two are related by CCF = SVCF / SVTP, where SVTP is the SV tumor purity ( Eq. 1 0, Methods); the panel-C metric switches to CCF for direct comparability with the SVclone in-silico mixing benchmark, which reports CCF. ( A ) Mean signed SVCF estimation error from VISOR24 simulations, stratified by SV type (columns) and shown at three representative SV tumor purities (10%, 40%, 80%; rows). The complete five-purity sweep (10%, 20%, 40%, 60%, 80%) and the full per-condition breakdown by SV-CNV overlap configuration, SV-CNV temporal ordering, zygosity, and clonality are provided in Supplementary Fig. S2. ( B ) Distribution of signed SVCF estimation error at low tumor purity (10-20%), stratified by variant zygosity (heterozygous vs. homozygous). Statistical comparisons of absolute error were performed using two-sided paired Wilcoxon signed-rank tests with Benjamini-Hochberg correction; significance markers: p < 0.05 (*), p < 0.01 (**), p < 0.001 (***), p < 0.0001 (****). ( C ) Mean signed CCF estimation error on the in-silico prostate cancer mixture benchmark of Cmero et al.21 Each x-axis position corresponds to one mixing condition: the nine three-cluster mixing ratios (bM:gM = 10:90 through 90:10) and the additional four- and five-cluster mixtures (“4 clust”, “5 clust”). Solid dots show per-replicate means; lines connect per-condition means; vertical bars indicate the 95% interval across replicates. Significance markers above each condition use the same notation as panel B. We also analyzed the impact of zygosity on estimation precision at low tumor purity (10-20%; Fig. 2B ). SVCFit achieved lower signed bias than SVclone for homozygous SVs, where both alleles carry the rearrangement and breakpoint read evidence is unambiguous. For heterozygous SVs, SVclone showed significantly better performance, driven largely by low-purity heterozygous tandem duplications, which produced the long-error tail observed in Fig. 2B and the SV-type-stratified breakdown in Supplementary Fig. S2. The comprehensive per-condition breakdown, stratified by SV-CNV overlap configuration, SV-CNV temporal ordering, zygosity, SV type, clonality, and the full five-purity range, is provided in Supplementary Fig. S2. Evaluation on in-silico mixtures Next, we applied an in silico mixing benchmark developed by the SVclone authors, using two previously sequenced metastatic prostate tumor samples with known purities (49% and 46%) and shared ancestry 22 . Mixing the two source samples at nine pairwise ratios produced nine three-cluster mixtures (one source-A-only, one source-B-only, and one shared truncal cluster, with cellular fractions set by the mixing proportions); differential subsampling of odd and even chromosomes from each source generated additional four-cluster and five-cluster mixtures (Methods). Across the nine three-cluster mixtures, SVCFit signed errors fluctuated near zero across mixing ratios (mean signed error approximately −0.026 to +0.11), with the largest magnitudes at the extreme 10:90 and 90:10 dilutions where one clone constitutes only 10% of the mixture; SVclone showed consistent positive bias indicating systematic CCF underestimation across all nine three-cluster conditions ( Fig. 2C ). For the four- and five-cluster mixtures, SVCFit retained low error in the five-cluster setting; the four-cluster mixture exhibited greater variance and was the only condition at which SVCFit showed worse performance than SVclone. SVCFit significantly outperformed SVclone in 9 of the 11 conditions (two-sided paired Wilcoxon signed-rank test, Benjamini-Hochberg-corrected p < 0.05), with the advantage clearest at intermediate mixing ratios (30:70-80:20) where SVCFit achieved near-zero error. Longitudinal self-evaluation on paired pre/post-treatment simulations To evaluate SVCFit’s downstream phylogenetic inferences against known ground truth, we generated paired pre- and post-treatment whole-genome simulations using VISOR 24 . Each simulated tumor carried a truncal clone (CCF = 1.0 at both timepoints) and two subclonal children whose CCFs cross under simulated therapeutic selection (one declines from CCF 0.83 to 0.17; one expands from 0.17 to 0.83). Sweeping SV tumor purity over {20%, 40%, 60%, 80%} (Methods), we evaluated three downstream capabilities per replicate: per-SV truncal/subclonal classification, per-clone CCF estimation, and rooted tree-topology recovery (Supplementary Note S8; Supplementary Fig. S5). Truncal/subclonal classification and topology recovery were robust across the 20-80% purity range (Supplementary Fig. S5A,C). Per-SV balanced accuracy increased modestly from 0.79 at 20% purity to 0.89 at 80% purity, and was nearly identical at the pre- and post-treatment timepoints. Rooted tree topology was correctly recovered in 83-88% of replicates (22/25 at 20%, 20/24 at 40%, 22/25 at 60%, 22/25 at 80%) with no monotonic dependence on purity. An illustrative recovered topology with paired pre/post centroid placement is shown in Supplementary Fig. S6. Per-clone CCF point estimates show a systematic positive signed error (Supplementary Fig. S5B): the truncal cluster is underestimated by 15-26% across the purity range, and per-clone mean absolute error decreases modestly with purity (0.18 at 20% purity to 0.12 at 80%). This residual reflects two well-characterized properties of split-read SV genotyping rather than a SVCFit failure mode. The truncal CCF is bounded at 1.0, so any inferential noise can only push the inferred centroid below truth, producing a one-sided positive signed bias even under unbiased estimation. In addition, SVtyper 25 alternate-allele counts undercount split-read evidence by approximately 9-12% at high tumor purity, a documented sensitivity limit of breakpoint-resolved SV genotyping 26 , which propagates as a near-uniform downward bias in per-clone CCF. Per-SV read support and SV-type composition were tested directly as covariates of the residual error (Supplementary Figs. S7, S8); coverage explained little of the variation, while subclonal CCF error correlated with the number of breakend-defined translocations assigned to each cluster, consistent with mappability-driven miscounting at BND breakpoints rather than overall read-depth limitation. Topology recovery and truncal/subclonal classification, the inferences on which the longitudinal phylogenetic analysis depends, are unaffected by this magnitude bias. Longitudinal analysis of SV-defined clones in mCRPC subjects receiving bipolar androgen therapy To demonstrate the clinical utility of SVCFit in resolving tumor evolution, we analyzed longitudinal whole-genome sequencing (WGS) data from 11 evaluable subjects with metastatic castration-resistant prostate cancer (mCRPC) treated with bipolar androgen therapy (BAT) 27 , 28 , a form of supraphysiologic testosterone treatment, on the COMBAT trial 29 . BAT is the cyclical use of high dose testosterone as treatment for CRPC, which is associated with a 30% response rate, but fairly rapid acquired resistance with a median progression-free survival of 6 months 30 . Subjects on the COMBAT study were required to undergo paired sequential metastatic tumor biopsies at baseline (pre-treatment) and after 3 months of BAT treatment. SV-based phylogenies were reconstructed using SVCFit for each subject; for direct comparison, SNV-based phylogenies, with prostate-cancer driver annotations applied to clustered SNVs, were reconstructed from the same WGS data in parallel using PictographPlus 31 . Per-subject phylogenies and a side-by-side SV-versus-SNV comparison across the cohort are presented in Supplementary Note S7. Two subjects, 10 and 31, are presented in detail below because they initially demonstrated a biochemical response (assessed by serum prostate-specific antigen (PSA) levels) to BAT, followed by a subsequent PSA rise at the time of the second biopsy, suggesting early development of acquired therapy resistance under selection pressures ( Fig. 3 ). For these two subjects we applied SVCFit to estimate the cellular fractions of somatic SVs, followed by Dirichlet Process Gaussian Mixture Models (DP-GMM) to cluster SVs and a modified Gabow-Myers algorithm to reconstruct the evolutionary tree topologies and relative ordering of SV-defined clusters of these cancers to assess how tumor evolution may have contributed to acquired therapy resistance (Methods). Download figure Open in new tab Figure 3. PSA dynamics during bipolar androgen therapy (BAT) and timing of biopsies. Two side-by-side panels, titled “Subject 10” and “Subject 31”, show serum prostate-specific antigen (PSA, ng/mL; linear scale) from the COMBAT trial clinical record plotted against days on BAT at days 0, 28, 56, and 84. Y-axis ranges differ between panels to accommodate per-subject PSA levels (Subject 10: 0-25 ng/mL; Subject 31: 0-40 ng/mL). Both subjects exhibit an initial decline in PSA followed by a rise by day 84 (week 12), consistent with acquired therapy resistance. Vertical arrows mark the time points of the pre-BAT first biopsy (day 0) and the on-BAT repeat biopsy (day 84). In Subject 10, SVCFit resolved three distinct clonal populations, consistent with a branched evolutionary architecture ( Fig. 4A ; tree-display conventions and the breakpoint-versus-copy-number annotation rule are defined in the Fig. 4 legend and Methods). The truncal cluster of SVs, annotated to Clone 3 in Fig. 4A , was inherited by all tumor cells at both time points and contained canonical prostate cancer alterations, including TP53 deletion 14 , 15 , CHEK2 intragenic deletion, MDM2 duplication 32 , 33 , and the TMPRSS2-ERG fusion 12 , 13 (Although TMPRSS2-ERG was directly genotyped only in the pre-BAT WGS, the on-BAT BAM retained one supporting read and ERG-positive immunohistochemistry was confirmed in both biopsies. Of the SVs assigned to the truncal cluster, none were absent from the on-BAT sample after read-level rescue; mean breakpoint sequencing depth was 38× in the pre-BAT biopsy and 19× in the on-BAT biopsy. We therefore place the fusion on the trunk and attribute its absence from the on-BAT VCF to reduced tumor purity and lower effective coverage rather than subclonal loss; see Methods, “Rescue of key driver events”.) Following BAT, we observed a marked clonal reconfiguration, characterized by the disappearance of Clone 1 and the expansion of Clone 2 ( Fig. 4B , 4C). Download figure Open in new tab Figure 4. Longitudinal genomic landscape and tumor evolution in COMBAT mCRPC subjects treated with bipolar androgen therapy (BAT). Two representative subjects are shown side-by-side, each with the same five panel types in fixed order: phylogenetic tree, clone-proportion pie chart, alluvial plot of cluster-level SVCF shifts, pre-BAT circos, and on-BAT circos. Subject 10 occupies panels A-E and Subject 31 occupies panels F-J in this order. Phylogenetic reconstruction details, including the SV inclusion criteria and clustering procedure, are provided in Methods (“Phylogenetic reconstruction”). ( A, F ) SVCFit-inferred phylogenetic trees. Within each tree, boxes on tree edges annotate SV clusters (groups of SVs with similar inferred SVCF) and nodes denote clones (each clone is the cumulative set of clusters along the path from the germline root (G) to that node); the pie at each terminal node summarizes that clone’s per-sample fraction (cross-reference panels B/G). Edge lengths indicate inferred evolutionary ordering, not elapsed time. Only SVs that overlap the prostate-cancer driver gene list of Hosseini et al.38 are annotated on each edge. Unless otherwise marked, annotated “deletions” and “duplications” are defined by SV breakpoint evidence rather than read-depth copy-number calls. Three annotation marks distinguish the underlying evidence: *intragenic del/dup denotes an SV breakpoint-defined event affecting only part of the gene; **del bp denotes an SV breakpoint-defined deletion with no copy-number state implied; ***hem del denotes a hemizygous deletion supported by copy-number evidence (CN = 1). ( B, G ) Pie charts of estimated clone proportions in each sample (per-sample fraction of tumor cells assigned to each clone, not per-cluster prevalence), shown side-by-side for the Pre-BAT and On-BAT biopsies. Slice colors match the corresponding tree node labels in panels A/F (Clone 1 = sensitive; Clone 2 = resistant; Clone 3 = truncal). ( C, H ) Alluvial plots tracing changes in cluster-level structural variant cellular fraction (SVCF) between pre-BAT and on-BAT time points. SVCF values are binned into three strata using the half-open intervals shown on the figure axis: (0, 0.15] (low-frequency or undetected), (0.15, 0.7] (subclonal), and (0.7, 1] (clonal or near-clonal). Stratum height is proportional to the number of SVs in that bin, and ribbons trace per-cluster shifts between strata under therapy; ribbon colors correspond to SV clusters in panels A/F. SVs detected at only one time point are assigned SVCF = 0 at the absent time point and therefore appear in the (0, 0.15] bin for that sample. ( D, E, I, J ) Circos plots summarizing genome-wide structural variation per sample (D: Subject 10 pre-BAT; E: Subject 10 on-BAT; I: Subject 31 pre-BAT; J: Subject 31 on-BAT). The outer ring shows chromosomes 1-22. The inner ring shows somatic copy-number state as a continuous blue-to-red gradient encoding log2 copy-ratio (blue: copy-number loss; red: copy-number gain; near-white: neutral; scale shown beneath each circos as “CNV log2”). Curved interior links denote individual SVs colored by type: blue (deletions), green (tandem duplications), orange (inversions), and purple (translocations). Blue interior links (SV deletions) are distinguished from blue blocks on the copy-number ring (CN losses) by their shape and ring position. Clone 1 was notable for extensive genome-wide rearrangement, with SV cluster 1 containing the most translocations (n=21) and duplications (n=269), together with major structural alterations across multiple DNA-repair pathways, including hemizygous deletion of RAD51B , deletion of PALB2 . In contrast, Clone 2 was defined by an 11 kb intronic duplication within CUL1 (chr7:148,755,369-148,766,429; hg38), the catalytic subunit of the SCF (SKP1-CUL1-F-box) E3 ubiquitin ligase complex. The duplicated interval overlaps four ENCODE candidate distal-enhancer elements and one CTCF-bound element 34 , all carrying active chromatin marks and harboring MYC and E2F1 binding sites consistent with the regulatory architecture of CUL1 , a direct c-MYC transcriptional target 35 . As the catalytic backbone of SCF SKP 2 , CUL1 mediates degradation of p27 Kip 1 and other cell-cycle regulators, with documented relevance to prostate cancer progression 36 . We treat this duplication as a candidate resistance-associated structural event, consistent with the broader observation that non-coding rearrangements in mCRPC can affect transcriptional programs of nearby driver genes 14 , 15 ; direct functional consequence of the CUL1 intronic duplication on CUL1 expression will require clone-resolved transcriptional validation, which we discuss as a limitation. The expansion of Clone 2 under therapy illustrates how SVCFit can nominate candidate resistance-associated structural events that may be overlooked by SNV-centric analyses. The phylogeny reflects relative ordering of SV-defined clones constrained by pre- and on-treatment sampling, enabling direct comparison of clonal composition before and during therapy. Longitudinal analysis on Subject 10 further revealed a remodeling of the genomic landscape following BAT. The pre-BAT (baseline) biopsy exhibited a hyper-rearranged phenotype indicative of pronounced genomic instability, with 30 translocations and 297 duplications ( Fig. 4D ). In contrast, the on-BAT sample showed a 7.5-fold reduction in translocations and a 5-fold reduction in duplications (4 translocations, 56 duplications; Fig. 4E ). To determine whether these differences could be explained by lower mean coverage in the on-BAT sample (38× pre-BAT, 19× on-BAT), we performed in silico downsampling of the pre-BAT DNA sequencing data to match the effective tumor depth of the on-BAT sample, repeated across 20 bootstrap replicates with distinct random seeds. Across replicates, the downsampled pre-BAT yielded a mean of 7 translocations (95% CI: 5.84-8.16) and 43 duplications (95% CI: 38.08-48.02). The translocation count remained higher in the downsampled pre-BAT than in the on-BAT sample (mean 7 vs. 4 observed), indicating a residual ∼1.75-fold difference not accounted for by coverage and consistent with a biological reduction in translocations driven by the elimination of the highly rearranged Clone 1. In contrast, the downsampled duplication count (mean 43; 95% CI: 38.08-48.02) fell below the on-BAT count (56), with the on-BAT value exceeding the bootstrap CI upper bound, indicating that the apparent reduction in duplications is fully attributable to lower sequencing depth in the on-BAT biopsy, with no evidence of net biological loss of duplication-bearing clones. Remarkably, a similar pattern of branched evolution and clonal replacement was observed in Subject 31 ( Fig. 4F ). This tumor was founded by a truncal cluster of SVs (Clone 3) characterized by CHD1 hemizygous deletion, a focal FOXA1 duplication (exon 2-txEnd) and complex genomic duplications involving MYC , FOXA1 , NCOA2 , and RAD21 . BAT treatment coincided with the divergence of two subclonal populations. Clone 1, which harbored BRCA2 , RB1 , and ATM deletions (a candidate compound HR-pathway defect 37 , with the caveats noted in Supplementary Table S2 RB1 row), contracted following therapy ( Fig. 4G , 4H). Conversely, Clone 2 expanded to become the dominant population and was defined by a characteristic set of structural alterations, including multiple ATM inversions predicted to be inactivating, FOXA1 intragenic deletion (exon 2-exon 2), NCOA2 duplication, and RAD51B inversion. In each ATM inversion, one breakpoint mapped within the ATM gene body (txStart-intron5), truncating the coding sequence. Together, these observations demonstrate that mCRPC tumors can evolve through the selective expansion or contraction of SV-defined subclones during therapy. By estimating the cellular prevalence of these events, SVCFit enables inference of clonal evolutionary relationships, distinguishing early truncal SVs in Subject 31, including CHD1 hemizygous deletion and focal FOXA1 duplication, from later-arising subclonal alterations whose prevalence changed during BAT treatment. In Subject 31, the contraction of Clone 1, which harbored BRCA2 , RB1 , and ATM deletions, is consistent with selection against a clone with candidate DNA-repair vulnerability. In contrast, the expansion of Clone 2, despite predicted ATM -inactivating inversions and NCOA2 duplication, suggests that resistance was unlikely to be explained by any single alteration in isolation. Rather, the resistant phenotype may reflect the combined effect of Clone 2 alterations, including concurrent FOXA1 intragenic deletion affecting the forkhead DNA-binding domain and NCOA2 duplication. Although NCOA2 gain would conventionally be expected to enhance AR coactivation, the concurrent FOXA1 alteration could redirect AR/coactivator activity toward an altered transcriptional program rather than simply amplifying the canonical AR-target repertoire associated with BAT sensitivity. These hypotheses require transcriptional and chromatin-state validation, as SVCFit identifies therapy-associated clonal shifts but does not by itself establish which individual SVs are causal mediators of sensitivity or resistance. Consistent with these findings, the on-BAT sample from Subject 31 exhibited a four-fold reduction in translocations, all of which were confined to Clone 1, alongside a modest increase in duplications (pre-BAT: 4 translocations, 16 duplications; on-BAT: 1 translocation, 18 duplications; Fig. 4I , 4J). As both samples had comparable tumor purity (∼0.6), no downsampling was required. Collectively, these cases suggest that BAT can selectively eliminate highly rearranged subclones, while leaving the dominant genomic architecture of resistant disease largely intact. Across the broader COMBAT cohort, SV-based phylogenies resolved branched clonal architecture and assigned candidate sensitive-clone alterations in all 11 evaluable subjects and resistant-clone-specific alterations in 9 of 11, based on the prostate-cancer driver gene list of Hosseini et al. 38 SNV-based phylogenies, even after annotating clustered SNVs against prostate-cancer driver genes, yielded actionable driver-gene information in only one subject (Subject 20, truncal BRCA2 frameshift), provided non-actionable annotations in four additional subjects (6, 10, 12, 47), were subclonal but carried no annotated driver events in four subjects (1, 7, 18, 49), and appeared monoclonal at both timepoints in two subjects (13, 31), the latter two despite massive clonal replacement on the SV phylogeny. Candidate SV-driven resistance mechanisms identified across the cohort included an intronic CUL1 duplication overlapping active distal-enhancer elements with MYC and E2F1 binding sites 35 , 36 (Subject 10), FOXA1 structural remodeling (Subject 31), MDM2 gain 32 , 33 , 39 (Subject 20), PTEN loss (Subject 6), NCOR1 inversion (Subject 13), BRAF gain 32 , 40 with SETD2 loss on the persistent clone (Subject 1), and BRCA2 loss combined with RB1 inversion (Subject 49), where the two events independently disrupt HR-pathway components, with RB1 loss noted as HR-supporting but not equivalent to BRCA1/2 loss 41 (Supplementary Table S2 RB1 row). None of these candidate mechanisms were identified by the corresponding SNV trees. Per-subject phylogenies, the side-by-side SV-versus-SNV comparison, and aggregate cohort summaries are presented in Supplementary Note S7. Literature support for the driver-status of each annotated gene, organized by pathway, with primary references, is compiled in Supplementary Table S2. Discussion While clonal reconstruction in cancer genomics has historically relied on single nucleotide variants for clonal reconstruction 42 , this study demonstrates that genomic structural variants are not merely “co-passengers” but may shape clonal architecture and evolution 6 , 7 , 14 , 15 . By explicitly modeling the SV breakpoint patterns and the copy-number context, SVCFit provides a level of precision in cellular fraction estimation that has been difficult to achieve with bulk sequencing-based approaches. Benchmarking against SVclone 21 shows that failure to account for these structural features can bias clonality estimates, potentially misclassifying truncal events as subclonal or obscuring emerging resistant populations. The application of SVCFit to the COMBAT trial patients offers a high-resolution view of how metastatic castration-resistant prostate cancer (mCRPC) navigates therapeutic bottlenecks. In Subject 31, FOXA1 showed a truncal focal gene-body amplification spanning exon 2 through the transcript end (CN ∼3 pre-BAT; ∼5 on-BAT), while a dominant resistant subclone acquired a small intragenic deletion within exon 2. This suggests that therapeutic pressure during BAT may select for FOXA1 locus remodeling with potential consequences for androgen receptor cistrome regulation. Routine gene-level copy-number reporting would likely summarize FOXA1 as a single dosage state, masking subclonal gene-body alterations that emerge in resistant lineages. Furthermore, the expansion of Clone 2 in Subject 10, characterized by an 11 kb intronic duplication within CUL1 spanning four candidate distal-enhancer elements with MYC and E2F1 binding sites, illustrates how SVCFit can pinpoint non-coding structural rearrangements at cell-cycle and ubiquitin-ligase loci, events that are difficult to detect with SNV-focused pipelines. Interestingly, in both of these cases of early acquired resistance to BAT, we observed eradication of clones carrying multiple HR-pathway alterations: BRCA2 deletion (Subject 31) or compound structural alterations spanning PALB2 , and RAD51B (Subject 10). While homologous recombination repair deficiency (HRD) has been proposed to be a predictive biomarker of response to BAT 43 , 44 , we previously reported that it was not associated with durable response in the COMBAT study 45 . Application of SVCFit to these cases suggests that HRD may in fact be a valid predictive biomarker of response to BAT, but these responses can be limited by tumor HR status heterogeneity enabling outgrowth of resistant HR-proficient clones. Notably, subjects enrolled on the COMBAT trial had progressed on more prior therapies than those on previous trials of BAT 27 , 29 , 30 , 46 , 47 and therefore may have had a higher degree of intra- and intertumoral heterogeneity and plasticity that enables acquired therapy resistance by clonal selection 3 , 48 . We acknowledge the rising utility of single-cell sequencing in resolving tumor heterogeneity; however, technical limitations such as allelic dropout and high costs currently restrict its use in routine clinical practice 49 , 50 . SVCFit complements these approaches by recovering subclone-level resolution of SV-defined clonal architecture from standard bulk WGS, distinguishing truncal from subclonal events and reconstructing branched phylogenies in the cases shown here. This scalability makes SVCFit a viable candidate for integration into molecular tumor boards, where it might help clinicians distinguish stable truncal alterations from emerging resistant subclones. SVCFit depends on the quality and characteristics of upstream SV calling. While the framework performs robustly for simple and moderately complex rearrangements when applied to consensus calls from established short-read SV callers, differences across callers in breakpoint-detection sensitivity, definitions of split-read and discordant-pair support, and internal quality filtering produce different per-SV reference- and alternate-supporting read counts and may therefore yield different downstream SVCFit estimates from the same underlying data; we mitigated this in the COMBAT analysis by using a Manta 51 + Delly 52 + GRIDSS 53 consensus merged with SURVIVOR 26 before genotyping, but a residual sensitivity to caller choice remains. Highly complex processes such as chromoplexy, chromothripsis, and breakage-fusion-bridge cycles generate multi-breakpoint architectures that are not yet explicitly represented in the current model, even when the breakpoints that make up these events are detectable with short-read sequencing. Future extensions will address this by incorporating structure-aware representations of complex SVs and by integrating SNVs, CNVs, and SVs within a unified evolutionary model, with long-read sequencing providing additional breakpoint resolution where available. In conclusion, we demonstrate that SVCFit enables robust reconstruction of SV-defined clonal architecture from genomic data. Although shown here in metastatic prostate cancer, SVCFit is broadly applicable to other malignancies in which structural variation plays a central role. By enabling reconstruction of SV-defined clonal phylogenies from routine bulk sequencing, this framework supports more comprehensive analysis of tumor evolution and reduces the risk that clinically consequential genomic alterations are overlooked in precision oncology. Methods Structural variant cellular fraction estimation Closed-form SVCF in diploid regions SVCFit estimates SVCF from breakpoint and breakend read counts obtained from whole-genome sequencing. For each SV class, SVCF is recovered from the observed breakpoint count (BPC) and breakend count (BEC) using the closed-form expressions summarized in Table 1 of Results, which follow from the SV-class-specific multiplicative identities derived by induction in Supplementary Notes S1 and S4. In diploid regions, R = 2 for all SV classes except tandem duplications, for which R = 2( r − 1)/ r where r is the local total copy number from FACETS. Allele-specific copy number from heterozygous SNPs In non-diploid regions, SVCFit requires the local allele-specific copy number (ASCN) to apply the read-count adjustments in Eqs. 2 -3. For each germline heterozygous SNP within an inferred copy-number segment, ASCN is recovered from its variant allele frequency VAF het : Eq. 6 introduces a directional ambiguity between alternate-allele amplification and reference-allele loss; SVCFit resolves this using the gain/loss classification from FACETS 23 . Per-segment ASCN is taken as the median across the heterozygous SNPs in that segment. Full algebraic derivation is given in Supplementary Note S2. Haplotype-aware SV phasing and zygosity inference Each SV is assigned to a parental haplotype using germline heterozygous SNPs that fall within 500 bp of an SV breakpoint. Tumor BAMs are filtered to breakpoint-supporting reads using samtools 54 (v1.21; -f 1 -F 2), and bcftools 54 mpileup is run at the heterozygous SNP positions over the filtered BAM. Presence of only one of the two heterozygous SNP alleles in the breakpoint pileup identifies a heterozygous SV on the haplotype carrying that allele; presence of both alleles identifies a homozygous SV. Overlapping CNVs are phased by the FACETS allele-specific call. Combining the SV haplotype with the CNV haplotype determines the in-cis versus in-trans relationship, which in turn selects between the two non-diploid update rules below. Across the 11 evaluable COMBAT subjects, 61.3% of SVs (3,128 of 5,181) had at least one germline heterozygous SNP within 500 bp of a breakpoint and were assigned to a parental haplotype. The remaining 38.7% (1,963 of 5,181) could not be phased due to insufficient heterozygous SNP coverage within the 500 bp breakpoint window, a limitation inherent to short-read data at SV breakpoints, and were treated as unphased; for these SVs, zygosity was inferred from copy-number context only. Iterative SVCF estimation in non-diploid regions When an SV overlaps a CNV, the read-count adjustments in Eqs. 2 -3 couple BPC ′ and BEC ′ to SVCF itself. SVCFit therefore applies a fixed-point iteration initialized at SVCF 0 = 0.5. When the breakpoint count is altered (CNV in cis occurring after the SV), the update rule is When the breakend count is altered (CNV preceding an in-cis SV, or CNV in trans), the update rule is Iteration proceeds until | SVCF q +1 − SVCF q | < 10 −6 or 100 iterations, whichever comes first. Both procedures converge to the closed-form fixed points given by Eqs. 4 and 5 of Results; convergence proofs and the formal derivation of the SV-CNV ordering criterion (sign of Eq. 4 ) appear in Supplementary Note S5. Sequencing-depth assumption SVCFit assumes that average read depth at SV breakpoints is approximately equal to the average read depth at the corresponding breakends (Eq. S14, Supplementary Note S1). This was empirically validated in 1,000 simulated clonal tumors at 50× coverage spanning SV tumor purities from 10% to 100% (Supplementary Fig. S1; Pearson correlation ≈ 0; analytical justification in Supplementary Notes S3 and S6). Datasets Synthetic benchmark sets (VISOR) We used the Haplotype-Aware Structural Variants Simulator VISOR 24 (v1.1.3) to generate multi-clonal synthetic tumor genomes (with one matched normal). Somatic structural variants (SVs) with known structural-variant cellular fractions (SVCFs) and copy-number variants (CNVs) were spiked into tumor genomes to explicitly model SV-CNV overlaps in both cis and trans configurations. To optimize compute, simulations were restricted to chromosomes 1 and 2, spanning a range of SV/CNV overlap contexts, tumor purities, and subclonal compositions. For each scenario, we specified a phylogeny with a germline root and a clonal tumor ancestor that bifurcates into two terminal subclones. SVs were simulated on chromosome 1, ordered from the p- to q-arm, and partitioned into three sets assigned to the truncal clone and two subclones (33/33/34 events, respectively). Chromosome 2 was used only as a partner for inter-chromosomal translocations. Restricting the simulation to these chromosomes simplified the controlled design of SV-CNV overlap patterns across tumor purities and subclonal mixtures. Next, we simulated five SV-CNV configurations: (i) SV without CNV; (ii) SV preceding cis amplification; (iii) SV preceding trans amplification; (iv) SV following cis amplification; and (v) SV following trans deletion. Each configuration was crossed with five SV tumor-purity levels (10%, 20%, 40%, 60%, 80%; Eq. 9 ) and three subclonal mixture settings (10:90, 30:70, 50:50), yielding 75 scenarios at 50× mean coverage (one BAM per scenario). Each scenario was simulated as a single multi-clonal genome with 100 spiked-in SVs partitioned among the truncal clone and two terminal subclones (33 / 33 / 34 events, respectively), so that observations from SVs within the same simulated genome are not statistically independent; per-condition statistical comparisons between methods therefore treat each simulated genome as the unit of observation rather than each individual SV, and Supplementary Fig. S2 panels stratify the 75 scenarios by SV class for readability rather than presenting all panels for every SV class at every condition. For each spiked-in SV we retained start/end positions, SV type, and reference/alternate read counts in both tumor and normal BAMs, enabling direct comparison with SVCFit and SVclone 21 . where SVTP is SV tumor purity, N sv is the number of cells containing any somatic SV, and N is the total number of cells in the sample. Tumor purity terminology In this study, we distinguish between two related but distinct concepts of purity. SV tumor purity (SVTP) refers to the fraction of cells in a sample harboring somatic structural variants, the primary quantity modeled and estimated by SVCFit. In contrast, tumor purity (or sample purity ) refers to the fraction of tumor cells in a sample, as estimated by copy-number-based methods such as FACETS 23 . Unless otherwise specified, references to “SV tumor purity” explicitly denote SVTP, whereas references to “tumor purity” denote conventional tumor cellularity. Prostate metastasis mixtures (benchmark set) For direct comparison with SVclone, we generated eleven multi-clonal tumors by mixing BAM files from two single-clone metastatic prostate cancer samples. These samples were originally sequenced in a landmark study of subclonal structure in prostate cancer metastasis 22 and were subsequently used as a key benchmark in the SVclone paper 21 . Raw tumor and matched-normal FASTQs were obtained from EGA (EGAD00001001343) 22 and aligned to hs37d5 with BWA mem 55 (v0.7.19) to generate a single-clone tumor BAM. Then, each single-clone tumor BAM was downsampled in 10% increments of clonal frequency and mixed to create BAMs of heterogeneous samples with known SVCFs and known clonal composition. Nine mixtures yielded three-cluster tumors (cluster 1: SVs shared by both tumors; cluster 2: shared + private SVs from tumor A; cluster 3: shared + private SVs from tumor B). Four- and five-cluster mixtures were generated by subsampling odd and even chromosomes from each metastasis at different rates prior to mixing, as described in Cmero et al 21 . Specifically, the four-cluster mixture combined bM odd / bM even chromosomes at 20% / 60% with gM odd at 40%, yielding ground-truth CCFs of 0.2 (bM-odd), 0.6 (bM-even), 0.4 (gM-odd), and 1.0 (shared clonal); the five-cluster mixture combined bM odd / bM even at 80% / 60% with gM odd / gM even at 20% / 40%, yielding ground-truth CCFs of 0.8, 0.6, 0.2, 0.4, and 1.0 across the four sample-specific subclones plus the shared clonal cluster. Given that purity, mixture proportions, and per-SV clonality are all known quantities, and that SV tumor purity can be approximated as the purity, the true SVCF can be derived for each SV. For clonal SVs, true SVCF equals the mixture’s SV tumor purity (SVTP); for subclonal SVs, true SVCF equals the source tumor’s SVTP times its mixing proportion. SVs were called with Manta 51 (v1.6.0) and genotyped with SVtyper 25 (v0.7.1). For direct comparison with SVclone, SVCF was converted to cancer cell fraction (CCF) via Eq. 1 0. We regenerated the mixture BAM files using the same script described in the SVclone paper( https://github.com/mcmero/SVclone_Rmarkdown/tree/master/make_insilico_mixt ures.sh). We also contacted the first and senior authors to request the original mixture BAM files but did not receive a response. mCRPC subject samples (COMBAT) The COMBAT trial ( NCT03554317 ) tested bipolar androgen therapy (BAT; testosterone cypionate 400 mg intramuscularly every 28 days) for 3 months followed by BAT plus nivolumab in patients with metastatic castration-resistant prostate cancer (mCRPC) who had progressed on at least one prior androgen receptor pathway inhibitor; full eligibility, response criteria, IRB approval (Johns Hopkins University), informed consent, and clinical results are described in Markowski et al 29 . Participants underwent paired biopsies of a soft-tissue metastasis at screening (baseline) and after 3 months of BAT monotherapy (prior to nivolumab addition). Flash-frozen biopsies underwent laser-capture microdissection to enrich for cancer cells, followed by paired-end WGS on a NovaSeq 6000 S4 (2×150 bp; ∼110 Gb per sample) for each biopsy and matched PBMC normal. FASTQs were trimmed with trim_galore 56 (v0.6.10), aligned to GRCh38 with BWA mem 55 (v0.7.19), and processed with GATK4 57 (MarkDuplicates, BaseRecalibrator, ApplyBQSR; v4.6.2.0). Data processing Somatic structural variant calling For both benchmark datasets, somatic SVs were called with Manta 51 (v1.6.0) using tumor-normal pairs with default parameters. For COMBAT, somatic SVs were called using Manta (v1.6.0), Delly 52 (v1.5.0), and GRIDSS 53 (v2.13.2) with default parameters, each provided with the tumor BAM and its matched normal. For SV genotyping we used SVtyper 25 (v0.7.1), which was selected to decouple SV discovery from breakpoint-resolved read-evidence quantification. Modern somatic SV pipelines achieve their best precision-recall by combining multiple discovery callers: Manta, Delly, and GRIDSS each exploit complementary evidence (assembly-based, paired-end + split-read, and breakend-assembly-with-rule-based-filtering, respectively), and consensus across two or more of these callers consistently outperforms any single tool, but the discovery callers report inconsistent and non-comparable read-support metrics, making their raw output unsuitable for VAF-based downstream analyses. SVtyper provides a uniform, per-breakpoint quantification using a Bayesian likelihood model over discordant paired-end and split-read evidence, the same evidence classes used by the discovery callers themselves, producing per-SV reference-supporting (RO), alternate-supporting (AO), and allele-balance (AB) values directly grounded in breakpoint geometry. These values are the per-SV reference/alternate counts required as input by Eq. 1 of the main text and the per-class identities in Supplementary Note S1. The Manta + Delly + GRIDSS → SURVIVOR 26 → SVtyper workflow we adopt for the COMBAT cohort is an established pattern for cancer somatic SV analysis, used for example in the BCL11B enhancer-hijacking study in lineage-ambiguous stem-cell leukemia 58 . Alternative breakpoint genotypers, caller-specific built-in genotypers, or graph-based germline-population genotypers, are restricted to one caller’s VCF format or are optimized for population-scale germline rather than per-sample somatic VAF. Because SVtyper expects a LUMPY-formatted VCF with INFO fields MATEID, END, CIPOS, CIEND, and SVTYPE, we manually appended CIPOS and CIEND to the VCF outputs from GRIDSS, Delly, and Manta to ensure compatibility. Finally, for COMBAT, all genotyped VCF files were harmonized using SURVIVOR 26 (v1.0.7) to produce a consensus VCF prior to cellular fraction analysis. Rescue of key driver events Using default parameters, SVtyper detected the canonical prostate cancer fusion TMPRSS2-ERG 12 , 13 in the pre-BAT biopsy of COMBAT Subject 10 (tumor purity 68%; breakpoint coverage 95), but not in the on-BAT biopsy, which had substantially lower tumor purity and reduced read depth at the breakpoint (purity 36.6%; breakpoint coverage 31). Given that TMPRSS2-ERG is a well-established early event in prostate tumorigenesis and is rarely lost, we manually inspected the on-BAT BAM file and identified a single read supporting the TMPRSS2-ERG fusion (deletion at chr21:38,499,931-41,501,273). While this read alone is insufficient to confidently call the fusion, ERG protein expression was positive by immunohistochemistry in both the pre- and on-BAT biopsies, a pattern strongly suggestive of an underlying ERG rearrangement, most commonly TMPRSS2-ERG . Together with its robust detection by WGS in the pre-BAT sample, these orthogonal data support TMPRSS2-ERG as a clonal event, and we therefore placed it on the trunk of the inferred phylogeny. Similarly, TMPRESS2-ERG was also rescued for subject 1, 6, 12, 13. In subject 20, PTEN del and RAD51B inv were initially filtered out because it failed the 2 called criteria but both SVs were detected in pre- and on-BAT samples using delly and were recovered. In addition, PTEN loss is confirmed to be present at both pre- and on-BAT sample via IHC data. Germline heterozygous SNP detection Germline SNPs were identified from matched normal BAMs using GATK4 57 HaplotypeCaller (v4.6.2.0; default parameters) and filtered to heterozygous sites using bcftools 54 (v1.20). The downstream use of these heterozygous SNPs for SV phasing and ASCN inference is described in the “Structural variant cellular fraction estimation” subsection above. Somatic copy-number variant detection CNVs overlapping SVs were called with FACETS 23 (v0.6.2). FACETS first generates a SNP pileup file using tumor and matched-normal BAMs and SNPs detected from GATK4 57 HaplotypeCaller (v4.6.2.0; default parameters); then coverage and the germline B-allele frequencies (BAFs) were computed from the SNPs pileup to detect CNV. FACETS SNP pileups were generated with: -g -q15 -Q20 -P100 -r25,0. Dependence on upstream SV calling SVCFit consumes SV breakpoint coordinates and per-SV reference- and alternate-supporting read counts produced by upstream SV calling and breakpoint genotyping; it does not itself perform SV discovery. Different SV callers (Manta 51 , Delly 52 , GRIDSS 53 , and others) differ in breakpoint-detection sensitivity, in their definitions of split-read versus discordant-pair support, and in their internal quality filters. These differences propagate into the per-SV reference- and alternate-supporting read counts that feed Eq. 1 and the per-class identities of Supplementary Note S1, and can therefore influence downstream SVCF estimates. To mitigate caller-specific idiosyncrasies in the COMBAT analysis we used a multi-caller consensus (Manta, Delly, GRIDSS) merged with SURVIVOR 26 before genotyping; for the VISOR and prostate-mixture benchmarks, SVs were called with Manta only to match the SVclone benchmark configuration. We discuss the implications of caller dependence further in the Discussion. SVCF comparison and benchmarking SVCFit requires inputs including: SV positions and types, SV read counts (SVtyper), CNV data (FACETS), and germline heterozygous SNP pileups (GATK4 and bcftools). SVCF per SV was computed as defined in the Results ( Eq. 1 ; Table 1 ) with non-diploid corrections from Eqs. 2 -5. SVclone (v1.1.2) was obtained from on March 10, 2024. SVclone was run with default parameters using as input the SV genomic coordinates, VISOR-specified tumor purity and ploidy from the simulations, and the tumor BAM file. Benchmarking on VISOR simulations For each of the 75 VISOR simulation scenarios, structural variants (SVs) were identified using Manta and subsequently genotyped with SVtyper to obtain reference and alternate supporting read counts. Heterozygous single-nucleotide variants (SNVs) were detected using GATK4 HaplotypeCaller and further processed with bcftools. Copy-number variants (CNVs) were inferred with FACETS. Additionally, tumor purity and ploidy estimates from FACETS, along with the simulated mean sequencing depth (50×), were supplied as input to SVclone. To quantify uncertainty in all performance metrics, we generated 30 independent bootstrap replicates of each scenario by resampling reads with different random seeds and ran the full SVCFit pipeline (Manta, SVtyper, GATK HaplotypeCaller, FACETS) and SVclone independently on each replicate. The ground-truth SVCF values derived from the simulation were compared to estimates produced by SVCFit and SVclone. Error was defined as: We tested for a difference in median error between methods using a two-sided paired Wilcoxon signed-rank test with Benjamini-Hochberg correction for multiple testing. A BH-adjusted p-value < 0.05 was considered statistically significant. Benchmarking on prostate metastasis mixtures SVs were called with Manta and genotyped with SVtyper on the mixture BAMs. SVs were filtered following Cmero et al. 21 : fewer than one supporting read in the matched normal, more than two supporting reads in at least one tumor sample, CCF capped at 1.0, and PASS calls only. SVCF estimates from SVCFit were converted to cancer cell fraction (CCF) using Eq. 1 0 (Methods), with tumor purity taken from Hong et al. (2015) 22 , which reported purity estimates for the source metastases. Error was defined as Error = CCF true − CCF est (positive values indicate underestimation), where the true CCF is computed from the known mixing proportions. To quantify uncertainty, we generated 30 independent replicates of each of the eleven mixture conditions by subsampling reads with different random seeds and ran the full SVCFit pipeline (Manta, SVtyper, GATK HaplotypeCaller, FACETS) independently on each replicate. SVclone was run on the same eleven non-subsampled mixtures using its standard pipeline (CCube clustering, Battenberg copy number); per-SV CCF was computed analytically from the observed read counts, sample purity, and SVclone’s inferred copy-number genotypes using MapVaf2CcfPyClone, following the procedure that produced the SVclone publication’s Figure 2C . For each SVCFit replicate-SVclone pairing, mean signed CCF error was computed per mixture condition. Per-condition differences between methods were tested using a two-sided paired Wilcoxon signed-rank test on the 30 paired replicate mean errors, with Benjamini-Hochberg correction across the eleven mixture conditions. To ensure a methodologically fair head-to-head comparison, errors were computed on the intersection of SVs detected and truth-matched by both methods; results on each method’s full call set are also reported in Results. Longitudinal self-evaluation simulations To evaluate downstream phylogenetic inferences, we generated paired pre- and post-treatment whole-genome simulations with VISOR 24 following a single S1 scenario: a truncal clone (CCF = 1.0 at both timepoints) and two subclonal children whose CCFs cross under simulated therapeutic selection (one declining from CCF 0.83 → 0.17 and one expanding from 0.17 → 0.83). Each replicate carried 33 truncal SVs and 33 + 34 subclonal SVs on the two terminal subclones, drawn from a mixture of deletions, inversions, tandem duplications, and translocations, with reads simulated at 50× mean coverage and a matched normal at 50×. SV tumor purity was swept over {20%, 40%, 60%, 80%} with 25 paired pre/post replicates per purity (boots 0-4 × five experimental designs; n = 24 at 40% purity due to one failed to build tree). The full SVCFit pipeline (BWA-MEM 55 v0.7.19 alignment to GRCh38, Manta 51 v1.6.0 SV calling, SVtyper 25 v0.7.1 genotyping, GATK4 57 v4.6.2.0 HaplotypeCaller germline SNP calling, FACETS 23 v0.6.2 copy number inference) was run independently on each replicate at default SVCFit parameters with no per-scenario tuning (DP-GMM concentration α = 1). For each replicate we computed three metrics: per-SV truncal/subclonal balanced accuracy and F1 (truncal = positive class) at each timepoint, per-clone signed CCF error (true − inferred CCF, positive = underestimation), and exact rooted-topology recovery against the S1 ground-truth tree. Per-purity summaries report mean and bootstrap 95% confidence intervals across the 24-25 replicates per purity. Full per-purity numerical results, the multi-covariate analysis of per-clone signed CCF residuals, and the operating-range conclusions are presented in Supplementary Note S8. SV Longitudinal Clustering and Dynamics Structural variants (SVs) in the COMBAT dataset were clustered on a per-subject basis using Dirichlet Process Gaussian Mixture Models (DP-GMM). DP-GMM inference was performed using the scikit-learn library 59 in R via the reticulate interface 60 . For clustering, all SVCF was converted to CCF using purity estimates from FACETS 23 ( Eq. 1 0). SVs detected at only one time point were assigned a CCF of 0 at the other time point. To avoid bias in DP-GMM clustering, we retained only the unique CCF pair observed across the two time points. Two features were then derived for clustering: the pre-BAT CCF and the on-BAT CCF. DP-GMM models were fitted using a maximum of 10 components (kmax = 10), a Dirichlet process concentration parameter of 1 (weight_concentration_prior = 1), and a covariance regularization term of reg_covar = 1e−6, covariance_type = “full”, kmax=10, init_params = “kmeans++”. Both cluster-merging cutoffs are motivated as follows. Five-SV minimum per component Three complementary considerations justify this floor. First, algorithmic stability under 2D dimensionality : SVs are clustered jointly in the two-dimensional space of (pre-BAT CCF, on-BAT CCF), so the mathematical minimum for a non-singular empirical covariance matrix is N = D + 1 = 3 non-collinear points; furthermore, a full 2×2 covariance matrix requires estimating three parameters (σ 2 _pre, σ 2 _on, σ_pre,on) rather than one, substantially increasing susceptibility to overfitting at low N. At 3-4 points, the DP-GMM Bayesian prior overwhelmingly dictates covariance matrix orientation, producing ill-conditioned estimates and triggering aggressive component pruning during Expectation-Maximization (EM); five points is the absolute bare minimum at which 2D covariance estimation is statistically feasible without numerical failure. Second, biological signal vs. technical noise in multi-sample data : SV calling carries inherent noise from sequencing artifacts, mapping anomalies, and read-depth fluctuations; in 2D clustering, spurious artifact calls at similar CCF across both timepoints can masquerade as subclonal clusters. A putative subclone supported by fewer than five SVs lacks the evidentiary footprint to be distinguished from multi-dimensional background noise. Third, consistency with published practice : minimum-variant-per-cluster thresholds of 5-10 are standard filtering heuristics across published tumor subclonal-reconstruction pipelines 61 , 62 ; our floor of 5 in 2D is consistent with this convention, representing the conservative (lower) end of the established range. Clusters falling below five SVs were merged with their nearest neighbor by Euclidean distance. 0.2 Euclidean-distance threshold At low tumor purity (≤ 40%), the observed CCF separation between subclones with a true CCF difference of ∼0.30 collapses to ∼0.12, below DP-GMM’s resolving power. The 0.2 threshold was chosen to balance two regimes: large enough to consolidate spurious additional splits driven by SV-type CCF noise (high-purity spurious sub-clusters sit ≥ 0.5 from the parent in (pre, on) CCF space and are preserved as separate by this threshold), while small enough to leave genuine subclonal separations of ∼0.30 intact. Both thresholds were fixed prior to formal benchmarking and were not tuned per scenario. Alluvial plots were constructed using the R package ‘ggalluvial’ 63 . Circos plots were constructed using the R package ‘circlize’ 64 . Clonal trees and pie plots were created with SVCFit software. SV Tumor Evolution Tree Construction and Annotation SV clusters were ordered onto the edges of an evolutionary tree. Candidate directed edges between clusters were first identified by enforcing lineage precedence, which required that descendant CCFs not exceed ancestral CCFs by more than a tolerance ε1. Valid edges were assembled into candidate trees using a modified Gabow-Myers algorithm 65 , following the pseudocode of Popic et al. 66 . Briefly, the original Gabow-Myers procedure enumerated all directed spanning trees of a directed graph rooted at the ancestral node, and the modification constrained enumeration to edges that satisfied the lineage-precedence filter. Tree topologies that violated the Sum Condition 67 , where the sum of descendant CCFs exceeded the ancestral CCF by more than ε2, were excluded. We used ε1=0.2 and ε2=0.2. The tree reconstruction approach did not assume clock-like evolution or edge lengths proportional to time. The remaining trees were ranked using the SCHISM fitness function 68 , which assigned penalties for violations of lineage precedence and the Sum Condition. These penalties were combined as a weighted sum within the exponent of a negative exponential scoring function, such that trees with fewer or smaller violations received higher fitness scores. Equal weights were assigned to the two violation terms (weights = 1). Per-sample clonal proportions were then inferred from the top-ranked tree, where the proportion of a parent clone was computed as its CCF minus the sum of the CCFs of its children: where CCF 𝑛 is the CCF of the cluster associated with node n. CCF 𝐴(𝑛) is the CCF of the cluster directly upstream of node n. ∑ q ∈𝐷(𝑛) 𝐶 𝐶𝐹 q is the sum of CCFs of clusters directly downstream of node n. To annotate the edges of the trees, we first identified a set of prostate cancer driver genes and their hg38 coordinates from Hosseini et al. 38 and web search. Custom R scripts were used to identify driver gene overlap with the coordinates of the SVs called in each subject, including nearby upstream regions. Literature support for the driver-status of each gene that appears on a tree edge in Fig. 4 or in Supplementary Note S7 is compiled in Supplementary Table S2, organized by pathway with primary references. When annotating tree edges, “deletions” and “duplications” refer to events defined from SV breakpoint evidence rather than read-depth copy-number calls. Hemizygous deletion is annotated only when additionally supported by copy-number evidence (CN=1), and intragenic duplication/deletion only when breakpoints fall within the gene body. Structural Variant Down-sampling Validation To control for differences in mean sequencing depth between the pre-BAT (38×) and on-BAT (19×) biopsies from Subject 10, we performed a down-sampling validation. The pre-BAT BAM file was subsampled using samtools view -s to reduce the total read count so that the effective tumor coverage matched that of the on-BAT sample. Structural variant calling was re-run on this down-sampled dataset using the identical pipeline. To quantify sampling variability, the downsampling was repeated across 20 bootstrap replicates using distinct random seeds; mean SV counts and 95% CIs across replicates are reported in the Results. IHC analysis ERG IHC was performed on an automated staining platform (Ventana/Roche) using a validated IHC assay as described previously 69 using formalin-fixed, paraffin-embedded tumor sections were that were taken adjacent to the fresh frozen tissue used for the whole genome sequencing. Statistical analysis Unless stated otherwise, analyses used the two-sided paired Wilcoxon signed-rank test (α = 0.05). All p-values are corrected for multiple testing using the Benjamini-Hochberg method, with significance assessed against BH-adjusted p-values. Medians and interquartile ranges were reported where appropriate. The function wilcox_test from the R package rstatix (v0.7.2) 70 was used to apply the statistical test, and the R package ggpubr (v0.6.0) 71 was used to visualize statistical significance. Data availability All open access data is accessible through Mendeley (10.17632/2nhhdjx225.4) Code Availability https://github.com/KarchinLab/SVCFit/ MIT license Author Contributions Y.L. and R.K. conceived the study. Y.L. and R.K. developed the methodology. Y.L. implemented the software and performed the computational analyses. J.L. and Y.Y. contributed to data analysis and figure generation. M.C.M., E.S.A., A.M.D.M., S.Y., L.D.W., and L.A.S. provided clinical interpretation, biological insight, and domain expertise. L.A.S. coordinated access to clinical data and resources. Y.L., L.A.S., and R.K. wrote the initial manuscript. All authors contributed to manuscript review and editing. R.K. supervised the study. Competing Interests L.A.S. receives funding and resources to her institution from Panbela Therapeutics and Abbott Lingo for research that is not relevant to this work. E.S.A. reports grants and personal fees from Janssen, Johnson & Johnson, Sanofi, Bayer, Bristol Myers Squibb, Convergent Therapeutics, Curium, MacroGenics, Merck, Pfizer, and AstraZeneca; personal fees from Aadi Bioscience, Abeona Therapeutics, Aikido Pharma, Astellas, Amgen, Blue Earth, Boundless Bio, Corcept Therapeutics, Duality Bio, Exact Sciences, Hookipa Pharma, Invitae, Eli Lilly, Foundation Medicine, Menarini-Silicon Biosystems, Tango Therapeutics, Tempus, Tolmar Scientific, VIR Biotechnology, and Z-alpha; grants from Novartis, Celgene, and Orion; and has a patent for an AR-V7 biomarker technology that has been licensed to Qiagen. S.Y. reports grants from the NIH/NCI and Commonwealth Foundation during the conduct of the study, as well as grants from Janssen and Celgene/Bristol Myers Squibb, other support from Brahm Astra Therapeutics and Digital Harmonic, and grants and personal fees from Cepheid, all outside the submitted work. R.K. receives royalty distributions through Johns Hopkins Technology Ventures from licensing agreements with Exact Sciences Corp. (formerly Thrive Earlier Detection Corp.), Genentech Corp., Agios Pharmaceuticals, Inc., and Servier Pharmaceuticals. None of these arrangements relate to the work presented in this manuscript. Acknowledgements This work was supported by The Prostate Cancer Foundation, the Lustgarten Foundation (Y.L. L.D.W., R.K.), CDMRP/DOD grant W81XWH-21-PCARP-IDA (Y.L. L.D.W., R.K.), and NIH-NCI grants U01CA271273 (J.L., L.D.W., R.K.), P30CA006973 (S.Y, A.M.D.M) and P50CA272391 (S.Y., A.M.D.M), and Break Through Cancer, Cambridge, MA (to R.K., L.D.W., J.L.). E.S.A. is partially supported by NCI grant P30 CA077598 and DOD grant W81XWH-22-2-0025. M.C.M is supported by DOD grant W81XWH-19-1-0692. Special thanks to Dr. Winston Timp and Dr. Benjamin Langmead for their support of this project in its early days. Funder Information Declared Prostate Cancer Foundation, https://ror.org/01npwtv09 Lustgarten Foundation, https://ror.org/030635250 CDMRP/DOD , W81XWH-21-PCARP-IDA National Cancer Institute , U01CA271273 , P30 CA077598 , P30CA006973 , P50CA272391 Break Through Cancer United States Department of Defense , W81XWH-22-2-0025 , W81XWH-19-1-0692 Footnotes This version of the manuscript has been substantially revised from the January 2026 submission in response to peer review. The SVCFit framework, including the SVCF definition, the closed-form per-class estimators, and the non-diploid framework with iterative SVCF estimation, has been promoted from the Supplementary Notes into the main-text Results (new SVCFit framework subsection with five new equations and Table 1) and Methods. The signed cellular-fraction error metric (positive values indicate underestimation) replaces the absolute-error metric used previously, exposing per-SV-type direction of bias and enabling direct comparison with the SVclone published convention. The benchmarking has been strengthened on multiple axes. The VISOR simulation benchmark now spans 75 scenarios (five SV-CNV overlap configurations, five SV tumor purities at 10%, 20%, 40%, 60%, and 80%, and three subclonal mixture settings), with each simulated genome treated as the unit of observation. The in-silico prostate-mixture benchmark uses 30 independent bootstrap replicates per condition. All per-condition comparisons against SVclone use two-sided paired Wilcoxon signed-rank tests with Benjamini-Hochberg correction, and 95% confidence intervals and significance markers are reported throughout. A dedicated longitudinal self-evaluation of downstream phylogenetic inferences has been added (new Results subsection, new Methods subsection, new Supplementary Note S8, and four new supplementary figures). Paired pre- and post-treatment whole-genome simulations across four SV tumor purities (n = 24-25 replicates per purity, default parameters) quantify per-SV truncal/subclonal balanced accuracy (0.79 at 20% purity rising to 0.89 at 80% purity), per-clone CCF signed error, and rooted tree-topology recovery (83 to 88% across the purity range). The COMBAT longitudinal analysis has been substantially expanded from two patient cases to the 11 evaluable subjects of the COMBAT trial, with parallel SV-based and SNV-based phylogenies presented per subject in Supplementary Note S7. SV-based phylogenies resolved branched clonal architecture in 11 of 11 subjects, while SNV-based phylogenies provided clinically actionable annotations in only 1 of 11. The Subject 10 Clone 2 duplication is now reported as an 11 kb intronic event within CUL1 (chr7:148,755,369-148,766,429, hg38), not promoter-proximal to EZH2 as originally described, with the mechanistic interpretation updated accordingly. The Subject 31 tandem duplication on chromosome 13 was withdrawn because the directly relevant orthogonal copy-number test was negative. Clone and cluster terminology has been clarified throughout, overreaching longitudinal claims have been tempered, and a new driver-gene citation table (Supplementary Table S2) compiles literature support for every gene annotated on the cohort phylogenies. https://data.mendeley.com/datasets/2nhhdjx225/4 https://github.com/KarchinLab/SVCFit/ Reference 1. ↵ Venkatesan , S. , Swanton , C. , Taylor , B. S. & Costello , J. F . Treatment-induced Mutagenesis and selective pressures sculpt cancer evolution . Cold Spring Harb Perspect Med 7 , a026617 ( 2017 ). 2. ↵ Negrini , S. , Vassilis , G. & Thanos , D . Genomic Instability - Evolving Hallmark Cancer . Nat. Rev. Mol. Cell Biol . 3. ↵ Rhinehart , D. P. et al. Intratumoral heterogeneity drives acquired therapy resistance in a patient with metastatic prostate cancer. NPJ Precis . Oncol . 8 , 275 ( 2024 ). 4. ↵ Nowell , P. C . The clonal evolution of tumor cell populations . Science 194 , 23 – 28 ( 1976 ). OpenUrl Abstract / FREE Full Text 5. ↵ Mardis , E. R . The impact of next-generation sequencing on cancer genomics: From discovery to clinic . Cold Spring Harb Perspect Med 9 , a036269 ( 2019 ). 6. ↵ Li , Y. et al. Patterns of somatic structural variation in human cancer genomes . Nature 578 , 112 – 121 ( 2020 ). OpenUrl CrossRef PubMed 7. ↵ Hamdan , A. & Ewing , A . Unravelling the tumour genome: The evolutionary and clinical impacts of structural variants in tumourigenesis . J Pathol 257 , 479 – 493 ( 2022 ). OpenUrl CrossRef PubMed 8. ↵ Hurles , M. E. , Dermitzakis , E. T. & Tyler-Smith , C . The functional impact of structural variation in humans . Trends Genet 24 , 238 – 245 ( 2008 ). OpenUrl CrossRef PubMed Web of Science 9. ↵ Scott , A. J. , Chiang , C. & Hall , I. M . Structural variants are a major source of gene expression differences in humans and often affect multiple nearby genes . Genome Res 31 , 2249 – 2257 ( 2021 ). OpenUrl Abstract / FREE Full Text 10. ↵ Yoshimoto , M. et al. Absence of TMPRSS2:ERG fusions and PTEN losses in prostate cancer is associated with a favorable outcome . Mod. Pathol . 21 , 1451 – 1460 ( 2008 ). OpenUrl CrossRef PubMed Web of Science 11. Abeshouse , A. et al. The Molecular Taxonomy of Primary Prostate Cancer . Cell 163 , 1011 – 1025 ( 2015 ). OpenUrl CrossRef PubMed 12. ↵ Tomlins , S. A. et al. Recurrent fusion of TMPRSS2 and ETS transcription factor genes in prostate cancer . Science 310 , 644 – 648 ( 2005 ). OpenUrl Abstract / FREE Full Text 13. ↵ Weier , C. et al. Nucleotide resolution analysis of TMPRSS2 and ERG rearrangements in prostate cancer . J. Pathol . 230 , 174 – 183 ( 2013 ). OpenUrl CrossRef PubMed 14. ↵ Quigley , D. A. et al. Genomic Hallmarks and Structural Variation in Metastatic Prostate Cancer . Cell 174 , 758 – 769 .e9 ( 2018 ). OpenUrl CrossRef PubMed 15. ↵ Viswanathan , S. R. et al. Structural Alterations Driving Castration-Resistant Prostate Cancer Revealed by Linked-Read Genome Sequencing . Cell 174 , 433 – 447 .e19 ( 2018 ). OpenUrl CrossRef PubMed 16. ↵ Aran , D. , Sirota , M. & Butte , A. J . Systematic pan-cancer analysis of tumour purity . Nat Commun 6 , 8971 ( 2015 ). OpenUrl CrossRef PubMed 17. ↵ Taylor , A. M. et al. Genomic and functional approaches to understanding cancer aneuploidy . Cancer Cell 33 , 676 – 689 .e3 ( 2018 ). OpenUrl CrossRef PubMed 18. ↵ Satas , G. , Zaccaria , S. , El-Kebir , M. & Raphael , B. J . DeCiFering the elusive cancer cell fraction in tumor heterogeneity and evolution . Cell Syst 12 , 1004 – 1018 .e10 ( 2021 ). OpenUrl PubMed 19. Zheng , L. , Niknafs , N. , Wood , L. D. , Karchin , R. & Scharpf , R. B . Estimation of cancer cell fractions and clone trees from multi-region sequencing of tumors . Bioinformatics 38 , 3677 – 3683 ( 2022 ). OpenUrl CrossRef PubMed 20. ↵ Roth , A. et al. PyClone: statistical inference of clonal population structure in cancer . Nat Methods 11 , 396 – 398 ( 2014 ). OpenUrl CrossRef PubMed Web of Science 21. ↵ Cmero , M. et al. Inferring structural variant cancer cell fraction . Nat Commun 11 , 730 ( 2020 ). 22. ↵ Hong , M. K. H. et al. Tracking the origins and drivers of subclonal metastatic expansion in prostate cancer . Nat Commun 6 , 6605 ( 2015 ). OpenUrl CrossRef PubMed 23. ↵ Shen , R. & Seshan , V. E . FACETS: allele-specific copy number and clonal heterogeneity analysis tool for high-throughput DNA sequencing . Nucleic Acids Res 44 , e131 – e131 ( 2016 ). OpenUrl CrossRef PubMed 24. ↵ Bolognini , D. et al. VISOR: a versatile haplotype-aware structural variant simulator for short- and long-read sequencing . Bioinformatics 36 , 1267 – 1269 ( 2020 ). OpenUrl CrossRef PubMed 25. ↵ Chiang , C. et al. SpeedSeq: ultra-fast personal genome analysis and interpretation . Nat Methods 12 , 966 – 968 ( 2015 ). OpenUrl CrossRef PubMed 26. ↵ Jeffares , D. C. et al. Transient structural variations have strong effects on quantitative traits and reproductive isolation in fission yeast . Nat. Commun . 8 , 14061 ( 2017 ). 27. ↵ Schweizer , M. T. et al. Effect of bipolar androgen therapy for asymptomatic men with castration-resistant prostate cancer: Results from a pilot clinical study . Sci. Transl. Med . 7 , 269r a2 ( 2015 ). OpenUrl 28. ↵ Denmeade , S. R. , Sena , L. A. , Wang , H. , Antonarakis , E. S. & Markowski , M. C . Bipolar Androgen Therapy Followed by Androgen Receptor Inhibition as Sequential Therapy for Prostate Cancer . The Oncologist 28 , 465 – 473 ( 2023 ). OpenUrl CrossRef PubMed 29. ↵ Markowski , M. C. et al. Bipolar androgen therapy plus nivolumab for patients with metastatic castration-resistant prostate cancer: the COMBAT phase II trial . Nat. Commun . 15 , 14 ( 2024 ). 30. ↵ Denmeade , S. R. et al. TRANSFORMER: A Randomized Phase II Study Comparing Bipolar Androgen Therapy Versus Enzalutamide in Asymptomatic Men With Castration-Resistant Metastatic Prostate Cancer . J. Clin. Oncol. Off. J. Am. Soc. Clin. Oncol . 39 , 1371 – 1382 ( 2021 ). OpenUrl 31. ↵ Lai , J. et al. Reconstructing clone-resolved transcriptional programs from bulk tumor sequencing . 2026.01.15.699695 Preprint at doi: 10.64898/2026.01.15.699695 ( 2026 ). OpenUrl Abstract / FREE Full Text 32. ↵ Sondka , Z. et al. The COSMIC Cancer Gene Census: describing genetic dysfunction across all human cancers . Nat. Rev. Cancer 18 , 696 – 705 ( 2018 ). OpenUrl CrossRef PubMed 33. ↵ Oliner , J. D. , Saiki , A. Y. & Caenepeel , S . The Role of MDM2 Amplification and Overexpression in Tumorigenesis . Cold Spring Harb. Perspect. Med . 6 , a026336 ( 2016 ). 34. ↵ Moore , J. E. et al. Expanded encyclopaedias of DNA elements in the human and mouse genomes . Nature 583 , 699 – 710 ( 2020 ). OpenUrl CrossRef PubMed 35. ↵ O’Hagan , R. C. et al. Myc-enhanced expression of Cul1 promotes ubiquitin-dependent proteolysis and cell cycle progression . Genes Dev . 14 , 2185 – 2191 ( 2000 ). OpenUrl Abstract / FREE Full Text 36. ↵ Yang , G. et al. Elevated Skp2 Protein Expression in Human Prostate Cancer: Association with Loss of the Cyclin-dependent Kinase Inhibitor p27 and PTEN and with Reduced Recurrence-free Survival1 . Clin. Cancer Res . 8 , 3419 – 3426 ( 2002 ). OpenUrl Abstract / FREE Full Text 37. ↵ Chakraborty , G. et al. Attenuation of SRC Kinase Activity Augments PARP Inhibitor-mediated Synthetic Lethality in BRCA2-altered Prostate Tumors . Clin. Cancer Res. Off. J. Am. Assoc. Cancer Res . 27 , 1792 – 1806 ( 2021 ). OpenUrl 38. ↵ Hosseini , N. et al. Divergent Clonal Evolution and Early Dissemination Promote Genetic Heterogeneity of Metastases in Castration-Resistant Prostate Cancer . Cancer Res . 85 , 4251 – 4268 ( 2025 ). OpenUrl PubMed 39. ↵ Chopra , H. et al. Activation of p53 and destabilization of androgen receptor by combinatorial inhibition of MDM2 and MDMX in prostate cancer cells . Oncotarget 9 , 6270 – 6281 ( 2018 ). OpenUrl PubMed 40. ↵ Chehrazi-Raffle , A. et al. Unique Spectrum of Activating BRAF Alterations in Prostate Cancer . Clin. Cancer Res. Off. J. Am. Assoc. Cancer Res . 29 , 3948 – 3957 ( 2023 ). OpenUrl 41. ↵ Vélez-Cruz , R. et al. RB localizes to DNA double-strand breaks and promotes DNA end resection and homologous recombination through the recruitment of BRG1 . Genes Dev . 30 , 2500 – 2512 ( 2016 ). OpenUrl Abstract / FREE Full Text 42. ↵ Turajlic , S. , Sottoriva , A. , Graham , T. & Swanton , C . Resolving genetic heterogeneity in cancer . Nat. Rev. Genet . 20 , 404 – 416 ( 2019 ). OpenUrl CrossRef PubMed 43. ↵ Chatterjee , P. et al. Supraphysiological androgens suppress prostate cancer growth through androgen receptor-mediated DNA damage . J. Clin. Invest . 129 , 4245 – 4260 ( 2019 ). OpenUrl CrossRef PubMed 44. ↵ Markowski , M. C. et al. Molecular and Clinical Characterization of Patients With Metastatic Castration Resistant Prostate Cancer Achieving Deep Responses to Bipolar Androgen Therapy . Clin. Genitourin. Cancer 20 , 97 – 101 ( 2022 ). OpenUrl PubMed 45. ↵ Sena , L. A. et al. Androgen receptor activity in prostate cancer dictates efficacy of bipolar androgen therapy through MYC . J. Clin. Invest . 132 , e162396 ( 2022 ). OpenUrl CrossRef PubMed 46. ↵ Markowski , M. C. et al. A Multicohort Open-label Phase II Trial of Bipolar Androgen Therapy in Men with Metastatic Castration-resistant Prostate Cancer (RESTORE): A Comparison of Post-abiraterone Versus Post-enzalutamide Cohorts . Eur. Urol . 79 , 692 – 699 ( 2021 ). OpenUrl PubMed 47. ↵ Teply , B. A. et al. Bipolar androgen therapy in men with metastatic castration-resistant prostate cancer after progression on enzalutamide: an open-label, phase 2, multicohort study . Lancet Oncol . 19 , 76 – 86 ( 2018 ). OpenUrl CrossRef PubMed 48. ↵ Quintanal-Villalonga , Á. et al. Lineage plasticity in cancer: a shared pathway of therapeutic resistance . Nat. Rev. Clin. Oncol . 17 , 360 – 371 ( 2020 ). OpenUrl CrossRef PubMed 49. ↵ Kim , N. , Eum , H. H. & Lee , H.-O . Clinical Perspectives of Single-Cell RNA Sequencing . Biomolecules 11 , 1161 ( 2021 ). OpenUrl PubMed 50. ↵ Ren , X. , Kang , B. & Zhang , Z . Understanding tumor ecosystems by single-cell sequencing: promises and limitations . Genome Biol . 19 , 211 ( 2018 ). 51. ↵ Chen , X. , et al. Manta: rapid detection of structural variants and indels for germline and cancer sequencing applications . Bioinformatics 32 , 1220 – 1222 ( 2016 ). OpenUrl CrossRef PubMed 52. ↵ Rausch , T. et al. DELLY: structural variant discovery by integrated paired-end and split-read analysis . Bioinformatics 28 , i333 – i339 ( 2012 ). OpenUrl CrossRef PubMed Web of Science 53. ↵ Cameron , D. L. et al. GRIDSS2: comprehensive characterisation of somatic structural variation using single breakend variants and structural variant phasing . Genome Biol 22 , 202 ( 2021 ). 54. ↵ Danecek , P. et al. Twelve years of SAMtools and BCFtools . GigaScience 10 , giab008 ( 2021 ). 55. ↵ Li , H. & Durbin , R . Fast and accurate short read alignment with Burrows-Wheeler transform . Bioinformatics 25 , 1754 – 1760 ( 2009 ). OpenUrl CrossRef PubMed Web of Science 56. ↵ Krueger , F. , et al. FelixKrueger/TrimGalore: v0.6.10 - add default decompression path (0.6.10) . Zenodo ( 2023 ). 57. ↵ Van Der Auwera , G. A. & O’Connor , B. D . Genomics in the Cloud . ( O’Reilly Media , 2020 ). 58. ↵ Montefiori , L. E. et al. Enhancer Hijacking Drives Oncogenic BCL11B Expression in Lineage-Ambiguous Stem Cell Leukemia . Cancer Discov . 11 , 2846 – 2867 ( 2021 ). OpenUrl Abstract / FREE Full Text 59. ↵ Pedregosa , F. et al. Scikit-learn: Machine Learning in Python . J. Mach. Learn. Res . 12 , 2825 – 2830 ( 2011 ). OpenUrl CrossRef PubMed 60. ↵ Interface to Python . https://rstudio.github.io/reticulate/ . 61. ↵ Tarabichi , M. et al. A practical guide to cancer subclonal reconstruction from DNA sequencing . Nat. Methods 18 , 144 – 155 ( 2021 ). OpenUrl CrossRef PubMed 62. ↵ Salcedo , A. et al. A community effort to create standards for evaluating tumor subclonal reconstruction . Nat. Biotechnol . 38 , 97 – 107 ( 2020 ). OpenUrl CrossRef PubMed 63. ↵ Brunson , J . C. ggalluvial: Layered Grammar for Alluvial Plots . J. Open Source Softw . 5 , 2017 ( 2020 ). OpenUrl PubMed 64. ↵ Gu , Z. , Gu , L. , Eils , R. , Schlesner , M. & Brors , B . circlize implements and enhances circular visualization in R . Bioinformatics 30 , 2811 – 2812 ( 2014 ). OpenUrl CrossRef PubMed Web of Science 65. ↵ Gabow , H. N. & Myers , E. W . Finding All Spanning Trees of Directed and Undirected Graphs . SIAM J. Comput . 7 , 280 – 287 ( 1978 ). OpenUrl CrossRef 66. ↵ Popic , V. et al. Fast and scalable inference of multi-sample cancer lineages . Genome Biol . 16 , 91 ( 2015 ). 67. ↵ Nik-Zainal , S. et al. The life history of 21 breast cancers . Cell 149 , 994 – 1007 ( 2012 ). OpenUrl CrossRef PubMed 68. ↵ Niknafs , N. , Beleva-Guthrie , V. , Naiman , D. Q. & Karchin , R . SubClonal Hierarchy Inference from Somatic Mutations: Automatic Reconstruction of Cancer Evolutionary Trees from Multi-region Next Generation Sequencing . PLoS Comput. Biol . 11 , e1004416 ( 2015 ). OpenUrl CrossRef PubMed 69. ↵ Morais , C. L. et al. PTEN loss and ERG protein expression are infrequent in prostatic ductal adenocarcinomas and concurrent acinar carcinomas . The Prostate 75 , 1610 – 1619 ( 2015 ). OpenUrl PubMed 70. ↵ Kassambara , A . rstatix: Pipe-Friendly Framework for Basic Statistical Tests . ( 2025 ). 71. ↵ Kassambara , A . ggpubr: ‘ggplot2’ Based Publication Ready Plots . ( 2025 ). View the discussion thread. Back to top Previous Next Posted May 13, 2026. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Longitudinal structural variant phylogenies define tumor evolution under therapeutic selection pressure in metastatic prostate cancer Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Longitudinal structural variant phylogenies define tumor evolution under therapeutic selection pressure in metastatic prostate cancer Yunzhou Liu , Jiaying Lai , Yang Yi , Mark C. Markowski , Emmanuel S. Antonarakis , Angelo M. De Marzo , Srinivasan Yegnasubramanian , Laura D. Wood , Laura A. Sena , Rachel Karchin bioRxiv 2025.02.01.636056; doi: https://doi.org/10.1101/2025.02.01.636056 Share This Article: Copy Citation Tools Longitudinal structural variant phylogenies define tumor evolution under therapeutic selection pressure in metastatic prostate cancer Yunzhou Liu , Jiaying Lai , Yang Yi , Mark C. Markowski , Emmanuel S. Antonarakis , Angelo M. De Marzo , Srinivasan Yegnasubramanian , Laura D. Wood , Laura A. Sena , Rachel Karchin bioRxiv 2025.02.01.636056; doi: https://doi.org/10.1101/2025.02.01.636056 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17691) Bioengineering (13892) Bioinformatics (41937) Biophysics (21452) Cancer Biology (18589) Cell Biology (25504) Clinical Trials (138) Developmental Biology (13378) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24320) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88605) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15156) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00