DNA sequence encoded conformational flexibility orchestrates pioneer transcription factor–nucleosome interaction landscape

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 96,179 characters · extracted from preprint-html · click to expand
DNA sequence encoded conformational flexibility orchestrates pioneer transcription factor–nucleosome interaction landscape | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results DNA sequence encoded conformational flexibility orchestrates pioneer transcription factor–nucleosome interaction landscape View ORCID Profile Upalabdha Dey , View ORCID Profile Gustavo Sganzerla Martinez , View ORCID Profile Venkata Rajesh Yella , View ORCID Profile Aditya Kumar doi: https://doi.org/10.1101/2025.05.21.655105 Upalabdha Dey 1 Tezpur University, Molecular Biology and Biotechnology , Tezpur, Assam, 784028, India Find this author on Google Scholar Find this 
author on PubMed Search for this author on this site ORCID record for Upalabdha Dey Gustavo Sganzerla Martinez 2 Department of Microbiology and Immunology, Faculty of Medicine, Dalhousie University , Halifax, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Gustavo Sganzerla Martinez Venkata Rajesh Yella 3 Koneru Lakshmaiah Education Foundation, Department of Biotechnology , Vaddeswaram, Guntur, Andhra Pradesh, 522502, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Venkata Rajesh Yella For correspondence: yvrajesh_bt{at}kluniversity.in aditya{at}tezu.ernet.in Aditya Kumar 1 Tezpur University, Molecular Biology and Biotechnology , Tezpur, Assam, 784028, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Aditya Kumar For correspondence: yvrajesh_bt{at}kluniversity.in aditya{at}tezu.ernet.in Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Transcriptional regulation by pioneer transcription factors (PTFs) critically depends on their ability to uniquely engage with cognate motifs embedded within nucleosome-occluded DNA. Despite their importance, the mechanistic underpinnings of DNA conformational dynamics that facilitate PTF-nucleosomal interactions remain elusive. To address this, we present the first large-scale, systematic investigation dissecting the role of DNA flexibility in defining transcription factor (TF) binding specificity in-vitro and in-vivo within nucleosomal DNA. Integration of high-throughput datasets with empirically derived DNA flexibility descriptors obtained DNA flexibility-augmented statistical regression models that highlight the relative contribution and mechanistic relevance of bending, torsional, and stretching flexibilities in predicting the nucleosomal DNA binding affinities of 194 TFs. 
Our findings also suggest that a “DNA flexibility gradient” along the nucleosomal substrate might be a key element of PTF-mediated nucleosome destabilization. Integrative modeling of DNA flexibility with in-vivo genomic occupancy and chromatin accessibility data reveals that established PTFs, GATA3 and the pluripotency factors (OCT4, SOX2, and KLF4), likely exploit the differential DNA flexibility signatures observed at the pioneered sites to remodel closed chromatin. Finally, by proposing a robust computational framework, we demonstrate that sequence-encoded DNA structural descriptors are key determinants for accurately identifying functional PTF binding sites across diverse cellular contexts. In summary, our study highlights DNA conformational flexibility as a critical and previously underappreciated regulatory layer in eukaryotic gene regulation mediated by PTFs—advancing our understanding of the interplay between DNA structural dynamics, chromatin organization, and transcriptional control. Introduction Transcription factors (TFs) are a group of DNA-binding proteins that recognize specific “motifs” in the genome and are crucial in directing the gene regulatory networks involved in basal and enhanced transcription, development, cell-cycle control, and response to intra- and inter-cellular cues. In eukaryotic systems, 75-90% of the genome (Bell et al. 2011; Van Holde 2012) is usually folded by histone proteins into nucleosomes, forming a condensed chromatin structure in which 147 bp long DNA stretches are wrapped in one and three-fourths turns encompassing 14 superhelical turns (Read, Baldwin, and Crane-Robinson 1985). Therefore, the majority of TF DNA binding sites (TFBS) are usually buried in nucleosomal DNA, rendering them inaccessible in the condensed state (Zhu et al. 2018). However, this constraint is an essential mechanism of eukaryotic gene regulation that involves pioneer transcription factors.
A subset of TFs, pioneer TFs (PTFs), have the ability to access obscured TFBS tightly packed in closed chromatin (Barral and Zaret 2024). They remodel the local chromatin landscape by directly evicting nucleosomes or by recruiting chromatin remodeling enzymes (Iwafuchi-Doi and Zaret 2014; Bulyk et al. 2023; Stoeber et al. 2024; Zaret 2020). Hence, PTFs can convert regulatory sequences from a closed to an open chromatin conformation, making them available to their counterparts without “pioneering” capabilities. This “bookmarking activity” in the cis-regulatory region, coupled with the hierarchical mode of DNA interactions, is thought to be instrumental for cellular differentiation, development, and even reprogramming events (Barral and Zaret 2024). In this context, several models have been put forward to approximate the mechanism of nucleosomal interactions by PTFs (Carminati et al. 2024). The most prominent ones are competition between PTFs and histone molecules for nucleosomal DNA (Donovan et al. 2023) and partial unwrapping of DNA from the histone core for transient access to PTF binding sites (Stoeber et al. 2024; Huyghe, Trajkova, and Lavial 2024; Balsalobre and Drouin 2022). Nonetheless, positional preferences of PTF binding in nucleosomal DNA are well documented, suggesting that PTF binding to nucleosomal DNA is frequent at the entry and exit sites of nucleosomal complexes, i.e., at superhelical locations -7 or +7. This preference is often attributed to DNA sequence made transiently accessible through the nucleosome breathing mechanism (Isbel, Grand, and Schübeler 2022), while the nucleosomal dyad region and internal sites of the complex are rather inaccessible and constrained by the histone moieties (Carminati et al. 2024).
In a comprehensive study, Zhu et al. used a high-throughput SELEX-based in-vitro experiment, Nucleosome Consecutive Affinity-Purification SELEX (NCAP-SELEX), to screen the nucleosome binding capacity of nearly 200 mammalian TFs (Zhu et al. 2018). They stratified the TFs into a few non-mutually exclusive groups, where one cohort of TFs was found to engage the two DNA gyres (gyre-binding TFs), while other TFs showed orientational or periodic preferences, and only a fraction of TFs were found to engage the nucleosome dyad. However, it was concluded that the majority of TF-nucleosomal interactions are limited to the entry and exit sites of the 147 bp nucleosomal sequence (Michael et al. 2020). In line with this, the high-resolution structure of the OCT4-SOX2-nucleosome interface demonstrates that periodic binding to solvent-exposed sequence motifs in nucleosomal DNA is also relevant, in addition to interactions mediated by binding-site exposure at the entry and exit sites (Michael et al. 2020). Similarly, GATA3, a zinc finger TF, is found to bind a partial or half-motif present in an exposed position along the nucleosomal surface (Isbel, Grand, and Schübeler 2022). A recent cryo-EM study reported that histone proteins are more weakly bound to the genomic ALBN1 enhancer DNA sequence than to the Widom 601 synthetic nucleosome positioning sequence (Takizawa et al. 2018). All these outstanding in-vitro and in-vivo observations on PTF-nucleosomal interaction highlight the role of DNA sequence and of sequence-dependent features like nucleotide composition, distribution, frequency, periodicity, density of specific oligonucleotides, and motifs (Xu et al. 2024). However, owing to their short length of 6-12 bp, the recognition motifs of a PTF are far more widespread across the eukaryotic genome than its in-vivo functional binding sites.
Since degeneracy of the DNA recognition motifs of TFs from the same family (with the same DNA binding domain) is very common, how DNA sequence and context drive PTF binding to their cognate functional sites embedded in closed chromatin still remains elusive (Slattery et al. 2014). Recent regulatory genomics studies, when complemented with structural insights into the TF-DNA interface, highlighted the importance of the larger sequence environment of TFBS in sequence-specific TF interactions (Inukai, Kock, and Bulyk 2017; Peng et al. 2024). Of note, B-DNA in-vivo, owing to its non-homogeneous composition, explores a large conformational space (Rohs et al. 2010). While DNA conformational flexibility largely depends upon the sequence-dependent fluctuations of DNA helical parameters at dinucleotide steps, other factors, like asymmetry in glycosidic bonds, exocyclic groups within the grooves, and stacking area overlap in the dinucleotide steps, are also considered to influence the structural plasticity and anisotropic behaviour of the DNA molecule (Peters and Maher III 2010; Bansal, Kumar, and Yella 2014). Therefore, various mechanical properties, like DNA duplex bendability (Basu, Bobrovnikov, and Ha 2021; Marin-Gonzalez et al. 2021; Dohnalová and Lankaš 2022), DNA shape, and electrostatic potential, vary with the sequence context (Rohs et al. 2010). Such features have been found to be determinants of TF-cognate DNA binding specificity beyond one-dimensional sequence information (Slattery et al. 2014; Le Poul et al. 2020; Pataskar et al. 2019). DNA shape, curvature, groove shape, topography, and bendability therefore have a significant role in DNA-protein recognition (Abe et al. 2015; Yella et al. 2018; Sarkar et al. 2021; Dey et al. 2023), DNA strand melting, genome packaging (Biswas and Basu 2023), and nucleosomal assembly (Bishop et al. 2011). In line with this, Basu et
al. developed “loop-seq”, a technique to experimentally determine the propensity of a yeast DNA sequence to adopt a “loopable” or “cyclizable” conformation (Basu et al. 2021). It was reported that the loopability of a DNA molecule, being intrinsic to its sequence composition, can act as a proxy for DNA bendability and could underpin the principles of yeast nucleosomal assembly, transcription factor binding, and other cis-regulatory logic (Basu, Bobrovnikov, and Ha 2021). Many predictive models have emerged that can quantify DNA flexibility along the DNA sequence (K. Li et al. 2022; Back and Walther 2023). In the context of nucleosomal-PTF interaction, the importance of DNA flexibility and other sequence-dependent mechanical codes still remains unexplored (Ngo et al. 2016). Contemporary evidence from nucleosomal-PTF structural studies and complementary molecular dynamics (MD) simulations of the complexes suggests that sequence-dependent DNA dynamics are crucial for OCT4, SOX2, and OCT4-SOX2 co-binding events on nucleosomal sequences. However, how widespread these DNA flexibility dynamics are in PTF-nucleosomal interactions has not yet been characterized across different PTFs. In this context, it was also found that DNA sequences with a 10 bp dinucleotide periodicity are preferred by histone octamers for nucleosome assembly (Basu et al. 2021). A single-molecule experiment with a nucleosome formed on the W601 sequence suggested that the stability of one end of the W601 substrate depends heavily on unwrapping of the other end (Ngo et al. 2015). Along with other historical perspectives on DNA flexibility, the current literature also embraces the role of sequence context and sequence-encoded mechanical properties in nucleosomal positioning (Basu et al. 2022; Lawson et al. 2024).
Nevertheless, the relevance of DNA flexibility to in-vivo functional PTF-nucleosomal interactions, to the accessibility of PTF recognition motifs in the closed chromatin configuration, and to PTF-driven changes in chromatin conformation warrants more focus. In this study, we present a high-throughput computational approach for quantifying DNA sequence-encoded flexibility in the realm of PTF-nucleosomal interactions. First, our approach quantified DNA flexibility using the in-vitro NCAP-SELEX dataset and showed how DNA flexibility could be relevant to TF-nucleosomal interactions. Next, integrating multimodal genomic datasets for hallmark PTFs like GATA3 and the pluripotency factors, we showed the importance of DNA flexibility for nucleosomal PTF interactions. Further, we report that DNA-encoded flexibility can in turn increase motif accessibility in nucleosomal DNA compared to free DNA, as has been suggested previously. Finally, we showed how different DNA flexibility models could be useful to differentiate in-vivo functional PTF binding sites on nucleosomal sequences from their non-functional nucleosomal counterparts. These results indeed suggest that DNA flexibility, along with the cognate motifs, provides specificity for in-vivo PTF-nucleosomal interactions. Results DNA flexibility is pronounced in in-vitro interaction of nucleosome and transcription factors In order to systematically characterize how DNA flexibility influences PTF-nucleosomal interactions, we analyzed the Nucleosome Consecutive Affinity-Purification SELEX (NCAP-SELEX) dataset—a compendium of high-throughput experiments quantifying DNA binding affinities for 194 mammalian transcription factors (TFs) (Zhu et al. 2018). We computed axial (bending), torsional, and stretching flexibility profiles along the 101 bp end-binding nucleosomal sequences derived from NCAP-SELEX PWMs ( Figure 1A ).
Focusing on 82 end-binding TFs, we assessed flexibility patterns using models representing DNase-I sensitivity, nucleosome positioning preference, twist dispersion, and stiffness modulus. Download figure Open in new tab Figure 1: DNA Flexibility Characterizes In Vitro TF–Nucleosome Interactions. (A) Schematic workflow outlining NCAP-SELEX data processing and downstream flexibility analysis (B) Representative DNaseI flexibility profiles for CEBPB end-binding sequences obtained from NCAP-SELEX (red), free DNA HT-SELEX (blue), and Nucleosome-SELEX (green) libraries. Higher values indicate greater flexibility (propensity to bend towards the major groove) (C) Hierarchical clustering of DNase-I flexibility profiles for 82 end-binding TFs from the lig147 NCAP-SELEX library. Flexibility values (Z-scores) are scaled per TF. Blue indicates lower flexibility; red indicates higher flexibility. Arrow denotes direction from outer flank towards nucleosome center (D) Comparison of FIMO log-odds motif scores across CEBPB end-binding sequences binned into quartiles (Q1-Q4) of increasing DNase I flexibility. **** p < 0.0001, Wilcoxon rank-sum test comparing Q4 vs Q1. (E) Hierarchically clustered correlation matrix relating NCAP-SELEX quantitative binding metrics (e.g., E-MI penetration, periodicity) with flexibility profile gradients derived from five distinct biophysical scales (DNase I, NPP, twist dispersion, trx, stiffness) (F) Principal Component Analysis (PCA) of TFs based on sequence features alone (4-mer counts; left panel) versus sequence features combined with flexibility descriptors (right panel). Points represent individual TFs, color-coded by DBD family (G) Boxplots comparing pairwise Euclidean distances between TFs within the same family (Intra-family) versus different families (Inter-family) using PCA coordinates derived from sequence alone (left) or sequence + flexibility (right). 
Incorporating flexibility significantly increases inter-family separation (ΔMedian = 1.76 vs 1.47). For instance, the pioneer factor CEBPB exhibited enhanced DNA flexibility flanking its core motif site within bound nucleosomes, compared to unbound nucleosomal DNA or CEBPB-bound free DNA ( Figure 1B ). Crucially, higher DNase-I derived flexibility correlated significantly with higher CEBPB affinity (Wilcoxon rank-sum test, p < 0.0001 ; Figure 1D ), indicating that flanking flexibility promotes high-affinity PTF-nucleosomal binding. Generalizing across TF families, our analysis revealed distinct flexibility patterns associated with different DNA binding domains (DBDs). Factors like bZIP, zinc-finger, and forkhead TFs displayed prominent “flexibility gradients,” whereas bHLH and homeodomain factors showed more periodic profiles ( Figure 1C ). To link these structural features to mechanisms, we further correlated flexibility profile slopes with nucleosomal engagement metrics (E-MI scores) from NCAP-SELEX ( Figure 1E ). Positive correlations emerged between DNase-I and torsional flexibility gradients and measures of nucleosome penetration and destabilization, particularly for established nucleosome-displacing factors like homeodomain and forkhead proteins ( Supplementary Figure 1D ). Furthermore, clustering TFs based on flexibility slopes grouped factors by DBD family ( Supplementary Figure 1C ), suggesting that domain architecture dictates flexibility-mediated recognition even under nucleosomal constraints, consistent with prior work on DNA shape (Yang et al. 2017). Principal component analysis (PCA) demonstrated that incorporating flexibility features along with sequence information improved the partitioning of TF families ( Figure 1F ). Flexibility descriptors accounted for significant variance (PC1 ∼15%, PC2 ∼11% vs. ∼12% and ∼9% for sequence-only), and enhanced the separation between families like bHLH and bZIP ( Figure 1F , right panel).
To quantify the added discriminatory power conferred by DNA flexibility, we compared pairwise Euclidean distances between TFs across models with and without flexibility features. Inclusion of flexibility information significantly increased inter-family distances relative to sequence-only models, with a marked improvement in the separation between TFs from distinct DBD classes ( Figure 1G ). To summarize, these results underscore that sequence-encoded DNA flexibility, particularly the flexibility gradient along the nucleosomal substrate, varies markedly across TF families and constitutes a key determinant of TF-nucleosomal interactions. This motivated further investigation using statistical machine learning models. DNA flexibility augmented models accurately predict TF binding affinity on nucleosomal sequences To quantify the contribution of DNA flexibility to TF binding affinity for nucleosomes, we applied multiple linear regression (MLR) models to the NCAP-SELEX dataset, adapting established strategies. Baseline models incorporating only mononucleotide sequence features (1-mer) were compared against models augmented with bending and torsional DNA flexibility descriptors (1-mer + Flexibility; Figure 2A ). Predictive performance was evaluated using ridge regression with 10-fold nested cross-validation, and improvements were assessed by changes in the coefficient of determination (ΔR²). Download figure Open in new tab Figure 2: Addition of DNA flexibility features enhances prediction of TF-nucleosomal binding affinity in NCAP-SELEX data (A) Overview of the regression modeling pipeline. Affinity tables derived from NCAP-SELEX k-mer counts were used to train L2-regularized multiple linear regression models predicting relative binding affinity based on sequence features alone (1-mer) or sequence combined with DNA flexibility descriptors (1-mer + Flexibility).
Model performance was evaluated using 10-fold cross-validation (R²) (B) Scatter plot comparing model performance (R²) for 301 TF datasets using sequence-only features (x-axis) versus sequence + flexibility features (y-axis). Each point represents a dataset, colored by TF family. Diagonal line indicates equal performance (C) Summary scatter plot showing R² comparison for 128 unique TFs, selecting the dataset with the longest flanking region per TF from (B); (D) Violin plots comparing the distribution of R² values for sequence-only versus sequence + flexibility models across all 128 unique TFs. Flexibility significantly improves performance (Wilcoxon signed-rank test, p = 0.028) (E) Boxplots showing the improvement in R² (ΔR² = R²(1mer+Flex) - R²(1mer)) upon adding flexibility features, grouped by TF family. Significant differences exist across families (Kruskal–Wallis test, p = 0.045 ). From an initial pool of 708 TF datasets derived from NCAP-SELEX, we first applied a validity criterion: datasets where the augmented models(1-mer + Flexibility) failed to outperform the baseline model(1-mer) were excluded (N=140), as adding features should theoretically yield at least equivalent performance. This resulted in 568 valid datasets for subsequent analysis. Among these valid datasets, incorporating DNA flexibility features significantly enhanced predictive performance (ΔR² > 0) in 53%(N=301) of datasets( Figure 2B, 2C ), compared to 267 datasets (47%) that showed no improvement. Furthermore, the magnitude of these improvements was considerable for a subset: 119 (40%) of the gaining datasets exhibited a ΔR² greater than 5%, 37 (12%) exceeded 10%, and three datasets achieved improvements over 0.20 (GATA2: 21.3%, LMX1B: 22.5%, PAX7: 21.7%). 
This overall trend of enhanced performance upon including flexibility was consistently observed when results were aggregated per TF across datasets derived from varying motif, and flanking sequence lengths, and the difference in model performance is significantly improved by addition of flexibility features ( Figure 2C, 2D ). Grouping the TFs by DNA-binding domain (DBD) family revealed distinct patterns in ΔR² gains, with Forkhead, CUT, and PAX families (including the top performer PAX7) exhibiting the most substantial improvements, suggesting a pronounced dependence on local DNA mechanical properties for nucleosomal site recognition ( Figure 2E ; Kruskal–Wallis test, p < 0.05 between groups). Notably, TFs known for closed chromatin engagement, and pioneering activity—like ONECUT2 ( Rotinen et al. 2018 ), CUX1 ( Liu et al. 2024 ), FOXO1 ( Remadevi, Muraleedharan, and Sreeja 2021 ), PAX7 ( Mayran et al. 2018 ; Gouhier et al. 2024 ), SOX2, MAF ( Katsarou et al. 2023 ) demonstrated particularly large flexibility-driven enhancements in binding affinity predictions( Balsalobre and Drouin 2022 ). Collectively, these findings strongly support the hypothesis that axial and torsional DNA flexibility, particularly within flanking regions surrounding the core motif, constitutes a critical determinant of effective TF engagement with nucleosomal DNA. Moreover, results also indicate that DNA flexibility features are particularly relevant for PTF-nucleosomal interactions, prompting our investigation into the in-vivo binding sites of PTFs. DNA flexibility might influence nucleosomal pioneer TF interactions in-vivo The pioneering activity of OCT4, SOX2, and KLF4 (OSK) is well established( Knaupp et al. 2017 ; D. Li et al. 2017 ; Takahashi and Yamanaka 2006 ). Motivated by the results obtained from the in-vitro data analysis, we investigated the role of DNA flexibility in OSK binding in vivo using publicly available ChIP-Seq and MNase-Seq datasets ( Soufi et al. 2015 ). 
We integrated these data to identify OSK binding sites overlapping nucleosomal regions and characterize the DNA flexibility signatures at the pioneering genomic sites. Consistent with previous reports, approximately 80% of OCT4 binding events mapped to genomic sites with strong MNase-Seq signals, indicating frequent OCT4 interaction with nucleosomal DNA( Soufi et al. 2015 ) ( Figure 3A ). Download figure Open in new tab Figure 3: DNA Flexibility Signatures at In Vivo Nucleosomal Binding Sites of Pluripotency Factors. (A) Heatmap displaying MNase-Seq read density centered on OSK factor ChIP-Seq peak summits in IMR90 cells. Peaks are sorted by average MNase-Seq signal intensity across the central 200 bp. Darker shading indicates higher nucleosome occupancy (B) Average DNA flexibility profiles (DNase I model) across OSK binding sites located within nucleosome-enriched (red; high MNase signal) versus nucleosome-depleted (grey; low MNase signal) genomic regions (±500 bp from peak center) (C) Comparison of average OCT4 ChIP-Seq signal intensity across OCT4 nucleosomal binding sites binned into deciles based on flanking DNase-I flexibility (Decile 10 = highest flexibility, Decile 1 = lowest flexibility). Significant differences observed across bins (ANOVA, p < 2.2e-16 ); Wilcoxon rank-sum test comparing Decile 10 vs 1, ****, p < 0.0001 , (D) Scatter plots showing the positive correlation between average OCT4 ChIP-Seq signal intensity (x-axis) and average DNA flexibility (y-axis; DNase I and twist dispersion models) at OCT4 nucleosomal binding sites (Spearman rank correlation; p < 2.2e-16 for both). Lines represent linear regression fits. Further, we characterized and compared the DNA flexibility of OSK binding sites within nucleosome-enriched regions versus those in nucleosome-depleted regions. 
Consistent with our in-vitro results, OSK binding sites within nucleosome-enriched regions exhibited significantly higher DNA flexibility than the binding sites in the nucleosome-depleted regions (Apostolou and Hochedlinger 2013; Moonen et al. 2022) ( Figure 3B ). Specifically, DNA bending (axial flexibility) was markedly higher in the nucleosomal binding sites of the OSK factors. The rotational (torsional) flexibility profiles were also elevated at nucleosomal binding sites for OCT4 and SOX2, but not for KLF4. The trend of flexibility change observed for OCT4 and SOX2 binding sites was also consistent with the DNAShape profiles for minor groove width (MGW), propeller twist (ProT), electrostatic potential (EP), and roll ( Supplementary Figure 2 ). The characteristic flexibility and shape changes were also consistent with the underlying dinucleotide distributions in the two classes of genomic sites for the OSK factors ( Supplementary Figure 2 ). Since nucleosomal DNA is inherently flexible, potentially reflecting sequence preferences for histone wrapping (Jiang and Pugh 2009; Back and Walther 2023), we sought to confirm that heightened flexibility in nucleosomal binding sites was not merely an artifact of nucleosome positioning signals. To this end, we stratified OCT4 nucleosomal binding sites by bending flexibility values obtained from the DNase-I scale and compared the average ChIP-Seq tag density across the bins. Interestingly, OCT4 occupancy, as reflected by the ChIP-Seq tag density, increased significantly with higher local flexibility ( Figure 3C, 3D ; Spearman rank correlation, rho = 0.14 for DNase-I, rho = 0.19 for twist dispersion, p < 2.2e-16 ), suggesting that sequence-encoded flexibility facilitates OCT4 engagement with its nucleosomal targets. These in-vivo findings align with atomistic molecular simulations showing that OCT4 induces partial DNA unwrapping from the nucleosome ( Huertas et al. 2020 ; Michael et al. 2020 ; Mauney et al. 2018 ).
DNA flexibility might direct GATA3-Mediated Chromatin Opening Unlike OSK factors, GATA3 utilizes its zinc-finger domains to recognize the core GATA motif (5’-GAT-3’), similar to other members of the GATA family, and plays critical roles in lineage specification, differentiation, and cellular reprogramming. Prior studies have established GATA3 as a pioneer transcription factor across diverse chromatin contexts, owing to its ability to remodel chromatin structure( Takaku et al. 2016 ; Tanaka et al. 2020 ; Isbel, Grand, and Schübeler 2022 ; Y. Chen et al. 2012 ). However, the extent to which DNA flexibility contributes to GATA3-induced nucleosome remodeling remains unclear. To address this, we integrated chromatin occupancy data from ChIP-Seq experiments with chromatin accessibility profiles derived from ATAC-Seq in the MDA-MB-231 breast cancer cell line. As shown in Figure 4A , GATA3-bound regions were classified as “pioneering sites” based on a significant increase in chromatin accessibility following ectopic GATA3 expression (FDR < 0.005). While approximately 55% of GATA3-bound regions overlapped constitutively open chromatin, ∼13% exhibited a marked gain in accessibility, indicative of the GATA3-mediated chromatin remodeling. An example pioneering site is illustrated in Figure 4B , highlighting a region initially inaccessible prior to GATA3 expression that became distinctly open post-induction. Download figure Open in new tab Figure 4: DNA Flexibility Distinguishes GATA3 Pioneering Sites. (A) Heatmaps showing ATAC-Seq read density in WT MDA-MB-231 cells (left), GATA3 ChIP-Seq signal (center), and ATAC-Seq density after GATA3 expression (right), centered on GATA3 pioneering sites (±5 kb) (B) Top: MA plot illustrating differential ATAC-Seq accessibility analysis comparing GATA3+ versus WT conditions; red points indicate significant pioneering sites (FDR 1). Middle: Genome browser view of a representative pioneering site on chr2. 
Bottom: Number of GATA3 ChIP-Seq peaks classified into pioneering, constitutively open, or constitutively closed categories (C) Average DNA flexibility profiles (DNase I model) across GATA3 binding sites classified as pioneering (red), constitutively open (blue), or constitutively closed (grey) (±500 bp from peak center) (D) Fraction of GATA3 peaks within each category containing strong (top quartile log-odds score) FIMO motif hits (E) Average number of FIMO motif hits identified per peak within each category (F) Average log-odds score of FIMO motif hits per peak within each category (G) Boxplots comparing DNase I flexibility values in upstream, central (motif), and downstream regions across the three GATA3 site categories. Statistical significance assessed by Wilcoxon rank-sum tests. To explore the sequence context underlying these differences, we scanned each class of GATA3-bound regions (pioneering, closed, and constitutively open) for GATA motifs within ±100 bp of the peak centers. As expected, all three groups contained high-scoring motifs; however, pioneering sites were enriched for strong (top-quartile) motif hits ( Figure 4C ), whereas closed regions paradoxically exhibited both a greater number of motifs per peak and higher average motif scores ( Supplementary Figure 4 ). To investigate this, we analyzed the rotational positioning of GATA3 motifs using the nucleosomal rotational positioning (NRP) score, based on a model by Zhurkin and colleagues( Cui and Zhurkin 2013 ). This model quantifies the tendency of A/T-rich(W) and G/C-rich(S) dinucleotides to occupy major or minor grooves of nucleosomal DNA, respectively. As shown in Figure 4D , GATA motifs in pioneering sites preferentially occupied solvent-exposed orientations(characterised by lower NRP scores), whereas motifs in closed sites were buried toward the histone core. 
Additionally, pioneering sites exhibited increased bending and torsional flexibility flanking the core motif region ( Figure 4E ), supporting a model wherein local DNA deformability facilitates nucleosome remodeling. Supplementary analyses of DNA shape features and dinucleotide compositions further substantiated these distinctions ( Supplementary Figure 4 ). Together, our in-vivo findings suggest that sequence-dependent DNA flexibility, particularly around the GATA motif, plays a critical role in directing GATA3-mediated pioneering activity. Pioneering sites characterized by higher intrinsic flexibility exhibit pronounced transitions from closed to open chromatin states upon GATA3 binding, implying that GATA3 exploits local DNA deformability to promote nucleosome eviction or repositioning. DNA Flexibility Features Enhance Classification of Functional PTF Nucleosomal Binding Sites Building on our in-vitro and in-vivo observations, we hypothesized that DNA flexibility could be relevant in distinguishing functional (occupied) PTF binding sites within nucleosomes from unbound genomic motifs. To test this, we propose a classification framework integrating ChIP-Seq occupancy with MNase-Seq nucleosome positioning data for a curated set of PTFs ( Figure 5A ). Models trained solely on one-hot-encoded sequence features (4-bit) were compared with those augmented by DNA flexibility descriptors (4-bit + Flexibility) to evaluate whether flexibility attributes improve the identification of functional binding sites. Download figure Open in new tab Figure 5: Flexibility Features Enhance Machine Learning Classification of Functional In Vivo PTF Binding Sites. 
(A) Workflow for classification dataset curation and model training/testing using ENCODE ChIP-Seq, MNase-Seq, and sequence/flexibility features (B) Comparison of classification performance (Matthews Correlation Coefficient, MCC) using sequence features alone (4-bit) versus sequence augmented with DNase I flexibility (4-bit + DNaseI). Left: MCC values across all datasets (26 PTFs, multiple cell lines). Right: Median MCC per PTF across datasets (C) Improvement in MCC (ΔMCC = MCC(4bit+Flex) - MCC(4bit)) achieved by adding individual flexibility features (DNaseI, NPP, twistDisp, trx, Stiffness) to the 4-bit sequence model, shown for each PTF (D) Boxplots showing median ΔMCC achieved by adding twist dispersion or trx flexibility features, grouped by PTF DNA binding domain family (E) Improvement in MCC (ΔMCC) for cross-cell line predictions, where models trained on one cell line were tested on data for the same PTF from a different cell line, shown for individual flexibility features (F, G) Feature importance analysis for XG-Boost classification models trained on HepG2 data for FOXA1 (F) and GATA3 (G) using 4-bit + flexibility features (±20 bp flanks). Top: Sequence logo derived from positive (bound) sites. Bottom: Heatmap showing position-specific importance scores for different flexibility descriptors. Color intensity reflects relative importance. Incorporating DNA flexibility features, particularly DNase-I sensitivity, substantially improved classification performance, measured by Matthews Correlation Coefficient (MCC), relative to sequence-only models across multiple datasets ( Figure 5B , left panel). These improvements were even more pronounced when considering median MCC values across different cell lines for each PTF ( Figure 5B , right panel). Among the flexibility descriptors, torsional descriptors— twist dispersion and twist–roll–X displacement (trx)—provided the greatest gains in predictive accuracy across the 26 PTFs analyzed. 
For factors such as GATA3, KLF4, and FOXA3, models including trx features significantly outperformed sequence-only models, as indicated by ΔMCC values ( Figure 5C ). In contrast, factors like GATA1, NEUROD1, and ATF6 exhibited minimal performance improvement, recapitulating family-specific differences in flexibility dependence. Stratifying MCC gains by DNA-binding domain (DBD) family revealed that bZIP, NFYA, and Forkhead PTFs benefited most from the inclusion of flexibility features in the current dataset ( Figure 5D ). Consistently, models trained on bZIP factor data across different cell lines (HepG2, K562, MCF-7, HeLa-S3) exhibited robust MCC improvements when flexibility information was incorporated. We further evaluated cross-cell line generalization, demonstrating that models trained on data from one cell line (e.g., HepG2) enhanced classification performance when tested on the same PTF in different cellular contexts ( Figure 5E ). Notably, rotational flexibility descriptors consistently provided the greatest cross-cell line performance improvements, highlighting their robustness and biological relevance. To gain mechanistic insights, we performed position-specific feature importance analysis around the PTF motifs. Distinct patterns emerged: for FOXA1, rotational flexibility in the downstream flank was most predictive ( Figure 5F ), whereas for GATA3—where flexibility significantly enhanced classification—rotational flexibility upstream and bending flexibility downstream of the motif were particularly informative ( Figure 5G ). These findings indicate that both the type and positional distribution of flexibility features vary across different PTFs. In summary, our results demonstrate that incorporating DNA flexibility features markedly improves the classification of functional, in-vivo PTF binding sites within nucleosomes compared to sequence features alone. 
This underscores the critical role of local DNA mechanics in shaping the functional nucleosomal cistrome of pioneer transcription factors. Discussions Eukaryotic gene regulation is orchestrated by the dynamic interplay between transcription factors (TFs) and their genomic binding sites, many of which are occluded by nucleosomes. Pioneer transcription factors (PTFs) possess a unique capacity to engage the genomic sites within compacted chromatin landscapes. However, elucidating the precise molecular determinants that govern such interactions is challenging for several reasons. The most prominent are the high prevalence of non-functional genomic motifs that mimic functional PTF binding sites and the confounding influence of intrinsic nucleosome positioning signals( Barozzi et al. 2014 ). While atomic-resolution structures and molecular dynamics simulations offer valuable mechanistic insights into PTF-nucleosomal interactions( Zhou, Gaullier, and Luger 2019 ; Gadea and Nikolova 2023 ; Fedulova et al. 2024 ; MacCarthy et al. 2022 ; Huertas and Cojocaru 2021 ), they are often limited to only a small number of PTFs. In contrast, high-throughput assays such as NCAP-SELEX enable large-scale interrogation of TF-nucleosomal interactions but provide limited mechanistic insight into the DNA dynamics that underlie in-vivo complexities ( Zhu et al. 2018 ). Critically, DNA flexibility is recognized as a key element in determining TF binding to genomic DNA( Yella et al. 2018 ), though its specific role in governing PTF engagement with nucleosomal DNA remains largely unresolved. Our study addresses this gap through large-scale statistical modeling, integrating high-throughput datasets with empirically derived DNA flexibility descriptors to systematically probe the influence of DNA mechanics on PTF-nucleosome interactions. Analysis of in vitro NCAP-SELEX data established a direct relationship between DNA flexibility and TF-nucleosome binding. 
Sequences exhibiting elevated axial(or bending) and torsional(or rotational) flexibility were significantly enriched among the high-affinity binding sites, indicating that intrinsic DNA deformability facilitates PTF recognition under nucleosomal constraints. Reports from Garcia et al. suggested that strong nucleosome binders often feature DNA binding domains such as basic helix-loop-helix (bHLH), homeodomains (HD, e.g., OCT4, BRN2), ETS (e.g., PU1), and zinc-fingers (ZnF), which typically recognize cognate motifs via short alpha-helices( Garcia et al. 2019 ). In contrast, weak binders including bZIP factors (e.g., cMYC, MYOG, CEBP), generally employ scissor-like binding mechanisms. In general agreement, our analysis also revealed that distinct flexibility profiles and gradients further differentiated TF families known to employ unique nucleosomal engagement strategies( Garcia et al. 2019 ; Zhu et al. 2018 ). Interestingly, the “gradient of DNA flexibility” or “flexibility slope” along the NCAP-SELEX nucleosomal substrates appeared to be another determining factor of stable complex formation during PTF-nucleosomal interaction. In fact, current findings align with the recent work by Peng et al., where the authors described clustering of TFs based on dyad-to-end binding ratios—indicating specialized nucleosomal site selection mechanisms( Peng et al. 2024 ). Similar to their report, we observed that homeodomain TFs preferentially bind sequences characterized by increased flexibility near nucleosomal dyad regions, while bZIP and ETS factors demonstrated different preferences. Notably, the flexibility gradients of the TFs correlate positively with NCAP-SELEX derived metrics like periodic binding tendencies, and EMI-penetration that reflect the propensity of a TF to penetrate deeper into the nucleosomal substrate( Zhu et al. 2018 ). 
While such correlation should be interpreted carefully, these observations are consistent with models suggesting that local DNA mechanics modulate DNA unwrapping dynamics and might contribute to TF-mediated nucleosome destabilization( Ngo et al. 2015 , 2016 , 2024 ). The structural relevance of the axial and rotational flexibility signatures was also quantitatively validated using regularized linear regression models. Incorporation of DNA flexibility descriptors improved predictions of TF-nucleosomal binding affinity, compared to sequence-only models, for the majority of the TFs. This enhancement was especially pronounced for TFs with established pioneering activity, reinforcing the role of DNA flexibility as an integral component of PTF-mediated nucleosomal recognition. Therefore, our scalable, sequence-dependent modeling approach effectively recapitulates findings from diverse complementary methodologies in the context of TF-nucleosome interactions( Gadea and Nikolova 2023 ; Fedulova et al. 2024 ; MacCarthy et al. 2022 ; Huertas and Cojocaru 2021 ). Additionally, extending these principles to in-vivo systems offers deeper insights into the functional capacities of hallmark PTFs governed by the local DNA mechanics. Analysis of OSK and GATA3 in-vivo datasets indicates that OCT4-bound genomic sites, particularly those associated with the nucleosomal regions(pioneering sites), display significantly higher flanking DNA flexibility than constitutively accessible sites(non-pioneered sites in the open chromatin region). These results align with prior evidence suggesting that flexible DNA facilitates OCT4-induced chromatin remodeling, and are further supported by recent studies exploring OCT4-nucleosome interactions using cryo-EM and MD simulations( Soufi et al. 2015 ; Gadea and Nikolova 2022 ; MacCarthy et al. 2022 ). Similarly, structural studies of SOX2-nucleosomal complexes have demonstrated the role of flexibility at nucleosomal DNA termini( Dodonova et al. 
2020 ), a finding reflected in our genomic analyses of SOX2-bound sites. Additionally, Gadea and Nikolova recently showed that the positional preferences of SOX2 on the nucleosomal substrates are influenced collectively by the DNA sequence, DNA shape, and histone contacts( Gadea and Nikolova 2023 ). Their investigation into suboptimal SOX2 binding sites emphasized that the stability of these complexes is significantly affected by the energetic costs incurred from histone-DNA contact disruption and DNA helix distortions. GATA3, a canonical pioneer factor essential for development and disease, utilizes two zinc fingers spaced to recognize cognate motifs (5’-GAT-3’) within nucleosomal DNA( Tanaka et al. 2020 ; Ishida et al. 2023 ). Our analysis revealed that GATA3 pioneering sites, marked by increased chromatin accessibility (ATAC-seq), exhibit significantly higher bending and torsional DNA flexibility compared to GATA3-bound sites in closed chromatin. This pioneering activity supports its established role in chromatin remodeling( Takaku et al. 2016 ). Although recent cryo-EM structures suggest minimal DNA distortion upon GATA3 binding in the pioneering sites( Tanaka et al. 2020 ), our results argue that intrinsic sequence-encoded flexibility is critical for accommodating the spatial constraints imposed by the dual zinc fingers during engagement with nucleosomal targets. Moreover, the differential DNA flexibility in the pioneering sites might be key to the GATA3-mediated chromatin remodelling and pioneering activity. Supporting this, one recent study employing MD simulations demonstrated that GATA3 binding indeed induces DNA conformational changes that facilitate histone dissociation( Ishida et al. 2023 ). Importantly, our data also suggest that motif density or binding affinity of motifs alone might not be sufficient for distinguishing pioneering sites, as reported earlier. 
Instead, flanking sequence context—particularly increased DNA flexibility—emerges as a key discriminator, separating functional pioneering sites from high affinity motifs in inaccessible chromatin regions. These findings underscore the pivotal role of local DNA mechanics in enabling GATA3 to engage nucleosomal targets and initiate chromatin remodeling( Portillo-Ledesma et al. 2024 ). Building on these findings, we assessed the predictive utility of flexibility features within a classification framework to distinguish functional, occupied PTF binding sites from unbound genomic motifs. Augmenting sequence-based models with flexibility descriptors significantly improved classification accuracy across multiple PTFs and cell types. Rotational flexibility features, in particular, proved robust and informative in cross-cell line validations, likely due to their relevance in positioning motifs along the nucleosomal surface. Feature importance analysis revealed factor-specific dependencies—for instance, downstream rotational flexibility was most predictive for FOXA1, whereas GATA3 required both upstream rotational and downstream bending flexibility—suggesting distinct mechanical recognition strategies across TF families. The consistent performance gains observed for bZIP, Forkhead, and ZnF families merit further investigation. While most PTFs benefited from the inclusion of flexibility features, others such as ATF6 and NEUROD1 showed limited improvement, potentially due to data constraints. Nonetheless, our analysis highlights the previously underappreciated importance of flanking rotational flexibility in facilitating in vivo PTF binding within nucleosomes and suggests that distinct PTFs employ unique mechanical readout strategies( Mathelier et al. 2016 ). While our study provides broad mechanistic insights across a diverse set of TFs, limitations remain. 
Firstly, the classification framework was applied to only a curated panel of 26 PTFs, and did not incorporate explicit high and low affinity binding site stratification—an important direction for future research. Nevertheless, our findings consistently support DNA flexibility as a critical and previously under-explored regulatory layer governing PTF-nucleosome interactions. Sequence-encoded mechanical properties of DNA—particularly torsional or rotational flexibility in the flanking regions of core motifs—emerge as a key determinant of functional PTF binding site specification in-vivo . Our computational framework offers a scalable platform to further investigate these properties and predict functional PTF targets across diverse genomic and cellular contexts. Methodology NCAP-SELEX data processing Raw sequencing data (FASTQ format) from NCAP-SELEX, HT-SELEX, and Nucleosome-SELEX experiments were obtained from the European Nucleotide Archive (ENA; accession PRJEB22684 )( Zhu et al. 2018 ). Adapter sequences were trimmed from the FASTQ files, reads with ambiguous nucleotides (N) and PCR duplicates were removed, and finally forward and reverse reads were merged using fastp (S. Chen et al. 2018 ). Additionally, for each library only the merged reads of exactly 101 bp length, corresponding to the random regions of SELEX templates, were kept, while reads longer or shorter than 101 bp were discarded. The QCed cycle4 library of the NCAP-SELEX dataset was then used to calculate the enrichment of each 7-mer (k=7) in the 101 bp sequences compared to the input cycle, as 7 bp k-mers were reported by Mariani et al. to have the highest information gain for this dataset (evaluated using KL-divergence for k-mers of varying length in the selected library compared to the background library) ( Mariani et al. 2024 ). Enrichment was defined as the log (Cycle4/Cycle0 frequency) ratio, and k-mers with log (enrichment) < 1 were excluded( Pantier et al. 2022 ). 
For each TF, the top 10 enriched 7-mers were aligned using sites2meme (MEME suite v5.5.7)( Bailey et al. 2009 ) to generate a position weight matrix (PWM). Additional PWMs were obtained from JASPAR 2022( Rauluseviciute et al. 2024 ) for validation. Motif scanning of processed 101 bp reads was performed using FIMO (MEME suite v5.5.7) with a significance threshold of p < 1e-4 . Reads were classified as “end-binding” if a FIMO hit occurred within 10 bp of either the 5’ or 3’ end; sequences with motifs at the 3’ end were reverse-complemented to standardize 5’-end orientation. Quantification of DNA flexibility We profiled the flexibility of each DNA molecule along the length of the end-binding and non-end binding sequences corresponding to each TF in the NCAP-SELEX and HT-SELEX libraries and also considered the Nucleosome-SELEX library. Here, we used two empirically derived DNA structural features, i.e., DNaseI and NPP, that have been widely used in the literature to estimate DNA flexibility from nucleotide sequences. DNaseI sensitivity( Brukner et al. 1995 ) and NPP( Satchwell, Drew, and Travers 1986 ) are both tri-nucleotide models, and are more accurate than the dinucleotide based models( Bansal, Kumar, and Yella 2014 ). The DNaseI model was derived from oligonucleotide cutting experiments by the DNaseI enzyme. The bending propensity values in this model range from −0.281(= AAT/ATT) to 0.194(= TCA/TGA). This model differentiates bendability of the trinucleotides in terms of ease of bending towards the major groove, with higher negative value corresponding to lower bendability. Satchwell’s Nucleosome Positioning Preference (NPP) model gives preferences of all possible trinucleotides in the DNA duplex, for their minor or major groove face to be towards the histone core. This model provides relative values for major groove face preferring or minor groove preferring as well as trinucleotides with no rotational position preference, on an absolute scale. 
The values range from 45(for GCC/GGC) to 2(for CAG/CTG). According to this model, trinucleotides with strong preference for their major groove or minor groove to face the histone core are rigid, whereas trinucleotides without any rotational preference are flexible. The trinucleotide models calculate bendability in arbitrary units and hence give an indication of relative bendability of various DNA sequences. Additionally, we also considered twist-roll-x displacement(trx) scale( Heddi et al. 2010 ) of DNA flexibility obtained from 328 dataset of chemical shifts observed in phosphate backbones through in-solution NMR( Heddi et al. 2006 ). Due to the differences in torsional angles of B-DNA, phosphate groups can adopt two distinct conformational states, i.e., BI and BII, as observed in crystallographic and NMR experiments( Heddi et al. 2010 ). Since backbone angles are fundamentally coupled to several helicoidal parameters like roll, tilt, twist, and X-displacement, along with curvature, the BI to BII transition in-solution has been observed to be sequence dependent. Therefore, BI-BII state transition reflects the overall B-DNA structure, and dynamics in inter-basepair parameters. Nonetheless, the TRX scale defines malleability of a DNA sequence based on dinucleotide steps, as some dinucleotide steps, like YpR, are more flexible (GpC, CpG, and GpG-CpC and CpA-TpG) than other steps (ApT, TpA, and ApA-TpT). The TRX scale is also known to be correlated with inter-base-pair parameters (twist, roll), a base-pair parameter (X-displacement), and winding along with groove dimensions, and therefore seeks to capture intrinsic DNA flexibility at each dinucleotide step. Hence, the flexible dinucleotides could be easily deformed, compared to stiffened dinucleotides, that can be displaced away from helical axis and towards the major groove. Previously, the TRX scale was found to be relevant in nucleosomal DNA, and is expected to underpin the protein-DNA recognition. 
To calculate DNA bendability along the length of a given sequence, we used a sliding window approach implemented in the DNAflexpy python package ( https://upalabdhad.github.io/DNAflexpy/ ). The sequence is first divided into overlapping windows of a specific length (w=0,10,20,30). Then the overlapping trinucleotides in each window were replaced with the values of the corresponding unique k=3 mers from the look-up tables of the DNaseI and NPP models. The values obtained for each window were then averaged and assigned to that window. Therefore, the transformed numeric vector for a sequence will be shorter than the sequence length, and will be (L-W+1) in length, where L is the length of the sequence corresponding to the 101 nt random region of the SELEX library and W is a window of 10 nt; each 101 nt sequence will therefore be represented as a numeric vector of length 92 (101-10+1). However, different window sizes can be chosen based on the property/feature being examined and whether one is interested in its manifestation at the local level or global level. For local level structure analysis, calculations are generally carried out for window size of less than 10 nucleotides (one helical turn of DNA). Regression Model Dataset Preparation The NCAP-SELEX datasets obtained for 194 TFs after QC were utilised for k-mer count table generation. In the current analysis, relative affinities were estimated from k-mer counts observed in cycle 4, compared to the counts estimated from the initial round (round zero) using a fifth order Markov model with the SELEX R-package(Rastogi et al. 2015). Furthermore, for calculation of the relative affinities we also considered the following parameters: the number of core flanking sequences to derive and the number of allowed mismatches to represent the binding sites of a TF. For each TF probed in the NCAP-SELEX experiments, we used the motifs reported in the original study. 
However, one important caveat while generating k-mer tables from SELEX data is that k-mer counts vary inversely with the length of the k-mers—longer k-mers obtain lower counts. While shorter k-mers with very high counts facilitate the robust calculation of affinity tables, longer k-mers provide useful contextual information of the binding sites from the flanking sequences. To optimise the k-mer length for affinity table generation from k-mer counts, the current methodology first generates datasets with varying k-mer length. The process starts with an initial flanking length given by the formula |(10-motif length)/2|, so that the k-mer length is at least 10, even when the motif length is less than 10 bp. This approach allowed us to calculate the flexibility for at least 7 to 8 positions along the binding sites. In addition, for each k-mer in a dataset to be considered reliable, the minimum k-mer count should be equal to the length of the k-mer considered. Further, datasets with fewer than 1000 k-mers were not considered in the downstream processing. Hence, for each TF, the filtering pipeline might generate multiple k-mer affinity tables with varying flanking lengths satisfying the specified criteria. Another trade-off in the k-mer analysis concerns the number of mismatches between a k-mer and the reference or consensus motif. While allowing a larger number of mismatches in the core motif increases the complexity (or variability) and is useful in representing the diversity of a TF's binding sites, too many mismatches will represent spurious binding sites. Therefore, the number of allowed mismatches for any dataset was defined by the formula |(core length - 4) /2| + 1—where the number of allowed mismatches varies with the length of the reference k-mer and in turn with different flanking lengths( Yang et al. 2017 ). Then we additionally filtered the remaining datasets with L2-regularised linear regression. 
Datasets for which the DNA flexibility augmented model performed worse than the 1mer-only model were discarded, as the flexibility augmented models are expected to perform at least as well as or better than the 1mer model, given the increased dimensionality; this filtering process was previously employed by Yang et al. for modelling HT-SELEX datasets( Yang et al. 2017 ; Rastogi et al. 2015; Riley et al. 2014 ; Ibarra et al. 2020 ). For a TF, if multiple datasets passed the filtering protocol, only the dataset with the maximum flanking sequence length was considered, as longer flanking sequences provide additional contextual information of TF binding events. Multiple linear regression To relate binding affinity with DNA sequence and flexibility, we used L2-regularized multiple linear regression modelling. To express the DNA sequence for modelling we utilized the one-hot-encoding of the sequence to train the multiple linear regression model, referred to as the 1mer model. In addition, DNA flexibility features were calculated using the DNAflexpy python package, normalized, and concatenated with the encoded sequence features (1mer + flexibility). Based on the resultant feature matrix and corresponding affinity score, we applied an L2 regularized linear regression model for each dataset to prevent overfitting. To implement the models we used the RidgeCV function from the scikit-learn library. In order to apply the model, each dataset was split into 10-fold training and testing sets. To select the best regularization parameter, another 5-fold cross-validation was performed on each of the 10 folds. Using the best model, by learning the regularization parameter on the internal 5-fold CV, we predicted the relative affinities on the test dataset to evaluate the model performance. Here each model was trained separately as the relative affinities obtained might be incomparable across the different experiments or different TFs in the NCAP-SELEX dataset. 
For evaluation and comparison of the model performance we used R2 and MSE across the datasets. In-vivo dataset preparation for OSK pioneer TFs To identify the genomic occupancy of OSK factors, we used ChIP-Seq peaks of these TFs obtained from the IMR90 cell line ( GSE36570 )( Soufi, Donahue, and Zaret 2012 ). In addition, the nucleosomal occupancy data for the matched cell line was obtained from an MNase-Seq (micrococcal nuclease digestion) experiment( GSE21823 )( Kelly et al. 2012 ). In order to calculate the enrichment of nucleosomal signal in the TF binding sites, first the MNase-Seq read alignments were pooled across the samples with samtools ( Danecek et al. 2021 ). The resultant alignment was then used to calculate read coverage across the Hg18 genome assembly with 10bp non-overlapping bins using bamCoverage from the DeepTools package( Ramírez et al. 2016 ) and normalized using the RPGC method. The coverage bigwig file was then used to estimate the MNase-Seq read density in the -100 to +100 bps of the OSK TF peak centre using DeepTools . The OSK occupying genomic intervals were further sorted based on the average of read density in the aforementioned genomic regions. To generate the read density heatmap, 1kb regions encompassing the ChIP-Seq peak center were used to calculate the read density, and plotted using DeepTools ( Ramírez et al. 2016 ). Then the OSK binding sites were classified as nucleosome-depleted regions where the read density within 200bp of the OSK peak center was lower than 1, and the rest of the regions were grouped into binding sites in the nucleosome-enriched regions. To extract sequences, -500 to +500 bp of the peak center was considered, and bedtools ( Quinlan 2014 ) was used to extract sequence from the Hg38 assembly, after lifting over the genomic coordinates from the Hg18 to the Hg38 assembly using ucsc-liftOver ( Hinrichs et al. 2006 ). Then these two groups of sequences were subjected to flexibility calculation as described previously. 
In-vivo GATA3 dataset analysis For GATA3 TF, we obtained reanalysed GEO dataset GSE72141 . The author-provided peak called file from ChIP-Seq data was downloaded from GEO( Takaku et al. 2016 ). For ATAC-Seq we downloaded the raw sequencing reads for wildtype ATAC-Seq data without GATA3 expression along with ATAC-Seq reads after GATA3 expression(GATA3+). Each condition has 3 biological replicates. Separate runs were merged for each replicate. Adapters were removed and bases with quality scores below Q20 were trimmed off from the raw reads using fastp . Next, the paired-end reads were aligned to the Hg19 genome assembly with bowtie2 ( Langmead and Salzberg 2012 ) with the “--very-sensitive --no-mixed --no-discordant -X 5000” options enabled. The resultant alignments were then sorted and indexed with samtools . In order to identify differential accessible regions from ATAC-Seq data after GATA3 expression, we quantified the read counts in the -100 to +100 bp of GATA3 ChIP-Seq peak center using featureCounts ( Liao, Smyth, and Shi 2014 ). Then, we used Wald statistics, as implemented in the DESeq2 R package( Love, Huber, and Anders 2014 ), to call the differential accessibility of genomic sites. FDR2 were used as cutoff to consider any GATA3 peaks in differentially open chromatin region. To identify the genomic regions with conserved ATAC-Seq signal, peaks with DESeq2 FDR>0.05 were considered, and then library size normalised read counts for each ATAC-Seq sample were calculated with DESeq2. To identify the conserved open regions in both conditions, mean of normalised read counts with more than 20 in both conditions was considered( Takaku et al. 2016 ). Mean normalised read counts of less than 10 in both conditions was considered for calling conserved closed regions. Read density heatmaps were generated with deeptools for -5 to +5 kb of GATA3 peak center as described previously. 
The -500 to +500 bp region flanking the peaks was used to extract the sequences from the Hg38 genome assembly after lifting over the peak coordinates from Hg19. Then the sequences were profiled for DNA flexibility as described earlier. DNA shape analysis was performed using the DNAshapeR R package. In-vivo cell line datasets from ENCODE We curated a list of 26 PTFs from the literature. To obtain the in-vivo datasets, irreproducible discovery rate (IDR)-thresholded peaks for all the selected PTFs were downloaded from ENCODE. MNase-Seq peak files of the corresponding cell lines were also downloaded from ENCODE (Consortium et al. 2012). For the PTFs, motif profiles were obtained from JASPAR db 2022 (December 2024). The datasets were pre-processed as described in Peng et al. ( Peng et al. 2024 ). To identify the bound and unbound motifs of a given PTF, the JASPAR motif of that PTF was used to scan the human genome (hg38) with the motif scanning tool (FIMO), followed by intersection with ChIP-Seq peaks using bedtools. Next, to identify the corresponding PTF binding sites in nucleosomal DNA, MNase-Seq peaks were intersected using bedtools. A positive dataset was defined as the PTF motif hits in the nucleosomal region covered by the ChIP-Seq signal. A negative dataset was composed of PTF motif hits in the nucleosomal DNA devoid of the ChIP-Seq signal. To extract the DNA sequences of the bound (positive) and unbound (negative) motifs of each PTF, we used 10 and 20 bp flanks of motif centres from the Hg38 human genome assembly with the bedtools getfasta subcommand. Next, both sets of sequences were used for flexibility profiling using our Python package DNAflexpy ( https://upalabdhad.github.io/DNAflexpy/ ), as described above. Additionally, for one-hot encoding of the sequences, we used a custom Python script with the Biopython library ( Cock et al. 2009 ). 
Further, all the flexibility descriptors along with one-hot encoded features were used to construct, train, and test a gradient boosting classifier, XGBoost model, as implemented in the scikit-learn Python module ( Pedregosa et al. 2011 ). First, while constructing the classification model with scikit-learn, to tune the hyper-parameters of the XGBoost models, we used the GridSearchCV approach utilizing positive and negative datasets pooled from all the curated ENCODE PTFs in the HepG2 cell line. The full dataset was then split in an 80-20 ratio for training and validation purposes. Here, only 4-bit and DNaseI flexibility features were used as descriptors for tuning models with k=10 fold stratified cross-validation (CV) on the training set, and the models were then validated on the remaining 20% split. While classifying the positive and negative sequences using the tuned XGBoost model, we took account of accuracy, precision, recall, F1 score, area under the curve (AUC) of the receiver operating characteristic (ROC) curve, and Matthews correlation coefficient (MCC) for model comparisons. The best tuned model was further used for training and testing on each PTF dataset from the respective cell lines independently, employing a k=5 fold stratified CV approach. While training and testing, we used 4-bit and individual flexibility features used in this manuscript, i.e., DNaseI, NPP, twist dispersion, trx and stiffness. But to independently assess the improvement of the model performance upon inclusion of the flexibility features, we separately incorporated each flexibility feature, one at a time, along with the 4-bit feature, and then trained and tested using the tuned model on each PTF across four different cell lines. Next, for determining the importance of each flexibility feature in describing the positive class, we considered models trained and tested on 20 bp flanking regions from the HepG2 cell line ( Mathelier et al. 2016 ). 
Further, we also considered the possibility of training the model on the dataset obtained from one cell line and validating its performance on the data from another cell line for a given PTF. In that case, we first trained and validated the tuned model, and then saved the model to predict using the dataset from a different cell line, i.e., completely new datasets that the model had never seen during the training step. Funder Information Declared Department of Biotechnology, https://ror.org/03tjsyq23 References ↵ Abe , Namiko , Iris Dror , Lin Yang , Matthew Slattery , Tianyin Zhou , Harmen J Bussemaker , Remo Rohs , and Richard S Mann . 2015 . “ Deconvolving the Recognition of DNA Shape from Sequence .” Cell 161 ( 2 ): 307 – 18 . OpenUrl CrossRef PubMed Apostolou , Effie , and Konrad Hochedlinger . 2013 . “ Chromatin Dynamics During Cellular Reprogramming .” Nature 502 ( 7472 ): 462 – 71 . OpenUrl CrossRef PubMed Web of Science ↵ Back , Georg , and Dirk Walther . 2023 . “ Predictions of DNA Mechanical Properties at a Genomic Scale Reveal Potentially New Functional Roles of DNA Flexibility .” NAR Genomics and Bioinformatics 5 ( 4 ): lqad097 . OpenUrl ↵ Bailey , Timothy L , Mikael Boden , Fabian A Buske , Martin Frith , Charles E Grant , Luca Clementi , Jingyuan Ren , Wilfred W Li , and William S Noble . 2009 . “ MEME SUITE: Tools for Motif Discovery and Searching .” Nucleic Acids Research 37 ( suppl_2 ): W202 – 8 . OpenUrl CrossRef PubMed Web of Science ↵ Balsalobre , Aurelio , and Jacques Drouin . 2022 . “ Pioneer Factors as Master Regulators of the Epigenome and Cell Fate .” Nature Reviews Molecular Cell Biology 23 ( 7 ): 449 – 64 . OpenUrl CrossRef PubMed ↵ Bansal , Manju , Aditya Kumar , and Venkata Rajesh Yella . 2014 . “ Role of DNA Sequence Based Structural Features of Promoters in Transcription Initiation and Gene Expression .” Current Opinion in Structural Biology 25 : 77 – 85 . 
OpenUrl CrossRef PubMed ↵ Barozzi , Iros , Marta Simonatto , Silvia Bonifacio , Lin Yang , Remo Rohs , Serena Ghisletti , and Gioacchino Natoli . 2014 . “ Coregulation of Transcription Factor Binding and Nucleosome Occupancy Through DNA Features of Mammalian Enhancers .” Molecular Cell 54 ( 5 ): 844 – 57 . OpenUrl CrossRef PubMed Web of Science ↵ Barral , Amandine , and Kenneth S Zaret . 2024 . “ Pioneer Factors: Roles and Their Regulation in Development .” Trends in Genetics 40 ( 2 ): 134 – 48 . OpenUrl CrossRef PubMed ↵ Basu , Aakash , Dmitriy G Bobrovnikov , Basilio Cieza , Juan Pablo Arcon , Zan Qureshi , Modesto Orozco , and Taekjip Ha . 2022 . “ Deciphering the Mechanical Code of the Genome and Epigenome .” Nature Structural & Molecular Biology 29 ( 12 ): 1178 – 87 . OpenUrl CrossRef PubMed ↵ Basu , Aakash , Dmitriy G Bobrovnikov , and Taekjip Ha . 2021 . “ DNA Mechanics and Its Biological Impact .” Journal of Molecular Biology 433 ( 6 ): 166861 . OpenUrl CrossRef PubMed Basu , Aakash , Dmitriy G Bobrovnikov , Zan Qureshi , Tunc Kayikcioglu , Thuy TM Ngo , Anand Ranjan , Sebastian Eustermann , et al. 2021 . “ Measuring DNA Mechanics on the Genome Scale .” Nature 589 ( 7842 ): 462 – 67 . OpenUrl CrossRef PubMed ↵ Bell , Oliver , Vijay K Tiwari , Nicolas H Thomä , and Dirk Schübeler . 2011 . “ Determinants and Dynamics of Genome Accessibility .” Nature Reviews Genetics 12 ( 8 ): 554 – 64 . OpenUrl CrossRef PubMed ↵ Bishop , Eric P , Remo Rohs , Stephen CJ Parker , Sean M West , Peng Liu , Richard S Mann , Barry Honig , and Thomas D Tullius . 2011 . “ A Map of Minor Groove Shape and Electro-static Potential from Hydroxyl Radical Cleavage Patterns of DNA .” ACS Chemical Biology 6 ( 12 ): 1314 – 20 . OpenUrl CrossRef PubMed ↵ Biswas , Aditi , and Aakash Basu . 2023 . “ The Impact of the Sequence-Dependent Physical Properties of DNA on Chromatin Dynamics .” Current Opinion in Structural Biology , 102698 . 
↵ Brukner , Ivan , Roberto Sanchez , Dietrich Suck , and Sandor Pongor . 1995 . “ Sequence-Dependent Bending Propensity of DNA as Revealed by DNase i: Parameters for Trinucleotides .” The EMBO Journal 14 ( 8 ): 1812 – 18 . OpenUrl CrossRef PubMed Web of Science ↵ Bulyk , Martha L , Jacques Drouin , Melissa M Harrison , Jussi Taipale , and Kenneth S Zaret . 2023 . “ Pioneer Factors—Key Regulators of Chromatin and Gene Expression .” Nature Reviews Genetics 24 ( 12 ): 809 – 15 . OpenUrl CrossRef PubMed ↵ Carminati , Manuel , Luca Vecchia , Lisa Stoos , and Nicolas H Thomä . 2024 . “ Pioneer Factors: Emerging Rules of Engagement for Transcription Factors on Chromatinized DNA .” Current Opinion in Structural Biology 88 : 102875 . OpenUrl CrossRef PubMed ↵ Chen , Shifu , Yanqing Zhou , Yaru Chen , and Jia Gu . 2018 . “ Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor .” Bioinformatics 34 ( 17 ): i884 – 90 . OpenUrl CrossRef PubMed ↵ Chen , Yongheng , Darren L Bates , Raja Dey , Po-Han Chen , Ana Carolina Dantas Machado , Ite A Laird-Offringa , Remo Rohs , and Lin Chen . 2012 . “ DNA Binding by GATA Transcription Factor Suggests Mechanisms of DNA Looping and Long-Range Gene Regulation .” Cell Reports 2 ( 5 ): 1197 – 1206 . OpenUrl CrossRef PubMed ↵ Cock , Peter JA , Tiago Antao , Jeffrey T Chang , Brad A Chapman , Cymon J Cox , Andrew Dalke , Iddo Friedberg , et al. 2009 . “ Biopython: Freely Available Python Tools for Computational Molecular Biology and Bioinformatics .” Bioinformatics 25 ( 11 ): 1422 . OpenUrl CrossRef PubMed Web of Science Consortium, ENCODE Project , et al. 2012 . “ An Integrated Encyclopedia of DNA Elements in the Human Genome .” Nature 489 ( 7414 ): 57 . OpenUrl CrossRef PubMed Web of Science ↵ Cui , Feng , and Victor B Zhurkin . 2013 . “ Rotational Positioning of Nucleosomes Facilitates Selective Binding of P53 to Response Elements Associated with Cell Cycle Arrest .” Nucleic Acids Research 42 ( 2 ): 836 – 47 . 
OpenUrl PubMed ↵ Danecek , Petr , James K Bonfield , Jennifer Liddle , John Marshall , Valeriu Ohan , Martin O Pollard , Andrew Whitwham , et al. 2021 . “ Twelve Years of SAMtools and BCFtools .” Gigascience 10 ( 2 ): giab008 . OpenUrl CrossRef PubMed ↵ Dey , Upalabdha , Kaushika Olymon , Anikesh Banik , Eshan Abbas , Venkata Rajesh Yella , and Aditya Kumar . 2023 . “ DNA Structural Properties of DNA Binding Sites for 21 Transcription Factors in the Mycobacterial Genome .” Frontiers in Cellular and Infection Microbiology 13 : 1147544 . OpenUrl CrossRef ↵ Dodonova , Svetlana O , Fangjie Zhu , Christian Dienemann , Jussi Taipale , and Patrick Cramer . 2020 . “ Nucleosome-Bound SOX2 and SOX11 Structures Elucidate Pioneer Factor Function .” Nature 580 ( 7805 ): 669 – 72 . OpenUrl CrossRef PubMed ↵ Dohnalová , Hana , and Filip Lankaš . 2022 . “ Deciphering the Mechanical Properties of b-DNA Duplex .” Wiley Interdisciplinary Reviews: Computational Molecular Science 12 ( 3 ): e1575 . OpenUrl CrossRef ↵ Donovan , Benjamin T , Hengye Chen , Priit Eek , Zhiyuan Meng , Caroline Jipa , Song Tan , Lu Bai , and Michael G Poirier . 2023 . “ Basic Helix-Loop-Helix Pioneer Factors Interact with the Histone Octamer to Invade Nucleosomes and Generate Nucleosome-Depleted Regions .” Molecular Cell 83 ( 8 ): 1251 – 63 . OpenUrl CrossRef PubMed ↵ Fedulova , Anastasiia S , Grigoriy A Armeev , Tatiana A Romanova , Lovepreet Singh-Palchevskaia , Nikita A Kosarim , Nikita A Motorin , Galina A Komarova , and Alexey K Shaytan . 2024 . “ Molecular Dynamics Simulations of Nucleosomes Are Coming of Age .” Wiley Interdisciplinary Reviews: Computational Molecular Science 14 ( 4 ): e1728 . OpenUrl CrossRef ↵ Gadea , Fabiana C Malaga , and Evgenia N Nikolova . 2022 . “ Nucleosome Topology and DNA Sequence Modulate the Engagement of Pioneer Factors SOX2 and OCT4 .” BioRxiv , 2022 – 01 . ↵ Gadea , Fabiana C Malaga , and Evgenia N Nikolova . 2023 . 
“ Structural Plasticity of Pioneer Factor Sox2 and DNA Bendability Modulate Nucleosome Engagement and Sox2-Oct4 Synergism .” Journal of Molecular Biology 435 ( 2 ): 167916 . OpenUrl CrossRef PubMed ↵ Garcia , Meilin Fernandez , Cedric D Moore , Katharine N Schulz , Oscar Alberto , Greg Donague , Melissa M Harrison , Heng Zhu , and Kenneth S Zaret . 2019 . “ Structural Features of Transcription Factors Associating with Nucleosome Binding .” Molecular Cell 75 ( 5 ): 921 – 32 . OpenUrl CrossRef PubMed ↵ Gouhier , Arthur , Justine Dumoulin-Gagnon , Vincent Lapointe-Roberge , Juliette Harris , Aurelio Balsalobre , and Jacques Drouin . 2024 . “ Pioneer Factor Pax7 Initiates Two-Step Cell-Cycle-Dependent Chromatin Opening .” Nature Structural & Molecular Biology 31 ( 1 ): 92 – 101 . OpenUrl CrossRef PubMed ↵ Heddi , Brahim , Nicolas Foloppe , Nadia Bouchemal , Edith Hantz , and Brigitte Hartmann . 2006 . “ Quantification of DNA BI/BII Backbone States in Solution. Implications for DNA Overall Structure and Recognition .” Journal of the American Chemical Society 128 ( 28 ): 9170 – 77 . OpenUrl CrossRef PubMed Web of Science ↵ Heddi , Brahim , Christophe Oguey , Christophe Lavelle , Nicolas Foloppe , and Brigitte Hartmann . 2010 . “ Intrinsic Flexibility of b-DNA: The Experimental TRX Scale .” Nucleic Acids Research 38 ( 3 ): 1034 – 47 . OpenUrl CrossRef PubMed Web of Science ↵ Hinrichs , Angela S , Donna Karolchik , Robert Baertsch , Galt P Barber , Gill Bejerano , Hiram Clawson , Mark Diekhans , et al. 2006 . “ The UCSC Genome Browser Database: Update 2006 .” Nucleic Acids Research 34 ( suppl_1 ): D590 – 98 . OpenUrl CrossRef PubMed Web of Science ↵ Huertas , Jan , and Vlad Cojocaru . 2021 . “ Breaths, Twists, and Turns of Atomistic Nucleo-somes .” Journal of Molecular Biology 433 ( 6 ): 166744 . OpenUrl CrossRef PubMed ↵ Huertas , Jan , Caitlin M MacCarthy , Hans R Schöler , and Vlad Cojocaru . 2020 . 
“ Nucleosomal DNA Dynamics Mediate Oct4 Pioneer Factor Binding .” Biophysical Journal 118 ( 9 ): 2280 – 96 . OpenUrl CrossRef PubMed ↵ Huyghe , Aurélia , Aneta Trajkova , and Fabrice Lavial . 2024 . “ Cellular Plasticity in Reprogramming, Rejuvenation and Tumorigenesis: A Pioneer TF Perspective .” Trends in Cell Biology 34 ( 3 ): 255 – 67 . OpenUrl CrossRef PubMed ↵ Ibarra , Ignacio L , Nele M Hollmann , Bernd Klaus , Sandra Augsten , Britta Velten , Janosch Hennig , and Judith B Zaugg . 2020 . “ Mechanistic Insights into Transcription Factor Cooperativity and Its Impact on Protein-Phenotype Interactions .” Nature Communications 11 ( 1 ): 124 . OpenUrl CrossRef PubMed ↵ Inukai , Sachi , Kian Hong Kock , and Martha L Bulyk . 2017 . “ Transcription Factor–DNA Binding: Beyond Binding Site Motifs .” Current Opinion in Genetics & Development 43 : 110 – 19 . OpenUrl CrossRef PubMed ↵ Isbel , Luke , Ralph S Grand , and Dirk Schübeler . 2022 . “ Generating Specificity in Genome Regulation Through Transcription Factor Sensitivity to Chromatin .” Nature Reviews Genetics 23 ( 12 ): 728 – 40 . OpenUrl CrossRef PubMed ↵ Ishida , Hisashi , Atsushi Matsumoto , Hiroki Tanaka , Aya Okuda , Ken Morishima , Paul A Wade , Hitoshi Kurumizaka , Masaaki Sugiyama , and Hidetoshi Kono . 2023 . “ Structural and Dynamic Changes of Nucleosome Upon GATA3 Binding .” Journal of Molecular Biology 435 ( 23 ): 168308 . OpenUrl CrossRef PubMed ↵ Iwafuchi-Doi , Makiko , and Kenneth S Zaret . 2014 . “ Pioneer Transcription Factors in Cell Reprogramming .” Genes & Development 28 ( 24 ): 2679 – 92 . OpenUrl Abstract / FREE Full Text Jiang , Cizhong , and B Franklin Pugh . 2009 . “ Nucleosome Positioning and Gene Regulation: Advances Through Genomics .” Nature Reviews Genetics 10 ( 3 ): 161 – 72 . OpenUrl CrossRef PubMed Web of Science ↵ Katsarou , Alexia , Nikolaos Trasanidis , Kanagaraju Ponnusamy , Ioannis V Kostopoulos , Jaime Alvarez-Benayas , Foteini Papaleonidopoulou , Keren Keren , et al. 2023 . 
“ MAF Functions as a Pioneer Transcription Factor That Initiates and Sustains Myelomagenesis .” Blood Advances 7 ( 21 ): 6395 – 6410 . OpenUrl CrossRef PubMed ↵ Kelly , Theresa K , Yaping Liu , Fides D Lay , Gangning Liang , Benjamin P Berman , and Peter A Jones . 2012 . “ Genome-Wide Mapping of Nucleosome Positioning and DNA Methylation Within Individual DNA Molecules .” Genome Research 22 ( 12 ): 2497 – 2506 . OpenUrl Abstract / FREE Full Text ↵ Knaupp , Anja S , Sam Buckberry , Jahnvi Pflueger , Sue Mei Lim , Ethan Ford , Michael R Larcombe , Fernando J Rossello , et al. 2017 . “ Transient and Permanent Reconfiguration of Chromatin and Transcription Factor Occupancy Drive Reprogramming .” Cell Stem Cell 21 ( 6 ): 834 – 45 . OpenUrl CrossRef PubMed ↵ Langmead , Ben , and Steven L Salzberg . 2012 . “ Fast Gapped-Read Alignment with Bowtie 2 .” Nature Methods 9 ( 4 ): 357 – 59 . OpenUrl CrossRef PubMed ↵ Lawson , Catherine L , Helen M Berman , Li Chen , Brinda Vallat , and Craig L Zirbel . 2024 . “ The Nucleic Acid Knowledgebase: A New Portal for 3D Structural Information about Nucleic Acids .” Nucleic Acids Research 52 ( D1 ): D245 – 54 . OpenUrl CrossRef PubMed ↵ Le Poul , Yann , Yaqun Xin , Liucong Ling , Bettina Mühling , Rita Jaenichen , David Hörl , David Bunk , et al. 2020 . “ Regulatory Encoding of Quantitative Variation in Spatial Activity of a Drosophila Enhancer .” Science Advances 6 ( 49 ): eabe2955 . OpenUrl FREE Full Text ↵ Li , Dongwei , Jing Liu , Xuejie Yang , Chunhua Zhou , Jing Guo , Chuman Wu , Yue Qin , et al. 2017 . “ Chromatin Accessibility Dynamics During iPSC Reprogramming .” Cell Stem Cell 21 ( 6 ): 819 – 33 . OpenUrl CrossRef PubMed ↵ Li , Keren , Matthew Carroll , Reza Vafabakhsh , Xiaozhong A Wang , and Ji-Ping Wang . 2022 . “ DNAcycP: A Deep Learning Tool for DNA Cyclizability Prediction .” Nucleic Acids Research 50 ( 6 ): 3142 – 54 . OpenUrl CrossRef PubMed ↵ Liao , Yang , Gordon K Smyth , and Wei Shi . 2014 . 
“ featureCounts: An Efficient General Purpose Program for Assigning Sequence Reads to Genomic Features .” Bioinformatics 30 ( 7 ): 923 – 30 . OpenUrl CrossRef PubMed Web of Science ↵ Liu , Weihan , Jeffrey L Kurkewich , Angela Stoddart , Saira Khan , Dhivyaa Anandan , Alexandre N Gaubil , Donald J Wolfgeher , Lia Jueng , Stephen J Kron , and Megan E McNerney . 2024 . “ CUX1 Regulates Human Hematopoietic Stem Cell Chromatin Accessibility via the BAF Complex .” Cell Reports 43 ( 5 ). OpenUrl CrossRef ↵ Love , Michael I , Wolfgang Huber , and Simon Anders . 2014 . “ Moderated Estimation of Fold Change and Dispersion for RNA-Seq Data with DESeq2 .” Genome Biology 15 : 1 – 21 . OpenUrl CrossRef PubMed ↵ MacCarthy , Caitlin M , Jan Huertas , Claudia Ortmeier , Hermann Vom Bruch , Daisylyn Senna Tan , Deike Reinke , Astrid Sander , et al. 2022 . “ OCT4 Interprets and Enhances Nucleosome Flexibility .” Nucleic Acids Research 50 ( 18 ): 10311 – 27 . OpenUrl CrossRef PubMed ↵ Mariani , Luca , Xiao Liu , Kwangwoon Lee , Stephen S Gisselbrecht , Philip A Cole , and Martha L Bulyk . 2024 . “ DNA Flexibility Regulates Transcription Factor Binding to Nucleosomes .” bioRxiv , 2024 – 09 . ↵ Marin-Gonzalez , Alberto , JG Vilhena , Ruben Perez , and Fernando Moreno-Herrero . 2021 . “ A Molecular View of DNA Flexibility .” Quarterly Reviews of Biophysics 54 : e8 . OpenUrl CrossRef PubMed ↵ Mathelier , Anthony , Beibei Xin , Tsu-Pei Chiu , Lin Yang , Remo Rohs , and Wyeth W Wasser-man . 2016 . “ DNA Shape Features Improve Transcription Factor Binding Site Predictions in Vivo .” Cell Systems 3 ( 3 ): 278 – 86 . OpenUrl CrossRef PubMed ↵ Mauney , Alexander W , Joshua M Tokuda , Lisa M Gloss , Oscar Gonzalez , and Lois Pollack . 2018 . “ Local DNA Sequence Controls Asymmetry of DNA Unwrapping from Nucleosome Core Particles .” Biophysical Journal 115 ( 5 ): 773 – 81 . 
OpenUrl CrossRef PubMed ↵ Mayran , Alexandre , Konstantin Khetchoumian , Fadi Hariri , Tomi Pastinen , Yves Gauthier , Aurelio Balsalobre , and Jacques Drouin . 2018 . “ Pioneer Factor Pax7 Deploys a Stable Enhancer Repertoire for Specification of Cell Fate .” Nature Genetics 50 ( 2 ): 259 – 69 . OpenUrl CrossRef PubMed ↵ Michael , Alicia K , Ralph S Grand , Luke Isbel , Simone Cavadini , Zuzanna Kozicka , Georg Kempf , Richard D Bunker , et al. 2020 . “ Mechanisms of OCT4-SOX2 Motif Readout on Nucleosomes .” Science 368 ( 6498 ): 1460 – 65 . OpenUrl Abstract / FREE Full Text ↵ Moonen , Jan-Renier , James Chappell , Minyi Shi , Tsutomu Shinohara , Dan Li , Maxwell R Mumbach , Fan Zhang , et al. 2022 . “ KLF4 Recruits SWI/SNF to Increase Chromatin Accessibility and Reprogram the Endothelial Enhancer Landscape Under Laminar Shear Stress .” Nature Communications 13 ( 1 ): 4941 . OpenUrl CrossRef PubMed ↵ Ngo , Thuy TM , Bailey Liu , Feng Wang , Aakash Basu , Carl Wu , and Taekjip Ha . 2024 . “ Dependence of Nucleosome Mechanical Stability on DNA Mismatches .” Elife 13 : RP95514 . OpenUrl CrossRef PubMed ↵ Ngo , Thuy TM , Jejoong Yoo , Qing Dai , Qiucen Zhang , Chuan He , Aleksei Aksimentiev , and Taekjip Ha . 2016 . “ Effects of Cytosine Modifications on DNA Flexibility and Nucleosome Mechanical Stability .” Nature Communications 7 ( 1 ): 10813 . OpenUrl CrossRef PubMed ↵ Ngo , Thuy TM , Qiucen Zhang , Ruobo Zhou , Jaya G Yodh , and Taekjip Ha . 2015 . “ Asymmetric Unwrapping of Nucleosomes Under Tension Directed by DNA Local Flexibility .” Cell 160 ( 6 ): 1135 – 44 . OpenUrl CrossRef PubMed ↵ Pantier , Raphael , Kashyap Chhatbar , Grace Alston , Heng Yang Lee , and Adrian Bird . 2022 . “ High-Throughput Sequencing SELEX for the Determination of DNA-Binding Protein Specificities in Vitro .” STAR Protocols 3 ( 3 ): 101490 . OpenUrl CrossRef PubMed ↵ Pataskar , Abhijeet , Willem Vanderlinden , Johannes Emmerig , Aditi Singh , Jan Lipfert , and Vijay K Tiwari . 2019 . 
“ Deciphering the Gene Regulatory Landscape Encoded in DNA Biophysical Features .” Iscience 21 : 638 – 49 . OpenUrl CrossRef PubMed ↵ Pedregosa , Fabian , Gaël Varoquaux , Alexandre Gramfort , Vincent Michel , Bertrand Thirion , Olivier Grisel , Mathieu Blondel , et al. 2011 . “ Scikit-Learn: Machine Learning in Python .” Journal of Machine Learning Research 12 ( Oct ): 2825 – 30 . OpenUrl ↵ Peng , Yunhui , Wei Song , Vladimir B Teif , Ivan Ovcharenko , David Landsman , and Anna R Panchenko . 2024 . “ Detection of New Pioneer Transcription Factors as Cell-Type-Specific Nucleosome Binders .” Elife 12 : RP88936 . OpenUrl CrossRef PubMed Peters , Justin P , and L James Maher III . . 2010. “DNA Curvature and Flexibility in Vitro and in Vivo.” Quarterly Reviews of Biophysics 43 ( 1 ): 23 – 63 . ↵ Portillo-Ledesma , Stephanie , Suckwoo Chung , Jill Hoffman , and Tamar Schlick . 2024 . “ Regulation of Chromatin Architecture by Transcription Factor Binding .” Elife 12 : RP91320 . OpenUrl CrossRef PubMed ↵ Quinlan , Aaron R . 2014 . “ BEDTools: The Swiss-Army Tool for Genome Feature Analysis .” Current Protocols in Bioinformatics 47 ( 1 ): 11 – 12 . OpenUrl CrossRef ↵ Ramírez , Fidel , Devon P Ryan , Björn Grüning , Vivek Bhardwaj , Fabian Kilpert , Andreas S Richter , Steffen Heyne , Friederike Dündar , and Thomas Manke . 2016 . “ deepTools2: A Next Generation Web Server for Deep-Sequencing Data Analysis .” Nucleic Acids Research 44 ( Web Server issue ): W160 . OpenUrl CrossRef PubMed Rastogi , Chaitanya , Dahong Liu , Harmen Bussemaker, SystemRequirements Java, and Maintainer Harmen Bussemaker . 2015 . “ Package ‘SELEX’ .” ↵ Rauluseviciute , Ieva , Rafael Riudavets-Puig , Romain Blanc-Mathieu , Jaime A Castro-Mondragon , Katalin Ferenc , Vipin Kumar , Roza Berhanu Lemma , et al. 2024 . “ JASPAR 2024: 20th Anniversary of the Open-Access Database of Transcription Factor Binding Profiles .” Nucleic Acids Research 52 ( D1 ): D174 – 82 . 
OpenUrl CrossRef PubMed ↵ Read , Christopher M , John P Baldwin , and Colyn Crane-Robinson . 1985 . “ Structure of Subnucleosomal Particles. Tetrameric (H3/H4) 2 146 Base Pair DNA and Hexameric (H3/H4) 2 (H2A/H2B) 1 146 Base Pair DNA Complexes .” Biochemistry 24 ( 16 ): 4435 – 50 . OpenUrl CrossRef PubMed ↵ Remadevi , Viji , Parvathy Muraleedharan , and Sreeharshan Sreeja . 2021 . “ FOXO1: A Pivotal Pioneer Factor in Oral Squamous Cell Carcinoma .” American Journal of Cancer Research 11 ( 10 ): 4700 . OpenUrl PubMed ↵ Riley , Todd R , Matthew Slattery , Namiko Abe , Chaitanya Rastogi , Dahong Liu , Richard S Mann , and Harmen J Bussemaker . 2014 . “ SELEX-Seq: A Method for Characterizing the Complete Repertoire of Binding Site Preferences for Transcription Factor Complexes .” Hox Genes: Methods and Protocols , 255 – 78 . ↵ Rohs , Remo , Xiangshu Jin , Sean M West , Rohit Joshi , Barry Honig , and Richard S Mann . 2010 . “ Origins of Specificity in Protein-DNA Recognition .” Annual Review of Biochemistry 79 ( 1 ): 233 – 69 . OpenUrl CrossRef PubMed Web of Science ↵ Rotinen , Mirja , Sungyong You , Julie Yang , Simon G Coetzee , Mariana Reis-Sobreiro , WenChin Huang , Fangjin Huang , et al. 2018 . “ ONECUT2 Is a Targetable Master Regulator of Lethal Prostate Cancer That Suppresses the Androgen Axis .” Nature Medicine 24 ( 12 ): 1887 – 98 . OpenUrl CrossRef PubMed ↵ Sarkar , Sharmilee , Upalabdha Dey , Trust Boitumelo Khohliwe , Venkata Rajesh Yella , and Aditya Kumar . 2021 . “ Analysis of Nucleoid-Associated Protein-Binding Regions Reveals DNA Structural Features Influencing Genome Organization in Mycobacterium Tuberculosis .” FEBS Letters 595 ( 19 ): 2504 – 21 . OpenUrl CrossRef PubMed ↵ Satchwell , Sandra C , Horace R Drew , and Andrew A Travers . 1986 . “ Sequence Periodicities in Chicken Nucleosome Core DNA .” Journal of Molecular Biology 191 ( 4 ): 659 – 75 . 
OpenUrl CrossRef PubMed Web of Science ↵ Slattery , Matthew , Tianyin Zhou , Lin Yang , Ana Carolina Dantas Machado , Raluca Gordân , and Remo Rohs . 2014 . “ Absence of a Simple Code: How Transcription Factors Read the Genome .” Trends in Biochemical Sciences 39 ( 9 ): 381 – 99 . OpenUrl CrossRef PubMed Web of Science ↵ Soufi , Abdenour , Greg Donahue , and Kenneth S Zaret . 2012 . “ Facilitators and Impediments of the Pluripotency Reprogramming Factors’ Initial Engagement with the Genome .” Cell 151 ( 5 ): 994 – 1004 . OpenUrl CrossRef PubMed Web of Science ↵ Soufi , Abdenour , Meilin Fernandez Garcia , Artur Jaroszewicz , Nebiyu Osman , Matteo Pellegrini , and Kenneth S Zaret . 2015 . “ Pioneer Transcription Factors Target Partial DNA Motifs on Nucleosomes to Initiate Reprogramming .” Cell 161 ( 3 ): 555 – 68 . OpenUrl CrossRef PubMed ↵ Stoeber , Shane , Holly Godin , Cheng Xu , and Lu Bai . 2024 . “ Pioneer Factors: Nature or Nurture? ” Critical Reviews in Biochemistry and Molecular Biology , 1 – 15 . ↵ Takahashi , Kazutoshi , and Shinya Yamanaka . 2006 . “ Induction of Pluripotent Stem Cells from Mouse Embryonic and Adult Fibroblast Cultures by Defined Factors .” Cell 126 ( 4 ): 663 – 76 . OpenUrl CrossRef PubMed Web of Science ↵ Takaku , Motoki , Sara A Grimm , Takashi Shimbo , Lalith Perera , Roberta Menafra , Hendrik G Stunnenberg , Trevor K Archer , Shinichi Machida , Hitoshi Kurumizaka , and Paul A Wade . 2016 . “ GATA3-Dependent Cellular Reprogramming Requires Activation-Domain Dependent Recruitment of a Chromatin Remodeler .” Genome Biology 17 : 1 – 16 . OpenUrl CrossRef PubMed ↵ Takizawa , Yoshimasa , Hiroki Tanaka , Shinichi Machida , Masako Koyama , Kazumitsu Maehara , Yasuyuki Ohkawa , Paul A Wade , Matthias Wolf , and Hitoshi Kurumizaka . 2018 . “ Cryo-EM Structure of the Nucleosome Containing the ALB1 Enhancer DNA Sequence .” Open Biology 8 ( 3 ): 170255 . 
OpenUrl CrossRef PubMed ↵ Tanaka , Hiroki , Yoshimasa Takizawa , Motoki Takaku , Daiki Kato , Yusuke Kumagawa , Sara A Grimm , Paul A Wade , and Hitoshi Kurumizaka . 2020 . “ Interaction of the Pioneer Transcription Factor GATA3 with Nucleosomes .” Nature Communications 11 ( 1 ): 4136 . OpenUrl CrossRef PubMed ↵ Van Holde , Kensal E. 2012 . Chromatin . Springer Science & Business Media . ↵ Xu , Cheng , Holly Kleinschmidt , Jianyu Yang , Erik M Leith , Jenna Johnson , Song Tan , Shaun Mahony , and Lu Bai . 2024 . “ Systematic Dissection of Sequence Features Affecting Binding Specificity of a Pioneer Factor Reveals Binding Synergy Between FOXA1 and AP-1 .” Molecular Cell 84 ( 15 ): 2838 – 55 . OpenUrl CrossRef PubMed ↵ Yang , Lin , Yaron Orenstein , Arttu Jolma , Yimeng Yin , Jussi Taipale , Ron Shamir , and Remo Rohs . 2017 . “ Transcription Factor Family-Specific DNA Shape Readout Revealed by Quantitative Specificity Models .” Molecular Systems Biology 13 ( 2 ): 910 . OpenUrl Abstract / FREE Full Text ↵ Yella , Venkata Rajesh , Devesh Bhimsaria , Debostuti Ghoshdastidar , José A Rodríguez-Martínez , Aseem Z Ansari , and Manju Bansal . 2018 . “ Flexibility and Structure of Flanking DNA Impact Transcription Factor Affinity for Its Core Motif .” Nucleic Acids Research 46 ( 22 ): 11883 – 97 . OpenUrl CrossRef PubMed ↵ Zaret , Kenneth S . 2020 . “ Pioneer Transcription Factors Initiating Gene Network Changes .” Annual Review of Genetics 54 ( 1 ): 367 – 85 . OpenUrl CrossRef PubMed ↵ Zhou , Keda , Guillaume Gaullier , and Karolin Luger . 2019 . “ Nucleosome Structure and Dynamics Are Coming of Age .” Nature Structural & Molecular Biology 26 ( 1 ): 3 – 13 . OpenUrl CrossRef PubMed ↵ Zhu , Fangjie , Lucas Farnung , Eevi Kaasinen , Biswajyoti Sahu , Yimeng Yin , Bei Wei , Svetlana O Dodonova , et al. 2018 . “ The Interaction Landscape Between Transcription Factors and the Nucleosome .” Nature 562 ( 7725 ): 76 – 81 . OpenUrl CrossRef PubMed View the discussion thread. 
Back to top Previous Next Posted May 27, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following DNA sequence encoded conformational flexibility orchestrates pioneer transcription factor–nucleosome interaction landscape Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share DNA sequence encoded conformational flexibility orchestrates pioneer transcription factor–nucleosome interaction landscape Upalabdha Dey , Gustavo Sganzerla Martinez , Venkata Rajesh Yella , Aditya Kumar bioRxiv 2025.05.21.655105; doi: https://doi.org/10.1101/2025.05.21.655105 Share This Article: Copy Citation Tools DNA sequence encoded conformational flexibility orchestrates pioneer transcription factor–nucleosome interaction landscape Upalabdha Dey , Gustavo Sganzerla Martinez , Venkata Rajesh Yella , Aditya Kumar bioRxiv 2025.05.21.655105; doi: https://doi.org/10.1101/2025.05.21.655105 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7618) Biochemistry (17635) Bioengineering (13859) Bioinformatics (41846) Biophysics (21401) Cancer Biology (18534) Cell Biology (25422) Clinical Trials (138) Developmental Biology (13352) Ecology (19860) Epidemiology (2067) Evolutionary Biology (24285) Genetics (15582) Genomics (22463) Immunology (17700) 
Microbiology (40298) Molecular Biology (17141) Neuroscience (88424) Paleontology (666) Pathology (2825) Pharmacology and Toxicology (4813) Physiology (7633) Plant Biology (15107) Scientific Communication and Education (2042) Synthetic Biology (4284) Systems Biology (9808) Zoology (2267)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00