Analysis of long-range contacts across cell types outlines a core sequence determinant of 3D genome organisation

doi:10.1101/2025.03.16.643527

Analysis of long-range contacts across cell types outlines a core sequence determinant of 3D genome organisation

2025 · doi:10.1101/2025.03.16.643527

preprint OA: gold CC-BY-NC-4.0

📄 Open PDF Full text JSON View at publisher

Full text 79,026 characters · extracted from preprint-html · click to expand

Analysis of long-range contacts across cell types outlines a core sequence determinant of 3D genome organisation | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Analysis of long-range contacts across cell types outlines a core sequence determinant of 3D genome organisation View ORCID Profile Liezel Tamon , Zahra Fahmi , View ORCID Profile James Ashford , View ORCID Profile Rosana Collepardo-Guevara , View ORCID Profile Aleksandr B. 
Sahakyan doi: https://doi.org/10.1101/2025.03.16.643527 Liezel Tamon 1 MRC WIMM Centre for Computational Biology, MRC Weatherall Institute of Molecular Medicine, Radcliffe Department of Medicine, University of Oxford , Oxford, OX3 9DS, United Kingdom Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Liezel Tamon Zahra Fahmi 2 Department of Chemistry, University of Cambridge , Lensfield Road, Cambridge, CB2 1EW, United Kingdom Find this author on Google Scholar Find this author on PubMed Search for this author on this site James Ashford 1 MRC WIMM Centre for Computational Biology, MRC Weatherall Institute of Molecular Medicine, Radcliffe Department of Medicine, University of Oxford , Oxford, OX3 9DS, United Kingdom Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for James Ashford Rosana Collepardo-Guevara 2 Department of Chemistry, University of Cambridge , Lensfield Road, Cambridge, CB2 1EW, United Kingdom 3 Department of Physics, University of Cambridge , JJ Thomson Ave, Cambridge, CB3 0HE, United Kingdom 4 Department of Genetics, University of Cambridge , Cambridge, CB2 3EH, United Kingdom Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rosana Collepardo-Guevara Aleksandr B. Sahakyan 1 MRC WIMM Centre for Computational Biology, MRC Weatherall Institute of Molecular Medicine, Radcliffe Department of Medicine, University of Oxford , Oxford, OX3 9DS, United Kingdom Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Aleksandr B. 
Sahakyan For correspondence: aleksandr.sahakyan{at}imm.ox.ac.uk Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract The sequence-driven organising principles of the 3D genome are crucial for interpreting the core effects of genomic variation and for understanding the evolution of genome organisation and function. We investigated these by isolating and analysing cell-type-persistent contacts, heavily dependent on the similarly cell-type-persistent genomic sequence. We stratified long-range contacts from a diverse group of human tissues and cell lines based on contact persistence, c p , reflecting their presence across cell or tissue types, presenting them as an atlas of contacts and the cell-type invariant (CETI) hubs they form across human chromosomes. Our survey of more than 300 chromatin and genome features revealed their association with c p , contrasting variable from persistent contacts in terms of co-localisation with genes, 3D architectural domains, epigenetic and sequence elements. We found persistent contacts to be predominantly comprised of AT-rich sequences and related to heterochromatin. A key outcome is finding a link between the experimental genomic contacts and the complementarity between pairs of contacting DNA loci. This work provides evidence for a sequence determinant of genomic contacts contributing to the decoding of the relationship between sequence and structure that is crucial for functional and evolutionary studies concerning the 3D genome organisation. Introduction Within a human cell, a DNA double helix of ∼ 2 m in length is intricately organised inside a nucleus, ∼ 10 μm in diameter, while enabling proper functioning of molecular processes like replication and gene expression. According to length or size, this organisation can broadly be categorised into the 1) scale of nucleosome- and chromatin-fibre, and of higher-order organisation, consisting of the 2) intermediate (domain) scale (e.g. 
loops, topologically associating domains or TADs [ 1 – 3 ], lamina associated domains or LADs and compartments [ 4 ]), and the 3) nuclear scale (e.g. chromosome territories, genome arrangement with respect to nuclear centre/periphery and nuclear bodies). Key to understanding the relationship between the way the genome is organised and its function is the identification of factors influencing that organisation. Results from biochemical mapping methods, imaging experiments, polymer simulations, and other computational and experimental investigations have shown that a number of factors play a multiplexed role in determining the 3D genome organisation. These factors include the general polymeric nature of the DNA [ 5 ], the complex and nonuniform information the chromatin holds in the form of epigenetic modifications and other occupants [ 6 ], the molecular processes the chromatin goes through particularly during replication [ 7 ] and transcription [ 8 ], and the genomic DNA sequence [ 9 – 12 ]. The relationship between genomic sequence and any compounding phenomenon, like 3D genome organisation, remains a subject of much interest, as it can crucially contribute to the interpretation of naturally occurring and de novo genomic variations and to the understanding of the sequence-structural evolution of our genome. The volume of work targeting the understanding of genomic contacts has steeply risen ever since the first experimental means have been reported [ 4 , 13 – 15 ] and it has established genome organisation as dynamic and stochastic at all levels (recently reviewed [ 16 ]), as opposed to being static and deterministic. The increasing amount of data from 3C and other types of methods in the context of various cell types, states, and species have outlined the multiscale nature of genome organisation [ 1 , 3 , 6 , 17 ], and the complex dynamics of contacts therein [ 16 , 18 ]. 
Genome organisation within a given species varies [ 19 , 20 ] across populations [ 21 ], depending on a cell type [ 22 ], state in a cell cycle [ 23 , 24 ], epigenetic state [ 25 ], and throughout differentiation [ 26 , 27 ]. The presence of features and patterns first described using bulk data was validated, but extensive heterogeneity across single cells of the same type and population [ 28 ] was revealed, even down to individual alleles [ 20 , 29 , 30 ]. In this work, we leveraged the available large pool of experimental contact data, particularly through the availability of Hi-C data from a wide selection of human cell lines and primary tissues [ 22 ], to better understand the DNA sequence basis of genomic contact formation and 3D genome organisation. In particular, we characterised the contacts across human cell types to isolate and investigate the core, cell-type-persistent or invariant contacts, which are likely enriched in associations with similarly cell-type-invariant factors, i.e. DNA sequence-based features largely common in all the human cells. Materials and Methods Computational platforms and resources Computer code was written in the R programming language and most computations utilised the in-house high-performance computing facilities at MRC Weatherall Institute of Molecular Medicine, University of Oxford, by employing cluster nodes with 256 GB random access memory, and Intel Xeon E5-2680v3 12-core (24-thread) and Intel Xeon E7-8891v3 10-core (20-thread) processors. Statistical analyses Differences between two groups were analysed using Student’s t-test and Mann-Whitney-Wilcoxon (MWW) tests. Pairwise comparisons of > 2 distributions were done using pairwise implementations of both tests in R. The alternative hypothesis was two-sided by default and the p-values reported were Benjamini-Hochberg adjusted [ 31 ]. The alpha (α) or significance level is by default 0.05. 
Contact persistence stratification The main working dataset of genomic contacts was retrieved from 21 published Hi-C datasets from 14 primary tissues and 7 cell lines at 40-kb resolution, consolidated and reanalysed in Schmitt et al. [ 22 ] (GEO:GSE87112). The processed contact matrices, containing uniquely mapped reads (in correspondence with the authors), and normalised using HiCNorm [ 32 ] were used for the stratification. Only long-range contacts were used in our analyses with linear distance or contact gap ≥ 2 Mb. The c p of each long-range contact was calculated as the number of cell types in which the contact is present, i.e. in which the contact had a non-zero HiCNorm c f in the Hi-C contact matrix. See Supplementary Table S5 and S6 for counts of long-range contacts and unique regions forming these contacts per cell line or tissue, respectively. Atlas of persistent substructures in human chromosomes Arc and network diagrams for visualising contacts Arc and network diagrams were built using R libraries R4RNA [ 33 ] and visNetwork, respectively. In the network, a node represents a region or bin of length equal to the Hi-C resolution. Which bins are represented as black nodes is determined by the set gap between nodes. If the value is 50, the consecutive black nodes would have 50 bins in between them i.e. bin 1 is connected to bin 52 by a grey arrow edge; bin 52 is connected to bin 103 and so on. The length of the edge is scaled by this gap argument; however, since edges behave like a spring, this length is only the value at rest, and it can change when the edge must stretch due to the positioning of contacts. Consequently, the network representation of the persistent substructure may not be proportional to the length of the chromosome, hence the equidistant black nodes become the only distance markers. There are also the orange nodes and edges, which represent the highly persistent contacts. 
Note that when a contact bin coincides with a black marker node, that node is coloured orange. But this contact region can still be distinguished as a distance marker because, unlike the other contact regions located in between black marker nodes, it is preceded by the arrowhead of an edge. Identification of CEll-Type Invariant (CETI) hubs CETI hubs are comprised of a central region, 40 kb long in this work, highly interconnected with several other 40-kb regions. Per chromosome, hub centres were chosen based on having extremely long-range persistent contacts and then all the 2-Mb persistent contacts (c p ≥ 19) it participates in were retrieved, forming the hub. We have identified 38 such CETI hubs, with each chromosome contributing at least one hub except for chr. 15, chr. 17 and chr. X. For these chromosomes, it was hard to identify hubs because they barely contain contacts between very distant regions (as filtered in our visualisations) compared to other chromosomes of similar length. Supplementary File S5 brings detailed information on the 38 CETI hubs. Feature association with contact persistence See Supplementary Table S7 for the comprehensive list of the sources of around 300 features used for the associations. Unless indicated otherwise, ranges overlapping means having at least 1 shared or common bp. Region-wise association Chromatin and genomic features, which are characteristics of a region (not by a contact), were associated with the unique contact regions per c p via two ways. 1) The significance of feature enrichment or depletion in a set of unique contact regions was quantified through permutation tests (10,000 iterations) using custom wrapper functions based on the R library regioneR [ 34 ]. The unique contact regions were the ones being permuted and the random samples were drawn without replacement from the background set. 
The association was quantified by calculating (a) number of contact regions overlapping with feature ranges, where an overlap of one contact region with multiple regions of a feature was counted only once, and (b) total intersection in bp. The same permutation test procedure was used for calculating the significance of enrichment of long genes at unique high-c p contact regions except that the sample statistic was the mean length of genes overlapping. Enrichment and depletion of features were determined at unique 40-kb regions forming the c p = 21 and c p ≥ 19 contacts. The background was the set of all unique long-range contact regions (c p ≥ 1). 2) The number of unique contact regions per c p that overlap with a feature was calculated using custom R scripts. Contact-wise association The contact-wise association was done via two ways. 1) Per c p , we determined the fraction of feature-defined contact types based on whether the two regions of a contact overlap or do not overlap with a feature. 2) Per feature-defined contact type, we calculated the fraction of contacts of given c p (see Supplementary Fig. S10B,11B ). Gene-related analyses In all analyses involving genes, those with multiple transcripts were represented either by the single longest transcript or, in the case of ties, by the first of the longest transcripts (but preferring coding over non-coding ones). The latter was the case for 962 genes out of 24,910 (∼3.86%) unique genes from the UCSC hg19 annotation table. Expression analysis The Genotype-Tissue Expression (GTEx) data (E-MTAB-5214) [ 35 ] (in TPM) was filtered to contain only genes with expression values in at least 1 tissue. The EMBL-EBI Expression Atlas definition of the expression levels was used in this study – not-expressed (NE): TPM/FPKM < 0.5; low: 0.5 ≤ TPM/FPKM ≤ 10; medium: 10 < TPM/FPKM ≤ 1000; and high: TPM/FPKM > 1000 [ 36 ]. 
Another baseline expression dataset (E-MTAB-1733) [ 37 ], containing RNA-seq data of coding genes from 27 normal tissues from 95 adult individuals, was subjected to the same pre-processing and used to repeat the analyses ( Supplementary Fig. S15 Set 1 ). Functional term enrichment analysis with DAVID Because DAVID can only be used to process up to 3000 inputs at a single instance, 3 sets containing 2999 genes (in some instances, DAVID maps more than 1 DAVID gene identifier to a gene name) were randomly sampled without replacement from 4209 unique genes co-localising with prime contact (c p = 21) regions. The built-in medium stringency of DAVID functional annotation clustering was applied. The built-in set of Homo sapiens genes was used as background. The top 2 enriched clusters were similar across samples and ( Fig. 3D ) shows results from one sample. Gene count is the number of c p = 21 contact genes associated with each term. Replication timing data processing and analysis The replication timing (RT) data (192 samples) from ReplicationDomain [ 38 ] were processed using custom R scripts into a final dataset, wherein a 40-kb region has one average RT value from each of the 61 unique cell lines (50 non-cancer- and 11 cancer-related cell lines). RT measurements were binned at 40 kb to match the contact data and normalised across samples by aligning each sample to a reference set by linear-model fitting. For the association with c p , the mean and median of the average values from each group of cell lines were calculated for each 40-kb region. Only regions with data from ≥ 59 cell lines were considered. A region was represented by the mean or median of the RT measurements overlapping with it. The consensus RT for a contact was then calculated as the mean of the two means or two medians from the two contacting regions. 
Somatic cancer SNV data processing and analyses The dataset (N = 38,428,969) was downloaded from the ICGC Data Portal (Release 28, 2019 March 27) [ 39 ] from all human autosomes from 2320 samples. The SNVs were categorised based on their location relative to transcript components according to a hierarchical assignment of SNVs in the following decreasing order of priority, exon > intron > intergenic. To quantify the vulnerability of each contact to SNVs, we first calculated, for each region, the number of mutated sites with at least one mutation (Nmutsite), and the total number of mutations (Nmut). Both metrics were normalised to the number of base pairs that can be mutated depending on the SNV type and location (Nmutsite norm and Nmut norm , respectively). The consensus value for a contact was then equal to the mean of the metric values from the two contacting regions. Contact sequence complementarity calculation Sequence complementarity of contacts (c || ) was estimated using three ways. 1) Calculating c || via global sequence alignment using edit or Levenshtein distance (i.e., minimum number of single-character edits to transform one sequence to another) (c | | align ) was implemented using the open-source C/C++ library, edlib [ 40 ]. Substitutions, insertions, or deletions were penalised by 1 regardless of the base identity. 2) Calculating c || via matching of 7-mer counts (c | | k-mer ) involved counting the occurrence of all possible 7-mers in both strands of a region and normalising these counts to the length of the region. The c | | k-mer of a contact is then calculated as the sum of the absolute differences between the 7-mer normalised counts of pair of regions in contact. 3) Calculating c || via crude estimation of hybridisation energy of regions in contacts (c || G ) was possible with published free energy parameters for unique, perfectly matched DNA triplets [ 41 ]. 
Free energy parameters for 7-mers were derived from the triplet energy parameters by sliding a 3-bp window along the 7-mers and averaging the parametric values of triplets present. The c || G of a contact is then given by the sum of the products of the 7-mer parametric values with the matching 7-mer counts of regions in contact. All complementarity values were normalised to the length of contact regions. The c || k-mer and c || align were negated to be directly proportional to complementarity. See Supplementary File S1: Section 5.2 for more details. Shuffling of contact regions Shuffling, performed per chromosome and per c p , was done using our general-purpose optimisation library, rOptimus [ 42 ], in order to maximise the number of fake/shuffled long-range contacts that would not be present in the real/original set (duplicates also not allowed). See Supplementary File S1: Section 5.3 for more details. Repeat-related analyses Transposon subfamily sites and copy number ranking were derived from the UCSC hg19 RepeatMasker annotation table. For calculating sequence complementarity using repeat-masked genome, only c || k-mer was calculated as detailed above. Contacts formed by regions with > 50% of their sequence masked were excluded. In addition, those involving regions with at least one missing bp in the unmasked genome were excluded also to be consistent with the analysis using the unmasked genome. See Supplementary Fig. S28 caption for more details. Contact map generation based on c p , c f and c || The Hi-C p maps include all long-range contacts from all cell types. The other Hi-C f data not part of the main contact dataset were downloaded as .hic from the 4DN portal [ 43 ] and the sparse contact matrices were retrieved using the R library strawr. For all contact maps with gradient colouring, the upper and lower limits of the colour scale are the upper and lower whisker values (Q1 − 1.5×IQR and Q3 + 1.5×IQR). 
Values outside these limits are coloured using the corresponding most extreme colour. Results Contact persistence to focus on core genomic contacts We quantified the persistence of genomic contacts across human cell types ( Supplementary File S1 , Supplementary Table S1 ) by integrating long-range contacts (40-kb resolution) with gap between the contacting regions ≥ 2 Mb. The latter threshold was taken to be greater than the size of most TADs [ 44 ] ( Supplementary Fig. S1 ). This was done considering our focus on core, sequence-driven components of genome organisation, which would benefit from minimising the effect of specific mechanisms, such as the reported role in TAD formation of the CTCF/cohesin-mediated loop extrusion predominantly demonstrated at the submegabase scale [ 17 , 45 , 46 ] and the similarly, more pronounced driver role of cell-type specific transcription at shorter-scale organisation [ 8 ]. The assembly of such contacts was possible by using 21 high-quality, re-analysed Hi-C datasets from varied primary tissues and cell lines generously made available by Ren and co-workers [ 22 ] ( Fig. 1A ). With the integration, the original contact matrices, denoted as Hi-C f , based on the conventional contact frequency measure, c f , were converted into a single map - here termed as Hi-C p ( Fig. 1B ). In a Hi-C p map, each contact is represented by a persistence score (c p ) from 1 to 21, equal to the number of human cell types it is present in ( Fig 1B,C ). The value of c p is independent of the exact c f value that a given contact is present in the different cell types i.e. a contact in a given cell type contributes to c p as long as cf (HiCNorm-normalised [ 32 ]) > 0 for uniquely mapped contacts. We did not apply statistical tests to enrich for interactions that have higher c f than the expected value based on distance or gap between contacting regions, as our goal was not to prioritise the potential for the most functionally relevant contacts. 
It should also be emphasised that contacts not flagged as “significant” by statistical enrichment tests are not guaranteed to be non-contacts or noise [ 47 ]. By design, we wanted to work with all contacts with firm evidence of happening, i.e. a robust chimeric read representing that contact. For this reason, we also ensured that potential artefactual/ambiguously mapping reads were not accounted at all. In addition, working with longer-range contacts (> 2 Mb, greater than the average TAD size determined using various methods [ 44 ]) was a way to mitigate inclusion of significant noise from the method, e.g. getting chimeric reads for contacts that are just extremely close to each other linearly. Furthermore, if a given contact deceptively passed all our checks despite being a mere artefact, we then had the voting of all the considered cell types to define the persistence of the contact; an artefact is therefore unlikely to have a high contact persistence (c p ) and, hence, to influence our conclusions (see Supplementary Methods for an extended explanation). The contact persistence scores were also found to be robust against any outlying datasets based on sequencing depth and source similarity ( Supplementary Fig. S2 ). Download figure Open in new tab Figure 1. Contact persistence to isolate and investigate the core genomic contacts, their determinants and implications. ( A ) Human tissue and cell line sources of the Hi-C (or Hi-C f ) datasets [ 22 ] used in this study. The icons indicate the sources, and the colours denote tissue origin, cell line source and differentiation state ( Supplementary Table S1 ). ( B ) Diagram representation of c p derivation. Hi-C f contacts all in black to indicate that all contacts with HiCNorm c f > 0 (uniquely mapped reads) were considered present in a dataset. ( C ) Chr. 17 FC (IMR-90) Hi-C f compared with Hi-C p of long-range contacts from all cell types. 
( D ) The strategy of this study to reveal core determinants of genomic contacts – investigating core, persistent contacts by contrasting with variable ones to reveal their implications and determinants, which are likely to be similarly persistent across cell types i.e. sequence determinants. Out of the 112,861,349 long-range contacts examined, the proportion of contacts decreases with increasing persistence, with the least and most persistent ones comprising 17.448 % and 0.012 %, respectively ( Supplementary Table S2, Supplementary Fig. S3 ). The c p stratification is independent of the exact c f , but we did find that persistent contacts tend to have relatively high c f including when accounting for contact gap variation across c p ( Supplementary Fig. S4,5 ). Persistent contacts tend to be shorter in range, with median contact gap lengths between 2.4 to 2.7 Mb for c p ≥ 19 contacts ( Supplementary Fig. S6 ). Interestingly, in Yang et al. [ 48 ], authors reported that short-range contacts, with contact gaps of around 2.5 Mb, primarily have high relative contact frequencies conserved across the lymphoblastoid cells of humans and 3 other primates - chimpanzee, bonobo and gorilla. By directly associating their data with our c p data, we did find that our persistent contacts mostly correspond to the contacts they found to have conserved high-c f pattern ( Supplementary Fig. S7,8 ). By contrasting variable and persistent contacts, we investigated the foundations of contact persistence to facilitate the study of the core, invariant determinants (as well as its implications) of higher-order genome organisation ( Fig. 1D ). In Fig. 2 , we visualise the persistent contacts as arcs in an arc diagram ( Fig. 2A ) and as edges in a network diagram ( Fig. 2B ), in particular, to highlight outlying persistent contacts that are extremely long-range. For instance, out of its 10,253 c p ≥ 19 contacts, chr. 
1 has 303, 90 and 5 highly persistent contacts joining intervals linearly separated by ≥ 8, 26 and 200 Mb distance. The network diagrams also effectively show how high persistence contacts can bring together far-off regions and form clusters of highly interconnected regions that are present amongst most cell types and that we call in this text as CETI ( ce ll- t ype- i nvariant) hubs ( Fig. 2B,D ). Download figure Open in new tab Figure 2. Visualisation of persistent contacts and the chromosome organisation they mediate. Contacts displayed have c p ≥ 19 (c p denoted by arc or edge colour). Centromeric regions are marked by cyan lines and by thick edges on arc and network diagrams, respectively. ( A ) Arc diagram for chr. 1. The upper side has contacts with gap ≥ 200 40-kb bins (or 8 Mb), while the bottom side shows the rest of c p ≥ 19 contacts (≥ 2 Mb). ( B ) Network diagram for chr. 1 formed by c p ≥ 19 contacts with gap ≥ 650 40-kb bins (or 26 Mb). The ce ll- t ype- i nvariant (CETI) hubs of interconnected regions are indicated. ( C ) Arc diagrams corresponding to (A), but for the rest of human autosomes and chr. X. ( D ) Representative network diagrams, analogous to (B), for some chromosomes generated through c p ≥ 19 contacts with gap ≥ 200 40-kb bins (or 8 Mb). Feature associations of persistent contacts To examine the identity, causes and implications of a contact persistent in many cell identities, we looked for significant associations with ∼300 chromatin and genomic features with c p ( Supplementary Fig. S9-11 ). We found, at high-c p regions (region-wise), a significant enrichment of heterochromatin-related domains and features i.e. B-compartments and subcompartments, lamina-associated domain (LAD) marker LMNB1 and repressive chromatin mark H3K9me3 ( Fig. 3A left). At the sequence level, AT-rich features are consistently enriched, particularly A-phased repeats, L1 and L2 isochores, and CpG-depleted prairie sequences [ 49 ] ( Fig. 3A left). 
Analysis of enriched 7-mers at persistent contact regions shows no strong motif, but they tend to contain more A/T over G/C bases ( Supplementary Table S3, Fig. S12, 13 ); although, at 40-kb resolution, the differences in the AT content across c p are not drastic ( Supplementary Fig. S14 ), with values across c p being close to the recently calculated genome-wide average of 40.9% GC [ 50 ]. The enrichment of features is accompanied by depletion of euchromatin-related domains and GC-rich features namely A-compartments and subcompartments, CpG island, and putative G-quadruplex sequences, H1, H2 and H3 isochores, and CpG-dense forest sequences [ 49 ] ( Fig. 3A right). Findings translate contact-wise, with the proportion of contacts associated with the enriched features increasing with c p as demonstrated here for H/L isochores ( Fig. 3B ). Download figure Open in new tab Figure 3. Persistent contacts enriched for contacts with features associated with heterochromatin and preferential AT sequence composition. The log 2 fold changes are relative to value at c p = 1. The c p = “All” refers to all long-range contacts. ( A ) Fold change of the proportion of unique contact regions across c p overlapping with significantly (left) enriched and (right) depleted features at unique c p = 21 contact regions (p-value < 0.05, permutation test, see Supplementary Fig. S9 heatmap for complete result of permutation tests and Supplementary File S2 for data behind the heatmap). Cell-type specific features can have multiple datasets (indicated in parentheses). “Indiv.” refers to individual data, “shared” refers to regions shared by or common to all individual data for that feature, and “predicted” refers to subcompartment regions predicted by SNIPER [ 53 ]. ( B ) Fraction of contacts (Fr fij ) overlapping isochore families across c p (see Supplementary Fig. S10,11 for similar plots of other enriched features). Only dominant contact types are shown in legend; “n” means no overlap. 
( C ) Fold change of the mean of various cross-tissue expression metrics across c p ( Supplementary Fig. S15,16 ). The c p ≤ 3 and c p ≥ 19 distributions are significantly different (p-values < 0.002) except for CV (Mann-Whitney-Wilcoxon, MWW, p-value of 0.105). Only genes with data in ≥ 70% of the tissues were considered. ( D ) Top 2 most significant DAVID clusters of enriched functional terms at c p = 21 ( Supplementary Fig. S17 ). Red dashed line at log 10 0.05 ∼ 1.301. ( E ) Fold change of the mean of derivative lengths and counts of various genic elements across c p ( Supplementary Fig. S18 ). Mean length of c p = 21 and c p ≥ 19 genes significantly higher than c p ≥ 1 genes (p-value < 0.0001, permutation test). ( F ) Mean cross-tissue replication timing (log 2 ratio of early and late signals) across c p ; error bars at 95% confidence intervals ( Supplementary Fig. S19 ). All pairwise comparisons of the distributions are significantly different (p adj. < 0.002). ( G ) Mean somatic cancer single nucleotide variant (SNV) frequency metrics across c p ( Supplementary Fig. S20-22 ); error bars at 95% confidence intervals. Nmutsite norm and Nmut norm are the number of mutated sites with at least one mutation, and the total number of mutations, respectively, normalised to the total bp that can be mutated depending on the SNV type and location. Consistent with enrichment of heterochromatin-related features, we also observed that genes at persistent contacts have a relatively lower expression across tissues -evident in the lower distribution of cross-tissue mean and median values of expression, but with the cross-tissue normalised variation (CV) being not significantly different between variable and persistent gene sets ( Fig. 3C ). Consistently, the fraction of tissues with low expression for a gene increases with c p , accompanied by a decrease in fraction of tissues with medium and high expression ( Supplementary Fig. S15 ). 
The c p = 21 contact regions co-localise with genes enriched for terms related to synapse, glycoproteins, and (trans)membrane ( Fig. 3D ). With the most significant cluster of terms showing enrichment of neuronal genes that are known to be long [ 51 ], we also investigated gene length variation and found that c p = 21 contact genes are also significantly longer, driven by higher mean intron length and greater count of exons as c p increases ( Fig. 3E ). To further probe the implications of the persistent genome organisation, we associated contact persistence with two related molecular phenotypes, replication timing (using aggregated data from different cell types) and somatic mutation frequency (using cancer SNV data). Consistent with the enrichment of heterochromatin-related features [ 52 ], we found that persistent contacts occur between regions that are late-replicating ( Fig. 3F ) and have higher SNV frequencies ( Fig. 3G ). Higher sequence complementarity between persistent contacts Sequence-specific interactions involving the recognition and association of complementary or homologous nucleic acid sequences are commonplace occurrences in various molecular processes ( Fig. 4A left). The protein-independent, preferential interaction of identical DNA duplexes (DNA self-assembly) with the help of biological cations has also been demonstrated in vitro [ 54 – 58 ], even for DNA occurring in nucleosomes (nucleosome self-assembly) [ 59 ] ( Fig. 4A right, see Supplementary File S1: Section 5.1 for description of each paper). The sequence identity awareness or recognition involved in these processes, whether happening directly and/or indirectly, prompted us to hypothesise that the degree of sequence similarity or complementarity between regions is associated with, or could contribute to, the tendency of regions to come in contact, regardless of cell type or state. 
Associating the similarity of sequences with contact persistence, we have taken a general approach, defining the similarity to be independent of any specific chromatin feature or genomic pattern, by using different measures of complementarity between sequences, c || . These measures were derived from 1) matching of short-span 7-mer counts between sequences in contact (c || k-mer ) ( Fig. 4B ), 2) long-span global sequence alignment using edit distance (c || align ) ( Fig. 4C ), and 3) approximate estimation of hybridisation free energy (c || G , decreasing trend means increasing c || ). Remarkably, for these three metrics, we observe a stepwise increase in the complementarity of sequences with rising persistence ( Fig. 4B,C ). Furthermore, when shuffling the contacting DNA regions within a c p category to form fake contacts not actually paired in that c p category, the fake contacts showed a significantly lower c || distribution compared to that of the real ones, with the difference being more pronounced for high-c p values ( Fig. 4B,C ). This suggested that the degree of complementarity is specific to real pairs of sequences in contact and not solely dependent on a single sequence motif or pattern present in all persistent regions. The observation holds valid even when limiting the drastic contact gap variation across c p by observing c || trends at narrower ranges of contact gaps ( Fig. 4D ), and when completely removing the effect of gap or distance ( Supplementary Fig. S23 ), which may independently impact contact formation. Download figure Open in new tab Figure 4. Higher sequence complementarity between persistent contacts. ( A ) Studies on the preferential association of identical DNA duplexes in vitro [ 54 – 59 ] (right) motivate the implication of sequence complementarity, c || , in contact formation, reinforced by many known sequence-dependent processes (left), involving single- and double-stranded nucleic acids (e.g. 
D-[ 60 ] and R-loop [ 61 , 62 ] formation, triplex formation [ 63 , 64 ], DNA origami nanotechnology [ 65 ] and homolog pairing [ 66 ]). Sequence complementarity of original (orig.) contacts calculated based on ( B ) short-span k-mer matching (c || k-mer ) and ( C ) long-span global alignment using edit distance (c || align ); c || of shuffled (shuff.) contacts in grey. Pairwise comparisons of orig. distributions between any two neighbouring c p values show significant differences (p adj. < 0.01) except for some comparisons between the highly persistent contacts (c p ≥ 17). Each orig. vs. shuff. distribution pair is also significantly different (p adj. < 0.001). ( Supplementary Fig. S24-27 , Supplementary Table S4 ) ( D ) c || k-mer across c p at specific contact gap ranges ( Supplementary Fig. S28 ). Shown are medians of distributions and the dashed segments extend to the 25th and 75th percentiles. Pairwise comparisons of the 5 most persistent and 5 most variable distributions show significant differences (p adj. < 0.05). ( E ) Hi-C p vs. Hi-C || k-mer using absolute and binned values of c || (top and bottom 5% contacts based on c || values in red and blue, respectively; the rest in beige; see Supplementary File S4 for other chromosomes). Braces highlight some areas similar between contact maps. Black scale bars at the top left corner are 4 Mb long. ( F ) Zoom in on chr. 1 highlighted regions in ( E ). We thus found a link between the Hi-C-inferred contact persistence and a simple metric, c || , calculable between any two sequences, without any training or assumption borrowed from experimental data. Consistent with the observed positive correlation between c || and c p , the complementarity-based map, Hi-C || (generated by directly plotting the calculated c || values and binned at a matching resolution), shows areas of both high and low signal like those in the Hi-C p ( Fig. 4E,F , highlighted with braces). 
Consequently, similar areas of matching high and low signals can also be seen when comparing c || with c f data from human embryonic and lymphoblastoid cell lines ( Fig. 5A,B ), and Drosophila embryonic and differentiated (neuronal) cell types ( Fig. 5C ) at different resolutions (see Supplementary File S3 for the complete set of contact maps across chromosomes using data from different human and Drosophila cell lines). These similarities are emphasised in the versions of the hybrid contact maps, where c || is binned to distinguish the top and bottom 5% contacts based on value. Download figure Open in new tab Figure 5. Genome-wide c || values recapitulate some Hi-C features shown for Human and Drosophila . Hi-C f vs. Hi-C || k-mer using log observed over expected c f , and absolute and binned values of c || (top and bottom 5% contacts based on c || values in red and blue, respectively; the rest in beige). Black scale bars at the top left corner are 4 Mb long. ( A-B ) Human (hg38) cell types: ESC (H1-hESC) embryonic and LC (GM12878) lymphoblastoid cells; ( C ) Drosophila (dm6) cell types: Kc167 embryonic and BG3 neuronal cells. See Supplementary File S3 for other chromosomes. Repeat in the observed sequence complementarity of contacts Based on the DNA/nucleosome self-assembly phenomenon [ 54 – 59 ], repetitive elements, representing similar sequences, have been proposed to mediate a sequence-dependent phase separation of the genome [ 67 ]. Consistently, a combination of computational [ 68 – 70 ] and experimental [ 71 ] findings show that the self-clustering of repeat families is indeed correlated with genome organisation. Given these, we determined whether transposons, which have brought similar sequences to different parts of the genome, could be reinforcers of the observed higher complementarity at persistent contacts by using metrics to measure site distribution of a subfamily between two contacting regions. In Fig. 
6 , we highlight 4 candidate subfamilies, MIRb, MIR, L2a and L2c, likely to be most influential to the observed higher complementarity of persistent contacts (green text in Fig. 6 ). These subfamilies, among the ones with the highest copy numbers, have at least 1 shared site in more than 50% of persistent contacts (c p ≥ 19) ( Fig. 6A ), and higher median site skew at persistent compared with variable contacts (c p ≤ 3) ( Fig. 6B ). Based on the site distribution metrics, their sites tend to be more distributed between persistent contact regions even relative to other high-copy-number subfamilies like AluJb and AluSx (∼2000 sites more than L2c). MIRb was the only subfamily that had a higher median shared number at persistent contacts ( Fig. 6C ). MIRb does have the highest copy number among the subfamilies, but it is not the only one that could have 2 shared sites because the other 3 subfamilies had median site total of ≥ 4 sites ( Fig. 6D ) at persistent contacts. MIR, L2a and L2c have mean shared numbers at persistent contacts significantly higher than the variable counterparts. As for the rest of the subfamilies, since only contacts with at least 1 site could be considered in the site skew calculation, having a median site skew of 1 means that most of the (long-range, intra-chromosomal) contacts these subfamilies have inserted on have no shared site. Download figure Open in new tab Figure 6. Repeat contribution to the observed sequence complementarity of contacts. Transposon subfamily site distribution at persistent (c p ≥ 19) vs. variable contacts (c p ≤ 3). Given i and j forming a contact, and N i and N j being the number of subfamily sites in i and j , A shows the fraction of contacts with at least 1 shared site, i.e. min( N i , N j ) ≥ 1. In green text are subfamilies with ≥ 1 shared number at more than 50% of contacts. 
B-D show medians of the distributions at persistent and variable contacts of the following metrics: B site skew equal to |N i – N j | / (N i + N j ), C shared number of sites i.e. min( N i , N j ), and D total number of sites of a contact i.e. N i + N j . In yellow text are additional subfamilies with median site skew not equal to 1 for both distributions. Only contacts with at least 1 site can have a valid site skew value. All subfamilies from the UCSC hg19 RepeatMasker annotation table were used and each data point, representing a subfamily, is coloured with a transparency level of less than 1, whereby darker areas indicate the overlap of many points. For all named subfamilies, the variable and persistent distributions of the metrics are significantly different (p-values < 0.03). E c || k-mer across c p at specific contact gap ranges using repeat-masked genome ( Supplementary Fig. S28 ). Given that repeats represent a significant amount of sequence, about half of the human genome, it is not surprising that repeats are key contributors to the observed complementarity phenomenon. Finding that some high-copy-number subfamilies, particularly the ancient MIRb and MIR transposons as well as L2a and L2c, tend to have more shared sites that are more distributed at persistent contacts supports this and is consistent with earlier aforementioned studies. It should be noted, however, that the site distribution metrics do not directly measure the individual and collective contribution of subfamilies to the total degree of complementarity of contacts based on the length and sequence of the remnants. 
Interestingly, when we used the repeat-masked genome to analyse a smaller subset of long-range contacts, whose regions were mostly devoid of annotated repeats, even the unmasked portions of persistent contacts in that subset were found to be more similar than that of variable contacts, suggesting that this higher complementarity at the given resolution is a characteristic of persistent contacts that the annotated simple and interspersed repeat sites could not solely account for. Discussion In this study, we aimed to contribute to the understanding of the sequence basis of 3D genome organisation. A sequence determinant learned from one or a few cell types could potentially be applicable to any other type for being a cell-context-invariant feature. However, because a phenotype or phenomenon, such as regions being in contact, is evidently dictated by the interplay of cell-invariant sequence and cell-specific epigenetic drivers, the significant association of a contact with a specific sequence motif or pattern in one or few cell types may not hold in other cell types. This underscores the advantage of our strategy to isolate and investigate the cell-context-persistent contacts (or any phenotype), in this case cell-type persistent ones, for studying the sequence basis of genomic contacts, as well as the contribution of epigenetic features that behave consistently across cell contexts, explored here through comprehensive associations of various chromatin features using data from different cell types. We have found that persistent contacts are predominantly associated with B-compartment/heterochromatin features across different cell types and AT-rich sequence features, which indeed are less dictated by the cell identity, i.e. vary less across cell types (particularly, constitutive heterochromatin regions), in contrast to A-compartment active regions. 
This observation is also linked to the proposed model that B compartmentalisation, which was not linked to any sequence pattern except for preferring AT-rich regions, could be the “default” state of sequences [ 72 ], unless sequences become A-compartment material by e.g. having active TSS sites [ 72 ], which are prone to cell-type specificity depending on the required gene expression profile. Also, with AT-rich duplexes found to interact more favourably than GC-rich ones in vitro [ 73 ], AT-rich features enriched at contacts, could contribute to persistence across cell states, demonstrating how a non-specific or core sequence effect can have influence on which regions preferentially interact. Also, this tight clustering of AT-rich sequences in, generally, heterochromatin regions, could account for some chromosomes, e.g. chromosomes 1 and 9, which are known to have a large portion of constitutive heterochromatin, containing prominent CETI hubs i.e. large clusters of persistent contacts particularly extremely long-range ones ( Fig. 2 , Supplementary File S5 ). The mechanisms involved in the preferential interaction of similar regions, correlated with genome organisation, are yet to be elucidated in detail but have been described in terms of direct and indirect mechanisms. The direct recognition of sequence identity is supported by in vitro [ 55 – 57 , 59 , 73 ] and in vivo (in Neurospora crassa fungus) [ 74 ] studies. In addition, similar DNA sequences could have similar proteins and RNAs co-localised on them, which could be the ones driving the preferential interaction or phase separation [ 71 , 75 ]. Both mechanisms could potentially contribute, and the combination and identity of factors may vary for certain subsets of contacts depending on whichever sequence or epigenetic features are present. 
Our analyses do not thoroughly characterise specific combinations of mechanisms at play, but the association studies have identified a general characteristic, sequence complementarity between regions, that is most pronounced for contacts persistent across different cell contexts. Hence, it is a potential core sequence determinant of genome organisation, which contributes to the “default” tendency of interactions between regions in any cell context, much like the observed favourable association of AT-rich duplexes [ 73 ]. The implication of sequence complementarity in genome organisation along with findings from the characterisation of the persistent contacts also corroborate associations previously brought up elsewhere, providing insight on the relationship between genome organisation and function. For instance, as summarised in [ 76 ], pairing of homologous DNA duplexes has been proposed to have a role in initiating heterochromatin formation and transcription silencing [ 77 ], in mediating cytosine methylation [ 78 ], which produces mutation hotspots across the genome, and in generating supercoiling, which could affect topoisomerase-dependent long genes [ 79 ]. Findings presented here prompt further computational and experimental investigations. The contribution of sequence to genome organisation could be quantitatively assessed, both at long and shorter scales, by comparing models of genome organisation based on the complementarity of sequences (and other sequence-derived features such as k-mer contents calculated in this study and the recently reported quantum mechanical properties of genomic sequence [ 80 ]) with experimental data not only from different cell types but also from other layers e.g. across cell cycle and species. These models can be generated through molecular simulations, as we have attempted ( Supplementary Fig. S29,30 ). 
Quantifying the similarity of regions using the complementarity measures, which goes beyond defining similarity based on a particular feature like repeat site content, could enable dissection of how the degree and pattern of similarity or homology between regions could influence genome organisation, which are relevant based on in silico [ 81 ], in vitro [ 82 ] and in vivo [ 74 ] mechanistic studies of DNA duplex association [ 76 ], and phase separation mechanisms [ 83 ]. Finally, experimental investigations are crucial, particularly, to validate and disentangle the potential direct and/or indirect contributions of genomic sequence to the 3D organisation. This study therefore contributes to the understanding of the relationship between genome sequence and structure by implicating a single parameter, sequence complementarity, as a core factor contributing to the formation of genomic contacts. Along with other works that aimed to understand the encoding of the structure into the sequence (reviewed in [ 9 – 12 ]), this study shows that the complementarity between different parts of the genome may play a role in this encoding, and suggests that organising mechanisms, such as phase separation, are not agnostic to the underlying DNA sequence. Consequently, the DNA could be involved in both direct and indirect manner, and we encourage experimental validation that will help delineate the contribution of sequence to genome organisation, in conjunction with earlier-characterised protein and epigenetic determinants. Data Availability The computer code is available through the GitHub repository: https://github.com/SahakyanLab/GenomicContactDynamics . This study is a purely computational work that relied on these publicly available datasets and software: View this table: View inline View popup Supplementary Data Supplementary Data are available at NAR Genomics & Bioinformatics online. 
Author Contributions Liezel Tamon: Conceptualisation, Formal analysis, Methodology, Software, Validation, Visualisation, Writing – original draft. Aleksandr B. Sahakyan: Conceptualisation, Funding acquisition, Methodology, Resources, Software, Supervision, Writing – original draft. Zahra Fahmi: Formal analysis, Methodology, Writing – review & editing. Rosana Collepardo-Guevara: Funding acquisition, Methodology, Resources, Supervision. James Ashford: Software, Writing – review & editing. Funding This research has been supported by the UK Medical Research Council (MRC) for the MRC Strategic Alliance Funding (MC_UU_12025). Conflict of Interest None declared. Acknowledgements L.T. is grateful to the Jardine Foundation for supporting her DPhil studies. J.A. is thankful to the MRC for funding his DPhil through a WIMM Studentship. The Sahakyan laboratory including this project has been supported by the UK Medical Research Council (MRC) for the MRC Strategic Alliance Funding (MC_UU_12025). UCSF Chimera, used here for 3D genome model visualisation, was developed by the Resource for Biocomputing, Visualization, and Informatics at the University of California, San Francisco, with support from NIH P41-GM103311. The cell-MDCK icon by DBCLS (grey-scaled in this paper) in Fig 1A of this study is licensed under CC-BY 4.0 Unported and was retrieved from bioicons ( https://bioicons.com ). Footnotes https://github.com/SahakyanLab/GenomicContactDynamics References 1. ↵ Dixon JR , Selvaraj S , Yue F , et al. Topological domains in mammalian genomes identified by analysis of chromatin interactions . Nature . 2012 ; 485 ( 7398 ): 376 – 380 . doi: 10.1038/nature11082 OpenUrl CrossRef PubMed Web of Science 2. Nora EP , Lajoie BR , Schulz EG , et al. Spatial partitioning of the regulatory landscape of the X-inactivation centre . Nature . 2012 ; 485 ( 7398 ): 381 – 385 . doi: 10.1038/nature11049 OpenUrl CrossRef PubMed Web of Science 3. ↵ Sexton T , Yaffe E , Kenigsberg E , et al. 
Three-Dimensional Folding and Functional Organization Principles of the Drosophila Genome . Cell . 2012 ; 148 ( 3 ): 458 – 472 . doi: 10.1016/j.cell.2012.01.010 OpenUrl CrossRef PubMed Web of Science 4. ↵ Lieberman-Aiden E , Berkum NL van , Williams L , et al. Comprehensive Mapping of Long-Range Interactions Reveals Folding Principles of the Human Genome . Science . 2009 ; 326 ( 5950 ): 289 – 293 . doi: 10.1126/science.1181369 OpenUrl Abstract / FREE Full Text 5. ↵ Rosa A , Zimmer C . Chapter Nine - Computational Models of Large-Scale Genome Architecture . In: Hancock R , Jeon KW , eds. International Review of Cell and Molecular Biology. Vol 307 . New Models of the Cell Nucleus: Crowding, Entropic Forces, Phase Separation, and Fractals. Academic Press ; 2014 : 275 – 349 . doi: 10.1016/B978-0-12-800046-5.00009-6 OpenUrl CrossRef PubMed 6. ↵ Misteli T . The Self-Organizing Genome: Principles of Genome Architecture and Function . Cell . 2020 ; 183 ( 1 ): 28 – 45 . doi: 10.1016/j.cell.2020.09.014 OpenUrl CrossRef PubMed 7. ↵ Sima J , Chakraborty A , Dileep V , et al. Identifying cis Elements for Spatiotemporal Control of Mammalian DNA Replication . Cell . 2019 ; 176 ( 4 ): 816 – 830.e18 . doi: 10.1016/j.cell.2018.11.036 OpenUrl CrossRef PubMed 8. ↵ Hsieh THS , Cattoglio C , Slobodyanyuk E , et al. Resolving the 3D Landscape of Transcription-Linked Mammalian Chromatin Folding . Molecular Cell . 2020 ; 78 ( 3 ): 539 – 553.e8 . doi: 10.1016/j.molcel.2020.03.002 OpenUrl CrossRef PubMed 9. ↵ Quan H , Yang Y , Liu S , Tian H , Xue Y , Gao YQ . Chromatin structure changes during various processes from a DNA sequence view . Current Opinion in Structural Biology . 2020 ; 62 : 1 – 8 . doi: 10.1016/j.sbi.2019.10.010 OpenUrl CrossRef PubMed 10. Mondal M , Yang L , Cai Z , Patra P , Gao YQ . A perspective on the molecular simulation of DNA from structural and functional aspects . Chem Sci. Published online 2021 :10.1039.D0SC05329E. 
doi: 10.1039/D0SC05329E OpenUrl CrossRef 11. Bernardi G . The “Genomic Code”: DNA Pervasively Moulds Chromatin Structures Leaving no Room for “Junk.” Life . 2021 ; 11 ( 4 ): 342 . doi: 10.3390/life11040342 OpenUrl CrossRef PubMed 12. ↵ King JT , Shakya A . Phase separation of DNA: From past to present . Biophysical Journal . 2021 ; 120 ( 7 ): 1139 – 1149 . doi: 10.1016/j.bpj.2021.01.033 OpenUrl CrossRef PubMed 13. ↵ van Steensel B , Dekker J . Genomics tools for unraveling chromosome architecture . Nat Biotechnol . 2010 ; 28 ( 10 ): 1089 – 1095 . doi: 10.1038/nbt.1680 OpenUrl CrossRef PubMed Web of Science 14. Kempfer R , Pombo A . Methods for mapping 3D chromosome architecture . Nature Reviews Genetics . 2020 ; 21 ( 4 ): 207 – 226 . doi: 10.1038/s41576-019-0195-2 OpenUrl CrossRef PubMed 15. ↵ Jerković I , Cavalli G. Understanding 3D genome organization by multidisciplinary methods . Nat Rev Mol Cell Biol . 2021 ; 22 ( 8 ): 511 – 528 . doi: 10.1038/s41580-021-00362-w OpenUrl CrossRef 16. ↵ Sood V , Misteli T . The stochastic nature of genome organization and function . Current Opinion in Genetics & Development . 2022 ; 72 : 45 – 52 . doi: 10.1016/j.gde.2021.10.004 OpenUrl CrossRef PubMed 17. ↵ Schwarzer W , Abdennur N , Goloborodko A , et al. Two independent modes of chromatin organization revealed by cohesin removal . Nature . 2017 ; 551 ( 7678 ): 51 – 56 . doi: 10.1038/nature24281 OpenUrl CrossRef PubMed 18. ↵ Finn EH , Misteli T . Molecular basis and biological function of variability in spatial genome organization . Science . 2019 ; 365 ( 6457 ). doi: 10.1126/science.aaw9498 OpenUrl Abstract / FREE Full Text 19. ↵ Wachsmuth M , Knoch TA , Rippe K . Dynamic properties of independent chromatin domains measured by correlation spectroscopy in living cells . Epigenetics & Chromatin . 2016 ; 9 ( 1 ): 57 . doi: 10.1186/s13072-016-0093-1 OpenUrl CrossRef PubMed 20. ↵ Finn EH , Pegoraro G , Brandão HB , et al. 
Extensive Heterogeneity and Intrinsic Variation in Spatial Genome Organization . Cell . 2019 ; 176 ( 6 ): 1502 – 1515.e10 . doi: 10.1016/j.cell.2019.01.020 OpenUrl CrossRef PubMed 21. ↵ Waszak SM , Delaneau O , Gschwind AR , et al. Population Variation and Genetic Control of Modular Chromatin Architecture in Humans . Cell . 2015 ; 162 ( 5 ): 1039 – 1050 . doi: 10.1016/j.cell.2015.08.001 OpenUrl CrossRef PubMed 22. ↵ Schmitt AD , Hu M , Jung I , et al. A Compendium of Chromatin Contact Maps Reveals Spatially Active Regions in the Human Genome . Cell Reports . 2016 ; 17 ( 8 ): 2042 – 2059 . doi: 10.1016/j.celrep.2016.10.061 OpenUrl CrossRef PubMed 23. ↵ Naumova N , Imakaev M , Fudenberg G , et al. Organization of the Mitotic Chromosome . Science . 2013 ; 342 ( 6161 ): 948 – 953 . doi: 10.1126/science.1236083 OpenUrl Abstract / FREE Full Text 24. ↵ Nagano T , Lubling Y , Várnai C , et al. Cell-cycle dynamics of chromosomal organization at single-cell resolution . Nature . 2017 ; 547 ( 7661 ): 61 – 67 . doi: 10.1038/nature23001 OpenUrl CrossRef PubMed 25. ↵ Boettiger AN , Bintu B , Moffitt JR , et al. Super-resolution imaging reveals distinct chromatin folding for different epigenetic states . Nature . 2016 ; 529 ( 7586 ): 418 – 422 . doi: 10.1038/nature16496 OpenUrl CrossRef PubMed 26. ↵ Torre-Ubieta L de la , Stein JL , Won H , et al. The Dynamic Landscape of Open Chromatin during Human Cortical Neurogenesis . Cell . 2018 ; 172 ( 1 ): 289 – 304.e18 . doi: 10.1016/j.cell.2017.12.014 OpenUrl CrossRef PubMed 27. ↵ Paulsen J , Liyakat Ali TM , Nekrasov M , et al. Long-range interactions between topologically associating domains shape the four-dimensional genome during differentiation . Nat Genet . 2019 ; 51 ( 5 ): 835 – 843 . doi: 10.1038/s41588-019-0392-0 OpenUrl CrossRef 28. ↵ Nagano T , Lubling Y , Stevens TJ , et al. Single-cell Hi-C reveals cell-to-cell variability in chromosome structure . Nature . 2013 ; 502 ( 7469 ): 59 – 64 . 
doi: 10.1038/nature12593 OpenUrl CrossRef PubMed Web of Science 29. ↵ Bintu B , Mateo LJ , Su JH , et al. Super-resolution chromatin tracing reveals domains and cooperative interactions in single cells . Science . 2018 ; 362 ( 6413 ). doi: 10.1126/science.aau1783 OpenUrl Abstract / FREE Full Text 30. ↵ Su JH , Zheng P , Kinrot SS , Bintu B , Zhuang X . Genome-Scale Imaging of the 3D Organization and Transcriptional Activity of Chromatin . Cell . 2020 ; 182 ( 6 ): 1641 – 1659.e26 . doi: 10.1016/j.cell.2020.07.032 OpenUrl CrossRef PubMed 31. ↵ Benjamini Y , Hochberg Y . Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing . Journal of the Royal Statistical Society Series B (Methodological ) . 1995 ; 57 ( 1 ): 289 – 300 . OpenUrl CrossRef PubMed Web of Science 32. ↵ Hu M , Deng K , Selvaraj S , Qin Z , Ren B , Liu JS . HiCNorm: removing biases in Hi-C data via Poisson regression . Bioinformatics . 2012 ; 28 ( 23 ): 3131 – 3133 . doi: 10.1093/bioinformatics/bts570 OpenUrl CrossRef PubMed Web of Science 33. ↵ Tsybulskyi V , Mounir M , Meyer IM . R-chie: a web server and R package for visualizing cis and trans RNA–RNA, RNA–DNA and DNA–DNA interactions . Nucleic Acids Research . 2020 ; 48 ( 18 ): e105 . doi: 10.1093/nar/gkaa708 OpenUrl CrossRef PubMed 34. ↵ Gel B , Díez-Villanueva A , Serra E , Buschbeck M , Peinado MA , Malinverni R . regioneR: an R/Bioconductor package for the association analysis of genomic regions based on permutation tests . Bioinformatics. Published online January 15 , 2016 :btv562. doi: 10.1093/bioinformatics/btv562 OpenUrl CrossRef PubMed Web of Science 35. ↵ The Gtex Consortium, Ardlie KG , Deluca DS , et al. The Genotype-Tissue Expression (GTEx) pilot analysis: Multitissue gene regulation in humans . Science . 2015 ; 348 ( 6235 ): 648 – 660 . doi: 10.1126/science.1262110 OpenUrl Abstract / FREE Full Text 36. ↵ Papatheodorou I , Moreno P , Manning J , et al. 
Expression Atlas update: from tissues to single cells . Nucleic Acids Research. Published online October 30 , 2019 : gkz947 . doi: 10.1093/nar/gkz947 OpenUrl CrossRef 37. ↵ Fagerberg L , Hallström BM , Oksvold P , et al. Analysis of the Human Tissue-specific Expression by Genome-wide Integration of Transcriptomics and Antibody-based Proteomics * . Molecular & Cellular Proteomics . 2014 ; 13 ( 2 ): 397 – 406 . doi: 10.1074/mcp.M113.035600 OpenUrl Abstract / FREE Full Text 38. ↵ Weddington N , Stuy A , Hiratani I , Ryba T , Yokochi T , Gilbert DM . ReplicationDomain: a visualization tool and comparative database for genome-wide replication timing data . BMC Bioinformatics . 2008 ; 9 ( 1 ): 530 . doi: 10.1186/1471-2105-9-530 OpenUrl CrossRef PubMed 39. ↵ Zhang J , Bajari R , Andric D , et al. The International Cancer Genome Consortium Data Portal . Nat Biotechnol . 2019 ; 37 ( 4 ): 367 – 369 . doi: 10.1038/s41587-019-0055-9 OpenUrl CrossRef PubMed 40. ↵ Šošić M , Šikić M . Edlib: a C/C ++ library for fast, exact sequence alignment using edit distance . Bioinformatics . 2017 ; 33 ( 9 ): 1394 – 1395 . doi: 10.1093/bioinformatics/btw753 OpenUrl CrossRef PubMed 41. ↵ Tulpan D , Andronescu M , Leger S . Free energy estimation of short DNA duplex hybridizations . BMC Bioinformatics . 2010 ; 11 ( 1 ): 105 . doi: 10.1186/1471-2105-11-105 OpenUrl CrossRef PubMed 42. ↵ Johnson NAG , Tamon L , Liu X , Sahakyan AB . ROptimus: a parallel general-purpose adaptive optimization engine . Bioinformatics . 2023 ; 39 ( 5 ): btad292 . doi: 10.1093/bioinformatics/btad292 OpenUrl CrossRef PubMed 43. ↵ Reiff SB , Schroeder AJ , Kırlı K , et al. The 4D Nucleome Data Portal as a resource for searching and visualizing curated nucleomics data . Nat Commun . 2022 ; 13 ( 1 ): 2365 . doi: 10.1038/s41467-022-29697-4 OpenUrl CrossRef PubMed 44. ↵ Zufferey M , Tavernari D , Oricchio E , Ciriello G . Comparison of computational methods for the identification of topologically associating domains . 
Genome Biology . 2018 ; 19 ( 1 ): 217 . doi: 10.1186/s13059-018-1596-9 OpenUrl CrossRef PubMed 45. ↵ Fudenberg G , Imakaev M , Lu C , Goloborodko A , Abdennur N , Mirny LA . Formation of Chromosomal Domains by Loop Extrusion . Cell Rep . 2016 ; 15 ( 9 ): 2038 – 2049 . doi: 10.1016/j.celrep.2016.04.085 OpenUrl CrossRef PubMed 46. ↵ Ganji M , Shaltiel IA , Bisht S , et al. Real-time imaging of DNA loop extrusion by condensin . Science . 2018 ; 360 ( 6384 ): 102 – 105 . doi: 10.1126/science.aar7831 OpenUrl Abstract / FREE Full Text 47. ↵ Lajoie BR , Dekker J , Kaplan N . The Hitchhiker’s guide to Hi-C analysis: Practical guidelines . Methods . 2015 ; 72 : 65 – 75 . doi: 10.1016/j.ymeth.2014.10.031 OpenUrl CrossRef PubMed 48. ↵ Yang Y , Zhang Y , Ren B , Dixon JR , Ma J . Comparing 3D Genome Organization in Multiple Species Using Phylo-HMRF . Cell Systems . 2019 ; 8 ( 6 ): 494 – 505 .e14. doi: 10.1016/j.cels.2019.05.011 OpenUrl CrossRef PubMed 49. ↵ Liu S , Zhang L , Quan H , et al. From 1D sequence to 3D chromatin dynamics and cellular functions: a phase separation perspective . Nucleic Acids Research . 2018 ; 46 ( 18 ): 9367 – 9383 . doi: 10.1093/nar/gky633 OpenUrl CrossRef PubMed 50. ↵ Piovesan A , Pelleri MC , Antonaros F , Strippoli P , Caracausi M , Vitale L . On the length, weight and GC content of the human genome . BMC Research Notes . 2019 ; 12 ( 1 ): 106 . doi: 10.1186/s13104-019-4137-z OpenUrl CrossRef 51. ↵ Sahakyan AB , Balasubramanian S . Long genes and genes with multiple splice variants are enriched in pathways linked to cancer and other multigenic diseases . BMC Genomics . 2016 ; 17 ( 1 ): 225 . doi: 10.1186/s12864-016-2582-9 OpenUrl CrossRef 52. ↵ Schuster-Böckler B , Lehner B . Chromatin organization is a major influence on regional mutation rates in human cancer cells . Nature . 2012 ; 488 ( 7412 ):504-507. doi: 10.1038/nature11273 OpenUrl CrossRef PubMed Web of Science 53. ↵ Xiong K , Ma J . 
Revealing Hi-C subcompartments by imputing inter-chromosomal chromatin interactions . Nature Communications . 2019 ; 10 ( 1 ): 5069 . doi: 10.1038/s41467-019-12954-4 OpenUrl CrossRef PubMed 54. ↵ Kornyshev AA , Leikin S . Sequence Recognition in the Pairing of DNA Duplexes . Phys Rev Lett . 2001 ; 86 ( 16 ): 3666 – 3669 . doi: 10.1103/PhysRevLett.86.3666 OpenUrl CrossRef PubMed Web of Science 55. ↵ Inoue S , Sugiyama S , Travers AA , Ohyama T . Self-Assembly of Double-Stranded DNA Molecules at Nanomolar Concentrations . Biochemistry . 2007 ; 46 ( 1 ): 164 – 171 . doi: 10.1021/bi061539y OpenUrl CrossRef PubMed Web of Science 56. Baldwin GS , Brooks NJ , Robson RE , et al. DNA Double Helices Recognize Mutual Sequence Homology in a Protein Free Environment . J Phys Chem B . 2008 ; 112 ( 4 ): 1060 – 1064 . doi: 10.1021/jp7112297 OpenUrl CrossRef PubMed 57. ↵ Danilowicz C , Lee C , Kim K , et al. Single molecule detection of direct, homologous, DNA/DNA pairing . Proceedings of the National Academy of Sciences of the United States of America . 2009 ; 106 : 19824 – 19829 . doi: 10.1073/pnas.0911214106 OpenUrl Abstract / FREE Full Text 58. ↵ Ohyama T . New Aspects of Magnesium Function: A Key Regulator in Nucleosome Self-Assembly, Chromatin Folding and Phase Separation . International Journal of Molecular Sciences . 2019 ; 20 ( 17 ): 4232 . doi: 10.3390/ijms20174232 OpenUrl CrossRef PubMed 59. ↵ Nishikawa Jichi , Ohyama T. Selective association between nucleosomes with identical DNA sequences . Nucleic Acids Research . 2013 ; 41 ( 3 ): 1544 – 1554 . doi: 10.1093/nar/gks1269 OpenUrl CrossRef PubMed 60. ↵ Kasamatsu H , Robberson DL , Vinograd J . A Novel Closed-Circular Mitochondrial DNA with Properties of a Replicating Intermediate . PNAS . 1971 ; 68 ( 9 ): 2252 – 2257 . doi: 10.1073/pnas.68.9.2252 OpenUrl Abstract / FREE Full Text 61. ↵ Thomas M , White RL , Davis RW . Hybridization of RNA to double-stranded DNA: formation of R-loops . PNAS . 1976 ; 73 ( 7 ): 2294 – 2298 . 
doi: 10.1073/pnas.73.7.2294 OpenUrl Abstract / FREE Full Text 62. ↵ Kim A , Wang GG . R-loop and its functions at the regulatory interfaces between transcription and (epi)genome . Biochimica et Biophysica Acta (BBA) - Gene Regulatory Mechanisms . 2021 ; 1864 ( 11 ): 194750 . doi: 10.1016/j.bbagrm.2021.194750 OpenUrl CrossRef PubMed 63. ↵ Felsenfeld G , Rich A . Studies on the formation of two- and three-stranded polyribonucleotides . Biochimica et Biophysica Acta . 1957 ; 26 ( 3 ): 457 – 468 . doi: 10.1016/0006-3002(57)90091-4 OpenUrl CrossRef PubMed Web of Science 64. ↵ Buske FA , Mattick JS , Bailey TL . Potential in vivo roles of nucleic acid triple-helices . RNA Biology . 2011 ; 8 ( 3 ): 427 – 439 . doi: 10.4161/rna.8.3.14999 OpenUrl CrossRef PubMed Web of Science 65. ↵ Rothemund PWK . Folding DNA to create nanoscale shapes and patterns . Nature . 2006 ; 440 ( 7082 ): 297 – 302 . doi: 10.1038/nature04586 OpenUrl CrossRef PubMed Web of Science 66. ↵ Barzel A , Kupiec M . Finding a match: how do homologous sequences get together for recombination? Nat Rev Genet . 2008 ; 9 ( 1 ): 27 – 37 . doi: 10.1038/nrg2224 OpenUrl CrossRef PubMed Web of Science 67. ↵ Tang SJ . Potential Role of Phase Separation of Repetitive DNA in Chromosomal Organization . Genes . 2017 ; 8 ( 10 ): 279 . doi: 10.3390/genes8100279 OpenUrl CrossRef 68. ↵ Cournac A , Koszul R , Mozziconacci J . The 3D folding of metazoan genomes correlates with the association of similar repetitive elements . Nucleic Acids Research . 2016 ; 44 ( 1 ): 245 – 255 . doi: 10.1093/nar/gkv1292 OpenUrl CrossRef PubMed 69. Nikumbh S , Pfeifer N . Genetic sequence-based prediction of long-range chromatin interactions suggests a potential role of short tandem repeat sequences in genome organization . BMC Bioinformatics . 2017 ; 18 ( 1 ): 218 . doi: 10.1186/s12859-017-1624-x OpenUrl CrossRef PubMed 70. ↵ Winter DJ , Ganley ARD , Young CA , et al. 
Repeat elements organise 3D genome structure and mediate transcription in the filamentous fungus Epichloë festucae . PLOS Genetics . 2018 ; 14 ( 10 ): e1007467 . doi: 10.1371/journal.pgen.1007467 OpenUrl CrossRef PubMed 71. ↵ Lu JY , Chang L , Li T , et al. Homotypic clustering of L1 and B1/Alu repeats compartmentalizes the 3D genome . Cell Res . 2021 ; 31 ( 6 ): 613 – 630 . doi: 10.1038/s41422-020-00466-6 OpenUrl CrossRef 72. ↵ Zhou J . Sequence-based modeling of three-dimensional genome architecture from kilobase to chromosome scale . Nat Genet . 2022 ; 54 ( 5 ): 725 – 734 . doi: 10.1038/s41588-022-01065-4 OpenUrl CrossRef PubMed 73. ↵ Yoo J , Kim H , Aksimentiev A , Ha T . Direct evidence for sequence-dependent attraction between double-stranded DNA controlled by methylation . Nat Commun . 2016 ; 7 ( 1 ): 11045 . doi: 10.1038/ncomms11045 OpenUrl CrossRef PubMed 74. ↵ Gladyshev E , Kleckner N . Direct recognition of homology between double helices of DNA in Neurospora crassa . Nat Commun . 2014 ; 5 ( 1 ): 3509 . doi: 10.1038/ncomms4509 OpenUrl CrossRef PubMed 75. ↵ Ding DQ , Okamasa K , Katou Y , et al. Chromosome-associated RNA–protein complexes promote pairing of homologous chromosomes during meiosis in Schizosaccharomyces pombe . Nat Commun . 2019 ; 10 ( 1 ): 5598 . doi: 10.1038/s41467-019-13609-0 OpenUrl CrossRef PubMed 76. ↵ Mazur AK , Nguyen TS , Gladyshev E . Direct Homologous dsDNA–dsDNA Pairing: How, Where, and Why? Journal of Molecular Biology . 2020 ; 432 ( 3 ): 737 – 744 . doi: 10.1016/j.jmb.2019.11.005 OpenUrl CrossRef PubMed 77. ↵ Gladyshev E , Kleckner N . DNA sequence homology induces cytosine-to-thymine mutation by a heterochromatin-related pathway in Neurospora . Nat Genet . 2017 ; 49 ( 6 ): 887 – 894 . doi: 10.1038/ng.3857 OpenUrl CrossRef PubMed 78. ↵ Bender J . Cytosine methylation of repeated sequences in eukaryotes: the role of DNA pairing . Trends in Biochemical Sciences . 1998 ; 23 ( 7 ): 252 – 256 . 
doi: 10.1016/S0968-0004(98)01225-0 OpenUrl CrossRef PubMed Web of Science 79. ↵ King IF , Yandava CN , Mabb AM , et al. Topoisomerases facilitate transcription of long genes linked to autism . Nature . 2013 ; 501 ( 7465 ):58-62. doi: 10.1038/nature12504 OpenUrl CrossRef PubMed Web of Science 80. ↵ Masuda K , Abdullah AA , Sahakyan AB . Quantum mechanical electronic and geometric parameters for DNA k-mers as features for machine learning . Published online January 26, 2023:2023.01.25.525597. doi: 10.1101/2023.01.25.525597 OpenUrl Abstract / FREE Full Text 81. ↵ Mazur AK . Homologous Pairing between Long DNA Double Helices . Phys Rev Lett . 2016 ; 116 ( 15 ): 158101 . doi: 10.1103/PhysRevLett.116.158101 OpenUrl CrossRef PubMed 82. ↵ Wang X , Zhang X , Mao C , Seeman NC . Double-stranded DNA homology produces a physical signature . Proceedings of the National Academy of Sciences . 2010 ; 107 ( 28 ): 12547 – 12552 . doi: 10.1073/pnas.1000105107 OpenUrl Abstract / FREE Full Text 83. ↵ Erdel F , Rippe K . Formation of Chromatin Subcompartments by Phase Separation . Biophysical Journal . 2018 ; 114 ( 10 ): 2262 – 2270 . doi: 10.1016/j.bpj.2018.03.011 OpenUrl CrossRef PubMed 84. Flicek P , Amode MR , Barrell D , et al. Ensembl 2014 . Nucleic Acids Research . 2014 ; 42 ( D1 ): D749 – D755 . doi: 10.1093/nar/gkt1196 OpenUrl CrossRef PubMed Web of Science 85. Cunningham F , Allen JE , Allen J , et al. Ensembl 2022 . Nucleic Acids Research . 2022 ; 50 ( D1 ): D988 – D995 . doi: 10.1093/nar/gkab1049 OpenUrl CrossRef PubMed 86. Adams MD , Celniker SE , Holt RA , et al. The Genome Sequence of Drosophila melanogaster . Science . 2000 ; 287 ( 5461 ): 2185 – 2195 . doi: 10.1126/science.287.5461.2185 OpenUrl Abstract / FREE Full Text 87. dos Santos G , Schroeder AJ , Goodman JL , et al. FlyBase: introduction of the Drosophila melanogaster Release 6 reference genome assembly and large-scale migration of genome annotations . Nucleic Acids Research . 2015 ; 43 ( D1 ): D690 – D697 . 
doi: 10.1093/nar/gku1099 OpenUrl CrossRef PubMed 88. Chathoth KT , Zabet NR . Chromatin architecture reorganization during neuronal cell differentiation in Drosophila genome . Genome Res . 2019 ; 29 ( 4 ): 613 – 625 . doi: 10.1101/gr.246710.118 OpenUrl Abstract / FREE Full Text 89. The ENCODE Project Consortium. An integrated encyclopedia of DNA elements in the human genome . Nature . 2012 ; 489 ( 7414 ): 57 – 74 . doi: 10.1038/nature11247 OpenUrl CrossRef PubMed Web of Science 90. Davis CA , Hitz BC , Sloan CA , et al. The Encyclopedia of DNA elements (ENCODE): data portal update . Nucleic Acids Research . 2018 ; 46 ( D1 ): D794 – D801 . doi: 10.1093/nar/gkx1081 OpenUrl CrossRef PubMed 91. Kundaje A , Meuleman W , Ernst J , et al. Integrative analysis of 111 reference human epigenomes . Nature . 2015 ; 518 ( 7539 ): 317 – 330 . doi: 10.1038/nature14248 OpenUrl CrossRef PubMed 92. Karolchik D , Hinrichs AS , Furey TS , et al. The UCSC Table Browser data retrieval tool . Nucleic Acids Research . 2004 ; 32 ( suppl_1 ): D493 – D496 . doi: 10.1093/nar/gkh103 OpenUrl CrossRef PubMed Web of Science 93. Akgol Oksuz B , Yang L , Abraham S , et al. Systematic evaluation of chromosome conformation capture assays . Nat Methods . 2021 ; 18 ( 9 ): 1046 – 1055 . doi: 10.1038/s41592-021-01248-7 OpenUrl CrossRef PubMed 94. Krietenstein N , Abraham S , Venev SV , et al. Ultrastructural Details of Mammalian Chromosome Architecture . Molecular Cell . 2020 ; 78 ( 3 ): 554 – 565 .e7. doi: 10.1016/j.molcel.2020.03.003 OpenUrl CrossRef PubMed 95. Huang DW , Sherman BT , Lempicki RA . Systematic and integrative analysis of large gene lists using DAVID bioinformatics resources . Nat Protoc . 2009 ; 4 ( 1 ): 44 – 57 . doi: 10.1038/nprot.2008.211 OpenUrl CrossRef PubMed Web of Science 96. Huang DW , Sherman BT , Lempicki RA . Bioinformatics enrichment tools: paths toward the comprehensive functional analysis of large gene lists . Nucleic Acids Research . 2009 ; 37 ( 1 ): 1 – 13 . 
doi: 10.1093/nar/gkn923 OpenUrl CrossRef PubMed Web of Science View the discussion thread. Back to top Previous Next Posted March 17, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Analysis of long-range contacts across cell types outlines a core sequence determinant of 3D genome organisation Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Analysis of long-range contacts across cell types outlines a core sequence determinant of 3D genome organisation Liezel Tamon , Zahra Fahmi , James Ashford , Rosana Collepardo-Guevara , Aleksandr B. Sahakyan bioRxiv 2025.03.16.643527; doi: https://doi.org/10.1101/2025.03.16.643527 Share This Article: Copy Citation Tools Analysis of long-range contacts across cell types outlines a core sequence determinant of 3D genome organisation Liezel Tamon , Zahra Fahmi , James Ashford , Rosana Collepardo-Guevara , Aleksandr B. 
Sahakyan bioRxiv 2025.03.16.643527; doi: https://doi.org/10.1101/2025.03.16.643527 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genomics Subject Areas All Articles Animal Behavior and Cognition (7622) Biochemistry (17650) Bioengineering (13871) Bioinformatics (41880) Biophysics (21424) Cancer Biology (18566) Cell Biology (25461) Clinical Trials (138) Developmental Biology (13365) Ecology (19866) Epidemiology (2067) Evolutionary Biology (24290) Genetics (15590) Genomics (22475) Immunology (17713) Microbiology (40328) Molecular Biology (17148) Neuroscience (88473) Paleontology (666) Pathology (2827) Pharmacology and Toxicology (4816) Physiology (7635) Plant Biology (15114) Scientific Communication and Education (2044) Synthetic Biology (4286) Systems Biology (9815) Zoology (2268)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-21T05:10:58.409756+00:00

License: CC-BY-NC-4.0