Predicting 3D Chromatin Interactions Using Transformer-Enhanced Deep Learning Models

doi:10.1101/2025.04.10.647995

Predicting 3D Chromatin Interactions Using Transformer-Enhanced Deep Learning Models

2025 · doi:10.1101/2025.04.10.647995

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 31,996 characters · extracted from preprint-html · click to expand

Predicting 3D Chromatin Interactions Using Transformer-Enhanced Deep Learning Models | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Predicting 3D Chromatin Interactions Using Transformer-Enhanced Deep Learning Models Kexin Xu , View ORCID Profile Li Shen doi: https://doi.org/10.1101/2025.04.10.647995 Kexin Xu 1 Department of Computer Science, New York University Find this author on Google Scholar Find this author on PubMed Search for this author on this site Li Shen 2 Department of Artificial Intelligence, and Human Health, Department of Neuroscience, Icahn School of Medicine at Mount Sinai Find this 
author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Li Shen For correspondence: li.shen{at}mssm.edu Abstract Full Text Info/History Metrics Preview PDF Abstract The three-dimensional (3D) structure of the human genome is essential for regulating gene expression and cellular functions. Chromatin interactions bring distant genomic regions into physical contact, enabling processes like gene regulation, DNA replication, and repair. Disruptions in this organization can lead to diseases such as cancer and genetic disorders. In this study, we propose a Transformer-based deep learning model to predict the chromatin interactions from DNA sequences. By developing a streamlined and efficient data pipeline to handle the sparse and noisy high-throughput chromosome conformation capture (Hi-C) sequencing data, our approach improves both data processing speed and model performance. The Transformer’s ability to capture long-range interactions among genomic regions via attention mechanism, combined with nucleotide position encoding, enables more accurate predictions than purely convolution-based models. This work highlights the potential of Transformer-based network architectures to advance our understanding of genome organization and paves the way for future research with large datasets and advanced network designs. 1 Introduction The three-dimensional (3D) organization of the human genome plays a crucial role in gene regulation, cellular processes, and disease mechanisms. Chromatin interactions, defined as physical contacts between distant genomic regions, facilitate essential processes such as gene expression, DNA replication, and repair [ 1 ][ 2 ]. Disruptions in chromatin organization are implicated in diseases like cancer [ 3 ] and genetic disorders [ 4 ], highlighting the need to understand the spatial arrangement of the genome within the cell nucleus. 
Hi-C experiments [ 5 ] provide genome-wide maps of chromatin interactions, offering valuable insights into genome folding. However, these methods are resource-intensive, costly, and limited in scalability, motivating the development of computational approaches to predict chromatin interactions directly from DNA sequences. Early efforts using convolutional neural networks (CNNs), such as Akita [ 6 ] and DeepC [ 7 ], demonstrated the potential of sequence-based models for predicting chromatin interactions. However, CNNs are inherently limited in their ability to capture long-range dependencies due to the size of the receptive fields, restricting their effectiveness in modeling hierarchical chromatin organization, such as enhancer-promoter loops and topologically associating domains (TADs). Transformer-based architectures, originally developed for natural language processing [ 8 ], have emerged as a powerful solution for modeling long-range dependencies through self-attention mechanisms. In genomics, models like Enformer [ 9 ] have demonstrated the potential of Transformers to improve the prediction of gene expression by integrating DNA sequence information across nearly 200Kb regions. Recent advancements, including DNABERT-2 [ 10 ] and the Nucleotide Transformer [ 11 ], have further showcased the Transformer’s ability to learn contextual relationships in long DNA sequences. Despite these successes, their application to chromatin interaction prediction remains underexplored. To address this gap, we propose DeepChromI, a Transformer-based method designed to predict chromatin interaction matrices using DNA sequences. DeepChromI introduces several key innovations: 1. Training on the largest Hi-C dataset to date: we utilize 117 experiments across 39 cell lines, significantly expanding beyond prior works that used only 1–5 Hi-C experiments. 2. 
Transformer-based architecture with domain-specific adaptations: our model incorporates novel hierarchical positional encodings to effectively capture long-range chromatin interactions. 3. Scalable and biologically relevant predictions: DeepChromI provides biologically meaningful insights into genome organization, gene regulation, and potentially disease mechanisms while addressing scalability challenges across diverse cell lines. Our work bridges the gap between computational predictions and experimental data, advancing the state of the art in predictive genomics and offering a scalable alternative to Hi-C experiments. 2 Methodology 2.1 Data Processing The data processing workflow prepares the input and target for the model: the linear genomic sequences as input and the chromatin interaction matrices as target. Our processing pipeline ensures standardized and high-quality data are produced for model training and evaluation. 2.1.1 Genomic Region Selection and DNA Sequence Processing The workflow begins by selecting specific regions on the genome as defined in a BED file. In this study, the same BED files from the Enformer paper [ 9 ] are used. Each entry in a BED file indicates a genomic interval with a chromosome name, start and end positions. These intervals act as anchors for extracting both DNA sequences and Hi-C interaction data. The DNA sequences are retrieved from the hg38 reference genome. If an interval extends beyond the chromosomal boundaries, the sequence is padded with “N”s to ensure equal sequence length across the dataset. We follow the same train-validation-test split as in the Enformer paper. The length of each interval is modified to be 200Kb using the same center as the original interval to be compatible with the resolution of Hi-C data, which is typically 1Kb, 5Kb or 10Kb. The partitioning yields 34,021 samples for the training set; 2,213 samples for the validation set; and 1,937 samples for the test set. 
Each sequence contains four possible nucleotide bases: (A, C, G, T) and N to represent missing value. We use one-hot encoding to transform the sequences into numerical vectors so that A=[1,0,0,0], C=[0,1,0,0], G=[0,0,1,0], T=[0,0,0,1], and N=[0.25,0.25,0.25,0.25]. This transformation yields fixed-size matrices suitable for model input. 2.1.2 Hi-C Interaction Matrix Processing Hi-C experiments capture the 3D structure of the genome by measuring how frequently different regions of the genome come into physical contact. These interaction frequencies are represented as matrices, where rows and columns correspond to genomic positions, and values indicate the strength of interactions. In this study, a diverse and comprehensive Hi-C dataset comprising 117 high-quality experiments spanning 39 distinct cell lines from the 4D Nucleome (4DN) database [ 12 ] is assembled. This represents the largest publicly available Hi-C dataset to date. Only 4DN samples with high interaction counts, excellent coverage, and 1Kb resolution are selected to ensure data quality. The collection includes widely studied cell lines such as GM12878, K562, HepG2, and H1-ESC, as well as primary and differentiated cells. This breadth enables our model to learn both cell-type-invariant chromatin organizations and cell-type-specific interactions. All datasets are processed using the same normalization method to ensure comparability across different experiments. The interaction matrices corresponding to the genomic regions defined in the BED files are extracted using the hictk package [ 13 ]. Smaller matrices are padded with zeros to ensure uniform matrix dimensions. Missing or invalid values are replaced with zeros. Since the interaction matrices are symmetric, about half of the entries are redundant. To reduce memory usage and computational overhead, only the upper triangular portion of each matrix is extracted, including the main diagonal. It is then flattened into a 1D vector. 
This approach ensures that the model uses only unique chromatin interactions, making training more efficient. 2.1.3 Normalization and Bias Correction Hi-C data typically contain biases arising from factors such as sequencing depth, restriction enzyme efficiency, and genomic sequence features (e.g., GC content, mappability). These biases are addressed through normalization, primarily using the VC_SQRT (Square Root of Vanilla Coverage) method, as described in Rao et al., 2014 [ 14 ]. This approach normalizes matrix values by dividing each entry by the square root of the product of row and column sums, effectively balancing the matrix and reducing sequencing depth biases. By applying VC_SQRT normalization, the influence of high-coverage regions is mitigated, enabling more accurate detection of chromatin interactions and downstream analysis of 3D genome organization. 2.1.4 Handling Distance-Dependent Effects A notable characteristic of Hi-C data is the high interaction frequency along the matrix diagonal, representing short-range genomic interactions. While biologically meaningful, these strong diagonal signals can overshadow long-range interactions. A diagonal offset approach is implemented to exclude two diagonals near the main diagonal from analysis. This technique helps to focus a model’s attention on more biologically relevant long-range interactions and prevent fitting to trivial short-range signals. 2.1.5 Multi-cell Training Strategy A key innovation in this study is the multi-cell training framework. Rather than training separate models for each cell line, all cell lines are processed simultaneously. For each genomic region, interaction matrices are extracted from all cell lines and stacked together. Models are trained to predict interactions for all cell lines at once. This multi-task learning strategy encourages feature reuse since all outputs share the same model weights and biases. 
2.2 Model Architecture DeepChromI integrates convolutional layers, Transformer layers, and residual connections to predict Hi-C interaction matrices from DNA sequences ( Fig. 1 ). The deep neural network processes genomic sequences through multiple stages to capture both local sequence patterns and long-range chromatin interactions. The network starts with an initial 1D convolutional block that processes the input DNA sequences. This block consists of a 1D convolutional layer with an input channel size of 4 (corresponding to the four nucleotides: A, C, G, and T) and an output channel size of 64. The convolution uses an 11×1 kernel with padding to ensure the input sequence length is preserved. The kernel size is chosen to identify DNA sequence motifs. The output is then passed through batch normalization and ReLU activation, followed by a 1D max-pooling layer with a size of 2 to reduce the sequence length and computational cost. Download figure Open in new tab Fig. 1. DeepChromI Model Architecture. After this initial processing stage, the model employs a series of 1D convolutional blocks. Each block consists of a 1D convolutional layer that extracts increasingly complex features. The output channel sizes are defined by a predefined list, alternating between 96 and 48 channels. The convolutional layers use a 3×1 kernel with padding set to 2, allowing the model to learn hierarchical sequence features efficiently. Following each convolutional layer, batch normalization ensures stable training, and a 1D max-pooling layer with a kernel size of 2 progressively reduces the sequence length while retaining the most informative features. Then, a final 1D convolutional block is applied, using a kernel size of 2 with padding set to 1, reducing the feature channels to match the required embedding dimension for the Transformer. This step includes a convolutional layer followed by batch normalization and a ReLU activation function to refine the feature representation. 
Next, the output undergoes linear interpolation to a fixed length of 200 bins, ensuring consistency in input dimensions for the subsequent Transformer encoder. The next stage of the model processes the extracted sequence features through a series of Transformer layers to refine the representation and capture long-range interactions. To incorporate positional information, positional encoding is added to the interpolated features, which preserves the order of genomic positions within the sequence. The combined features are passed through a multi-layer Transformer encoder, which leverages multi-head self-attention to capture both local and long-range dependencies between genomic regions. Based on the approach introduced by Akita [ 6 ], a 1D-to-2D conversion layer is implemented to transform the sequence-based 1D features into a 2D feature map that is suitable for predicting chromatin interactions. This layer computes all pairwise interactions among the genomic bins, converting a (200, 96) tensor into a (200, 200, 96) tensor by averaging the features of each pair of bins. Following Akita’s design, pairwise positional information (i.e., |i-j|) is incorporated as an additional channel, resulting in a (200, 200, 97) tensor. The subsequent 1×1 convolutional block refines this 2D representation while preserving spatial relationships. It is then processed through multiple 2D convolutional blocks with residual connections, enabling modeling of spatial interactions. Each 2D convolutional block uses a 3×3 kernel size. The first block has 96 channels, while subsequent blocks reduce the output to 64 channels, balancing computational efficiency and representational power. All 2D convolutional blocks use ReLU activation, Batch Normalization, and Dropout for regularization. Because Hi-C interaction matrices are symmetric, a Symmetrize2D layer is added after the last convolutional layer to ensure matrix symmetry by averaging the 2D feature map with its own transpose. 
To focus on meaningful long-range interactions, a diagonal offset of 2 is applied to exclude trivial self-interactions near the matrix diagonal. This adjustment directs the model’s attention toward more biologically relevant long-distance chromatin interactions. Furthermore, only the upper triangular portion of the matrix is extracted, reducing redundancy and computational overhead. This design effectively balances precision, biological relevance, and computational efficiency. The final prediction layer consists of linear projection heads that output Hi-C interactions as one 1D vector (size of 19,701) for each of the 117 cell lines, matching our training target format. Additional architectural designs are implemented to accommodate our data augmentation approach. For data augmentation, two strategies are implemented. First, following Akita’s approach, reverse complement transformation is applied randomly with 50% chance, ensuring strand invariance. Second, random sequence shift up to 11 base pairs is applied to improve positional invariance. A switch reverse triangular layer is used to maintain consistency between transformed sequences and their corresponding Hi-C interactions. 2.3 Model Training The hyperparameters are determined through an automated tuning process using the Optuna [ 15 ] package – a popular program for efficient hyperparameter optimization. A search space is defined for key parameters such as the number of Transformer layers and attention heads, dropout rates, and the number of residual and 2D convolutional layers. During this process, Optuna performs multiple trials, where each trial tests a different combination of hyperparameters based on the objective to minimize validation loss. Early stopping is used to prevent overfitting and a learning rate scheduler is adopted for stable convergence. After running 20 trials, the best configuration is automatically selected based on the lowest validation loss achieved. 
This process results in the final model architecture with 15 Transformer layers, four attention heads, a dropout rate of 0.28, two 2D convolutional layers, and four residual layers ( Fig. 1 ). For the training process, a fixed learning rate of 0.0001 coupled with a weight decay of 1e-4 is used. To address potential gradient instability issues, gradient clipping with a maximum norm of 1.0 is implemented. The AdamW optimizer is chosen for its ability to handle adaptive learning rates while incorporating weight decay regularization. Mean Squared Error (MSE) is used as the loss function. To enhance the training dynamics, a cosine annealing warm restart scheduler is implemented. This approach allows dynamic learning rate adjustments throughout the training process, helping to navigate the loss landscape more effectively than a constant learning rate. The scheduler proved to be particularly useful in avoiding local minima and promoting better convergence. During the initial three epochs, a warm-up phase is incorporated to gradually increase the learning rate, which helps to establish stable training dynamics. A maximum of 16 training epochs is set, but training halts if the validation loss shows no improvement for 5 consecutive epochs. The batch size is set to 32. Several key metrics are monitored on the validation set: these include the MSE, Pearson Correlation Coefficient (R), and R² (Coefficient of Determination). Model checkpoints based on validation loss are saved, ultimately selecting the model that achieves the lowest validation loss for our final evaluation. 3 Results In this section, the performance of the CNN-based Akita [ 6 ] model and the Transformer-based DeepChromI model is compared. Both models are trained using the same data pipeline and methodology on 117 Hi-C datasets, spanning 39 distinct cell lines/tissues. 
The results are analyzed in terms of MSE, R and R² on the test set, highlighting the improvements achieved through the integration of Transformer layers and multi-cell learning. 3.1 Model Performance Across Cell Lines/Tissues Fig. 3 Visualization of the predicted and target Hi-C Interaction Matrices. The heatmaps depict a specific region of Chromosome 11: 75,617,088-75,944,288. The left panel displays the predictions generated by DeepChromI, illustrating its prediction of genomic interactions, while the right panel shows the actual interaction data obtained from the experiment. The comparison of model metrics between Akita and DeepChromI is summarized in Table 1 . DeepChromI consistently outperforms Akita in all metrics. DeepChromI demonstrates a reduced MSE of 1.12, compared to 1.15 for Akita. This indicates that our model can better fit the data and generalize across diverse cell lines/tissues. The average R is improved from 0.78 for Akita to 0.79 for DeepChromI, indicating stronger alignment between predicted and actual interaction matrices across multiple cell lines/tissues. Our model also achieves an average R² value of 0.62, outperforming Akita’s R² of 0.61, suggesting a better overall ability to explain the variance in the data. View this table: View inline View popup Download powerpoint Table 1. Average results across 117 Hi-C datasets comparing DeepChromI vs. Akita. 3.2 Cell-Specific Performance Substantial variation is observed in prediction performance across different cell lines/tissues ( Fig. 2 ). HFFc6 shows the highest prediction accuracy with an R of 0.79, while K562 shows the lowest accuracy with an R of 0.12. Other high-performing cell lines include H9-derived cardiac cells: R of 0.70-0.72 and CyT49 endoderm: R of 0.69. Download figure Open in new tab Fig. 2. Pearson correlation (R) across different cell lines/tissues. 
3.3 Visualization of Predicted and Target Hi-C Interaction Matrices The comparison of predicted and target Hi-C interaction matrices is visualized in Fig. 3 , showing an example from the GM12878 cell line (4DNFI1UEG1HD). The Transformer-based DeepChromI successfully captures both local (near-diagonal) and long-range (off-diagonal) interactions, demonstrating its ability to model 3D genome organization effectively. Download figure Open in new tab Fig. 3. Visualization of the predicted and target Hi-C Interaction Matrices. The heatmaps depict a specific region of Chromosome 11: 75,617,088-75,944,288. The left panel displays the predictions generated by DeepChromI, illustrating its prediction of genomic interactions, while the right panel shows the actual interaction data obtained from the experiment. 4 Conclusion DeepChromI advances the prediction of 3D genome organization by leveraging the Transformer architecture and multi-cell training on a large database of 117 Hi-C datasets. The model achieves superior performance over a CNN-based model. Our result also reveals large performance variation across 39 cell lines/tissues. The Transformer’s self-attention mechanism effectively captures long-range chromatin interactions, while our multi-cell training approach enables the learning of both universal and cell-specific chromatin organizations. These improvements enhance our understanding of the sequence-to-structure relationships in the genome and can help to establish a foundation for investigating the impact of genetic variations on 3D genome organization in diseases. Acknowledgement This work was supported in part through the Minerva computational and data resources and staff expertise provided by Scientific Computing and Data at the Icahn School of Medicine at Mount Sinai and supported by the Clinical and Translational Science Awards (CTSA) grant UL1TR004419 from the National Center for Advancing Translational Sciences. Footnotes kx2139{at}nyu.edu References [1]. ↵ G. 
Tesauro , D. S. Touretzky and T.K. Leen Alexander , J.A. & Mozer , M.C. ( 1995 ) Template-based algorithms for connectionist rule extraction . In G. Tesauro , D. S. Touretzky and T.K. Leen (eds.), Advances in Neural Information Processing Systems 7 , pp. 609 – 616 . Cambridge, MA : MIT Press . OpenUrl [2]. ↵ Bower , J.M. & Beeman , D. ( 1995 ) The Book of GENESIS: Exploring Realistic Neural Models with the GEneral NEural SImulation System . New York : TELOS/Springer-Verlag . [3]. ↵ Hasselmo , M.E. , Schnell , E. & Barkai , E. ( 1995 ) Dynamics of learning and recall at excitatory recurrent synapses and cholinergic modulation in rat hippocampal region CA3 . Journal of Neuroscience 15 ( 7 ): 5249 – 5262 . OpenUrl Abstract / FREE Full Text [4]. ↵ Bonev , B. & Cavalli , G. ( 2016 ). Organization and function of the 3D genome . Nature Reviews Genetics 17 , 661 – 678 . OpenUrl CrossRef PubMed [5]. ↵ Rowley , M.J. & Corces , V.G. ( 2018 ). Organizational principles of 3D genome architecture . Nature Reviews Genetics 19 , 789 – 800 . OpenUrl CrossRef PubMed [6]. ↵ Lupiáñez , D.G. , Kraft , K. , Heinrich , V. , Krawitz , P. , Brancati , F. , Klopocki , E. , Horn , D. , Kayserili , H. , Opitz , J.M. , Laxova , R. , Santos-Simarro , F. , Gilbert-Dussardier , B. , Wittler , L. , Borschiwer , M. , Haas , S.A. , Osterwalder , M. , Franke , M. , Timmermann , B. , Hecht , J. , Spielmann , M. , Visel , A. & Mundlos , S. ( 2015 ). Disruptions of topological chromatin domains cause pathogenic rewiring of gene-enhancer interactions . Cell 161 ( 5 ), 1012 – 1025 . OpenUrl CrossRef PubMed [7]. ↵ Flavahan , W.A. , Drier , Y. , Liau , B.B. , Gillespie , S.M. , Venteicher , A.S. , Stemmer-Rachamimov , A.O. , Suvà , M.L. & Bernstein , B.E. ( 2016 ). Insulator dysfunction and oncogene activation in IDH mutant gliomas . Nature 529 , 110 – 114 . OpenUrl CrossRef PubMed [8]. 
↵ Erez Lieberman-Aiden 1, Nynke L van Berkum , Louise Williams , Maxim Imakaev , Tobias Ragoczy , Agnes Telling , Ido Amit , Bryan R Lajoie , Peter J Sabo , Michael O Dorschner , Richard Sandstrom , Bradley Bernstein , M A Bender , Mark Groudine , Andreas Gnirke , John Stamatoyannopoulos , Leonid A Mirny , Eric S Lander , Job Dekker . Comprehensive Mapping of Long-Range Interactions Reveals Folding Principles of the Human Genome . SCIENCE , 2009 . [9]. ↵ Geoff Fudenberg , David R. Kelley & Katherine S. Pollard . Predicting 3D genome folding from DNA sequence with Akita . Nature Methods , 2020 . [10]. ↵ Ron Schwessinger , Matthew Gosden , Damien Downes , Richard C. Brown , A. Marieke Oudelaar , Jelena Telenius , Yee Whye Teh , Gerton Lunter & Jim R. Hughes . DeepC: predicting 3D genome folding using megabase-scale transfer learning . Nature Methods , 2020 . [11]. ↵ Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N. Gomez , Łukasz Kaiser , Illia Polosukhin . Attention Is All You Need . NeurIPS , 2017 . [12]. ↵ Žiga Avsec , Vikram Agarwal , Daniel Visentin , Joseph R. Ledsam , Agnieszka Grabska-Barwinska , Kyle R. Taylor , Yannis Assael , John Jumper , Pushmeet Kohli & David R. Kelley . Effective gene expression prediction from sequence by integrating long-range interactions . Nature Methods , 2021 . [13]. ↵ Zhihan Zhou , Yanrong Ji , Weijian Li , Pratik Dutta , Ramana Davuluri , Han Liu ( 2024 ). DNABERT-2: Efficient Foundation Model and Benchmark For Multi-Species Genome [14]. ↵ Hugo Dalla-Torre , Liam Gonzalez , Javier Mendoza-Revilla , Nicolas Lopez Carranza , Adam Henryk Grzywaczewski , Francesco Oteri , Christian Dallago , Evan Trop , Bernardo P. de Almeida , Hassan Sirelkhatim , Guillaume Richard , Marcin Skwark , Karim Beguir , Marie Lopez & Thomas Pierrot . Nucleotide Transformer: building and evaluating robust foundation models for human genomics . Nature Methods , 2024 . [15]. ↵ Job Dekker , Andrew S. 
Belmont , Mitchell Guttman , Victor O. Leshyk , John T. Lis , Stavros Lomvardas , Leonid A. Mirny , Clodagh C. O’Shea , Peter J. Park , Bing Ren , Joan C. Ritland Politz , Jay Shendure , Sheng Zhong & the 4D Nucleome Network . The 4D nucleome project . Nature . 2017 [16]. Roberto Rossini , Jonas Paulsen . hictk: blazing fast toolkit to work with .hic and .cool files . Bioinformatics . 2024 [17]. Suhas S.P. Rao , Miriam H. Huntley , Neva C. Durand , Elena K. Stamenova , Ivan D. Bochkov , James T. Robinson , Adrian L. Sanborn , Ido Machol , Arina D. Omer , Eric S. Lander , Erez Lieberman Aiden . A 3D Map of the Human Genome at Kilobase Resolution Reveals Principles of Chromatin Looping . Cell , 2014 . [18]. Takuya Akiba , Shotaro Sano , Toshihiko Yanase , Takeru Ohta , Masanori Koyama . Optuna: A Next-generation Hyperparameter Optimization Framework . arXiv , 2019 , https://arxiv.org/abs/1907.10902 . View the discussion thread. Back to top Previous Next Posted April 16, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Predicting 3D Chromatin Interactions Using Transformer-Enhanced Deep Learning Models Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Predicting 3D Chromatin Interactions Using Transformer-Enhanced Deep Learning Models Kexin Xu , Li Shen bioRxiv 2025.04.10.647995; doi: https://doi.org/10.1101/2025.04.10.647995 Share This Article: Copy Citation Tools Predicting 3D Chromatin Interactions Using Transformer-Enhanced Deep Learning Models Kexin Xu , Li Shen bioRxiv 2025.04.10.647995; doi: https://doi.org/10.1101/2025.04.10.647995 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7618) Biochemistry (17635) Bioengineering (13859) Bioinformatics (41846) Biophysics (21401) Cancer Biology (18534) Cell Biology (25423) Clinical Trials (138) Developmental Biology (13352) Ecology (19860) Epidemiology (2067) Evolutionary Biology (24286) Genetics (15582) Genomics (22463) Immunology (17700) Microbiology (40298) Molecular Biology (17141) Neuroscience (88429) Paleontology (666) Pathology (2825) Pharmacology and Toxicology (4813) Physiology (7633) Plant Biology (15107) Scientific Communication and Education (2042) Synthetic Biology (4284) Systems Biology (9808) Zoology (2267)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00