Full text
62,615 characters
· extracted from
preprint-html
· click to expand
Post-operative tissue fragment puzzling using histopathological vision transformer alignment HiViTAlign | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Post-operative tissue fragment puzzling using histopathological vision transformer alignment HiViTAlign Christoph Blattgerste , Tanzina Ferdous , Ayk Jessen , Maximilian Legnar , Karl Rohr , View ORCID Profile Claudia Scherl , Jürgen Hesser , Cleo-Aron Weis doi: https://doi.org/10.1101/2025.07.14.664649 Christoph Blattgerste 1 Institute of Pathology, Computational Pathology Heidelberg, Heidelberg University Hospital , Heidelberg, Germany 8 Data Analysis and Modeling in 
Medicine, Mannheim Institute for Intelligent Systems in Medicine (MIISM), Heidelberg University , Mannheim, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: christoph.blattgerste{at}med.uni-heidelberg.de Tanzina Ferdous 2 Institute of Computer Science, Faculty of Mathematics and Computer Science, Heidelberg University , Heidelberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ayk Jessen 1 Institute of Pathology, Computational Pathology Heidelberg, Heidelberg University Hospital , Heidelberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Maximilian Legnar 1 Institute of Pathology, Computational Pathology Heidelberg, Heidelberg University Hospital , Heidelberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Karl Rohr 3 BioQuant, IPMB, Biomedical Computer Vision Group, Heidelberg University , Heidelberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Claudia Scherl 4 Department of Otorhinolaryngology, Head and Neck Surgery, University Hospital Mannheim, Heidelberg University , Mannheim, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Claudia Scherl Jürgen Hesser 5 Interdisciplinary Center for Scientific Computing, Heidelberg University , Heidelberg, Germany 6 Central Institute for Computer Engineering (ZITI), Heidelberg University , Heidelberg, Germany 7 CZS Heidelberg Initiative for Model-Based AI, Heidelberg University , Heidelberg, Germany 8 Data Analysis and Modeling in Medicine, Mannheim Institute for Intelligent Systems in Medicine (MIISM), Heidelberg University , Mannheim, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site 
Cleo-Aron Weis 2 Institute of Computer Science, Faculty of Mathematics and Computer Science, Heidelberg University , Heidelberg, Germany 5 Interdisciplinary Center for Scientific Computing, Heidelberg University , Heidelberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Preview PDF 1 Abstract In pathology, reconstructing adjacent tissue parts enables an overview of the macro environment of objects like tumors. In particular, malignomas are of interest to verify invasion and resection margins, as patients with positive margins face a higher mortality risk. Reassembling image fragments is widely used in other domains, but adjacent blocks in pathology are mostly analyzed separately, missing global context. In this project, neighboring tissue fragments of pig organ whole slide images (WSIs) are reconstructed without a ground truth based on histological sections at the end of a complex work-up process. Histological tissue slices with artifacts, frayed or disrupted boundaries and sometimes missing pieces complicate the puzzling task. Thus, typical approaches such as direct feature comparison of tissue boundaries or estimating a tile's position based on an overview image or known structures are not applicable. A new approach is presented using partial image registration where only parts of a fixed and a moving image are aligned for adjacency. In contrast to existing projects aligning subsequent tissue slices of the same block, WSIs from separated blocks will be reassembled for adjacency. The three-stage vision transformer used extracts image features on various scales, compares neighboring tiles by shape, color and texture and predicts transformation parameters. Even though the pipeline is capable of handling rigid transformations such as rotation or reflection, only translation is currently supported due to the limited training set. 
Supervised training of the network can be realized using a puzzle generator creating irregular shaped fragments of masked whole slide images. The factorized trained neural network is embedded into a sophisticated histopathological vision transformer alignment (HiViTAlign) pipeline executing the following steps in roughly 10 seconds per reassembled tissue puzzle: First, extract the specimen and mask the background in each whole slide image. Second, compare tile boundaries using partial image registration. Third, calculate the adjacency by boundary proximity for each image pair. Fourth, determine a minimal spanning tree to optimize adjacency of pairwise registrations and transformations for tissue reconstruction. The python source code for HiViTAlign to start puzzling with WSIs or other objects is available at https://github.com/cpheidelberg/HiViTAlign . The generator for creating a dataset with irregular shaped tiles can be downloaded from https://github.com/cpheidelberg/ImagePuzzleGenerator . Author summary Histopathology as the microscopic analysis of tissue remains the gold standard for evaluating tumors, especially when assessing resection margins. However, the physical processing of tissue disrupts its original three dimensional structure, leaving pathologists with fragmented, two-dimensional slices that lack spatial context. This fragmentation makes it difficult to understand the full extent and orientation of tumors and to correlate pathology results with radiological imaging used in surgical planning. In this study, we present a computational pipeline for histopathological vision transformer alignment (HiViTAlign) that reassembles fragmented histological tissue sections, similar to solving a jigsaw puzzle. Using a deep learning model based on Vision Transformers, our method predicts how individual tissue fragments are spatially related and outputs transformation parameters for adjacency. 
While the pipeline is designed to accommodate a variety of rigid transformations (e.g., rotation and reflection), its current implementation, constrained by the limited diversity of the training dataset, focuses solely on predicting translational shifts between fragments. A custom dataset generator was developed to create realistic puzzles from whole slide images, assigning original coordinates to each fragment to enable supervised training. The full pipeline was evaluated on both synthetic datasets and real-world whole slide images, demonstrating its ability to reconstruct tissue cross-sections without requiring a reference image. This method may support more accurate spatial interpretation of pathological specimens and better integration with surgical imaging data. The open-source Python code we developed invites collaboration and innovation, reflecting our commitment to advancing computational pathology through technology and shared resources. Paper to be submitted to PLOS Computational Biology . 3 Introduction Pathology, dealing with two-dimensional, tissue-based imaging, remains the gold standard for tumor analysis and evaluation of resection margins. To capture three-dimensional anatomical details, surgeons predominantly utilize imaging modalities such as magnetic resonance imaging (MRI), computed tomography (CT), or hybrid techniques like positron emission tomography (PET)-CT. These imaging approaches offer 3D insights into anatomical structures, tumor localization, and tumor extension [ 1 ]. However, while these techniques are valuable for guiding interventions, they lack the spatial and histological resolution that is required to accurately determine tumor dimensions and the exact localization needed for resection margin evaluation in surgery. Therefore, histopathological analysis of excised tissue still represents the optimal method for acquiring these crucial parameters. 
To avoid the waiting times that usually occur during histological work-up, histopathological analysis is occasionally performed intra-operatively using a freezing-based work-up, known as frozen section service. Answering these spatial questions is a significant challenge for the corresponding pathology lab and its sampling-based work-up scheme: Tissue sections are prepared without or only with coarse consideration of the stereotactic orientation of and within the original 3D tissue, resulting in a loss of spatial information critical for comprehensive analysis. In addition, not all tissue parts are typically embedded, making it challenging to ensure the completeness of a volume, such as a tumor. Consequently, the spatial context between adjacent histological slides, or in the case of digital pathology, whole-slide images (WSIs), is disrupted. This fragmentation limits the ability to analyze the overall histological structure of the specimen, hindering the precise correlation of pathological findings with in vivo imaging data. For example, it also limits the spatial correlation with adjacent slides and blocks as well as radiological imaging data to standardized workflows like radical prostatectomy [ 2 ]. This lack of integration hinders achieving precise spatial alignment and comprehensive analysis across imaging modalities. Critical questions, such as determining tumor extent or assessing tumor margins, are addressed based on a descriptive analysis of the gross sectioning process and the subsequent sampling of seemingly representative image parts. To reconstruct the overall specimen from separated slides without access to a reference image, we propose a pipeline that reconstructs individual tissue fragments into complete histological cross-sections of the entire specimen. From the perspective of the literature, using the ontological framework recently proposed by Yilmaz et al. 
[ 3 ] for the plethora of different puzzling problems and solving approaches, our histological puzzling problem is a single-solution puzzle composed of one-sided, pictorial fragments with irregular shapes. Our task of reconstructing entire specimen cross-sections based on histological images presents several unique challenges. Missing tissue fragment parts caused by processing complicate the process, similar to the gaps in antique frescoes. Deformations add another layer of complexity. Semi-transparency of tissue sections on glass slides means they remain single-sided pieces but may also involve mirroring. Many small, repetitive shapes in histological images make the task more similar to assembling a mosaic than restoring a fresco; it is worth noting that mosaic reconstruction remains an open problem in the context of 2D puzzles [ 3 ]. Variable shape of the resulting reconstructed piece further increases the challenge. This work addresses these five key challenges above. Beyond these, there is a sixth challenge of solving a puzzle with many missing parts and no known results. For most specimens, a standardized protocol for comprehensive specimen work-up incorporating cross-sectional imaging is lacking, except in specific cases such as osteosarcoma [ 4 ]. This absence complicates analysis due to missing tissue and the lack of reference images. To manage complexity, this study uses only datasets with known outcomes and complete sets of tissue fragments, either generated from WSI-derived puzzle pieces or from fully embedded cross-sections. The proposed pipeline applies a pattern recognition-based puzzling approach to overcome the five challenges mentioned above. The pipeline comprises the steps common for puzzle problems, comprehensively described by Yilmaz et al. in [ 3 ]: Image preprocessing, matching via pairwise registration, adjacency quantification and tissue reassembly. 
By estimating global transformations, all related slides can be reconstructed in their original spatial arrangement without altering the underlying image data. The modular design ensures flexibility and reproducibility, allowing the pipeline to adapt to various tissue types and imaging conditions. In addition to the puzzling pipeline itself, a dataset generator is presented. Therefore, WSIs are cropped into fragments labeled with their center coordinates. This approach enables the training of a deep neural network in an effective, supervised manner. 4 Related Work Other disciplines already solved comparable problems using digital image recombination algorithms, such as astronomy [ 5 ], geography [ 6 ], and archaeology [ 7 - 9 ] where various pixel-based images are stitched together. A structured classification was recently defined by Yilmaz et al. [ 3 ] in 2023 where detailed descriptions of various puzzle problems are given. Accordingly, a single solution equivalent to the original tissue structure is possible consisting of pictorial single-sided irregular shaped fragments. Paumard et al. [ 10 ] investigated a puzzle solution only based on predicting the position of image content which requires the presence of global shapes and structures. In contrast, Basu et al. [ 11 ] propose a shape based approach to restore hand shredded blank paper without a reference image. Combining both puzzling strategies is presented by Derech et al. [ 8 ] who created an overlap between adjacent puzzle fragments by extending each one synthetically. The color information at a boundary was used together with the matching shape. In medical research, image analysis methods heavily rely on state-of-the-art computer vision algorithms such as multi-resolution vision transformer [ 12 , 13 ] for superior feature extraction and registration. 
Analogous approaches and complete applications for image reassembly have been established, such as chessboard-like tile stitching of microscopy images in ImageJ [ 14 ] or MIST [ 15 ], and image registration of volumetric grayscale MRI and CT images [ 16 – 18 ]. However, these research areas can make use of simplifying assumptions, for example the rectangular shape for microscopy tile stitching or the complete overlap for spatial feature matching in radiological image registration. In histopathology, image registration and feature detection are used to match slices of the same tissue with similar features, but different staining [ 19 , 20 ]. Also, stacks of slices cut from the same block are reassembled to a 3D object by VALIS [ 21 ]. Furthermore, feature detection is used to enable multimodal analysis and combine information from WSI and MRI [ 22 ]. In contrast to previous studies, this paper presents an approach to reassemble neighboring WSIs without any overlap, which to our knowledge has not been investigated before. Stitching together adjacent histological slides and their corresponding blocks of tissue promises new analysis tools and diagnosis opportunities for partial image registration. 5 Material and Methods Here, a histological 2D puzzle, reconstructing irregularly shaped tissue fragments scanned as whole slide images (WSIs), is solved. Analogous to a jigsaw puzzle, the task can be summarized as a single-solution puzzle composed of one-sided, pictorial fragments with irregular shapes [ 3 ]. Following this general scheme of image reassembly, shape and content features are extracted and matched in a unified step using a computer-aided method. In contrast to previous work, the puzzle pieces may also lack tissue parts due to iterative reassembly, but the overall pipeline is consistent with [ 3 ]. 
To solve the herein described 2D puzzling task, we propose a pipeline that reconstructs individual tissue fragments into complete histological cross-sections of the entire specimen, as shown in Fig 1 . Download figure Open in new tab Figure 1. Workflow of puzzling pipeline starting with regular WSIs of all neighboring fragments. After WSI preprocessing (1) including tissue masking, each combination of fixed and moving tissue fragments is pairwise registered using a ViT (2) in Fig 2 . Based on subsequent adjacency quantification of all registrations (3) resulting in a disparity matrix, the best matching tissue fragments are reassembled with a cluster optimization algorithm (4) visualized by a minimal spanning tree in Fig 3 . WSI Preprocessing: Downsample and crop tissue fragments to thumbnails $I_{\text{thumb}}$ for subsequent image processing and analysis (see Subsection 5.1). Pairwise Registration: Infer best adjacency with transformation $M$ between each pair of fixed and moving fragments $I$ and $J$: $M(I_{\text{thumb}}) \sim J_{\text{thumb}}$ using a multi-stage ViT (see Subsection 5.2). Adjacency Quantification: Order pairwise registrations by reconstructing the registered image pair $(M(I) \times J)$ and calculate neighbor similarity: $S_{IJ} = \sum_i \left( M_{i,\text{pred}}(I) \times J - M_{i,\text{true}}(I) \times J \right)^2$ (see Subsection 5.2.3). Tissue Reassembly: Assemble fragments via a search tree based on pairwise matches, yielding global transformations $M_{\text{glob}}(I)$ for each fragment with enforced global consistency (see Subsection 5.3). For supervised learning relying on a ground truth of the image reconstruction, a synthetic puzzle generator was developed in addition to the pipeline. Described in Section 5.4, the open-source code is available on GitHub and can be adapted for other projects. The approach is established and tested on a data set based on histological slides from the archive of the Institute of Pathology Heidelberg, as described in Section 5.5 below. 
The vision transformer for registration was optimized on the HELIX cluster and finally trained on a GPU server of the deNBI cloud using NVIDIA RTX A6000 GPU, AMD EPYC 7502 8-core CPU and 128 GB RAM for roughly 200 seconds per epoch. 5.1 WSI preprocessing The algorithm operates on 2D pixel images. Each of the n WSIs in a puzzle I WSI ∈ C is downsampled to a thumbnail I thumb at a fixed zoom using OpenSlide [ 23 ]. WSIs contain both tissue and a mostly white background. To suppress the background in black, tissue segmentation is required and various masking techniques are compared. A basic thresholding based on Otsu’s method [ 24 ] minimizing the intra-class variance is used initially. A more robust variant, inspired by Histolab [ 25 ], applies adaptive Canny edge detection [ 26 ] on grayscale images converted from RGB. Masking the background in LAB color space performs even better on light H&E-stained fatty tissue shown in Fig. 7. While deep learning methods like YOLO [ 27 ] or Mask R-CNN [ 28 ] offer higher precision, they are computationally intensive and only marginal improvements for registration are achieved. 5.2 Pairwise image registration This subsection explains the method used to determine the optimal transformation M for each puzzle piece I thumb , ensuring that each is correctly positioned relative to its neighbor: M ( I thumb ) ∼ J thumb . These transformations are computed using a Vision Transformer (ViT), a powerful deep learning model designed for image analysis. In addition, the alignment quality between each pair of puzzle pieces is evaluated and quantified using a similarity score. This score measures how well two pieces fit together, helping to identify the most accurate alignments. By combining the calculated transformations and similarity scores, the approach ensures precise placement of the puzzle pieces within the reconstructed image. 
5.2.1 Transformations for Fragment Alignment The initially centered fragments $I$ and $J$ are reassembled using rigid transformations given by the following parameters: Translation in x and y direction is limited by the image width $w$ and height $h$: $t_x(I, J) \in [-w/2, w/2]$ and $t_y(I, J) \in [-h/2, h/2]$. Rotation is defined by an angle $\omega(I, J) \in [-179^\circ, 180^\circ]$. As each WSI thumbnail $I_{\text{thumb}}$ is represented by a pixel-based image matrix $I_{xy}$, the transformation can be described by a matrix multiplication. In order to align image $I$ to image $J$, a transformation $M(I, J)$ based on the transformation parameters has to be found such that $M(I) \sim J$. A rigid or Euclidean transformation $M$ can be formed by a rotation matrix $R(\omega)$ and a translation vector $T(t_x, t_y)$ as follows: $$M = \begin{pmatrix} R(\omega) & T(t_x, t_y) \\ 0 & 1 \end{pmatrix} = \begin{pmatrix} \cos\omega & -\sin\omega & t_x \\ \sin\omega & \cos\omega & t_y \\ 0 & 0 & 1 \end{pmatrix} \qquad (1)$$ 5.2.2 Vision Transformer for Estimating Transformation Due to staining artifacts and shape distortions, edge-based methods are unreliable for fragment alignment. Instead, we adopt a pattern recognition-based approach, combining structural and textural information. While classical feature-based methods such as scale-invariant feature transform (SIFT) [ 29 ] or speeded-up robust features (SURF) [ 30 ] are lightweight and interpretable, Vision Transformers (ViTs) offer critical advantages for registering complex, irregular, and partially corrupted tissue fragments. Even though computer vision networks require substantial computing resources during training, leading to higher CO$_2$ emissions, inference is faster, less error-prone and more easily adjustable once the model is trained. ViTs are well-suited for this task, as their self-attention mechanism captures both local and long-range dependencies, enabling robust matching even under rotation or staining variation. Among ViT architectures, the Coarse-to-Fine Vision Transformer (C2FViT) [ 31 ] is particularly effective with its multi-resolution architecture originally developed for 3D medical image registration. 
It progressively refines transformation parameters via a multi-resolution strategy ( Fig. 2 ). Download figure Open in new tab Figure 2. A three-stage vision transformer C2FViT [ 31 ] is used for pairwise image registration. The model consists of a multi-resolution strategy that progressively refines rigid transformation parameters for an optimal adjacency between fixed and transformed moving fragments. C2FViT solves a regression task defined by Eq. 1 , aligning fixed and moving fragments by predicting optimal rigid transformations. Its hierarchical design enables robust alignment despite staining inconsistencies, missing tissue, or boundary artifacts. 5.2.3 Similarity Metric for Fragment Matching To evaluate the quality of pairwise fragment alignment described previously, we define a similarity score that quantifies how well two tissue pieces fit together, guiding accurate reassembly in the puzzle pipeline. Since ground truth images are unavailable, standard metrics like Structural Similarity Index Measure (SSIM) [ 32 ] or Peak-Signal-to-Noise-Ratio (PSNR) [ 33 ] cannot be applied. Instead, we use a custom similarity function $S(I, J)$ and the inverse disparity $D(I, J)$ respectively that quantifies edge alignment: Here, $S_{\text{dist}}$ penalizes large boundary distances between fragments, while $S_{\text{lap}}$ penalizes excessive overlap. $S_{\text{dist}}(I, J)$ counts the number of fragment boundary pixel pairs $(i, j)$ such that $\lVert i - j \rVert_2 \le \Theta$, with $\Theta$ set to the minimal boundary distance plus 2. $S_{\text{lap}}(I, J)$ measures the number of overlapping pixels in $I \cap J$ within the union $I \cup J$. This metric ensures that fragments with both minimal spatial misalignment and compatible edge content are well balanced for reconstruction. 5.3 Fragment Reassembly Tissue reconstruction is performed step-by-step in an agglomerative manner using the previously computed pairwise registrations and their disparity scores. 
To reduce computational cost and maintain explainability, we adopt discrete algorithmic methods over AI-based models. In order to optimize for precise reassembly, various analytical algorithms are implemented relying on the same disparity matrix of all pairwise fragment registrations. All three processes aim to minimize the total disparity as defined in Eq. 2 . With minimal complete connectivity ensuring that each fragment is connected to at least one other, a cluster reassembly is shown in Fig 3 . Global transformations $G(I)$ for each fragment as the requested result are calculated recursively from known pairwise transformations: $$G(I) = G(J) \cdot M(I, J) \qquad (3)$$ Download figure Open in new tab Figure 3. Various clustering algorithms rely on the pairwise disparity between every possible tissue adjacency determined previously (left). A minimal spanning tree for example connects the fragments with the minimal disparity until all tissue fragments are connected (center) and the cluster is fully reassembled (right). with $G(I_0) := \mathbb{I}$ for the initial fragment $I_0$. 5.3.1 Simulated Annealing Simulated annealing, inspired by the cooling process of lattice atoms in a solid [ 34 ], explores the configuration space by randomly modifying fragment adjacencies. Random modifications depend on the temperature $T$ which is gradually reduced with each iteration $k$ based on a cooling factor $\varepsilon$ and starting temperature $T_0$: $$T_k = T_0 \cdot \varepsilon^{k}$$ Configurations which worsen the disparity, $\Delta D = D_{\text{new}} - D_{\text{curr}} > 0$, may be accepted with probability $P$ to escape local minima. The final layout minimizes total disparity while preserving connectivity. Global transformations are propagated using Eq. 3 via breadth-first traversal. 5.3.2 Minimal Spanning Tree Fragments are modeled as nodes in a graph, with edges weighted by disparity. Using Kruskal’s algorithm [ 35 ], the minimal spanning tree (MST) is built by selecting the lowest-weight edges without cycles, as visualized in Fig 3 . 
The presented union-find algorithm is used to track the sets of edges $\{(I, J, D)\}$ efficiently with a deterministic solution: $$\text{if } \mathcal{C}(I) \neq \mathcal{C}(J): \quad \mathcal{C}(I) \leftarrow \mathcal{C}(I) \cup \mathcal{C}(J)$$ where $\mathcal{C}(I)$ denotes the set (or cluster) containing the fragment $I$. Transformations are propagated as in simulated annealing. By definition, a tree with minimal total disparity is created: $$T_{\text{MST}} = \arg\min_{T} \sum_{(I, J) \in T} D(I, J)$$ where $D(I, J)$ is the disparity of the edge $(I, J)$. 5.3.3 Agglomerative Clustering Agglomerative clustering (AHC) follows a greedy, bottom-up strategy inspired by Distance matrix ALIgnment (DALI) [ 36 ] used in bioinformatics. Clusters are merged based on the minimum pairwise disparity: $$(A^{*}, B^{*}) = \arg\min_{A \neq B} \; \min_{I \in A,\, J \in B} D(I, J)$$ A disparity matrix is maintained and updated throughout merging. Final global transformations are computed hierarchically using Eq. 3 . Visualizations of the hierarchy and intermediate alignments aid interpretation. 5.3.4 Quantitative Evaluation In order to quantify the performance of these algorithms, the predicted reassembled tissue puzzle is evaluated against the ground truth WSI thumbnail (shown in Fig. 4 ) using: Download figure Open in new tab Figure 4. A puzzle set is created by cropping a WSI image along an irregular network mask implemented open-source. The image is concatenated with the mask (left) before the tissue is cropped apart (center). Each fragment is centered (right) and stored as a separate .png file. Mean Absolute Error (MAE): Measures positional deviation of fragments’ centers. Mean Squared Error (MSE): Captures pixel-wise differences to the original WSI. Structural Similarity Index (SSIM) [ 32 ]: Assesses perceptual similarity, including texture and structure. 5.4 Dataset Generation To adapt the C2FViT model for histopathological partial image registration, a large dataset of WSI puzzles is required for training. However, cases of spatially correlated slices are rare in our group and also elsewhere with other image content as stated by Yilmaz et al. [ 3 ]. 
A scalable synthetic puzzle set generator was implemented especially for this project returning a labeled puzzle dataset with centered tissue fragments and their center coordinates in the WSI thumbnail visualized in Fig. 4 . The generator pipeline comprises the following steps: WSI tissue masking: apply segmentation to whole-slide images (see subsection 5.1). Grid acquisition: download the geometric tessellation of German postal-code regions. Fragment extraction: overlay the postal-code grid on the image and crop out each tile. Annotation and storage: assign unique labels, center-align each fragment, and save separately. Besides the puzzle pipeline, the code is published open-source and can be easily adapted without restrictions on the texture of the input image. For this project, WSIs approved for research purposes were used to create anonymized tissue puzzles. Up to 16 neighboring square tissue tiles of 230 tissue micro-arrays (TMA) were used to create 32560 unique puzzles depicted in Fig 4 . Overlaid with a binary image of a random German postal code border, each thumbnail is cropped into a unique puzzle set. Thus, a developer might get a puzzle based on the shape of the hometown borders. Furthermore, multiple puzzles can be created out of one image due to varying border nets resulting in different fragment shapes. 5.5 Dataset Composition for Image Registration Due to the absence of a suitable large dataset of histological puzzle fragments, three synthetic subsets (irregular-shaped tiles, square tiles and pig organ slices) were defined based on the puzzle generator presented in Section 5.4. For the irregular tile dataset, 6512 patches from 230 tissue microarray (TMA) WSIs were cropped along geographical borders to create 32560 irregular-shaped labeled puzzles. 
To further enhance textural pattern matching during training, a second square tile dataset was concatenated where the same WSI tiles were cropped into 2 × 2 square fragments with varying overlap between 0 and 10 pixels, resulting in 44384 puzzles. A real-world WSI dataset is concatenated as well, based on pig organ slices, which is published with the puzzle pipeline for testing. The data collection and all experiments were conducted in accordance with a vote of the ethics commission II of Heidelberg University (vote S-206/2005). Here, the prior masking step was used for all 143 available WSIs within the study, as shown on the right in Fig. 5 . Using a high multiplicity of 25 with multiple superimposed geographical nets per image, 21620 puzzles are created. Download figure Open in new tab Figure 5. WSIs are processed and cropped differently resulting in three different puzzle datasets. Irregular (left) and square (center) tile fragments as well as fragmented tissue puzzles (right) enhance different image characteristics for training. 6 Results To solve a 2D histology puzzle task (technically speaking, a single-solution puzzle composed of one-sided, pictorial fragments with irregular shapes [ 3 ]), a four-step pipeline has been developed. Relying on partial image registration, these steps described in the methods section are able to tackle missing tissue fragment parts and small deformations without a reference image: WSI preprocessing (see 5.1), pairwise registration (see 5.2), adjacency quantification (see 5.2.3), and tissue reassembly (see 5.3). In subsequent subsections 6.1–6.2, the critical steps are evaluated separately to determine an optimal configuration for the final pipeline. After parameter optimization, the final results are evaluated and presented in the concluding section of the results on synthetic data in subsection 6.3 and real-world data in subsection 6.4. This comprehensive approach ensures a rigorous and systematic analysis of the 2D histology puzzle problem.
6.1 Pairwise Puzzling with Vision Transformer For partial image registration or, respectively, puzzling two puzzle pieces together, the C2FViT model published by Mok et al. [ 31 ], summarized in Section 5.2.2, was adopted to infer a transformation to match two 2D RGB histological images next to each other. Therefore, the adjusted vision transformer is trained on all three synthetic, balanced puzzle sets presented in Section 5.4. However, only translation, as a rigid transformation, can be corrected due to the limited dataset characteristics. For the irregular tile, square tile, and pig organ datasets, fragment shifts are predicted based on matching edges and the continuation of shapes and colors across the fragment boundary. Supervised training used centered, fixed, and moving adjacent image fragments as features. The difference in translations required for centering each WSI fragment serves as ground truth labels for correct positioning of the moving image next to the fixed one. Thus, a mean squared error loss was minimized using the AdamW optimizer [ 37 ] with hyperparameters listed in Appendix 4. Training was performed for 256 epochs on a balanced, sophisticated dataset described in Section 5.4 with parameters listed in Appendix 4. Data augmentation was used for better generalization. Translation parameters can be successfully predicted, whereas the C2FViT registration model fails to estimate additional transformation parameters such as rotation or reflection. A fractional training approach with varying datasets did not increase the registration ability. The training process is accompanied by independent validation using various metrics. These are also used to quantify the result of the partial image registration listed in Table 1 . The ground truth and predicted translation parameters are subtracted to determine mean absolute error (MAE) and mean squared error (MSE).
For the normalized cross-correlation (NCC) and structural similarity index measure (SSIM), the predicted transformation is applied to the moving fragment and reassembled with the fixed one. Quantitative evaluation requires a ground truth image for comparison as shown in the appendix in Fig. 8. View this table: View inline View popup Download powerpoint Table 1. Test results of the partial image registration for translation data subsets in Fig. 5 . MAE & MSE are calculated based on the deviations between ground truth and predicted image centers, NCC & SSIM rely on the reassembled image pairs shown in the appendix in Fig. 8. The evaluation metrics differ for the three subsets of the dataset shown in Table 1 , verifying the expectation that different puzzling characteristics highlighted in each dataset are learned differently. Lower MAE and MSE point to better learning performance. Irregular tiles based on high magnification tissue perform best with the clear irregular border match and the cellular structure. Also, high NCC and SSIM loss affirm that square tiles without characteristic boundaries show worse matching performance with their neighbors, resulting in a higher discrepancy between the original and the reassembled image. Masked WSIs without cellular structure but with irregular fragment boundaries can also be reassembled better than fragments offering only textural information of the tissue, as shown by all metrics simultaneously. The combined test set leads to an average loss and can prove the correct registration training of the network for all three subsets. The overall registration performance yields an error of approximately 7 pixels per coordinate as the MAE takes the two-dimensional position into account. 6.2 Clustering Optimization for Puzzle Reassembly The clustering of tissue fragments is a combinatorial problem with a limited solution space constrained by the known pairwise registration results [ 3 , 38 ].
These results are represented by a disparity matrix, which captures the relative alignment errors between fragments, and a transformation matrix, which stores the transformation parameters for all fragments. Each fragment is systematically compared to all others, enabling the identification of the best-matching pairs. Three distinct clustering methods were implemented and evaluated: Agglomerative Clustering: A deterministic bottom-up method that iteratively merges the most similar fragment pairs based on minimum disparity to form a hierarchical reconstruction. Minimal Spanning Tree: A greedy and deterministic approach that connects all fragments by selecting the lowest-disparity edges without forming cycles, ensuring global connectivity. Simulated Annealing: A non-deterministic top-down optimization algorithm that explores different fragment configurations by probabilistically accepting worse solutions to escape local minima and minimize total disparity. Each algorithm was evaluated on a synthetic dataset with the same pairwise registration results and known ground truth. For a single tissue puzzle, the recombined final image shows significant discrepancies in Fig. 9 in the appendix. Quantitative results are listed in Table 2 . The MAE over all center coordinates in the reassembled puzzle is lowest for the adjusted minimal spanning tree algorithm, as confirmed by the pixelwise MSE of the resulting image. However, the SSIM values comparing ground truth and transformed moving image differ, resulting from residual spatial misalignment at the fragment boundaries. View this table: View inline View popup Download powerpoint Table 2. Comparison of three clustering algorithms based on mean absolute error (MAE), mean squared error (MSE), and structural similarity index measure (SSIM). The outcomes are evaluated on a ground truth dataset of tissue fragments shown in Fig. 6 .
The results of these comparisons provided insights into the strengths and weaknesses of each clustering approach, offering a robust framework for optimizing tissue fragment reassembly while maintaining computational efficiency. The adjusted minimal spanning tree algorithm achieves the lowest mean absolute error (MAE) over all fragment center coordinates and is further validated by its competitive pixel-wise mean squared error (MSE). However, the observed differences in structural similarity index measure (SSIM) stem exclusively from residual spatial misalignment at the fragment boundaries. Pixel intensities remain unchanged in both the ground truth and transformed images. SSIM does not seem applicable for translational image differences as different algorithms achieve similar results. Agglomerative clustering achieves a lower MSE even though its MAE is roughly twice as large. Also, a qualitative comparison in Fig. 9 confirms that the search-tree-based algorithm is most suitable. Simulated annealing, as a probabilistic and non-deterministic approach, takes by far the longest to achieve satisfying results. However, its results cannot compete with those of the MST algorithm. 6.3 Evaluation of the Full Reassembly Pipeline using Synthetic Data The full pipeline was tested on both synthetic puzzle subsets generated from WSI tiles. The square tile puzzles consist of 2 × 2 fragments per cluster. The irregular tile puzzles are made up of 2–15 fragments with known ground truth positions and original tissue image. Table 3 summarizes the quantitative performance on both synthetic subsets, irregular and square tile puzzles. The mean MAE and MSE compare each fragment's original and predicted coordinates, whereas the SSIM and NCC compare the original tissue image with the reassembly result. View this table: View inline View popup Download powerpoint Table 3. Test scores for 100 reassembled fragmented WSI tissue puzzles for synthetic and real-world datasets.
Each fragment’s position is compared to its ground truth using MAE and MSE normalized to the number of fragments per puzzle, as well as the reassembled tissue with SSIM and NCC. The low MAE and NCC close to one confirm the applicability for all datasets. High MSE and low SSIM show the high misalignment of outliers. With an asymptotic complexity of $O(n^2)$, the mean execution time increases less than quadratically with the number of fragments. For n fragments, n ( n − 1) pairwise registrations are required. The minimal spanning tree clustering algorithm based on Kruskal’s algorithm has $O(n \log n)$ complexity. A typical puzzle with 4 fragments can be recombined in less than 2 seconds. Similar to the registration results of fragment pairs, irregular tile puzzles show lower MAE and MSE, and slightly better NCC. The MAE of 49.6 for two-dimensional translation corresponds to an average misplacement of 7.0 pixels per axis or 3% of the used image size of 256 × 256. High NCC confirms strong structural agreement, while SSIM is not applicable because of different image canvases of original and reconstructed tissue. Square tiles cut into 2 × 2 fragments can also be reconstructed, as shown by a very good NCC, but the MAE of 92 confirms only half the alignment precision. The elevated MSE reflects outliers with larger errors for irregular as well as square tiles. 6.4 Pipeline Evaluation on Pig Organ Tissue Data To demonstrate practical applicability, the pipeline was applied to a set of WSIs of pig organ slices. Unlike the synthetic tiled datasets, macroscopic WSI tissue exhibits staining variability, partial fragment loss, and scanning artifacts. HiViTAlign relies on the registration ability of the C2FViT model for H&E stained WSI thumbnails and the general reassembly of tissue fragments. Thus, the pipeline could successfully reassemble multiple fragments per masked WSI tissue shown on the right of Fig. 5 .
WSIs were manually fragmented and reconstructed, since no true adjacency annotations exist for a supervised learning approach. The results of the reassembly are quantified in Table 3 : An MAE of 37.27 ± 51.86 shows a low mean uncertainty of 6.1 pixels per coordinate axis. However, the large standard deviation, the high MSE and qualitative evaluation of reassembled images point to a huge puzzle misalignment if even one or more pairwise registrations in a puzzle fail. A high normalized cross-correlation adjusted for RGB images confirms the overall good ability of the HiViTAlign pipeline to puzzle tissue fragments. Download figure Open in new tab Figure 6. Four different pig organ puzzles (A,B,C,D) were reassembled using the ground truth (subfigure left) and predicted (subfigure right) translation. Fragments with large boundaries can be easily matched (A,C) whereas puzzles with more fragments (B) or heterogeneous tissue shape show larger errors. The reassembly result is mostly limited by the pairwise registration uncertainty. The used search tree algorithm is able to reassemble adjacent tissue fragments. A metric to quantify puzzle reassembly is the average neighbor percentage, defined as the fraction of correctly reassembled fragments, as described by Derech et al. [ 8 ]. Shown in the appendix in Fig. 10 for all numbers of fragments per cluster in the pig organ dataset, performance declines with increasing fragment count. A nearly perfect reassembly performance for 2 and 3 fragments drops almost to 50% for clusters with 8 or more fragments (see Fig. 10 in the Appendix). 7 Discussion This work presents HiViTAlign , a modular pipeline for Hi stopathological Vi sion T ransformer Align ment that integrates ViT-based image registration with graph-based clustering algorithms available at https://github.com/cpheidelberg/HiViTAlign . To our knowledge, this is the first approach to reassemble histopathological tissue from whole slide image (WSI) fragments.
Unlike prior methods relying on edge extension [ 39 ] or overlap-based registration [ 8 ], the transformer-based approach enables direct alignment of adjacent fragments, mimicking real-world histological slicing conditions. Despite the absence of standardized datasets for tissue reassembly, a synthetic puzzle set generator was developed and made publicly available via GitHub ( https://github.com/cpheidelberg/ImagePuzzleGenerator ). Hereby, we demonstrate that training on synthetically fragmented data generalizes well to real WSIs cropped along irregular, pathology-inspired boundaries. Even though the generator creates labeled data for supervised learning automatically, synthetic fragments differ significantly from real adjacent WSIs, limiting reconstruction of whole organs. A computer vision approach always has the disadvantage of requiring training, which offsets the advantage of fast puzzle reconstruction. Despite the extensive training, the pairwise image registration still shows prediction errors like overlapping fragments or abruptly changing texture at the boundary. A completely rule-based approach solves these issues, but might not be as flexible as extending the C2FViT network by a physics-informed component [ 40 ]. HiViTAlign is designed with a modular structure, allowing components such as stain normalization [ 41 ], tissue detection [ 27 ], or alternative transformer architectures to be flexibly integrated. Therefore, further improvements of the partial image registration can be easily integrated, such as additional transformations including rotation, flip and shearing. Its open-source availability further promotes reproducibility and adaptation to diverse histopathological contexts. Trained on an automatically generated puzzle dataset, the pipeline can be reused for reassembling images in other disciplines as well. Beyond automation, the pipeline supports explainability.
It not only outputs the predicted fragment configuration together with the corresponding transformation parameters but also visualizes the spatial relationships that underlie each clustering decision. This transparency is critical for clinical adoption, reducing reliance on opaque black-box models. In contrast to serial histological sections based on consecutive slices [ 21 ], HiViTAlign addresses the challenge of fragment reassembly from independently prepared 2D slides from separate tissue blocks without strict serial ordering or overlap. The overarching goal of this study is to bridge the resolution and scale discrepancy between radiological and pathological imaging. By paving the way for three-dimensional reconstruction from histological sections, HiViTAlign established the foundation for seamlessly integrating both imaging regimes into a unified and generalizable diagnostic workflow. 8 Acknowledgments This publication was supported through state funds approved by the State Parliament of Baden-Württemberg for the Innovation Campus Health + Life Science alliance Heidelberg Mannheim. Furthermore, the authors acknowledge support by the state of Baden-Württemberg through bwHPC and the German Research Foundation (DFG) through grant INST 35/1597-1 FUGG. This work was supported by the de.NBI Cloud within the German Network for Bioinformatics Infrastructure (de.NBI) and ELIXIR-DE (Forschungszentrum Jülich and W-de.NBI-001, W-de.NBI-004, W-de.NBI-008, W-de.NBI-010, W-de.NBI-013, W-de.NBI-014, W-de.NBI-016, W-de.NBI-022). The authors gratefully acknowledge the data storage service SDS@hd supported by the Ministry of Science, Research and the Arts Baden-Württemberg (MWK) and the German Research Foundation (DFG) through grant INST 35/1503-1 FUGG. References 1. ↵ Su R , Zhang Y , Liu H , F Frangi A , editors. Medical Imaging and Computer-Aided Diagnosis: Proceedings of 2022 International Conference on Medical Imaging and Computer-Aided Diagnosis (MICAD 2022). vol. 
810 of Lecture Notes in Electrical Engineering . Singapore : Springer Nature ; 2023 . Available from: https://link.springer.com/10.1007/978-981-16-6775-6 . 2. ↵ Rusu M , Shao W , Kunder CA , Wang JB , Soerensen SJC , Teslovich NC , et al. Registration of presurgical MRI and histopathology images from radical prostatectomy via RAPSODI . Medical Physics . 2020 ; 47 ( 9 ): 4177 – 4188 . doi: 10.1002/mp.14337 . OpenUrl CrossRef PubMed 3. ↵ Yilmaz S , Nabiyev VV . Comprehensive survey of the solving puzzle problems . Computer Science Review . 2023 ; 50 : 100586 . doi: 10.1016/j.cosrev.2023.100586 . OpenUrl CrossRef 4. ↵ Mangham DC , Athanasou NA . Guidelines for histopathological specimen examination and diagnostic reporting of primary bone tumours . Clinical Sarcoma Research . 2011 ; 1 ( 1 ): 6 . doi: 10.1186/2045-3329-1-6 . OpenUrl CrossRef PubMed 5. ↵ Beroiz M , Cabral JB , Sanchez B. Astroalign: A Python module for astronomical image registration . Astronomy and Computing . 2020 ; 32 : 100384 . doi: 10.1016/j.ascom.2020.100384 . OpenUrl CrossRef 6. ↵ Wang Q. Towards Real-Time 3D Terrain Reconstruction from Aerial Imagery . Geographies . 2024 ; 4 ( 1 ): 66 – 82 . doi: 10.3390/geographies4010005 . OpenUrl CrossRef 7. ↵ Dong Y , Ifrim G , Mladenić D , Saunders C , Van Hoecke S Kim K , Hong J , Rhee SH , Woo SS . Reconstructing the Past: Applying Deep Learning to Reconstruct Pottery from Thousands Shards . In: Dong Y , Ifrim G , Mladenić D , Saunders C , Van Hoecke S , editors. Machine Learning and Knowledge Discovery in Databases. Applied Data Science and Demo Track. vol. 12461 . Cham : Springer International Publishing ; 2021 . p. 36 – 51 . Available from: https://link.springer.com/10.1007/978-3-030-67670-4_3 . 8. ↵ Derech N , Tal A , Shimshoni I. Solving archaeological puzzles . Pattern Recognition . 2021 ; 119 : 108065 . doi: 10.1016/j.patcog.2021.108065 . OpenUrl CrossRef 9. ↵ Abitbol R , Shimshoni I , Ben-Dov J. 
Machine Learning Based Assembly of Fragments of Ancient Papyrus . Journal on Computing and Cultural Heritage . 2021 ; 14 ( 3 ): 1 – 21 . doi: 10.1145/3460961 . OpenUrl CrossRef 10. ↵ Paumard MM , Picard D , Tabia H. Deepzzle: Solving Visual Jigsaw Puzzles with Deep Learning and Shortest Path Optimization . IEEE Transactions on Image Processing . 2020 ; 29 : 3569 – 3581 . doi: 10.1109/TIP.2019.2963378 . OpenUrl CrossRef 11. ↵ Basu A , Das S , Horain P , Bhattacharya S , editors K S L , Das S , Menon A , Varghese K. Graph-Based Clustering for Apictorial Jigsaw Puzzles of Hand Shredded Content-less Pages . In: Basu A , Das S , Horain P , Bhattacharya S , editors. Intelligent Human Computer Interaction. vol. 10127 . Cham : Springer International Publishing ; 2017 . p. 135 – 147 . Available from: http://link.springer.com/10.1007/978-3-319-52503-7_11 . 12. ↵ Dosovitskiy A , Beyer L , Kolesnikov A , Weissenborn D , Zhai X , Unterthiner T , et al. An Image is Worth 16×16 Words: Transformers for Image Recognition at Scale ; 2021 . Available from: http://arxiv.org/abs/2010.11929 . 13. ↵ Liu Z , Lin Y , Cao Y , Hu H , Wei Y , Zhang Z , et al. Swin Transformer: Hierarchical Vision Transformer using Shifted Windows ; 2021 . Available from: http://arxiv.org/abs/2103.14030 . 14. ↵ Preibisch S , Saalfeld S , Tomancak P. Globally optimal stitching of tiled 3D microscopic image acquisitions . Bioinformatics . 2009 ; 25 ( 11 ): 1463 – 1465 . doi: 10.1093/bioinformatics/btp184 . OpenUrl CrossRef PubMed Web of Science 15. ↵ Chalfoun J , Majurski M , Blattner T , Bhadriraju K , Keyrouz W , Bajcsy P , et al. MIST: Accurate and Scalable Microscopy Image Stitching Tool with Stage Modeling and Error Minimization . Scientific Reports . 2017 ; 7 ( 1 ): 4988 . doi: 10.1038/s41598-017-04567-y . OpenUrl CrossRef PubMed 16. ↵ Yaniv Z , Lowekamp BC , Johnson HJ , Beare R. SimpleITK Image-Analysis Notebooks: a Collaborative Environment for Education and Reproducible Research . 
Journal of Digital Imaging . 2018 ; 31 ( 3 ): 290 – 303 . doi: 10.1007/s10278-017-0037-8 . OpenUrl CrossRef PubMed 17. Chen J , Frey EC , He Y , Segars WP , Li Y , Du Y. TransMorph: Transformer for unsupervised medical image registration . Medical Image Analysis . 2022 ; 82 : 102615 . doi: 10.1016/j.media.2022.102615 . OpenUrl CrossRef 18. ↵ Strittmatter A , Schad LR , Zöllner FG . Deep learning-based affine medical image registration for multimodal minimal-invasive image-guided interventions - A comparative study on generalizability . Zeitschrift für Medizinische Physik . 2024 ; 34 ( 2 ): 291 – 317 . doi: 10.1016/j.zemedi.2023.05.003 . OpenUrl CrossRef 19. ↵ Zhang C , Jiang Y , Li N , Zhang Z , Islam MT , Dai J , et al. A Hybrid Deep Feature-Based Deformable Image Registration Method for Pathology Images ; 2023 . Available from: http://arxiv.org/abs/2208.07655 . 20. ↵ Lin Y , Liang Z , He Y , Huang W , Guan T. End-to-end affine registration framework for histopathological images with weak annotations . Computer Methods and Programs in Biomedicine . 2023 ; 241 : 107763 . doi: 10.1016/j.cmpb.2023.107763 . OpenUrl CrossRef PubMed 21. ↵ Gatenbee CD , Baker AM , Prabhakaran S , Swinyard O , Slebos RJC , Mandal G , et al. Virtual alignment of pathology image series for multi-gigapixel whole slide images . Nature Communications . 2023 ; 14 ( 1 ): 4502 . doi: 10.1038/s41467-023-40218-9 . OpenUrl CrossRef PubMed 22. ↵ Woo J , Hering A , Silva W , Li X , Fu H , Liu X Albuquerque T , Fang ML , Wiestler B , Delbridge C , Vasconcelos MJM , Cardoso JS , et al. Multimodal Context-Aware Detection of Glioma Biomarkers Using MRI and WSI . In: Woo J , Hering A , Silva W , Li X , Fu H , Liu X , et al. , editors. Medical Image Computing and Computer Assisted Intervention – MICCAI 2023 Workshops . Cham : Springer Nature Switzerland ; 2023 . p. 157 – 167 . 23. ↵ Goode A , Gilbert B , Harkes J , Jukic D , Satyanarayanan M. 
OpenSlide: A vendor-neutral software foundation for digital pathology . Journal of Pathology Informatics . 2013 ; 4 ( 1 ): 27 . doi: 10.4103/2153-3539.119005 . OpenUrl CrossRef PubMed 24. ↵ Otsu N. A Threshold Selection Method from Gray-Level Histograms . IEEE Transactions on Systems, Man, and Cybernetics . 1979 ; 9 ( 1 ): 62 – 66 . doi: 10.1109/TSMC.1979.4310076 . OpenUrl CrossRef PubMed Web of Science 25. ↵ Colling R , Pitman H , Oien K , Rajpoot N , Macklin P , CM-Path AI in Histopathology Working Group, et al . Artificial intelligence in digital pathology: a roadmap to routine use in clinical practice . The Journal of Pathology . 2019 ; 249 ( 2 ): 143 – 150 . doi: 10.1002/path.5310 . OpenUrl CrossRef PubMed 26. ↵ Canny J. A Computational Approach To Edge Detection. Pattern Analysis and Machine Intelligence , IEEE Transactions on . 1986 ;PAMI-8:679–698. doi: 10.1109/TPAMI.1986.4767851 . OpenUrl CrossRef PubMed Web of Science 27. ↵ Rong R , Sheng H , Jin KW , Wu F , Luo D , Wen Z , et al. A Deep Learning Approach for Histology-Based Nucleus Segmentation and Tumor Microenvironment Characterization . Modern Pathology . 2023 ; 36 ( 8 ): 100196 . doi: 10.1016/j.modpat.2023.100196 . OpenUrl CrossRef PubMed 28. ↵ Bancher B , Mahbod A , Ellinger I , Ecker R , Dorffner G. Improving Mask R-CNN for Nuclei Instance Segmentation in Hematoxylin & Eosin-Stained Histological Images . In: Proceedings of the MICCAI Workshop on Computational Pathology . PMLR ; 2021 . p. 20 – 35 . Available from: https://proceedings.mlr.press/v156/bancher21a.html . 29. ↵ Lowe DG . Object recognition from local scale-invariant features . In: Proceedings of the Seventh IEEE International Conference on Computer Vision. Kerkyra, Greece : IEEE ; 1999 . p. 1150 – 1157 vol.2. Available from: http://ieeexplore.ieee.org/document/790410/ . 30. ↵ Bay H , Ess A , Tuytelaars T , Van Gool L. Speeded-Up Robust Features (SURF) . Computer Vision and Image Understanding . 2008 ; 110 ( 3 ): 346 – 359 . 
doi: 10.1016/j.cviu.2007.09.014 . OpenUrl CrossRef Web of Science 31. ↵ Mok TCW , Chung ACS . Affine Medical Image Registration with Coarse-to-Fine Vision Transformer. In: 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) ; 2022 . p. 20803 – 20812 . Available from: https://ieeexplore.ieee.org/document/9879546 . 32. ↵ Wang Z , Bovik AC , Sheikh HR , Simoncelli EP . Image quality assessment: from error visibility to structural similarity . IEEE Transactions on Image Processing . 2004 ; 13 ( 4 ): 600 – 612 . doi: 10.1109/TIP.2003.819861 . OpenUrl CrossRef PubMed Web of Science 33. ↵ Sethi D , Bharti S , Prakash C. A comprehensive survey on gait analysis: History, parameters, approaches, pose estimation, and future work . Artificial Intelligence in Medicine . 2022 ; 129 : 102314 . doi: 10.1016/j.artmed.2022.102314 . OpenUrl CrossRef PubMed 34. ↵ Klüver C , Klüver J , Schmidt J. Modellierung komplexer Prozesse durch naturanaloge Verfahren: Künstliche Intelligenz und Künstliches Leben . Wiesbaden : Springer Fachmedien ; 2024 . Available from: https://link.springer.com/10.1007/978-3-658-43408-3 . 35. ↵ Jungnickel D. Graphs, networks and algorithms . Fourth edition ed. Algorithms and computation in mathematics . Berlin ; Heidelberg ; New York ; Dordrecht ; London : Springer ; 2013 . 36. ↵ Holm L , Sander C. Mapping the Protein Universe . Science . 1996 ; 273 ( 5275 ): 595 – 602 . doi: 10.1126/science.273.5275.595 . OpenUrl Abstract / FREE Full Text 37. ↵ Loshchilov I , Hutter F. Decoupled Weight Decay Regularization ; 2019 . Available from: http://arxiv.org/abs/1711.05101 . 38. ↵ Wolfson H , Schonberg E , Kalvin A , Lamdan Y. Solving jigsaw puzzles by computer . Annals of Operations Research . 1988 ; 12 ( 1 ): 51 – 64 . doi: 10.1007/BF02186360 . OpenUrl CrossRef 39. ↵ Fresquet X , Hesser J. 1st. Joint DFH/UFA workshop on AI in Medicine: Optimised Trials with Machine Learning. vol. 1. Fresquet X, Hesser J, editors . 
Heidelberg : Heidelberg University Library ; 2024 . Available from: https://archiv.ub.uni-heidelberg.de/volltextserver/35481/ . 40. ↵ Kadambi A , de Melo C , Hsieh CJ , Srivastava M , Soatto S. Incorporating physics into data-driven computer vision . Nature Machine Intelligence . 2023 ; 5 ( 6 ): 572 – 580 . doi: 10.1038/s42256-023-00662-0 . OpenUrl CrossRef 41. ↵ Runz M , Rusche D , Schmidt S , Weihrauch MR , Hesser J , Weis CA . Normalization of HE-stained histological images using cycle consistent generative adversarial networks . Diagnostic Pathology . 2021 ; 16 ( 1 ): 71 . doi: 10.1186/s13000-021-01126-y . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted July 18, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Post-operative tissue fragment puzzling using histopathological vision transformer alignment HiViTAlign Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Post-operative tissue fragment puzzling using histopathological vision transformer alignment HiViTAlign Christoph Blattgerste , Tanzina Ferdous , Ayk Jessen , Maximilian Legnar , Karl Rohr , Claudia Scherl , Jürgen Hesser , Cleo-Aron Weis bioRxiv 2025.07.14.664649; doi: https://doi.org/10.1101/2025.07.14.664649 Share This Article: Copy Citation Tools Post-operative tissue fragment puzzling using histopathological vision transformer alignment HiViTAlign Christoph Blattgerste , Tanzina Ferdous , Ayk Jessen , Maximilian Legnar , Karl Rohr , Claudia Scherl , Jürgen Hesser , Cleo-Aron Weis bioRxiv 2025.07.14.664649; doi: https://doi.org/10.1101/2025.07.14.664649 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Pathology Subject Areas All Articles Animal Behavior and Cognition (7618) Biochemistry (17635) Bioengineering (13859) Bioinformatics (41846) Biophysics (21401) Cancer Biology (18534) Cell Biology (25422) Clinical Trials (138) Developmental Biology (13352) Ecology (19860) Epidemiology (2067) Evolutionary Biology (24285) Genetics (15582) Genomics (22463) Immunology (17700) Microbiology (40298) Molecular Biology (17141) Neuroscience (88424) Paleontology (666) Pathology (2825) Pharmacology and Toxicology (4813) Physiology (7633) Plant Biology (15107) Scientific Communication and Education (2042) Synthetic Biology (4284) Systems Biology (9808) Zoology (2267)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.