Full text
42,577 characters
· extracted from
preprint-html
· click to expand
Constrained template matching using rejection sampling | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Constrained template matching using rejection sampling View ORCID Profile Valentin J. Maurer , View ORCID Profile Lukas Grunwald , View ORCID Profile Dante M. Kennes , View ORCID Profile Jan Kosinski doi: https://doi.org/10.1101/2025.06.10.658890 Valentin J. 
Maurer 1 Structural Biology Unit, European Molecular Biology Laboratory , Notkestraße 85, 22607 Hamburg, Germany 2 Centre for Structural Systems Biology , CSSB, Notkestraße 85, 22607 Hamburg, Germany 3 Institute of Molecular Biology and Biophysics, ETH Zurich , 8092 Zurich, Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Valentin J. Maurer For correspondence: valentin.maurer{at}embl-hamburg.de Lukas Grunwald 4 Institut für Theorie der Statistischen Physik, RWTH Aachen University and JARA-Fundamentals of Future Information Technology , 52056 Aachen, Germany 5 Max Planck Institute for the Structure and Dynamics of Matter, Center for Free-Electron Laser Science (CFEL) , Luruper Chaussee 149, 22761 Hamburg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Lukas Grunwald Dante M. Kennes 4 Institut für Theorie der Statistischen Physik, RWTH Aachen University and JARA-Fundamentals of Future Information Technology , 52056 Aachen, Germany 5 Max Planck Institute for the Structure and Dynamics of Matter, Center for Free-Electron Laser Science (CFEL) , Luruper Chaussee 149, 22761 Hamburg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Dante M. 
Kennes Jan Kosinski 1 Structural Biology Unit, European Molecular Biology Laboratory , Notkestraße 85, 22607 Hamburg, Germany 2 Centre for Structural Systems Biology , CSSB, Notkestraße 85, 22607 Hamburg, Germany 6 Molecular Systems Biology Unit, European Molecular Biology Laboratory , Meyerhofstrasse 1, 69117 Heidelberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jan Kosinski For correspondence: jan.kosinski{at}embl.de Abstract Full Text Info/History Metrics Preview PDF Abstract Identifying macromolecular complexes in situ using cryo-electron tomography remains challenging, with low signal-to-noise ratios and heterogeneous backgrounds among the key limiting factors. By integrating prior knowledge on macromolecular localization, such as the preferred orientations of membrane-associated proteins, detection can be improved by constraining searches to biologically feasible orientations. However, previous approaches integrating such constraints fail to achieve both computational scalability to large and curved systems and accurate detection. To resolve this, here we present rejection sampling, a novel approach for integrating translational and rotational constraints into template matching. Using simulated influenza virus-like particles, we demonstrate that rejection sampling outperforms existing methods in terms of precision and recall. Our approach can uniquely integrate constraints at voxel resolution while being compatible with imaging filters such as the contrast transfer function, essential for accurate macromolecular localization. Rejection sampling thus provides a practical solution for macromolecular detection in large and curved systems. I. INTRODUCTION Cryo-electron tomography (cryo-ET) has become a cornerstone of structural biology by enabling visualization of macromolecular complexes within their native environments at molecular resolution 1 , 2 . 
By imaging frozen-hydrated samples at different tilt angles and computationally reconstructing three-dimensional tomograms, cryo-ET provides detailed views into molecular architecture and protein organization in situ 3 . However, the low signal-to-noise ratio, molecular crowding, and heterogeneous cellular backgrounds complicate automated detection of macromolecular complexes 4 – 6 . Template matching enables macromolecular detection by systematically comparing a reference template structure representing a specific macromolecule of interest against a tomogram based on cross-correlation 7 . This approach has proven effective for detecting isolated particles in purified samples but has low precision and recall in crowded cellular environments 4 – 6 , 8 , 9 . However, when prior knowledge about the orientation of macromolecules relative to cellular geometries is available, such as preferred orientations of membrane-associated proteins, the translational and rotational search can be constrained to biologically relevant configurations to improve detection 10 . Constrained template matching implements constraints through seed points 10 . Conceptually, seed points are initial guesses of the positions and orientations of macromolecules of interest, and can be derived from surface parameterizations of membranes 11 . To accurately represent the surface geometry, i.e., to constrain matches to biologically relevant configurations, the number of seed points increases with system size and curvature. The standard approach extracts subtomograms at each seed point and performs template matching within these localized regions. Translations are then constrained to specific areas within each subtomogram, while orientations are limited to deviating from the seed point normal vector up to a predefined threshold 10 . The key limitation of this approach lies in the representation of surface geometry. 
Each seed point provides a single normal vector, yet surface normals vary continuously throughout every subtomogram. This is particularly problematic for curved systems such as viral envelopes, mitochondrial cristae, and endoplasmic reticulum tubules, which require dense seed point sampling to be accurately represented. This results in redundant computations from overlapping subtomograms, rendering this approach computationally prohibitive for commonly available resources. The recent deformation-based method MPicker 12 addresses the curvature problem by flattening surfaces, effectively eliminating the need for dense sampling since all surface normals become aligned. However, cryo-ET filters, such as missing wedge compensation and contrast transfer function (CTF) correction, which are essential for accurate template matching 7 , 10 , 11 , have no readily available analytical definition in flattened geometries. Therefore, MPicker relies on deep-learning-based picking 13 , which, unlike template matching, requires subsequent classification to determine orientation and distinguish between macromolecular species. These limitations motivate a new approach, combining voxel-resolution surface representation with cryo-ET filters to enable fast macromolecular detection and differentiation. To achieve this, here we introduce constrained template matching using rejection sampling, a novel approach to integrate constraints into template matching. Unlike previous methods, rejection sampling performs template matching on entire tomograms and then applies constraints to reject matches that do not satisfy spatial and orientational constraints imposed by a given set of seed points. Compared to subtomogram-based approaches, rejection sampling eliminates redundant computations and improves computational efficiency by leveraging the scaling properties of template matching on large volumes rather than many subtomograms. 
Rejection sampling enables surface representation at voxel resolution equivalent to deformation-based methods, while being compatible with cryo-ET filters. To illustrate the utility of rejection sampling, we apply it to synthetic cryo-ET data of Influenza A virus-like particles (IAV VLPs) and show improved precision, recall, and runtime compared to existing methods. Rejection sampling is particularly well-suited for identifying and distinguishing macromolecules in large biological systems and highly curved membrane structures, where using subtomograms becomes computationally prohibitive. Rejection sampling thus provides an immediately applicable, practical solution for accurate and efficient constrained template matching. II. RESULTS Schematically, rejection sampling follows the unconstrained template matching procedure on entire tomograms, computing cross-correlations over all template translations and a set of rotations. After each rotation, cross-correlations that do not satisfy the constraints imposed by the given set of seed points are rejected, limiting the set of possible template matches to biologically plausible results ( Fig. 1 ). Rejection is achieved by projecting the template orientation into the local coordinate basis of each seed point and evaluating orientational and translational constraints (see methods IV A). Crucially, rejection sampling decouples computational complexity and the number of seed points, enabling voxel-resolution surface representation while maintaining compatibility with cryo-ET filters for accurate template matching. Download figure Open in new tab FIG. 1. Conceptual depiction of constrained template matching using rejection sampling. A seed point with normal vector n ≡ n z defines the local coordinate system with basis n x , n y , n z . The orientation constraint limits template orientations to those where the reference vector r i falls within a cone of aperture θ around the normal, e.g., r n . 
The spatial constraint is implemented as an ellipsoid with radii a, b, c centered at each seed point µ x , µ y , µ z and aligned along n z . Only matches satisfying both orientational and spatial constraints are retained. To demonstrate these advantages, we created a synthetic IAV VLP, containing the viral glycoproteins hemagglutinin (HA) and neuraminidase (NA) in previously reported ratios ( Fig. 2a , see also Ref. 14). The VLP was modeled as a spherical membrane with a diameter of 120 nm, with glycoproteins distributed at 10 nm spacing 15 . We simulated a tomogram based on the synthetic system using Mepsi 16 , which considers cryo-ET modulation factors such as solvent, CTF, missing wedge, and acquisition noise ( Fig. 2b ). Download figure Open in new tab FIG. 2. Performance evaluation of rejection sampling on a synthetic IAV VLP. a , Synthetic IAV VLP with HA (blue) and NA (green) on spherical membrane. b , Simulated tomogram based on ( a ). Colored triangles indicate examples of HA and NA instances. c , ROC curves comparing unconstrained template matching, rejection sampling, and MPicker. AUC values are shown for each method. Identified instances were considered correct if they fell within 5 voxels of the ground truth. d , Angular deviation between template-matched HA instances and assigned ground truth, quantified as the scalar product between normal vectors (One-tailed Mann-Whitney U test, **** p < 0.0001). e , Runtime comparison across methods, with measured times (points, solid lines) and extrapolations (dotted lines). We evaluated the performance of rejection sampling against established approaches by applying unconstrained template matching, using the standard cross-correlation formulation widely used in the field 17 – 20 , and deep-learning-based picking on membranes using MPicker 12 , 13 to the simulated tomogram for viral glycoprotein localization ( Fig. 2c ). 
We found that rejection sampling outperforms both methods in terms of the receiver operating characteristic (ROC), achieving an area under the curve (AUC) of 0.922, compared to 0.707 for unconstrained template matching and 0.568 for MPicker. Since the picking model provided with MPicker did not differentiate between HA and NA, we also considered identified NA instances as correct for this comparison. Furthermore, we found that the missing wedge compensation and CTF correction are essential for accurate identification of HA ( Fig. S1a ), which was not observed for NA ( Fig. S1b ). While we omit the comparison with subtomogram-based constrained template matching, as it relies on the same template matching principles as rejection sampling, rejection sampling provides better performance per unit time due to reduced computational complexity. Based on the localized glycoproteins, we next assessed whether their angular orientation was correctly determined ( Fig. 2d ). Unconstrained template matching produced a bimodal pattern (median and IQR, 155.377° ± 165.464°), indicating that while HA instances were correctly localized, the determined orientation was not biologically realistic, placing the transmembrane region of HA away from the membrane. In contrast, rejection sampling maintained orientations within the specified 20° threshold (median and IQR 6.420° ± 10.189°), demonstrating its ability to enforce biologically relevant constraints despite noise introduced during data simulation. This was similarly seen for NA ( Fig. S1c ). MPicker does not provide estimates of angular orientations and was therefore excluded from this comparison. Since rejection sampling operates on entire tomograms, its computational cost becomes lower than subtomogram-based methods when the number of seed points exceeds a critical threshold. 
This threshold $k_{\text{critical}}$ can be approximated by comparing the algorithmic complexity of both approaches, which primarily originates from the $n \log(n)$ complexity of Fast Fourier Transforms (FFTs) underlying template matching: $$k_{\text{critical}} = \frac{n \log(n)}{a \log(a)}, \qquad (1)$$ where $n$ is the number of voxels of the tomogram, and $a$ is the number of voxels of each subtomogram. The filtering step during rejection sampling scales linearly with seed point count and is negligibly expensive. For the IAV dataset with a tomogram of $300^3$ voxels and subtomograms of $60^3$ voxels, rejection sampling offers increasing computational advantages beyond 174 seed points. In practice, the required number of seed points depends on the geometry of the surface being analyzed. While large surfaces require additional seed points to achieve uniform coverage, special consideration is required for curved surfaces. The relation between seed points and surface curvature can be derived for arbitrary geometries (see SI VII, Fig. S2 ), and for a sphere becomes $$d\omega = \frac{ds}{R}, \qquad (2)$$ where $d\omega$ is the angular resolution, $ds$ is the spacing between seed points, and $R$ is the sphere radius. Eq. (2) indicates that maintaining constant angular resolution requires the number of seed points to increase linearly with the surface curvature (inverse of the radius). 648 seed points are required to achieve a seed point spacing equivalent to the glycoprotein spacing of 10 nm for the IAV VLP membrane. In practice, we used 3,178 seed points to represent the surface with higher accuracy. To validate our theoretical predictions, we measured runtime performance across methods ( Fig. 2e ). The runtime of rejection sampling remained nearly constant with seed point count, processing 10,000 seed points in approximately 1,000 seconds. In contrast, subtomogram-based approaches exceeded that runtime by a factor of ten at the same number of seed points. Empirically, $k_{\text{critical}}$ occurred at ≈ 750 seed points rather than the predicted 174, because our theoretical prediction omits rotational sampling costs. III. 
DISCUSSION We introduced rejection sampling, an efficient solution for constrained template matching in large and complex biological systems with curved membrane structures. By integrating spatial and orientation constraints while maintaining compatibility with cryo-ET filters, rejection sampling enables more efficient and accurate macromolecular identification than existing approaches. Rejection sampling expresses constraints as coordinate-basis transforms, enabling representation of surface geometries at voxel resolution with minimal additional computational costs incurred from additional seed points. This approach addresses a fundamental limitation of subtomogram-based constrained matching, which exhibits linear computational scaling with seed point count. Consequently, rejection sampling facilitates the practical use of substantially higher numbers of seed points. This enables investigation of large and highly curved biological structures such as viral capsids, mitochondrial cristae, and endoplasmic reticulum networks. While rejection sampling and subtomogram-based constrained matching yield similar results when applied to identical seed points, the ability to increase surface resolution through denser seed point sampling leads to more accurate integration of constraints and thus improves macromolecular detection. Beyond our synthetic validation, our recent application of rejection sampling demonstrated its practical effectiveness in experimental cryo-ET data of filamentous IAV VLPs and Mycoplasma pneumoniae , enabling identification and characterization of various membrane protein complexes in their native environments 21 . Despite these advantages, rejection sampling has certain considerations that merit discussion. The computational implementation involves calculating cross-correlation scores for all possible template rotations before applying constraints. 
For flat systems, such as membranes flattened by MPicker 12 where surface normals are uniformly aligned, this potentially results in rejection of numerous rotations and reduced computational efficiency. Nevertheless, for most biological systems, the favorable computational scaling of rejection sampling accommodates additional rotational sampling, and increased seed point count typically improves the percentage of accepted rotations. Furthermore, rejection sampling avoids redundant rotation calculations by computing template orientations once for the entire tomogram rather than separately for each seed point. While combining template matching with membrane flattening appears advantageous due to reduced search space and simplified rotational sampling, implementing cryo-ET filters such as CTF correction in flattened space presents a technical challenge that requires careful consideration to avoid aliasing artifacts and remains to be solved. Additionally, rejection sampling shares limitations with unconstrained template matching by construction, where suboptimal template masks or filter parameters can result in false positive identifications. Looking beyond macromolecular localization, rejection sampling opens several avenues for methodological development. A promising application is to apply rejection sampling as a post-processing step to existing datasets, enabling evaluation of whether previously identified positions satisfy biological constraints. This capability can improve the reliability of existing structural analyses without necessitating complete dataset reprocessing. Furthermore, rejection sampling could complement deep learning-based particle picking methods, which, while efficient, typically do not distinguish between different macromolecular species or provide particle orientation estimates 6 , 9 , 12 , 13 . 
This motivates a hybrid approach, where rejection sampling could refine matches identified through deep learning by determining species and orientation, combining neural network efficiency with correlation-based specificity. The outputs could further be used in an iterative positive feedback loop 8 , serving as training data for deep-learning approaches aiming to improve their performance. In conclusion, rejection sampling advances constrained template matching for cryo-ET, offering a computationally efficient approach that maintains high detection accuracy while incorporating biologically relevant constraints. This method promises to expand the practical application of template matching to large systems and regions of high curvature, where conventional approaches become computationally prohibitive. While this initial manuscript introduces the method and demonstrates the effectiveness on synthetic data, we plan to update this manuscript with additional biological examples and applications. IV. METHODS A. Theory We present a rejection sampling-based implementation of constrained template matching for the identification of macromolecular complexes in cryo-ET data. Template matching constraints are implemented through seed points with associated normal vectors, which can, for instance, be generated from mesh representations of cellular surfaces at a specified density 11 , 21 . Orientation constraints are implemented through a cone-angle restriction. For a given seed point and its normal vector n , we define an associated rotation matrix R that maps the default template orientation, chosen without loss of generality as e z , onto the normal vector n , viz. n = R e z . We use R to generate an orthonormal coordinate system around the surface normal n ≡ n z : $$n_x = R\,e_x, \quad n_y = R\,e_y, \quad n_z = R\,e_z. \qquad (3)$$ The constraints are applied following the default template matching procedure with uniformly sampled rotations of the template over the entire tomogram. 
For each sampled rotation R i , we project the default template orientation e z as $$r = R_i\,e_z \qquad (4)$$ to model the rotation of the template. If R = R i , the template will be oriented exactly along the surface normal. To assess whether a given R i satisfies the geometrical constraints imposed by the seed point's normal vector, we project r into the cone coordinate system ( n x , n y , n z ), where acceptance can be expressed as $$n_z \cdot r \geq \cos\theta. \qquad (5)$$ The angle θ defines the acceptance cone aperture, limiting orientations to those where the reference direction of the template falls within the specified cone angle around the seed point normal. Spatial constraints are implemented using an ellipsoidal mask $$\frac{x'^2}{a^2} + \frac{y'^2}{b^2} + \frac{z'^2}{c^2} \leq 1, \qquad (6)$$ where ( x ′, y ′, z ′) are coordinates in the rotated reference of the given seed point, and ( a, b, c ) define the acceptance radii along each dimension. Mask coordinates are transformed analogous to Eq. (3) $$(x', y', z')^{\top} = R^{\top}\,(x - \mu_x,\; y - \mu_y,\; z - \mu_z)^{\top}, \qquad (7)$$ where µ i is the seed point coordinate in dimension i . In practice, spatial constraints are not bound to geometrical shapes but can be implemented by user-defined masks. We note that rejection sampling is not limited to surfaces but is compatible with any source of seed points. Furthermore, the mathematical framework generalizes to arbitrary dimensions with appropriate modifications to the constraint equations, e.g., 2D images. B. Implementation We implemented constrained template matching using rejection sampling in the Python Template Matching Engine (PyTME, v0.3.0, 17). PyTME provides a flexible architecture with user-defined analyzers that can be invoked during template matching at the level of individual orientations. Our rejection sampling approach is implemented by the MaxScoreOverRotationsConstrained analyzer class. This new analyzer efficiently applies the translational and orientational constraints imposed by a set of seed points without requiring modifications to the core template matching algorithms. C. Data acquisition Mepsi (v0.3, 16) was used for creating simulated IAV data. 
The viral membrane was set to a radius of 600 Å. 387 HA and 85 NA instances were placed on the membrane using a ratio of 1:5 and spacing of 10 nm, as previously described 14 , 15 . AlphaFold 2 multimer structure predictions of HA and NA were used as templates 22 , 23 , predicted using 6 refinement cycles and otherwise default parameters. A/Hong-Kong/1/1968 H3N2 (UniProt: P11134) sequence was used for HA, A/California/04/2009 H1N1 (UniProt: C3W5S3) for NA predictions. The system was solvated and converted into a tilt series ranging from −60° to 60° with 3° spacing. The tilt-series was CTF-modulated using a defocus of 5 µm and a signal-to-noise ratio of 0.65. The cubic tomogram was reconstructed at a voxel size of 6.8 Å with an edge length of 300 voxels. Simulated IAV tomograms were segmented in Mosaic (v1.0, 21) with MemBrain seg v10 alpha weights (v0.05, 24) downloaded from https://github.com/teamtomo/membrain-seg . Segmentations were refined using interactive tools and converted to triangular membrane meshes in Mosaic. Seed points were generated from the mesh with equidistant spacing of 40 Å and positioning 80 Å radially outward from the mesh surface to approximate the center of mass of viral glycoproteins, resulting in a total of 3,178 seed points. D. Template matching Template matching for IAV was performed using the HA and NA structure predictions described in section IV C. Atomic structure templates were aligned to the z-axis unit vector and converted into densities using the preprocessing utilities of PyTME. Densities were low-pass filtered to the Nyquist frequency of the tomograms used for template matching and resampled to the corresponding voxel size using cubic spline interpolation. Unconstrained template matching and rejection sampling were performed using PyTME. Schematically, template matching evaluates the cross-correlation (CC) between a target f and a template g over translational and rotational degrees of freedom. 
In practice, the translational search can be accelerated through FFTs $$\mathrm{CC}(f, g) = \mathcal{F}^{-1}\left(\mathcal{F}(f) \cdot \mathcal{F}(g)^{*}\right), \qquad (8)$$ where ℱ and ℱ −1 denote the forward and inverse Fourier transform, respectively, and * the complex conjugate. The rotations are sampled sequentially, and the maximum CC per translation and the corresponding rotation are retained. To avoid systematic bias towards regions of high density, the cross-correlation is normalized by considering a background CC distribution given by a suitably chosen template mask m (see also Ref. 17). One such derivation is the fast local cross-correlation (FLC, 20), which is implemented as $$\mathrm{FLC} = \frac{\mathrm{CC}\left(f, \frac{g \cdot m - \overline{g \cdot m}}{\sigma_{g \cdot m}}\right)}{n_m \sqrt{\frac{\mathrm{CC}(f^2, m)}{n_m} - \left(\frac{\mathrm{CC}(f, m)}{n_m}\right)^2}}, \qquad (9)$$ where n m is the number of non-zero elements within the mask, and $\overline{g \cdot m}$ and $\sigma_{g \cdot m}$ are the mean and standard deviation of the masked template, respectively. We used the FLC score, continuous wedge masks reflecting the respective tilt range, CTF-corrected 25 templates, and an angular sampling rate of 7 degrees. Matches were accepted when deviating no more than 20° from the nearest seed point normal and falling within an ellipsoid of radii (6, 6, 10) voxels centered around that seed point and oriented along its normal. Note, the radii are not directly dependent on the particle size, but rather express the maximum expected translation between seed point positions and the correct position. Template matches were identified using PeakCallerMaximumFilter and a minimum peak distance of 10 voxels. All template matching runs were performed on compute nodes with AMD Epyc Genoa 9554 CPUs and NVIDIA L40s GPUs with 48 GB of memory. The typical runtime was below 30 minutes. E. MPicker The simulated IAV tomogram was flattened using MPicker (v1.2.0, 12). The binary membrane segmentation was meshed and UV unwrapped 26 with an energy of 4.04, using utilities provided with MPicker. The tomogram was flattened using a thickness of 30 and a radial basis function distance of 10. 
MPicker picking was performed using the particle picking model provided with EPicker (v1.1.2, 13) on tomogram slices 48 to 57, which contained the viral particles. Individual tomogram slices were padded to an edge length of 1,200 as per the instructions, and the score threshold was set to 0 to recall particles orthogonal to the missing wedge. EPicker was instructed to consider up to 10,000 peaks with a minimum distance of 10 voxels between peaks. EPicker was run on an NVIDIA A100 GPU as per the instructions. Unlike template matching, which identifies particle positions based on center-of-mass correspondence with the reference template, EPicker detects salient structural features that may not coincide with the center-of-mass. To ensure consistent comparison, EPicker picks were projected onto the membrane mesh using nearest-distance projection in Mosaic 21 and shifted by 80 Å along their corresponding surface normals to match the radial distance of ground truth positions. This correction enabled evaluation based solely on lateral positioning accuracy, explained in the next section. F. Evaluation of matching approaches Template matching and MPicker picks were ranked by FLC score and activations, respectively. Picks within 5 voxels of the ground truth were considered correct, with each ground truth particle assigned to the highest-scoring proximate pick. Angular accuracy was quantified by the scalar product between predicted and ground truth normal vectors, and was not computed for MPicker due to a lack of orientation estimates. The runtime between unconstrained template matching, rejection sampling, and subtomogram-based approaches was compared as follows. Unconstrained template matching was performed on the full tomogram. Rejection sampling was run using 50, 199, 795, 3,178, and 12,712 seed points, corresponding to surface sampling densities from 320 Å to 20 Å spacing with sequential halving intervals. Both approaches sampled SO(3) at 7° angular sampling density. 
Subtomogram-based runtime was approximated using 60-voxel-sized subtomograms, sufficient to accommodate all rotations of HA while maintaining aliasing-free CTF definitions (Ref. 25). The subtomogram approach used cone-restricted rotational sampling around surface normals at 7° angular sampling density, also excluding redundant rotations arising from the C3 symmetry of HA. Single-rotation evaluation time was subtracted from total runtime, assuming cached imaging filters and FFT plans in optimized implementations. All methods used identical AMD Epyc Genoa 9554 CPUs with NVIDIA L40s GPUs, and applied missing wedge compensation and CTF correction, with runtime measurements averaged across three independent runs. V. CODE AVAILABILITY Constrained template matching using rejection sampling is implemented in PyTME from version v0.3.0 onwards. PyTME is available from https://github.com/KosinskiLab/pytme . VII. EXTENDED DATA Consider a parametric surface embedded in three-dimensional space $f(\gamma) : U \to M \subset \mathbb{R}^3$ with $\gamma = (u, v)$. For a sphere, we could use spherical coordinates with $(u, v) = (\theta, \phi)$ and $U = [0, \pi] \times [0, 2\pi] \subset \mathbb{R}^2$. A discretization of the surface $f$ introduces an angular resolution for the surface normals $n(\gamma)$, which we illustrate in Fig. S2. It is directly related to the variation of the normal vectors along the surface. For a given direction $\mathrm{d}\gamma = \mathrm{d}s\,\mathrm{d}T$, with arc-length $\mathrm{d}s$ and normalized tangent vector $\mathrm{d}T \in T_\gamma M$, we have the angular resolution $$\mathrm{d}\omega|_{\mathrm{d}\gamma} = \left\| \mathrm{d}n(\gamma)|_{\mathrm{d}T} \right\|$$ with $\mathrm{d}n(\gamma)|_{\mathrm{d}T} = n(\gamma + \mathrm{d}\gamma) - n(\gamma)$. A central result of differential geometry connects the normal vector variation to the shape operator $S_\gamma : T_\gamma M \to T_\gamma M$ (also called Weingarten map) whose eigenvalues determine the local curvature of the surface (Refs. 28, 29). Computationally it is represented as a two-dimensional matrix in the local tangent-basis $(\partial_u f, \partial_v f)$, where it depends on the first $\mathrm{I}_\gamma$ and second $\mathrm{II}_\gamma$ fundamental forms as $$S_\gamma = \mathrm{I}_\gamma^{-1}\,\mathrm{II}_\gamma.$$
The angular resolution is hence given by $$\mathrm{d}\omega|_{\mathrm{d}\gamma} = \left\| S_\gamma(\mathrm{d}T) \right\| \mathrm{d}s. \tag{11}$$ In the case of a spherical surface with radius $R$, this result simplifies to $\mathrm{d}\omega = \mathrm{d}s\,R^{-1}$, emphasizing that high-curvature regions require dense sampling to retain angular resolution of surface normals. Download figure Open in new tab FIG. S1. Comparison of template matching approaches on a synthetic IAV VLP dataset. ROC curves for HA (a) and NA (b) showing performance of three methods: unconstrained template matching, rejection sampling, and unfiltered rejection sampling (omitting filters for missing wedge compensation and CTF correction). AUC values are displayed for each approach. True positive identifications were defined as those falling within the ground truth distance threshold of 5 voxels. (c) Angular deviation between NA instances and assigned ground truth, quantified as the scalar product between normal vectors (one-tailed Mann-Whitney U test, **** p < 0.0001). Download figure Open in new tab FIG. S2. Illustration of surface normal angular resolution along a given direction $\mathrm{d}\gamma = \mathrm{d}s\,\mathrm{d}T$. The geodesic distance between sampling points $\mathrm{d}s$ (arc-length) and the local curvature of the surface, along the normalized tangent vector $\mathrm{d}T$, here given by $\left\| S_\gamma(\mathrm{d}T) \right\| = R^{-1}$, determine the angular resolution $\mathrm{d}\omega|_{\mathrm{d}\gamma}$ [Eq. (11)]. This illustrates that high-curvature regions require higher sampling rates to retain angular resolution. VI. ACKNOWLEDGEMENTS We thank the EMBL IT and HPC resources for providing essential computational infrastructure (Ref. 27). VM and JK acknowledge funding from the CSSB flagship project Plasmofraction. JK was supported by the ERC (TransFORM, 101119142). Funder Information Declared Centre for Structural Systems Biology, https://ror.org/04fhwda97 , Plasmofraction European Research Council , 101119142 REFERENCES ↵ M. Turk and W. Baumeister , FEBS Letters 594 , 3243 ( 2020 ). OpenUrl CrossRef PubMed ↵ C. M. Oikonomou and G. J. Jensen , Annu Rev Biochem 86 , 873 ( 2017 ).
OpenUrl CrossRef PubMed ↵ P. C. Hoffmann , J. P. Kreysing , I. Khusainov , M. W. Tuijtel , S. Welsch , and M. Beck , Nature Communications 13 , 7435 ( 2022 ). OpenUrl CrossRef PubMed ↵ J. Böhm , A. S. Frangakis , R. Hegerl , S. Nickell , D. Typke , and W. Baumeister , Proc. Natl. Acad. Sci. U. S. A . 97 , 14245 ( 2000 ). OpenUrl Abstract / FREE Full Text V. Maurer , M. Siggel , and J. Kosinski , Acta Crystallographica Section D Structural Biology 80 ( 2024 ) , doi: 10.1107/S2059798324004303 . OpenUrl CrossRef ↵ G. Liu , T. Niu , M. Qiu , Y. Zhu , F. Sun , and G. Yang , Nat Commun 15 , 2090 ( 2024 ). OpenUrl CrossRef PubMed ↵ A. S. Frangakis , J. Böhm , F. Förster , S. Nickell , D. Nicastro , D. Typke , R. Hegerl , and W. Baumeister , Proceedings of the National Academy of Sciences 99 , 14153 ( 2002 ). OpenUrl Abstract / FREE Full Text ↵ X. Zeng , A. Kahng , L. Xue , J. Mahamid , Y.-W. Chang , and M. Xu , Proceedings of the National Academy of Sciences 120 , e2213149120 ( 2023 ). OpenUrl CrossRef PubMed ↵ I. de Teresa-Trueba , S. K. Goetz , A. Mattausch , F. Stojanovska , C. E. Zimmerli , M. Toro-Nahuelpan , D. W. C. Cheng , F. Tollervey , C. Pape , M. Beck , A. Diz-Muñoz , A. Kreshuk , J. Mahamid , and J. B. Zaugg , Nature Methods 20 , 284 ( 2023 ). OpenUrl CrossRef PubMed ↵ F. Förster , O. Medalia , N. Zauberman , W. Baumeister , and D. Fass , Proc Natl Acad Sci U S A 102 , 4729 ( 2005 ). OpenUrl Abstract / FREE Full Text ↵ D. Castaño-Díez , M. Kudryashev , and H. Stahlberg , Journal of Structural Biology 197 , 135 ( 2017 ). OpenUrl CrossRef PubMed ↵ X. Yan , S. Li , W. Huang , H. Wang , T. Zhao , M. Huang , N. Zhou , Y. Shen , and X. Li , Nature Communications 16 , 472 ( 2025 ). OpenUrl CrossRef PubMed ↵ X. Zhang , T. Zhao , J. Chen , Y. Shen , and X. Li , Nature Communications 13 , 2468 ( 2022 ). OpenUrl CrossRef PubMed ↵ A. Harris , G. Cardone , D. C. Winkler , J. B. Heymann , M. Brecher , J. M. White , and A. C. 
Steven , Proc Natl Acad Sci U S A 103 , 19123 ( 2006 ). OpenUrl Abstract / FREE Full Text ↵ M. D. Vahey and D. A. Fletcher , Elife 8 ( 2019 ) , doi: 10.7554/eLife.43764 . OpenUrl CrossRef ↵ B. Rodríguez de Francisco , A. Bezault , X.-P. Xu , D. Hanein , and N. Volkmann , Journal of Structural Biology 214 , 107921 ( 2022 ). OpenUrl CrossRef PubMed ↵ V. J. Maurer , M. Siggel , and J. Kosinski , SoftwareX 25 , 101636 ( 2024 ). OpenUrl CrossRef T. Hrabe , Y. Chen , S. Pfeffer , L. Kuhn Cuellar , A.-V. Mangold , and F. Förster , Journal of Structural Biology 178 , 177 ( 2012 ). OpenUrl CrossRef PubMed W. Wan , S. Khavnekar , and J. Wagner , Acta Crystallographica Section D 80 , 336 ( 2024 ). OpenUrl CrossRef ↵ A. M. Roseman , Ultramicroscopy 94 , 225 ( 2003 ). OpenUrl CrossRef PubMed Web of Science ↵ V. J. Maurer , M. Siggel , R. K. Jensen , J. Mahamid , J. Kosinski , and W. Pezeshkian , bioRxiv ( 2025 ) , doi: 10.1101/2025.05.24.655915 . OpenUrl Abstract / FREE Full Text ↵ J. Jumper , R. Evans , A. Pritzel , T. Green , M. Figurnov , O. Ronneberger , K. Tunyasuvunakool , R. Bates , A. Žídek , A. Potapenko , A. Bridgland , C. Meyer , S. A. Kohl , A. J. Ballard , A. Cowie , B. Romera-Paredes , S. Nikolov , R. Jain , J. Adler , T. Back , S. Petersen , D. Reiman , E. Clancy , M. Zielinski , M. Steinegger , M. Pacholska , T. Berghammer , S. Bodenstein , D. Silver , O. Vinyals , A. W. Senior , K. Kavukcuoglu , P. Kohli , and D. Hassabis , Nature 596 , 583 ( 2021 ). OpenUrl CrossRef PubMed ↵ R. Evans , M. O’Neill , A. Pritzel , N. Antropova , A. Senior , T. Green , A. Žídek , R. Bates , S. Blackwell , J. Yim , O. Ronneberger , S. Bodenstein , M. Zielinski , A. Bridgland , A. Potapenko , A. Cowie , K. Tunyasuvunakool , R. Jain , E. Clancy , P. Kohli , J. Jumper , and D. Hassabis , bioRxiv ( 2022 ) , doi: 10.1101/2021.10.04.463034 . OpenUrl Abstract / FREE Full Text L. Lamm , S. Zufferey , R. D. Righetto , W. Wietrzynski , K. A. Yamauchi , A. Burt , Y. Liu , H. Zhang , A.
Martinez-Sanchez , S. Ziegler , F. Isensee , J. A. Schnabel , B. D. Engel , and T. Peng , bioRxiv ( 2024 ) , doi: 10.1101/2024.01.05.574336 . OpenUrl Abstract / FREE Full Text ↵ A. Rohou and N. Grigorieff , J Struct Biol 192 , 216 ( 2015 ). OpenUrl CrossRef PubMed ↵ M. Li , D. M. Kaufman , V. G. Kim , J. Solomon , and A. Sheffer , ACM Trans. Graph. 37 ( 2018 ) , doi: 10.1145/3272127.3275042 . OpenUrl CrossRef ↵ E. M. B. Laboratory , J. Pečar , R. Lueck , and M. Wahlers , “EMBL Heidelberg HPC cluster,” ( 2020 ). ↵ T. Shifrin , University of Georgia ( 2016 ). ↵ B. O’Neill , Elementary Differential Geometry, revised 2nd ed. ( Elsevier academic press , Amsterdam , 2006 ). View the discussion thread. Back to top Previous Next Posted June 15, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Constrained template matching using rejection sampling Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Constrained template matching using rejection sampling Valentin J. Maurer , Lukas Grunwald , Dante M. Kennes , Jan Kosinski bioRxiv 2025.06.10.658890; doi: https://doi.org/10.1101/2025.06.10.658890 Share This Article: Copy Citation Tools Constrained template matching using rejection sampling Valentin J. Maurer , Lukas Grunwald , Dante M.
Kennes , Jan Kosinski bioRxiv 2025.06.10.658890; doi: https://doi.org/10.1101/2025.06.10.658890 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Biophysics Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41911) Biophysics (21436) Cancer Biology (18578) Cell Biology (25482) Clinical Trials (138) Developmental Biology (13371) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15599) Genomics (22482) Immunology (17728) Microbiology (40363) Molecular Biology (17163) Neuroscience (88536) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15129) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.