Full text
69,868 characters
· extracted from
preprint-html
· click to expand
CryoFSL: An Annotation-Efficient, Few-Shot Learning Framework for Robust Protein Particle Picking in Cryo-EM Micrographs | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results CryoFSL: An Annotation-Efficient, Few-Shot Learning Framework for Robust Protein Particle Picking in Cryo-EM Micrographs View ORCID Profile Biplab Poudel , Rajan Gyawali , Ashwin Dhakal , Jianlin Cheng , View ORCID Profile Dong Xu doi: https://doi.org/10.1101/2025.09.19.677446 Biplab Poudel 1 Department of Electrical Engineering and Computer Science, University of Missouri , Columbia, MO 65211, USA 2 Bond Life Sciences Center, University of Missouri , Columbia, MO 65211, USA 
Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Biplab Poudel Rajan Gyawali 1 Department of Electrical Engineering and Computer Science, University of Missouri , Columbia, MO 65211, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ashwin Dhakal 1 Department of Electrical Engineering and Computer Science, University of Missouri , Columbia, MO 65211, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jianlin Cheng 1 Department of Electrical Engineering and Computer Science, University of Missouri , Columbia, MO 65211, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: jianlin.cheng{at}missouri.edu xudong{at}missouri.edu Dong Xu 1 Department of Electrical Engineering and Computer Science, University of Missouri , Columbia, MO 65211, USA 2 Bond Life Sciences Center, University of Missouri , Columbia, MO 65211, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Dong Xu For correspondence: jianlin.cheng{at}missouri.edu xudong{at}missouri.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF A bstract Accurate identification of protein particles in cryo-electron microscopy (cryo-EM) micrographs is crucial for high-resolution structure determination, but remains challenging due to the heavy reliance on extensive annotated datasets and the difficulty of ensuring robustness under low signal-to-noise ratio (SNR) conditions. Current approaches require large annotations and exhibit poor generalization to new protein targets. 
We present CryoFSL (Cryo-EM Few-Shot Learning), a novel few-shot learning framework built upon Segment Anything Model 2 (SAM2) with lightweight adapters, enabling robust particle picking using as few as five labeled micrographs, significantly reducing annotation burden. The framework’s hierarchical adapter design supports dynamic feature modulation for low-SNR and heterogeneous conditions, resolving the trade-off between annotation burden and performance. CryoFSL surpasses both traditional template-based methods and state-of-the-art deep learning models across diverse proteins in the few-shot learning setting, achieving superior recall, precision and 3D reconstruction resolution with minimal supervision. It maintains stability across heterogeneous micrographs and consistently detects high-quality particles with fewer false positives. Notably, CryoFSL achieves competitive density map reconstruction resolution with just a fraction of the particles picked by other methods, redefining efficiency and quality in cryo-EM analysis. This work paves the way for scalable, generalizable, and annotation-efficient particle picking pipelines. The code is available at GitHub. 1 Introduction The determination of protein structures stands at the forefront of modern structural biology, providing essential insights into cellular mechanisms, disease processes, and therapeutic development [ 1 ]. Understanding the structure of proteins is essential for investigating protein interactions, comprehending pathophysiology, and advancing drug development [ 2 , 3 ]. Cryo-electron microscopy, or cryo-EM, has emerged as a groundbreaking technology for structure determination, enabling near-atomic resolution imaging of large macromolecular complexes by preserving specimens in their native state through rapid vitrification [ 4 , 5 , 6 ]. 
A pivotal step in the cryo-EM workflow is protein particle picking, which involves identifying and extracting individual protein particles from micrographs containing thousands of randomly oriented molecules in noisy backgrounds [ 7 ]. This process presents substantial technical challenges that directly influence the quality of downstream structural analysis. Cryo-EM micrographs are inherently characterized by extremely low signal-to-noise ratios (SNR), rendering protein particles as subtle contrast variations that are often indistinguishable from background noise, ice contaminations, and various imaging artifacts [ 8 , 9 ]. The complexity is further compounded by the intrinsic heterogeneity of biological specimens, which may exhibit conformational flexibility, preferred orientations, and structural variability. Additionally, aggregated particles, overlapping structures, and false positives such as ice crystals further complicate reliable particle detection [ 10 , 11 , 12 ]. Therefore, robust and efficient particle picking is essential for ensuring high-resolution cryo-EM structures, as the accuracy and effectiveness of this process have a significant impact on the quality of the resulting 3D density map reconstruction and its resolution. Traditional particle picking approaches have progressed from fully manual selection to semi-automated template-based methods incorporated in commonly used software packages such as EMAN2 [ 13 ], RELION [ 14 ], Scipion [ 15 ], Dog Picker [ 16 ], and APPION [ 17 ]. Manual picking, while accurate, is time-consuming and impractical for large datasets [ 18 , 19 ]. Template-based picking methods involve the generation of 2D reference templates from initial particle subsets, which are then cross-correlated with the entire micrograph through iterative refinement to guide the picking process [ 1 , 18 , 19 ]. 
Although these approaches have demonstrated effectiveness for well-characterized proteins under favorable signal-to-noise conditions, their performance remains constrained by template quality and representativeness. The inherent limitations of template-based methods include their dependence on iterative user intervention, extensive parameter optimization requirements, and susceptibility to template selection bias. These constraints limit their applicability when applied to new targets or heterogeneous datasets and introduce operator-dependence variability that can compromise reproducibility. Furthermore, templates may inadequately represent variations in particle orientation, size, or imaging conditions, leading to reduced accuracy in complex experimental scenarios [ 1 , 18 , 19 , 20 , 21 , 22 , 23 ]. The advancement in deep learning methods has shown great promise in particle picking automation strategies. A number of models, such as APPLE picker [ 23 ], DeepPicker [ 24 ], AutoCryoPicker [ 25 ], Warp [ 26 ], CASSPER [ 27 ], Topaz [ 28 ], CrYOLO [ 29 ], and CryoMAE [ 30 ] have been developed to improve detection accuracy and reduce manual intervention. Among them, Topaz and CrYOLO, both adopting convolutional neural network (CNN)-based models, remain the most widely used in the cryo-EM community. However, CrYOLO frequently overlooks true protein particles from micrographs, while Topaz demonstrates susceptibility to false positive detection, including ice contaminants and duplicate particles [ 30 , 31 , 32 ]. More recent deep learning approaches, including CryoSegNet [ 31 ] and CryoTransformer [ 32 ], have shown enhanced performance through sophisticated architectural designs and extensive training procedures. However, these methods typically require large, well-curated training datasets and substantial computational resources for model optimization, making it challenging to apply them in scenarios where limited training data is available [ 21 ]. 
The reliance on extensive training data also raises concerns about generalization to novel particle types or experimental conditions not well-represented in the training datasets. The trade-offs between annotation burden and model adaptability underscore a key gap in cryo-EM: the lack of a paradigm that balances efficient learning with strong flexibility under extreme data scarcity. This study presents CryoFSL, a data-efficient, few-shot learning framework for protein particle picking in cryo-EM micrographs, leveraging the Segment Anything Model 2 (SAM2) [ 33 ], a state-of-the-art vision foundation model. Our approach incorporates lightweight adapter modules into SAM2’s image encoder while maintaining the base model in a frozen state, enabling rapid and effective adaptation to novel protein specimens using as few as five manually annotated micrographs. Although built on a deep learning foundation, CryoFSL’s working mechanism closely resembles template-based methods in practice, offering efficient adaptation to new targets with minimal supervision and no need for large-scale retraining. CryoFSL is specifically designed for the practical settings where current particle-picking methods struggle: (1) novel or low-resource projects where only a handful of annotated micrographs are available (e.g., early screening of a new target or small labs without extensive annotation resources); (2) low signal-to-noise and heterogeneous datasets where particle contrast varies strongly across micrographs and templates or fully pretrained supervised models fail to generalize; (3) workflows that prioritize particle quality over raw quantity, such as downstream projects requiring high-quality reconstruction from fewer and cleaner particles; and (4) computationally constrained environments where full model re-training is impractical. 
By directly addressing these challenges, CryoFSL bridges the gap between accuracy, annotation efficiency, and generalizability, providing a practical solution for both exploratory and large-scale cryo-EM studies. We tested our framework against traditional template-based methods (EMAN2, RELION, and Scipion) and deep learning approaches (Topaz and CrYOLO), demonstrating superior performance in particle detection across a variety of proteins. While deep learning models like CryoSegNet and CryoTransformer have shown promising results in fully supervised settings, we did not include them in our evaluation due to their dependency on large, curated training datasets and resource-intensive training procedures. These characteristics make them unsuitable for the few-shot, low-annotation regime targeted by CryoFSL. Notably, our method excels in challenging scenarios characterized by low signal-to-noise ratios and significant structural heterogeneity. Our findings highlight the potential of combining foundational models with few-shot learning paradigms to address longstanding challenges in cryo-EM particle picking, offering a scalable, accurate, and annotation-efficient solution for structural biology applications. 2 Results CryoFSL framework for few-shot protein particle picking We introduce CryoFSL , a parameter-efficient few-shot learning framework for automated protein particle picking in cryo-EM micrographs, utilizing SAM2 enhanced with task-specific adapter modules. As illustrated in Figure 1 , input micrographs are first encoded through the frozen SAM2 Hiera-large hierarchical vision transformer encoder to extract multi-scale features, crucial for detecting particles in noisy, low-contrast images. 
To adapt SAM2 for cryo-EM without full finetuning, lightweight adapter modules are integrated at each encoder stage, comprising a stage-specific unshared linear layer and a shared linear projection that modulate features via residual connections, preserving pretrained knowledge while enabling task-specific adaptation (see section 4 ). The adapted features are passed to the SAM2 mask decoder to generate a dense segmentation mask identifying potential particle regions. The resulting binary masks undergo comprehensive post-processing through a multi-stage pipeline that combines distance transforms, multi-scale peak detection, and watershed segmentation to extract precise particle coordinates (Supplementary Algorithm S1) . The final output includes protein particle coordinates in STAR files, compatible with tools like RELION and CryoSPARC [ 34 ] for generating 3D protein density maps, and visually segmented particles marked with circles on the particle regions. Download figure Open in new tab Figure 1: Model architecture of CryoFSL for few-shot cryo-EM protein particle picking. The framework takes a small set of annotated cryo-EM micrographs as input. It utilizes the SAM2 image encoder as its backbone, enhanced with lightweight adapter modules to enable efficient fine-tuning under limited supervision. The encoder is divided into four sequential stages (Stage 1 to Stage 4 ), each containing multiple transformer blocks shown as vertical bars. Adapter modules ( A 1 - A 4 ) inserted between these blocks consist of a shared linear layer, GELU activation, and an unshared lightweight linear layer, enabling targeted adaptation of feature representation while keeping most of the backbone frozen. Frozen and trainable components are visually distinguished to highlight the minimal parameter footprint. The encoded features are passed through the SAM2 mask decoder to generate binary segmentation masks, which undergo a structured post-processing pipeline to generate output. 
The final outputs are a STAR file (text document) for downstream cryo-EM analysis and a visual map of predicted protein particles. This architecture allows CryoFSL to achieve strong generalization with minimal annotations, leveraging the power of vision foundation models while maintaining adaptability to the heterogeneity and noise in cryo-EM micrographs. To thoroughly evaluate CryoFSL’s performance, we conducted extensive experiments on the CryoPPP dataset [ 35 ], testing 1-shot, 5-shot, and 10-shot scenarios across six diverse proteins with varying morphological complexities from EMPIAR (10028, 10081, 10017, 10093, 10345, and 11056) [ 36 ]. We used precision, recall, F1-score, Intersection over Union (IoU), and 3D reconstruction resolution (in Angstroms) to measure particle picking accuracy. To ensure consistency across methods, all baseline models were trained using only five annotated micrographs per protein, matching the few-shot training setup used for CryoFSL, before being tested on the same dataset. Each method was configured according to published best practices, with further details of training and implementation provided in the Methods section. The evaluation revealed that CryoFSL uniformly outperformed all baseline models across different scenarios, demonstrating its stability and effectiveness in few-shot settings with minimal labeled data. Comparative evaluation of 1-shot, 5-shot, and 10-shot learning for CryoFSL particle picking We assessed CryoFSL’s few-shot learning capabilities under 1-shot, 5-shot, and 10-shot configurations, where the model was trained using 1, 5, and 10 annotated micrographs per protein, respectively, to investigate the trade-off between annotation effort and model performance. The results are shown in Figure 2 . As expected, the 10-shot setup achieved the highest performance, particularly excelling in datasets like 10081, 10093, and 11056. 
Impressively, the 1-shot scenario revealed CryoFSL’s tolerance for sparse annotations by achieving an acceptable F1-score above 50 % on most proteins despite only using one annotated micrograph. Download figure Open in new tab Figure 2: Detailed comparison of CryoFSL’s performance across 1-shot, 5-shot, and 10-shot learning scenarios, evaluated using precision, recall, and F1-score metrics on six diverse cryo-EM protein datasets from the EMPIARs (10028, 10081, 10017, 10345, 10093, and 11056). The x-axis represents those six datasets, and the y-axis displays the numerical value of the performance metrics, ranging from 0 to 100, where higher values indicate better performance. Three few-shot learning scenarios are compared using color-coded bars: 1-shot learning (pink), 5-shot learning (cyan), and 10-shot learning (orange), where each scenario corresponds to the number of annotated micrographs used for training. The progression from 1-shot to 5-shot yielded greatly enhanced performance, with the 5-shot configuration achieving results remarkably close to 10-shot across all datasets. In certain instances, like EMPIAR-10017 and 10345, the 5-shot CryoFSL even outperformed 10-shot learning with better recall scores. Notably, the marginal gains from 5-shot to 10-shot settings were modest, with 5-shot achieving 90 – 95 % of the maximum attainable performance while requiring only half the annotated data and computational resources. Consequently, we adopted the 5-shot configuration as our optimal training paradigm for all subsequent experiments, as it struck an ideal balance between performance excellence and practical feasibility. This choice supported one of the central goals of our framework: achieving strong generalization with minimal supervision, thereby enabling rapid and scalable deployment in real-world Cryo-EM analysis workflows. 
Systematic comparison of methods using segmentation metrics across diverse cryo-EM datasets in few-shot learning settings CryoFSL demonstrated excellent overall performance across all six protein targets, achieving the highest average recall (0.845) and F1-score (0.684), as shown in Table 1 . In contrast, template-based methods like RELION, EMAN2, and Scipion achieved high recall (e.g., 0.793 for RELION and 0.787 for Scipion) but considerably lower precision (0.352 and 0.367), leading to suboptimal F1-scores of 0.471 and 0.478. Even deep learning methods designed for low-data regimes, such as Topaz and CrYOLO, struggled in challenging cases like EMPIAR 10345, where they achieved F1-scores of just 0.074 and 0.078, respectively. Supplementary Figure S1 confirms their failure to identify many true particles, reflecting poor adaptability under morphological complexity and low contrast. This suggests that such methods may require more labeled data to achieve reliable results. In contrast, CryoFSL achieved a stable F1-score of 0.528 on the same dataset, leveraging adaptive feature modulation to effectively capture particle characteristics. This resilience with minimal labeled data highlights CryoFSL’s efficiency in few-shot settings, significantly reducing the need for extensive annotations and enhancing reliability for complex cryo-EM micrographs. View this table: View inline View popup Download powerpoint Table 1: Quantitative evaluation results on six protein datasets from CryoPPP for a few-shot setting. Column 1 lists the EMPIAR IDs, and Column 2 specifies the corresponding protein types. The remaining columns report the evaluation metrics – precision, recall, F1-score and IoU – for each compared method. For each metric, the best performance across methods is highlighted in bold. The final row presents the average performance of each method across all six proteins, with the best average also highlighted. 
Visual comparisons on selected micrographs from EMPIAR-10028, 10081, and 10017 further illustrate these differences (Figure 3). CrYOLO generally under-picks, missing a large number of true particles. Topaz, while capturing most particles from the ground truth (GT), suffers from high redundancy due to frequent overlapping picks. Template-based methods like RELION, Scipion, and EMAN2, on the other hand, exhibit aggressive over-picking behavior, identifying excessive numbers of false positives and including non-particle regions, leading to noisy and less reliable selections. This is especially evident in the magnified regions of Figure 3(b), where multiple overlapping or ambiguous picks are observed. CryoFSL, however, achieves a strong balance: maintaining precise alignment with expert-annotated GT particles while simultaneously detecting additional valid particles overlooked during manual curation. Supplementary Figures S2-S6 visually reinforce this, highlighting the widespread false detections by competing methods and the clean, uniform particle selections achieved by CryoFSL. Download figure Open in new tab Figure 3: Visual comparison of particle picking across methods and sample micrographs. (a) Sample micrographs from EMPIAR IDs 10028, 10081, and 10017 are shown with particle locations predicted by all competing methods, overlaid alongside the expert-annotated ground truth (GT). Each method’s result is shown as colored circular markers on the micrographs. All models were evaluated on the same set of micrographs to ensure consistency in qualitative comparison. (b) A magnified view of a representative region from a micrograph in EMPIAR 10017 highlights differences in particle localization between the methods. The red box on the full micrograph shows the zoomed region. 
Robustness and failure mode analysis of particle picking methods using segmentation metric distributions The boxplot analysis in Figure 4 reveals considerable variability in precision and recall distributions across methods, as evidenced by wider interquartile ranges, frequent outliers, and inconsistent medians, particularly for template-based approaches such as RELION and EMAN2. These large spreads and skewed distributions indicate unstable particle picking behavior across different micrographs, often stemming from over-picking in cluttered or noisy regions. The outliers represent instances where template correlation thresholds become either too permissive (generating excessive false positives) or too restrictive (missing true targets), suggesting that fixed-parameter approaches cannot dynamically adapt to the heterogeneous nature of cryo-EM micrographs. This phenomenon is especially pronounced in EMPIAR-10345 and EMPIAR-10093, where particle diversity and contamination challenge the reproducibility of the templates. The resulting inconsistency is also reflected in Table 1 , where high recall is often offset by low precision and F1-score. Download figure Open in new tab Figure 4: Detailed box plots comparing precision (left) and recall (right) distribution across six methods on six different datasets. The x-axis for both plots represents the six datasets (10028, 10081, 10017, 10345, 10093, and 11056), while the y-axis displays the metric values, ranging from 0 to 1. Each box represents the interquartile range (IQR), the black horizontal line inside each box indicates the median value, and the whiskers extend to the range of non-outlier values. Outliers are plotted as individual points. In contrast, CryoFSL achieves comparatively tighter distributions across both precision and recall metrics, with fewer outliers and generally higher medians than other methods across most protein datasets. 
While variability is still observed in some cases, the narrower interquartile ranges highlight more stable behavior that adapts dynamically to varying imaging conditions rather than relying on static decision boundaries. This stability stems from CryoFSL’s adapter modules’ ability to modulate feature representation in response to local micrograph characteristics, effectively learning to distinguish signal from noise patterns that would confound correlation-based approaches. Statistical analysis using Wilcoxon signed-rank tests [ 43 ] confirms CryoFSL’s superior performance, with highly significant adjusted p-values (e.g., $p < 10^{-10}$ for recall in most cases) and large effect sizes, as detailed in Supplementary Tables S4-S5. Further, threshold-based analysis shows CryoFSL achieves a 96.6% recall success rate at the 0.6 threshold and maintains 69.1% at 0.8, outperforming all baselines in both performance and consistency, as detailed in Supplementary Figure S7. A combined analysis of segmentation behavior across multiple EMPIAR-10345 micrographs (Supplementary Figure S6 and Supplementary Table S3) highlights distinct failure modes among methods. CryoFSL maintains particle predictions within a stable range (typically 100-150), reflecting high recall and robustness, with acceptable precision due to detecting valid but unlabeled particles. In contrast, CrYOLO consistently under-picks (as few as 9-26), while Topaz exhibits highly variable behavior, with predicted counts ranging from 5 to 363 depending on micrograph complexity. Template-based methods such as Scipion, RELION, and EMAN2 exhibit aggressive over-picking, achieving high recall but extremely low precision due to genuine false positive detections. Notably, CryoFSL’s lower precision arises from recovery of true but unlabeled particles, fundamentally differing from the false positive-driven precision loss in traditional methods. 
Comparative analysis of particle quantity and 3D density map reconstruction resolution across methods Table 2 presents a comprehensive evaluation of particle quantity and 3D density map reconstruction quality across methods. Template-based methods routinely selected 2 to 3 times more particles than our approach, yet CryoFSL achieved the best average resolution of 5.33 Å with significantly fewer particles – around 45000 on average. This inverse relationship is particularly striking for challenging proteins like 10345, where CryoFSL’s precise selection (21008 particles) yields exceptional 3.84 Å resolution, outperforming all other methods by greater than 1.44 Å despite their much larger particle sets. High particle numbers are regularly accompanied by elevated resolution value (i.e., lower quality), with RELION reaching 7.36 Å on 10028 despite picking 68055 particles, and EMAN2 yielding 7.26 Å on 10081 with 97234 particles. This observation strongly corroborates our earlier findings from Table 1 and Figure 3 , where traditional methods exhibited high recall but low precision and significant performance variability. The superior 3D density recovery of CryoFSL is further demonstrated in Figure 5 , which visualizes the density maps for EMPIAR-10345, clearly showing finer structural details and fewer artifacts compared to baseline methods. Additional reconstructions and resolution comparisons for all protein datasets are provided in Supplementary Figures S8-S9 , affirming CryoFSL’s advantage in delivering cleaner, higher-resolution structures. View this table: View inline View popup Download powerpoint Table 2: Comparison of 3D reconstruction resolution and particle yield across six cryo-EM protein datasets. For each EMPIAR ID, the number of particles picked (#) and the corresponding 3D resolution (Res., in Å) obtained using each method are reported. The final row shows the average particle count and average resolution for each method. The best resolution (lowest Res. 
value) for each protein is highlighted in bold , indicating superior performance in structural recovery. Download figure Open in new tab Figure 5: Comparison results for the resolution of the 3D density maps and reconstructed 3D density maps of particles picked by different methods on the EMPIAR-10345 dataset. To further examine the quality of particles selected by each method, we conducted a progressive reconstruction analysis on EMPIAR 10081 protein, evaluating resolution at 25%, 50%, 75%, and 100 % of the particles picked. Figure 6 reveals that with just 25 % of the particles, CryoFSL achieves a resolution of 7.91 Å, already comparable to or better than the full 100 % sets of EMAN2 (7.26 Å), Scipion (7.36 Å), and RELION (8 Å). As more particles are added, resolution for CryoFSL improves steadily – 6.82 Å at 50%, 6.29 Å at 75%, and 6.16 Å at 100%, but the diminished gain beyond 50 % reflects the robust and high-quality particles across its selection. In contrast, template-based methods like EMAN2 and Scipion exhibit larger performance jumps (e.g., EMAN2 improves from 9.38 Å at 25 % to 7.26 Å at 100%). The higher variability and steeper improvements observed in these methods imply a greater presence of low-quality or noisy particles that dilute reconstruction quality at lower sampling levels. Download figure Open in new tab Figure 6: Evaluation of 3D reconstruction resolution across different sampling levels of picked particles for each method using the 10081-protein dataset. (a) Bar plot illustrating the variation in 3D resolution (in Ångström) as a function of the proportion of particles used (25%, 50%, 75%, 100%) for each method. The x-axis represents the percentage of particles used from each method’s picked set, while the y-axis shows the resulting 3D reconstruction resolution in Ångström. The height of each bar reflects the resolution achieved at that sampling level; lower bars indicate better resolution. 
(b) Heatmap displaying the same data with numerical resolution values annotated in each cell. The x-axis represents the percentage of selected particles used for reconstruction, and the y-axis represents particle picking methods. The color scale encodes resolution values: green indicates superior (lower) resolution, while red denotes poorer (higher) resolution. The color intensity provides an at-a-glance comparison of method performance across sampling levels, with bold annotations for clarity. Together, these visualizations emphasize CryoFSL’s uniformly high-quality particle selection, achieving low resolution values even with a reduced number of particles. Evaluating the impact of annotation density on 3D resolution performance across picking methods To examine performance under limited supervision, we trained each method on five micrographs per protein while varying particle annotations from 10 % to 100%. As shown in Figure 7(a) and Supplementary Table S2 , CryoFSL demonstrates stable resolution across annotation levels, with only marginal gains at higher supervision. For example, on protein 10017, CryoFSL achieves 5.10 Å at 10 % annotation, nearly matching its 4.99 Å at 100%, and still outperforms EMAN2 (5.61 Å), Scipion (5.89 Å), and RELION (5.58 Å) trained with full annotations. A similar trend holds for protein 10081. This robustness is further quantified in Figure 7(b) , where CryoFSL shows the flattest resolution degradation slopes (e.g., −0.00197 Å/ % for 10017), indicating minimal sensitivity to annotation sparsity. In contrast, traditional methods degrade sharply as annotation decreases. These findings highlight CryoFSL’s unique ability to generalize from sparse data. 
Download figure Open in new tab Figure 7: Robustness analysis of cryo-EM protein particle picking methods across varying annotation percentages on two EMPIAR IDs (10017 and 10081).(a) A line plot (left) showing the 3D resolution (Å) achieved by each method with different annotation percentages. The x-axis represents the annotation percentage (%), indicating the proportion of training data available for each method. The y-axis shows the 3D resolution in angstroms (Å), where lower values indicate better performance (i.e., higher resolution structures). Different markers distinguish the two protein datasets: ⋆ (star) for protein 10017 and • (bullet) for protein 10081. Flatter curves indicate greater robustness to reduced training data, while steeper downward slopes suggest higher sensitivity to annotation scarcity. (b) A grouped bar chart (right) displaying the degradation slope (Å per %) for each method–protein combination. The x-axis lists the four approaches, and the y-axis shows the degradation slope in angstroms per percentage point, representing how much the 3D resolution deteriorates for each 1 % reduction in annotation data. Values closer to zero indicate superior robustness (minimal performance degradation with reduced training data). All slopes are negative, which is expected as performance typically degrades with less training data. Bars are grouped by method, with each protein dataset represented by different patterns: diagonal white stripes on a colored background for protein 10017, and solid-colored bars for protein 10081, as indicated in the legend. Numerical values on each bar show the exact slope coefficient. A horizontal dashed line at y = 0 serves as a reference for perfect robustness (no performance degradation). 3 Discussion Cryo-EM particle picking is a crucial step in structural biology, enabling the identification and extraction of individual protein particles from noisy micrographs to reconstruct high-resolution 3D protein structures. 
This process is inherently challenging due to extremely low signal-to-noise ratios, subtle contrast variations, and the presence of confounding factors like ice contamination and imaging artifacts. Existing approaches, ranging from rigid template matching to fully supervised deep learning models, often fall short in real-world scenarios characterized by protein heterogeneity, limited annotations, and variable imaging conditions. Template-based methods tend to over-pick, misclassifying noise and background as particles due to their reliance on fixed reference shapes, while modern deep learning methods require an extensive labeled dataset and often fail to generalize in low-data or high-complexity regimes. These constraints have created an accessibility barrier that limits the widespread adoption of automated particle picking, particularly for novel protein targets or laboratories with limited computational resources. To address these challenges, we introduced CryoFSL, a few-shot learning framework that leverages SAM2 enhanced with strategically designed adapter modules to achieve superior particle detection using minimal supervision. Across all evaluations, CryoFSL demonstrated remarkable robustness and flexibility not just by achieving superior average performance metrics, but by maintaining consistency across highly diverse proteins and experimental setups. Unlike conventional methods, CryoFSL was able to identify high-quality particles even when trained with as few as five micrographs, and its performance remained stable across varying levels of annotation density. This level of resilience, especially in heterogeneous or low-contrast micrographs, underscores the strength of its few-shot learning design and its ability to modulate features effectively for novel proteins. 
The robustness of CryoFSL stems from its unique architectural design that synergistically combines the foundational visual understanding of SAM2 with task-specific adapter modules integrated across hierarchical encoder stages. This design enables dynamic feature modulation at multiple stages, from local texture patterns distinguishing protein particles from ice crystals to global contextual information resolving ambiguous regions—rather than relying on fixed correlation thresholds that plague traditional methods. While traditional methods exhibit high variance in performance across different micrographs and datasets—evidenced by wide interquartile ranges and numerous outliers—CryoFSL consistently delivers tight performance distributions with minimal variability. Our recall success rate evaluation (96.6 % at 0.6 threshold, 69.1 % at 0.8 threshold) collectively demonstrates unprecedented stability that directly correlates with coherent 3D reconstruction quality. The framework’s exceptional performance under sparse annotation conditions— maintaining near-optimal resolution with only 10 % labeled particles while surpassing deep learning and traditional methods—reveals that the adapter modules efficiently extract maximally informative features rather than memorizing dataset-specific patterns. The visual comparisons further corroborate this: CryoFSL avoids the under-picking of CrYOLO, the redundancy of Topaz, and the aggressive over-picking of template-based methods, achieving a precise balance between recall and precision. CryoFSL transforms protein identification by implementing what we term ‘intelligent selectivity’ , achieving superior 3D reconstruction resolution (average 5.33 Å) with significantly fewer particles (~45,000) compared to traditional methods that select 2–3 times more particles, yet produce inferior results. 
This counterintuitive finding reveals that precision in particle selection is far more critical than quantity, exemplified by the EMPIAR-10345 results, where CryoFSL achieved 3.84 Å resolution with only 21,008 particles, while outperforming all baselines by >1.44 Å despite their substantially larger particle counts. The progressive reconstruction analysis further validates this principle, showing that CryoFSL’s particle quality is so high that using only 25 % of selected particles yields resolutions comparable to or better than template-based methods using their complete sets. This transformation occurs because the hierarchical adapter integration enables the framework to distinguish high-quality particles from background noise and artifacts with unprecedented accuracy, effectively mimicking expert microbiologist decision-making through learned feature representation. The strength of CryoFSL lies in overcoming the fundamental trade-offs that have constrained traditional approaches for decades. Template-based methods achieve high recall through aggressive over-picking but suffer from severely compromised precision due to excessive false positives, while deep learning methods such as Topaz and CrYOLO, when restricted to a few annotated micrographs, exhibited unstable behavior with high variability across proteins. In contrast, CryoFSL maintains consistently high performance across all metrics through its foundational model adaptation strategy. The framework’s few-shot learning capability (optimal performance with just 5 annotated micrographs) addresses the annotation burden that has limited accessibility of automated particle picking, while its parameter-efficient design (lightweight adapters vs. full model retraining) ensures computational feasibility. 
Most importantly, CryoFSL’s ability to generalize rapidly from minimal examples while maintaining stability across diverse conditions resolves the long-standing tension between automation and accuracy that has plagued the field. Despite these advances, CryoFSL has limitations that merit consideration. In extremely low-data settings, such as one-shot learning on morphologically complex proteins, the adapter layer may overfit to micrograph-specific noise patterns or sampling artifacts, limiting generalizability. Similarly, very low contrast or severely degraded signal-to-noise ratio (SNR) can render the particle signal indistinguishable from the background. Extreme aggregation, heavy overlaps, or highly irregular particle shapes break assumptions in the post-processing stage (e.g., circularity/area constraints and watershed segmentation), producing merged picks or missed centers. These are fundamentally post-processing failures rather than faults of the adapter strategy itself. Additionally, large domain shifts in imaging conditions— such as unusual microscope settings, uncommon contaminants, or carbon films not represented in the few training micrographs—can leave the frozen backbone’s priors misaligned with task appearance. In such cases, CryoFSL might require modest additional labeled micrographs to adapt effectively. Future enhancements could address these limitations through domain-specific pretraining on larger cryo-EM datasets to improve particle understanding, active learning strategies to guide optimal training example selection, or development of adaptive post-processing algorithms to better handle overlapping particles. Expanding the few-shot framework to downstream tasks like particle classification or heterogeneity analysis could also unlock broader utility in the cryo-EM pipeline. In conclusion, CryoFSL redefines data efficiency and generalizability in cryo-EM particle picking when few annotated micrographs are available. 
By combining the strengths of foundational vision models with the flexibility of few-shot adaptation, it bridges the gap between precision and practicality - enabling robust particle identification with minimal annotations, strong generalization across proteins, and higher downstream reconstruction quality. This represents a pivotal step toward scalable and reliable cryo-EM analysis in real-world research environments. 4 Methods Dataset We evaluate our approach using the CryoPPP dataset, a large and diverse collection of expertly annotated cryo-EM micrographs curated from the Electron Microscopy Public Image Archive (EMPIAR). CryoPPP encompasses a wide range of protein types, molecular sizes, particle shapes, and imaging conditions, including low signal-to-noise ratios, ice contamination, carbon films, and heterogeneous particle distributions. For our few-shot experiments, we select a representative subset of six proteins from CryoPPP (EMPIAR – 10028, 10081, 10017, 10093, 10345, 11056). These datasets were chosen to reflect variability in particle morphology and micrograph complexity. For each protein, we randomly sample 1, 5, and 10 labeled micrographs to simulate 1-shot, 5-shot, and 10-shot scenarios, respectively. The details of the dataset are presented in Supplementary Table S1. Evaluation of particle picking To quantitatively assess the particle picking performance of CryoFSL and competing approaches, we adopted four widely used metrics: precision, recall, F1-score and Intersection over Union (IoU). Precision measures the proportion of correctly identified particles among all predicted particles, whereas recall measures the proportion of ground truth particles that were correctly detected: $$\mathrm{Precision} = \frac{TP}{TP + FP}, \qquad \mathrm{Recall} = \frac{TP}{TP + FN}$$ Here, true positive (TP) denotes the predicted particles correctly matched to ground truth, false positive (FP) denotes the predicted particles with no matching ground truth, and false negative (FN) denotes the ground truth particles that were missed. 
F1-score is the harmonic mean of precision and recall, providing a balanced metric for performance: $$F_1 = \frac{2 \cdot \mathrm{Precision} \cdot \mathrm{Recall}}{\mathrm{Precision} + \mathrm{Recall}}$$ Intersection over Union (IoU) is a measure of the spatial overlap between predicted and ground truth particles: $$\mathrm{IoU} = \frac{|P_D \cap P_T|}{|P_D \cup P_T|}$$ where $P_D$ represents the predicted particles, and $P_T$ represents the ground truth particles. To further validate the impact of particle picking accuracy on downstream structure determination, we also report the 3D reconstruction resolution obtained using particles picked by each method. This metric reflects the quality of structural recovery and is reported in Angstroms (Å), where lower values indicate higher quality. Statistical Analysis To evaluate the statistical significance of differences between CryoFSL and competing methods, p-values were computed using paired non-parametric tests applied on a per-micrograph basis for each of the six EMPIAR datasets. Specifically, the Wilcoxon signed-rank test was employed to compare paired metric values (precision or recall), assessing the null hypothesis of no difference with CryoFSL as the reference method. Effect sizes were calculated to quantify the magnitude and direction of differences using the rank-biserial correlation [ 44 ], computed as $r = z / \sqrt{N}$, where z is the Wilcoxon test statistic and N is the number of paired observations. Given multiple comparisons (5 competitor methods × 6 datasets = 30 tests per metric), raw p-values were adjusted for false discovery using the Benjamini-Hochberg procedure [ 45 ] for controlling the false discovery rate (FDR). All statistical computations were implemented in Python using the scipy.stats.wilcoxon function for raw p-values and effect sizes, and statsmodels.stats.multitest.multipletests with the ‘fdr_bh’ method for adjustments. Final results, including median values, p-values, adjusted p-values, and effect sizes, were tabulated to provide a comprehensive comparison across datasets and methods. 
Overall framework Our framework leverages the Segment Anything Model 2 (SAM2) as the backbone for automated protein particle picking in Cryo-EM micrographs. SAM2 features a Hiera-large hierarchical vision transformer encoder, which efficiently captures multi-scale visual representations through progressive down-sampling and deepening of features. This hierarchical structure enables the model to integrate both local texture and global context - crucial for detecting protein particles in noisy, low-contrast micrographs. The proposed architecture consists of four main components: the frozen SAM2 Hiera-large image encoder for robust feature extraction, novel lightweight adapter modules integrated across the encoder’s stages for task-specific adaptation, the SAM2 mask decoder for segmentation, and a post-processing pipeline to extract particle coordinates from the segmentation mask. Unlike the original SAM [ 46 ]/SAM2, CryoFSL omits the prompt encoder and operates without any interactive inputs (e.g., points, boxes, or masks), enabling fully automated particle detection. Adapter Integration in SAM2 Image Encoder To adapt the frozen SAM2 image encoder to the particle picking task, we used parameter-efficient adapter modules following the approach of Chen et al. [ 47 ] and He et al. [ 48 ], by strategically placing them across all four hierarchical stages of the encoder. Each adapter consists of an unshared linear layer ($L_{\text{unshared}}$) that captures stage-specific task adaptations, followed by GELU activation for non-linearity, and a shared linear layer ($L_{\text{shared}}$) that ensures consistent feature dimensionality across stages. The integration of adapters into the transformer blocks is carefully aligned with the hierarchical structure of the SAM2 image encoder, where each stage processes features of increasing abstraction and semantic complexity. 
The varying capacities of adapters $A_1$ through $A_4$ reflect this hierarchical progression: adapter $A_1$ operates on the highest spatial resolution features (Stage 1, 144-dimensional embeddings) with two lightweight adapters. At Stage 2, adapter $A_2$ processes 288-dimensional embeddings using six adapters. At Stage 3, adapter $A_3$ handles 576-dimensional embeddings with 36 adapters—the largest capacity among the stages—consistent with the deeper representation complexity. Finally, adapter $A_4$ is integrated into Stage 4, where 1152-dimensional embeddings are projected using four adapters. This architectural design ensures that each hierarchical level benefits from appropriately scaled adaptation capacity, balancing spatial detail in the early stages with semantic richness in the deeper layers. Let i ∈ {1, 2, 3, 4} denote the stage index, and j ∈ {0, 1, …, depths[i − 1] − 1} represent the block index within that stage, where depths[i − 1] is the number of blocks in stage i. Let $\mathrm{feat}^{(i,j)}$ be the input feature to block j in stage i. Each adapter consists of an unshared linear layer $L^{(i,j)}_{\text{unshared}}$ specific to the block, a GELU activation function σ, and a shared linear layer $L_{\text{shared}}$ that is common across all adapters. The adapter output for block j in stage i is formulated as: $$A^{(i,j)} = L_{\text{shared}}\Big(\sigma\big(L^{(i,j)}_{\text{unshared}}(\mathrm{feat}^{(i,j)})\big)\Big)$$ Or, in simplified notation: $$A^{(i,j)} = W_{\text{shared}}\,\sigma\big(W^{(i,j)}_{\text{unshared}}\,\mathrm{feat}^{(i,j)}\big)$$ where $W^{(i,j)}_{\text{unshared}}$ and $W_{\text{shared}}$ are the learnable weight matrices of the unshared and shared layers, respectively. The task-adapted feature is then added to the original input feature via a residual connection and the final output of block j in stage i is: $$\widehat{\mathrm{feat}}^{(i,j)} = \mathrm{feat}^{(i,j)} + A^{(i,j)}$$ This residual formulation ensures the adapter introduces task-specific modulation without disrupting the pretrained encoder’s underlying representations. 
Finally, the updated feature is forwarded through the transformer block $\mathrm{BLK}^{(i,j)}$ for further processing as: $$\mathrm{out}^{(i,j)} = \mathrm{BLK}^{(i,j)}\big(\widehat{\mathrm{feat}}^{(i,j)}\big)$$ where $\widehat{\mathrm{feat}}^{(i,j)}$ denotes the adapter-updated input feature of block j in stage i and $\mathrm{out}^{(i,j)}$ is the block output passed on to the next block. This integration strategy allows each block in the frozen SAM2 encoder to benefit from targeted, low-parameter task adaptation, enabling the model to generalize effectively from a limited number of labeled examples without full fine-tuning of the encoder weights. Post-processing and particle localization To extract accurate particle coordinates from SAM2-generated masks, we employ a robust multi-stage post-processing pipeline combining distance transform, multi-scale peak detection, and watershed segmentation. Geometric filtering based on circularity and area constraints ensures biological plausibility, while a dual-pass strategy enables recovery of closely packed or overlapping particles without duplication. A schematic overview and detailed algorithm are provided in Supplementary Figure S10 and Supplementary Algorithm S1 , respectively. Training CryoFSL was trained using five annotated micrographs from each of the EMPIAR datasets. All micrographs were resized to 1024 × 1024 pixels to align with the input requirements of the SAM2 model. Training was performed using a batch size of 2, the Adam optimizer [ 49 ] with a learning rate of 1e−4, and a maximum of 4000 epochs. The total number of trainable parameters in the model was approximately 3.94 million. For optimization, the balanced binary cross-entropy loss with logits (BCEWithLogitsLoss) was used. During training, the SAM2 image encoder was frozen, and only the adapter module and SAM2 mask decoder were updated. All experiments were implemented in Python 3.11.0, leveraging PyTorch 2.3.0 for model construction and training, and CUDA 11.8 for GPU acceleration. Template-based methods (EMAN2, RELION, and Scipion) were evaluated using their standard correlation-based workflows. 
For each protein, templates were generated from the same five labeled micrographs and applied to the test sets with default settings, adjusting only diameter and correlation thresholds when required. No additional fine-tuning or external data were used. Deep learning approaches were likewise trained under identical few-shot configurations. For CrYOLO, we employed the PhosaurusNet architecture with an input image of size 768 × 768 and trained for 100 epochs with a learning rate of 1e−4. Similarly, Topaz used a ResNet backbone, configured with 32 units in the initial layers and batch normalization, trained for 50 epochs with a learning rate of 2e−4 using the GE-binomial loss. All approaches were then applied to the same test datasets, ensuring a direct and fair comparison with CryoFSL under sparse annotation regimes. Author contributions Study conceptualization (D.X., J.C.); supervision (D.X., J.C.); funding acquisition (D.X., J.C.); methodology and experimental design (B.P.); data preparation and formal analysis (B.P., R.G., A.D.); initial manuscript (B.P.); validation and visualization (B.P., R.G., A.D.). All authors contributed to the revision of the manuscript and have approved the final version. Funding This work is partially supported by the National Institutes of Health (grants R35GM126985 to DX and R01GM146340 to JC). Data and Code Availability The dataset for this study is available on https://github.com/BioinfoMachineLearning/cryoppp . The source code is available at https://github.com/biplabpoudel25/CryoFSL . References [1]. ↵ Dhakal , A. , McKay , C. , Tanner , J. J. , & Cheng , J. ( 2022 ). Artificial intelligence in the prediction of protein– ligand interactions: recent advances and future directions . Briefings in bioinformatics , 23 ( 1 ), bbab476 . doi: 10.1093/bib/bbab476 OpenUrl CrossRef PubMed [2]. ↵ Rasheed , F. , Markgren , J. , Hedenqvist , M. , & Johansson , E. ( 2020 ). 
Modeling to understand plant protein structure-function relationships-implications for seed storage proteins . Molecules , 25 ( 4 ), 873 . doi: 10.3390/molecules25040873 OpenUrl CrossRef PubMed [3]. ↵ Gyawali , R. , Dhakal , A. , & Cheng , J. ( 2025 ). Multimodal deep learning integration of cryo-EM and AlphaFold3 for high-accuracy protein structure determination . bioRxiv , 2025 – 07 . doi: 10.1101/2025.07.03.663071 OpenUrl Abstract / FREE Full Text [4]. ↵ Vilas , J. L. , Carazo , J. M. , & Sorzano , C. O. S. ( 2022 ). Emerging themes in CryoEM-Single particle analysis image processing . Chemical Reviews , 122 ( 17 ), 13915 – 13951 . doi: 10.1021/acs.chemrev.1c00850 OpenUrl CrossRef PubMed [5]. ↵ Bai , X. C. , McMullan , G. , & Scheres , S. H. ( 2015 ). How cryo-EM is revolutionizing structural biology . Trends in biochemical sciences , 40 ( 1 ), 49 – 57 . doi: 10.1016/j.tibs.2014.10.005 OpenUrl CrossRef PubMed Web of Science [6]. ↵ Milne , J. L. , Borgnia , M. J. , Bartesaghi , A. , Tran , E. E. , Earl , L. A. , Schauder , D. M. , … & Subramaniam , S. ( 2012 ). Cryo-electron microscopy–a primer for the non-microscopist . The FEBS journal , 280 ( 1 ), 28 – 45 . doi: 10.1111/febs.12078 OpenUrl CrossRef PubMed [7]. ↵ Chung , J. M. , Durie , C. L. , & Lee , J. ( 2022 ). Artificial intelligence in cryo-electron microscopy . Life , 12 ( 8 ), 1267 . doi: 10.3390/life12081267 OpenUrl CrossRef PubMed [8]. ↵ Bendory , T. , Bartesaghi , A. , & Singer , A. ( 2020 ). Single-particle cryo-electron microscopy: Mathematical theory, computational challenges, and opportunities . IEEE signal processing magazine , 37 ( 2 ), 58 – 76 . doi: 10.1109/msp.2019.2957822 OpenUrl CrossRef PubMed [9]. ↵ Moriya , T. , Saur , M. , Stabrin , M. , Merino , F. , Voicu , H. , Huang , Z. , … & Gatsogiannis , C. ( 2017 ). High-resolution single particle analysis from electron cryo-microscopy images using SPHIRE . Journal of visualized experiments:JoVE , ( 123 ), 55448 . 
doi: 10.3791/55448 OpenUrl CrossRef PubMed [10]. ↵ Agard , D. , Cheng , Y. , Glaeser , R. M. , & Subramaniam , S. ( 2014 ). Single-particle cryo-electron microscopy (cryo-EM): Progress, challenges, and perspectives for further improvement . Advances in imaging and electron physics , 185 , 113 – 137 . doi: 10.1016/B978-0-12-800144-8.00002-1 OpenUrl CrossRef [11]. ↵ Baxter , W. T. , Grassucci , R. A. , Gao , H. , & Frank , J. ( 2009 ). Determination of signal-to-noise ratios and spectral SNRs in cryo-EM low-dose imaging of molecules . Journal of structural biology , 166 ( 2 ), 126 – 132 . doi: 10.1016/j.jsb.2009.02.012 OpenUrl CrossRef PubMed [12]. ↵ Skalidis , I. , Kyrilis , F. L. , Tüting , C. , Hamdi , F. , Chojnowski , G. , & Kastritis , P. L. ( 2022 ). Cryo-EM and artificial intelligence visualize endogenous protein community members . Structure , 30 ( 4 ), 575 – 589 . doi: 10.1016/j.str.2022.01.001 OpenUrl CrossRef [13]. ↵ Tang , G. , Peng , L. , Baldwin , P. R. , Mann , D. S. , Jiang , W. , Rees , I. , & Ludtke , S. J. ( 2007 ). EMAN2: an extensible image processing suite for electron microscopy . Journal of structural biology , 157 ( 1 ), 38 – 46 . doi: 10.1016/j.jsb.2006.05.009 OpenUrl CrossRef PubMed Web of Science [14]. ↵ Scheres , S. H. ( 2012 ). RELION: implementation of a Bayesian approach to cryo-EM structure determination . Journal of structural biology , 180 ( 3 ), 519 – 530 . doi: 10.1016/j.jsb.2012.09.006 OpenUrl CrossRef PubMed [15]. ↵ De la Rosa-Trevin , J. M. , Quintana , A. , del Cano , L. A. , Zaldívar , A. , Foche , I. , Gutiérrez , J. , … & Carazo , J.M. ( 2016 ). Scipion: A software framework toward integration, reproducibility and validation in 3D electron microscopy . Journal of structural biology , 195 ( 1 ), 93 – 99 . doi: 10.1016/j.jsb.2016.04.010 OpenUrl CrossRef PubMed [16]. ↵ Voss , N. R. , Yoshioka , C. K. , Radermacher , M. , Potter , C. S. , & Carragher , B. ( 2009 ). 
DoG Picker and TiltPicker: software tools to facilitate particle selection in single particle electron microscopy . Journal of structural biology , 166 ( 2 ), 205 – 213 . doi: 10.1016/j.jsb.2009.01.004 OpenUrl CrossRef PubMed [17]. ↵ Lander , G. C. , Stagg , S. M. , Voss , N. R. , Cheng , A. , Fellmann , D. , Pulokas , J. , … & Carragher , B. ( 2009 ). Appion: an integrated, database-driven pipeline to facilitate EM image processing . Journal of structural biology , 166 ( 1 ), 95 – 102 . doi: 10.1016/j.jsb.2009.01.002 OpenUrl CrossRef PubMed Web of Science [18]. ↵ Zhan , Z. H. , Hong , J. , Li , J. Y. , Wang , C. , He , L. , Xu , Z. , & Zhang , J. ( 2025 ). Artificial intelligence-based methods for protein structure prediction: a survey . Artificial Intelligence Review , 58 ( 10 ), 1 – 36 . doi: 10.1007/s10462-025-11325-4 OpenUrl CrossRef [19]. ↵ Dhakal , A. , Gyawali , R. , Wang , L. , & Cheng , J. ( 2025 ). Artificial intelligence in cryo-EM protein particle picking: recent advances and remaining challenges . Briefings in Bioinformatics , 26 ( 1 ), bbaf011 . doi: 10.1093/bib/bbaf011 OpenUrl CrossRef [20]. ↵ Zhang , X. , Zhao , T. , Chen , J. , Shen , Y. , & Li , X. ( 2022 ). EPicker is an exemplar-based continual learning approach for knowledge accumulation in cryoEM particle picking . Nature Communications , 13 ( 1 ), 2468 . doi: 10.1038/s41467-022-29994-y OpenUrl CrossRef [21]. ↵ Zhang , C. , Cheng , Y. , Feng , K. , Zhang , F. , Han , R. , & Feng , J. ( 2025 ). UPicker: a semi-supervised particle picking transformer method for cryo-EM micrographs . Briefings in Bioinformatics , 26 ( 1 ), bbae636 . doi: 10.1093/bib/bbae636 OpenUrl CrossRef [22]. ↵ Zamanos , A. , Koromilas , P. , Bouritsas , G. , Kastritis , P. L. , & Panagakis , Y. ( 2025 ). Self-supervised learning for generalizable particle picking in cryo-EM micrographs . Cell reports methods , 5 ( 7 ), 101089 . doi: 10.1016/j.crmeth.2025.101089 OpenUrl CrossRef PubMed [23]. ↵ Heimowitz , A. , Andén , J. 
, & Singer , A. ( 2018 ). APPLE picker: Automatic particle picking, a low-effort cryo-EM framework . Journal of structural biology , 204 ( 2 ), 215 – 227 . doi: 10.1016/j.jsb.2018.08.012 OpenUrl CrossRef PubMed [24]. ↵ Wang , F. , Gong , H. , Liu , G. , Li , M. , Yan , C. , Xia , T. , … & Zeng , J. ( 2016 ). DeepPicker: A deep learning approach for fully automated particle picking in cryo-EM . Journal of structural biology , 195 ( 3 ), 325 – 336 . doi: 10.1016/j.jsb.2016.07.006 OpenUrl CrossRef PubMed [25]. ↵ Al-Azzawi , A. , Ouadou , A. , Tanner , J. J. , & Cheng , J. ( 2019 ). AutoCryoPicker: an unsupervised learning approach for fully automated single particle picking in Cryo-EM images . BMC bioinformatics , 20 ( 1 ), 326 . doi: 10.1186/s12859-019-2926-y OpenUrl CrossRef [26]. ↵ Tegunov , D. , & Cramer , P. ( 2019 ). Real-time cryo-electron microscopy data preprocessing with Warp . Nature methods , 16 ( 11 ), 1146 – 1152 . doi: 10.1038/s41592-019-0580-y OpenUrl CrossRef PubMed [27]. ↵ George , B. , Assaiya , A. , Roy , R. J. , Kembhavi , A. , Chauhan , R. , Paul , G. , … & Philip , N. S. ( 2021 ). CASSPER is a semantic segmentation-based particle picking algorithm for single-particle cryo-electron microscopy . Communications biology , 4 ( 1 ), 200 . doi: 10.1038/s42003-021-01721-1 OpenUrl CrossRef [28]. ↵ Bepler , T. , Morin , A. , Rapp , M. , Brasch , J. , Shapiro , L. , Noble , A. J. , & Berger , B. ( 2019 ). Positive-unlabeled convolutional neural networks for particle picking in cryo-electron micrographs . Nature methods , 16 ( 11 ), 1153 – 1160 . doi: 10.1038/s41592-019-0575-8 OpenUrl CrossRef PubMed [29]. ↵ Wagner , T. , Merino , F. , Stabrin , M. , Moriya , T. , Antoni , C. , Apelbaum , A. , … & Raunser , S. ( 2019 ). SPHIRE-crYOLO is a fast and accurate fully automated particle picker for cryo-EM . Communications biology , 2 ( 1 ), 218 . doi: 10.1038/s42003-019-0437-z OpenUrl CrossRef PubMed [30]. ↵ Xu , C. , Zhan , X. , & Xu , M. ( 2025 , February ). 
CryoMAE: few-shot cryo-EM particle picking with masked Autoencoders . In 2025 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) (pp. 3876 – 3885 ). IEEE . [31]. ↵ Gyawali , R. , Dhakal , A. , Wang , L. , & Cheng , J. ( 2024 ). CryoSegNet: accurate cryo-EM protein particle picking by integrating the foundational AI image segmentation model and attention-gated U-Net . Briefings in Bioinformatics , 25 ( 4 ), bbae282 . doi: 10.1093/bib/bbae282 OpenUrl CrossRef PubMed [32]. ↵ Dhakal , A. , Gyawali , R. , Wang , L. , & Cheng , J. ( 2024 ). CryoTransformer: a transformer model for picking protein particles from Cryo-EM micrographs . Bioinformatics , 40 ( 3 ), btae109 . doi: 10.1093/bioinformatics/btae109 OpenUrl CrossRef [33]. ↵ Ravi , N. , Gabeur , V. , Hu , Y. T. , Hu , R. , Ryali , C. , Ma , T. , … & Feichtenhofer , C. ( 2024 ). Sam 2: Segment anything in images and videos . arXiv preprint arxiv: 2408.00714 . doi: 10.48550/arXiv.2408.00714 OpenUrl CrossRef [34]. ↵ Punjani , A. , Rubinstein , J. L. , Fleet , D. J. , & Brubaker , M. A. ( 2017 ). cryoSPARC: algorithms for rapid unsuper-vised cryo-EM structure determination . Nature methods , 14 ( 3 ), 290 – 296 . doi: 10.1038/nmeth.4169 OpenUrl CrossRef PubMed [35]. ↵ Dhakal , A. , Gyawali , R. , Wang , L. , & Cheng , J. ( 2023 ). A large expert-curated cryo-EM image dataset for machine learning protein particle picking . Scientific Data , 10 ( 1 ), 392 . doi: 10.1038/s41597-023-02280-2 OpenUrl CrossRef PubMed [36]. ↵ Iudin , A. , Korir , P. K. , Somasundharam , S. , Weyand , S. , Cattavitello , C. , Fonseca , N. , … & Patwardhan , A. ( 2023 ). EMPIAR: the electron microscopy public image archive . Nucleic Acids Research , 51 ( d1 ), d1503 – d1511 . doi: 10.1093/nar/gkac1062 OpenUrl CrossRef PubMed [37]. Wong , W. , Bai , X. C. , Brown , A. , Fernandez , I. S. , Hanssen , E. , Condron , M. , … & Scheres , S. H. ( 2014 ). 
Cryo-EM structure of the Plasmodium falciparum 80S ribosome bound to the anti-protozoan drug emetine . elife , 3 , e03080 . doi: 10.7554/elife.03080 OpenUrl CrossRef PubMed [38]. Lee , C. H. , & MacKinnon , R. ( 2017 ). Structures of the human HCN1 hyperpolarization-activated channel . Cell , 168 ( 1 ), 111 - 120.e11 . doi: 10.1016/j.cell.2016.12.023 OpenUrl CrossRef PubMed [39]. Scheres , S. H. ( 2015 ). Semi-automated selection of cryo-EM particles in RELION-1.3 . Journal of structural biology , 189 ( 2 ), 114 – 122 . doi: 10.1016/j.jsb.2014.11.010 OpenUrl CrossRef PubMed [40]. Campbell , M. G. , Cormier , A. , Ito , S. , Seed , R. I. , Bondesson , A. J. , Lou , J. , … & Nishimura , S. L. ( 2020 ). Cryo-EM reveals integrin-mediated TGF-β activation without release from latent TGF-β . Cell , 180 ( 3 ), 490 - 501.e16 . doi: 10.1016/j.cell.2019.12.030 OpenUrl CrossRef PubMed [41]. Jin , P. , Bulkley , D. , Guo , Y. , Zhang , W. , Guo , Z. , Huynh , W. , … & Cheng , Y. ( 2017 ). Electron cryo-microscopy structure of the mechanotransduction channel NOMPC . Nature , 547 ( 7661 ), 118 – 122 . doi: 10.1038/nature22981 OpenUrl CrossRef PubMed [42]. Asami , J. , Kimura , K. T. , Fujita-Fujiharu , Y. , Ishida , H. , Zhang , Z. , Nomura , Y. , … & Ohto , U. ( 2022 ). Structure of the bile acid transporter and HBV receptor NTCP . Nature , 606 ( 7916 ), 1021 – 1026 . doi: 10.1038/s41586-022-04845-4 OpenUrl CrossRef PubMed [43]. ↵ Wilcoxon , F. ( 1945 ). Individual Comparisons by Ranking Methods . Biometrics Bulletin , 1 ( 6 ), 80 – 83 . doi: 10.2307/3001968 OpenUrl CrossRef [44]. ↵ Cureton , E. E. ( 1956 ). Rank-biserial correlation . Psychometrika , 21 ( 3 ), 287 – 290 . doi: 10.1007/bf02289138 OpenUrl CrossRef Web of Science [45]. ↵ Benjamini , Y. , & Hochberg , Y. ( 1995 ). Controlling the false discovery rate: a practical and powerful approach to multiple testing . Journal of the Royal statistical society: series B (Methodological) , 57 ( 1 ), 289 – 300 . 
http://www.jstor.org/stable/2346101 OpenUrl CrossRef PubMed Web of Science [46]. ↵ Kirillov , A. , Mintun , E. , Ravi , N. , Mao , H. , Rolland , C. , Gustafson , L. , … & Girshick , R. ( 2023 ). Segment anything . In Proceedings of the IEEE/CVF international conference on computer vision (pp. 4015-4026) . doi: 10.1109/ICCV51070.2023.00371 OpenUrl CrossRef [47]. ↵ Chen , T. , Lu , A. , Zhu , L. , Ding , C. , Yu , C. , Ji , D. , … & Zang , Y. ( 2024 ). Sam2-adapter: Evaluating & adapting segment anything 2 in downstream tasks: Camouflage, shadow, medical image segmentation, and more . arXiv preprint arxiv: 2408.04579 . doi: 10.48550/arXiv.2408.04579 OpenUrl CrossRef [48]. ↵ He , F. , Yang , Z. , Gao , M. , Poudel , B. , Dhas , N. S. E. S. , Gyawali , R. , … & Xu , D. ( 2024 , November ). Adapting segment anything model (SAM) through prompt-based learning for enhanced protein identification in cryo-EM micrographs . In 2024 IEEE International Conference on Medical Artificial Intelligence (MedAI) (pp. 9 - 20 ). IEEE. doi: 10.1109/medai62885.2024.00009 OpenUrl CrossRef [49]. ↵ Kingma , D. P. , & Ba , J. ( 2014 ). Adam: A method for stochastic optimization . arXiv preprint arxiv: 1412.6980 . doi: 10.48550/arXiv.1412.6980 OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted September 21, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following CryoFSL: An Annotation-Efficient, Few-Shot Learning Framework for Robust Protein Particle Picking in Cryo-EM Micrographs Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. 
Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share CryoFSL: An Annotation-Efficient, Few-Shot Learning Framework for Robust Protein Particle Picking in Cryo-EM Micrographs Biplab Poudel , Rajan Gyawali , Ashwin Dhakal , Jianlin Cheng , Dong Xu bioRxiv 2025.09.19.677446; doi: https://doi.org/10.1101/2025.09.19.677446 Share This Article: Copy Citation Tools CryoFSL: An Annotation-Efficient, Few-Shot Learning Framework for Robust Protein Particle Picking in Cryo-EM Micrographs Biplab Poudel , Rajan Gyawali , Ashwin Dhakal , Jianlin Cheng , Dong Xu bioRxiv 2025.09.19.677446; doi: https://doi.org/10.1101/2025.09.19.677446 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7621) Biochemistry (17645) Bioengineering (13867) Bioinformatics (41872) Biophysics (21416) Cancer Biology (18549) Cell Biology (25443) Clinical Trials (138) Developmental Biology (13360) Ecology (19866) Epidemiology (2067) Evolutionary Biology (24289) Genetics (15587) Genomics (22470) Immunology (17706) Microbiology (40314) Molecular Biology (17142) Neuroscience (88456) Paleontology (666) Pathology (2826) Pharmacology and Toxicology (4815) Physiology (7634) Plant Biology (15111) Scientific Communication and Education (2042) Synthetic Biology (4285) Systems Biology (9812) Zoology (2268)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.