Full text
39,385 characters
· extracted from
preprint-html
· click to expand
Few-Shot Learning for Prostate Cancer Detection on MRI: Comparative Analysis with Radiologists’ Performance | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Few-Shot Learning for Prostate Cancer Detection on MRI: Comparative Analysis with Radiologists’ Performance View ORCID Profile Yosuke Yamagishi , View ORCID Profile Yasutaka Baba , Jun Suzuki , View ORCID Profile Yoshitaka Okada , View ORCID Profile Kent Kanao , View ORCID Profile Masafumi Oyama doi: https://doi.org/10.1101/2025.01.19.25320503 Yosuke Yamagishi 1 Saitama Medical University International Medical Center, Department of Diagnostic Radiology 3 Department of Radiology and 
Biomedical Engineering, Graduate School of Medicine, The University of Tokyo Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yosuke Yamagishi For correspondence: yamagishi-yosuke0115{at}g.ecc.u-tokyo.ac.jp Yasutaka Baba 1 Saitama Medical University International Medical Center, Department of Diagnostic Radiology Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yasutaka Baba Jun Suzuki 1 Saitama Medical University International Medical Center, Department of Diagnostic Radiology Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yoshitaka Okada 1 Saitama Medical University International Medical Center, Department of Diagnostic Radiology Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yoshitaka Okada Kent Kanao 2 Saitama Medical University International Medical Center, Department of Urological Oncology Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kent Kanao Masafumi Oyama 2 Saitama Medical University International Medical Center, Department of Urological Oncology Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Masafumi Oyama Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract Background Deep-learning models for prostate cancer detection often require large datasets, which can be challenging to obtain and may lead to domain shift issues in various clinical settings. Purpose This study aimed to develop a deep-learning model for prostate cancer detection on magnetic resonance images using few-shot learning and compare its performance with radiologists. 
Materials and Methods This retrospective study used 99 cases (80 positive, 19 negative) of suspected prostate cancer, with diagnoses established by needle biopsy from 2017 to 2022, with 20 cases for training, 5 for validation, and 74 for testing. The 2D transformer model was trained on T2-weighted, diffusion-weighted, and apparent diffusion coefficient map images. Model predictions were compared with those of two radiologists using the Matthews correlation coefficient (MCC) and F1 score, and the bootstrap method was used to calculate 95% confidence intervals (CIs). Results Seventy-four patients (mean age, 71 years ± 8; 60 men) were included in the test set. The model achieved an MCC of 0.297 (95% CI: 0.095–0.474) and F1 score of 0.707 (95% CI: 0.598–0.847). Radiologist 1 had an MCC of 0.276 (95% CI: 0.054–0.484) and an F1 score of 0.741 (95% CI: 0.632–0.832), while Radiologist 2 had an MCC of 0.504 (95% CI: 0.289–0.703) and an F1 score of 0.871 (95% CI: 0.800–0.931). The performance of the model was not significantly different from that of Radiologist 1 (MCC difference: 0.021, 95% CI: −0.270–0.306; F1 score difference: −0.034, 95% CI: −0.153–0.078), but was lower than that of Radiologist 2 (F1 difference: −0.16, 95% CI: −0.287 to −0.061). Conclusion A deep-learning model trained on only 20 cases achieved a performance comparable to one radiologist in detecting prostate cancer on magnetic resonance images, demonstrating the potential of few-shot learning in addressing domain shift challenges. Key Results A deep learning model for prostate cancer detection on MRI was developed using only 20 training cases. The model achieved performance comparable to one radiologist (MCC: 0.297 vs 0.276) but lower than another (F1: 0.707 vs 0.871). Few-shot learning demonstrated potential for addressing domain shift challenges in medical imaging AI. 
Summary Statement Few-shot learning enables development of prostate cancer detection models on MRI with performance comparable to radiologists, using minimal training data. Introduction The development of deep-learning models for prostate cancer detection has progressed rapidly in recent years. Many researchers have constructed high-performance models using large-scale datasets, paving the way for clinical applications ( 1 – 6 ). In many cases, these models have demonstrated performance comparable to or surpassing that of radiologists ( 7 ). However, in the field of medical image diagnosis, domain shift can occur owing to various factors, such as differences in imaging equipment, patients’ racial backgrounds, and variations in imaging protocols across facilities ( 8 ). This domain shift can cause models that perform well in one environment to underperform significantly in another. Therefore, it is crucial to fine-tune the models at each facility and adapt them to specific environments. However, collecting cases at medical institutions is often difficult and building large-scale datasets is not easily achievable. Therefore, it is extremely important to perform fine-tuning using a small number of cases. Previous studies have investigated the relationship between the model performance and the amount of training data ( 7 ). However, research directly comparing the model performance under few-shot learning conditions with the diagnostic accuracy of radiologists is limited. To address these challenges, we propose an enhanced approach that maximizes the utility of limited magnetic resonance imaging (MRI) datasets. Our method expands upon established methodologies and previous techniques by adopting a 2D model to leverage the multi-slice nature of MRI data ( 9 ), effectively increasing the amount of training data per case. We streamlined the data preparation process using an efficient slice-by-slice labeling system based on the biopsy results. 
Despite working with only 20 cases, our goal was to develop a deep-learning model that achieves diagnostic accuracy comparable to that of experienced radiologists. We investigated the effectiveness of fine-tuning techniques using limited data and explored how these models can be realistically implemented in clinical settings. Herein, we present a method for each medical facility to efficiently optimize models using their own data, thereby improving the diagnostic accuracy and efficiency. This will provide a path for solving the problem of domain shift while reducing the costs associated with large-scale data collection and labeling. By demonstrating that a model can be developed with minimal data from a facility, we demonstrated that optimal model development is possible for each facility, regardless of the domain shift. Consequently, we expect that any facility can develop a model to enhance diagnostic accuracy and efficiency by collecting a small number of cases and performing simple labeling. Materials and Methods This retrospective study was conducted at a single institution and approved by the Institutional Review Board of our institution. This study adhered to the guidelines outlined in the Checklist for Artificial Intelligence in Medical Imaging (CLAIM) 2024 update ( 10 ). Dataset We used multiparametric MRI images comprising T2-weighted images (T2WI), diffusion-weighted images (DWI), and apparent diffusion coefficient (ADC) maps acquired at our hospital between 2017 and 2022. MRI examinations were performed using a Philips Achieva scanner (Philips, Amsterdam, Netherlands) between April 2017 and July 2022. We randomly selected 100 cases, all of which had undergone MRI-targeted prostate biopsy with a subsequent pathological diagnosis. One case was excluded because of difficulty in evaluation owing to bleeding from the pre-MRI biopsy. The remaining 99 cases were divided into the training and test datasets. 
The training dataset was further divided into training and validation subsets. Figure 1 shows a flowchart of patient selection and data distribution. Download figure Open in new tab Figure 1: Flowchart of patient inclusion and data distribution in the study of multiparametric MRI data for prostate cancer detection. The flowchart illustrates the selection process from 100 initial cases to 99 analyzed cases, with one patient excluded because of post-biopsy bleeding. This shows the division into training (n = 25) and test (n = 74) datasets, with further subdivision of the training set into training (n = 20) and validation (n = 5) subsets. Positive and negative biopsy results are obtained for each patient subset. MRI, magnetic resonance imaging; T2WI, T2-weighted image; DWI, diffusion-weighted image; ADC, apparent diffusion coefficient. Data Preprocessing The magnetic resonance (MR) image data varied in resolution, ranging from 224 × 224 to 1,008 × 1,008 pixels. To standardize the input, all the images were resized to 224 × 224 pixels using bilinear interpolation. The pixel values were normalized to 256 levels (0-255 for each pixel). The model input consisted of T2WIs, DWIs, and ADC maps stacked in the channel direction as NumPy arrays ( 11 ). Model Training An overview of the proposed approach is shown in Figure 2 . Download figure Open in new tab Figure 2: MR image analysis pipelines. This figure presents two interconnected pipelines for magnetic resonance (MR) image analysis: the upper slice-level pipeline for training processes of individual MR image slices through channel-wise stacking, and a Swin Transformer, producing slice-level predictions. The lower case-level prediction aggregates multiple stacked images, processes them through a slice-trained Swin Transformer, and selects the top three predictions to generate a case-level prediction. Both pipelines use the same 2D image models and output probability values from 0 to 1 for positive/negative predictions. 
MRI, magnetic resonance imaging; T2WI, T2-weighted image; DWI, diffusion-weighted image; ADC, apparent diffusion coefficient. To train the model with a small number of MR images, we adopted a 2D model instead of 3D models such as 3D convolutional neural network (CNN) models ( 12 ), which are frequently used in conventional MR image classification. Using a 2D model, the amount of data input to the model was increased by labeling each slice. The training data were labeled for each slice, depending on the presence or absence of lesions, and 645 2D image data points (96 lesion-positive, 549 lesion-negative) were obtained. We used the Swin Transformer Small ( 13 ) with a patch size of 4 and an image size of 224. The model was initialized with weights pre-trained on the ImageNet-22k dataset ( 14 ) and fine-tuned on the ImageNet-1k dataset ( 15 ), which are publicly available in the timm library ( 16 ). During training, we used a batch size of 8 and trained the model for 10 epochs. The AdamW optimizer ( 17 ) was employed, and a warm-up strategy was applied to gradually increase the learning rate to 0.0001 during the first epoch, followed by gradual decay for the remaining 9 epochs. A binary cross-entropy loss function was used. The Albumentations library ( 18 ) was used for data augmentation during the training. The augmentation techniques applied included random resizing and cropping between 85% and 100% of the original scale, random rotation with a maximum angle of 15°, random horizontal and vertical flipping, random hue and saturation shifts, and cutout with a maximum of 5 randomly placed black patches up to 5% of the image size. After applying these data augmentations, normalization was performed using the mean and standard deviation of the RGB values from the ImageNet dataset, and the normalized images were used as inputs to the model. The values predicted by the model were obtained for each slice. 
The three highest predicted values for each case were extracted and their average values were calculated as the predicted values for each case. The experiments were conducted using Python 3.10 and PyTorch 2.0 ( 19 ). Statistical Analysis The performance of the obtained model was evaluated using multiple metrics. The area under the receiver operating characteristic curve (ROC-AUC), precision, recall, and accuracy were calculated. However, to address the challenges of imbalanced datasets, we primarily focused on the Matthews correlation coefficient (MCC) and the F1 score for performance comparisons. All statistical metrics were calculated using the scikit-learn library ( 20 ). The bootstrap method was used to calculate the 95% confidence intervals (CIs) for all metrics. The performance of the model, particularly in terms of F1 score and MCC, was compared with the reading results of two radiologists (hereinafter referred to as Radiologist 1 and Radiologist 2, with 15 and 8 years of experience, respectively) to assess its potential clinical utility using a previously validated method ( 21 ). Visualization of Model Attention In addition, gradient-weighted class activation mapping (GradCAM) ( 22 ) was used to visualize the importance of pixels contributing to the prediction of a particular class using the gradient information of the model. The gradient of the model output was calculated and weighted for each channel of the feature map. The weighted feature maps for each channel were then summed to produce an importance map for the prediction results. Results Dataset Characteristic Table 1 shows that all study participants were Asian, with a mean age of 71.39 years (standard deviation [SD] 7.43) overall, while the training group had a mean age of 73.12 years (SD 7.18) and the test group had a mean age of 70.81 years (SD 7.47). Regarding Gleason scores, 19.2% (19/99) of participants had no cancer. 
It should be noted that all cases were patients who required biopsy due to suspected prostate cancer, resulting in this high cancer detection rate (80.8%). In the training group, Gleason score 8 (4+4) was most common at 44.0% (11/25), while the test group showed a more balanced distribution across scores from 6 to 10, with Gleason 7 being most frequent (35.1% combined for 3+4 and 4+3). View this table: View inline View popup Download powerpoint Table 1. Demographic Characteristics of the Study Participants Overall and by Group Allocation The actual imaging parameters achieved across all examinations were as follows: slice thickness was consistently maintained at 3.0 mm for all sequence types. The detailed parameters obtained for each sequence are summarized in Table 2 , including TR, TE, field of view, and matrix size. View this table: View inline View popup Download powerpoint Table 2: Distribution of MRI Studies and Imaging Parameters for Training and Test Datasets: Number of Studies by Tesla Strength (Upper) and Mean Imaging Parameters with Ranges (Lower) Model Performance and Comparison with Radiologists The constructed model for prostate cancer classification demonstrated a significant predictive capability. Its performance metrics were as follows: accuracy, 0.611 (95% CI: 0.500 to 0.723); precision, 0.947 (0.865 to 1.00); recall, 0.567 (0.439 to 0.690); ROC-AUC, 0.730 (0.588 to 0.847); MCC, 0.297 (0.095 to 0.474); and F1 score, 0.707 (0.598 to 0.847). For comparison, we evaluated the performance of the two radiologists in the same cases. Radiologist 1 achieved an accuracy of 0.641 (0.528–0.750), precision of 0.929 (0.840–1.00), recall of 0.619 (0.492–0.742), MCC of 0.276 (0.054–0.484), and F1 score of 0.741 (0.632–0.832). Radiologist 2 showed a higher performance level, with an accuracy of 0.803 (0.708–0.889), precision of 0.961 (0.898–1.00), recall of 0.797 (0.698–0.889), MCC of 0.504 (0.289–0.703), and F1 score of 0.871 (0.800–0.931). 
To compare the performance of the model with that of radiologists, we focused on the differences in the MCC and F1 scores. When compared to Radiologist 1, the model showed a slightly higher MCC (difference of 0.021, 95% CI: −0.270 to 0.306) and a slightly lower F1 score (difference of −0.034, 95% CI: −0.153 to 0.078), but there were no statistically significant differences. In comparison with Radiologist 2, the MCC of the model was lower, but not significantly (difference of −0.207, 95% CI: −0.480 to 0.060), while its F1 score was significantly lower (difference of −0.16, 95% CI: −0.287 to −0.061). The results are summarized in Table 3 and Figure 3 , which provide a comprehensive comparison of the performance metrics between the constructed model and the two radiologists. The analysis suggests that while the model’s performance was comparable to that of Radiologist 1, there is room for improvement to match the higher performance level demonstrated by Radiologist 2, particularly in terms of overall accuracy and recall. View this table: View inline View popup Download powerpoint Table 3: Comparison of Performance Metrics Between the Constructed Model and Two Radiologists (95% Confidence Intervals in Parentheses). ROC-AUC was not applicable for radiologists as they provided binary predictions rather than probability scores. Download figure Open in new tab Figure 3: Comparison of MCC and F1 scores between the few-shot learning model and two radiologists for prostate cancer detection, showing absolute values (left panels) and differences between the model and each radiologist (right panels); error bars representing 95% confidence intervals. MCC, Matthews correlation coefficient. Visualization of Model Attention Figure 4 shows the attention visualization of the model using Grad-CAM for prostate cancer classification. The figure shows a 3-channel stacked image, T2WI, and the corresponding GradCAM visualization. 
Download figure Open in new tab Figure 4: Visualization of prostate MRI analysis, showing (left) the model input as a 3-channel (3ch) stacked image combining T2-weighted images (T2WIs), diffusion-weighted images (DWI), and apparent diffusion coefficient maps (ADC); (center) the original T2WI used for gradient-weighted class activation mapping (GradCAM) overlay; and (right) the resulting GradCAM heatmap highlighting areas of model focus for cancer detection. The visualization demonstrated that the model focused primarily on the entire prostate gland, as indicated by the warm colors (red and yellow) in GradCAM images. The areas of highest attention aligned with the zonal anatomy of the prostate visible in the T2-weighted image, suggesting that the model recognizes clinically relevant structures. The surrounding tissues, shown in cooler colors (blue and green), received less attention from the model. This indicates that the model appropriately prioritizes the prostate region in its analysis. Discussion We developed a deep-learning model to classify prostate magnetic resonance images (MRI) based on T2-weighted images (T2WI), diffusion-weighted images (DWI), and apparent diffusion coefficient (ADC) maps to determine whether a biopsy is positive or negative. We devised an algorithm that divides MR images into slice-level sequences and stacks them together, enabling analysis using a two-dimensional model. Despite training on only 20 cases, the model achieved a classification performance comparable to that of radiologists. The data used in this study consisted of cases in which biopsy was deemed necessary because of remaining suspicion of malignancy based on radiological findings. Many of these cases were difficult to distinguish as benign or malignant; therefore, the results obtained were significantly valuable. Several deep-learning models have been proposed for the classification of clinically significant prostate cancer (csPCa) images. 
Aldoj and colleagues developed a highly accurate 3D CNN model with an ROC-AUC of 0.897 ( 23 ). However, their model required MRI data from a sizable cohort of 175 patients for training purposes. Chen et al. utilized a 2D CNN pretrained on the ImageNet dataset to achieve an AUC of 0.83 ( 24 ). They performed training using 330 samples from the PROSTATEx dataset ( 25 ). Although it is challenging to directly compare performances because of differences in the target populations, we achieved promising results, with performance comparable to that of a radiologist using only 20 cases. The main objective of this study was to propose a solution for domain shift ( 8 ), which can significantly reduce the accuracy, even for highly accurate models trained on large data sets, because of variations in imaging conditions and imaging equipment. The development of models with sufficient performance, based on a small number of cases at each facility, will enable individual facility-specific model development without considering the domain shift. Furthermore, by utilizing only cases from the facility itself, many issues related to data confidentiality can also be resolved. Here, we developed a high-performance model based on a small number of cases, and established a pathway to resolve these issues. The future direction of artificial intelligence (AI) in medical imaging is increasingly turning toward the use of foundational models. For instance, BiomedCLIP trained on 15 million text-image pairs from the open PMC dataset has shown remarkable performance in radiology ( 26 ). Similarly, CT-CLIP, aimed at becoming a foundational model for lung computed tomography, was developed using scans from more than 20,000 patients and has achieved impressive results ( 27 ). The introduction of TotalSegmentator MRI has enabled segmentation of virtually every organ in the body ( 28 ). 
These advancements suggest that the emergence of foundational models or their fine-tuning may lead to the development of AI systems that rely on radiologists’ expertise. There are only a few reported studies on few-shot learning using MRI. Dhinagar et al. ( 29 ) proposed few-shot learning for classifying autism spectrum disorder from MR images. In their approach, they first trained a 3D CNN model using the Autism Brain Imaging Data Exchange dataset ( 30 ) and then fine-tuned 20 cases for each site. There are many similarities between their research and ours, such as the use of a small number of data points for fine-tuning (20 cases) and the focus on analyzing the MRI data. We anticipate that performance comparisons between AI and physicians, such as radiologists, under few-shot learning conditions, will become increasingly important. Our research laid the groundwork for this crucial line of inquiry. Our study had several limitations. First, the performance was evaluated at a single institution, limiting the generalizability of the results. Further evaluation at multiple institutions will allow for a more comprehensive assessment of the generalizability of the model. However, this study focused on demonstrating the feasibility of developing a model to address domain shifts by using a small amount of data from the same institution. Second, we did not validate whether the combination of the model with radiologists would improve diagnostic accuracy in a clinical setting. The proposed algorithm determines whether each slice contains malignant findings, thereby enabling the presentation of noteworthy slices. Therefore, it is expected that the combination of the model and radiologists may improve the diagnostic accuracy. Third, the evaluation was limited to cases in which biopsies were performed and cases without any suspicious findings for malignancy were not included. 
To construct a model that targets all cases, including cases where biopsies are not required, it is necessary to consider improvements such as setting target labels when biopsies are necessary. In conclusion, we developed a slice-based MRI model capable of predicting biopsy-confirmed malignancy using only 20 training cases. The model achieved a performance similar to one radiologist and partially inferior to another, demonstrating its potential for developing effective diagnostic tools with minimal data. This approach may significantly reduce the annotation costs and resource requirements for model development. Future research should explore application to other diseases and extend beyond classification to segmentation and object detection tasks. Data Availability The data used in this study contain personal information of patients and cannot be made publicly available due to ethical and legal restrictions. However, the data may be available for research purposes upon reasonable request and with appropriate procedures. For inquiries about data access, please contact the corresponding author. List of abbreviations ADC Apparent diffusion coefficient AI Artificial intelligence CI Confidence intervals CNN Convolutional neural network DWI Diffusion-weighted images GradCAM Gradient-weighted class activation mapping T2WI T2-weighted images References 1. ↵ Naik N , Tokas T , Shetty DK , et al. Role of Deep Learning in Prostate Cancer Management: Past, Present and Future Based on a Comprehensive Literature Review . J Clin Med . 2022 ; 11 ( 13 ): 3575 . doi: 10.3390/jcm11133575 . OpenUrl CrossRef PubMed 2. Yan Y , Shao L , Liu Z , et al. Deep Learning with Quantitative Features of Magnetic Resonance Images to Predict Biochemical Recurrence of Radical Prostatectomy: A Multi-Center Study . Cancers (Basel) . 2021 ; 13 ( 12 ): 3098 . doi: 10.3390/cancers13123098 . OpenUrl CrossRef PubMed 3. Khosravi P , Lysandrou M , Eljalby M , et al. 
A Deep Learning Approach to Diagnostic Classification of Prostate Cancer Using Pathology–Radiology Fusion . J Magn Reson Imaging . 2021 ; 54 ( 2 ): 462 – 471 . doi: 10.1002/jmri.27599 . OpenUrl CrossRef PubMed 4. Shiradkar R , Panda A , Leo P , et al. T1, T2 MR Fingerprinting Measurements of Prostate Cancer and Prostatitis Correlate with Deep Learning Derived Estimates of Epithelium, Lumen and Stromal Composition on Corresponding Whole Mount Histopathology . Eur Radiol . 2021 ; 31 ( 3 ): 1336 – 1346 . doi: 10.1007/s00330-020-07214-9 . OpenUrl CrossRef PubMed 5. Schelb P , Wang X , Radtke JP , et al. Simulated clinical deployment of fully automatic deep learning for clinical prostate MRI assessment . Eur Radiol . 2021 ; 31 ( 1 ): 302 – 313 . doi: 10.1007/s00330-020-07086-z . OpenUrl CrossRef PubMed 6. ↵ Hiremath A , Shiradkar R , Merisaari H , et al. Test-retest repeatability of a deep learning architecture in detecting and segmenting clinically significant prostate cancer on apparent diffusion coefficient (ADC) maps . Eur Radiol . 2021 ; 31 ( 1 ): 379 – 391 . doi: 10.1007/s00330-020-07065-4 . OpenUrl CrossRef PubMed 7. ↵ Hosseinzadeh M , Saha A , Brand P , Slootweg I , de Rooij M , Huisman H. Deep learning-assisted prostate cancer detection on bi-parametric MRI: minimum training data size requirements and effect of prior knowledge . Eur Radiol . 2022 ; 32 ( 4 ): 2224 – 2234 . doi: 10.1007/s00330-021-08320-y . OpenUrl CrossRef PubMed 8. ↵ Torralba A , Efros AA . Unbiased look at dataset bias . CVPR 2011 . 2011 . p. 1521 – 1528 . doi: 10.1109/CVPR.2011.5995347 . OpenUrl CrossRef 9. ↵ Yoo S , Gujrathi I , Haider MA , Khalvati F. Prostate Cancer Detection using Deep Convolutional Neural Networks . Sci Rep . Nature Publishing Group ; 2019 ; 9 ( 1 ): 19518 . doi: 10.1038/s41598-019-55972-4 . OpenUrl CrossRef PubMed 10. ↵ Tejani AS , Klontzas ME , Gatti AA , et al. Checklist for Artificial Intelligence in Medical Imaging (CLAIM): 2024 Update . 
Radiology: Artificial Intelligence. Radiological Society of North America ; 2024 ; 6 ( 4 ): e240300 . doi: 10.1148/ryai.240300 . OpenUrl CrossRef 11. ↵ Harris CR , Millman KJ , Walt SJ van der , et al. Array programming with NumPy . Nature. Springer Science and Business Media LLC ; 2020 ; 585 ( 7825 ): 357 – 362 . doi: 10.1038/s41586-020-2649-2 . OpenUrl CrossRef PubMed 12. ↵ Singh SP , Wang L , Gupta S , Goli H , Padmanabhan P , Gulyás B. 3D Deep Learning on Medical Images: A Review . Sensors . Multidisciplinary Digital Publishing Institute ; 2020 ; 20 ( 18 ): 5097 . doi: 10.3390/s20185097 . OpenUrl CrossRef 13. ↵ Liu Z , Lin Y , Cao Y , et al. Swin Transformer: Hierarchical Vision Transformer using Shifted Windows . arXiv ; 2021 . doi: 10.48550/arXiv.2103.14030 . OpenUrl CrossRef 14. ↵ Ridnik T , Ben-Baruch E , Noy A , Zelnik-Manor L. ImageNet-21K Pretraining for the Masses . arXiv ; 2021 . doi: 10.48550/arXiv.2104.10972 . OpenUrl CrossRef 15. ↵ Russakovsky O , Deng J , Su H , et al. ImageNet Large Scale Visual Recognition Challenge . arXiv ; 2015 . doi: 10.48550/arXiv.1409.0575 . OpenUrl CrossRef 16. ↵ Wightman R. PyTorch Image Models . GitHub repository. GitHub ; 2019 . doi: 10.5281/zenodo.4414861 . OpenUrl CrossRef 17. ↵ Loshchilov I , Hutter F. Decoupled Weight Decay Regularization . arXiv ; 2019 . doi: 10.48550/arXiv.1711.05101 . OpenUrl CrossRef 18. ↵ Buslaev A , Iglovikov VI , Khvedchenya E , Parinov A , Druzhinin M , Kalinin AA . Albumentations: Fast and Flexible Image Augmentations . Information . 2020 ; 11 ( 2 ). doi: 10.3390/info11020125 . OpenUrl CrossRef 19. ↵ Paszke A , Gross S , Massa F , et al. PyTorch: An Imperative Style, High-Performance Deep Learning Library . arXiv ; 2019 . doi: 10.48550/arXiv.1912.01703 . OpenUrl CrossRef 20. ↵ Pedregosa F , Varoquaux G , Gramfort A , et al. Scikit-learn: Machine Learning in Python . Journal of Machine Learning Research . 2011 ; 12 : 2825 – 2830 . OpenUrl 21. 
↵ Tiu E , Talius E , Patel P , Langlotz CP , Ng AY , Rajpurkar P. Expert-level detection of pathologies from unannotated chest X-ray images via self-supervised learning . Nat Biomed Eng . Nature Publishing Group ; 2022 ; 6 ( 12 ): 1399 – 1406 . doi: 10.1038/s41551-022-00936-9 . OpenUrl CrossRef 22. ↵ Selvaraju RR , Cogswell M , Das A , Vedantam R , Parikh D , Batra D. Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization . Int J Comput Vis . 2020 ; 128 ( 2 ): 336 – 359 . doi: 10.1007/s11263-019-01228-7 . OpenUrl CrossRef 23. ↵ Aldoj N , Lukas S , Dewey M , Penzkofer T. Semi-automatic classification of prostate cancer on multi-parametric MR imaging using a multi-channel 3D convolutional neural network . Eur Radiol . 2020 ; 30 ( 2 ): 1243 – 1253 . doi: 10.1007/s00330-019-06417-z . OpenUrl CrossRef PubMed 24. ↵ Chen Q , Hu S , Long P , Lu F , Shi Y , Li Y. A Transfer Learning Approach for Malignant Prostate Lesion Detection on Multiparametric MRI . Technol Cancer Res Treat . 2019 ; 18 : 1533033819858363 . doi: 10.1177/1533033819858363 . OpenUrl CrossRef PubMed 25. ↵ Armato SG , Huisman H , Drukker K , et al. PROSTATEx Challenges for computerized classification of prostate lesions from multiparametric magnetic resonance images . J Med Imaging (Bellingham) . 2018 ; 5 ( 4 ): 044501 . doi: 10.1117/1.JMI.5.4.044501 . OpenUrl CrossRef PubMed 26. ↵ Zhang S , Xu Y , Usuyama N , et al. BiomedCLIP: a multimodal biomedical foundation model pretrained from fifteen million scientific image-text pairs . arXiv ; 2024 . doi: 10.48550/arXiv.2303.00915 . OpenUrl CrossRef 27. ↵ Hamamci IE , Er S , Almas F , et al. A foundation model utilizing chest CT volumes and radiology reports for supervised-level zero-shot detection of abnormalities . arXiv ; 2024 . doi: 10.48550/arXiv.2403.17834 . OpenUrl CrossRef 28. ↵ D’Antonoli TA , Berger LK , Indrakanti AK , et al. TotalSegmentator MRI: Sequence-Independent Segmentation of 59 Anatomical Structures in MR images . 
arXiv ; 2024 . doi: 10.48550/arXiv.2405.19492 . OpenUrl CrossRef 29. ↵ Dhinagar NJ , Santhalingam V , Lawrence KE , Laltoo E , Thompson PM . Few-Shot Classification of Autism Spectrum Disorder using Site-Agnostic Meta-Learning and Brain MRI . arXiv ; 2023 . doi: 10.48550/arXiv.2303.08224 . OpenUrl CrossRef 30. ↵ Di Martino A , Yan C-G , Li Q , et al. The autism brain imaging data exchange: towards a large-scale evaluation of the intrinsic brain architecture in autism . Mol Psychiatry. Nature Publishing Group ; 2014 ; 19 ( 6 ): 659 – 667 . doi: 10.1038/mp.2013.78 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted January 20, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Few-Shot Learning for Prostate Cancer Detection on MRI: Comparative Analysis with Radiologists’ Performance Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Few-Shot Learning for Prostate Cancer Detection on MRI: Comparative Analysis with Radiologists’ Performance Yosuke Yamagishi , Yasutaka Baba , Jun Suzuki , Yoshitaka Okada , Kent Kanao , Masafumi Oyama medRxiv 2025.01.19.25320503; doi: https://doi.org/10.1101/2025.01.19.25320503 Share This Article: Copy Citation Tools Few-Shot Learning for Prostate Cancer Detection on MRI: Comparative Analysis with Radiologists’ Performance Yosuke Yamagishi , Yasutaka Baba , Jun Suzuki , Yoshitaka Okada , Kent Kanao , Masafumi Oyama medRxiv 2025.01.19.25320503; doi: https://doi.org/10.1101/2025.01.19.25320503 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Radiology and Imaging Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4536) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (541) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3332) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9232) Radiology and Imaging (2198) 
Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00846a098fe58d3',t:'MTc3OTU4MzUwOA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.