Classifying the severity of diabetic macular oedema from optical coherence tomography scans using deep learning: a feasibility study

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 47,755 characters · extracted from preprint-html · click to expand
Classifying the severity of diabetic macular oedema from optical coherence tomography scans using deep learning: a feasibility study | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Classifying the severity of diabetic macular oedema from optical coherence tomography scans using deep learning: a feasibility study View ORCID Profile Cathal Breathnach , Fiona Harney , Deirdre Townley , View ORCID Profile Rachel Hickey , View ORCID Profile Andrew Simpkin , View ORCID Profile Derek O’Keeffe doi: https://doi.org/10.1101/2025.02.19.24317749 Cathal Breathnach 1 School of Medicine, College of Medicine Nursing and Health Sciences, University of Galway , Galway, Ireland Find 
this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Cathal Breathnach For correspondence: cathalbreathnach{at}outlook.ie Fiona Harney 1 School of Medicine, College of Medicine Nursing and Health Sciences, University of Galway , Galway, Ireland 2 Department of Ophthalmology, University Hospital Galway , Galway, Ireland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Deirdre Townley 2 Department of Ophthalmology, University Hospital Galway , Galway, Ireland Find this author on Google Scholar Find this author on PubMed Search for this author on this site Rachel Hickey 1 School of Medicine, College of Medicine Nursing and Health Sciences, University of Galway , Galway, Ireland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Rachel Hickey Andrew Simpkin 3 School of Mathematical and Statistical Sciences, University of Galway , Galway, Ireland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Andrew Simpkin Derek O’Keeffe 1 School of Medicine, College of Medicine Nursing and Health Sciences, University of Galway , Galway, Ireland 4 Department of Endocrinology, University Hospital Galway , Galway, Ireland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Derek O’Keeffe Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF ABSTRACT Background Diabetic macular oedema (DME) is a vision-threatening complication of diabetes mellitus. It is reliably detected using optical coherence tomography (OCT). This work evaluates a deep learning system (DLS) for the automated detection and classification of DME severity from OCT images. Methods Anonymised OCT images were retrospectively obtained from 950 patients at University Hospital Galway, Ireland. 
Images were graded by a consultant ophthalmologist to classify the level of DME present (normal, non-centre-involving DME, centre-involving DME) excluding other pathologies. A DLS was trained using cross-validation, then evaluated on a test dataset and an external dataset. The test set was graded by a second ophthalmologist for comparison. Results In detecting the presence of DME, the DLS achieved a mean area under the receiver operating characteristic curve (AUC) of 0.98 on cross-validation. AUCs of 0.94 (95% CI 0.90-0.98) and 0.94 (0.92-0.96) were achieved on evaluation of DME detection for the test dataset when graded by the first and second ophthalmologist respectively. An AUC of 0.94 (0.92-0.96) was achieved on evaluation with the external dataset. When detecting the DME severity, AUCs of 0.98, 0.86 and 0.99 were achieved per class on cross validation. For the test dataset, AUCs of 0.99, 0.89 and 0.98 were achieved when graded by the first ophthalmologist and AUCs of 0.96, 0.89 and 0.95 were achieved when graded by the second ophthalmologist. Conclusion This study suggests promising results for the use of deep learning in the classification of severity of DME which could be used to automate screening for DME and direct appropriate referrals. 
INTRODUCTION Diabetic macular oedema (DME) is the accumulation of fluid in the macula that can occur at any stage of diabetic retinopathy (DR).[ 1 ] DME is associated with more severe DR, and can result in visual loss.[ 2 ] Early detection facilitates treatment, including systemic control of diabetes alongside treatments such as laser photocoagulation and intraocular anti-vascular endothelial growth factor (VEGF) injections.[ 3 ] Many DR screening programmes use retinal photographs for DME diagnosis, but this relies on surrogate markers, which is associated with high false positive rates, leading to unnecessary referrals.[ 3 – 5 ] In comparison, optical coherence tomography (OCT) provides a detailed structural view of the retina and is the gold standard for DME evaluation.[ 4 , 6 ] OCT is quick, non-invasive and has been recommended for use in screening programmes to reduce unneeded referrals and cost.[ 4 ] Certain screening programmes already routinely use OCT for example in Denmark,[ 7 ] and in Ireland, where it is used for higher risk patients and has been predicted to reduce referrals.[ 8 ] However, screening processes are often manual, and OCT scans require expert interpretation.[ 2 , 9 ] Deep learning (DL) is a form of artificial intelligence (AI) that can be used to analyse OCT scans.[ 9 , 10 ] A convolutional neural network (CNN) is a class of DL algorithm that is commonly used for medical image classification,[ 11 ] and such algorithms have been shown to reliably detect DME from OCT scans and have the potential to play a role in treatment decisions.[ 12 – 14 ] Several approaches for the implementation of DL algorithms for OCT analysis have previously been tested.[ 15 – 27 ] Some studies have analysed two-dimensional images from OCT images (B-scans) for the detection of DME and other retinal pathologies.[ 15 – 17 ] Other studies have used a transfer learning approach (which uses an existing model trained to recognise general images and then optimises it for 
a supplied dataset) which can offer an advantage when very large datasets are not available.[ 11 ] This approach has been tested for the detection of DME and other retinal pathologies such as choroidal neovascularization and drusen.[ 11 , 18 , 19 ] Several studies have specifically analysed three-dimensional OCT volumes to detect DME alongside age-related macular degeneration.[ 20 – 22 ] Other studies have taken an approach for full segmentation of the retina to diagnose disease, using an additional segmentation step to allow the detection of various pathologies or to segment retinal fluid.[ 23 – 25 ] A notable study was developed by De Fauw et al., using an initial segmentation approach to reliably predict a referral level.[ 26 ] However, while each study reports favourable performance for the detection of DME and other retinal pathologies, there are limitations. For example, the segmentation approach requires labour intensive annotation and approaches such as transfer learning can require considerable input to fine-tune the algorithm. However, few studies including those outlined thus far consider the severity of the DME present, only detecting whether it is present or absent. Various terms have been used to describe DME, including clinically-significant diabetic macular oedema, which can be determined by the presence of specific characteristics.[ 28 ] The central area of the fovea is of most interest, and DME within 500μm on either side of the foveal centre is referred to as centrally-involved DME (CI-DME).[ 2 ] The International Council of Ophthalmology (ICO) recommends that the presence of CI-DME should be used in conjunction with visual acuity to guide treatment decisions.[ 2 ] A study by Tang et al. considered this difference when testing a DL approach for DME detection and classification.[ 27 ] This study achieved favourable performance and was tested using both two-dimensional images and three-dimensional scan data. 
A further more recent study also utilised three dimensional OCT scan data to classify the DME severity per the same classification system, also showing promising results.[ 29 ] Additionally, there are specific biomarkers detectable using OCT scans that can be used to define the DME severity,[ 30 ] and it has been suggested in a recent review that such biomarkers could be used to direct treatment decisions.[ 31 ] A study by Xu et al. used DL to analyse for the presence of such features,[ 32 ] and a more recent study by Mitamura et al. also utilised biomarkers combined with visual acuity to guide treatment.[ 33 ] These approaches did however require segment annotation, which as outlined previously is time consuming. Tan et al. did also examine the use of deep learning to detect CI-DME with visual impairment from OCT images combined with fundal photographs but showed no benefit over using OCT alone to detect CI-DME.[ 34 ] Another recent study by Mondal et al. has used deep learning to predict patient responses to treatments such as anti-VEGF injections, but this study only included patients who had already been diagnosed with DME.[ 35 ] Thus, limited studies have examined the classification of DME from standard OCT images that could be utilised in a screening setting with the exception of the study completed by Tang et al.,[ 27 ] and it is clear this area is under-explored. Our study will implement a simple and lightweight approach to this problem, using a recognised scale for DME severity. Furthermore, we are not aware of any study that has previously tested DL for the detection of diabetic eye disease in an Irish population. MATERIALS & METHODS Image Collection & Datasets Patients were randomly selected from all patients with diabetes currently being treated in University Hospital Galway under the Irish national DR screening programme to create the training and validation dataset. Imaging was conducted with a Heidelberg Spectralis OCT machine. 
A volume scan was captured for each patient, but one single B-scan, from the level of the fovea was used as the input for this algorithm. The algorithm was designed for a square input and thus a square image centred around the fovea was used, being cropped if required. Images were taken from both eyes when available. As a retrospective study, there was no patient follow-up and there was no patient or public involvement in the design or conduct of this study. A second test dataset was created using scan images from every patient in the diabetic registry who attended for an ophthalmology appointment over a two-week period in July 2022. No patients from the training and validation dataset were included in the test dataset. Finally, an externally graded dataset of 250 images, assessed by multiple graders, was also used to test the algorithm.[ 36 ] Only DME detection was tested using this external set as these images were not graded using the scheme used in this study. Grading Criteria This study aimed to distinguish between normal OCT images and those with signs of DME, but it also classified the level of DME present. The scale used is as defined by the ICO guidelines for the treatment of DME, using DME changes within the central area, within 500μm of the foveal centre as the cutoff between categories.[ 2 ] A description used is outlined in Table 1 with Figure 1 providing image examples as per this scheme. This central zone is the area of most significance and this may become the thickest part of the retina with DME.[ 28 ] This grading scheme has a clinical context in mind, as DME that affects the central area poses an increased threat to a patient’s vision and will be more likely to require and respond to DME treatments such as intra-vitreal anti-VEGF injections.[ 2 ] This is clinically relevant for a screening programme, where the aim is to detect those who need the most urgent treatment in over-burdened services. 
Combining the presence of CI-DME with visual acuity may even offer an additional option for triage as while CI-DME may later develop into more substantial DME, it has been shown that it does not always require urgent treatment, and may be suitable for monitoring if visual acuity is not affected.[ 31 ] Thus, by choosing this grading scale, patients could be triaged more effectively, as those with more severe disease could be referred for more immediate treatment, while those with other signs of DME could be suitable for further assessment of visual acuity, followed by treatment as required. View this table: View inline View popup Download powerpoint Table 1: DME classification requirements. *The central area is defined as 500μm on either side of the foveal centre. Download figure Open in new tab Figure 1: OCT image examples as per the grading scheme used in this study, showing an example of a normal scan with corresponding level on a retinal photograph (A), an example of NCI-DME showing DME with no change to central area (B) and an example of CI-DME with central changes and loss of the normal foveal contour (C). Exclusion Criteria Exclusion criteria were used to exclude non-relevant images for the scope of this study. The study excluded (i) poor quality scans, where the retinal layers at the level of the fovea are not discernible, (ii) images with signs of other retinal pathologies, an example of which is wet age-related macular degeneration, (iii) loss of the foveal contour caused by another retinal pathology, for example due to an epiretinal membrane and (iv) images that did not represent the foveal level. Grading Process The training and validation dataset was graded by an ophthalmology consultant (first ophthalmologist) as per the grading scheme outlined. This ophthalmologist also determined which images met the exclusion criteria for the training and validation data. 
The test dataset was graded by the first ophthalmologist and by a second ophthalmology consultant (second ophthalmologist) for comparison. The graders were both aware of the exclusion criteria in this case and there was agreement on which images to exclude. Both graders discussed the grading criteria to reach a consensus but then graded the images independently. Cohen’s κ [ 37 ] was also calculated as a measure of agreement in grading. The algorithm trained with the training and validation data generated predictions for the test dataset, and these were compared independently to the grades produced by the two ophthalmologists. Both consultant ophthalmologists involved in this study had more than 10 years of experience in reading OCT scans and treating diabetic eye disease. No clinical details were provided for image grading. Algorithm Overview A CNN was designed and implemented using the Python programming language (version 3.8.13) with the Keras (version 2.9.0) and Tensorflow frameworks (version 2.9.2). The training and validation stage used stratified 5-fold cross validation. The structure of the model is outlined in Supplemental Table 1 and is approximately similar to CNNs used in other DL studies for OCT image analysis.[ 15 ] The overall structure and sizes used were determined experimentally to optimise performance within the scope of this study. Two near-identical models were trained in this study with only a differing output layer. The first model, for DME detection, used the sigmoid activation function for two-class classification with a threshold of 0.5, and the second model used the softmax activation function for three-class classification of the level of DME. Each convolution layer used zero padding with a ReLU activation function. The model was trained for 35 epochs in each setting with a learning rate of 0.001, using the Adam optimiser.[ 38 ] Data augmentation was used as has been implemented in similar studies,[ 27 ] using the Keras library to reduce overfitting. 
This involved random flipping, 10% random rotation and 10% random zoom applied to the data before the rescaling step. The performance of the algorithm was measured by reporting accuracy, sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV) and area under the receiver operating characteristic curve (AUC). Confidence intervals for the performance indices were calculated in the test data using the DeLong method for AUC[ 39 ] ( pROC R package, version 1.18.5) and the Wilson method otherwise [ 40 ] using the epiR R package (version 2.0.77). RESULTS Population Within the available timeframe, a total of 1878 images were included from 950 distinct patients. The training and validation dataset was comprised of 1598 images from 807 patients, and after the exclusion criteria were applied, there were 1501 images available from 793 patients. The test dataset was comprised of 280 images from 143 patients, and after the exclusion criteria were applied, there were 239 images included from 132 patients. The class breakdown is similar between the training and test datasets and is presented with other dataset characteristics in Table 2 , graded as per the first ophthalmologist. The test dataset was graded by both the first and second ophthalmologist, and Cohen’s κ was calculated as 0.87 for DME detection and 0.85 for DME classification suggesting near-perfect agreement in both situations. View this table: View inline View popup Download powerpoint Table 2: Characteristics of the training and test data collected for all non-excluded images. Percentage values expressed relative to the training or test datasets. Graded as per the first ophthalmologist. Detection of DME The results for the detection of DME are presented in Table 3 . This includes the results from the cross-validation training and then compares the trained algorithm results to the gradings from the first ophthalmologist and the second ophthalmologist. 
The results are also included when the algorithm is tested with the external dataset. The confusion matrices and the AUC curves for the grading completed by the first and second ophthalmologist and for the external dataset are included as supplemental Figures 1 and 2. View this table: View inline View popup Download powerpoint Table 3: Results for DME detection using the dataset gradings. Values reported as mean (SD) for the cross-validation data. The trained algorithm is then compared to the ophthalmologist gradings for the test dataset and to the external dataset, presented with 95% confidence intervals. Classification of DME The mean accuracy overall for the classification of the DME type when evaluated for the 5-fold cross-validation was 91.60% (SD = 0.84%). When the algorithm for DME classification was evaluated using the test dataset graded by the first ophthalmologist, the overall accuracy of the algorithm was 89.12%. When compared to the second ophthalmologist, the accuracy of the algorithm was 84.94%. The AUC values per class are presented in Table 4 with other metrics for the cross-validation and test data. The confusion matrices and the AUC curves comparing to the DME classification algorithm are included as supplemental Figures 3 and 4. View this table: View inline View popup Download powerpoint Table 4: Results for the DME classification using the dataset grading. Values reported as mean (SD) for the cross-validation data. The trained algorithm is then compared using the test dataset as graded by both the first and second ophthalmologist with values presented with 95% confidence intervals. DISCUSSION The algorithm in this study demonstrates excellent performance for the detection of DME on the cross-validated training and similar performance on the test data when compared to the gradings by the first ophthalmologist, achieving an accuracy of 92.5% and an AUC of 0.94. 
When compared to the second ophthalmologist, the accuracy of the algorithm is slightly reduced at 90.0% with an AUC of 0.92 although the specificity and PPV are similar in both cases. Given that the first ophthalmologist produced the gradings for the training dataset, this is likely a source of the slight difference in accuracy. The similar specificity however suggests the second ophthalmologist may have a slightly lower threshold for diagnosing DME. The DME detection algorithm also showed favourable performance when compared to the external dataset, with an AUC of 0.94, matching the test dataset performance for the first ophthalmologist. The previous study using this dataset did achieve a higher AUC of 0.99 for binary classification using this as a test dataset, but not as an external dataset, and multiple images from the same patient were used in that study.[ 11 ] The images used in this dataset also covered a wider section of the retina and could have been interpreted differently when re-sized for input in this study. There can also be slight ethnic differences between populations, and it has been suggested that this could cause slight differences in algorithm performance when using OCT scans.[ 27 ] However, while these slight differences might contribute to variance between algorithms when tested on different populations, this study overall has demonstrated reliable performance for the detection of DME in an Irish population, where the use of DL for OCT image analysis has not previously been tested. The DME classification algorithm in this study has also shown promising performance. The cross-validation training shows high performance, particularly for the detection of the normal and CI-DME classes. 
When compared to the first ophthalmologist using the test data, the algorithm performs favourably for the detection of the normal class (AUC = 0.99) and the CI-DME class (AUC = 0.98), and these results are similar when compared to the second ophthalmologist using the test data for normal (AUC = 0.96) and for CI-DME (AUC = 0.95). The NCI-DME class is less well detected when compared to both the first ophthalmologist (AUC = 0.89) and the second ophthalmologist (AUC = 0.89). However, it can be noted that the NCI-DME class also has the lowest number of samples, which could affect the performance, particularly for early or mild DME. It can also be noted that as per the DME detection phase, variability between the ophthalmologist gradings may affect the algorithm predictions, and it can be noted that the algorithm performs slightly more favourably when compared to the first ophthalmologist gradings. This situation is similar for the testing of DME detection and could be improved in future work by having an increased number of graders involved in the training stage and using an increased number of images, particularly for the NCI-DME class. Furthermore, discussing difference in assigned grades would establish a more robust gold standard, and reduce the potential for any bias in grading. Overall, the algorithms in this study have demonstrated the ability to classify DME, and there are notably low rates of error for the CI-DME class, which would be ideal for use in screening applications. In the previous similar study by Tang et al., they achieved an AUC of 0.958 with their primary dataset, and AUC results between 0.936 and 0.956 for their external datasets using images from the Heidelberg Spectralis machine for the detection of DME.[ 27 ] They achieved an AUC of 0.951 for the distinguishing of CI-DME versus NCI-DME with their primary dataset, and AUC scores between 0.899 and 0.934 for their external datasets. 
Thus, the overall AUC scores achieved in this study are similar and even though the accuracy scores are higher for the study completed by Tang et al., the datasets used have many images taken from the same patients, unlike this study which had no patient duplication and may have more heterogeneous data. Despite the images also being classified in a slightly different manner in the previous study, this work supports the previous finding that an automated classification of DME level from OCT scans is feasible. This has practical implications and can be used to direct treatment decisions and prevent blindness due to DME, which is relevant as the number of patients with diabetes mellitus (DM) continues to grow worldwide.[ 1 ] The ICO has guidelines for the management of DME based on the location on the retina,[ 2 ] and the Scottish diabetic retinal screening service already use DME levels in retinal photographs to direct treatment or referral.[ 41 ] While macular thickening may not always be associated with a change in the patient’s vision,[ 28 ] reduced visual acuity is associated with a higher prevalence of DME,[ 4 ] and thus combining visual acuity with the level of DME, as has been previously suggested, may represent a suitable implementation of automated DME severity classification in screening programmes.[ 2 , 31 ] A reduction in vision in conjunction with the presence of severe DME as per the proposed grading scale should indicate the most urgent referral, and using such software in a screening service could expedite treatment for those who need the earliest treatment. There are several limitations to consider for this study. There was a limited variety in image grading, and the gold standard for the training and validation images was set by a single ophthalmologist. 
It has previously been noted that inter-grader discrepancy can arise in the assessment of DME [ 42 ], and while we have shown that considerable agreement does exist between the graders for the test set, future work should utilise multiple graders for the training and validation data also, with discussion on any disagreement. Taking such steps with the grading would reduce the potential for grading bias, making the algorithm more generalisable in the future. There is a population bias as the images were only taken from a hospital ophthalmology clinic, and the amount of data was also limited in this study due to time constraints. This is most notable for the NCI-DME class, where the number of samples was limited and performance was reduced. Increasing the amount of data for this class in particular could increase future performance. While an automated approach could be used for the detection of the fovea,[ 43 ] there could be signs of DME elsewhere in the eye and only images at the foveal level were included. Furthermore, only a single model of OCT machine was used, which is limiting as characteristics can vary between devices and future work will incorporate testing with other OCT machines to ensure wider clinical applicability, possibly with adjustments per device if required. The exclusion criteria were applied to simplify the approach, but this does not reflect the full extent of retinal disease that would be encountered in a real-world screening service as other retinal pathologies can often exist with DME, and this would need to be included in future work to ensure clinical applicability. It is also worth noting that the algorithms in this test overall show lower sensitivity compared to specificity for the test data for DME detection and classification and could lead to increased missed cases over un-needed referrals. 
This may be less ideal depending on the requirements of a given screening programme, but further work could address this potentially using more sophisticated DL techniques. Alternatively, an adjustment to the algorithm probability threshold may reduce false negatives at the expense of increasing false positives but may be acceptable in a screening setting to avoid missed cases, which may be most applicable to the NCI-DME class where the performance was reduced. Further testing could also be carried out against other deep learning implementations for OCT analysis, and this should be considered in further work. As clinicians and patients alike expect that AI systems should be reliable,[ 44 ] it is also important to consider how algorithms can provide a rationale for their prediction. This can be achieved by highlighting the most influential area on a scan,[ 44 ] and could also be implemented in future work. CONCLUSIONS This study has shown that DME severity can be classified from OCT scan images using DL and a recognised scale for DME. This supports the findings of previous similar work and adds to the evidence for the automated classification of DME severity, including in an Irish population. Such a system and the utilised grading scale have clinical significance, as they could speed up referral times for those in need of urgent treatment without compromising patient safety and could make effective use of limited resources. Further work could likely improve the classification performance. Data Availability Data used in this study are available on reasonable request for further development in this area. Project code is available on request and is also available online with the results data. https://github.com/cbreathnach/DME-Detection ADDITIONAL STATEMENTS Ethics Ethics approval for this study was obtained from the Clinical Research Ethics Committee at Galway University Hospitals with reference number C.A. 2831. 
Informed consent was not required from patients due to the retrospective nature of the study, and the use of fully anonymised images. This study was not registered with any other body. Funding Funding was provided from a Health Research Board Summer Scholarship to the first author with reference number SS-2022-041. Contributors DOK, FH and CB proposed the study. DOK, FH, CB and AS contributed to the study design and methods, including the analysis. CB and RH collected and organised the data. FH and DT graded the images. CB wrote the software. CB prepared the original draft manuscript. DOK, FH, DT, RH, AS reviewed the manuscript. All authors have read and agreed to the final version of the manuscript. Competing interests The authors declare no competing interests. Data availability statement Data are available on reasonable request. Project code with the prediction data is referenced in the supplemental material and is available online. Footnotes Updated grading scheme used for DME classification. REFERENCES ↵ Tan GS , Cheung N , Simó R , et al. Diabetic macular oedema . Lancet Diabetes Endocrinol . 2017 ; 5 : 143 – 55 . doi: 10.1016/S2213-8587(16)30052-3 OpenUrl CrossRef PubMed ↵ Wong TY , Sun J , Kawasaki R , et al. Guidelines on Diabetic Eye Care: The International Council of Ophthalmology Recommendations for Screening, Follow-up, Referral, and Treatment Based on Resource Settings . Ophthalmology . 2018 ; 125 : 1608 – 22 . doi: 10.1016/j.ophtha.2018.04.007 OpenUrl CrossRef PubMed ↵ Goh JKH , Cheung CY , Sim SS , et al. Retinal Imaging Techniques for Diabetic Retinopathy Screening . J Diabetes Sci Technol . 2016 ; 10 : 282 – 94 . doi: 10.1177/1932296816629491 OpenUrl CrossRef PubMed ↵ Olson J , Sharp P , Goatman K , et al. Improving the economic value of photographic screening for optical coherence tomography-detectable macular oedema: a prospective, multicentre, UK study . Health Technol Assess . 2013 ; 17 : 1 – 142 . 
doi: 10.3310/hta17510 OpenUrl CrossRef PubMed Web of Science ↵ Wong RL , Tsang CW , Wong DS , et al. Are we making good use of our public resources? The false-positive rate of screening by fundus photography for diabetic macular oedema . Hong Kong Med J . 2017 ; 23 : 356 – 64 . doi: 10.12809/hkmj166078 OpenUrl CrossRef PubMed ↵ Virgili G , Menchini F , Casazza G , et al. Optical coherence tomography (OCT) for detection of macular oedema in patients with diabetic retinopathy . Cochrane Database Syst Rev . 2015 ; 2015 : CD008081 . OpenUrl ↵ Grauslund J , Andersen N , Andresen J , et al. Evidence-based Danish guidelines for screening of diabetic retinopathy . Acta Ophthalmol . 2018 ; 96 : 763 – 9 . doi: 10.1111/aos.13936 OpenUrl CrossRef PubMed ↵ Pandey R , Morgan MM , Murphy C , et al. Irish National Diabetic RetinaScreen Programme: report on five rounds of retinopathy screening and screen-positive referrals. (INDEAR study report no. 1) . Br J Ophthalmol . 2022 ; 106 : 409 – 14 . doi: 10.1136/bjophthalmol-2020-317508 OpenUrl Abstract / FREE Full Text ↵ Li D , Ran AR , Cheung CY , et al. Deep learning in optical coherence tomography: Where are the gaps? Clin Exp Ophthalmol . Published Online First: 2023. doi: 10.1111/ceo.14258 OpenUrl CrossRef ↵ Ting DSW , Pasquale LR , Peng L , et al. Artificial intelligence and deep learning in ophthalmology . Br J Ophthalmol . 2019 ; 103 : 167 – 75 . doi: 10.1136/bjophthalmol-2018-313173 OpenUrl Abstract / FREE Full Text ↵ Kermany DS , Goldbaum M , Cai W , et al. Identifying Medical Diagnoses and Treatable Diseases by Image-Based Deep Learning . Cell . 2018 ; 172 : 1122 - 1131.e9 . doi: 10.1016/j.cell.2018.02.010 OpenUrl CrossRef PubMed ↵ Li H-Y , Wang D-X , Dong L , et al. Deep learning algorithms for detection of diabetic macular edema in OCT images: A systematic review and meta-analysis . Eur J Ophthalmol . 2023 ; 33 ( 1 ): 278 – 90 . doi: 10.1177/11206721221094786 OpenUrl CrossRef PubMed Yao J , Lim J , Lim GYS , et al. 
Novel artificial intelligence algorithms for diabetic retinopathy and diabetic macular edema . Eye and Vis . 2024 ; 11 : 23 . doi: 10.1186/s40662-024-00389-y OpenUrl CrossRef ↵ Lam C , Wong YL , Tang Z , et al. Performance of Artificial Intelligence in Detecting Diabetic Macular Edema From Fundus Photography and Optical Coherence Tomography Images: A Systematic Review and Meta-analysis . Diabetes Care . 2024 ; 47 : 304 – 19 . doi: 10.2337/dc23-0993 OpenUrl CrossRef PubMed ↵ A p S , Kar S , S G, et al. OctNET: A Lightweight CNN for Retinal Disease Classification from Optical Coherence Tomography Images . Comput Methods Programs Biomed . 2021 ; 200 : 105877 . doi: 10.1016/j.cmpb.2020.105877 OpenUrl CrossRef PubMed Wu Q , Zhang B , Hu Y , et al. Detection of morphologic patterns of diabetic macular edema using a deep learning approach based on optical coherence tomography images . Retina . 2021 ; 41 : 1110 – 7 . doi: 10.1097/IAE.0000000000002992 OpenUrl CrossRef PubMed ↵ Rasti R , Rabbani H , Mehridehnavi A , et al. Macular OCT Classification Using a Multi-Scale Convolutional Neural Network Ensemble . IEEE Trans Med Imaging . 2018 ; 37 : 1024 – 34 . doi: 10.1109/TMI.2017.2780115 OpenUrl CrossRef ↵ Asif S , Amjad K , Qurrat-ul-Ain. Deep Residual Network for Diagnosis of Retinal Diseases Using Optical Coherence Tomography Images . Interdiscip Sci . 2022 ; 14 ( 4 ): 906 – 16 . doi: 10.1007/s12539-022-00533-z OpenUrl CrossRef ↵ Lu W , Tong Y , Yu Y , et al. Deep Learning-Based Automated Classification of Multi-Categorical Abnormalities From Optical Coherence Tomography Images . Transl Vis Sci Technol . 2018 ; 7 : 41 . doi: 10.1167/tvst.7.6.41 OpenUrl CrossRef PubMed ↵ Wang X , Tang F , Chen H , et al. UD-MIL: Uncertainty-Driven Deep Multiple Instance Learning for OCT Image Classification . IEEE J Biomed Health Inform . 2020 ; 24 : 3431 – 42 . doi: 10.1109/JBHI.2020.2983730 OpenUrl CrossRef PubMed Perdomo O , Rios H , Rodríguez FJ , et al. 
Classification of diabetes-related retinal diseases using a deep learning approach in optical coherence tomography . Comput Methods Programs Biomed . 2019 ; 178 : 181 – 9 . doi: 10.1016/j.cmpb.2019.06.016 OpenUrl CrossRef PubMed ↵ Rodríguez-Miguel A , Arruabarrena C , Allendes G , et al. Hybrid deep learning models for the screening of Diabetic Macular Edema in optical coherence tomography volumes . Sci Rep . 2024 ; 14 : 17633 . doi: 10.1038/s41598-024-68489-2 OpenUrl CrossRef PubMed ↵ Schlegl T , Waldstein SM , Bogunovic H , et al. Fully Automated Detection and Quantification of Macular Fluid in OCT Using Deep Learning . Ophthalmology . 2018 ; 125 : 549 – 58 . doi: 10.1016/j.ophtha.2017.10.031 OpenUrl CrossRef PubMed Guo Y , Hormel TT , Xiong H , et al. Automated Segmentation of Retinal Fluid Volumes From Structural and Angiographic Optical Coherence Tomography Using Deep Learning . Transl Vis Sci Technol . 2020 ; 9 : 54 . doi: 10.1167/tvst.9.2.54 OpenUrl CrossRef PubMed ↵ Lee CS , Tyring AJ , Deruyter NP , et al. Deep-learning based, automated segmentation of macular edema in optical coherence tomography . Biomed Opt Express . 2017 ; 8 : 3440 – 8 . doi: 10.1364/BOE.8.003440 OpenUrl CrossRef PubMed ↵ De Fauw J , Ledsam JR , Romera-Paredes B , et al. Clinically applicable deep learning for diagnosis and referral in retinal disease . Nat Med . 2018 ; 24 : 1342 – 50 . doi: 10.1038/s41591-018-0107-6 OpenUrl CrossRef PubMed ↵ Tang F , Wang X , Ran A , et al. A Multitask Deep-Learning System to Classify Diabetic Macular Edema for Different Optical Coherence Tomography Devices: A Multicenter Analysis . Diabetes Care . 2021 ; 44 : 2078 – 88 . doi: 10.2337/dc20-3064 OpenUrl Abstract / FREE Full Text ↵ Browning DJ , Stewart MW , Lee C. Diabetic macular edema: Evidence-based management . Indian J Ophthalmol . 2018 ; 66 : 1736 – 50 . doi: 10.4103/ijo.IJO_1240_18 OpenUrl CrossRef PubMed ↵ Zhu H , Ji J , Lin J-W , et al. 
Development and validation of a 3-D deep learning system for diabetic macular oedema classification on optical coherence tomography images . BMJ Open . 2025 ; 15 : e099167 . doi: 10.1136/bmjopen-2025-099167 OpenUrl Abstract / FREE Full Text ↵ Panozzo G , Cicinelli MV , Augustin AJ , et al. An optical coherence tomography-based grading of diabetic maculopathy proposed by an international expert panel: The European School for Advanced Studies in Ophthalmology classification . Eur J Ophthalmol . 2020 ; 30 : 8 – 18 . doi: 10.1177/1120672119880394 OpenUrl CrossRef PubMed ↵ Szeto SKH , Lai TYY , Vujosevic S , et al. Optical coherence tomography in the management of diabetic macular oedema . Progress in Retinal and Eye Research . 2024 ; 98 : 101220 . doi: 10.1016/j.preteyeres.2023.101220 OpenUrl CrossRef PubMed ↵ Xu J-J , Zhou Y , Wei Q-J , et al. Three-dimensional diabetic macular edema thickness maps based on fluid segmentation and fovea detection using deep learning . Int J Ophthalmol . 2022 ; 15 : 495 – 501 . doi: 10.18240/ijo.2022.03.19 OpenUrl CrossRef PubMed ↵ Mitamura M , Saito M , Hirooka K , et al. Differences in Artificial Intelligence-Based Macular Fluid Parameters Between Clinical Stages of Diabetic Macular Edema and Their Relationship with Visual Acuity . Journal of Clinical Medicine . 2025 ; 14 : 1007 . doi: 10.3390/jcm14031007 OpenUrl CrossRef PubMed ↵ Tan T-E , Ng YP , Calhoun C , et al. Detection of Center-Involved Diabetic Macular Edema With Visual Impairment Using Multimodal Artificial Intelligence Algorithms . Ophthalmology Retina . 2025 ; 9 : 955 – 63 . doi: 10.1016/j.oret.2025.04.016 OpenUrl CrossRef PubMed ↵ Mondal A , Nandi A , Pramanik S , et al. Application of deep learning algorithm for judicious use of anti-VEGF in diabetic macular edema . Sci Rep . 2025 ; 15 : 4569 . doi: 10.1038/s41598-025-87290-3 OpenUrl CrossRef PubMed ↵ Kermany D , Zhang K , Goldbaum M. Labeled Optical Coherence Tomography (OCT) and Chest X-Ray Images for Classification . 
Mendeley Data [Dataset] . 2018;V2 . doi: 10.17632/rscbjbr9sj.2 OpenUrl CrossRef ↵ Cohen J. A Coefficient of Agreement for Nominal Scales . Educ Psychol Meas . 1960 ; 20 : 37 – 46 . doi: 10.1177/001316446002000104 OpenUrl CrossRef Web of Science ↵ Kingma DP , Ba J. Adam: A Method for Stochastic Optimization . Proceedings of the 3rd International Conference on Learning Representations . San Diego, USA 2017 . ↵ DeLong ER , DeLong DM , Clarke-Pearson DL . Comparing the Areas under Two or More Correlated Receiver Operating Characteristic Curves: A Nonparametric Approach . Biometrics . 1988 ; 44 : 837 – 45 . doi: 10.2307/2531595 OpenUrl CrossRef PubMed Web of Science ↵ Wilson EB . Probable Inference, the Law of Succession, and Statistical Inference . Journal of the American Statistical Association . Published Online First: 1 June 1927. ↵ Zachariah S , Wykes W , Yorston D. Grading diabetic retinopathy (DR) using the Scottish grading protocol . Community Eye Health . 2015 ; 28 : 72 – 3 . OpenUrl PubMed ↵ Wilson M , Chopra R , Wilson MZ , et al. Validation and Clinical Applicability of Whole-Volume Automated Segmentation of Optical Coherence Tomography in Retinal Disease Using Deep Learning . JAMA Ophthalmology . 2021 ; 139 : 964 . doi: 10.1001/jamaophthalmol.2021.2273 OpenUrl CrossRef PubMed ↵ Schurer-Waldheim S , Seebock P , Bogunovic H , et al. Robust Fovea Detection in Retinal OCT Imaging using Deep Learning . IEEE J Biomed Health Inform . 2022 ; 26 : 3927 – 37 . doi: 10.1109/JBHI.2022.3166068 OpenUrl CrossRef PubMed ↵ Young AT , Amara D , Bhattacharya A , et al. Patient and general public attitudes towards clinical artificial intelligence: a mixed methods systematic review . Lancet Digit Health . 2021 ; 3 : e599 – 611 . doi: 10.1016/S2589-7500(21)00132-1 OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted October 16, 2025. 
Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Classifying the severity of diabetic macular oedema from optical coherence tomography scans using deep learning: a feasibility study Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Classifying the severity of diabetic macular oedema from optical coherence tomography scans using deep learning: a feasibility study Cathal Breathnach , Fiona Harney , Deirdre Townley , Rachel Hickey , Andrew Simpkin , Derek O’Keeffe medRxiv 2025.02.19.24317749; doi: https://doi.org/10.1101/2025.02.19.24317749 Share This Article: Copy Citation Tools Classifying the severity of diabetic macular oedema from optical coherence tomography scans using deep learning: a feasibility study Cathal Breathnach , Fiona Harney , Deirdre Townley , Rachel Hickey , Andrew Simpkin , Derek O’Keeffe medRxiv 2025.02.19.24317749; doi: https://doi.org/10.1101/2025.02.19.24317749 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Ophthalmology Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (297) Cardiovascular Medicine (4421) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (606) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15212) Forensic Medicine (30) 
Gastroenterology (1121) Genetic and Genomic Medicine (6581) Geriatric Medicine (667) Health Economics (996) Health Informatics (4520) Health Policy (1366) Health Systems and Quality Improvement (1611) Hematology (539) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15906) Intensive Care and Critical Care Medicine (1103) Medical Education (620) Medical Ethics (144) Nephrology (667) Neurology (6580) Nursing (345) Nutrition (998) Obstetrics and Gynecology (1141) Occupational and Environmental Health (956) Oncology (3324) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1689) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5432) Public and Global Health (9212) Radiology and Imaging (2193) Rehabilitation Medicine and Physical Therapy (1368) Respiratory Medicine (1194) Rheumatology (593) Sexual and Reproductive Health (709) Sports Medicine (529) Surgery (709) Toxicology (99) Transplantation (288) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ff48826d93606f3',t:'MTc3OTM3NjQ3Nw=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-06-15T06:18:04.506796+00:00