Full text
41,328 characters
· extracted from
preprint-html
· click to expand
Interpretable Machine Learning for Epileptic Seizure Detection on the BEED Using LIME with an Ensemble Network | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Interpretable Machine Learning for Epileptic Seizure Detection on the BEED Using LIME with an Ensemble Network View ORCID Profile Biplov Paneru doi: https://doi.org/10.1101/2025.09.30.25336996 Biplov Paneru Dept. of Electronics and Communication Engineering, Pokhara University , Nepal Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Biplov Paneru For correspondence: biplovp019402{at}nec.edu.np Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract This study aims to identify seizures in four different stages among epileptic patients, utilizing the Bangalore Epilepsy Dataset (BEED). This dataset, which has 16 channels, was sourced from the UCI Machine Learning Repository. Initially, the data underwent preprocessing through UMAP for dimensionality reduction. This was succeeded by feature extraction via the Fast Fourier Transform (FFT), which transformed the scaled signals into the frequency domain to capture their spectral characteristics. The findings show that a two-level ensemble model surpasses the performance of leading methods, reaching an accuracy rate of 97.06%. The model’s performance was confirmed through stringent nested cross-validation, guaranteeing consistency across all dataset folds. The model’s potential for real-time deployment on Edge and Internet of Things (IoT) devices is underscored by these findings. Introduction As machine learning (ML) and artificial intelligence (AI) continue to advance, there is an increasing emphasis on using these technologies to improve clinical applications. Early disease diagnosis and prediction to facilitate early preventive intervention is a major objective in healthcare [ 1 ]. Researchers have focused a lot of emphasis on epileptic seizures, which are neurological disorders marked by particular patterns in electroencephalography (EEG) recordings. EEG signal feature extraction and pattern classification have been successfully accomplished using machine learning (ML) and deep learning (DL) approaches [ 2 ]. Recurrent seizures that result in unconsciousness or muscle spasms are a common neurological disorder called epilepsy, which has a major negative influence on a patient’s health, everyday activities, and general well-being [ 3 ]. Because EEG recordings offer vital information about the electrical activity of the brain, they are widely employed in epilepsy research [ 4 ]. EEG data analysis using ML and DL techniques has been the subject of numerous studies aimed at identifying and forecasting epileptic episodes [ 5 , 10 , 12 ]. More accurate seizure identification has been made possible by developments in EEG processing techniques, such as improved feature extraction methods, improved artifact removal, and improved time-frequency analysis [ 6 ]. About 50 million individuals worldwide suffer from epilepsy, which is one of the most common and dangerous neurological conditions, affecting 1% to 2% of the world’s population [ 7 ]. Given the severity of seizures and their risks, alongside the pressing demand for more accurate and advanced therapeutic approaches have been developed to identify seizure activity and deliver targeted electrical stimulation to help suppress seizures and reduce their frequency [ 9 ]. This work solely focuses on development of hybrid models as well as ensemble, traditional ML models to find out the potentiality for deployment for aiding patient with epileptic seizure along with classifying it. The proposed research is divided into various section in which section I. is related to abstract, and general introduction along with related works. Section 2. Refers to methodology section in which various ML models are proposed and architecture proposed are shown. Section 3. Shows the results from the models as well as discussions from the results obtained finally. Comparing work with previous works and discussion on limitations of study. Final section deals with concluding the study describing its outcomes and potential application for real-world epilepsy related effects minimization with AI-powered technology. The work provides a framework for utilizing applications-based on the early seizure detection for the aiding of patients at the emergency situations. Related works Recent years have seen substantial advances in machine learning (ML) techniques for epileptic seizure prediction and detection using EEG signals. Rasheed et al. [ 1 ] provided a comprehensive review highlighting state-of-the-art ML methods for early seizure prediction, identifying research gaps, challenges, and suggesting future directions to enhance model robustness and generalizability. Several works have employed the UCI Epileptic Seizure Recognition dataset for classification tasks. Kode et al. [ 2 ] evaluated multiple classifiers, including Extreme Gradient Boosting, TabNet, Random Forest, and a 1D Convolutional Neural Network (CNN), achieving accuracies up to 99%, with their proposed 1D-CNN outperforming other models in terms of accuracy, sensitivity, precision, and recall. Similarly, Kunekar et al. [ 3 ] investigated conventional ML and deep learning (DL) approaches, demonstrating that Long Short-Term Memory (LSTM) networks provided superior results for seizure detection tasks. Vieira et al. [ 4 ] proposed an LSTM-based methodology that achieved 97% validation accuracy while also focusing on feature and channel reduction. Leveraging Explainable AI (XAI), their approach attained over 95% in key metrics using only six features and five EEG channels, emphasizing the feasibility of lightweight models suitable for mobile applications. Beyond traditional architectures, Zhu et al. [ 5 ] introduced a multidimensional Transformer fused with recurrent networks (LSTM-GRU) for seizure classification, addressing the challenge of the non-stationary and complex nature of EEG signals. Disli et al. [ 7 ] proposed a Continuous Wavelet Transform (CWT)-based depthwise CNN. Their innovative method converted multi-channel EEG data into image-like representations, achieving nearly 96% accuracy and outperforming prior methods without requiring separate feature extraction or additional classifiers. The potential of lightweight models and TinyML for real-time applications has also been explored. Tsanika et al. [ 8 ] developed a TinyML model using iEEG data, achieving up to 99% test accuracy. Their work underscores the suitability of resource-constrained ML implementations for integration into closed-loop neurostimulation devices. Fractal features and non-linear dynamics have been explored by Abhisek et al. [ 9 ], who employed parameters like Higuchi’s and Katz’s fractal dimensions alongside spectral features. Their method achieved remarkable performance—100% accuracy in classifying focal versus non-focal EEG signals in the Bern-Barcelona dataset, and significant improvements over state-of-the-art methods in detecting interictal and preictal states. Brari et al. [ 10 ] proposed a novel correlation dimension-based feature extraction technique, reporting near-perfect classification accuracy on benchmark datasets while maintaining computational simplicity. Rab et al. [ 11 ] combined deep learning with domain knowledge (frequency bands, electrode locations, and temporal patterns) and introduced XAI4EEG, a framework that integrates SHAP-based explanations to improve interpretability and reduce clinicians’ validation time during seizure analysis. Architectural innovations combining CNNs and recurrent networks have shown high effectiveness. Mallick et al. [ 12 ] designed a hybrid model incorporating 1D convolutional layers, bidirectional LSTM, GRU, and pooling layers, achieving up to 100% accuracy for binary seizure detection on the Bonn dataset and robust performance in multi-class classification tasks. Esmaeilpour et al. (2023) [ 13 ] focused on detecting preictal states using convolutional neural networks for feature extraction, followed by ensemble classifiers like random forests and support vector machines, achieving 90.76% sensitivity on the CHB-MIT dataset. Finally, privacy-preserving methods have emerged as critical in healthcare. Suryakala et al. (2023) [ 14 ] proposed a Federated Machine Learning (FML) approach that trains models on decentralized EEG data, achieving high sensitivity (98.24%) and specificity (99.23%) while maintaining patient data confidentiality. This study by Ahmad et al, (2024) addresses the critical challenge of early epileptic seizure prediction (ESP) by developing a patient-specific framework that leverages both handcrafted and deep learning features extracted from EEG signals. It employs a Bidirectional Long Short-Term Memory (BiLSTM) network combined with an attention mechanism to capture temporal dependencies while reducing feature dimensionality. Integrating Consumer Internet of Things (CIoT) technology enables real-time seizure prediction and alerting through cloud-based platforms, enhancing timely intervention. The model demonstrates robust performance using Leave-One-Out cross-validation, achieving high accuracy (91.39%), sensitivity (91.30%), specificity (90.06%), and a low false positive rate (0.12), highlighting its potential for practical clinical applications in smart healthcare systems. This research [ 22 ] by Ahmad et al, (2024) proposes an Advanced Multi-View Deep Feature Learning (AMV-DFL) framework to improve epileptic seizure detection from EEG signals by combining traditional frequency and time domain features with deep features extracted via 1D convolutional neural networks. Using a multi-view forest classifier and explainable AI techniques, the method enhances interpretability and classification performance. Experimental results show that AMV-DFL outperforms traditional and state-of-the-art models by 3–4% in accuracy, supporting clinicians in better identifying key EEG features and advancing epilepsy diagnosis. Collectively, these works illustrate the breadth of methodologies—ranging from conventional ML, deep architectures, feature engineering, explainable AI, to privacy-preserving techniques—that continue to enhance the reliability and practicality of EEG-based epileptic seizure detection systems. Numerous studies have been conducted in the past on machine learning-based seizure detection; nevertheless, further research on diverse occurrences is necessary and essential to progress in the fields of epilepsy and seizure early detection. Through the creation and advancement of Internet of Medical Things technology, assistive strategies are being developed to make the lives of sufferers simpler. The evaluation of the ML-based framework on only two classes and all four classes was done on the same dataset in earlier works [ 5 ], which lagged behind the creation of detection and application frameworks based on three classes. With the dataset [ 15 ] we are working on 3 classes events classification as well as 4-classes events classification with various ML models development. Finally, IoMT (Internet of Medical Things) approach provide a comprehensive framework to automate patient room or environmental condition as well as triggering alert notification to caretaker under cases of emergencies. Proposed ML modeling approach Dataset A dataset from the UCI repository source is collected, and UMAP, FFT were applied [ 15 , 21 ], and details of the dataset are given in table 1 and table 2 . View this table: View inline View popup Download powerpoint Table 1: Dataset detail [ 15 ] View this table: View inline View popup Table 2: Dataset breakdown of detail Preprocessing UMAP and FFT application In this workflow, data preprocessing consists of two important feature transformation techniques applied one after the other to improve the raw input data prior to modeling. To begin with, the features are normalized using standard scaling, which guarantees a mean of zero and unit variance; this step aids the performance of algorithms such as random forest and UMAP by stopping features with larger scales from taking control. Next, UMAP (Uniform Manifold Approximation and Projection) is utilized as a dimensionality reduction method that captures the intrinsic temporal or structural relationships within the data, condensing the original high-dimensional feature space into a smaller set of meaningful components. UMAP (Uniform Manifold Approximation and Projection) UMAP is a dimensionality reduction technique based on manifold learning and topological data analysis. Its goal is to find a low-dimensional representation of data that preserves the high-dimensional data’s essential topological structure. Constructing the Fuzzy Simplicial Set UMAP begins by computing a weighted local connectivity for each point x i to its neighbors x j . This connectivity is defined using a smooth exponential kernel: where: d ( x i , x j ) is the distance between points i and j . ρ i is the distance to the closest neighbor, which serves as a local connectivity adjustment. σ i is a scaling parameter chosen to balance local connectivity. Cross-Entropy Loss to Optimize Low-Dimensional Embedding UMAP minimizes the cross-entropy between fuzzy sets in the high-dimensional and low-dimensional spaces. The loss function is given by: Here, w ′ ij is the similarity between the low-dimensional embeddings y i and y j . This is typically modeled with the following equation: The parameters a and b control the shape of the curve. FFT (Fast Fourier Transform) The FFT is an efficient algorithm for computing the Discrete Fourier Transform (DFT). It transforms a discrete time-domain signal x [ n ] into its frequency components X [ k ]. Discrete Fourier Transform Equation The DFT is defined as: for k = 0,1, …, N − 1. In this equation: x [ n ] is the input signal at sample n . N is the total number of samples. j = √−1 is the imaginary unit. The magnitude spectrum, which is often used as a feature, is calculated as: These equations highlight the core principles of how UMAP models local relationships and optimizes embeddings, and how FFT decomposes signals into their frequency components. Both methods enable the creation of richer feature representations for machine learning models. After UMAP, the data is processed using FFT (Fast Fourier Transform) to extract spectral features by converting the scaled data into the frequency domain. This transformation emphasizes periodic patterns and frequency-based characteristics that may be essential for classification. The outputs from UMAP and FFT are concatenated to create a combined feature set that captures both temporal structure and spectral information. This provides a richer representation of the data for the Random Forest model during training and evaluation. The figure 1 depiction shows unprocessed EEG signals from 16 channels, demonstrating temporal variations in electrical brain activity as documented in the Bangalore EEG Epilepsy dataset. Each subplot represents a particular brain region, where the horizontal axis indicates the progression of time (sample index) and the vertical axis displays voltage measured in microvolts. In the beginning (samples 0–2500), the majority of channels display high-frequency, low-amplitude activity that is characteristic of normal or inter-ictal brain states. Around sample 2500, a significant change takes place—multiple channels (especially X1, X2, X4, X5, X9, X12, and X13) exhibit a sudden increase in amplitude and a shift to rhythmic, lower-frequency oscillations, signaling the onset of a seizure (ictal state). Download figure Open in new tab Fig 1: EEG signal plot for various channels This is followed by a post-ictal phase, where the signal flattens or reduces significantly in amplitude. The plot effectively highlights the temporal dynamics of a seizure and suggests spatial localization based on differential channel responses. i. Random Forest Random Forest is a type of ensemble learning algorithm that is mainly utilized for classification and regression tasks. During training, it constructs several decision trees and merges their predictions to enhance accuracy and manage overfitting. For classification, a majority vote is taken across all trees, while for regression, the predictions are averaged. Each tree is trained on a random data subset (bagging) and uses a random feature subset for node splitting, which adds diversity and decreases correlation among trees ii. EEGNet EEGNet is a compact convolutional neural network architecture specifically designed for EEG signal processing. It efficiently extracts spatial and temporal features from EEG data using depthwise and separable convolutions, making it well-suited for brain-computer interface (BCI) applications with limited training data and low computational cost. iii. SVC Support Vector Classification (SVC) is an algorithm for supervised machine learning that serves classification purposes. The concept is rooted in Support Vector Machine (SVM) methodology, which seeks to identify the optimal hyperplane that most effectively divides data points of varying classes within a high-dimensional space. SVC operates by maximizing the margin between the closest points (support vectors) of each class, making it suitable for both linear and non-linear classification challenges. Proposed Workflow First, models are feature extracted as preprocessing was carried out with UMAP and FFT procedure and they are trained and evaluated with various evaluation metrics. In order to find out the best performance, the nested cross-validation technique is applied. This enhanced the model development as well as ensured the model’s performance isn’t compromised on different folds of the dataset. With this cross-validation technique, model performance on the entire dataset can be rigorously tested. Then, the trained model is proposed to be deployed on the proposed IoMT system-based concept to aid patients. The various ML models are trained to make seizure detection models. A stacking ensemble models are conceptualized that performed well beyond EEGNet a common model in ML field. The figure 3 shows overall modeling concept. Download figure Open in new tab Fig 2: EEGNet architecture [ 16 ] Download figure Open in new tab Fig 3: Proposed modeling concept Stacking ensemble model This model implements a two-level stacking ensemble for EEG epilepsy classification: Level 1: Base Learners The base models are individually trained classifiers: i. Random Forest: An ensemble of decision trees that uses bootstrap sampling and feature randomness to improve accuracy and control overfitting. ii. Extra Trees: Similar to Random Forest but builds trees by selecting splits randomly, which often increases diversity among trees and reduces variance. iii. XGBoost: A gradient boosting method that builds trees sequentially to minimize classification errors, highly effective for structured/tabular data. Level 2: Meta Learner The outputs (predictions or probabilities) from the base learners are fed into a CatBoost classifier, which acts as the meta-model. CatBoost is an efficient gradient boosting algorithm known for handling categorical variables and avoiding overfitting. It learns to optimally combine the base learners’ predictions to improve the overall final accuracy. The ensemble model as shown in figure 4 architectural diagram benefits from the diversity and complementary strengths of base learners, while the meta learner integrates these predictions for better robustness and generalization on the test data. Download figure Open in new tab Fig 4: Ensemble model network Evaluation metrics Accuracy, precision, recall and F1-score: Accuracy measures the overall correctness of a classification model by calculating the proportion of total predictions that are correct. Precision evaluates how many of the positive predictions made by the model are actually correct, reflecting the model’s ability to avoid false positives. Recall (also known as sensitivity) measures the model’s ability to identify all actual positive cases by finding the proportion of true positives detected out of all real positives, thus indicating how well the model prevents false negatives. The F1-score is the harmonic mean of precision and recall, providing a balanced metric that combines both aspects to obtain one performance measure, especially useful when the class distribution is imbalanced [ 17 ]. LIME analysis LIME (Local Interpretable Model-agnostic Explanations) is an interpretability technique used to explain the predictions of any black-box machine learning model. It performs operations by perturbing the input data locally around a prediction and fitting a simple, interpretable model— like SVC or other models—on this perturbed dataset to approximate the behavior of the complex model in that local region. This helps the users to understand which features contributed most to a specific prediction, helping build trust and transparency in model decisions without further need to understand the entire model’s architecture. Results and Discussions The neural network models like EEGNet suffered overfitting, while the two-level ensemble model with approximately 97% accuracy depicted a strong outcome. The models went through cross-validation evaluation in order to evaluate performances across the entire dataset’s different folds. Model wise performances EEGNet: The EEGNet model achieved training and testing accuracies of 69.65% and 69.63%, respectively. There are indications that the model has not learned adequately, and although regularization and dropouts were subsequently implemented, there was no enhancement in the model’s performance. The primary problem with this approach was that the neural networks could not produce stable outcomes and experienced significant overfitting, despite the application of regularization and other methods to mitigate these issues. The figure 5 shows the “Average FFT Spectrum by Class” that suggests a frequency-domain analysis of EEG signals, grouped by distinct classes (e.g., different brain states or tasks). The “EEGNet Accuracy” and “Train validation” sections indicate the model’s performance during training, with metrics tracked across epochs (0 to 7). The inclusion of UMAP implies dimensionality reduction to highlight patterns or separability in the temporal features, correlating with the model’s accuracy. Overall, the figure combines signal processing (FFT), machine learning (EEGNet), and visualization (UMAP) to analyze and interpret EEG data. Download figure Open in new tab Fig 5: Model history plots Random Forest classifier The model random forest obtained accuracy of 94.19% and training accuracy of 100%. The model was cross validated across 10-folds in table 3 to check for any signs of overfitting. View this table: View inline View popup Download powerpoint Table 3: Nested Cross-Validation Results Ensemble Stacking classifier The model confusion matrix shown in figure 6 shows how well the model performed and predicted on testing data. The model obtained a test accuracy of 97.06% and a training accuracy of 100%. The model ensemble stacking classifier with 399 correct predictions for class ‘0’ and 394 for class 1 and 389 correct predictions for class ‘2’ and finally ‘385’ correct predictions for class ‘3’ could successfully classify various epileptic events. The models overfitting was evaluated with nested cross fold validation for 10 folds as tabulated. The nested cross fold validation in table 4 shows 91.51% test accuracy with only 0.0162 as standard deviation showing models consistent learning over entire dataset folds. Download figure Open in new tab Fig 6: Ensemble stacking classifier model confusion matrix plot View this table: View inline View popup Download powerpoint Table 4: Nested cross validation result: LIME intrepretability The LIME explanation for test instance 0 as shown in figure 7 highlights how specific feature ranges influenced the model’s prediction. Among the features, Feature_15 in the range 0.00 to 0.16 contributed most positively to the prediction with a weight of +0.1316, followed by Feature_7 between –0.18 and 0.00 (+0.0881), and Feature_12 between 0.00 and 0.23 (+0.0667). These features increased the model’s confidence in its decision. On the other hand, some features had a negative influence, decreasing the likelihood of the predicted class. Notably, Feature_2 greater than 0.18 had the strongest negative impact (–0.0616), followed by Feature_5 and Feature_4, both greater than 0.23, with weights of –0.0489 and –0.0476, respectively. Additional features such as Feature_1 > 0.25 and Feature_9 > 0.27 also slightly reduced the prediction confidence. Overall, LIME provides a local explanation by quantifying how specific feature intervals either supported or opposed the model’s final decision for this particular instance. The figure 7 and figure 8 shows LIME summary and local explanations plot. This shows feature 15 (channel 16 data) as most significant in predictions. Download figure Open in new tab Fig 7: LIME analysis plot Download figure Open in new tab Fig 8: Local explanations plot Besides, the Support Vector Classifier model obtained a training accuracy of 66.22% test Accuracy of 65.81% and the model catboost obtained a test Accuracy: of 90.38%. The model hyperparametrs are given in table 5 and table 6 shows the summarized classification report for all models utilized. View this table: View inline View popup Table 5: Model wise hyperparamters View this table: View inline View popup Table 6: Classification report summary View this table: View inline View popup Download powerpoint Table 7: Comparison to state of the art approaches The performance of the EEGNet and SVC models is similar and hasn’t shown better outcome compared to ensemble and boosting models. The two models reached an overall accuracy of approximately 66–70%. They excelled in class 0 (achieving flawless precision, recall, and F1-score), but their performance declined markedly for the remaining classes. In particular, the precision and recall scores for classes 1, 2, and 3 are significantly lower, suggesting that the models find it difficult to accurately recognize these classes, especially class 3, which poses the greatest challenge for both models. This imbalance is illustrated by the macro and weighted averages, which show overall scores of approximately 0.62–0.67. The comparison to state of the art approaches is shown in table 6 . Which shows that model has been successful to demonstrate an extra ordinary outcome performing better than state of the art models on same dataset. Limitations and Future works Thus, the proposed approach shows a significant concept in EEG signal classification for detecting various kinds of seizures and is vital to identify their impacting features, the channels that much affect the prediction, as well as choosing the best model for performance enhancement. With current work, the model is applicable for realtime edge and IoT devices integrations. As proposed work has depicted an IoMT concept for aiding patients, a real-time evaluation couldn’t be made due to a lack of EEG headsets and more advanced devices. In the future, rigorous testing for such prototyping can be carried out. The model can be employed with various techniques for improvement of neural network models and algorithms development. Conclusion To conclude, an ML model like stacking ensemble architecture has a higher potential to gather greater performance for EEG results. The models trained on the dataset show that prior to various epileptic conditions, early warning can be triggered using ML-based detection via victim EEG data. The UMAP and FFT allowed the features preprocessing to get better. This approach can be useful for deployment in real-time prototyping for IoMT edge and medical device development. With this approach, future intelligent biomedical systems based on EEG can be enhanced. Declaration of funding statement No funding has been received for this work. Data Availability Data is cited in manuscript available on UCI repository. https://archive.ics.uci.edu/dataset/1134/beed:+bangalore+eeg+epilepsy+dataset REFERENCES [1]. ↵ Rasheed , K. , Qayyum , A. , Qadir , J. , Sivathamboo , S. , Kwan , P. , Kuhlmann , L. , O’Brien , T. and Razi , A ., 2021 . Machine learning for predicting epileptic seizures using EEG signals: A review . IEEE Reviews in Biomedical Engineering , 14 , pp. 139 – 155 . doi: 10.1109/RBME.2020.3008792 OpenUrl CrossRef PubMed [2]. ↵ Kode , H. , Elleithy , K. and Almazaydeh , L ., 2024 . Epileptic seizure detection in EEG signals using machine learning and deep learning techniques . IEEE Access , 12 , pp. 80657 – 80668 . doi: 10.1109/ACCESS.2024.3409581 OpenUrl CrossRef [3]. ↵ Zhu , R. , Pan , W. , Liu , J. et al. , 2024 . Epileptic seizure prediction via multidimensional transformer and recurrent neural network fusion . Journal of Translational Medicine , 22 , p. 895 . doi: 10.1186/s12967-024-05678-7 OpenUrl CrossRef PubMed [4]. ↵ Kunekar , P. , Gupta , M.K. and Gaur , P ., 2024 . Detection of epileptic seizure in EEG signals using machine learning and deep learning techniques . Journal of Engineering and Applied Science , 71 , p. 21 . doi: 10.1186/s44147-023-00353-y OpenUrl CrossRef [5]. ↵ Vieira , J.C. , Guedes , L.A. , Santos , M.R. and Sanchez-Gendriz , I ., 2023 . Using explainable artificial intelligence to obtain efficient seizure-detection models based on electroencephalography signals . Sensors , 23 , p. 9871 . doi: 10.3390/s23249871 OpenUrl CrossRef PubMed [6]. ↵ Jaiswal , D. , Singh , R. and Prajapati , B ., 2025 . Advances in EEG signal processing and machine learning for epileptic seizure detection and prediction . Journal of Population Therapeutics and Clinical Pharmacology , 32 ( 2 ), pp. 355 – 364 . doi: 10.53555/drbcyr98 OpenUrl CrossRef [7]. ↵ S, A., S, S.K. , Mohan , N. and KP, S. , 2024 . EEG based automated detection of seizure using machine learning approach and traditional features . Expert Systems with Applications , 251 , p. 123991 . doi: 10.1016/j.eswa.2024.123991 OpenUrl CrossRef [8]. ↵ Dişli , F. , Gedikpınar , M. , Fırat , H. , Şengür, A., Güldemir, H. and Koundal, D., 2025 . Epilepsy diagnosis from EEG signals using continuous wavelet transform-based depthwise convolutional neural network model . Diagnostics , 15 , p. 84 . doi: 10.3390/diagnostics15010084 OpenUrl CrossRef PubMed [9]. ↵ Tsakanika , E. , Tsoukas , V. , Kakarountas , A. and Kokkinos , V ., 2025 . High accuracy of epileptic seizure detection using tiny machine learning technology for implantable closed-loop neurostimulation systems . BioMedInformatics , 5 , p. 14 . doi: 10.3390/biomedinformatics5010014 OpenUrl CrossRef [10]. ↵ Brari , Z. and Belghith , S ., 2021 . A novel machine learning approach for epilepsy diagnosis using EEG signals based on correlation dimension . IFAC-PapersOnLine , 54 ( 17 ), pp. 7 – 11 . doi: 10.1016/j.ifacol.2021.11.018 OpenUrl CrossRef [11]. ↵ Raab , D. , Theissler , A. and Spiliopoulou , M ., 2023 . XAI4EEG: spectral and spatio-temporal explanation of deep learning-based seizure detection in EEG time series . Neural Computing and Applications , 35 , pp. 10051 – 10068 . doi: 10.1007/s00521-022-07809-x OpenUrl CrossRef [12]. ↵ Mallick , S. and Baths , V ., 2024 . Novel deep learning framework for detection of epileptic seizures using EEG signals . Frontiers in Computational Neuroscience , 18 . doi: 10.3389/fncom.2024.1340251 OpenUrl CrossRef [13]. ↵ Esmaeilpour , A. , Tabarestani , S.S. and Niazi , A ., 2024 . Deep learning-based seizure prediction using EEG signals: A comparative analysis of classification methods on the CHB-MIT dataset . Engineering Reports , 6 ( 11 ). doi: 10.1002/eng2.12918 OpenUrl CrossRef [14]. ↵ Suryakala , S.V. , Vidya , T.R.S. and Ramakrishnans , S.H ., 2024 . Federated machine learning for epileptic seizure detection using EEG . International Journal of Advanced Computer Science and Applications , 15 ( 4 ). doi: 10.14569/ijacsa.2024.01504106 OpenUrl CrossRef [15]. ↵ ., N. & BANU P K, N. ( 2024 ). BEED: Bangalore EEG Epilepsy Dataset [Dataset] . UCI Machine Learning Repository . doi: 10.24432/C5K33B . OpenUrl CrossRef [16]. ↵ Xu , Fangzhou & Miao , Yunjing & Sun , Yanan & Guo , Dongju & Xu , Jiali & Wang , Yuandong & Li , Jincheng & Li , Han & Dong , Gege & Rong , Fenqi & Leng , Jiancai & Zhang , Yang . ( 2021 ). A transfer learning framework based on motor imagery rehabilitation for stroke . Scientific Reports . 11 . 19783 . doi: 10.1038/s41598-021-99114-1 . OpenUrl CrossRef PubMed [17]. ↵ Powers , D. M . ( 2020 ). Evaluation: from precision, recall and F-measure to ROC, informedness, markedness and correlation . arXiv preprint arXiv : 2010 . 16061 . OpenUrl [18]. Paneru , B. , Thapa , B. , Paneru , B. , & Sapkota , S. C . ( 2025 ). EEG right & left voluntary hand movement-based virtual brain-computer interfacing keyboard using hybrid deep learning approach . Advanced Engineering Informatics , 65 , 103304 . OpenUrl [19]. Najmusseher , N. , & K, N. B. P . ( 2025 ). Feature engineering for epileptic seizure classification using SeqBoostNet . Systems , 17 ( 1 ), 1 – 15 . OpenUrl [20]. Najmusseher , N. , Banu , P. N. , & Janardhan , D . ( 2024 ). Automated epileptic seizure classification using adaptive fast Fourier transform with non-uniform sampling and improved deep belief network . Informatics , 12 ( 4 ), 460 – 512 . OpenUrl [21]. ↵ I. Ahmad et al. , “ Multi-Feature Fusion-Based Convolutional Neural Networks for EEG Epileptic Seizure Prediction in Consumer Internet of Things ,” in IEEE Transactions on Consumer Electronics , vol. 70 , no. 3 , pp. 5631 – 5643 , Aug. 2024 , doi: 10.1109/TCE.2024.3363166 . OpenUrl CrossRef [22]. ↵ Ahmad I , Liu Z , Li L , Ullah I , Aboyeji ST , Wang X , Samuel OW , Li G , Tao Y , Chen Y , Chen S . Robust Epileptic Seizure Detection Based on Biomedical Signals Using an Advanced Multi-View Deep Feature Learning Approach . IEEE J Biomed Health Inform . 2024 Oct; 28 ( 10 ): 5742 – 5754 . doi: 10.1109/JBHI.2024.3396130 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted October 02, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Interpretable Machine Learning for Epileptic Seizure Detection on the BEED Using LIME with an Ensemble Network Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Interpretable Machine Learning for Epileptic Seizure Detection on the BEED Using LIME with an Ensemble Network Biplov Paneru medRxiv 2025.09.30.25336996; doi: https://doi.org/10.1101/2025.09.30.25336996 Share This Article: Copy Citation Tools Interpretable Machine Learning for Epileptic Seizure Detection on the BEED Using LIME with an Ensemble Network Biplov Paneru medRxiv 2025.09.30.25336996; doi: https://doi.org/10.1101/2025.09.30.25336996 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9219) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffc24f7e99306f3',t:'MTc3OTQ1NjMwMA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.