Extensive Benchmarking of Community Detection Algorithms

doi:10.1101/2025.05.07.652778

Extensive Benchmarking of Community Detection Algorithms

2025 · doi:10.1101/2025.05.07.652778

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 59,616 characters · extracted from preprint-html · click to expand

Extensive Benchmarking of Community Detection Algorithms | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Extensive Benchmarking of Community Detection Algorithms R Sapna , Harikeshav Karthik , View ORCID Profile Karthik Raman doi: https://doi.org/10.1101/2025.05.07.652778 R Sapna 1 Bhupat and Jyoti Mehta School of Biosciences, Department of Biotechnology, Indian Institute of Technology (IIT) Madras , Chennai 600 036, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site Harikeshav Karthik 2 School of Biomedical Engineering, Indian Institute of Technology (BHU) Varanasi , Uttar Pradesh, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site Karthik Raman 3 Department of Data Science and AI, Wadhwani School of Data Science and Artificial Intelligence, IIT Madras , Chennai 600 036, India 4 Centre for Integrative Biology and Systems mEdicine, Wadhwani School of Data Science and AI, IIT Madras , Chennai 600 036, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Karthik Raman For correspondence: kraman{at}iitm.ac.in Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract The detection of clusters or community in networks is an important problem in network science. We systematically evaluate many widely used community detection algorithms and their variants to identify clusters in complex networks. As the ground truth for assessing accuracy, we use artificial networks modeled on power-law distributions and real-world social networks. In addition, we also performed gene enrichment analysis on human and yeast protein–protein interaction networks to evaluate algorithms on their ability to uncover enriched communities. We implement and adapt an extensive suite of classical algorithms and their modern variants, classified into five types: stochastic, kernel-based, modularity-based, hierarchical, and local search-based. The algorithms are benchmarked primarily using the Normalized Mutual Information metric, with additional analyses focused on granularity by examining cluster ratio and computational time complexity. We find that decreasing the modularity of networks leads to a consistent decline in performance that follows a sigmoidal trajectory as communities become less defined. Algorithms with greater granularity remain stable when community structures are less distinct, while computation time remains independent of network modularity. Additionally, algorithms tend to perform poorly on smaller networks, and higher accuracy often requires a time complexity trade-off for specific high-performing methods. However, as the analysis expands to more extensive networks, this trade-off becomes more pronounced, highlighting the need for efficient scalability. Based on our benchmark and gene enrichment analysis results, we also present recommendations to practitioners. Our robust Python package, complete with a user-friendly command-line interface, empowers users to easily apply these algorithms to their datasets. Author summary In the era of big data, clustering has become an essential tool for processing and analyzing vast amounts of information. By dividing large data sets into smaller, meaningful clusters, we can simplify complex data structures, parallelize tasks, and reveal hidden patterns. This has become an essential preprocessing step that is widely used in various computational domains. Although traditional clustering algorithms remain popular, our work implements range of hybrid techniques that combine classical methods and some novel approaches. We also benchmark the performance of these approaches, their efficiency and efficacy in handling different types of networks. 1 Introduction Community detection algorithms are crucial tools for identifying clusters or groups of densely connected nodes within networks [ 1 – 5 ]. These algorithms have wide-ranging applications [ 6 ], from sociology [ 7 – 9 ] and biology [ 10 – 12 ] to big data processing [ 13 – 15 ] and parallelizing computational tasks [ 16 ]. They provide valuable insights into the structure and organization of complex systems by uncovering hidden patterns [ 17 ] and mesoscopic properties [ 18 ] of the underlying network. Historically, benchmarking efforts have focused on using standard methods available in Python and R libraries such as igraph and NetworkX [ 19 , 20 ]. A significant aspect of the present work comes from the DREAM Challenge (2018) [ 21 ], which focused on identifying disease-enriched modules in extensive anonymized biological networks. More recent developments include techniques that employ deep learning techniques [ 22 – 24 ] and overlapping community detection algorithms [ 25 , 26 ], both of which have gained considerable traction. We implement hybrid techniques that enhance traditional algorithms through various modifications and introduce novel approaches. We create a comprehensive framework through extensive benchmarking to evaluate and compare the effectiveness of different community detection methods in diverse network structures. We also provide the algorithms implemented as a Python package, making them an accessible tool for future research and applications. 2 Results In this section, we compare various community detection algorithms based on their performance. The overall benchmarking workflow is illustrated in Fig. 1 . Download figure Open in new tab Fig 1. Overall Benchmarking Workflow. The benchmarking workflow can be organized into three stages as depicted above. 2.1 Artificial Benchmarks: Mixing parameter µ based analysis In the case of artificial benchmark analysis, we chose the weighted and undirected Lancichinetti–Fortunato–Radicchi (LFR) networks following power-law distributions for this study. The details of the parameters used to generate the networks are given in Table 3 . Two types of benchmark analysis have been performed using the LFR networks shown in Fig. 2 , Fig. 3 , Fig. 4 , Fig. 5 , Fig. 6 and Fig. 7 . Download figure Open in new tab Fig 2. Mixing parameter( µ ) based analysis on Stochastic methods. The µ of the LFR network is varied from 0.1 to 0.8. Download figure Open in new tab Fig 3. Mixing parameter( µ ) based analysis on Greedy methods. The µ of the LFR network is varied from 0.1 to 0.8. Download figure Open in new tab Fig 4. Mixing parameter( µ ) based analysis on Kernel methods. The µ of the LFR network is varied from 0.1 to 0.8. Download figure Open in new tab Fig 5. Mixing parameter( µ ) based analysis on Hierarchy methods. The µ of the LFR network is varied from 0.1 to 0.8. Download figure Open in new tab Fig 6. Mixing parameter( µ ) based analysis on Local methods. The µ of the LFR network is varied from 0.1 to 0.8. Download figure Open in new tab Fig 7. Comparison between the estimated and ground truth mixing parameter ( µ ) The mixing parameter µ of the LFR network is varied from 0.1 to 0.8. The plot compares the estimated values of µ against the ground truth across different algorithms. The mixing parameter µ of an LFR network varies inversely with modularity of the network, implying that the network is more modular at lower values of µ and vice versa, as described by equation 3 . In this analysis, we fixed the network size at 1000 nodes throughout. The results for different algorithm categories from the Mixing Parameter-based Analysis are presented in Fig. 2 , Fig. 3 , Fig. 4 , Fig. 5 , Fig. 6 , arranged into three columns for each result category. The first column shows how Normalized Mutual Information (NMI) changes with increasing mixing parameters for various algorithms. The second column illustrates the cluster ratio, which is defined as the ratio of predicted communities to ground truth. The third column represents the variation in computation time as the mixing parameter increases. 2.1.1 Performance decline follows a sigmoidal pattern Algorithm performance in different categories, measured by NMI, generally declines at varying rates, typically following a sigmoidal pattern as the mixing parameter µ increases from 0.1 to 0.8, showing a sharp decrease at 0.6. This is expected since communities become less defined with higher µ values. The best performing algorithms for µ 0.5 are summarized in Table 1 . In real-world networks, where the mixing parameter µ may not be uniform, community detection algorithms can estimate µ in unsupervised settings. Fig. 7 compares the estimated values µ with the ground truth for Walktrap, Luminy multiplex, and FastGreedy. Luminy multiplex is the most accurate, providing consistently close estimates across all ranges of µ , especially in higher mixing scenarios, making it the most reliable algorithm for estimating µ . View this table: View inline View popup Download powerpoint Table 1. Mixing Parameter ( µ ) based Analysis Fig. 2 , Fig. 3 , Fig. 4 , Fig. 5 , Fig. 6 use error bars to depict the mean result along with the standard deviation (mean ± standard deviation), based on 100 different network realisations at each data point. These error bars provide a visual indication of the variability in the results. 2.1.2 Algorithms with greater granularity maintain stability at high µ values Algorithms that detect communities with finer granularity tend to remain stable even at higher µ values. For example, within the category of stochastic methods, TeamCS based on recursive infomap performs well at low µ values, but suffers a sharp decline once µ exceeds 0.65. In contrast, multi-layer regularized Markov cluster (MLRMCL) shows only moderate performance at low values of µ , but maintains stability as µ increases. This behavior is further highlighted by the cluster ratio graphs, where MLRMCL in Fig. 2 maintains a cluster ratio close to 4, explaining its moderate performance at lower µ . However, its finer community granularity helps sustain its performance at higher µ values, a trend also seen in Nextmr in Fig. 5 and shared neighbour in Fig. Fig. 6 clustering methods. Among these, MLRMCL stands out as the best performer in this regard. MLRMCL is particularly suitable when community structures are not well defined and there is no upper limit on the number of predicted communities. 2.1.3 Computation time remains largely unaffected by increasing µ In most cases, the computation time remains consistent as µ increases from 0.1 to 0.8. This is evident from the third columns in Fig. 2 , Fig. 3 , Fig. 4 , Fig. 5 , Fig. 6 ,, where most algorithms maintain constant computation times, with few exceptions. A notable outlier is TeamCS in Fig. Fig. 2 , which struggles significantly when µ exceeds 0.6. This issue arises because TeamCS relies on a recursive sparsification process that breaks down larger clusters into smaller ones until they fall below a certain size threshold, leading to increased computation time as µ increases. 2.2 Artificial Benchmarks: Network size based Analysis In this analysis, we have fixed the mixing parameter µ at 0.15, while the network size has been varied from 125 to 4000 nodes. The results of different algorithm categories for the analysis based on network size are shown in Fig. 8 , Fig. 9 , Fig. 10 , Fig. 11 , Fig. 12 organized into three columns per result category. The first column shows how Normalized Mutual Information (NMI) varies with increasing network size for different algorithms. The second column displays the cluster ratio, which is the ratio of predicted communities to ground truth communities. The third column represents the variation in computation time as the network size increases. Download figure Open in new tab Fig 8. Network size based analysis on Stochastic methods. The size of the LFR network is varied from 125 to 4000 nodes. Download figure Open in new tab Fig 9. Network size based analysis on Greedy methods. The size of the LFR network is varied from 125 to 4000 nodes. Download figure Open in new tab Fig 10. Network size based analysis on Kernel methods. The size of the LFR network is varied from 125 to 4000 nodes. Download figure Open in new tab Fig 11. Network size based analysis on Hierarchy methods. The size of the LFR network is varied from 125 to 4000 nodes. Download figure Open in new tab Fig 12. Network size based analysis on Local methods. The size of the LFR network is varied from 125 to 4000 nodes. Here are the key insights derived from this analysis: 2.2.1 Algorithms underperform with networks smaller than 500 Nodes Most algorithms across categories experience a sharp increase in NMI as the network size increases from 125 to 500 nodes, after which the performance usually stabilizes. Cluster ratio trends reveal that some algorithms initially deviate at smaller network sizes before stabilizing, while others consistently maintain values above or below 1. This suggests that many algorithms struggle with small networks and only achieve optimal performance once a certain threshold size is reached. 2.2.2 Top-Performing algorithms require time complexity optimization An interesting observation from the size-based analysis is the behaviour of the computation time as the network size increases from 125 to 4000 nodes. While most algorithms display linear-time complexity with the addition of edges, some demonstrate nonlinear or near-constant complexity. For example, SpecHier in Fig. 10 and Fig. 11 , a top performer across multiple categories, exhibited non-linear time complexity. In contrast, Spectral Clustering in Fig. 10 was the fastest, showing nearly constant time complexity, making it a time-efficient option for larger networks. The findings of the artificial benchmark analysis are summarized in Table 1 and Table 2 . View this table: View inline View popup Download powerpoint Table 2. Network Size based Analysis View this table: View inline View popup Download powerpoint Table 3. Parameters utilised in the generating LFR Benchmarks The LFR benchmarks used in the study are undirected weighted networks 2.3 Social Network benchmarking In the context of social network benchmarking, the manually annotated ground truth did not reveal any consistent trends across metrics such as NMI and modularity. However, this analysis has been valuable for pinpointing specific bottlenecks where certain algorithms underperform. For instance, the spinglass algorithm, and one of the variants of kernel based algorithms utilising an exp-Laplacian kernel require the network to be a fully connected component for the algorithms to be functional. This insight could not be deduced from the artificial benchmark analysis alone. Visualizations from the social benchmark analysis can be found in the Supplementary Figures S1 Fig, S2 Fig, S3 Fig, S4 Fig and S5 Fig. 2.4 Gene Enrichment Analysis In this subsection, we compare the performance of different algorithms by evaluating the number of statistically significant enrichments ( p -value ≤ 0.05) identified by these algorithms across communities in the Human and Yeast PPI networks. Additionally, we compare the distribution of community sizes and Fold Enrichment (FE) scores across different algorithms. 2.4.1 Number of statistically significant enrichments per algorithm The bar charts in Fig. 13 and Fig. 14 show the number of significantly enriched Gene Ontology (GO) terms identified by the algorithms in human and yeast protein-protein interaction (PPI) networks. Here are the key insights: Download figure Open in new tab Fig 13. Significant GO Terms on Human PPI Bar chart depicting the number of statistically significant GO Terms Identified by Algorithms in the Human PPI Network Download figure Open in new tab Fig 14. Significant GO Terms on Yeast PPI Bar chart depicting the number of statistically significant GO Terms Identified by Algorithms in the Yeast PPI Network Human PPI Network Top Performers : The Shared neighbor and blue genes algorithms identify the highest number of significant GO terms ≈ 70, 000, outperforming all others by a significant margin. These algorithms are particularly effective at uncovering enriched functional communities in the human PPI network. Mid-Performers : Algorithms like TripleAHC, NextMR , and csbio_iitm-hamming show moderate performance, identifying between 20,000 to 40,000 significant terms. Low Performers : Algorithms such as walktrap, Fast Greedy , and Luminy Multiplex identify fewer than 10,000 significant terms, indicating limited sensitivity to functional enrichment in human PPI data. Yeast PPI Network Top Performers : Similar to the human network, Shared neighbor and blue_genes lead in performance, identifying ≈ 8, 000 significant GO terms. The consistent success of these algorithms across both the Human and Yeast PPI networks underscores their robustness and adaptability for functional enrichment analysis. Mid-Performers : Algorithms like SVT, TripleAHC , and MLRMCL demonstrate moderate performance, identifying between 4,000 and 6,000 significant terms. These methods may be suited for smaller or less complex networks. Low Performers : Algorithms such as zhenhua, Fast Greedy , and Walktrap identify the fewest significant GO terms, ranging from 1,000 to 2,000, suggesting limited effectiveness in detecting enriched communities in yeast PPI data. Based on these results, we recommend using Shared neighbor or blue_genes for optimal functional enrichment detection. However, both algorithms exhibit slower performance in larger networks. In such cases, MLRMCL may serve as a better alternative, offering a reasonable trade-off between speed and enrichment performance. 2.4.2 Comparison of community size distributions and average Fold Enrichment scores We computed the fold enrichment (FE) scores for Gene Ontology (GO) terms related to Molecular Functions and Biological Processes across the communities detected by each algorithm. The following are the key insights derived from this analysis. The community size distributions are shown in Fig. 15 and Fig. 16 , while the average fold enrichment scores are presented in Fig.17 and Fig.18 for the Human and Yeast Protein-Protein Interaction (PPI) networks across different algorithms. Download figure Open in new tab Fig 15. Community Size Distribution in the Human PPI Box plot depicting the distribution of community sizes in different algorithms in the Human PPI network Download figure Open in new tab Fig 16. Community Size Distribution in the Yeast PPI Box plot depicting the distribution of community sizes in different algorithms in the Yeast PPI network Download figure Open in new tab Fig 17. Average Fold Enrichment in the Human PPI Bar chart depicting the average Fold Enrichment of Go Terms in different algorithms in the Human PPI network Download figure Open in new tab Fig 18. Average Fold Enrichment in the Yeast PPI Bar chart depicting the average Fold Enrichment of Go Terms in different algorithms in the Yeast PPI network Top Performing Algorithms blue_genes, Zhenhua, Nextmr stand out as top performers in terms ofaverage fold enrichment across the Human and Yeast PPI networks. These methods typically produce compact community size distributions, suggesting a strong ability to detect smaller, biologically meaningful clusters. Their effectiveness in uncovering functionally relevant communities makes them particularly well-suited for applications like functional annotation and pathway enrichment analysis. Algorithms with Balanced Performance Shared neighbor, Louvain, SVT exhibit moderate fold enrichment values while maintaining a stable distribution of community sizes. They identify communities of varying sizes without extreme variability or outliers, balancing community size and biological relevance. These algorithms are ideal for general-purpose clustering tasks that require a balance between biological significance and structural diversity. They are applicable in settings where robustness is desired across multiple community scales. High Variability Algorithms Fast greedy, Dcut are characterized by high variability in community size distribution, often identifying tiny and large clusters. However, their average fold enrichment tends to be lower, suggesting limited biological relevance in functional clustering. These algorithms may be better suited for exploratory analyses or networks with hierarchical structures than biological networks. Recommendations Based on Objectives Based on the observed performance patterns, we recommend the following algorithms depending on the specific objectives: For High Biological Relevance : Algorithms such as blue_genes, Zhenhua , or Nextmr are recommended due to their high fold enrichment scores and their ability to produce compact biologically significant clusters. For General-Purpose Clustering : Balanced methods such as Louvain, Shared neighbor , or SVT are well-suited for a range of clustering tasks, offering reasonable biological relevance and structural diversity. For Exploratory Analysis : High variability algorithms like Fast greedy or Dcut are recommended for exploratory analyzes, where the goal is to identify diverse clustering patterns rather than optimizing for biological enrichment. 3 Methods 3.1 Implementation of Community Detection Algorithms In this work, we implemented a total of twenty-one community detection algorithms, which can be broadly classified into five main types, as elaborated in the rest of this section. 3.1.1 Stochastic Methods These include methods using key concepts from Infomap, Walktrap, and Spinglass, which rely on probabilistic approaches to detect communities, often using random walks [ 27 , 28 ] and information theory to identify densely connected clusters within networks. Infomap: Infomap is a community detection algorithm that identifies groups of nodes by analysing the flow of random walks through a network. It works by finding a way to describe the paths of these walks as efficiently as possible, revealing communities where the walks tend to stay within certain groups of nodes. The method detects communities by minimizing the overall length of the description, indicating strong internal connections within those groups compared to the rest of the network. The time complexity for this algorithm is O ( m ) where m denotes the number of edges on the graph. Walktrap: The Walktrap algorithm detects communities by simulating random walks within a network, with short walks staying within densely connected nodes. It merges nodes and small clusters based on the frequency of co-visits, progressively building larger communities. The time complexity for this algorithm is O ( M log( N )) and for sparse networks is O ( N 2 log( N )). Spin glass: Spin glass is a community detection algorithm inspired by concepts from statistical physics, particularly the behaviour of spin glasses. The network is modeled as a system, where individual nodes represent spins and the edges between them represent interactions. The algorithm aims to minimize an energy function, where communities correspond to low-energy states, implying that nodes within the same community have strong interactions. A unique feature of Spin glass is its simulation of the system’s cooling process (annealing), which helps identify communities as groups of nodes that reach stable, low-energy configurations, indicating strong internal connections. The time complexity of this algorithm is O ( N 3.2 ). We also implemented some enhanced variants of stochastic Algorithms including: Multi Layered Markov clustering algorithm: The Markov Clustering Algorithm (MCL) identifies clusters through iterative expansion and inflation of the adjacency matrix. Multi-Layer Regularized Markov Clustering (MLRMCL) improves MCL by regularizing singleton clusters and using a multilayered approach to reduce computational complexity TripleAHC: Walktrap is carried out recursively until the modularity of individual clusters cross a certain threshold Zhenhua: Walktrap with further refinement of large clusters using Infomap TeamCS: An enhanced version of Infomap with recursive sparsification for larger clusters, utilizing an inverse log-weighted similarity to improve detection accuracy. 3.1.2 Kernel Based Methods These algorithms mainly utilise concepts of Spectral Clustering and build variations of the same. Spectral clustering Spectral clustering is a highly effective technique that uses Laplacian’s eigenvalues to reduce the data’s dimensionality before applying a clustering algorithm, such as K-means, to identify clusters. Using the spectral properties of the Laplacian network, spectral clustering can capture complex cluster structures that are not easily identifiable with traditional methods [ 29 ]. It works by transforming the data into a lower-dimensional space where clustering can be effectively performed, allowing for improved separation and identification of clusters. The time complexity of this algorithm is O (1). Some of the enhanced variants of kernel-based algorithm we implemented are : Tuskdmi: Applying spectral clustering to a custom Diffusion State Distance (DSD) matrix SpecHier: Using spectral clustering to determine initial clusters, followed by denoising and applying agglomerative clustering on larger subgraphs Big-S2: Performing spectral clustering followed by iterative refinement to split larger communities into sub-communities blue genes: Using spectral clustering on Laplacian exponential diffusion kernels to capture complex community structures 3.1.3 Modularity based Methods Modularity-based algorithms are designed to identify community structures in networks by optimizing a quality measure known as modularity [ 30 , 31 ]. Modularity quantifies the strength of the division of a network into communities, with higher values indicating a more pronounced community structure. These algorithms work by partitioning the network to maximize modularity, which measures the density of edges within communities relative to edges between communities. Common approaches include Louvain and Fast Greedy algorithms, which iteratively refine community assignments to improve modularity scores. Fast greedy: Fast Greedy performs hierarchical agglomerative clustering by merging communities to maximize the gain in modularity at each step. Louvain: Louvain employs a multilevel approach where nodes are iteratively moved between communities to identify shifts that lead to the most significant increase in modularity. After forming communities, nodes within each community are aggregated into supernodes, and the process is repeated recursively. Luminy_multiplex: We implemented a variation of the Louvain method, which introduces randomization and the ability to detect communities in different layers of the network. 3.1.4 Hierarchy based Algorithms The Girvan-Newman algorithm is a classic community detection method based on the concepts of divisive hierarchical clustering. It iteratively removes edges with the highest betweenness centrality, splitting the network into smaller communities. Unlike other methods that group similar nodes, Girvan-Newman reveals communities by breaking key connections between them. Although effective for small-medium sized networks, it can be computationally intensive for larger networks with a time complexity of O(E 2 N). However, due to its primary design for unweighted networks, it has not been included in Figs. Fig. 2 , Fig. 3 , Fig. 4 , Fig. 5 , Fig. 6 for the weighted LFR benchmark analysis. Most hierarchy-based clustering algorithms mainly use the concepts of agglomerative clustering [ 32 ]. These algorithms start with individual nodes as separate clusters and iteratively merge the closest clusters, updating the distance matrix at each step to build a hierarchical structure, often visualized as a dendrogram. We implemented some variations of hierarchical clustering including: csbio_iitm-hamming: In this method, an ensemble matrix is constructed from partitions at different resolution levels and hierarchical clustering is performed based on the frequency with which nodes appear together in the same community across resolutions [ 33 ]. Nextmr: Here, A Topology Overlap Matrix (TOM) is constructed from the graph’s adjacency matrix and hierarchical clustering is applied followed by modularity and entropy-based refinement. Dcut: In this method, hierarchical clustering is performed on the similarity matrix of the graph (e.g., Jaccard similarity) to build a tree, and final clusters are obtained through optimal recursive splitting of the tree [ 34 ]. Singular Value Thresholding (SVT): This method utilises a latent feature space representation of the network using Singular Value Thresholding, where agglomerative and recursive clustering is performed. 3.1.5 Local Search based Algorithms Local search community detection algorithms cluster nodes by focusing on local patterns within the network instead of optimising a global metric like modularity. This approach is particularly efficient for large networks, where local interactions are the key to uncovering meaningful communities. Label propagation This method iteratively updates node labels by assigning each node the most frequent label found in its neighborhood, continuing the process until the community structure stabilizes. The time complexity of this algorithm is Õ(m) Shared neighbor Clustering This approach iteratively merges nodes by focusing on their degree and connectivity, particularly emphasizing integrating clusters with fewer nodes to enhance community cohesion. 3.2 Benchmarking Strategies We benchmarked the community detection algorithms described above using both social networks and artificial benchmarks with known ground-truth communities. For social network analysis, we used eight different real-world networks with known communities. However, in social networks, the concept of a ground-truth community is often ambiguous and open to interpretation, which limits the reliability of such studies. Consequently, the number of studies that employ social networks to benchmark community detection algorithms remains limited. For artificial benchmark analysis, we used weighted and undirected LFR networks. One of the first synthetic benchmarks, still widely used today, is the Girvan–Newman benchmark [ 35 ]. However, the Girvan-Newman benchmark presents a very simplistic network structure, where all nodes have the same degree, and all communities are of standard size. To address these limitations, the LFR benchmarks introduced by Lancichinetti et al . [ 36 ], offering a more realistic approach by generating artificial networks that account for node degree and community size heterogeneity, both of which follow a power-law distribution. Furthermore, the generalized LFR benchmarks [ 37 ] represent another advance, incorporating features such as orphan nodes and allowing different mixing parameters within the same network, making them even more suitable for complex network analysis. We implemented our community detection algorithms in a semi-supervised setting, where the necessary parameters for each algorithm, such as community length and size limits, were tuned based on the available ground truth data. The performance of the algorithms was evaluated primarily using the normalized mutual information metric (NMI) to measure the similarity between the output clusters and the ground truth. In addition to NMI, we further analyzed the granularity of the clusters by examining the cluster ratio, which provides insight into the distribution of cluster sizes. Moreover, we conducted an analysis of the computational efficiency of different algorithms by evaluating their time complexity [ 19 ]. 3.2.1 Metrics Used Normalized Mutual Information: The Normalized Mutual Information (NMI) is a standard metric borrowed from the concepts of Information Theory [ 38 ]. NMI comparing two community partitions is given by the following formula: For the calculation of NMI, a confusion matrix N is derived first where an element in the matrix N i,j is obtained as the number of nodes in j th predicted communities found in i th real community. N refers to the number of nodes in the network. N i . refers to the number of nodes in i th real community and N . j refers to the number of nodes in j th predicted community. NMI between the predicted and real community partitions scales between 0 and 1, indicating no overlap and perfect partition, respectively. Cluster Ratio: Cluster Ratio is simply the ratio of the number of predicted communities to the number of communities in the ground truth partitions. Ideally expected to be 1, the cluster ratio indicates the granularity with which the algorithms detect communities. Computation time: Computation time refers to the runtime duration of different algorithms for a network of a given size. Modularity: Modularity quantifies the strength of the division of a network into communities, with higher values indicating a more pronounced community structure [ 31 ]. The modularity Q for a given partition is given by where A is the adjacency matrix of the network, k i and k j are the degrees of nodes i and j , and m is the total number of edges in the network. γ is a resolution parameter. δ ( c i, , c j ) is the Kronecker delta function, which takes the value 1 if i and j are in the same community, and 0, otherwise. This metric can be used to score community partitions by a given algorithm when the ground truth partition is not available. In summary, we used the Normalized Mutual Information score as the primary metric to score algorithms for their performance in different network structures. In addition, the cluster ratio and computation time serve as supporting metrics that provide additional insights. We used modularity as a metric primarily in social network benchmarking, where the annotation of ground-truth communities is not very clear. 3.3 Artificial Benchmarks In case of artificial benchmarks, we performed two types of analyses using the LFR class of networks, by varying Mixing parameter( µ ), and also by varying the size of the network, as described in the following sections: 3.3.1 Varying Mixing Parameter ( µ ) The mixing parameter µ for a given network is calculated using the following equation: where represents the external degree of node i, i.e. the number of connections that node i has outside its community, and represents the total degree of node i, which is the total number of connections the node has. A lower value of the mixing parameter µ indicates that the network is more modular, meaning that the nodes tend to have more connections within their community than outside it. In this analysis, we varied the mixing parameter of the system from 0.1 to 0.8 generating 100 different realisations of the weighted, undirected network of 1000 nodes at each value of µ to derive statistically significant results. 3.3.2 Varying Network size In this analysis, we fixed the mixing parameter ( µ ) of the network at 0.15 with 4% of overlapping nodes, while the network size varies from 125 to 4000 nodes. For each network size, we generated 100 different realizations of undirected weighted networks. This approach is particularly useful for evaluating the time complexity of various algorithms. 3.3.3 Social Networks Numerous social networks with annotated ground truth data are available in various online network repositories. Of the eight networks that we chose for this analysis, five are undirected and three are directed, all of which are unweighted. Table 4 and Table 4 provide a summary of the networks used in the study. View this table: View inline View popup Download powerpoint Table 4. Social networks Eight popular social networks have been employed in the study View this table: View inline View popup Download powerpoint Table 5. Biological Networks PPI network configurations 3.4 Gene Enrichment Analysis In addition to the benchmark analysis conducted on artificial and social networks, we performed biological benchmarking using human and yeast protein-protein interaction (PPI) networks obtained from the STRING database. We applied a threshold for filtering edges for most algorithms based on a combined score of ≥ 700, resulting in 16,201 nodes and 473,860 edges in the Human PPI network and 5,791 nodes with 208,376 edges in the Yeast PPI network. Both networks are undirected and weighted, respectively. However, for the blue genes and SVT algorithms applied to the Human PPI network, we set higher thresholds of 900 and 999, respectively, to accommodate memory and time complexity constraints. These adjustments resulted in a Human PPI network consisting of 12,323 nodes and 201,712 edges for the blue genes algorithm and 4,939 nodes with 28,284 edges for the SVT algorithm. Since ground truth communities were unavailable for these networks, we conducted a gene enrichment analysis using Gene Ontology (GO) term annotations, focusing specifically on GO terms associated with Molecular Function and Biological Process. To prepare the data, we first mapped the STRING accession IDs to UniProt IDs based on their annotations in the GO database. Following this preprocessing step, we computed each algorithm’s fold enrichment (FE) score across different communities and GO terms. The FE score is defined as: where s represents the number of proteins common to both the community and the proteins associated with the GO term, b denotes the total number of proteins related to the GO term, k is the number of proteins within the community and N is the total number of proteins in the selected PPI network. 3.5 Implementation The algorithms implemented in this work were originally developed in Python, R, Matlab, and Java. We have rewritten the BigS2 algorithm, initially written in Matlab, in Python3. We have integrated all algorithms into a single Python script that serves as the main interface. We carried out the benchmark analysis in Python3. We created Fig. 1 using Canva, while generating the remaining figures were using Python3. We conducted all benchmark experiments on a system with an Intel64 Family 6 Model 94 processor (2.71 GHz), 16 GB of RAM, and a 512 GB HDD. The system runs Microsoft Windows 10 Pro, version 10.0.19045. The system was equipped with WSL2 (Windows Subsystem for Linux 2), which allows the use of Linux-based tools and scripts alongside Windows applications. 5 Discussion In this study, we present a comprehensive implementation and benchmark analysis of both classical and hybrid classical algorithms for community detection. Our evaluation scheme spans three categories of networks: artificial, social, and biological, where we offer unique insights into algorithm performance. Since the ground truth community structures are known in artificial networks, this allowed for direct quantitative evaluation using the Normalized Mutual Information (NMI) metric. The results revealed that the algorithm performance often follows a sigmoidal or linear degradation pattern as the mixing parameter ( µ ) increases from 0 to 1, reflecting the increasing difficulty in distinguishing community boundaries. The artificial benchmarks described above may not fully capture the mesoscale and microscale properties of real-life networks. The work of Xiao et al . [ 39 ], which constructs benchmarks by rewiring real-world networks while preserving their key properties, could address the limitations of current real and artificial benchmarks. Although the notion of ground-truth communities in social networks is not very clear, the benchmark analysis highlighted practical limitations and bottlenecks in certain algorithms. For example, the Spinglass algorithm requires a fully connected network, which is often unrealistic for large-scale sparse social graphs. This analysis informed us about algorithmic constraints in real-world applications. Biological networks enabled the assessment of algorithms based on their ability to uncover biologically enriched communities, modules that are functionally relevant in molecular systems. Among the evaluated methods, the Blue Genes algorithm, which leverages an exponential Laplacian kernel, and Zhenhua’s method, which combines Walktrap clustering with further refinement using Infomap, emerged as top performers in this domain. Algorithms like SpecHier and TripleAHC, which excelled in artificial benchmark analysis, performed only moderately in biological networks. Conversely, algorithms such as NextMR and Shared neighbor, which performed less impressively in artificial benchmark analysis, achieved high fold enrichment scores in biological benchmarking. These observations yet again highlight the fundamental difference between power-law networks and biological networks. A critical dimension which would require optimization in future work is the management of computational resources. Both the Shared neighbor and blue genes algorithms exhibit non-linear increases in computation time as network size grows, creating significant challenges during inference. Similarly, SVT’s reliance on Singular Value Decomposition through scipy imposes memory constraints due to the intensive matrix operations involved. While the classical algorithms we implemented offer limited scope for GPU optimizations, future research could explore deep learning techniques such as graph neural networks, graph GANs, and graph autoencoders to achieve better computational efficiency. The field is increasingly headed toward overlapping community detection algorithms. Benchmarking these methods could significantly advance ongoing research efforts. Fortunato and Lancichinetti have developed hierarchical LFR benchmarks for weighted directed networks with overlapping communities, providing valuable tools for evaluation. Extending our analysis to evolving networks, particularly those with overlapping communities, would offer deeper insights into dynamic network structures. Additionally, Fortunato et al . have designed stochastic block model benchmarks specifically for dynamically evolving networks [ 40 ]. In summary, we present a comprehensive benchmarking of community detection algorithms, emphasizing the importance of context-aware evaluation. By integrating diverse network types, reproducible tools, and biological validation, we reveal key algorithmic trade-offs and domain-specific behaviors. Our findings underscore that performance on synthetic benchmarks does not necessarily translate to real-world utility, particularly in biological networks, guiding more informed and context-sensitive algorithm selection. Supporting information S1 Fig. Social Network Benchmarking on Stochastic Methods: Based on NMI, Cluster ratio, Computation time, Modularity S2 Fig. Social Network Benchmarking on Kernel based Methods: Based on NMI, Cluster ratio, Computation time, Modularity S3 Fig. Social Network Benchmarking on Modularity based Methods Based on NMI, Cluster ratio, Computation time, Modularity S4 Fig. Social Network Benchmarking on Hierarchy based Methods Based on NMI, Cluster ratio, Computation time, Modularity S5 Fig. Social Network Benchmarking on Local Search based Methods Based on NMI, Cluster ratio, Computation time, Modularity S6 Fig. Significant Terms Vs Average Fold Enrichment in the Human PPI network Scatter plot depicting significant Terms Vs Average Fold Enrichment for the 20 algorithms in the human PPI Network S7 Fig. Significant Terms Vs Average Fold Enrichment in the Yeast PPI network Scatter plot depicting significant Terms Vs Average Fold Enrichment for the 20 algorithms in the yeast PPI Network S8 Appendix. Gene Enrichment Analysis Summary for the Human PPI network CSV File Summarizing the Gene Enrichment Analysis results from different algorithms on the Human PPI Network S9 Appendix. Gene Enrichment Analysis Summary for the Yeast PPI network CSV File Summarizing the Gene Enrichment Analysis results from different algorithms on the Yeast PPI Network Data availability The social networks used for the benchmarking in this analysis were sourced from Network Repository , [ 41 ]. Furthermore, artificial LFR benchmarks were generated by compiling the C++ codes for weighted and undirected networks available on Santo Fortunato’s official website [ 42 ] link. Code availability All algorithms used are interfaced with one Python main script, and a complete Python package has been created. The Python package can be cloned and installed locally in the system from this repository. In addition, the module can be imported into an external script or can be accessed through the command-line interface. The Python module takes in an edgelist file or a networkx graph as input and returns a list of lists containing the communities and optionally writes the output into a.txt file when the path for the same is provided. An example of usage utilising the Python command-line interface is highlighted below. Python -m community_detection_1.main network.dat MLRMCL --output_file output.txt --algorithm_args largest=50 Here, community_detection_1 is the name of the Python package. The input file is network.dat and the algorithm chosen for this above example is MLRMCL. In addition, the output path is specified as output.txt and largest=50 is one of the algorithm parameters. Acknowledgments The authors acknowledge the support of the PG Senapathy Center for Computing Resources for providing a High Performance Computing Environment. Footnotes https://networkrepository.com/ https://www.santofortunato.net/resources References 1. ↵ Javed MA , Younis MS , Latif S , Qadir J , Baig A. Community detection in networks: A multidisciplinary review . Journal of Network and Computer Applications . 2018 ; 108 : 87 – 111 . OpenUrl CrossRef 2. Fortunato S. Community detection in graphs . Physics Reports . 2010 ; 486 ( 3 ): 75 – 174 . OpenUrl CrossRef PubMed Web of Science 3. Dao VL , Bothorel C , Lenca P. Community detection methods can discover better structural clusters than ground-truth communities . In: 2017 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM) ; 2017 . p. 395 – 400 . 4. Fortunato S , Hric D. Community detection in networks: A user guide . Physics Reports . 2016 ; 659 : 1 – 44 . OpenUrl CrossRef 5. ↵ Jin D , Yu Z , Jiao P , Pan S , He D , Wu J , et al. A survey of community detection approaches: From statistical modeling to deep learning . IEEE Transactions on Knowledge and Data Engineering . 2023 ; 35 ( 2 ): 1149 – 1170 . OpenUrl 6. ↵ Karataş A , Şahin S. Application areas of community detection: A review . In: 2018 International Congress on Big Data, Deep Learning and Fighting Cyber Terrorism (IBIGDELFT) . IEEE ; 2018 . p. 65 – 70 . 7. ↵ Bedi P , Sharma C. Community detection in social networks . WIREs Data Mining and Knowledge Discovery . 2016 ; 6 ( 3 ): 115 – 135 . Available from: https://wires.onlinelibrary.wiley.com/doi/abs/10.1002/widm.1178 . OpenUrl CrossRef 8. Fani H , Bagheri E. Community detection in social networks . Encyclopedia with Semantic Computing and Robotic Intelligence . 2017 ; 1 ( 1 ): 1630001 . Available from : doi: 10.1142/S2425038416300019 . OpenUrl CrossRef 9. ↵ Wang C , Tang W , Sun B , Fang J , Wang Y. Review on community detection algorithms in social networks . In: 2015 IEEE International Conference on Progress in Informatics and Computing (PIC) ; 2015 . p. 551 – 555 . Available from : doi: 10.1109/PIC.2015.7489908 . OpenUrl CrossRef 10. ↵ Rahiminejad S , Maurya MR , Subramaniam S. Topological and functional comparison of community detection algorithms in biological networks . BMC Bioinformatics . 2019 ; 20 ( 1 ): 212 . Available from : doi: 10.1186/s12859-019-2746-0 . OpenUrl CrossRef PubMed 11. Barabàsi AL , Gulbahce N , Loscalzo J. Network medicine: a network-based approach to human disease . Nature Reviews Genetics . 2011 ; 12 ( 1 ): 56 – 68 . Available from : doi: 10.1038/nrg2918 . OpenUrl CrossRef PubMed Web of Science 12. ↵ Sharma A , Ali HH . Analysis of clustering algorithms in biological networks . In: 2017 IEEE International Conference on Bioinformatics and Biomedicine (BIBM) ; 2017 . p. 2303 – 2305 . 13. ↵ Shomron N Kanter I , Yaari G , Kalisky T. 3. In: Shomron N , editor. Applications of Community Detection Algorithms to Large Biological Datasets . New York, NY : Springer US ; 2021 . p. 59 – 80 . Available from : doi: 10.1007/978-1-0716-1103-6_3 . OpenUrl CrossRef 14. Capocci A , Servedio VDP , Caldarelli G , Colaiori F. Detecting communities in large networks . Physica A: Statistical Mechanics and its Applications . 2005 ; 352 ( 2 ): 669 – 676 . OpenUrl CrossRef 15. ↵ Clauset A , Newman MEJ , Moore C. Finding community structure in very large networks . Phys Rev E . 2004 Dec ; 70 : 066111 . Available from: https://link.aps.org/doi/10.1103/PhysRevE.70.066111 . OpenUrl CrossRef 16. ↵ Staudt CL , Meyerhenke H. Engineering Parallel Algorithms for Community Detection in Massive Networks . IEEE Transactions on Parallel and Distributed Systems . 2016 ; 27 ( 1 ): 171 – 184 . OpenUrl CrossRef 17. ↵ He K , Li Y , Soundarajan S , Hopcroft JE . Hidden community detection in social networks . Information Sciences . 2018 ; 425 : 92 – 106 . Available from: https://www.sciencedirect.com/science/article/pii/S0020025517310101 . OpenUrl 18. ↵ Tibély G , Kovanen L , Karsai M , Kaski K , Kertész J , Saramäki J. Communities and beyond: Mesoscopic analysis of a large social network with complementary methods . Phys Rev E . 2011 May ; 83 : 056125 . Available from: https://link.aps.org/doi/10.1103/PhysRevE.83.056125 . OpenUrl CrossRef 19. ↵ Yang Z , Algesheimer R , Tessone CJ . A comparative analysis of community detection algorithms on artificial networks . Scientific reports . 2016 ; 6 ( 1 ): 30750 . OpenUrl CrossRef PubMed 20. ↵ Rahiminejad S , Maurya MR , Subramaniam S. Topological and functional comparison of community detection algorithms in biological networks . BMC bioinformatics . 2019 ; 20 : 1 – 25 . OpenUrl CrossRef PubMed 21. ↵ Choobdar S , Ahsen ME , Crawford J , Tomasoni M , Fang T , Lamparter D , et al. Assessment of network module identification across complex diseases . Nature methods . 2019 ; 16 ( 9 ): 843 – 852 . OpenUrl CrossRef PubMed 22. ↵ Sobolevsky S , Belyi A. Graph neural network inspired algorithm for unsupervised network community detection . Applied Network Science . 2022 ; 7 ( 1 ): 63 . OpenUrl CrossRef 23. Liu F , Xue S , Wu J , Zhou C , Hu W , Paris C , et al. Deep learning for community detection: progress, challenges and opportunities . arXiv preprint arXiv:200508225. 2020 ;. 24. ↵ Su X , Xue S , Liu F , Wu J , Yang J , Zhou C , et al. A comprehensive survey on community detection with deep learning . IEEE Transactions on Neural Networks and Learning Systems . 2022 ;. 25. ↵ Ding Z , Zhang X , Sun D , Luo B. Overlapping community detection based on network decomposition . Scientific reports . 2016 ; 6 ( 1 ): 24115 . OpenUrl CrossRef PubMed 26. ↵ Xie J , Kelley S , Szymanski BK . Overlapping community detection in networks: The state-of-the-art and comparative study . Acm computing surveys (csur) . 2013 ; 45 ( 4 ): 1 – 35 . OpenUrl 27. ↵ Rosvall M , Bergstrom CT . Maps of random walks on complex networks reveal community structure . Proceedings of the national academy of sciences . 2008 ; 105 ( 4 ): 1118 – 1123 . OpenUrl Abstract / FREE Full Text 28. ↵ Pons P , Latapy M. Computing communities in large networks using random walks . Journal of graph algorithms and applications . 2006 ; 10 ( 2 ): 191 – 218 . OpenUrl CrossRef 29. ↵ Von Luxburg U. A tutorial on spectral clustering . Statistics and computing . 2007 ; 17 : 395 – 416 . OpenUrl CrossRef 30. ↵ Brandes U , Delling D , Gaertler M , Gorke R , Hoefer M , Nikoloski Z , et al. On modularity clustering . IEEE transactions on knowledge and data engineering . 2007 ; 20 ( 2 ): 172 – 188 . OpenUrl 31. ↵ Newman ME . Modularity and community structure in networks . Proceedings of the national academy of sciences . 2006 ; 103 ( 23 ): 8577 – 8582 . OpenUrl Abstract / FREE Full Text 32. ↵ Bui VH , Phan HT . The computational complexity of hierarchical clustering algorithms for community detection: a review . Vietnam Journal of Computer Science . 2023 ; 10 ( 04 ): 409 – 431 . OpenUrl CrossRef 33. ↵ Tripathi B , Parthasarathy S , Sinha H , Raman K , Ravindran B. Adapting Community Detection Algorithms for Disease Module Identification in Heterogeneous Biological Networks . Frontiers in Genetics . 2019 ; 10 . 34. ↵ Shao J , Yang Q , Liu J , Kramer S. Graph Clustering with Density-Cut . CoRR . 2016 ;abs/1606.00950. Available from: http://arxiv.org/abs/1606.00950 . 35. ↵ Girvan M , Newman ME . Community structure in social and biological networks . Proceedings of the national academy of sciences . 2002 ; 99 ( 12 ): 7821 – 7826 . OpenUrl Abstract / FREE Full Text 36. ↵ Lancichinetti A , Fortunato S , Radicchi F. Benchmark graphs for testing community detection algorithms . Physical Review E—Statistical, Nonlinear, and Soft Matter Physics . 2008 ; 78 ( 4 ): 046110 . OpenUrl 37. ↵ Le BD , Nguyen HX , Shen H , Falkner N. GLFR: A generalized LFR benchmark for testing community detection algorithms . In: 2017 26th International Conference on Computer Communication and Networks (ICCCN) . IEEE ; 2017 . p. 1 – 9 . 38. ↵ McDaid AF , Greene D , Hurley N. Normalized mutual information to evaluate overlapping community finding algorithms . arXiv preprint arXiv:11102515. 2011 ;. 39. ↵ Xiao J , Ren HF , Xu XK . Constructing Real-Life Benchmarks for Community Detection by Rewiring Edges . Complexity . 2020 ; 2020 ( 1 ): 7096230 . OpenUrl 40. ↵ Granell C , Darst RK , Arenas A , Fortunato S , Gómez S. Benchmark model to assess community structure in evolving networks . Physical Review E . 2015 ; 92 ( 1 ): 012805 . OpenUrl 41. ↵ Rossi R , Ahmed N. The network data repository with interactive graph analytics and visualization . In: Proceedings of the AAAI conference on artificial intelligence . vol. 29 ; 2015 . p. 59 – 80 . OpenUrl 42. ↵ Lancichinetti A , Fortunato S. Benchmarks for testing community detection algorithms on directed and weighted graphs with overlapping communities . Physical Review E—Statistical, Nonlinear, and Soft Matter Physics . 2009 ; 80 ( 1 ): 016118 . OpenUrl View the discussion thread. Back to top Previous Next Posted May 12, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Extensive Benchmarking of Community Detection Algorithms Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Extensive Benchmarking of Community Detection Algorithms R Sapna , Harikeshav Karthik , Karthik Raman bioRxiv 2025.05.07.652778; doi: https://doi.org/10.1101/2025.05.07.652778 Share This Article: Copy Citation Tools Extensive Benchmarking of Community Detection Algorithms R Sapna , Harikeshav Karthik , Karthik Raman bioRxiv 2025.05.07.652778; doi: https://doi.org/10.1101/2025.05.07.652778 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Systems Biology Subject Areas All Articles Animal Behavior and Cognition (7633) Biochemistry (17686) Bioengineering (13891) Bioinformatics (41932) Biophysics (21450) Cancer Biology (18587) Cell Biology (25498) Clinical Trials (138) Developmental Biology (13375) Ecology (19898) Epidemiology (2067) Evolutionary Biology (24311) Genetics (15608) Genomics (22502) Immunology (17736) Microbiology (40391) Molecular Biology (17177) Neuroscience (88589) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4823) Physiology (7641) Plant Biology (15150) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9823) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-06-17T06:32:23.968882+00:00