GiulioRossetti · NouamaneA · Apr 20, 2026 · Copilot · Apr 21, 2026 · Copilot
diff --git a/cdlib/algorithms/internal/BIGCLAM.py b/cdlib/algorithms/internal/BIGCLAM.py
@@ -57,35 +57,73 @@ def gradient(F, A, i):
     grad = sum_neigh - sum_nneigh
     return grad
 
+def gradient_fast(F, A, i):
+    r"""Fast implementation of the gradient for node ``i`` (``u`` below),
+    following equation 4 of https://cs.stanford.edu/people/jure/pubs/bigclam-wsdm13.pdf
 
-def train(A, C, iterations=100):
+    .. math::
+
+        \nabla l(F_u) =
+        \sum_{v \in N(u)} F_v \left(1 + \frac{e^{-F_u^T F_v}}{1-e^{-F_u^T F_v}}\right) 
+        - \sum_v F_v + F_u
+
+    """
+    _, C = F.shape
+    neighbours = np.where(A[i])[0]
+
+    grad = np.zeros((C,))
+    for nb in neighbours:
+        dotproduct = F[nb].dot(F[i])
+        grad += F[nb] * (1 + sigm(dotproduct))
+    grad -= np.sum(F, axis=0)
+    grad += F[i]
+    return grad
+
+def get_embeddings(A, C, iterations=100, learning_rate=0.005, naive=False):
     # initialize an F
     N = A.shape[0]
     F = np.random.rand(N, C)
 
     for n in range(iterations):
         for person in range(N):
-            grad = gradient(F, A, person)
+            if naive:
+                grad = gradient(F, A, person)
+            else:
+                grad = gradient_fast(F, A, person)
 
-            F[person] += 0.005 * grad
+            F[person] += learning_rate * grad
 
-            F[person] = np.maximum(0.001, F[person])  # F should be nonnegative
-        log_likelihood(F, A)
+            F[person] = np.maximum(0.00001, F[person])  # F should be nonnegative
+        # log_likelihood(F, A)
     return F
 
-
-def big_Clam(graph, number_communities):
-    adj = nx.to_numpy_matrix(graph)
-    F = train(adj, number_communities)
-    F_argmax = np.argmax(F, 1)
-    dict_communities = {}
-    for i in range(0, number_communities):
-        dict_communities[i] = []
-    for node, com in zip(graph.nodes(), F_argmax):
-        dict_communities[com].append(node)
+def get_communities(F, graph, number_communities, method='argmax'):
+    if method == 'argmax':
+        F_argmax = np.argmax(F, 1)
+        dict_communities = {com: [] for com in range(number_communities)}
+        for node, com in zip(graph.nodes(), F_argmax.tolist()):
+            dict_communities[com].append(node)
+    elif method == 'threshold':
+        n, m = graph.number_of_nodes(), graph.number_of_edges()
+        epsilon = 2 * m / (n * (n - 1))
-        n, m = graph.number_of_nodes(), graph.number_of_edges()
-        epsilon = 2 * m / (n * (n - 1))
+        n = graph.number_of_nodes()
+        if n < 2:
+            epsilon = 0.0
+        else:
+            epsilon = nx.density(graph)
+        epsilon = min(max(epsilon, 0.0), np.nextafter(1.0, 0.0))
-        n, m = graph.number_of_nodes(), graph.number_of_edges()
-        epsilon = 2 * m / (n * (n - 1))
+        n = graph.number_of_nodes()
+        if n < 2:
+            epsilon = 0.0
+        else:
+            epsilon = nx.density(graph)
+        epsilon = min(max(epsilon, 0.0), np.nextafter(1.0, 0.0))
+        delta = np.sqrt(-np.log(1 - epsilon))
+        memberships = np.where(F >= delta, 1, 0)
+        # in this case, a node can belong to multiple communities (or to none, if no score reaches delta)
+        dict_communities = {com: [] for com in range(number_communities)}
+        for node, membership in zip(graph.nodes(), memberships):
+            for com in np.nonzero(membership)[0].tolist():
+                dict_communities[com].append(node)
+    else:
+        raise ValueError("Method not supported")
-        raise ValueError("Method not supported")
+        raise ValueError(
+            f"Method '{method}' not supported. Allowed values are: 'argmax', 'threshold'."
+        )
-        raise ValueError("Method not supported")
+        raise ValueError(
+            f"Method '{method}' not supported. Allowed values are: 'argmax', 'threshold'."
+        )
 
     list_communities = []
     for com in dict_communities:
         list_communities.append(dict_communities[com])
-    list_communities = []
-    for com in dict_communities:
-        list_communities.append(dict_communities[com])
+    list_communities = [members for members in dict_communities.values() if members]
-    list_communities = []
-    for com in dict_communities:
-        list_communities.append(dict_communities[com])
+    list_communities = [members for members in dict_communities.values() if members]
 
     return list_communities
+
+def big_clam_communities(graph, number_communities, iterations=100, learning_rate=0.005, naive=False, affiliation_method='argmax'):
+    adj = nx.to_numpy_array(graph, weight=None)
+    F = get_embeddings(adj, number_communities, iterations=iterations, learning_rate=learning_rate, naive=naive)
+
+    return get_communities(F, graph, number_communities, method=affiliation_method)
diff --git a/cdlib/algorithms/overlapping_partition.py b/cdlib/algorithms/overlapping_partition.py
@@ -8,6 +8,7 @@
 from cdlib import NodeClustering
 from cdlib.random import get_seed
 from cdlib.utils import suppress_stdout, convert_graph_formats, nx_node_integer_mapping
+from cdlib.algorithms.internal.BIGCLAM import big_clam_communities
 from cdlib.algorithms.internal.CONGO import Congo_
 from cdlib.algorithms.internal.CONGA import Conga_
 from cdlib.algorithms.internal.LAIS2_nx import LAIS2
@@ -95,7 +96,7 @@
     "lemon",
     "slpa",
     "multicom",
-    # "big_clam",
+    "big_clam",
     # "danmf",
     # "egonet_splitter",
     # "nnsed",
@@ -875,72 +876,70 @@ def multicom(g_original: object, seed_node: object) -> NodeClustering:
     )
 
 
-# def big_clam(
-#     g_original: object,
-#     dimensions: int = 8,
-#     iterations: int = 50,
-#     learning_rate: float = 0.005,
-# ) -> NodeClustering:
-#     """
-#     BigClam is an overlapping community detection method that scales to large networks.
-#     The procedure uses gradient ascent to create an embedding which is used for deciding the node-cluster affiliations.
-#
-#
-#     **Supported Graph Types**
-#
-#     ========== ======== ========
-#     Undirected Directed Weighted
-#     ========== ======== ========
-#     Yes        No       No
-#     ========== ======== ========
-#
-#     :param g_original: a networkx/igraph object
-#     :param dimensions: Number of embedding dimensions. Default 8.
-#     :param iterations: Number of training iterations. Default 50.
-#     :param learning_rate: Gradient ascent learning rate. Default is 0.005.
-#     :return: NodeClustering object
-#
-#
-#     :Example:
-#
-#     >>> from cdlib import algorithms
-#     >>> import networkx as nx
-#     >>> G = nx.karate_club_graph()
-#     >>> coms = algorithms.big_clam(G)
-#
-#     :References:
-#
-#     Yang, Jaewon, and Jure Leskovec. "Overlapping community detection at scale: a nonnegative matrix factorization approach." Proceedings of the sixth ACM international conference on Web search and data mining. 2013.
-#
-#     .. note:: Reference implementation: https://karateclub.readthedocs.io/
-#     """
-#     __try_load_karate()
-#     g = convert_graph_formats(g_original, nx.Graph)
-#
-#     model = karateclub.BigClam(
-#         dimensions=dimensions, iterations=iterations, learning_rate=learning_rate
-#     )
-#     model.fit(g)
-#     members = model.get_memberships()
-#
-#     # Reshaping the results
-#     coms_to_node = defaultdict(list)
-#     for n, c in members.items():
-#         coms_to_node[c].append(n)
-#
-#     coms = [list(c) for c in coms_to_node.values()]
-#
-#     return NodeClustering(
-#         coms,
-#         g_original,
-#         "BigClam",
-#         method_parameters={
-#             "dimensions": dimensions,
-#             "iterations": iterations,
-#             "learning_rate": learning_rate,
-#         },
-#         overlap=True,
-#     )
+def big_clam(
+    g_original: object,
+    dimensions: int = 8,
+    iterations: int = 50,
+    learning_rate: float = 0.005,
+    naive: bool = False,
+    affiliation_method: str = "argmax",
+) -> NodeClustering:
+    """
+    BigClam is an overlapping community detection method that scales to large networks.
+    The procedure uses gradient ascent to create an embedding which is used for deciding the node-cluster affiliations.
+
+
+    **Supported Graph Types**
+
+    ========== ======== ========
+    Undirected Directed Weighted
+    ========== ======== ========
+    Yes        No       No
+    ========== ======== ========
+
+    :param g_original: a networkx/igraph object
+    :param dimensions: Number of embedding dimensions. Default 8.
+    :param iterations: Number of training iterations. Default 50.
+    :param learning_rate: Gradient ascent learning rate. Default is 0.005.
+    :param naive: If True, compute each gradient with the naive implementation; if False, use the faster implementation of the same gradient. Default is False.
+    :param affiliation_method: Method for deciding node-cluster affiliations. "argmax" assigns each node to the cluster with the highest affiliation score, while "threshold" assigns nodes to all clusters for which their affiliation score is above a certain threshold that is computed based on the graph structure (cf. Yang and Leskovec, 2013). In the latter case, communities can overlap. Default is "argmax".
+    :return: NodeClustering object
+
+
+    :Example:
+
+    >>> from cdlib import algorithms
+    >>> import networkx as nx
+    >>> G = nx.karate_club_graph()
+    >>> coms = algorithms.big_clam(G)
+
+    :References:
+
+    Yang, Jaewon, and Jure Leskovec. "Overlapping community detection at scale: a nonnegative matrix factorization approach." Proceedings of the sixth ACM international conference on Web search and data mining. 2013.
+    """
+
+    coms = big_clam_communities(
+        g_original,
+        number_communities=dimensions,
+        iterations=iterations,
+        learning_rate=learning_rate,
+        naive=naive,
+        affiliation_method=affiliation_method,
+    )
+
+    return NodeClustering(
+        coms,
+        g_original,
+        "BigClam",
+        method_parameters={
+            "dimensions": dimensions,
+            "iterations": iterations,
+            "learning_rate": learning_rate,
+            "naive": naive,
+            "affiliation_method": affiliation_method,
+        },
+        overlap=True,
-        overlap=True,
+        overlap=(affiliation_method == "threshold"),
-        overlap=True,
+        overlap=(affiliation_method == "threshold"),
+    )
 
 
 # def danmf(

diff --git a/cdlib/test/test_community_discovery_models.py b/cdlib/test/test_community_discovery_models.py
@@ -365,15 +365,15 @@ def test_markov_clustering(self):
             if len(communities.communities[0]) > 0:
                 self.assertEqual(type(communities.communities[0][0]), int)
 
-    # def test_bigClam(self):
-    #     if karateclub is None:
-    #         return
-    #     g = nx.karate_club_graph()
-    #     coms = algorithms.big_clam(g)
-    #     self.assertEqual(type(coms.communities), list)
-    #     if len(coms.communities) > 0:
-    #         self.assertEqual(type(coms.communities[0]), list)
-    #         self.assertEqual(type(coms.communities[0][0]), int)
+    def test_bigClam(self):
-    def test_bigClam(self):
+    def test_big_clam(self):
-    def test_bigClam(self):
+    def test_big_clam(self):
+        g = nx.karate_club_graph()
+        coms = algorithms.big_clam(g)
+        self.assertEqual(type(coms.communities), list)
+        if len(coms.communities) > 0:
+            for com in coms.communities:
+                self.assertEqual(type(com), list)
+                if len(com) > 0:
+                    self.assertEqual(type(com[0]), int)
 
-
+
+        threshold_coms = algorithms.big_clam(
+            g, naive=True, affiliation_method="threshold"
+        )
+        self.assertEqual(type(threshold_coms.communities), list)
+        if len(threshold_coms.communities) > 0:
+            for com in threshold_coms.communities:
+                self.assertEqual(type(com), list)
+                if len(com) > 0:
+                    self.assertEqual(type(com[0]), int)
+
+        with self.assertRaises(ValueError):
+            algorithms.big_clam(g, affiliation_method="unsupported")
-
+
+        threshold_coms = algorithms.big_clam(
+            g, naive=True, affiliation_method="threshold"
+        )
+        self.assertEqual(type(threshold_coms.communities), list)
+        if len(threshold_coms.communities) > 0:
+            for com in threshold_coms.communities:
+                self.assertEqual(type(com), list)
+                if len(com) > 0:
+                    self.assertEqual(type(com[0]), int)
+
+        with self.assertRaises(ValueError):
+            algorithms.big_clam(g, affiliation_method="unsupported")
     def test_lemon(self):
         g = get_string_graph()