From b3909722ced22b0bebe611016abcb12781b35d1c Mon Sep 17 00:00:00 2001
From: Lord <moeen.samadi@outlook.com>
Date: Tue, 28 May 2024 02:25:05 +0330
Subject: [PATCH] phase 2 labels: drop fix_labels, pass raw cluster assignments to purity_score

---
 Logic/core/clustering/clustering_utils.py | 13 -------------
 Logic/core/clustering/main.py             |  3 +--
 2 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/Logic/core/clustering/clustering_utils.py b/Logic/core/clustering/clustering_utils.py
index 02205a1..5b4af4d 100644
--- a/Logic/core/clustering/clustering_utils.py
+++ b/Logic/core/clustering/clustering_utils.py
@@ -288,8 +288,6 @@ def plot_kmeans_cluster_scores(self, embeddings: List, true_labels: List, k_valu
             # and visualize it.
             silhouette_scores.append(cm.silhouette_score(embeddings, cluster_assignments))
             # count labels in each cluster
-            cluster_assignments = self.fix_labels(cluster_assignments, k, true_labels)
-
             purity_scores.append(cm.purity_score(true_labels, cluster_assignments))
 
         # Plotting the scores
@@ -312,17 +310,6 @@ def plot_kmeans_cluster_scores(self, embeddings: List, true_labels: List, k_valu
             run = wandb.init(project=project_name, name=run_name)
             wandb.log({"Cluster Scores": wandb.Image(fig)})
 
-    def fix_labels(self, cluster_assignments, k, true_labels):
-        counters = []
-        for i in range(k):
-            counters.append(Counter())
-        for i in range(len(cluster_assignments)):
-            counters[cluster_assignments[i]].update([true_labels[i]])
-        ci_to_label = {}
-        for i in range(k):
-            ci_to_label[i] = counters[i].most_common(1)[0][0]
-        cluster_assignments = [ci_to_label[i] for i in cluster_assignments]
-        return cluster_assignments
 
     def visualize_elbow_method_wcss(self, embeddings: List, k_values: List[int], project_name: str, run_name: str):
         """ This function implements the elbow method to determine the optimal number of clusters for K-means clustering based on the Within-Cluster Sum of Squares (WCSS).
diff --git a/Logic/core/clustering/main.py b/Logic/core/clustering/main.py
index b6a74e3..f16aa58 100644
--- a/Logic/core/clustering/main.py
+++ b/Logic/core/clustering/main.py
@@ -73,6 +73,5 @@
     cm = ClusteringMetrics()
     for k in range(2, 20, 4):
         centeroids,cluster_assignments = cu.cluster_kmeans(X, k)
-        label = cu.fix_labels(cluster_assignments, k, y)
         print(
-            f"{k}:  ari: {cm.adjusted_rand_score(y, cluster_assignments)} , purity: {cm.purity_score(y, label)} , silhouette: {cm.silhouette_score(X, cluster_assignments)}")
+            f"{k}:  ari: {cm.adjusted_rand_score(y, cluster_assignments)} , purity: {cm.purity_score(y, cluster_assignments)} , silhouette: {cm.silhouette_score(X, cluster_assignments)}")