From b3909722ced22b0bebe611016abcb12781b35d1c Mon Sep 17 00:00:00 2001 From: Lord Date: Tue, 28 May 2024 02:25:05 +0330 Subject: [PATCH] phase 2 labels --- Logic/core/clustering/clustering_utils.py | 13 ------------- Logic/core/clustering/main.py | 3 +-- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/Logic/core/clustering/clustering_utils.py b/Logic/core/clustering/clustering_utils.py index 02205a1..5b4af4d 100644 --- a/Logic/core/clustering/clustering_utils.py +++ b/Logic/core/clustering/clustering_utils.py @@ -288,8 +288,6 @@ def plot_kmeans_cluster_scores(self, embeddings: List, true_labels: List, k_valu # and visualize it. silhouette_scores.append(cm.silhouette_score(embeddings, cluster_assignments)) # count labels in each cluster - cluster_assignments = self.fix_labels(cluster_assignments, k, true_labels) - purity_scores.append(cm.purity_score(true_labels, cluster_assignments)) # Plotting the scores @@ -312,17 +310,6 @@ def plot_kmeans_cluster_scores(self, embeddings: List, true_labels: List, k_valu run = wandb.init(project=project_name, name=run_name) wandb.log({"Cluster Scores": wandb.Image(fig)}) - def fix_labels(self, cluster_assignments, k, true_labels): - counters = [] - for i in range(k): - counters.append(Counter()) - for i in range(len(cluster_assignments)): - counters[cluster_assignments[i]].update([true_labels[i]]) - ci_to_label = {} - for i in range(k): - ci_to_label[i] = counters[i].most_common(1)[0][0] - cluster_assignments = [ci_to_label[i] for i in cluster_assignments] - return cluster_assignments def visualize_elbow_method_wcss(self, embeddings: List, k_values: List[int], project_name: str, run_name: str): """ This function implements the elbow method to determine the optimal number of clusters for K-means clustering based on the Within-Cluster Sum of Squares (WCSS). diff --git a/Logic/core/clustering/main.py b/Logic/core/clustering/main.py index b6a74e3..f16aa58 100644 --- a/Logic/core/clustering/main.py +++ b/Logic/core/clustering/main.py @@ -73,6 +73,5 @@ cm = ClusteringMetrics() for k in range(2, 20, 4): centeroids,cluster_assignments = cu.cluster_kmeans(X, k) - label = cu.fix_labels(cluster_assignments, k, y) print( - f"{k}: ari: {cm.adjusted_rand_score(y, cluster_assignments)} , purity: {cm.purity_score(y, label)} , silhouette: {cm.silhouette_score(X, cluster_assignments)}") + f"{k}: ari: {cm.adjusted_rand_score(y, cluster_assignments)} , purity: {cm.purity_score(y, cluster_assignments)} , silhouette: {cm.silhouette_score(X, cluster_assignments)}")