Skip to content

Commit

Permalink
phase 2 labels
Browse files Browse the repository at this point in the history
  • Loading branch information
Moeen89 committed May 27, 2024
1 parent 929e84b commit b390972
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 15 deletions.
13 changes: 0 additions & 13 deletions Logic/core/clustering/clustering_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,6 @@ def plot_kmeans_cluster_scores(self, embeddings: List, true_labels: List, k_valu
# and visualize it.
silhouette_scores.append(cm.silhouette_score(embeddings, cluster_assignments))
# count labels in each cluster
cluster_assignments = self.fix_labels(cluster_assignments, k, true_labels)

purity_scores.append(cm.purity_score(true_labels, cluster_assignments))

# Plotting the scores
Expand All @@ -312,17 +310,6 @@ def plot_kmeans_cluster_scores(self, embeddings: List, true_labels: List, k_valu
run = wandb.init(project=project_name, name=run_name)
wandb.log({"Cluster Scores": wandb.Image(fig)})

def fix_labels(self, cluster_assignments, k, true_labels):
    """Relabel every sample with the majority true label of its cluster.

    Parameters
    ----------
    cluster_assignments
        Cluster index of every sample; each index is used to select one
        of the ``k`` per-cluster tallies.
    k
        Number of clusters (number of tallies that are created).
    true_labels
        Ground-truth label of every sample, indexed in step with
        ``cluster_assignments``.

    Returns
    -------
    list
        One entry per sample: the most common true label among the
        samples that share that sample's cluster.
    """
    # One tally of true labels per cluster index.
    counters = [Counter() for _ in range(k)]
    for position, cluster_id in enumerate(cluster_assignments):
        counters[cluster_id][true_labels[position]] += 1

    # Clusters that received no samples have an empty tally, so
    # most_common(1) would be [] and indexing it would raise IndexError.
    # They are skipped; no entry of cluster_assignments refers to them.
    ci_to_label = {
        cluster_id: tally.most_common(1)[0][0]
        for cluster_id, tally in enumerate(counters)
        if tally
    }
    return [ci_to_label[cluster_id] for cluster_id in cluster_assignments]

def visualize_elbow_method_wcss(self, embeddings: List, k_values: List[int], project_name: str, run_name: str):
""" This function implements the elbow method to determine the optimal number of clusters for K-means clustering based on the Within-Cluster Sum of Squares (WCSS).
Expand Down
3 changes: 1 addition & 2 deletions Logic/core/clustering/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,5 @@
cm = ClusteringMetrics()
for k in range(2, 20, 4):
centeroids,cluster_assignments = cu.cluster_kmeans(X, k)
label = cu.fix_labels(cluster_assignments, k, y)
print(
f"{k}: ari: {cm.adjusted_rand_score(y, cluster_assignments)} , purity: {cm.purity_score(y, label)} , silhouette: {cm.silhouette_score(X, cluster_assignments)}")
f"{k}: ari: {cm.adjusted_rand_score(y, cluster_assignments)} , purity: {cm.purity_score(y, cluster_assignments)} , silhouette: {cm.silhouette_score(X, cluster_assignments)}")

0 comments on commit b390972

Please sign in to comment.