Merge branch 'felixbur:main' into master

bagustris · Oct 7, 2024 · 3fddca6
2 parents 47cf753 + 29b56ae
commit 3fddca6
Show file tree

Hide file tree

Showing 6 changed files with 68 additions and 40 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,15 @@
 Changelog
 =========
 
+Version 0.90.2
+--------------
+* added probability output for finetuned classification models
+* switched the path of the probability output from "store" to "results"
+
+Version 0.90.1
+--------------
+* Add balancing for finetune and update data README
+
 Version 0.90.0
 --------------
 * augmentation can now be done without target

diff --git a/make_package.sh b/make_package.sh
@@ -1,31 +1,24 @@
 #!/bin/bash
 
-   git add nkululeko/*py
-   for value in augmenting autopredict data feat_extract losses models reporting segmenting utils
-   do
-      git add nkululeko/$value/*.py
-   done
-   for data in aesdd androids androids_orig androids_test ased asvp-esd baved cafe clac cmu-mosei crema-d demos ekorpus emns emodb emofilm EmoFilm emorynlp emov-db emovo emozionalmente enterface esd gerparas iemocap jl jtes laughter-types meld mesd mess mlendsnd msp-improv msp-podcast oreau2 portuguese ravdess savee shemo subesco syntact tess thorsten-emotional urdu vivae
-   do
-      git add data/$data/*.py
-      git add data/$data/*.md
-   done
-   git add README.md
-   git add requirements.txt
-   git add make_package.sh
-   git add test_runs.sh
-   git add data/README.md
-   git add tests/*.ini
-   git add CHANGELOG.md ini_file.md setup.cfg
-if [ "$1" == "--notag" ]; then
-   git commit -m  update
-else
-   source nkululeko/constants.py
-   git commit -m  $VERSION
-   git tag $VERSION
-fi
+git add nkululeko/*py
+for value in augmenting autopredict data feat_extract losses models reporting segmenting utils
+do
+   git add nkululeko/$value/*.py
+done
+for data in aesdd androids androids_orig androids_test ased asvp-esd baved cafe clac cmu-mosei crema-d demos ekorpus emns emodb emofilm EmoFilm emorynlp emov-db emovo emozionalmente enterface esd gerparas iemocap jl jtes laughter-types meld mesd mess mlendsnd msp-improv msp-podcast oreau2 portuguese ravdess savee shemo subesco syntact tess thorsten-emotional urdu vivae
+do
+   git add data/$data/*.py
+   git add data/$data/*.md
+done
+git add README.md
+git add requirements.txt
+git add make_package.sh
+git add test_runs.sh
+git add data/README.md
+git add tests/*.ini
+git add CHANGELOG.md ini_file.md setup.cfg
+source nkululeko/constants.py
+git commit -m $VERSION
+git tag $VERSION
 git push 
-
-if ! [ "$1" == "--notag" ]; then
-   git push --tags
-fi
+git push --tags
diff --git a/nkululeko/constants.py b/nkululeko/constants.py
@@ -1,2 +1,2 @@
-VERSION = "0.90.0"
+VERSION="0.90.2"
 SAMPLING_RATE = 16000
diff --git a/nkululeko/models/model_tuned.py b/nkululeko/models/model_tuned.py
@@ -30,10 +30,16 @@ def __init__(self, df_train, df_test, feats_train, feats_test):
         """Constructor taking the configuration and all dataframes."""
         super().__init__(df_train, df_test, feats_train, feats_test)
         super().set_model_type("finetuned")
+        self.df_test, self.df_train, self.feats_test, self.feats_train = (
+            df_test,
+            df_train,
+            feats_test,
+            feats_train,
+        )
         self.name = "finetuned_wav2vec2"
         self.target = glob_conf.config["DATA"]["target"]
-        labels = glob_conf.labels
-        self.class_num = len(labels)
+        self.labels = glob_conf.labels
+        self.class_num = len(self.labels)
         device = self.util.config_val("MODEL", "device", False)
         if not device:
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -304,7 +310,7 @@ def train(self):
             else:
                 self.util.error(f"criterion {criterion} not supported for classifier")
         else:
-            self.criterion = self.util.config_val("MODEL", "loss", "ccc")
+            criterion = self.util.config_val("MODEL", "loss", "1-ccc")
             if criterion == "1-ccc":
                 criterion = ConcordanceCorCoeff()
             elif criterion == "mse":
@@ -402,7 +408,7 @@ def compute_loss(
         self.load(self.run, self.epoch)
 
     def get_predictions(self):
-        results = []
+        results = [[]].pop(0)
         for (file, start, end), _ in audeer.progress_bar(
             self.df_test.iterrows(),
             total=len(self.df_test),
@@ -415,18 +421,37 @@ def get_predictions(self):
                     file, duration=end - start, offset=start, always_2d=True
                 )
             assert sr == self.sampling_rate
-            predictions = self.model.predict(signal)
-            results.append(predictions.argmax())
-        return results
+            prediction = self.model.predict(signal)
+            results.append(prediction)
+            # results.append(predictions.argmax())
+        predictions = np.asarray(results)
+        if self.util.exp_is_classification():
+            # make a dataframe for the class probabilities
+            proba_d = {}
+            for c in range(self.class_num):
+                proba_d[c] = []
+            # get the class probabilities
+            # predictions = self.clf.predict_proba(self.feats_test.to_numpy())
+            # pred = self.clf.predict(features)
+            for i in range(self.class_num):
+                proba_d[i] = list(predictions.T[i])
+            probas = pd.DataFrame(proba_d)
+            probas = probas.set_index(self.df_test.index)
+            predictions = probas.idxmax(axis=1).values
+        else:
+            predictions = predictions.flatten()
+            probas = None
+        return predictions, probas
 
     def predict(self):
         """Predict the whole eval feature set"""
-        predictions = self.get_predictions()
+        predictions, probas = self.get_predictions()
         report = Reporter(
             self.df_test[self.target].to_numpy().astype(float),
             predictions,
             self.run,
             self.epoch_num,
+            probas=probas,
         )
         self._plot_epoch_progression(report)
         return report
@@ -438,6 +463,7 @@ def _plot_epoch_progression(self, report):
         )
         with open(log_file, "r") as file:
             data = file.read()
+        data = data.strip().replace("nan", "0")
         list = ast.literal_eval(data)
         epochs, vals, loss = [], [], []
         for index, tp in enumerate(list):

diff --git a/nkululeko/utils/util.py b/nkululeko/utils/util.py
@@ -155,10 +155,10 @@ def get_save_name(self):
         return f"{store}/{self.get_exp_name()}.pkl"
 
     def get_pred_name(self):
-        store = self.get_path("store")
+        results_dir = self.get_path("res_dir")
         target = self.get_target_name()
         pred_name = self.get_model_description()
-        return f"{store}/pred_{target}_{pred_name}.csv"
+        return f"{results_dir}/pred_{target}_{pred_name}.csv"
 
     def is_categorical(self, pd_series):
         """Check if a dataframe column is categorical."""

diff --git a/tests/exp_agedb_os_mlp.ini b/tests/exp_agedb_os_mlp.ini
@@ -23,7 +23,7 @@ layers = {'l1':1024, 'l2':128}
 drop = .4
 loss = 1-ccc
 measure = ccc
-patience = 5
+patience = 10
 [PLOT]
 best_model = True
 epoch_progression = True