
Commit

update
FBurkhardt committed Jun 26, 2024
1 parent c734eca commit 9e3a4ba
Showing 10 changed files with 191 additions and 150 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,10 @@
Changelog
=========

Version 0.88.2
--------------
* changed combine speaker results to show speakers not samples

Version 0.88.1
--------------
* added obligatory scatter plot for regression
2 changes: 1 addition & 1 deletion nkululeko/constants.py
@@ -1,2 +1,2 @@
VERSION="0.88.1"
VERSION="0.88.2"
SAMPLING_RATE = 16000
61 changes: 24 additions & 37 deletions nkululeko/experiment.py
@@ -107,8 +107,7 @@ def load_datasets(self):
# print keys/column
dbs = ",".join(list(self.datasets.keys()))
labels = self.util.config_val("DATA", "labels", False)
auto_labels = list(
next(iter(self.datasets.values())).df[self.target].unique())
auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
if labels:
self.labels = ast.literal_eval(labels)
self.util.debug(f"Using target labels (from config): {labels}")
@@ -158,8 +157,7 @@ def fill_tests(self):
data.split()
data.prepare_labels()
self.df_test = pd.concat(
[self.df_test, self.util.make_segmented_index(
data.df_test)]
[self.df_test, self.util.make_segmented_index(data.df_test)]
)
self.df_test.is_labeled = data.is_labeled
self.df_test.got_gender = self.got_gender
@@ -260,17 +258,15 @@ def fill_train_and_tests(self):
test_cats = self.df_test[self.target].unique()
else:
# if there is no target, copy a dummy label
self.df_test = self._add_random_target(
self.df_test).astype("str")
self.df_test = self._add_random_target(self.df_test).astype("str")
train_cats = self.df_train[self.target].unique()
# print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
# print(f"train_cats with target {self.target}: {train_cats}")
if self.df_test.is_labeled:
if type(test_cats) == np.ndarray:
self.util.debug(f"Categories test (nd.array): {test_cats}")
else:
self.util.debug(
f"Categories test (list): {list(test_cats)}")
self.util.debug(f"Categories test (list): {list(test_cats)}")
if type(train_cats) == np.ndarray:
self.util.debug(f"Categories train (nd.array): {train_cats}")
else:
@@ -293,8 +289,7 @@ def fill_train_and_tests(self):

target_factor = self.util.config_val("DATA", "target_divide_by", False)
if target_factor:
self.df_test[self.target] = self.df_test[self.target] / \
float(target_factor)
self.df_test[self.target] = self.df_test[self.target] / float(target_factor)
self.df_train[self.target] = self.df_train[self.target] / float(
target_factor
)
@@ -317,16 +312,14 @@ def _add_random_target(self, df):
def plot_distribution(self, df_labels):
"""Plot the distribution of samples and speaker per target class and biological sex"""
plot = Plots()
sample_selection = self.util.config_val(
"EXPL", "sample_selection", "all")
sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
plot.plot_distributions(df_labels)
if self.got_speaker:
plot.plot_distributions_speaker(df_labels)

def extract_test_feats(self):
self.feats_test = pd.DataFrame()
feats_name = "_".join(ast.literal_eval(
glob_conf.config["DATA"]["tests"]))
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["tests"]))
feats_types = self.util.config_val_list("FEATS", "type", ["os"])
self.feature_extractor = FeatureExtractor(
self.df_test, feats_types, feats_name, "test"
@@ -343,8 +336,7 @@ def extract_feats(self):
"""
df_train, df_test = self.df_train, self.df_test
feats_name = "_".join(ast.literal_eval(
glob_conf.config["DATA"]["databases"]))
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
feats_types = self.util.config_val("FEATS", "type", "os")
# Ensure feats_types is always a list of strings
@@ -385,8 +377,7 @@ def extract_feats(self):
f"test feats ({self.feats_test.shape[0]}) != test labels"
f" ({self.df_test.shape[0]})"
)
self.df_test = self.df_test[self.df_test.index.isin(
self.feats_test.index)]
self.df_test = self.df_test[self.df_test.index.isin(self.feats_test.index)]
self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")

self._check_scale()
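
The extract_feats hunk above (a formatting change) ends on the pattern that keeps labels and features aligned: if feature extraction yields fewer rows than there are test labels, the test labels are filtered down to the indices that actually have features. A toy pandas illustration of that index-alignment pattern follows; the data and column names are made up for the example:

import pandas as pd

# stand-ins for the test labels and the extracted test features
df_test = pd.DataFrame({"emotion": ["hap", "sad", "ang"]}, index=["f1", "f2", "f3"])
feats_test = pd.DataFrame({"mfcc1": [0.1, 0.3]}, index=["f1", "f3"])  # "f2" yielded no features

# same pattern as in the diff: keep only labels whose index has features
df_test = df_test[df_test.index.isin(feats_test.index)]
print(df_test.shape[0])  # 2
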
@@ -401,8 +392,7 @@ def augment(self):
"""Augment the selected samples."""
from nkululeko.augmenting.augmenter import Augmenter

sample_selection = self.util.config_val(
"AUGMENT", "sample_selection", "all")
sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
if sample_selection == "all":
df = pd.concat([self.df_train, self.df_test])
elif sample_selection == "train":
@@ -497,8 +487,7 @@ def random_splice(self):
"""
from nkululeko.augmenting.randomsplicer import Randomsplicer

sample_selection = self.util.config_val(
"AUGMENT", "sample_selection", "all")
sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
if sample_selection == "all":
df = pd.concat([self.df_train, self.df_test])
elif sample_selection == "train":
@@ -519,8 +508,7 @@ def analyse_features(self, needs_feats):
plot_feats = eval(
self.util.config_val("EXPL", "feature_distributions", "False")
)
sample_selection = self.util.config_val(
"EXPL", "sample_selection", "all")
sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
# get the data labels
if sample_selection == "all":
df_labels = pd.concat([self.df_train, self.df_test])
@@ -583,8 +571,7 @@ def analyse_features(self, needs_feats):
for scat_target in scat_targets:
if self.util.is_categorical(df_labels[scat_target]):
for scatter in scatters:
plots.scatter_plot(
df_feats, df_labels, scat_target, scatter)
plots.scatter_plot(df_feats, df_labels, scat_target, scatter)
else:
self.util.debug(
f"{self.name}: binning continuous variable to categories"
@@ -669,15 +656,15 @@ def plot_confmat_per_speaker(self, function):
)
return
best = self.get_best_report(self.reports)
# if not best.is_classification:
# best.continuous_to_categorical()
truths = best.truths
preds = best.preds
if best.is_classification:
truths = best.truths
preds = best.preds
else:
truths = best.truths_cont
preds = best.preds_cont
speakers = self.df_test.speaker.values
print(f"{len(truths)} {len(preds)} {len(speakers) }")
df = pd.DataFrame(
data={"truth": truths, "pred": preds, "speaker": speakers})
plot_name = "result_combined_per_speaker"
df = pd.DataFrame(data={"truths": truths, "preds": preds, "speakers": speakers})
plot_name = f"{self.util.get_exp_name()}_speakercombined_{function}"
self.util.debug(
f"plotting speaker combination ({function}) confusion matrix to"
f" {plot_name}"
@@ -692,13 +679,13 @@ def print_best_model(self):

def demo(self, file, is_list, outfile):
model = self.runmgr.get_best_model()
labelEncoder = None
lab_enc = None
try:
labelEncoder = self.label_encoder
lab_enc = self.label_encoder
except AttributeError:
pass
demo = Demo_predictor(
model, file, is_list, self.feature_extractor, labelEncoder, outfile
model, file, is_list, self.feature_extractor, lab_enc, outfile
)
demo.run_demo()
