diff --git a/CHANGELOG.md b/CHANGELOG.md index ff7a8686..5c85d176 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ Changelog ========= +Version 0.81.1 +-------------- +* fixed bugs in demo module +* made kernel for SVM/SVR configurable + Version 0.81.0 -------------- * added test selection to test module diff --git a/ini_file.md b/ini_file.md index faee813e..14129279 100644 --- a/ini_file.md +++ b/ini_file.md @@ -252,8 +252,11 @@ * **tree_reg**: Classification tree regressor * **svm**: Support Vector Machine * C_val = 0.001 + * kernel = rbf # ‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’ * **xgb**:XG-Boost * **svr**: Support Vector Regression + * C_val = 0.001 + * kernel = rbf # ‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’ * **xgr**: XG-Boost Regression * **mlp**: Multi-Layer-Perceptron for classification * **mlp_reg**: Multi-Layer-Perceptron for regression diff --git a/nkululeko/constants.py b/nkululeko/constants.py index b4c223c2..60596c4c 100644 --- a/nkululeko/constants.py +++ b/nkululeko/constants.py @@ -1,2 +1,2 @@ -VERSION="0.81.0" +VERSION="0.81.1" SAMPLING_RATE = 16000 diff --git a/nkululeko/demo_predictor.py b/nkululeko/demo_predictor.py index 1e0417bc..37c3b69a 100644 --- a/nkululeko/demo_predictor.py +++ b/nkululeko/demo_predictor.py @@ -94,7 +94,7 @@ def predict_signal(self, signal, sr): return result_dict else: # experiment is regression and returns one estimation - dict_2["predicted"] = result_dict[0] + dict_2["predicted"] = result_dict print(dict_2) return dict_2 diff --git a/nkululeko/models/model.py b/nkululeko/models/model.py index 1a005c71..5c4bf50c 100644 --- a/nkululeko/models/model.py +++ b/nkululeko/models/model.py @@ -269,19 +269,24 @@ def predict(self): ) return report + def get_type(self): + return "generic" + def predict_sample(self, features): """Predict one sample""" prediction = {} if self.util.exp_is_classification(): # get the class probabilities - predictions = self.clf.predict_proba([features]) + if not self.get_type() == "xgb": + features = [features] + predictions = self.clf.predict_proba(features) # pred = self.clf.predict(features) for i in range(len(self.clf.classes_)): cat = self.clf.classes_[i] prediction[cat] = predictions[0][i] else: predictions = self.clf.predict(features) - prediction["result"] = predictions[0] + prediction = predictions[0] return prediction def store(self): diff --git a/nkululeko/models/model_mlp_regression.py b/nkululeko/models/model_mlp_regression.py index 6a0f412a..4c06f71a 100644 --- a/nkululeko/models/model_mlp_regression.py +++ b/nkululeko/models/model_mlp_regression.py @@ -247,4 +247,4 @@ def predict_sample(self, features): features = np.reshape(features, (-1, 1)).T logits = self.model(features.to(self.device)).reshape(-1) a = logits.numpy() - return a + return a[0] diff --git a/nkululeko/models/model_svm.py b/nkululeko/models/model_svm.py index 7c9479f6..b299992a 100644 --- a/nkululeko/models/model_svm.py +++ b/nkululeko/models/model_svm.py @@ -12,8 +12,17 @@ class SVM_model(Model): def __init__(self, df_train, df_test, feats_train, feats_test): super().__init__(df_train, df_test, feats_train, feats_test) c = float(self.util.config_val("MODEL", "C_val", "0.001")) + if eval(self.util.config_val("MODEL", "class_weight", "False")): + class_weight = "balanced" + else: + class_weight = None + kernel = self.util.config_val("MODEL", "kernel", "rbf") self.clf = svm.SVC( - kernel="linear", C=c, gamma="scale", probability=True + kernel=kernel, + C=c, + gamma="scale", + probability=True, + class_weight=class_weight, ) # set up the classifier def set_C(self, c): diff --git a/nkululeko/models/model_svr.py b/nkululeko/models/model_svr.py index 08d2e600..71dd950a 100644 --- a/nkululeko/models/model_svr.py +++ b/nkululeko/models/model_svr.py @@ -12,7 +12,9 @@ class SVR_model(Model): def __init__(self, df_train, df_test, feats_train, feats_test): super().__init__(df_train, df_test, feats_train, feats_test) c = float(self.util.config_val("MODEL", "C_val", "0.001")) - self.clf = svm.SVR(kernel="rbf", C=c, probability=True) # set up the classifier + # kernel{‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’} or callable, default=’rbf’ + kernel = self.util.config_val("MODEL", "kernel", "rbf") + self.clf = svm.SVR(kernel=kernel, C=c) # set up the classifier def set_C(self, c): """Set the C parameter""" diff --git a/nkululeko/models/model_xgb.py b/nkululeko/models/model_xgb.py index 7e955561..b5a78469 100644 --- a/nkululeko/models/model_xgb.py +++ b/nkululeko/models/model_xgb.py @@ -10,3 +10,6 @@ class XGB_model(Model): is_classifier = True clf = XGBClassifier() # set up the classifier + + def get_type(self): + return "xgb" diff --git a/nkululeko/utils/util.py b/nkululeko/utils/util.py index a88797f2..5400f594 100644 --- a/nkululeko/utils/util.py +++ b/nkululeko/utils/util.py @@ -175,10 +175,6 @@ def get_model_description(self): mt = f'{self.config["MODEL"]["type"]}' ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"])) ft += "_" - set = self.config_val("FEATS", "set", False) - set_string = "" - if set: - set_string += set layer_string = "" layer_s = self.config_val("MODEL", "layers", False) if layer_s: @@ -186,16 +182,19 @@ def get_model_description(self): sorted_layers = sorted(layers.items(), key=lambda x: x[1]) for l in sorted_layers: layer_string += f"{str(l[1])}-" - return_string = f"{mt}_{ft}{set_string}{layer_string[:-1]}" + return_string = f"{mt}_{ft}{layer_string[:-1]}" options = [ ["MODEL", "C_val"], + ["MODEL", "kernel"], ["MODEL", "drop"], + ["MODEL", "class_weight"], ["MODEL", "loss"], ["MODEL", "logo"], ["MODEL", "learning_rate"], ["MODEL", "k_fold_cross"], ["FEATS", "balancing"], ["FEATS", "scale"], + ["FEATS", "set"], ["FEATS", "wav2vec2.layer"], ] for option in options: