Skip to content

Commit

Permalink
Add TurEV DB
Browse files Browse the repository at this point in the history
  • Loading branch information
bagustris committed May 20, 2024
1 parent 2432ddb commit 7427a63
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 5 deletions.
11 changes: 6 additions & 5 deletions data/ravdess/exp_ravdess_os_xgb.ini
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
[EXP]
root = ./
name = results/exp_ravdess
root = ./results
name = exp_ravdess_3
runs = 1
epochs = 1
save = True
[DATA]
databases = ['train', 'test', 'dev']
databases = ['train', 'dev']
train = ./data/ravdess/ravdess_train.csv
train.type = csv
train.absolute_path = False
train.split_strategy = train
dev = ./data/ravdess/ravdess_dev.csv
dev.type = csv
dev.absolute_path = False
dev.split_strategy = train
dev.split_strategy = test
test = ./data/ravdess/ravdess_test.csv
test.type = csv
test.absolute_path = False
Expand All @@ -24,4 +24,5 @@ labels = ['angry', 'happy', 'neutral', 'sad']
type = ['os']
scale = standard
[MODEL]
type = xgb
type = xgb
save = True
9 changes: 9 additions & 0 deletions data/turev/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Nkululeko pre-processing for TurEV-DB

Run the following commands from the `data/turev` directory:

```bash
git clone https://github.com/Xeonen/TurEV-DB.git
python3 process_database.py
cd ../..
python3 -m nkululeko.resample --config data/turev/exp.ini
python3 -m nkululeko.nkululeko --config data/turev/exp.ini
```
30 changes: 30 additions & 0 deletions data/turev/exp.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[EXP]
root = ./results/
name = exp_turev_os_knn
[DATA]
databases = ['train', 'dev', 'test']
train = ./data/turev/turev_train.csv
train.type = csv
train.absolute_path = False
train.split_strategy = train
dev = ./data/turev/turev_dev.csv
dev.type = csv
dev.absolute_path = False
dev.split_strategy = train
test = ./data/turev/turev_test.csv
test.type = csv
test.absolute_path = False
test.split_strategy = test
target = emotion
; labels = ['anger', 'neutral', 'fear']
; get the number of classes from the target column automatically
[FEATS]
type = ['os']
; type = ['hubert-xlarge-ll60k']
; no_reuse = False
; scale = standard
[MODEL]
type = knn
; save = True
[RESAMPLE]
replace = True
52 changes: 52 additions & 0 deletions data/turev/process_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# process_database.py for TurEV-DB

import argparse
from pathlib import Path
from nkululeko.utils.files import find_files
import pandas as pd


def process_database(data_dir, output_dir):
    """Create speaker-independent train/dev/test CSV files for TurEV-DB.

    WAV files are collected from the ``Sound Source`` sub-directory of
    *data_dir*. For every file, the emotion label is the lower-cased stem of
    its parent directory and the speaker id is the part of the file stem
    before the first underscore. Speaker ``6783`` forms the test split,
    speaker ``1358`` the dev split, and every other speaker goes to train.

    Args:
        data_dir: Path to the cloned TurEV-DB repository.
        output_dir: Directory that receives ``turev_train.csv``,
            ``turev_dev.csv`` and ``turev_test.csv``; created if missing.

    Raises:
        ValueError: If *data_dir* does not exist.
    """
    # Fail early, before anything is written, when the corpus is missing.
    if not Path(data_dir).exists():
        raise ValueError(f"data_dir {data_dir} does not exist")

    # Create the output directory (and any parents) if it is not there yet.
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    sound_source_dir = Path(data_dir) / "Sound Source"
    # NOTE(review): find_files comes from nkululeko.utils.files; presumably
    # relative=True yields relative paths -- confirm against that helper.
    wavs = find_files(sound_source_dir, ext=["wav"], relative=True)
    print(f"Found {len(wavs)} files in {data_dir}")

    # One row per wav file; the "file" column holds the path objects.
    wav_paths = [Path(wav) for wav in wavs]
    df = pd.DataFrame(wav_paths, columns=["file"])

    # Emotion label: name of the directory that contains the file.
    df["emotion"] = [path.parent.stem.lower() for path in wav_paths]

    # Speaker id: leading token of the file name, before the first "_".
    df["speaker"] = [path.stem.split("_")[0] for path in wav_paths]

    # Hold out one speaker for test ("6783") and one for dev ("1358");
    # all remaining speakers are used for training.
    is_test = df["speaker"] == "6783"
    is_dev = df["speaker"] == "1358"
    splits = {
        "train": df[~(is_test | is_dev)],
        "dev": df[is_dev],
        "test": df[is_test],
    }

    # Save all splits to csv.
    for split, df_split in splits.items():
        df_split.to_csv(Path(output_dir) / f"turev_{split}.csv", index=False)
        print(f"Saved {split} to {output_dir}/turev_{split}.csv with shape {df_split.shape}")

if __name__ == "__main__":
    # Command-line entry point: read the corpus location and the output
    # directory from the arguments, then build the CSV splits.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--data_dir",
        type=str,
        default="./TurEV-DB",
        help="Path to data directory",
    )
    cli.add_argument(
        "--output_dir",
        type=str,
        default=".",
        help="Path to output directory",
    )
    options = cli.parse_args()
    process_database(options.data_dir, options.output_dir)

0 comments on commit 7427a63

Please sign in to comment.