import pandas as pd
import numpy as np
import os
import gdown
from pathlib import Path
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torchaudio
if __name__ == '__main__':
    if not os.path.exists('data'):
        os.makedirs('data')
        # Download and extract the AESDD corpus, then rename the folder to a short path
        os.system('gdown https://drive.google.com/uc?id=1_IAWexEWpH-ly_JaA5EGfZDp-_3flkN1')
        os.system('unzip -q aesdd.zip -d data/')
        os.rename(os.path.join('data', 'Acted Emotional Speech Dynamic Database'),
                  os.path.join('data', 'aesdd'))
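
    # After extraction the corpus is assumed to be laid out as
    #   data/aesdd/<emotion>/<utterance>.wav
    # which is what the directory walk below relies on for labels.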
    # Collect a (name, path, emotion) record for every decodable wav file
    data = []
    for wav_path in tqdm(Path("data/aesdd").glob("**/*.wav")):
        name = wav_path.name
        label = wav_path.parent.name
        path = os.path.join("data", "aesdd", label, name)
        try:
            # A few files are broken; keep only those torchaudio can decode
            torchaudio.load(path)
            data.append({
                "name": name,
                "path": path,
                "emotion": label
            })
        except Exception as e:
            # print(str(path), e)
            pass

    df = pd.DataFrame(data)
    print(df.head())
    # Filter out broken and non-existent paths
    print(f"Step 0: {len(df)}")
    df["status"] = df["path"].apply(lambda p: True if os.path.exists(p) else None)
    df = df.dropna(subset=["status"])
    df = df.drop(columns=["status"])
    print(f"Step 1: {len(df)}")
    df = df.sample(frac=1)
    df = df.reset_index(drop=True)

    # Train test split
    save_path = "data"
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=101, stratify=df["emotion"])
    train_df = train_df.reset_index(drop=True)
    test_df = test_df.reset_index(drop=True)

    train_df.to_csv(f"{save_path}/train.csv", sep="\t", encoding="utf-8", index=False)
    test_df.to_csv(f"{save_path}/test.csv", sep="\t", encoding="utf-8", index=False)

    print(train_df.shape)
    print(test_df.shape)
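
    # Sanity check: because the split uses stratify=df["emotion"], both splits
    # should show roughly the same class proportions.
    print(train_df["emotion"].value_counts(normalize=True))
    print(test_df["emotion"].value_counts(normalize=True))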