Spaces:

khizon
/

emotion-classifier-demo

Runtime error

App Files Files Community

emotion-classifier-demo / download_dataset.py

khizon

initial commit

c3e5c63 over 2 years ago

raw

history blame

No virus

2.04 kB

	import pandas as pd
	import numpy as np

	import os
	import gdown

	from pathlib import Path
	from tqdm import tqdm
	from sklearn.model_selection import train_test_split

	import torchaudio

	def filter_existing_paths(df):
	    """Return the rows of *df* whose ``path`` value is an existing file.

	    Earlier revisions built a temporary ``status`` column but then called
	    ``dropna(subset=["path"])``, so no row was ever removed, and dropped the
	    column with a positional ``axis`` argument that pandas 2.x rejects.
	    A boolean mask does the intended filtering without a helper column.

	    The returned frame has a fresh 0..n-1 index and the same columns.
	    """
	    exists = df["path"].map(os.path.exists).astype(bool)
	    return df[exists].reset_index(drop=True)


	def main():
	    """Download AESDD, index its loadable .wav files, write train/test CSVs.

	    Outputs ``data/train.csv`` and ``data/test.csv`` (tab-separated) with the
	    columns ``name``, ``path`` and ``emotion``.
	    """
	    # Local import keeps this block self-contained; zipfile replaces the
	    # external ``unzip`` binary, whose exit status used to be ignored.
	    import zipfile

	    data_dir = "data"
	    dataset_dir = os.path.join(data_dir, "aesdd")
	    os.makedirs(data_dir, exist_ok=True)

	    # Skip the download when a previous run already produced data/aesdd;
	    # os.rename onto an existing directory would otherwise fail.
	    if not os.path.isdir(dataset_dir):
	        archive = "aesdd.zip"
	        downloaded = gdown.download(
	            'https://drive.google.com/uc?id=1_IAWexEWpH-ly_JaA5EGfZDp-_3flkN1',
	            archive,
	            quiet=False,
	        )
	        # NOTE(review): some gdown versions return None instead of raising
	        # when the download fails -- treat that as a hard error.
	        if downloaded is None:
	            raise SystemExit("Failed to download the AESDD archive")
	        with zipfile.ZipFile(archive) as zf:
	            zf.extractall(data_dir)
	        os.rename(
	            os.path.join(data_dir, 'Acted Emotional Speech Dynamic Database'),
	            dataset_dir,
	        )

	    data = []
	    # Layout used below: data/aesdd/<emotion>/<file>.wav, so the parent
	    # directory name is the label. The previous pattern "*/.wav" only
	    # matched files literally named ".wav". Sorting gives tqdm a total.
	    for wav in tqdm(sorted(Path(dataset_dir).glob("*/*.wav"))):
	        label = wav.parent.name
	        path = os.path.join(dataset_dir, label, wav.name)
	        try:
	            # There are some broken files; loading is only a validity check.
	            torchaudio.load(path)
	        except Exception as e:
	            # Best-effort: skip the file, but say so instead of hiding it.
	            print(f"Skipping unreadable file {path}: {e}")
	            continue
	        data.append({
	            "name": wav.name,
	            "path": path,
	            "emotion": label,
	        })

	    # An empty frame has no "path"/"emotion" columns and would fail below
	    # with an unhelpful KeyError.
	    if not data:
	        raise SystemExit(f"No loadable .wav files found under {dataset_dir}")

	    df = pd.DataFrame(data)
	    print(df.head())

	    # Filter broken and non-existent paths
	    print(f"Step 0: {len(df)}")
	    df = filter_existing_paths(df)
	    print(f"Step 1: {len(df)}")

	    # Shuffle rows before splitting.
	    df = df.sample(frac=1)
	    df = df.reset_index(drop=True)

	    # Train test split, stratified so both sets keep the emotion balance.
	    save_path = "data"

	    train_df, test_df = train_test_split(
	        df, test_size=0.2, random_state=101, stratify=df["emotion"]
	    )

	    train_df = train_df.reset_index(drop=True)
	    test_df = test_df.reset_index(drop=True)

	    train_df.to_csv(f"{save_path}/train.csv", sep="\t", encoding="utf-8", index=False)
	    test_df.to_csv(f"{save_path}/test.csv", sep="\t", encoding="utf-8", index=False)

	    print(train_df.shape)
	    print(test_df.shape)


	if __name__ == '__main__':
	    main()