Spaces:
Sleeping
Sleeping
import yaml | |
import json | |
from sklearn.model_selection import train_test_split | |
# Load the YAML file | |
with open("data/data.yaml", "r") as f: | |
data = yaml.safe_load(f) | |
# Separate the data by category | |
easy = [item for item in data if item["category"] == "easy"] | |
medium = [item for item in data if item["category"] == "medium"] | |
hard = [item for item in data if item["category"] == "hard"] | |
# Split each category into validation and test sets | |
easy_val, easy_test = train_test_split(easy, test_size=0.5, random_state=42) | |
medium_val, medium_test = train_test_split(medium, test_size=0.5, random_state=42) | |
hard_val, hard_test = train_test_split(hard, test_size=0.5, random_state=42) | |
# Combine the validation and test sets | |
validation = easy_val + medium_val + hard_val | |
test = easy_test + medium_test + hard_test | |
# Write the validation set to a JSON file | |
with open("validation.json", "w") as f: | |
json.dump(validation, f) | |
# Write the test set to a JSON file | |
with open("test.json", "w") as f: | |
json.dump(test, f) | |