Spaces:
Running
Running
import numpy as np | |
import json | |
from sklearn.decomposition import PCA | |
import joblib | |
# File paths
VOICES_JSON_PATH = "voices.json"
ANNOTATIONS_JSON_PATH = "annotations.json"
PCA_MODEL_PATH = "pca_model.pkl"

# NOTE(review): VECTOR_DIMENSION is not referenced by any function visible in
# this file; it documents the expected style-vector length only.
VECTOR_DIMENSION = 256  # Adjust based on your actual vector size
N_COMPONENTS = 6  # Number of PCA components for annotated features
def load_json(file_path):
    """Load and parse a JSON file, falling back to an empty dict on failure.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or ``{}`` when the file does not exist or
        cannot be decoded as JSON. An error message is printed in both cases.
    """
    try:
        # Read as UTF-8 explicitly so decoding does not depend on the
        # platform's locale default.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Bytes that are not valid UTF-8 cannot be valid JSON text either.
        print(f"Error: {file_path} is not valid JSON.")
        return {}
def extract_annotated_vectors():
    """
    Load annotations and pair each one with its stored style vector.

    The lookup key for an annotation is derived from its "audio" path: the
    file name, then the part after the last "-", with ".wav" removed.
    Annotations are skipped (with a printed warning) when no style vector
    exists for the key, when the stored vector does not squeeze down to 1-D,
    or when any of the expected rating fields is missing.

    Returns:
        tuple: ``(style_vectors, annotated_features)`` where
            style_vectors is a float32 np.ndarray with one row per kept
            annotation, and annotated_features is an np.ndarray with one row
            of six ratings (gender, tone, pacing, enunciation, quality,
            style) per kept annotation.
            ``(None, None)`` is returned when nothing usable was found.
    """
    # Load data
    voices_data = load_json(VOICES_JSON_PATH)
    annotations = load_json(ANNOTATIONS_JSON_PATH)

    style_vectors = []
    annotated_features = []

    for item in annotations:
        # A malformed entry must not abort the whole extraction.
        if not isinstance(item, dict):
            print("Warning: Annotation entry is not an object. Skipping.")
            continue

        # "audio" may be absent or null; both are treated as an empty path.
        audio_path = item.get("audio") or ""
        key = audio_path.split("/")[-1].split("-")[-1].replace(".wav", "")

        # Skip if the style vector is missing
        if key not in voices_data:
            print(f"Warning: No style vector found for key '{key}'. Skipping.")
            continue

        # Stored vectors may carry singleton axes; squeeze them to 1-D.
        style_vector = np.array(voices_data[key], dtype=np.float32).squeeze()
        if style_vector.ndim != 1:
            print(f"Skipping vector with unexpected dimensions: {style_vector.shape}")
            continue

        # Collect the six ratings; skip the annotation if any is unavailable.
        features = _first_ratings(item)
        if features is None:
            print(f"Warning: Incomplete annotation for key '{key}'. Skipping.")
            continue

        style_vectors.append(style_vector)
        annotated_features.append(features)

    if not style_vectors or not annotated_features:
        print("Error: No valid style vectors or annotations found.")
        return None, None

    return np.array(style_vectors), np.array(annotated_features)


# Annotation fields whose first rating is collected, in output column order.
_RATING_FIELDS = ("gender", "tone", "pacing", "enunciation", "quality", "style")


def _first_ratings(item):
    """Return the first rating of every field in _RATING_FIELDS, or None if any is missing."""
    try:
        return [item[field][0]["rating"] for field in _RATING_FIELDS]
    except (KeyError, IndexError, TypeError):
        return None
def train_and_save_pca_model():
    """
    Fit a PCA model on the annotated style vectors and persist the results.

    The fitted model is written to PCA_MODEL_PATH with joblib, and the
    annotated feature matrix is written to "annotated_features.npy".
    Nothing is written (an error is printed instead) when no annotated data
    could be extracted or when the data cannot support N_COMPONENTS
    components.
    """
    # Extract style vectors and annotated features
    style_vectors, annotated_features = extract_annotated_vectors()
    if style_vectors is None or annotated_features is None:
        print("Error: Unable to extract annotated data.")
        return

    # Report shapes for a quick sanity check
    print(f"Style vectors shape: {style_vectors.shape}")  # Expected: (n_samples, vector_length)
    print(
        f"Annotated features shape: {annotated_features.shape}"
    )  # Expected: (n_samples, 6)

    # PCA needs a 2-D matrix and cannot produce more components than
    # min(n_samples, n_features); report that here instead of letting
    # fit() raise.
    if style_vectors.ndim != 2 or min(style_vectors.shape) < N_COMPONENTS:
        print(
            f"Error: Cannot fit {N_COMPONENTS} PCA components on style vectors "
            f"of shape {style_vectors.shape}."
        )
        return

    # Train PCA on style vectors
    print(f"Training PCA on {len(style_vectors)} style vectors...")
    pca = PCA(n_components=N_COMPONENTS)
    pca.fit(style_vectors)

    # Save PCA model
    joblib.dump(pca, PCA_MODEL_PATH)
    print(f"PCA model saved to {PCA_MODEL_PATH}.")

    # Optionally save annotated features for downstream tasks
    np.save("annotated_features.npy", annotated_features)
    print("Annotated features saved to 'annotated_features.npy'.")
def load_pca_model():
    """Return the PCA model stored at PCA_MODEL_PATH, or None if the file is absent.

    An error message is printed when the file does not exist.
    """
    # NOTE: joblib.load unpickles the file; only load model files from a
    # trusted source.
    try:
        model = joblib.load(PCA_MODEL_PATH)
    except FileNotFoundError:
        print(f"Error: {PCA_MODEL_PATH} not found.")
        model = None
    return model
def reduce_to_pca_components(style_vector, pca):
    """
    Project a single style vector into PCA space.

    Args:
        style_vector (array-like): One style vector. It may be flat or carry
            extra singleton axes (e.g. shape (1, n_features)); it is
            flattened into a single row before projection.
        pca (PCA): Trained PCA model (any object with a ``transform`` method).

    Returns:
        np.ndarray: The first (only) row of ``pca.transform``'s output, i.e.
            the vector in PCA space (n_components-dim).
    """
    # Stored vectors can have a leading singleton axis (the training path
    # squeezes them); wrapping such a vector in a list would yield a 3-D
    # input. Normalise to exactly one (1, n_features) row instead.
    row = np.asarray(style_vector).reshape(1, -1)
    return pca.transform(row)[0]
def reconstruct_from_pca_components(pca_vector, pca):
    """
    Map a single PCA-space vector back to the original style-vector space.

    Args:
        pca_vector (array-like): One vector in PCA space (n_components-dim).
            It may be flat or carry extra singleton axes; it is flattened
            into a single row before reconstruction.
        pca (PCA): Trained PCA model (any object with an
            ``inverse_transform`` method).

    Returns:
        np.ndarray: The first (only) row of ``pca.inverse_transform``'s
            output, i.e. the reconstructed style vector.
    """
    # Normalise to exactly one (1, n_components) row so that inputs with a
    # leading singleton axis do not become a 3-D array.
    row = np.asarray(pca_vector).reshape(1, -1)
    return pca.inverse_transform(row)[0]
# Train and persist the PCA model when this file is run as a script.
if __name__ == "__main__":
    train_and_save_pca_model()