import numpy as np import json from sklearn.decomposition import PCA import joblib # File paths VOICES_JSON_PATH = "voices.json" ANNOTATIONS_JSON_PATH = "annotations.json" PCA_MODEL_PATH = "pca_model.pkl" VECTOR_DIMENSION = 256 # Adjust based on your actual vector size N_COMPONENTS = 6 # Number of PCA components for annotated features def load_json(file_path): """Load a JSON file.""" try: with open(file_path, "r") as f: return json.load(f) except FileNotFoundError: print(f"Error: {file_path} not found.") return {} except json.JSONDecodeError: print(f"Error: {file_path} is not valid JSON.") return {} def extract_annotated_vectors(): """ Load annotations and match annotated features with style vectors. Returns: np.ndarray: Style vectors (256-dim). np.ndarray: Annotated features (n_components-dim). """ # Load data voices_data = load_json(VOICES_JSON_PATH) annotations = load_json(ANNOTATIONS_JSON_PATH) style_vectors = [] annotated_features = [] # Extract annotated features and match style vectors for item in annotations: # Extract the key for the style vector audio_path = item.get("audio", "") key = audio_path.split("/")[-1].split("-")[-1].replace(".wav", "") # Skip if the style vector is missing if key not in voices_data: print(f"Warning: No style vector found for key '{key}'. Skipping.") continue # Get the style vector and ensure it's flattened to 1D style_vector = np.array(voices_data[key], dtype=np.float32).squeeze() if style_vector.ndim != 1: print(f"Skipping vector with unexpected dimensions: {style_vector.shape}") continue # Extract annotated features (pacing, gender, tone, enunciation, style) features = [ item["gender"][0]["rating"], item["tone"][0]["rating"], item["pacing"][0]["rating"], item["enunciation"][0]["rating"], item["quality"][0]["rating"], item["style"][0]["rating"], ] # Append data style_vectors.append(style_vector) annotated_features.append(features) if not style_vectors or not annotated_features: print("Error: No valid style vectors or annotations found.") return None, None return np.array(style_vectors), np.array(annotated_features) def train_and_save_pca_model(): """ Train the PCA model using annotated style vectors and save the model. """ # Extract style vectors and annotated features style_vectors, annotated_features = extract_annotated_vectors() if style_vectors is None or annotated_features is None: print("Error: Unable to extract annotated data.") return # Validate shape of style_vectors print(f"Style vectors shape: {style_vectors.shape}") # Should be (n_samples, 256) print( f"Annotated features shape: {annotated_features.shape}" ) # Should be (n_samples, 5) # Train PCA on style vectors print(f"Training PCA on {len(style_vectors)} style vectors...") pca = PCA(n_components=N_COMPONENTS) pca.fit(style_vectors) # Save PCA model joblib.dump(pca, PCA_MODEL_PATH) print(f"PCA model saved to {PCA_MODEL_PATH}.") # Optionally save annotated features for downstream tasks np.save("annotated_features.npy", annotated_features) print("Annotated features saved to 'annotated_features.npy'.") def load_pca_model(): """Load the trained PCA model.""" try: return joblib.load(PCA_MODEL_PATH) except FileNotFoundError: print(f"Error: {PCA_MODEL_PATH} not found.") return None def reduce_to_pca_components(style_vector, pca): """ Reduce a 256-dimensional style vector to PCA space. Args: style_vector (np.ndarray): Original style vector (256-dim). pca (PCA): Trained PCA model. Returns: np.ndarray: Reduced vector in PCA space (n_components-dim). """ return pca.transform([style_vector])[0] def reconstruct_from_pca_components(pca_vector, pca): """ Reconstruct the original style vector from PCA space. Args: pca_vector (np.ndarray): Vector in PCA space (n_components-dim). pca (PCA): Trained PCA model. Returns: np.ndarray: Reconstructed style vector (256-dim). """ return pca.inverse_transform([pca_vector])[0] if __name__ == "__main__": train_and_save_pca_model()