# StyleTTS2_Studio / pca / generate_pca.py
# Provenance (from HuggingFace file page): author "Wismut", initial commit 0af9841, 4.58 kB.
import numpy as np
import json
from sklearn.decomposition import PCA
import joblib
# File paths
VOICES_JSON_PATH = "voices.json"
ANNOTATIONS_JSON_PATH = "annotations.json"
PCA_MODEL_PATH = "pca_model.pkl"
VECTOR_DIMENSION = 256 # Adjust based on your actual vector size
N_COMPONENTS = 6 # Number of PCA components for annotated features
def load_json(file_path):
    """Load and parse a JSON file.

    Args:
        file_path (str): Path to the JSON file.

    Returns:
        The parsed JSON content, or an empty dict if the file is
        missing or contains invalid JSON (an error is printed).
    """
    try:
        # Explicit UTF-8: the default encoding is platform-dependent
        # (e.g. cp1252 on Windows), which would break non-ASCII JSON.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return {}
    except json.JSONDecodeError:
        print(f"Error: {file_path} is not valid JSON.")
        return {}
def extract_annotated_vectors():
    """Pair each annotation entry with its matching style vector.

    Reads voices.json (key -> style vector) and annotations.json
    (list of annotation records), matching them by the key derived
    from each record's audio path.

    Returns:
        tuple[np.ndarray, np.ndarray]: (style_vectors, annotated_features)
        where style_vectors is (n_samples, vector_dim) and
        annotated_features is (n_samples, 6). Returns (None, None)
        when no valid pairs are found.
    """
    voices_data = load_json(VOICES_JSON_PATH)
    annotations = load_json(ANNOTATIONS_JSON_PATH)

    # Six rating fields, in this fixed order.
    rating_fields = ("gender", "tone", "pacing", "enunciation", "quality", "style")

    vectors = []
    ratings = []
    for item in annotations:
        # Derive the voices.json key from the audio filename:
        # last path segment, last "-"-separated chunk, ".wav" stripped.
        audio_path = item.get("audio", "")
        key = audio_path.split("/")[-1].split("-")[-1].replace(".wav", "")

        if key not in voices_data:
            print(f"Warning: No style vector found for key '{key}'. Skipping.")
            continue

        # Squeeze away singleton dims; anything not 1-D after that is rejected.
        vec = np.array(voices_data[key], dtype=np.float32).squeeze()
        if vec.ndim != 1:
            print(f"Skipping vector with unexpected dimensions: {vec.shape}")
            continue

        vectors.append(vec)
        ratings.append([item[name][0]["rating"] for name in rating_fields])

    if not vectors or not ratings:
        print("Error: No valid style vectors or annotations found.")
        return None, None

    return np.array(vectors), np.array(ratings)
def train_and_save_pca_model():
    """Fit a PCA model on annotated style vectors and persist it.

    Side effects: writes the fitted model to PCA_MODEL_PATH via joblib
    and the annotation matrix to 'annotated_features.npy'. Prints
    progress; returns early (None) if no annotated data is available.
    """
    style_vectors, annotated_features = extract_annotated_vectors()

    # Guard clause: nothing to train on.
    if style_vectors is None or annotated_features is None:
        print("Error: Unable to extract annotated data.")
        return

    # Sanity-print the shapes before fitting.
    print(f"Style vectors shape: {style_vectors.shape}")  # (n_samples, vector_dim)
    print(
        f"Annotated features shape: {annotated_features.shape}"
    )  # (n_samples, 6) — one rating per annotated field

    print(f"Training PCA on {len(style_vectors)} style vectors...")
    pca = PCA(n_components=N_COMPONENTS)
    pca.fit(style_vectors)

    # Persist the fitted model for later reduction/reconstruction.
    joblib.dump(pca, PCA_MODEL_PATH)
    print(f"PCA model saved to {PCA_MODEL_PATH}.")

    # Keep the raw ratings around for downstream tasks.
    np.save("annotated_features.npy", annotated_features)
    print("Annotated features saved to 'annotated_features.npy'.")
def load_pca_model():
    """Load the trained PCA model from disk.

    Returns:
        The fitted PCA model, or None (with an error printed) when
        PCA_MODEL_PATH does not exist.
    """
    try:
        model = joblib.load(PCA_MODEL_PATH)
    except FileNotFoundError:
        print(f"Error: {PCA_MODEL_PATH} not found.")
        return None
    return model
def reduce_to_pca_components(style_vector, pca):
    """Project a single style vector into PCA space.

    Args:
        style_vector (np.ndarray): Original style vector (1-D).
        pca (PCA): Trained PCA model (anything with .transform).

    Returns:
        np.ndarray: The vector's coordinates in PCA space (n_components-dim).
    """
    # transform expects a 2-D batch, so wrap and unwrap a single sample.
    batch = [style_vector]
    reduced = pca.transform(batch)
    return reduced[0]
def reconstruct_from_pca_components(pca_vector, pca):
    """Map a PCA-space vector back to the original style-vector space.

    Args:
        pca_vector (np.ndarray): Vector in PCA space (n_components-dim).
        pca (PCA): Trained PCA model (anything with .inverse_transform).

    Returns:
        np.ndarray: Approximate reconstruction of the original style vector.
    """
    # inverse_transform works on 2-D batches; wrap, invert, unwrap.
    batch = [pca_vector]
    reconstructed = pca.inverse_transform(batch)
    return reconstructed[0]
# Script entry point: fit the PCA model from annotations and save it to disk.
if __name__ == "__main__":
    train_and_save_pca_model()