"""Gradio demo that classifies a short audio clip with a fine-tuned DistilHuBERT model."""

import os

import gradio as gr
import numpy as np
from transformers import pipeline

from model import SAMPLING_RATE

# Hugging Face access token read from the environment (None when unset).
token = os.getenv("HF_TOKEN")

# Suffix of the checkpoint to load; "cry-detector" is the alternative checkpoint.
modelo = "mixed-data"

pipe = pipeline(
    "audio-classification",
    model=f"A-POR-LOS-8000/distilhubert-finetuned-{modelo}",
    use_auth_token=token,
)


def _to_mono_float32(samples):
    """Return *samples* as a 1-D float32 array, averaging channels if needed."""
    # np.float32 is required here; using torch.float32 raises an error.
    samples = np.asarray(samples).astype(np.float32)
    if samples.ndim > 1:
        # Gradio delivers multi-channel audio as (samples, channels).
        samples = samples.mean(axis=1)
    return samples


def _resample(samples, source_rate, target_rate):
    """Linearly interpolate a 1-D waveform from *source_rate* to *target_rate*.

    This is a dependency-free resampler (no anti-aliasing filter); it exists so
    that the rate announced to the pipeline matches the data actually sent.
    """
    if source_rate == target_rate or samples.size == 0:
        return samples
    target_len = max(1, int(round(samples.size * target_rate / source_rate)))
    source_times = np.arange(samples.size) / source_rate
    target_times = np.arange(target_len) / target_rate
    return np.interp(target_times, source_times, samples).astype(np.float32)


def transcribe(audio):
    """Classify an audio clip and return the label of the top prediction.

    Args:
        audio: The value produced by ``gr.Audio`` in its default numpy mode,
            i.e. a ``(sample_rate, samples)`` tuple, or ``None`` when the user
            submitted nothing.

    Returns:
        The ``"label"`` field of the first result returned by the pipeline.

    Raises:
        gr.Error: If no audio was provided or the clip contains no samples.
    """
    if audio is None:
        raise gr.Error("No audio received. Please record or upload a clip.")

    source_rate, samples = audio
    waveform = _to_mono_float32(samples)
    if waveform.size == 0:
        raise gr.Error("The audio clip is empty.")

    # Peak-normalise to [-1, 1]; skip for pure silence to avoid dividing by
    # zero, which would fill the waveform with NaNs.
    peak = np.max(np.abs(waveform))
    if peak > 0:
        waveform /= peak

    # The pipeline is told the audio is at SAMPLING_RATE, so make that true.
    waveform = _resample(waveform, source_rate, SAMPLING_RATE)

    results = pipe({"sampling_rate": SAMPLING_RATE, "raw": waveform})
    top_result = results[0]  # The first entry is taken as the most likely class.
    return top_result["label"]


demo = gr.Interface(
    transcribe,
    gr.Audio(
        min_length=1.0,
        max_length=10.0,
        format="wav",
    ),
    "text",
)

demo.launch()