"""Gradio demo: classify an audio clip with an audio-classification pipeline.

The pipeline is loaded from the Hugging Face Hub checkpoint
``A-POR-LOS-8000/distilhubert-finetuned-cry-detector`` and exposed through a
simple audio-in / text-out Gradio interface.
"""

import os

import gradio as gr
import numpy as np
from huggingface_hub import login
from transformers import pipeline

from model import SAMPLING_RATE, clasificador, monitor

# Alternative selection kept from the original script:
# modelo = monitor
modelo = clasificador

pipe = pipeline(
    "audio-classification",
    model="A-POR-LOS-8000/distilhubert-finetuned-cry-detector",
    device="cuda",
)

# NOTE(review): if HF_ACCESS_TOKEN is unset, ``token`` is None and ``login``
# is still called with it -- confirm that is acceptable in the deployment.
token = os.getenv("HF_ACCESS_TOKEN")
login(token, add_to_git_credential=True)


def transcribe(audio):
    """Classify an audio clip and return the label of the top prediction.

    Args:
        audio: The value delivered by the ``gr.Audio`` input, unpacked here
            as a ``(sample_rate, samples)`` pair where ``samples`` is a NumPy
            array. ``None`` when nothing was recorded or uploaded.

    Returns:
        The ``"label"`` entry of the first result returned by the pipeline.

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio is None:
        raise gr.Error("No audio received. Record or upload a clip first.")

    # NOTE(review): the clip's own sample rate is discarded and SAMPLING_RATE
    # is reported to the pipeline instead -- confirm the two always match.
    _, samples = audio

    # Using torch.float32 here raises an error, so cast with NumPy's dtype.
    samples = samples.astype(np.float32)

    # The pipeline expects single-channel audio; multi-channel input is
    # assumed to be laid out as (samples, channels) and is averaged to mono.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Peak-normalise, but skip silent or empty clips: dividing by a zero peak
    # would fill the signal with NaNs.
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        samples /= peak

    results = pipe({"sampling_rate": SAMPLING_RATE, "raw": samples})
    top_result = results[0]  # first entry is taken as the most likely class
    return top_result["label"]


demo = gr.Interface(
    transcribe,
    gr.Audio(
        min_length=1.0,
        max_length=10.0,
        format="wav",
    ),
    "text",
)

demo.launch()