import os
from time import time

import gradio as gr
import requests

from languages import LANGUAGES

GLADIA_API_KEY = os.environ.get("GLADIA_API_KEY")

headers = {
    "accept": "application/json",
    "x-gladia-key": GLADIA_API_KEY,
}

ACCEPTED_LANGUAGE_BEHAVIOUR = [
    "manual",
    "automatic single language",
    "automatic multiple languages",
]
def transcribe(
    audio: str = None,
):
    """
    Transcribe audio to text using the Gladia API.

    Sends a request to the API with the given audio file and returns the
    transcribed text. Get your API key at gladia.io!

    Parameters:
        audio (str): The path to the audio file to transcribe.

    Returns:
        tuple: The diarized transcript formatted as HTML and the raw prediction
        metadata returned by the API. If an error occurs, a string with an
        error message is returned instead.
    """
    DEFAULT_MANUAL_LANGUAGE = "english"
    language_behaviour = ACCEPTED_LANGUAGE_BEHAVIOUR[2]
    files = {
        "language_behaviour": (None, language_behaviour),
        "noise_reduction": (None, "false"),
        "output_format": (None, "json"),
        "toggle_diarization": (None, "true"),
    }

    # attach the uploaded audio file, if one was provided
    if audio:
        files["audio"] = (audio, open(audio, "rb"), "audio/wav")

    # the language field only matters when language_behaviour is "manual";
    # for the automatic modes it is ignored anyway
    if language_behaviour == "manual":
        files["language"] = (None, DEFAULT_MANUAL_LANGUAGE)
    start_transfer = time()
    response = requests.post(
        "https://api.gladia.io/audio/text/audio-transcription/",
        headers=headers,
        files=files,
    )
    end_transfer = time()

    if response.status_code != 200:
        print(response.content, response.status_code)
        return "Sorry, an error occurred with your request :/"
    # the response has two outputs: "prediction" and "prediction_raw";
    # prediction_raw has more details about the processing
    # and other debugging details you might be interested in
    result = response.json()
    segments = result["prediction"]

    output = ""
    current_speaker = ""
    for segment in segments:
        if segment["speaker"] != current_speaker and segment["speaker"] != "unknown":
            current_speaker = segment["speaker"]
            output = output + "<br/><br/><b>" + segment["speaker"] + ":</b> " + segment["transcription"]
        else:
            output = output + " " + segment["transcription"]

    return output, result["prediction_raw"]
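
# A minimal usage sketch: transcribe() can also be called directly, e.g. from a
# Python shell, assuming GLADIA_API_KEY is set in the environment and one of
# the bundled example files is available:
#
#     html_transcript, raw_prediction = transcribe("examples/good.will.hunting.wav")
#     print(html_transcript)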
iface = gr.Interface(
    title="Gladia.io fast audio transcription",
    description="""The Gladia.io Whisper large-v2 audio transcription API
performs fast transcription of any audio or video (around 10 seconds of processing per hour of audio).<br/>For more details and a benchmark run across multiple Speech-To-Text providers, please visit
[our post](https://medium.com/@gladia.io/gladia-alpha-launch-redefining-what-s-possible-with-speech-to-text-ai-686dd4312a86) on Medium.
<br/><br/>
You are more than welcome to join us on [Slack](https://gladia-io.slack.com),
and don't forget to get your own API key on [Gladia.io](https://gladia.io/) during the free alpha!
""",
    fn=transcribe,
    inputs=[
        gr.Audio(label="Audio file", source="upload", type="filepath"),
    ],
    outputs=["html", "json"],
    examples=[
        ["examples/good.will.hunting.wav"],
        ["examples/wolf.of.wall.street.wav"],
    ],
)

iface.queue()
iface.launch()
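
# Note: when run outside Hugging Face Spaces, launch() serves the app locally
# (http://127.0.0.1:7860 by default); share=True can be passed to get a
# temporary public link.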