Spaces:
Runtime error
Runtime error
File size: 3,208 Bytes
1c82b7f 1210c2f 1c82b7f 09a1240 1210c2f 09a1240 ce955af 09a1240 1c82b7f ce955af 1c82b7f ce955af 1c82b7f ce955af dfb8f1c ce955af 1210c2f ce955af 1c82b7f ce955af 1c82b7f ce955af 1210c2f ce955af 1c82b7f ce955af 1c82b7f ce955af 1c82b7f ce955af 1c82b7f ce955af a463d9d ce955af a463d9d ce955af a463d9d ce955af 09a1240 1c82b7f ce955af 98a3806 dfb8f1c abca8c5 ce955af 1c82b7f 1210c2f 1c82b7f ce955af dfb8f1c 1210c2f dfb8f1c 1c82b7f ce955af 1c82b7f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import os
from time import time
import gradio as gr
import requests
from languages import LANGUAGES
# Gladia API key taken from the environment; None when the variable is unset.
GLADIA_API_KEY = os.getenv("GLADIA_API_KEY")

# HTTP headers used for requests to the Gladia API (JSON response, API key).
headers = {"accept": "application/json", "x-gladia-key": GLADIA_API_KEY}

# Language-detection modes that can be sent as the "language_behaviour" field.
ACCEPTED_LANGUAGE_BEHAVIOUR = [
    "manual",
    "automatic single language",
    "automatic multiple languages",
]
def transcribe(
    audio: str = None,
) -> str:
    """Transcribe an audio file to text using the Gladia API.

    The file at ``audio`` is uploaded as multipart form data to the Gladia
    audio-transcription endpoint together with a ``language_behaviour``
    field. The ``transcription`` text of every segment in the response's
    ``prediction`` list is then concatenated into one string.

    Get your API key at gladia.io!

    Parameters:
        audio (str): Path to the audio file to transcribe. When empty or
            ``None``, no audio part is attached to the request.

    Returns:
        str: The segment transcriptions, each preceded by a single space
            (an empty string when there are no segments). If the API
            answers with a status code other than 200, a fixed error
            message is returned instead.
    """
    DEFAULT_MANUAL_LANGUAGE = "english"
    language_behaviour = ACCEPTED_LANGUAGE_BEHAVIOUR[2]

    # Multipart form fields; a (None, value) tuple carries a plain form value
    # instead of a file upload.
    files = {
        "language_behaviour": (None, language_behaviour),
    }

    # Keep the upload handle in a variable so it can be closed once the
    # request has finished, whether it succeeded or raised.
    audio_file = open(audio, "rb") if audio else None
    try:
        if audio_file is not None:
            files["audio"] = (audio, audio_file, "audio/wav")

        # The "language" field is only attached in manual mode.
        if language_behaviour == "manual":
            files["language"] = (None, DEFAULT_MANUAL_LANGUAGE)

        response = requests.post(
            "https://api.gladia.io/audio/text/audio-transcription/",
            headers=headers,
            files=files,
        )
    finally:
        if audio_file is not None:
            audio_file.close()

    if response.status_code != 200:
        # Log the raw failure for debugging and hand a readable message back.
        print(response.content, response.status_code)
        return "Sorry, an error occured with your request :/"

    # The response has two outputs, "prediction" and "prediction_raw";
    # "prediction_raw" carries extra processing/debugging details that are
    # not needed here.
    segments = response.json()["prediction"]
    return "".join(" " + segment["transcription"] for segment in segments)
# Description text handed to the Gradio interface (Markdown with inline HTML).
_DESCRIPTION = """Gladia.io Whisper large-v2 fast audio transcription API
is able to perform fast audio transcriptions for any audio / video (around 10s per hour) .<br/>For more details and a benchmark ran on multiple Speech-To-Text providers, please visit
[our post](https://medium.com/@gladia.io/gladia-alpha-launch-redefining-what-s-possible-with-speech-to-text-ai-686dd4312a86) on Medium.
<br/><br/>
You are more than welcome to join us on [Slack](https://gladia-io.slack.com)
and don't forget to get your own API key on [Gladia.io](https://gladia.io/) during the free alpha !
"""

# Example inputs offered in the UI: one row per example, one value per input.
_EXAMPLES = [
    ["examples/good.will.hunting.wav"],
    ["examples/wolf.of.wall.street.wav"],
]

# Single uploaded audio file, passed to `transcribe` as a path on disk.
_audio_input = gr.Audio(label="Audio file", source="upload", type="filepath")

# Wire `transcribe` to the audio-upload input and a JSON output component.
iface = gr.Interface(
    fn=transcribe,
    inputs=[_audio_input],
    outputs="json",
    title="Gladia.io fast audio transcription",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
)

# Enable request queuing, then start the app.
iface.queue()
iface.launch()
|