Spaces:
Runtime error
Runtime error
File size: 3,837 Bytes
1c82b7f 1210c2f 1c82b7f 09a1240 1210c2f 09a1240 ce955af 09a1240 1c82b7f ce955af 1c82b7f ce955af 1c82b7f ce955af dfb8f1c ce955af 1210c2f ce955af ce11ee8 ce955af 1c82b7f ce955af ce11ee8 bcf8a6e 1c82b7f ce955af 1210c2f ce955af 1c82b7f ce955af 1c82b7f ce955af 1c82b7f ce955af 1c82b7f ce955af 5eb3657 a463d9d ce955af a463d9d 5eb3657 a463d9d 5eb3657 0a7c28f 5eb3657 a463d9d ce955af 5c867fd 1c82b7f eae71ec 1c82b7f ce955af 5798954 dfb8f1c abca8c5 ce955af 1c82b7f 1210c2f 1c82b7f 5eb3657 dfb8f1c 1210c2f dfb8f1c 1c82b7f ce955af 1c82b7f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import os
from time import time
import gradio as gr
import requests
from languages import LANGUAGES
# API key for Gladia, taken from the environment; None when the variable is unset.
GLADIA_API_KEY = os.getenv("GLADIA_API_KEY")

# HTTP headers attached to the transcription request.
headers = {"accept": "application/json", "x-gladia-key": GLADIA_API_KEY}

# Values this app knows for the ``language_behaviour`` request field.
ACCEPTED_LANGUAGE_BEHAVIOUR = [
    "manual",
    "automatic single language",
    "automatic multiple languages",
]
def transcribe(
    audio: str = None,
) -> tuple:
    """
    Transcribe an audio file to speaker-labelled HTML using the Gladia API.

    The file is uploaded as a multipart form POST together with the
    transcription options (automatic multi-language detection, diarization
    with at most two speakers, JSON output, no noise reduction).
    Get your api key at gladia.io !

    Parameters:
        audio (str): The path to the audio file to transcribe. When empty or
            None, the request is still sent, just without an ``audio`` field.

    Returns:
        tuple: ``(html, raw)``. ``html`` is the transcription with a
        ``Speaker`` heading inserted each time the speaker changes; ``raw`` is
        the ``prediction_raw`` entry of the API response. If the API does not
        answer with status 200, ``html`` is an error message and ``raw`` is an
        empty dict, so the caller always receives two values.

    Raises:
        OSError: If ``audio`` is given but the file cannot be opened.
        Exceptions raised by ``requests.post`` or by decoding the response
        are not caught here and propagate to the caller.
    """
    DEFAULT_MANUAL_LANGUAGE = "english"
    language_behaviour = ACCEPTED_LANGUAGE_BEHAVIOUR[2]

    # Multipart form fields; a ``None`` filename makes requests send a plain
    # form value instead of a file part.
    files = {
        "language_behaviour": (None, language_behaviour),
        "noise_reduction": (None, "false"),
        "output_format": (None, "json"),
        "toggle_diarization": (None, "true"),
        "diarization_max_speakers": (None, "2"),
    }

    # Open the upload ourselves so it can be closed once the request is done,
    # whether the request succeeds or raises.
    audio_file = open(audio, "rb") if audio else None
    try:
        if audio_file is not None:
            files["audio"] = (audio, audio_file, "audio/wav")

        # The language field is only sent in manual mode.
        if language_behaviour == "manual":
            files["language"] = (None, DEFAULT_MANUAL_LANGUAGE)

        response = requests.post(
            "https://api.gladia.io/audio/text/audio-transcription/",
            headers=headers,
            files=files,
        )
    finally:
        if audio_file is not None:
            audio_file.close()

    if response.status_code != 200:
        print(response.content, response.status_code)
        # Keep the same two-value shape as the success path: the interface
        # maps the result onto two output components.
        return "Sorry, an error occured with your request :/", {}

    # The response carries two outputs: "prediction" (the segments) and
    # "prediction_raw" (more details about the processing, for debugging).
    # Parse the body once and read both from it.
    payload = response.json()

    parts = []
    current_speaker = ""
    for segment in payload["prediction"]:
        speaker = segment["speaker"]
        if speaker != current_speaker and speaker != "unknown":
            # A new, identified speaker starts talking: emit a heading.
            current_speaker = speaker
            parts.append(
                "<br/><br/><b> Speaker:"
                + str(speaker)
                + ":</b> "
                + segment["transcription"]
            )
        else:
            # Same (or unidentified) speaker: continue the current paragraph.
            parts.append(" " + segment["transcription"])

    return "".join(parts), payload["prediction_raw"]
# Web UI: one uploaded audio file in; rendered HTML transcript and raw JSON out.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(label="Audio file", source="upload", type="filepath"),
    ],
    # Order matches the two values returned by `transcribe`.
    outputs=["html", "json"],
    examples=[
        ["examples/good.will.hunting.wav"],
        ["examples/wolf.of.wall.street.wav"],
    ],
    title="Gladia.io fast audio transcription",
    description="""Gladia.io Whisper large-v2 fast audio transcription API
is able to perform fast audio transcriptions for any audio / video (less than a minute per hour) .<br/>For more details and a benchmark ran on multiple Speech-To-Text providers, please visit
[our post](https://medium.com/@gladia.io/gladia-alpha-launch-redefining-what-s-possible-with-speech-to-text-ai-686dd4312a86) on Medium.
<br/><br/>
You are more than welcome to join us on [Slack](https://gladia-io.slack.com)
and don't forget to get your own API key on [Gladia.io](https://gladia.io/) during the free alpha !
""",
)

# Turn on the request queue, then start serving the app.
iface.queue()
iface.launch()
|