Spaces:

Gladiaio
/

Audio-Transcription

Runtime error

App Files Files Community

jilijeanlouis committed on Feb 15, 2023

Commit

ce955af

•

1 Parent(s): b4c7401

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -25

app.py CHANGED Viewed

@@ -3,61 +3,147 @@ import requests
 import gradio as gr
 from languages import LANGUAGES
-GLADIA_API_KEY = os.environ.get('GLADIA_API_KEY')
 headers = {
-    'accept': 'application/json',
-    'x-gladia-key': GLADIA_API_KEY,
 }
 ACCEPTED_LANGUAGE_BEHAVIOUR = [
-    'manual',
-    'automatic single language',
-    'automatic multiple languages',
 ]
-def greet(audio, language_behaviour, language: str):
     files = {
-        'audio': ("colors.wav", open(audio, 'rb'), 'audio/wav'),
-        'language': (None, language),
-        'language_behaviour': (None, language_behaviour),
     }
     response = requests.post(
-        'https://api.gladia.io/audio/text/audio-transcription/',
         headers=headers,
-        files=files
     )
     if response.status_code != 200:
         print(response.content, response.status_code)
-        return "Sorry, an error occured with you request :/"
     output = response.json()["prediction_raw"]
     del output["metadata"]["original_mediainfo"]
     return output
 iface = gr.Interface(
-    fn=greet,
     inputs=[
-        gr.Audio(source="upload", type="filepath"),
         gr.Dropdown(
-            label="Language transcription behaviour",
             choices=ACCEPTED_LANGUAGE_BEHAVIOUR,
-            value=ACCEPTED_LANGUAGE_BEHAVIOUR[1],
-            type="value",
         ),
         gr.Dropdown(
-            choices = sorted([language_name for language_name in LANGUAGES.keys()]),
             label="Language (only if language behaviour is set to manual)",
-            value="english",
-            type="value",
         ),
     ],
-    outputs="json"
 )
 iface.launch()

 import gradio as gr
 from languages import LANGUAGES
+from time import time
+# API key read from the environment; None when GLADIA_API_KEY is not set.
+GLADIA_API_KEY = os.environ.get("GLADIA_API_KEY")
+# HTTP headers attached to the transcription request made in transcribe().
 headers = {
+    "accept": "application/json",
+    "x-gladia-key": GLADIA_API_KEY,
 }
+# Values offered for the "language_behaviour" form field.
+# Order matters: transcribe() defaults to index 2 and the UI dropdown to index 1.
 ACCEPTED_LANGUAGE_BEHAVIOUR = [
+    "manual",
+    "automatic single language",
+    "automatic multiple languages",
 ]
+def transcribe(
+    audio_url: str = None,
+    audio: str = None,
+    video: str = None,
+    language_behaviour: str = ACCEPTED_LANGUAGE_BEHAVIOUR[2],
+    language: str = "english",
+) -> dict:
+    """
+    Transcribe an audio/video file or a media URL to text using the Gladia API.
+    Exactly one media source is sent, chosen by priority: video, then audio, then audio_url.
+    Find your api key at gladia.io
+    Parameters:
+    audio_url (str): The URL of the media to transcribe. Only used when neither audio nor video is provided.
+    audio (str): The path to the audio file to transcribe. Ignored when video is provided.
+    video (str): The path to the video file. If provided, it is uploaded in the "audio" field and takes priority over audio and audio_url.
+    language_behaviour (str): Determines how language detection should be performed.
+        Must be one of [
+            "manual",
+            "automatic single language",
+            "automatic multiple languages"
+            ]
+        If "manual", the language field must be provided and the API will transcribe the audio in the given language.
+        If "automatic single language", the language of the audio will be automatically detected by the API
+        but will force the transcription to be in a single language.
+        If "automatic multiple languages", the language of the audio will be automatically detected by the API for
+        each sentence allowing code-switching over 97 languages.
+    language (str): The language of the audio file. Only sent when language_behaviour is "manual".
+    Returns:
+    dict: The "prediction_raw" payload of the API response. Its "metadata" entry gains
+        "client_total_execution_time", "data_transfer_time" and "api_server_transcription_time",
+        and loses "original_mediainfo". If no media source is given or the API does not answer
+        with status 200, the function returns a string with an error message instead.
+    """
+    # Function-scope import so the block does not depend on the file's import header.
+    from contextlib import ExitStack
+    error_message = "Sorry, an error occured with your request :/"
+    # priority given to the video, then to the audio file
+    media_path = video or audio
+    if not media_path and not audio_url:
+        # nothing to transcribe: answer right away instead of posting an empty form
+        return error_message
+    # requests sends a (None, value) tuple as a plain multipart form field
+    files = {
+        "language_behaviour": (None, language_behaviour),
+    }
+    # the language field is only meaningful in manual mode, so it is
+    # left out of the form for the automatic behaviours
+    if language_behaviour == "manual":
+        files["language"] = (None, language)
+    # ExitStack closes the uploaded file (when there is one) once the request is done
+    with ExitStack() as stack:
+        if media_path:
+            media_file = stack.enter_context(open(media_path, "rb"))
+            # NOTE(review): "audio/wav" is declared even for video uploads, as before - confirm the API ignores it
+            files["audio"] = (media_path, media_file, "audio/wav")
+        else:
+            # must be a flat (filename, value) pair: the former 1-tuple wrapper
+            # made requests fail while unpacking the field
+            files["audio_url"] = (None, audio_url)
+        start_transfer = time()
+        response = requests.post(
+            "https://api.gladia.io/audio/text/audio-transcription/",
+            headers=headers,
+            files=files,
+        )
+        end_transfer = time()
+    if response.status_code != 200:
+        print(response.content, response.status_code)
+        return error_message
+    # we have 2 outputs:
+    # prediction and prediction_raw
+    # prediction_raw has more details about the processing
+    # and other debugging detailed element you might be
+    # interested in
+    output = response.json()["prediction_raw"]
+    metadata = output["metadata"]
+    client_total_time = end_transfer - start_transfer
+    server_time = metadata["total_transcription_time"]
+    metadata["client_total_execution_time"] = client_total_time
+    # what is left of the round trip once the server-side processing is subtracted
+    metadata["data_transfer_time"] = client_total_time - server_time
+    metadata["api_server_transcription_time"] = server_time
+    # dropped from the result as before; pop() tolerates a response without the key
+    metadata.pop("original_mediainfo", None)
+    return output
+# Gradio UI wired to transcribe(). The inputs are listed in the same order as
+# transcribe()'s parameters: audio_url, audio, video, language_behaviour, language.
 iface = gr.Interface(
+    title="Gladia.io fast audio transcription",
+    description="""Gladia.io Whisper large-v2 fast audio transcription API
+    is able to perform fast audio transcription for any audio / video or url format.<br/><br/>
+    However it's prefered for faster performance to provide <br/>
+    wav 16KHz with 16b encoding (pcm_u16be) to avoid further the conversion time.<br/>
+    "automatic single language" language discovery behavior may also<br/>
+    slow down (just a little bit - talking about ms) the process.
+    <br/>
+    Here is a benchmark ran on multiple Speech-To-Text providers
+    ![Benchmarks](https://storage.gra.cloud.ovh.net/v1/AUTH_90df0bdc74f749ce86783e6550b1e4aa/public-files/benchmark.png)<br/>
+    Join our [Slack](https://gladia-io.slack.com) to discuss with us.<br/><br/>
+    Get your own API key on [Gladia.io](https://gladia.io/) during free alpha
+    """,
+    fn=transcribe,
     inputs=[
+        # audio_url
+        gr.Textbox(
+            lines=1,
+            label="Audio/Video url to transcribe",
+        ),
+        # audio, then video: both are handed over as file paths
+        gr.Audio(label="or Audio file to transcribe", source="upload", type="filepath"),
+        gr.Video(label="or Video file to transcribe", source="upload", type="filepath"),
+        # language_behaviour
         gr.Dropdown(
+            label="""Language transcription behaviour:\n
+        If "manual", the language field must be provided and the API will transcribe the audio in the given language.
+        If "automatic single language", the language of the audio will be automatically detected by the API
+        but will force the transcription to be in a single language.
+        If "automatic multiple languages", the language of the audio will be automatically detected by the API for
+        each sentence allowing code-switching over 97 languages.
+            """,
             choices=ACCEPTED_LANGUAGE_BEHAVIOUR,
+            # UI default is "automatic single language" (index 1), whereas
+            # transcribe()'s own default is index 2
+            value=ACCEPTED_LANGUAGE_BEHAVIOUR[1]
         ),
+        # language
         gr.Dropdown(
+            choices=sorted([language_name for language_name in LANGUAGES.keys()]),
             label="Language (only if language behaviour is set to manual)",
+            value="english"
         ),
     ],
+    # transcribe() returns the API payload as a dict, or a plain string on error
+    outputs="json",
 )
+# NOTE(review): queueing is enabled before launch, presumably so long
+# transcriptions are not cut off by request timeouts - confirm
+iface.queue()
 iface.launch()