Spaces:

camparchimedes
/

nb

Sleeping

App Files Files

nb / app.py

camparchimedes

Update app.py

32e6e2c verified about 1 month ago

raw

history blame

6.92 kB

	### -----------------------------------------------------------------------
	### Transkriber version_1.00
	### app.py
	### -----------------------------------------------------------------------

	# -------------------------------------------------------------------------
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# -------------------------------------------------------------------------


	import os
	import re
	import uuid
	import time
	import psutil
	import subprocess
	from tqdm import tqdm
	import tempfile
	from fpdf import FPDF
	from pathlib import Path
	import numpy as np
	import torch
	from transformers import pipeline
	from gpuinfo import GPUInfo
	from pydub import AudioSegment
	from IPython.display import Audio
	import gradio as gr
	import huggingface_hub


	###############################################################################
	# Configuration | @version 1.05?
	# You are an intelligent assistant specializing in interviews with business clients
	# for in-depth content creation, etc.
	###############################################################################

	# Run inference on the first CUDA GPU when PyTorch can see one; otherwise use the CPU.
	if torch.cuda.is_available():
	    device = torch.device("cuda:0")
	else:
	    device = torch.device("cpu")

	###############################################################################
	# Function to detect leading silence
	###############################################################################

	def milliseconds_until_sound(sound, silence_threshold_in_decibels=-20.0, chunk_size=10):
	    """Return how many milliseconds of leading silence precede the first sound.

	    The audio is scanned from the start in windows of ``chunk_size``
	    milliseconds. Scanning stops at the first window whose loudness
	    (``dBFS``) reaches ``silence_threshold_in_decibels``, or at the end of
	    the audio.

	    Args:
	        sound: Audio object that supports ``len()``, slicing and a ``dBFS``
	            attribute on each slice (this file passes a pydub ``AudioSegment``).
	        silence_threshold_in_decibels: Windows quieter than this are silence.
	        chunk_size: Window size in milliseconds; must be positive.

	    Returns:
	        The offset of the first non-silent window, always a multiple of
	        ``chunk_size``. For audio that is silent throughout, this is the
	        first multiple at or past the audio length.

	    Raises:
	        ValueError: If ``chunk_size`` is not positive.
	    """
	    # A real exception instead of ``assert``: asserts vanish under ``python -O``
	    # and a zero chunk size would then loop forever.
	    if chunk_size <= 0:
	        raise ValueError("chunk_size must be a positive number of milliseconds")

	    total_ms = len(sound)
	    trim_ms = 0
	    # Bounds check first, so loudness is never measured on an empty,
	    # out-of-range slice.
	    while trim_ms < total_ms and sound[trim_ms:trim_ms + chunk_size].dBFS < silence_threshold_in_decibels:
	        trim_ms += chunk_size
	    return trim_ms

	###############################################################################
	# Trim the start of the audio file
	###############################################################################

	def trim_start(filepath):
	    """Strip leading silence from an audio file and save the result as WAV.

	    Args:
	        filepath: Path of the audio file to read.

	    Returns:
	        A tuple ``(trimmed, new_filename)``: the trimmed audio segment and the
	        ``Path`` of the exported copy, written next to the input file as
	        ``trimmed_<name>`` with a ``.wav`` extension.
	    """
	    source = Path(filepath)

	    # No explicit ``format``: pydub then autodetects the container, so
	    # uploads that are not WAV (mp3, m4a, ...) can be decoded as well.
	    audio = AudioSegment.from_file(filepath)

	    start_trim = milliseconds_until_sound(audio)
	    trimmed = audio[start_trim:]

	    # The export is always WAV, so give the file a matching extension
	    # (a no-op for inputs that are already ``.wav``).
	    new_filename = (source.parent / f"trimmed_{source.name}").with_suffix(".wav")
	    trimmed.export(new_filename, format="wav")
	    return trimmed, new_filename

	###############################################################################
	# -- segment the audio into smaller parts (1-minute segments for large files)
	###############################################################################

	def segment_audio(trimmed_audio, output_dir_trimmed, segment_length_ms=60 * 1000):
	    """Split audio into consecutive fixed-length WAV files on local disk.

	    Args:
	        trimmed_audio: Audio object that supports ``len()`` and slicing, and
	            whose slices offer ``export(path, format=...)``.
	        output_dir_trimmed: Directory that receives the segment files; it is
	            created when missing. A falsy value writes to the current
	            working directory.
	        segment_length_ms: Length of every segment in milliseconds (the last
	            one may be shorter). Defaults to one minute.

	    Returns:
	        List of segment file paths (``str``) in playback order, named
	        ``trimmed_00.wav``, ``trimmed_01.wav``, ... Empty for empty audio.

	    Raises:
	        ValueError: If ``segment_length_ms`` is not positive.
	    """
	    if segment_length_ms <= 0:
	        raise ValueError("segment_length_ms must be positive")

	    # Honor the requested output directory instead of always writing into
	    # the working directory.
	    output_dir = Path(output_dir_trimmed) if output_dir_trimmed else Path(".")
	    output_dir.mkdir(parents=True, exist_ok=True)

	    segmented_files = []
	    for index, start_ms in enumerate(range(0, len(trimmed_audio), segment_length_ms)):
	        file_path = str(output_dir / f"trimmed_{index:02d}.wav")
	        segment = trimmed_audio[start_ms:start_ms + segment_length_ms]
	        segment.export(file_path, format="wav")
	        segmented_files.append(file_path)

	    return segmented_files


	###############################################################################
	# Transcription logic
	###############################################################################

	def transcribe(file_upload, progress=gr.Progress(track_tqdm=True)):
	    """Transcribe an uploaded audio file and report basic run statistics.

	    Leading silence is trimmed, the audio is cut into segments, every segment
	    is run through the speech-recognition pipeline, and the partial texts are
	    joined into one transcript.

	    Args:
	        file_upload: Path of the uploaded audio file, as delivered by the
	            ``gr.Audio(type="filepath")`` input.
	        progress: Gradio progress tracker; with ``track_tqdm=True`` it mirrors
	            the tqdm loop over the segments.

	    Returns:
	        A tuple ``(text, system_info)``: the transcript and a multi-line
	        summary with processing time, word count and CPU usage.

	    Raises:
	        gr.Error: If no file was uploaded.
	    """
	    if not file_upload:
	        # Show a readable message in the UI instead of a TypeError traceback.
	        raise gr.Error("Please upload an audio file before transcribing.")

	    start_time = time.time()
	    intermediate_files = []
	    try:
	        # -- trim audio, segment it for processing
	        trimmed_audio, trimmed_filename = trim_start(file_upload)
	        intermediate_files.append(trimmed_filename)

	        # A private scratch directory keeps concurrent requests from sharing
	        # segment files.
	        with tempfile.TemporaryDirectory() as segment_dir:
	            segmented_files = segment_audio(trimmed_audio, segment_dir)
	            intermediate_files.extend(segmented_files)

	            pipe = _get_asr_pipeline()
	            transcriptions = [
	                pipe(seg_file)["text"]
	                for seg_file in tqdm(segmented_files, desc="Transcribing")
	            ]
	    finally:
	        # Remove every intermediate file explicitly, wherever the helpers
	        # wrote it, so repeated runs do not fill the disk.
	        for leftover in intermediate_files:
	            try:
	                Path(leftover).unlink()
	            except OSError:
	                pass  # already gone (or not removable): nothing more to do

	    # Join with a single space so words at segment boundaries never fuse.
	    pieces = (piece.strip() for piece in transcriptions)
	    text = " ".join(piece for piece in pieces if piece)

	    end_time = time.time()
	    output_time = end_time - start_time

	    # --Word count
	    word_count = len(text.split())

	    # --CPU metric (sampled over one second after the work is done)
	    cpu_usage = psutil.cpu_percent(interval=1)

	    # --system info string
	    system_info = f"""
	Processing time: {output_time:.2f} seconds.
	Number of words: {word_count}
	CPU Usage: {cpu_usage}%
	"""

	    return text, system_info


	# Speech-recognition pipeline shared by all requests; built on first use.
	_ASR_PIPELINE = None


	def _get_asr_pipeline():
	    """Return the shared ASR pipeline, loading the model only on first use."""
	    global _ASR_PIPELINE
	    if _ASR_PIPELINE is None:
	        _ASR_PIPELINE = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", chunk_length_s=30, device=device)
	    return _ASR_PIPELINE


	###############################################################################
	# Interface
	###############################################################################

	# Markdown text rendered at the top of the page via gr.Markdown(HEADER_INFO).
	# .strip() removes the surrounding whitespace added by the triple-quoted
	# layout; the leading "# " makes Markdown render the sentence as a heading.
	HEADER_INFO = """
	# This space uses the Norwegian NB-Whisper Large model by NbAiLab to transcribe long-form microphone or audio inputs in Norwegian of arbitrary length.
	""".strip()

	# Custom stylesheet passed to gr.Blocks(css=...). The two selectors match the
	# elem_id values given to the textboxes in the interface:
	# "transcription_output" (transcript) and "system_info_box" (run statistics).
	css = """
	#transcription_output textarea {
	background-color: #000000; /* black */
	color: #00FF00 !important; /* text color */
	font-size: 18px; /* font size */
	}

	#system_info_box textarea {
	background-color: #ffe0b3; /* orange */
	color: black !important; /* text color */
	font-size: 16px; /* font size */
	font-weight: bold; /* bold font */
	}
	"""

	# Build the Gradio Blocks page, styled with the custom CSS string `css`.
	# NOTE(review): the indentation of this section is missing in this copy, so
	# the exact nesting of the rows/columns below cannot be read from the code --
	# presumably each `with` encloses the statements up to the next blank line;
	# confirm against the deployed app before re-indenting.
	iface = gr.Blocks(css=css)

	with iface:

	# Page header text.
	gr.Markdown(HEADER_INFO)

	# Input controls: upload-only audio input handed to the callback as a file
	# path, and the button that starts the transcription.
	with gr.Row():
	upload = gr.Audio(label="Upload audio", sources="upload", type="filepath")
	transcribe_btn = gr.Button("Transkriber")

	# Outputs: the transcript textbox (scale 3) and the statistics textbox
	# (scale 1); their elem_id values are the hooks used by the CSS.
	with gr.Row():
	with gr.Column(scale=3):
	text_output = gr.Textbox(label="Transkribert Tekst", placeholder="t r a n s c r i p t i o", elem_id="transcription_output")
	with gr.Column(scale=1):
	system_info = gr.Textbox(label="Antall sekunder, ord, system data:", elem_id="system_info_box")

	# Footer with the open-source and Apache-2.0 license badges (raw HTML inside
	# Markdown).
	with gr.Row():
	gr.Markdown('''
	<div style="text-align:center;">
	<a href="https://opensource.com/resources/what-open-source" style="display: inline-block;">
	<img src="https://badgen.net/badge/Open%20Source%20%3F/Yes%21/blue?icon=github" alt="Open Source? Yes!" style="vertical-align: middle;">
	</a>
	<span style="display:inline-block; width: 20px;"></span> <!-- This adds space between the logos -->
	<a href="https://opensource.org/licenses/Apache-2.0" style="display: inline-block;">
	<img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License: Apache 2.0" style="vertical-align: middle;">
	</a>
	</div>
	''')


	# Clicking the button calls transcribe() with the uploaded file path and
	# writes its two return values to the transcript and statistics textboxes.
	transcribe_btn.click(
	fn=transcribe,
	inputs=[upload],
	outputs=[text_output, system_info]
	)

	# Start the web server; debug=True is passed to launch().
	iface.launch(debug=True)