import gradio as gr
import spaces
import soundfile as sf
import torch
import whisper
import mdtex2html  # used by the Markdown/TeX postprocess hook below
from transformers import AutoModelForCausalLM, AutoTokenizer, VitsModel

DESCRIPTION = """\
# AI Trek - Generative AI usage
This Space demonstrates LAIONBOT functionality.
🔎 Large language models are notable for their ability to perform general-purpose language generation and understanding.
🔨 In this demo, you can interact with the model not only through text, but also by asking questions out loud and hearing the answers via a text-to-speech model.
"""

def load_whisper():
    # Speech-to-text: load Whisper on CPU first; it is moved to GPU below if one is available.
    return whisper.load_model("medium", device="cpu")

def load_tts():
    # Text-to-speech: Polish VITS model from Meta's MMS project.
    tts_model = VitsModel.from_pretrained("facebook/mms-tts-pol")
    # tts_model.to("cuda")
    tokenizer_tss = AutoTokenizer.from_pretrained("facebook/mms-tts-pol")
    return tts_model, tokenizer_tss
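
# Note: synthesis currently runs on CPU. Uncommenting tts_model.to("cuda") above
# would move the model to GPU, but then the tokenized inputs built in read_text()
# would also need a matching .to("cuda"); this is a sketch, not tested here.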

def save_to_txt(text_to_save):
    # Persist the latest prompt to a local text file.
    with open('prompt.txt', 'w', encoding='utf-8') as f:
        f.write(text_to_save)

def read_txt():
    # Read the persisted prompt back as a list of lines.
    with open('prompt.txt', encoding='utf-8') as f:
        lines = f.readlines()
    return lines
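
# Neither helper is wired into the UI below; a hypothetical round trip would be:
#     save_to_txt("Cześć!")   # writes prompt.txt
#     read_txt()              # -> ["Cześć!"]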

def _load_model_tokenizer():
    # Chat LLM: a community mirror of Qwen-7B-Chat, loaded in fp16 across available devices.
    model_id = 'tangger/Qwen-7B-Chat'
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
    return model, tokenizer
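
# Note: the chat_stream() calls below are not part of the standard transformers
# API; Qwen ships them in its remote modeling code (hence trust_remote_code=True),
# yielding progressively longer partial replies that suit streaming chat UIs.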

whisper_model = load_whisper()
if torch.cuda.is_available():
    whisper_model = whisper_model.to(device='cuda')

tts_model, tokenizer_tss = load_tts()
model, tokenizer = _load_model_tokenizer()

def postprocess(self, y):
    # Render each (message, response) pair as HTML (Markdown + TeX) before display.
    if y is None:
        return []
    for i, (message, response) in enumerate(y):
        y[i] = (
            None if message is None else mdtex2html.convert(message),
            None if response is None else mdtex2html.convert(response),
        )
    return y
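
# In the upstream Qwen web demo this function is monkey-patched onto the Chatbot
# component so that it runs on every update, e.g.:
#
#     gr.Chatbot.postprocess = postprocess
#
# It is left unattached here; the assignment above is a sketch assuming the
# classic Gradio Chatbot API, not something this file currently does.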

def _parse_text(text):
    # Convert model output to display-safe HTML: fenced code blocks become
    # <pre><code> elements and special characters inside them are escaped.
    lines = text.split("\n")
    lines = [line for line in lines if line != ""]
    count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            count += 1
            items = line.split("`")
            if count % 2 == 1:
                # Opening fence: start a code block tagged with its language.
                lines[i] = f'<pre><code class="language-{items[-1]}">'
            else:
                # Closing fence.
                lines[i] = "<br></code></pre>"
        else:
            if i > 0:
                if count % 2 == 1:
                    # Inside a code block: escape characters that Markdown/HTML would mangle.
                    line = line.replace("`", r"\`")
                    line = line.replace("<", "&lt;")
                    line = line.replace(">", "&gt;")
                    line = line.replace(" ", "&nbsp;")
                    line = line.replace("*", "&ast;")
                    line = line.replace("_", "&lowbar;")
                    line = line.replace("-", "&#45;")
                    line = line.replace(".", "&#46;")
                    line = line.replace("!", "&#33;")
                    line = line.replace("(", "&#40;")
                    line = line.replace(")", "&#41;")
                    line = line.replace("$", "&#36;")
                lines[i] = "<br>" + line
    return "".join(lines)
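
# For illustration, a reply like "```python\nprint('hi')\n```" comes out as
#     <pre><code class="language-python"><br>print&#40;'hi'&#41;<br></code></pre>
# so the Chatbot widget renders it as an escaped code block.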

@spaces.GPU
def predict_text(_query, _chatbot, _task_history):
    # Text-input chat: stream the model's reply into the Chatbot as it is generated.
    # Distinct name so the audio predict() below does not shadow it.
    print(f"User: {_parse_text(_query)}")
    _chatbot.append((_parse_text(_query), ""))
    full_response = ""
    # System prompt (Polish): "You are an AI assistant. Answer politely and in Polish :)"
    for response in model.chat_stream(tokenizer, _query, history=_task_history,
                                      system="Jesteś asystentem AI. Odpowiadaj grzecznie i w języku polskim :)"):
        _chatbot[-1] = (_parse_text(_query), _parse_text(response))
        yield _chatbot
        full_response = _parse_text(response)
    print(f"History: {_task_history}")
    _task_history.append((_query, full_response))
    print(f"Qwen-7B-Chat: {_parse_text(full_response)}")

@spaces.GPU
def read_text(text):
    # Text-to-speech: synthesize the reply with VITS and write it to a WAV file.
    print("___Text to read aloud!")
    inputs = tokenizer_tss(text, return_tensors="pt")
    with torch.no_grad():
        output = tts_model(**inputs).waveform.squeeze().cpu().numpy()
    sf.write('temp_file.wav', output, tts_model.config.sampling_rate)
    return 'temp_file.wav'

def update_audio(text):
    # Point the Audio component at the file read_text() just wrote; the chatbot
    # state passed in is ignored, only the fixed filename matters.
    return 'temp_file.wav'

def translate(audio):
    # Speech-to-text: transcribe a Polish audio file with Whisper.
    print("__Sending audio to the STT model")
    transcription = whisper_model.transcribe(audio, language="pl")
    return transcription["text"]
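
# predict() below inlines this same transcribe() call instead of calling
# translate(), so the helper is kept only as a standalone utility.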

@spaces.GPU(enable_queue=True)
def predict(audio, _chatbot, _task_history):
    # Voice chat: transcribe the recorded audio, stream the LLM reply, then
    # synthesize the reply to speech.
    _query = whisper_model.transcribe(audio, language='pl')["text"]
    print(f"____User: {_parse_text(_query)}")
    _chatbot.append((_parse_text(_query), ""))
    full_response = ""
    for response in model.chat_stream(tokenizer,
                                      _query,
                                      history=_task_history,
                                      system="You are an AI assistant. Please be kind and answer responsibly."):
        _chatbot[-1] = (_parse_text(_query), _parse_text(response))
        yield _chatbot
        full_response = _parse_text(response)
    print(f"____History: {_task_history}")
    _task_history.append((_query, full_response))
    print(f"__Qwen-7B-Chat: {_parse_text(full_response)}")
    print("____full_response", full_response)
    read_text(_parse_text(full_response))  # generate the spoken reply (temp_file.wav)

@spaces.GPU(enable_queue=True)
def regenerate(_chatbot, _task_history):
    # Drop the last exchange and re-run the text pipeline on the same query.
    if not _task_history:
        yield _chatbot
        return
    item = _task_history.pop(-1)
    _chatbot.pop(-1)
    # item[0] is a text query, so route it through predict_text, not the audio predict.
    yield from predict_text(item[0], _chatbot, _task_history)

with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)
    chatbot = gr.Chatbot(label='Qwen Voice Chatbot', elem_classes="control-height")
    query = gr.Textbox(lines=2, label='Input')
    task_history = gr.State([])
    audio_output = gr.Audio('ai_intro.wav', label="Generated Audio (wav)", type='filepath', autoplay=False)
    # with gr.Row():
    #     submit_btn = gr.Button("🚀 Send an input file to LLM")
    with gr.Row():
        audio_upload = gr.Audio(sources="microphone", type="filepath", show_label=False)
        submit_audio_btn = gr.Button("🎙️ Send audio")
    # submit_btn.click(predict_text, [query, chatbot, task_history], [chatbot], show_progress=True)
    # After the streamed reply finishes, swap the Audio component to the freshly
    # synthesized WAV file.
    submit_audio_btn.click(predict, [audio_upload, chatbot, task_history], [chatbot], show_progress=True).then(update_audio, chatbot, audio_output)

demo.queue().launch()