"""Gradio demo: transcribe a spoken command and map it to a known label.

The audio is transcribed with a Whisper ASR pipeline, the transcript is
matched against the example sentences of the selected state using a FAISS
similarity search, and the label that owns the best-matching sentence is
returned.
"""

import json

import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import pipeline

# Speech-to-text pipeline used to transcribe the recorded command.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v2")

# Iterated below as a two-level mapping: {state: {label: [sentences]}}.
# NOTE(review): the labels are presumably intent/task names -- confirm
# against tasks.json.
with open("tasks.json", "r", encoding="utf-8") as json_file:
    data = json.load(json_file)


def find_index(sentence, state=None):
    """Return the label whose sentence list contains *sentence*.

    Args:
        sentence: Sentence to look up; compared with ``==`` against every
            stored sentence.
        state: Optional top-level key of ``data``. When given, only that
            state's labels are searched, so an identical sentence stored
            under a different state cannot shadow the result. When omitted,
            every state is searched in file order.

    Returns:
        The first matching label, or ``None`` when nothing matches (or when
        *state* is given but is not present in ``data``).
    """
    if state is None:
        groups = data.values()
    else:
        groups = [data[state]] if state in data else []

    for labelled in groups:
        for label, sentences in labelled.items():
            if any(sentence == candidate for candidate in sentences):
                return label
    return None


# One embedding model shared by every index; loading it once per state is
# wasted work because the configuration is identical for all of them.
_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

# state -> FAISS index built from all sentences of that state.
faiss_indexes = {}
for state_name, labelled in data.items():
    texts = [
        sentence
        for sentences in labelled.values()
        for sentence in sentences
    ]
    faiss_indexes[state_name] = FAISS.from_texts(texts, _embeddings)


def transcribe_the_command(audio_path, state):
    """Transcribe an audio file and return the closest known label.

    Args:
        audio_path: Path of the audio file to transcribe.
        state: Key of ``tasks.json`` selecting which sentence index to search.

    Returns:
        The label owning the stored sentence most similar to the transcript,
        or ``None`` if that sentence cannot be mapped back to a label.

    Raises:
        KeyError: If *state* has no index.
    """
    # Validate before transcribing so a bad state fails fast instead of
    # after an expensive ASR pass.
    if state not in faiss_indexes:
        raise KeyError(
            f"unknown state {state!r}; expected one of {sorted(faiss_indexes)}"
        )

    transcript = pipe(audio_path)["text"]
    # Only the best hit is used, so ask the index for a single result.
    similar = (
        faiss_indexes[state].similarity_search(transcript, k=1)[0].page_content
    )
    print(similar)
    # Restrict the reverse lookup to the same state the sentence came from.
    return find_index(similar, state)


# NOTE(review): title/description mention a "small" Whisper model, but the
# pipeline above loads openai/whisper-large-v2 -- confirm which is intended.
iface = gr.Interface(
    fn=transcribe_the_command,
    # type="filepath" makes Gradio pass a path string; the default ("numpy")
    # would pass a (sample_rate, array) tuple that the pipeline call above
    # is not written to handle.
    inputs=[gr.Audio(type="filepath"), gr.Textbox()],
    outputs="text",
    title="Whisper Small",
    description="Realtime demo for intent recognition using a Whisper small model.",
)

iface.launch(share=True)