Falln87 committed on
Commit
892b053
·
verified ·
1 Parent(s): 00b7f2c

Created application file

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
+ from huggingface_hub import inference_api
5
+ import speech_recognition as sr
6
+ import pyttsx3
7
+ import diffusers
8
+
9
# Set up speech recognition and synthesis.
r = sr.Recognizer()
engine = pyttsx3.init()

# Load the sequence-classification model and its tokenizer from the Hub.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set up the Serverless Inference API client.
# NOTE: the token below is a placeholder and must be replaced with a real
# token (preferably loaded from configuration rather than hard-coded).
inference_api_token = "YOUR_INFERENCES_API_TOKEN"
# InferenceApi requires the target repository id; constructing it with only a
# token raises TypeError. The binding keeps its original name, which shadows
# the imported ``inference_api`` module from this point on.
inference_api = inference_api.InferenceApi(
    repo_id=model_name,
    token=inference_api_token,
)

# Set up the Diffusers pipeline.
diffusers_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# DiffusionPipeline resolves the pipeline class declared by the checkpoint
# itself; DDPMPipeline is an unconditional pipeline that does not accept a
# text prompt. The pipeline is moved to the selected device once, at load time.
diffusers_model = diffusers.DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2"
).to(diffusers_device)
25
+
26
def recognize_speech():
    """Capture one utterance from the microphone and transcribe it.

    Status and error messages are shown in the Streamlit page, because
    ``print`` output only reaches the server console.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition service could not be reached.
    """
    with sr.Microphone() as source:
        st.info("Say something!")
        audio = r.listen(source)

    try:
        return r.recognize_google(audio, language="en-US")
    except sr.UnknownValueError:
        # The service answered but could not make sense of the audio.
        st.warning("Sorry, I didn't catch that. Try again!")
    except sr.RequestError as err:
        # The service could not be reached or rejected the request.
        st.error(f"Speech recognition service unavailable: {err}")
    return None
36
+
37
def respond_to_text(text):
    """Classify *text* with the locally loaded model and return its label.

    The input is tokenized (truncated to 512 tokens), run through the
    module-level sequence-classification ``model``, and the highest-scoring
    class index is translated to its name via ``model.config.id2label``.

    Args:
        text: The user-supplied string to classify.

    Returns:
        The predicted class label as a string.
    """
    inputs = tokenizer(
        text,
        add_special_tokens=True,
        truncation=True,  # max_length alone does not shorten long inputs
        max_length=512,
        return_attention_mask=True,
        return_tensors="pt",
    )
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted = int(logits.argmax(dim=1)[0])
    # The prediction is a class index, not a vocabulary token id, so it is
    # mapped through the label table rather than decoded by the tokenizer.
    return model.config.id2label[predicted]
50
+
51
def generate_image(prompt):
    """Generate a single image for *prompt* with the diffusion pipeline.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The first image from the pipeline output's ``images`` list, which
        can be passed directly to ``st.image``.
    """
    # The device is a property of the loaded pipeline, not a call argument.
    result = diffusers_model(prompt, num_inference_steps=50)
    return result.images[0]
54
+
55
def speak_text(text):
    """Speak *text* aloud through the shared text-to-speech engine.

    The utterance is queued and the engine is then run until the queue
    has been processed.
    """
    tts = engine
    tts.say(text)
    tts.runAndWait()
58
+
59
st.title("Chat with LLM and Generate Images")

# Typed chat: classify the entered text, show the result, and read it aloud.
chat_input = st.text_input("Type or speak something:")
if chat_input:
    response = respond_to_text(chat_input)
    st.write("LLM Response:", response)
    speak_text(response)

# The prompt field is rendered unconditionally. A Streamlit button is only
# True for the single rerun triggered by its click, so a text input created
# inside the button branch would always be empty when the image is generated
# and would disappear again on the next rerun.
prompt = st.text_input("Enter a prompt for the image:")
generate_image_button = st.button("Generate Image")
if generate_image_button:
    if prompt:
        image = generate_image(prompt)
        st.image(image, use_column_width=True)
    else:
        st.warning("Please enter a prompt for the image first.")

# Spoken chat: transcribe microphone input, then handle it like typed text.
mic_button = st.button("Speak")
if mic_button:
    text = recognize_speech()
    if text:
        response = respond_to_text(text)
        st.write("LLM Response:", response)
        speak_text(response)