camanalo1 committed on
Commit
67a004a
1 Parent(s): 797a570

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -65
app.py CHANGED
@@ -1,70 +1,17 @@
1
  import gradio as gr
2
- import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
- import os
5
- from threading import Thread
6
- import spaces
7
 
8
- token = os.environ["HF_TOKEN"]
 
9
 
10
- # Load the LLM model and tokenizer
11
- model = AutoModelForCausalLM.from_pretrained(
12
- "microsoft/Phi-3-mini-4k-instruct", token=token, trust_remote_code=True
13
- )
14
- tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", token=token)
15
- terminators = [tok.eos_token_id]
16
 
17
- # Set device
18
- if torch.cuda.is_available():
19
- device = torch.device("cuda")
20
- print(f"Using GPU: {torch.cuda.get_device_name(device)}")
21
- else:
22
- device = torch.device("cpu")
23
- print("Using CPU")
24
 
25
- model = model.to(device)
26
-
27
-
28
- @spaces.GPU(duration=60)
29
- def chat(message):
30
- # Initialize chat history with the user's message
31
- chat = [{"role": "user", "content": message}]
32
- # Convert chat history to a format suitable for the model
33
- messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
34
- # Tokenize the messages and move them to the appropriate device
35
- model_inputs = tok([messages], return_tensors="pt").to(device)
36
- # Initialize a TextIteratorStreamer for dynamic generation
37
- streamer = TextIteratorStreamer(
38
- tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True
39
- )
40
- # Set generation parameters
41
- generate_kwargs = {
42
- **model_inputs,
43
- "streamer": streamer,
44
- "max_new_tokens": 256, # You can adjust this value if needed
45
- "do_sample": True,
46
- "temperature": 0.9,
47
- "eos_token_id": terminators,
48
- }
49
- # Start model generation in a separate thread
50
- t = Thread(target=model.generate, kwargs=generate_kwargs)
51
- t.start()
52
-
53
- partial_text = ""
54
- # Yield partially generated text until generation is complete
55
- for new_text in streamer:
56
- partial_text += new_text
57
- yield partial_text
58
-
59
- # Yield the final generated text
60
- yield partial_text
61
-
62
-
63
- demo = gr.Interface(
64
- fn=chat,
65
- inputs="text",
66
- outputs=gr.Textbox(lines=5, label="Generated Text"),
67
- title="Chat With LLMs",
68
- description="Now Running [microsoft/Phi-3-mini-4k-instruct](https://huggingface.com/microsoft/Phi-3-mini-4k-instruct)",
69
- )
70
- demo.launch()
 
1
  import gradio as gr
2
+ from transformers import pipeline
 
 
 
 
3
 
4
+ # Initialize the text generation pipeline
5
+ generator = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
6
 
7
+ # Define the function to generate text based on input prompt
8
+ def generate_text(prompt):
9
+ # Generate text based on the input prompt
10
+ generated_text = generator(prompt, max_length=100, num_return_sequences=1)[0]['generated_text']
11
+ return generated_text
 
12
 
13
+ # Create Gradio interface
14
+ prompt_input = gr.Textbox(lines=5, label="Input Prompt")
15
+ output_text = gr.Textbox(label="Generated Text")
 
 
 
 
16
 
17
+ gr.Interface(generate_text, prompt_input, output_text, title="Conversational AI", description="Engage in conversation with our AI.").launch()