sagar007 committed on
Commit f1ac63f
1 Parent(s): 34c5cbd

Update app.py

Files changed (1)
  1. app.py +60 -27
app.py CHANGED
@@ -1,31 +1,64 @@
+import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-
-# Load the finetuned model and tokenizer from Hugging Face Model Hub
-model_path = "sagar007/phi3.5_finetune"
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, device_map="auto")
-
-# Create a text-generation pipeline
-generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
-
-def generate_text(prompt, max_length=100, temperature=0.7):
-    """Generate text based on the input prompt."""
-    generated = generator(prompt, max_length=max_length, temperature=temperature, num_return_sequences=1)
-    return generated[0]['generated_text']
-
-# Create the Gradio interface
-iface = gr.Interface(
-    fn=generate_text,
-    inputs=[
-        gr.Textbox(lines=5, label="Enter your prompt"),
-        gr.Slider(minimum=50, maximum=500, value=100, step=10, label="Max Length"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel, PeftConfig
+import spaces
+
+# Check if CUDA is available and set the device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
+
+# Load model and tokenizer
+MODEL_PATH = "sagar007/phi3.5_finetune"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
+tokenizer.pad_token = tokenizer.eos_token
+
+base_model = AutoModelForCausalLM.from_pretrained(
+    "microsoft/Phi-3.5-mini-instruct",
+    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
+    device_map="auto",
+    trust_remote_code=True
+)
+
+peft_config = PeftConfig.from_pretrained(MODEL_PATH)
+model = PeftModel.from_pretrained(base_model, MODEL_PATH)
+model.to(device)
+model.eval()
+
+@spaces.GPU(duration=60)
+def generate_response(instruction, max_length=512):
+    prompt = f"Instruction: {instruction}\nResponse:"
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_length=max_length,
+            num_return_sequences=1,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True
+        )
+
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response.split("Response:")[1].strip()
+
+def chatbot(message, history):
+    response = generate_response(message)
+    return response
+
+demo = gr.ChatInterface(
+    chatbot,
+    title="Fine-tuned Phi-3.5 Chatbot",
+    description="This is a chatbot using a fine-tuned version of the Phi-3.5 model.",
+    theme="default",
+    examples=[
+        "Explain the concept of machine learning.",
+        "Write a short story about a robot learning to paint.",
+        "What are some effective ways to reduce stress?",
     ],
-    outputs=gr.Textbox(lines=10, label="Generated Text"),
-    title="Finetuned Phi-3.5 Text Generation",
-    description="Enter a prompt and generate text using the finetuned Phi-3.5 model.",
+    cache_examples=True,
 )
 
-# Launch the app
-iface.launch()
+if __name__ == "__main__":
+    demo.launch()
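
For reference, the new gr.ChatInterface can also be queried programmatically once this app.py is running on a Space. Below is a minimal sketch using gradio_client, assuming Gradio's default /chat endpoint for ChatInterface; the Space id is a placeholder, since the hosting Space is not named in this commit.

from gradio_client import Client

# Placeholder Space id - replace with the Space that actually hosts this app.py.
client = Client("sagar007/<space-name>")

# gr.ChatInterface exposes a /chat endpoint that takes the user message;
# the conversation history is managed by the interface itself.
reply = client.predict("Explain the concept of machine learning.", api_name="/chat")
print(reply)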