Yersel committed
Commit bdc9656
1 Parent(s): a38cd38

add application

Files changed (2)
  1. app.py +85 -4
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1,7 +1,88 @@
 import gradio as gr
 
-def greet(name):
-    return "Hello " + name + "!!"
 
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from huggingface_hub import login
+import torch
 import gradio as gr
+import os
+import spaces
+
+token = os.environ.get("HF_TOKEN_READ")
+login(token)
+
+model_id = "meta-llama/Llama-3.2-1B-Instruct"
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,
+    token=token
+)
+
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
+else:
+    device = torch.device("cpu")
+    print("Using CPU")
+
+model = model.to(device)
+
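+# ZeroGPU: the decorator below attaches a GPU only while respond() is running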
+@spaces.GPU
+def respond(
+    message,
+    history,
+    system_message,
+    max_tokens,
+    temperature,
+    top_p
+):
+    messages = [{"role": "system", "content": system_message}]
+
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+
+    messages.append({"role": "user", "content": message})
+
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors='pt'
+    ).to(model.device)
+
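+    # Stop generation at the tokenizer's EOS token or Llama 3's <|eot_id|> end-of-turn token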
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=max_tokens,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p
+    )
+
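+    # Decode only the newly generated tokens, then yield the running text one character at a time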
+    response = ""
+
+    for message in tokenizer.decode(
+        outputs[0][input_ids.shape[-1]:],
+        skip_special_tokens=True
+    ):
+        response += message
+        yield response
+
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly assistant", label="System Message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=3, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1, value=0.95, step=0.05, label="Top p")
+    ]
+)
+
+if __name__ == "__main__":
+    demo.launch()
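Note that respond() runs model.generate() to completion and only then yields the text character by character, so the streaming effect is simulated. A minimal sketch of true token streaming with transformers' TextIteratorStreamer is below; respond_streaming and generate_kwargs are illustrative names, not part of this commit:

    from threading import Thread
    from transformers import TextIteratorStreamer

    def respond_streaming(model, tokenizer, input_ids, max_tokens, temperature, top_p):
        # The streamer receives tokens as generate() produces them on a worker thread
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        generate_kwargs = dict(
            input_ids=input_ids,
            streamer=streamer,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
        Thread(target=model.generate, kwargs=generate_kwargs).start()
        text = ""
        for chunk in streamer:  # each chunk is newly decoded text
            text += chunk
            yield text

gr.ChatInterface accepts the same generator shape either way, so the change would be confined to the body of respond().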
requirements.txt ADDED
@@ -0,0 +1,5 @@
+torch
+transformers
+gradio
+huggingface_hub
+spaces
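The dependencies are unpinned, so the Space resolves the latest compatible versions at build time. A quick local sanity check after pip install -r requirements.txt, as a hedged sketch (the package list simply mirrors this file):

    # Print the installed version of each dependency, or flag it as missing
    import importlib.metadata as md

    for pkg in ("torch", "transformers", "gradio", "huggingface_hub", "spaces"):
        try:
            print(f"{pkg}=={md.version(pkg)}")
        except md.PackageNotFoundError:
            print(f"{pkg}: not installed")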