patrol114 committed on
Commit
f4a7d4e
1 Parent(s): 9cbdfb7

Update app.py

Files changed (1)
  1. app.py +113 -25
app.py CHANGED
@@ -1,63 +1,151 @@
  import gradio as gr
  from huggingface_hub import InferenceClient

- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


  def respond(
      message,
      history: list[tuple[str, str]],
      system_message,
      max_tokens,
      temperature,
      top_p,
  ):
      messages = [{"role": "system", "content": system_message}]

-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})

      messages.append({"role": "user", "content": message})

-     response = ""

-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-         response += token
          yield response

- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
  demo = gr.ChatInterface(
      respond,
      additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
          gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
          gr.Slider(
              minimum=0.1,
              maximum=1.0,
              value=0.95,
              step=0.05,
              label="Top-p (nucleus sampling)",
          ),
      ],
  )


- if __name__ == "__main__":
      demo.launch()
 
  import gradio as gr
  from huggingface_hub import InferenceClient
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch

+ # Initialize the InferenceClient
+ client = InferenceClient("01-ai/Yi-Coder-9B-Chat")

+ # Initialize tokenizer and model
+ model_path = "01-ai/Yi-Coder-9B-Chat"  # Make sure this is correct
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto").eval()

  def respond(
      message,
      history: list[tuple[str, str]],
      system_message,
      max_tokens,
      temperature,
      top_p,
+     use_local_model: bool,
  ):
      messages = [{"role": "system", "content": system_message}]

+     for user, assistant in history:
+         if user:
+             messages.append({"role": "user", "content": user})
+         if assistant:
+             messages.append({"role": "assistant", "content": assistant})

      messages.append({"role": "user", "content": message})

+     if use_local_model:
+         # Use local model
+         input_ids = tokenizer.encode("".join([m["content"] for m in messages]), return_tensors="pt")
+         input_ids = input_ids.to(model.device)

+         with torch.no_grad():
+             output = model.generate(
+                 input_ids,
+                 max_new_tokens=max_tokens,
+                 temperature=temperature,
+                 top_p=top_p,
+                 do_sample=True,
+                 pad_token_id=tokenizer.eos_token_id,
+             )

+         response = tokenizer.decode(output[0], skip_special_tokens=True)
          yield response
+     else:
+         # Use Hugging Face Inference API
+         response = ""
+         for message in client.text_generation(
+             "".join([m["content"] for m in messages]),
+             max_new_tokens=max_tokens,
+             stream=True,
+             temperature=temperature,
+             top_p=top_p,
+         ):
+             response += message
+             yield response

+ # Create Gradio interface
  demo = gr.ChatInterface(
      respond,
      additional_inputs=[
+         gr.Textbox(value="Odpowiadasz w Jezyku Polskim jesteś Coder/Developer/Programista tworzysz pełny kod..", label="System message"),
+         gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"),
          gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
          gr.Slider(
              minimum=0.1,
              maximum=1.0,
              value=0.95,
              step=0.05,
              label="Top-p (nucleus sampling)",
          ),
+         gr.Checkbox(label="Use Local Model", value=False),
      ],
+     title="Advanced Chat Interface",
+     description="Chat with an AI model using either the Hugging Face Inference API or a local model.",
  )

+ if __name__ == "__main__":
      demo.launch()
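
One caveat in the rewritten respond(): both branches flatten the conversation with "".join([m["content"] for m in messages]), which discards the role boundaries that the old client.chat_completion call preserved, and the local branch decodes the full output sequence, so the prompt is echoed back in the reply. Below is a minimal sketch of a role-aware alternative, assuming the 01-ai/Yi-Coder-9B-Chat tokenizer ships a chat template (most chat-tuned checkpoints on the Hub do); the helper name build_prompt is illustrative, not part of the commit.

# Sketch only: role-aware prompt construction, not the commit's method.
# Assumes the checkpoint's tokenizer defines a chat template; falls back
# to the commit's naive join if it does not.
def build_prompt(tokenizer, messages):
    if tokenizer.chat_template:
        # Render system/user/assistant turns with the model's own template and
        # append the assistant header so generation continues as the assistant.
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    return "".join(m["content"] for m in messages)  # original behavior

# Inside respond(), the local branch could then encode the templated prompt
# and decode only the newly generated tokens, e.g.:
#     input_ids = tokenizer(build_prompt(tokenizer, messages), return_tensors="pt").input_ids.to(model.device)
#     output = model.generate(input_ids, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)
#     response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)

The API branch could pass the same templated string to client.text_generation, keeping the two code paths consistent.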