wiklif committed
Commit 4de15a4 · Parent(s): 783b16d

added cuda support

Files changed (2)
  1. app.py +14 -10
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,15 +1,19 @@
 from huggingface_hub import InferenceClient
 import gradio as gr
+import torch
+
+# Check whether CUDA is available
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 
 def format_prompt(message, history):
-    prompt = "<s>"
-    for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
+    prompt = "<s>"
+    for user_prompt, bot_response in history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"
+    return prompt
 
 def generate(
     prompt, history, temperature=0, max_new_tokens=3500, top_p=0.95, repetition_penalty=1.0,
@@ -26,6 +30,7 @@ def generate(
         repetition_penalty=repetition_penalty,
         do_sample=True,
         seed=42,
+        device=device,  # add CUDA support
     )
 
     formatted_prompt = format_prompt(prompt, history)
@@ -38,15 +43,14 @@ def generate(
         yield output
     return output
 
-
 mychatbot = gr.Chatbot(
-    avatar_images=["./user.png", "./botm.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
+    bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
 
 demo = gr.ChatInterface(fn=generate,
                         chatbot=mychatbot,
-                        title="Tomoniai's Mixtral 8x7b Chat",
+                        title="Test API :)",
                         retry_btn=None,
                         undo_btn=None
                         )
 
-demo.queue().launch(show_api=True)
+demo.queue().launch(show_api=True)
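A note on the change: InferenceClient performs generation on a remote Hugging Face endpoint, so the locally detected device never moves any computation; the new device kwarg is simply forwarded with the other generation parameters. The hunks also show generate only in fragments. Based on the visible context lines, the full function plausibly fits together as sketched below; everything not shown in the hunks (the temperature clamp, the text_generation call and its streaming flags) is a reconstruction, not code from this commit:

from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

def generate(prompt, history, temperature=0, max_new_tokens=3500,
             top_p=0.95, repetition_penalty=1.0):
    # text_generation rejects temperature <= 0, so clamp it to a small positive value.
    temperature = max(float(temperature), 1e-2)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # format_prompt as defined in the diff above.
    formatted_prompt = format_prompt(prompt, history)

    # Stream tokens from the hosted endpoint and yield the growing answer,
    # which is what Gradio's ChatInterface expects from a generator fn.
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs,
        stream=True, details=True, return_full_text=False,
    )
    output = ""
    for response in stream:
        output += response.token.text
        yield output
    return output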
requirements.txt CHANGED
@@ -1,3 +1,5 @@
 huggingface_hub
 gradio
 numpy<2
+torch
+bitsandbytes
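The new torch and bitsandbytes dependencies only take effect if the model is loaded locally rather than called through the Inference API. A minimal sketch of what local CUDA loading with 4-bit quantization could look like follows; it is an assumption about intent (none of this code is in the commit, and it would additionally require transformers and accelerate, which requirements.txt does not list):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Same device check as in app.py.
device = "cuda" if torch.cuda.is_available() else "cpu"

# 4-bit quantization via bitsandbytes is the usual reason to add it to
# requirements: it shrinks Mixtral-8x7B's GPU memory footprint considerably.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    quantization_config=bnb_config,
    device_map="auto",  # requires accelerate; spreads layers over available GPUs
)

inputs = tokenizer("[INST] Hello [/INST]", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))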