wiklif committed on
Commit 0669200 · 1 Parent(s): 5dd2177

test new version mixtral-api

Files changed (2)
  1. app.py +47 -45
  2. app.py_OLD +54 -0
app.py CHANGED
@@ -1,54 +1,56 @@
+import spaces
 from huggingface_hub import InferenceClient
 import gradio as gr
-import torch
-import spaces
 
-client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+client = InferenceClient('mistralai/Mixtral-8x7B-Instruct-v0.1')
 
-def format_prompt(message, history):
-    prompt = "<s>"
-    for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
-
-@spaces.GPU
-def generate(
-    prompt, history, temperature=0, max_new_tokens=3500, top_p=0.95, repetition_penalty=1.0,
-):
-    temperature = float(temperature)
-    if temperature < 1e-2:
-        temperature = 1e-2
-    top_p = float(top_p)
-
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_new_tokens=max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-        do_sample=True,
-        seed=42,
-    )
-
-    formatted_prompt = format_prompt(prompt, history)
-
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
-
+@spaces.GPU(duration=60)
+def generate_response(chat, kwargs):
+    output = ''
+    stream = client.text_generation(chat, **kwargs, stream=True, details=True, return_full_text=False)
     for response in stream:
         output += response.token.text
-        yield output
     return output
 
-mychatbot = gr.Chatbot(
-    bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
-
-demo = gr.ChatInterface(fn=generate,
-    chatbot=mychatbot,
-    title="Test API :)",
-    retry_btn=None,
-    undo_btn=None
-)
-
-demo.queue().launch(show_api=True)
+def function(prompt, history):
+    chat = "<s>"
+    for user_prompt, bot_response in history:
+        chat += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
+    chat += f"[INST] {prompt} [/INST]"
+    kwargs = dict(
+        temperature=0.80,
+        max_new_tokens=2048,
+        top_p=0.95,
+        repetition_penalty=1.0,
+        do_sample=True,  # make sure sampling is actually used
+        seed=1337
+    )
+
+    try:
+        output = generate_response(chat, kwargs)
+        yield output
+    except:
+        yield ''
+
+interface = gr.ChatInterface(
+    fn=function,
+    chatbot=gr.Chatbot(
+        avatar_images=None,
+        container=False,
+        show_copy_button=True,
+        layout='bubble',
+        render_markdown=True,
+        line_breaks=True
+    ),
+    css='h1 {font-size:22px;} h2 {font-size:20px;} h3 {font-size:18px;} h4 {font-size:16px;}',
+    autofocus=True,
+    fill_height=True,
+    analytics_enabled=False,
+    submit_btn='Chat',
+    stop_btn=None,
+    retry_btn=None,
+    undo_btn=None,
+    clear_btn=None
+)
+
+interface.launch()
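
For reference, a minimal sketch of the prompt string the new `function` assembles for Mixtral's `[INST]` chat format. The history and prompt values here are made-up illustrations; the loop body is taken verbatim from the code above:

# Hypothetical one-exchange history, only to illustrate the format:
history = [("Hi", "Hello!")]
prompt = "What's 2+2?"

chat = "<s>"
for user_prompt, bot_response in history:
    chat += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
chat += f"[INST] {prompt} [/INST]"

print(chat)
# <s>[INST] Hi [/INST] Hello!</s> [INST] What's 2+2? [/INST]

Each past exchange is closed with `</s>` so the model sees it as a completed turn, while the final `[INST] ... [/INST]` is left open for the new completion.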
app.py_OLD ADDED
@@ -0,0 +1,54 @@
+from huggingface_hub import InferenceClient
+import gradio as gr
+import torch
+import spaces
+
+client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+
+def format_prompt(message, history):
+    prompt = "<s>"
+    for user_prompt, bot_response in history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"
+    return prompt
+
+@spaces.GPU
+def generate(
+    prompt, history, temperature=0, max_new_tokens=3500, top_p=0.95, repetition_penalty=1.0,
+):
+    temperature = float(temperature)
+    if temperature < 1e-2:
+        temperature = 1e-2
+    top_p = float(top_p)
+
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=max_new_tokens,
+        top_p=top_p,
+        repetition_penalty=repetition_penalty,
+        do_sample=True,
+        seed=42,
+    )
+
+    formatted_prompt = format_prompt(prompt, history)
+
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""
+
+    for response in stream:
+        output += response.token.text
+        yield output
+    return output
+
+mychatbot = gr.Chatbot(
+    bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
+
+demo = gr.ChatInterface(fn=generate,
+    chatbot=mychatbot,
+    title="Test API :)",
+    retry_btn=None,
+    undo_btn=None
+)
+
+demo.queue().launch(show_api=True)
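
For reference, a minimal sketch of how the streamed `text_generation` call used in both versions can be consumed outside Gradio. The prompt and token budget here are assumptions; the call signature mirrors the one in the code above:

from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

output = ""
# With details=True and stream=True, each yielded chunk exposes the
# newly generated token's text at response.token.text.
stream = client.text_generation(
    "<s>[INST] Hello [/INST]",  # assumed test prompt
    max_new_tokens=64,  # assumed small budget for a quick test
    stream=True,
    details=True,
    return_full_text=False,
)
for response in stream:
    output += response.token.text
print(output)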