xu song committed • Commit f0929ee • Parent(s): 12a161c

update

Files changed:
- app.py (+45 -38)
- app_util.py (+5 -3)
- models/cpp_qwen2.py (+10 -0)
app.py CHANGED

@@ -12,7 +12,6 @@ system_list = [
     "你是一个心理咨询师。",
 ]
 
-
 doc = """\
 There are maily two types of user simulator:
 - prompt-based user-simulator (role-play)
@@ -24,48 +23,55 @@ with gr.Blocks() as demo:
     gr.HTML("""<h1 align="center">Distilling the Knowledge through Self Chatting</h1>""")
     gr.Markdown(doc, visible=False)
     with gr.Row():
-        gr.
-
-
-
-
-
-
-
-        system = gr.Dropdown(
-            choices=system_list,
-            value=system_list[0],
-            allow_custom_value=True,
-            interactive=True,
-            label="System message",
-            scale=5,
-        )
-
-        chatbot = gr.Chatbot(avatar_images=("assets/man.png", "assets/bot.png"))
-        with gr.Row():
-            with gr.Column():
-                slider_max_tokens = gr.Slider(minimum=1, maximum=config.MAX_SEQUENCE_LENGTH,
-                                              value=config.DEFAULT_MAX_TOKENS, step=1, label="Max tokens")
-                slider_temperature = gr.Slider(minimum=0.1, maximum=10.0,
-                                               value=config.DEFAULT_TEMPERATURE, step=0.1, label="Temperature",
-                                               info="Larger temperature increase the randomness")
-                slider_top_p = gr.Slider(
-                    minimum=0.1,
-                    maximum=1.0,
-                    value=config.DEFAULT_TOP_P,
-                    step=0.05,
-                    label="Top-p (nucleus sampling)",
+        with gr.Column(scale=5):
+            system = gr.Dropdown(
+                choices=system_list,
+                value=system_list[0],
+                allow_custom_value=True,
+                interactive=True,
+                label="System message",
+                scale=5,
             )
 
-
+            chatbot = gr.Chatbot(avatar_images=("assets/man.png", "assets/bot.png"))
+
             generated_text = gr.Textbox(show_label=False, placeholder="...", lines=10, visible=False)
             generate_btn = gr.Button("🤔️ Generate", variant="primary")
             with gr.Row():
-                retry_btn = gr.Button("🔄 Regenerate")
-                undo_btn = gr.Button("↩️ Undo")
-                clear_btn = gr.Button("🗑️ Clear")  # 🧹 Clear History (清除历史)
-                # stop_btn = gr.Button("停止生成", variant="
+                retry_btn = gr.Button("🔄 Regenerate", variant="secondary", size="sm", )
+                undo_btn = gr.Button("↩️ Undo", variant="secondary", size="sm", )
+                clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm", )  # 🧹 Clear History (清除历史)
+                # stop_btn = gr.Button("停止生成", variant="stop", visible=False)
 
+        with gr.Column(variant="compact"):
+        # with gr.Column():
+            model = gr.Dropdown(
+                ["Qwen2-0.5B-Instruct", "llama3.1", "gemini"],
+                value="Qwen2-0.5B-Instruct",
+                label="Model",
+                interactive=True,
+                # visible=False
+            )
+            with gr.Accordion(label="Parameters", open=True):
+                slider_max_tokens = gr.Slider(minimum=1, maximum=config.MAX_SEQUENCE_LENGTH,
+                                              value=config.DEFAULT_MAX_TOKENS, step=1, label="Max tokens")
+                slider_temperature = gr.Slider(minimum=0.1, maximum=10.0,
+                                               value=config.DEFAULT_TEMPERATURE, step=0.1, label="Temperature",
+                                               info="Larger temperature increase the randomness")
+                slider_top_p = gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=config.DEFAULT_TOP_P,
+                    step=0.05,
+                    label="Top-p (nucleus sampling)",
+                )
+                slider_top_k = gr.Slider(
+                    minimum=1,
+                    maximum=200,
+                    value=config.DEFAULT_TOP_K,
+                    step=1,
+                    label="Top-k",
+                )
 
 
 ########
@@ -81,8 +87,9 @@ with gr.Blocks() as demo:
     undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history])
 
     slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
-    slider_top_p.change(set_top_p, inputs=[slider_top_p])
     slider_temperature.change(set_temperature, inputs=[slider_temperature])
+    slider_top_p.change(set_top_p, inputs=[slider_top_p])
+    slider_top_k.change(set_top_k, inputs=[slider_top_k])
 
     # demo.queue().launch(share=False, server_name="0.0.0.0")
     # demo.queue().launch(concurrency_count=1, max_size=5)
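Taken together, the added lines rearrange the UI into a two-column layout: a wide chat column (scale=5) holding the system-message dropdown, the chatbot, and the Regenerate/Undo/Clear buttons, and a compact settings column holding a model dropdown plus a "Parameters" accordion with the max-tokens, temperature, top-p, and new top-k sliders, each wired to a set_* callback. The sketch below is a minimal, self-contained approximation of that layout pattern (assuming Gradio 4.x); the constants and the set_* bodies are hypothetical stand-ins for the repo's config module and bot object, not the actual app.py.

```python
import gradio as gr

# Hypothetical stand-ins for the repo's config module and bot.generation_kwargs.
MAX_SEQUENCE_LENGTH = 2048
DEFAULT_MAX_TOKENS = 512
DEFAULT_TEMPERATURE = 0.7
DEFAULT_TOP_P = 0.9
DEFAULT_TOP_K = 40
generation_kwargs = {}


def set_max_tokens(max_tokens):
    generation_kwargs["max_tokens"] = max_tokens


def set_temperature(temperature):
    generation_kwargs["temperature"] = temperature


def set_top_p(top_p):
    generation_kwargs["top_p"] = top_p


def set_top_k(top_k):
    generation_kwargs["top_k"] = top_k


with gr.Blocks() as demo:
    with gr.Row():
        # Left: chat area (system message, history, action buttons).
        with gr.Column(scale=5):
            system = gr.Dropdown(["你是一个心理咨询师。"], value="你是一个心理咨询师。",
                                 allow_custom_value=True, interactive=True,
                                 label="System message")
            chatbot = gr.Chatbot()
            generate_btn = gr.Button("🤔️ Generate", variant="primary")
            with gr.Row():
                retry_btn = gr.Button("🔄 Regenerate", variant="secondary", size="sm")
                undo_btn = gr.Button("↩️ Undo", variant="secondary", size="sm")
                clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
        # Right: model picker and sampling parameters.
        with gr.Column(variant="compact"):
            model = gr.Dropdown(["Qwen2-0.5B-Instruct", "llama3.1", "gemini"],
                                value="Qwen2-0.5B-Instruct", label="Model", interactive=True)
            with gr.Accordion(label="Parameters", open=True):
                slider_max_tokens = gr.Slider(1, MAX_SEQUENCE_LENGTH, value=DEFAULT_MAX_TOKENS,
                                              step=1, label="Max tokens")
                slider_temperature = gr.Slider(0.1, 10.0, value=DEFAULT_TEMPERATURE,
                                               step=0.1, label="Temperature")
                slider_top_p = gr.Slider(0.1, 1.0, value=DEFAULT_TOP_P,
                                         step=0.05, label="Top-p (nucleus sampling)")
                slider_top_k = gr.Slider(1, 200, value=DEFAULT_TOP_K, step=1, label="Top-k")

    # Each slider writes its value into the shared generation kwargs on change.
    slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
    slider_temperature.change(set_temperature, inputs=[slider_temperature])
    slider_top_p.change(set_top_p, inputs=[slider_top_p])
    slider_top_k.change(set_top_k, inputs=[slider_top_k])

if __name__ == "__main__":
    demo.launch()
```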
app_util.py CHANGED

@@ -98,10 +98,12 @@ def reset_state(system):
 def set_max_tokens(max_tokens):
     bot.generation_kwargs["max_tokens"] = max_tokens
 
-
+def set_temperature(temperature):
+    bot.generation_kwargs["temperature"] = temperature
 def set_top_p(top_p):
     bot.generation_kwargs["top_p"] = top_p
 
+def set_top_k(top_k):
+    bot.generation_kwargs["top_k"] = top_k
+
 
-def set_temperature(temperature):
-    bot.generation_kwargs["temperature"] = temperature
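The setters now cover all four sampling parameters (max_tokens, temperature, top_p, top_k), each one overwriting a single key in bot.generation_kwargs. These names line up with the sampling arguments accepted by llama-cpp-python's completion call, so the dict can be unpacked directly when generating. A small sketch of that pattern with a hypothetical bot stub (the model path and defaults are placeholders, not the repo's Simulator class):

```python
from llama_cpp import Llama


class Bot:
    """Hypothetical stand-in for the app's simulator object."""

    def __init__(self, model_path: str):
        self.llm = Llama(model_path=model_path, verbose=False)
        # Defaults; each set_* callback overwrites exactly one key.
        self.generation_kwargs = {"max_tokens": 512, "temperature": 0.7,
                                  "top_p": 0.9, "top_k": 40}

    def generate(self, prompt: str) -> str:
        # max_tokens / temperature / top_p / top_k are all valid sampling
        # arguments of the Llama call, so the dict is unpacked as-is.
        out = self.llm(prompt, **self.generation_kwargs)
        return out["choices"][0]["text"]


bot = Bot("path/to/model.gguf")  # placeholder path


def set_top_k(top_k):
    bot.generation_kwargs["top_k"] = top_k
```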
models/cpp_qwen2.py CHANGED

@@ -13,6 +13,13 @@ python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen1.5-0.5B-C
 
 ./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -p "You are a helpful assistant" -cnv
 
+
+## reference
+
+- https://github.com/langchain-ai/langchain/blob/master/libs/community/langchain_community/llms/llamacpp.py
+- https://github.com/abetlen/llama-cpp-python/blob/main/examples/gradio_chat/server.py
+- https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/server/app.py
+
 """
 
 import json
@@ -37,6 +44,7 @@ class Qwen2Simulator(Simulator):
                 tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 n_ctx=config.MAX_SEQUENCE_LENGTH,  #
                 # n_threads=None,  # 默认会根据cpu数来设置 n_threads
+                use_mlock=True,
                 verbose=False,
             )
         else:
@@ -45,6 +53,8 @@ class Qwen2Simulator(Simulator):
                 repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
                 filename="*fp16.gguf",
                 tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
+                n_ctx=config.MAX_SEQUENCE_LENGTH,
+                use_mlock=True,
                 verbose=False,
             )
         logger.info(f"llm has been initialized: {self.llm}, n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}")
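The loader changes add use_mlock=True, which asks the OS to lock the mapped model weights in RAM so they are not swapped out, and pass n_ctx to the Hugging Face download branch as well, so both initialization paths use the same context length. A minimal sketch with the same arguments (Llama.from_pretrained forwards extra keyword arguments to the Llama constructor; MAX_SEQUENCE_LENGTH here is a stand-in for config.MAX_SEQUENCE_LENGTH):

```python
from llama_cpp import Llama

MAX_SEQUENCE_LENGTH = 2048  # stand-in for config.MAX_SEQUENCE_LENGTH

llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
    filename="*fp16.gguf",
    n_ctx=MAX_SEQUENCE_LENGTH,  # context window, now set on both init branches
    use_mlock=True,             # keep model weights resident in RAM (no swapping)
    verbose=False,
)

out = llm("You are a helpful assistant.\nUser: hello\nAssistant:", max_tokens=32)
print(out["choices"][0]["text"])
```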