File size: 6,307 Bytes
a1f93e9 b6dd571 d72c532 459fbe3 5799733 d72c532 8988bbf a1f93e9 b6dd571 7085eea b099d9e a1f93e9 7085eea 2e0b320 4e4c514 2e0b320 4e4c514 2e0b320 d72c532 8988bbf 2e0b320 3ac04fa f0929ee 7085eea b6dd571 f0929ee b6dd571 7085eea f0929ee b6dd571 f0929ee 5799733 b099d9e e52ef2a d72c532 e52ef2a d89d143 e52ef2a 411027b e52ef2a 459fbe3 f0929ee 459fbe3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
"""
"""
import gradio
import config
from app_util import *
# Preset system prompts offered by the "System message" dropdown; the first
# entry is used as the initial selection and as the initial history state.
system_list = [
    "You are a helpful assistant.",
    "你是一个导游。",  # "You are a tour guide."
    "你是一个英语老师。",  # "You are an English teacher."
    "你是一个程序员。",  # "You are a programmer."
    "你是一个心理咨询师。",  # "You are a psychological counselor."
]
# Markdown help text rendered with gr.Markdown after the User Simulator
# controls. The blank line after the bullet list is required: without it,
# CommonMark treats the next sentence as a continuation of the last bullet.
user_simulator_doc = """\
There are mainly two types of user simulator:
- prompt-based user-simulator (role-play)
- model-based user-simulator

In most cases, large language models (LLMs) are used to serve as assistant generator.
Besides, they can also be used as user simulator.
"""
# Short Markdown survey notes on knowledge distillation. Not referenced
# anywhere else in the visible part of this file.
# English gloss of the Chinese text, in order of appearance:
#   "知识蒸馏"                 -> "knowledge distillation"
#   "知识的形式可以是 ..."      -> "Knowledge can take the form of plain QA text,
#                                  or QA plus probabilities."
#   "有不用概率的知识蒸馏吗?"   -> "Is there knowledge distillation that does not
#                                  use probabilities?"
survey = (
    "## knowledge distillation 知识蒸馏\n"
    "Essentially, it is a form of model compression.\n"
    "## distilling knowledge != knowledge distillation\n"
    "知识的形式可以是 QA纯文本,也可以是 QA+概率。\n"
    "## 有不用概率的知识蒸馏吗?\n"
)
# Build the Gradio UI: a title, then one row holding a wide column (system
# prompt, chat window, three mode tabs) and a compact side column (model choice
# and sampling parameters). Event handlers are attached at the end of the
# Blocks context.
# NOTE(review): the nesting below was reconstructed from statement order because
# the original indentation was lost -- confirm against the deployed layout.
# NOTE(review): `gr` is not bound by any import visible in this file; presumably
# it is re-exported by `from app_util import *` -- confirm.
with gr.Blocks() as demo:
    # Knowledge Distillation through Self Chatting
    gr.HTML("""<h1 align="center">Distilling the Knowledge from LLM through Self Chatting</h1>""")
    with gr.Row():
        with gr.Column(scale=5):
            # System prompt selector; free-text entries are allowed.
            system = gr.Dropdown(
                choices=system_list,
                value=system_list[0],
                allow_custom_value=True,
                interactive=True,
                label="System message",
                scale=5,
            )
            chatbot = gr.Chatbot(show_copy_button=True,
                                 show_share_button=True,
                                 avatar_images=("assets/man.png", "assets/bot.png"))

            with gr.Tab("Self Chat"):
                # Hidden textbox: only used as an output slot of the handlers below.
                generated_text_1 = gr.Textbox(show_label=False, placeholder="...", lines=10, visible=False)
                generate_btn = gr.Button("🤔️ Self-Chat", variant="primary")
                with gr.Row():
                    retry_btn = gr.Button("🔄 Retry", variant="secondary", size="sm", )
                    undo_btn = gr.Button("↩️ Undo", variant="secondary", size="sm", )
                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm", )  # 🧹 Clear History
                    # Disabled stop button (label translated from Chinese):
                    # stop_btn = gr.Button("Stop generating", variant="stop", visible=False)
                gr.Markdown(
                    "Self-chat is a demo, which makes the model talk to itself. "
                    "It is based on user simulator and response generator.",
                    visible=True)

            # NOTE(review): no event handlers are attached in this file to the
            # controls of the two tabs below -- confirm whether that is intended.
            with gr.Tab("Response Generator"):
                with gr.Row():
                    generated_text_2 = gr.Textbox(show_label=False, placeholder="Please type your input", scale=7)
                    generate_btn_2 = gr.Button("Send", variant="primary")
                with gr.Row():
                    retry_btn_2 = gr.Button("🔄 Regenerate", variant="secondary", size="sm", )
                    undo_btn_2 = gr.Button("↩️ Undo", variant="secondary", size="sm", )
                    clear_btn_2 = gr.Button("🗑️ Clear", variant="secondary", size="sm", )  # 🧹 Clear History
                gr.Markdown("Response simulator is the most commonly used chatbot.")

            with gr.Tab("User Simulator"):
                with gr.Row():
                    generated_text_3 = gr.Textbox(show_label=False, placeholder="Please type your response", scale=7)
                    generate_btn_3 = gr.Button("Send", variant="primary")
                with gr.Row():
                    retry_btn_3 = gr.Button("🔄 Regenerate", variant="secondary", size="sm", )
                    undo_btn_3 = gr.Button("↩️ Undo", variant="secondary", size="sm", )
                    clear_btn_3 = gr.Button("🗑️ Clear", variant="secondary", size="sm", )  # 🧹 Clear History
                gr.Markdown(user_simulator_doc)

        with gr.Column(variant="compact"):
            # with gr.Column():
            # NOTE(review): `model` has no change handler in this file.
            model = gr.Dropdown(
                ["Qwen2-0.5B-Instruct", "llama3.1", "gemini"],
                value="Qwen2-0.5B-Instruct",
                label="Model",
                interactive=True,
                # visible=False
            )
            # Sampling parameters; bounds and defaults for max tokens come from `config`.
            with gr.Accordion(label="Parameters", open=True):
                slider_max_tokens = gr.Slider(minimum=1, maximum=config.MAX_SEQUENCE_LENGTH,
                                              value=config.DEFAULT_MAX_TOKENS, step=1, label="Max tokens")
                slider_temperature = gr.Slider(minimum=0.1, maximum=10.0,
                                               value=config.DEFAULT_TEMPERATURE, step=0.1, label="Temperature",
                                               info="Larger temperature increases the randomness")
                slider_top_p = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=config.DEFAULT_TOP_P,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                )
                slider_top_k = gr.Slider(
                    minimum=1,
                    maximum=200,
                    value=config.DEFAULT_TOP_K,
                    step=1,
                    label="Top-k",
                )

    ########
    # Event wiring. The handler functions are not defined in this file;
    # presumably they come from `app_util` -- confirm.
    # The only useful information in `history` is the system message; the rest
    # overlaps with the chatbot content.
    history = gr.State([{"role": "system", "content": system_list[0]}])

    # Changing the system prompt or pressing Clear resets chat and history.
    system.change(reset_state, inputs=[system], outputs=[chatbot, history])
    clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history])

    generate_btn.click(generate, [chatbot, history], outputs=[generated_text_1, chatbot, history],
                       show_progress="full")
    # Retry = undo the last generation step, then generate again.
    retry_btn.click(undo_generate, [chatbot, history], outputs=[generated_text_1, chatbot, history]) \
        .then(generate, [chatbot, history], outputs=[generated_text_1, chatbot, history],
              show_progress="full")
    undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text_1, chatbot, history])

    # Each slider forwards its new value to the matching setter; no outputs.
    slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
    slider_temperature.change(set_temperature, inputs=[slider_temperature])
    slider_top_p.change(set_top_p, inputs=[slider_top_p])
    slider_top_k.change(set_top_k, inputs=[slider_top_k])
# Alternative launch configurations, currently disabled:
# demo.queue().launch(share=False, server_name="0.0.0.0")
# demo.queue().launch(concurrency_count=1, max_size=5)
# Enable the request queue first, then start the app with default settings.
_queued_demo = demo.queue()
_queued_demo.launch()
|