xu song committed
Commit e52ef2a
1 Parent(s): 5658533
Files changed (3):
  1. app.py +6 -7
  2. app_util.py +16 -16
  3. models/cpp_qwen2.py +20 -17
app.py CHANGED

```diff
@@ -108,16 +108,15 @@ with gr.Blocks() as demo:
 
     ########
     history = gr.State([{"role": "system", "content": system_list[0]}])  # Only the system message here is useful information; the rest duplicates the chatbot content.
-    history_tokens = gr.State([])
-    system.change(reset_state, inputs=[system], outputs=[chatbot, history, history_tokens])
-    clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history, history_tokens])
+    system.change(reset_state, inputs=[system], outputs=[chatbot, history])
+    clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history])
 
-    generate_btn.click(generate, [chatbot, history, history_tokens], outputs=[generated_text_1, chatbot, history, history_tokens],
+    generate_btn.click(generate, [chatbot, history], outputs=[generated_text_1, chatbot, history],
                        show_progress="full")
-    retry_btn.click(undo_generate, [chatbot, history, history_tokens], outputs=[generated_text_1, chatbot, history, history_tokens]) \
-        .then(generate, [chatbot, history, history_tokens], outputs=[generated_text_1, chatbot, history, history_tokens],
+    retry_btn.click(undo_generate, [chatbot, history], outputs=[generated_text_1, chatbot, history]) \
+        .then(generate, [chatbot, history], outputs=[generated_text_1, chatbot, history],
               show_progress="full")
-    undo_btn.click(undo_generate, [chatbot, history, history_tokens], outputs=[generated_text_1, chatbot, history, history_tokens])
+    undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text_1, chatbot, history])
 
     slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
     slider_temperature.change(set_temperature, inputs=[slider_temperature])
```
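The removed `history_tokens` state is not lost functionality: after this commit each `history` entry carries its own token cache, so a parallel `gr.State` no longer has to be threaded through every event handler. A minimal sketch (not part of the commit) of the record shape the handlers now assume:

```python
# Hypothetical illustration of the new history entry shape: the token
# cache lives on the message itself instead of in a separate gr.State.
history = [
    {"role": "system", "content": "你是一个导游。", "tokens": [1, 2, 3]},  # dummy ids; filled lazily by generate_query
    {"role": "user", "content": "...", "tokens": [4, 5]},                  # attached when the message is appended
]
```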
app_util.py CHANGED

```diff
@@ -19,21 +19,21 @@ from models.cpp_qwen2 import bot
 # gr.Chatbot.postprocess = postprocess
 
 
-def generate_query(chatbot, history, history_tokens):
+def generate_query(chatbot, history):
     if history and history[-1]["role"] == "user":
         gr.Warning('You should generate assistant-response.')
         yield None, chatbot, history
     else:
         chatbot.append(None)
-        streamer = bot.generate_query(history[-1], history_tokens, stream=True)
-        for query, all_tokens in streamer:
+        streamer = bot.generate_query(history, stream=True)
+        for query, query_tokens in streamer:
             chatbot[-1] = (query, None)
             yield query, chatbot, history
-        history.append({"role": "user", "content": query})
-        yield query, chatbot, history, all_tokens
+        history.append({"role": "user", "content": query, "tokens": query_tokens})
+        yield query, chatbot, history
 
 
-def generate_response(chatbot, history, history_tokens):
+def generate_response(chatbot, history):
     """
     auto-mode: query is None
     manual-mode: query is the user input
@@ -44,24 +44,24 @@ def generate_response(chatbot, history, history_tokens):
         gr.Warning('You should generate or type user-input first.')
         yield None, chatbot, history
     else:
-        streamer = bot.generate_response(history[-1], history_tokens, stream=True)
-        for response, all_tokens in streamer:
+        streamer = bot.generate_response(history, stream=True)
+        for response, response_tokens in streamer:
             chatbot[-1] = (query, response)
-            yield response, chatbot, history, all_tokens
+            yield response, chatbot, history
 
-        history.append({"role": "assistant", "content": response})
+        history.append({"role": "assistant", "content": response, "tokens": response_tokens})
         print(f"chatbot is {chatbot}")
         print(f"history is {history}")
-        yield response, chatbot, history, all_tokens
+        yield response, chatbot, history
 
 
-def generate(chatbot, history, history_tokens):
+def generate(chatbot, history):
     logger.info(f"chatbot: {chatbot}; history: {history}")
     streamer = None
     if history[-1]["role"] in ["assistant", "system"]:
-        streamer = generate_query(chatbot, history, history_tokens)
+        streamer = generate_query(chatbot, history)
     elif history[-1]["role"] == "user":
-        streamer = generate_response(chatbot, history, history_tokens)
+        streamer = generate_response(chatbot, history)
     else:
         gr.Warning("bug")
 
@@ -69,7 +69,7 @@ def generate(chatbot, history, history_tokens):
         yield out
 
 
-def undo_generate(chatbot, history, history_tokens):
+def undo_generate(chatbot, history):
     if history[-1]["role"] == "user":
         history = history[:-1]
         chatbot = chatbot[:-1]
@@ -79,7 +79,7 @@ def undo_generate(chatbot, history, history_tokens):
     else:
         pass
     logger.info(f"after undo, {json.dumps(chatbot, ensure_ascii=False)}, {json.dumps(history, ensure_ascii=False)}")
-    return "", chatbot, history, history_tokens
+    return "", chatbot, history
 
 
 def reset_user_input():
```
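Since `generate` only forwards whatever the selected sub-generator yields, the dispatch could equivalently be spelled with `yield from`, which also avoids iterating `None` when neither branch matches. A sketch (not the committed code):

```python
def generate(chatbot, history):
    # Dispatch on the role of the last message: after a system or assistant
    # turn the bot simulates the user's next query; after a user turn it
    # generates the assistant response.
    logger.info(f"chatbot: {chatbot}; history: {history}")
    if history[-1]["role"] in ["assistant", "system"]:
        yield from generate_query(chatbot, history)
    elif history[-1]["role"] == "user":
        yield from generate_response(chatbot, history)
    else:
        gr.Warning("bug")
```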
models/cpp_qwen2.py CHANGED

```diff
@@ -81,7 +81,7 @@ class Qwen2Simulator(Simulator):
     def tokenize(self, text):
         return self.llm.tokenize(text.encode("utf-8"))
 
-    def generate_query(self, message, history_tokens, stream=True):
+    def generate_query(self, history, stream=True):
         """
         """
         # {% for message in messages %}
@@ -92,18 +92,28 @@ class Qwen2Simulator(Simulator):
         # {% endfor %}
         # {% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
 
-        input_ids = history_tokens + self.tokenize(
-            f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n<|im_start|>user\n"
-        )
+        # TODO: check whether an end-of-sequence token is already present.
+
+        input_ids = []
+        for message in history:
+            if "tokens" not in message:  # lazily cache the tokenized content
+                message["tokens"] = self.tokenize(message["content"])
+            input_ids += self.tokenize(f"<|im_start|>{message['role']}\n") \
+                + message["tokens"] \
+                + self.tokenize("<|im_end|>\n")
+        input_ids += self.tokenize("<|im_start|>user\n")
         if stream:
             return self._stream_generate(input_ids)
         else:
             return self._generate(input_ids)
 
-    def generate_response(self, message, history_tokens, stream=True):
-        input_ids = history_tokens + self.tokenize(
-            f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n<|im_start|>assistant\n"
-        )
+    def generate_response(self, history, stream=True):
+        input_ids = []
+        for message in history:
+            input_ids += self.tokenize(f"<|im_start|>{message['role']}\n") \
+                + message["tokens"] \
+                + self.tokenize("<|im_end|>\n")
+        input_ids += self.tokenize("<|im_start|>assistant\n")
         if stream:
             return self._stream_generate(input_ids)
         else:
@@ -118,18 +128,12 @@ class Qwen2Simulator(Simulator):
             stream=True,
             **self.generation_kwargs
         )
-        generated_text = ""
         # TODO: if finish_reason is "length", shift the context window and continue generating.
        # TODO: also return the token ids.
        for out in output:
            stream = copy.deepcopy(out)
            if stream["choices"][0]["finish_reason"] is None:
-                generated_text += stream["choices"][0]["text"]
-                if "completion_text" in stream["choices"][0]:
-                    yield stream["choices"][0]["completion_text"], stream["choices"][0]["all_tokens"]
-                else:
-                    logger.info("completion_text not found")
-                    yield generated_text, None
+                yield stream["choices"][0]["completion_text"], stream["choices"][0]["completion_tokens"]
 
 
 bot = Qwen2Simulator()
@@ -144,11 +148,10 @@ if __name__ == "__main__":
 
-    message = {"role": "system", "content": "你是一个导游。"}
-    print(message)
-    for generated_text, all_tokens in bot.generate_query(message, [], stream=True):
-        print(generated_text, all_tokens)
+    history = [{"role": "system", "content": "你是一个导游。"}]
+    print(history[-1])
+    for generated_text, query_tokens in bot.generate_query(history, stream=True):
+        print(generated_text, query_tokens)
 
-    message = {"role": "user", "content": generated_text}
-    print(message)
-    for generated_text, all_tokens in bot.generate_response(message, all_tokens, stream=True):
-        print(generated_text, all_tokens)
-
+    history.append({"role": "user", "content": generated_text, "tokens": query_tokens})
+    print(history[-1])
+    for generated_text, response_tokens in bot.generate_response(history, stream=True):
+        print(generated_text, response_tokens)
```
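`generate_query` and `generate_response` now rebuild the ChatML prompt with the same loop and differ only in the role that opens the next turn; note that `generate_response` as committed assumes every message already carries a `"tokens"` cache (true once `generate_query` has run, since it mutates the shared dicts). A hypothetical shared helper (not in the commit) that keeps the lazy-tokenize guard on both paths:

```python
def _build_prompt_ids(self, history, next_role):
    # Hypothetical helper: assemble the prompt token ids from the
    # per-message caches, following the ChatML layout used above.
    input_ids = []
    for message in history:
        if "tokens" not in message:  # lazy cache, as generate_query does
            message["tokens"] = self.tokenize(message["content"])
        input_ids += self.tokenize(f"<|im_start|>{message['role']}\n")
        input_ids += message["tokens"]
        input_ids += self.tokenize("<|im_end|>\n")
    # next_role is "user" in generate_query and "assistant" in generate_response.
    input_ids += self.tokenize(f"<|im_start|>{next_role}\n")
    return input_ids
```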