xu song committed on
Commit
4e4c514
1 Parent(s): 2e0b320
Files changed (2) hide show
  1. app.py +8 -0
  2. models/cpp_qwen2.py +4 -1
app.py CHANGED
@@ -23,7 +23,15 @@ Besides, it can also used as user simulator.
23
  """
24
 
25
  survey = """\
 
26
 
 
 
 
 
 
 
 
27
  """
28
 
29
  with gr.Blocks() as demo:
 
23
  """
24
 
25
  survey = """\
26
+ ## knowledge distillation 知识蒸馏
27
 
28
+ Essentially, it is a form of model compression.
29
+
30
+ ## distilling knowledge != knowledge distillation
31
+
32
+ 知识的形式可以是 QA纯文本,也可以是 QA+概率。
33
+
34
+ ## 有不用概率的知识蒸馏吗?
35
  """
36
 
37
  with gr.Blocks() as demo:
models/cpp_qwen2.py CHANGED
@@ -58,7 +58,7 @@ class Qwen2Simulator(Simulator):
58
  filename="*fp16.gguf",
59
  n_ctx=config.MAX_SEQUENCE_LENGTH,
60
  # use_mlock=True,
61
- verbose=False,
62
  )
63
  logger.info(f"llm has been initialized: {self.llm}, "
64
  f"n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}, "
@@ -116,6 +116,9 @@ class Qwen2Simulator(Simulator):
116
  stream = copy.deepcopy(out)
117
  if stream["choices"][0]["finish_reason"] is None:
118
  yield stream["choices"][0]["completion_text"], stream["choices"][0]["completion_tokens"]
 
 
 
119
 
120
 
121
  bot = Qwen2Simulator()
 
58
  filename="*fp16.gguf",
59
  n_ctx=config.MAX_SEQUENCE_LENGTH,
60
  # use_mlock=True,
61
+ verbose=True,
62
  )
63
  logger.info(f"llm has been initialized: {self.llm}, "
64
  f"n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}, "
 
116
  stream = copy.deepcopy(out)
117
  if stream["choices"][0]["finish_reason"] is None:
118
  yield stream["choices"][0]["completion_text"], stream["choices"][0]["completion_tokens"]
119
+ else:
120
+ print(f'finish with text: {stream["choices"][0]["completion_text"]}, tokens: {stream["choices"][0]["completion_tokens"]}')
121
+
122
 
123
 
124
  bot = Qwen2Simulator()