qingxu98 committed on
Commit
96c1852
·
1 Parent(s): cd145c0

Merge branch 'master' into huggingface

Browse files
crazy_functions/latex_utils.py CHANGED
@@ -27,6 +27,24 @@ def set_forbidden_text(text, mask, pattern, flags=0):
27
  mask[res.span()[0]:res.span()[1]] = PRESERVE
28
  return text, mask
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
31
  """
32
  Add a preserve text area in this paper (text become untouchable for GPT).
@@ -326,6 +344,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
326
  # reverse 操作必须放在最后
327
  text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
328
  text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
 
329
  root = convert_to_linklist(text, mask)
330
 
331
  # 修复括号
@@ -672,10 +691,9 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
672
  print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
673
  return False, -1, [-1]
674
 
675
-
676
- def compile_latex_with_timeout(command, timeout=60):
677
  import subprocess
678
- process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
679
  try:
680
  stdout, stderr = process.communicate(timeout=timeout)
681
  except subprocess.TimeoutExpired:
@@ -699,24 +717,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
699
 
700
  # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
701
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
702
- os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
703
 
704
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
705
- os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
706
 
707
  if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
708
  # 只有第二步成功,才能继续下面的步骤
709
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
710
  if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
711
- os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux'); os.chdir(current_dir)
712
  if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
713
- os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux'); os.chdir(current_dir)
714
 
715
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
716
- os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
717
- os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
718
- os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
719
- os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
720
 
721
  if mode!='translate_zh':
722
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
@@ -724,13 +742,11 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
724
  ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
725
 
726
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
727
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
728
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir)
729
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
730
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
731
 
732
- # <--------------------->
733
- os.chdir(current_dir)
734
 
735
  # <---------- 检查结果 ----------->
736
  results_ = ""
@@ -766,7 +782,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
766
  yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
767
  if not can_retry: break
768
 
769
- os.chdir(current_dir)
770
  return False # 失败啦
771
 
772
 
 
27
  mask[res.span()[0]:res.span()[1]] = PRESERVE
28
  return text, mask
29
 
30
+ def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
31
+ """
32
+ Move area out of preserve area (make text editable for GPT)
33
+ count the number of the braces so as to catch complete text area.
34
+ e.g.
35
+ \begin{abstract} blablablablablabla. \end{abstract}
36
+ """
37
+ if isinstance(pattern, list): pattern = '|'.join(pattern)
38
+ pattern_compile = re.compile(pattern, flags)
39
+ for res in pattern_compile.finditer(text):
40
+ if not forbid_wrapper:
41
+ mask[res.span()[0]:res.span()[1]] = TRANSFORM
42
+ else:
43
+ mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
44
+ mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
45
+ mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # '\\end{abstract}'
46
+ return text, mask
47
+
48
  def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
49
  """
50
  Add a preserve text area in this paper (text become untouchable for GPT).
 
344
  # reverse 操作必须放在最后
345
  text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
346
  text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
347
+ text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
348
  root = convert_to_linklist(text, mask)
349
 
350
  # 修复括号
 
691
  print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
692
  return False, -1, [-1]
693
 
694
+ def compile_latex_with_timeout(command, cwd, timeout=60):
 
695
  import subprocess
696
+ process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
697
  try:
698
  stdout, stderr = process.communicate(timeout=timeout)
699
  except subprocess.TimeoutExpired:
 
717
 
718
  # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
719
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
720
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
721
 
722
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
723
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
724
 
725
  if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
726
  # 只有第二步成功,才能继续下面的步骤
727
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
728
  if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
729
+ ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
730
  if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
731
+ ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
732
 
733
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
734
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
735
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
736
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
737
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
738
 
739
  if mode!='translate_zh':
740
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
 
742
  ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
743
 
744
  yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
745
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
746
+ ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
747
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
748
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
749
 
 
 
750
 
751
  # <---------- 检查结果 ----------->
752
  results_ = ""
 
782
  yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
783
  if not can_retry: break
784
 
 
785
  return False # 失败啦
786
 
787
 
request_llm/bridge_all.py CHANGED
@@ -152,7 +152,7 @@ model_info = {
152
  "token_cnt": get_token_num_gpt4,
153
  },
154
 
155
- # chatglm
156
  "chatglm": {
157
  "fn_with_ui": chatglm_ui,
158
  "fn_without_ui": chatglm_noui,
@@ -161,6 +161,15 @@ model_info = {
161
  "tokenizer": tokenizer_gpt35,
162
  "token_cnt": get_token_num_gpt35,
163
  },
 
 
 
 
 
 
 
 
 
164
  # newbing
165
  "newbing": {
166
  "fn_with_ui": newbing_ui,
 
152
  "token_cnt": get_token_num_gpt4,
153
  },
154
 
155
+ # chatglm 直接对齐到 chatglm2
156
  "chatglm": {
157
  "fn_with_ui": chatglm_ui,
158
  "fn_without_ui": chatglm_noui,
 
161
  "tokenizer": tokenizer_gpt35,
162
  "token_cnt": get_token_num_gpt35,
163
  },
164
+ "chatglm2": {
165
+ "fn_with_ui": chatglm_ui,
166
+ "fn_without_ui": chatglm_noui,
167
+ "endpoint": None,
168
+ "max_token": 1024,
169
+ "tokenizer": tokenizer_gpt35,
170
+ "token_cnt": get_token_num_gpt35,
171
+ },
172
+
173
  # newbing
174
  "newbing": {
175
  "fn_with_ui": newbing_ui,
request_llm/bridge_chatglm.py CHANGED
@@ -40,12 +40,12 @@ class GetGLMHandle(Process):
40
  while True:
41
  try:
42
  if self.chatglm_model is None:
43
- self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
44
  device, = get_conf('LOCAL_MODEL_DEVICE')
45
  if device=='cpu':
46
- self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
47
  else:
48
- self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
49
  self.chatglm_model = self.chatglm_model.eval()
50
  break
51
  else:
 
40
  while True:
41
  try:
42
  if self.chatglm_model is None:
43
+ self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
44
  device, = get_conf('LOCAL_MODEL_DEVICE')
45
  if device=='cpu':
46
+ self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
47
  else:
48
+ self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
49
  self.chatglm_model = self.chatglm_model.eval()
50
  break
51
  else:
toolbox.py CHANGED
@@ -498,7 +498,7 @@ def on_report_generated(cookies, files, chatbot):
498
  else:
499
  report_files = find_recent_files('gpt_log')
500
  if len(report_files) == 0:
501
- return None, chatbot
502
  # files.extend(report_files)
503
  file_links = ''
504
  for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
 
498
  else:
499
  report_files = find_recent_files('gpt_log')
500
  if len(report_files) == 0:
501
+ return cookies, None, chatbot
502
  # files.extend(report_files)
503
  file_links = ''
504
  for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'