Spaces:
Sleeping
Sleeping
File size: 18,931 Bytes
55e027c 811a42d a093438 bd419af 49612ba bd419af dfaa92c bf090a3 6213fb0 9f551dd 49612ba 165a63a 9c45970 90d39c3 c87878a 49612ba 2c5812c 647c87c 3646bb0 a093438 811a42d 03f9025 90ce14b d222bb9 7dfecf7 90ce14b 811a42d 03f9025 12a17d8 b8f3115 2a68245 e0bc027 2a68245 72e1ed6 9c45970 14e3e6a a2dfe6a 90d39c3 a2dfe6a 90d39c3 a2dfe6a 90d39c3 a2dfe6a 90d39c3 a2dfe6a 90d39c3 a2dfe6a 90d39c3 a2dfe6a 90d39c3 a2dfe6a 90d39c3 bd419af 90d39c3 a2dfe6a 90d39c3 a2dfe6a 70ec742 90d39c3 ccd8f6d a2dfe6a 90d39c3 00fb9a8 90d39c3 fe45f69 70ec742 6213fb0 90d39c3 f029128 90d39c3 2c5812c 6213fb0 786c822 6213fb0 03f9025 6213fb0 a2dfe6a 2c5812c 90d39c3 a2dfe6a 90d39c3 bd419af b8f3115 4333f18 bd419af b8f3115 4333f18 bd419af b8f3115 4333f18 b8f3115 4333f18 b8f3115 b9dc38b 6125458 5d31dec ea9cb69 5d31dec 6125458 5d31dec 6125458 5d31dec b8f3115 165a63a bd419af b8f3115 165a63a 77f2c42 165a63a bd419af 165a63a bd419af 165a63a a543a3d b9dc38b bd419af b8f3115 b9dc38b 77f2c42 3646bb0 d222bb9 bd419af b8f3115 bd419af 77f2c42 bd419af 165a63a b8f3115 165a63a bd419af a543a3d bd419af b8f3115 bd419af a543a3d b8f3115 4064938 b8f3115 811a42d bd419af 77f2c42 165a63a 811a42d b8f3115 a543a3d de12b06 a543a3d b8f3115 bd419af 2c5812c b8f3115 f4e0fdd 8fdf34e f4e0fdd 5e4ca56 1bda668 5e4ca56 f4e0fdd 8fdf34e 5e4ca56 6c11728 f4e0fdd 8fdf34e f4e0fdd 1bda668 f4e0fdd 6c11728 9439f2d 8fdf34e 9439f2d 7edba86 9439f2d 8fdf34e 9439f2d 8fdf34e 9439f2d 94adb4f 9439f2d a3e8551 8fdf34e dfaa92c a2dfe6a dfaa92c d1c8d88 42d8138 d1c8d88 a2dfe6a 0f2f299 cff018d a2dfe6a ce202f9 a2dfe6a 0f2f299 ce202f9 dfaa92c a2dfe6a 0f2f299 ce202f9 0f2f299 a2dfe6a 2c5812c a2dfe6a 75b3db6 2c5812c 6bb9d92 2c5812c 75b3db6 2c5812c 791aec3 786c822 791aec3 786c822 5817e2c 9f551dd f2c192a 9f551dd 56baa88 9f551dd f2c192a 9f551dd 56baa88 f2c192a 56baa88 c12b724 c87878a 40e7d1c c87878a 40e7d1c c87878a 40e7d1c c87878a 40e7d1c c87878a 77f2c42 7620bdc dfc7bef ea9cb69 12a17d8 ea9cb69 ff7d32c ea9cb69 ff7d32c ea9cb69 ff7d32c f41581e ff7d32c ea9cb69 ff7d32c ea9cb69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 |
# -*- coding:utf-8 -*-
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type
import logging
import json
import os
import datetime
import hashlib
import csv
import requests
import re
import html
import sys
import subprocess
import gradio as gr
from pypinyin import lazy_pinyin
import tiktoken
import mdtex2html
from markdown import markdown
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
import pandas as pd
from modules.presets import *
from . import shared
from modules.config import retrieve_proxy, hide_history_when_not_logged_in
if TYPE_CHECKING:
    # Evaluated by static type checkers only; nothing below runs at runtime.
    from typing import TypedDict

    class DataframeData(TypedDict):
        # Typed-dict shape: a list of header strings plus a list of rows,
        # each row holding str/int/bool cells.
        # NOTE(review): presumably the payload of a Gradio Dataframe — confirm.
        headers: List[str]
        data: List[List[str | int | bool]]
def predict(current_model, *args):
    """Stream every item yielded by ``current_model.predict(*args)``."""
    yield from current_model.predict(*args)
def billing_info(current_model):
    """Return whatever ``current_model.billing_info()`` returns."""
    fetch = current_model.billing_info
    return fetch()
def set_key(current_model, *args):
    """Forward ``args`` to ``current_model.set_key`` and return its result."""
    handler = current_model.set_key
    return handler(*args)
def load_chat_history(current_model, *args):
    """Forward ``args`` to ``current_model.load_chat_history`` and return its result."""
    handler = current_model.load_chat_history
    return handler(*args)
def interrupt(current_model, *args):
    """Forward ``args`` to ``current_model.interrupt`` and return its result."""
    handler = current_model.interrupt
    return handler(*args)
def reset(current_model, *args):
    """Forward ``args`` to ``current_model.reset`` and return its result."""
    handler = current_model.reset
    return handler(*args)
def retry(current_model, *args):
    """Stream every item yielded by ``current_model.retry(*args)``."""
    yield from current_model.retry(*args)
def delete_first_conversation(current_model, *args):
    """Forward ``args`` to ``current_model.delete_first_conversation`` and return its result."""
    handler = current_model.delete_first_conversation
    return handler(*args)
def delete_last_conversation(current_model, *args):
    """Forward ``args`` to ``current_model.delete_last_conversation`` and return its result."""
    handler = current_model.delete_last_conversation
    return handler(*args)
def set_system_prompt(current_model, *args):
    """Forward ``args`` to ``current_model.set_system_prompt`` and return its result."""
    handler = current_model.set_system_prompt
    return handler(*args)
def save_chat_history(current_model, *args):
    """Forward ``args`` to ``current_model.save_chat_history`` and return its result."""
    handler = current_model.save_chat_history
    return handler(*args)
def export_markdown(current_model, *args):
    """Forward ``args`` to ``current_model.export_markdown`` and return its result."""
    handler = current_model.export_markdown
    return handler(*args)
def load_chat_history(current_model, *args):
    """Forward ``args`` to ``current_model.load_chat_history`` and return its result.

    NOTE(review): this name is defined more than once in this module; the
    definition that appears last is the one bound at import time.
    """
    loader = current_model.load_chat_history
    return loader(*args)
def upload_chat_history(current_model, *args):
    """Forward ``args`` to ``current_model.load_chat_history`` and return its result.

    The target is the model's ``load_chat_history`` method; there is no
    separate upload method involved.
    """
    loader = current_model.load_chat_history
    return loader(*args)
def set_token_upper_limit(current_model, *args):
    """Forward ``args`` to ``current_model.set_token_upper_limit`` and return its result."""
    handler = current_model.set_token_upper_limit
    return handler(*args)
def set_temperature(current_model, *args):
    """Forward ``args`` to ``current_model.set_temperature``; always returns ``None``."""
    setter = current_model.set_temperature
    setter(*args)
def set_top_p(current_model, *args):
    """Forward ``args`` to ``current_model.set_top_p``; always returns ``None``."""
    setter = current_model.set_top_p
    setter(*args)
def set_n_choices(current_model, *args):
    """Forward ``args`` to ``current_model.set_n_choices``; always returns ``None``."""
    setter = current_model.set_n_choices
    setter(*args)
def set_stop_sequence(current_model, *args):
    """Forward ``args`` to ``current_model.set_stop_sequence``; always returns ``None``."""
    setter = current_model.set_stop_sequence
    setter(*args)
def set_max_tokens(current_model, *args):
    """Forward ``args`` to ``current_model.set_max_tokens``; always returns ``None``."""
    setter = current_model.set_max_tokens
    setter(*args)
def set_presence_penalty(current_model, *args):
    """Forward ``args`` to ``current_model.set_presence_penalty``; always returns ``None``."""
    setter = current_model.set_presence_penalty
    setter(*args)
def set_frequency_penalty(current_model, *args):
    """Forward ``args`` to ``current_model.set_frequency_penalty``; always returns ``None``."""
    setter = current_model.set_frequency_penalty
    setter(*args)
def set_logit_bias(current_model, *args):
    """Forward ``args`` to ``current_model.set_logit_bias``; always returns ``None``."""
    setter = current_model.set_logit_bias
    setter(*args)
def set_user_identifier(current_model, *args):
    """Forward ``args`` to ``current_model.set_user_identifier``; always returns ``None``."""
    setter = current_model.set_user_identifier
    setter(*args)
def set_single_turn(current_model, *args):
    """Forward ``args`` to ``current_model.set_single_turn``; always returns ``None``."""
    setter = current_model.set_single_turn
    setter(*args)
def handle_file_upload(current_model, *args):
    """Forward ``args`` to ``current_model.handle_file_upload`` and return its result."""
    handler = current_model.handle_file_upload
    return handler(*args)
def like(current_model, *args):
    """Forward ``args`` to ``current_model.like`` and return its result."""
    handler = current_model.like
    return handler(*args)
def dislike(current_model, *args):
    """Forward ``args`` to ``current_model.dislike`` and return its result."""
    handler = current_model.dislike
    return handler(*args)
def count_token(message):
    """Count the ``cl100k_base`` tokens of one chat message.

    The message is rendered as ``"role: <role>, content: <content>"`` before
    encoding, so the count includes that framing text.

    Args:
        message: Mapping with ``"role"`` and ``"content"`` keys.

    Returns:
        Number of tokens in the rendered string.
    """
    tokenizer = tiktoken.get_encoding("cl100k_base")
    rendered = f"role: {message['role']}, content: {message['content']}"
    return len(tokenizer.encode(rendered))
def markdown_to_html_with_syntax_highlight(md_str):
    """Convert Markdown to HTML, highlighting fenced code blocks with Pygments.

    Each fenced block is replaced by highlighted HTML wrapped in
    ``<pre><code class="LANG">``; the resulting text is then passed through
    ``markdown()``.

    Args:
        md_str: Markdown source text.

    Returns:
        The HTML string produced by ``markdown()``.
    """
    fenced_block = r"```(\w+)?\n([\s\S]+?)\n```"

    def render_block(found):
        language = found.group(1) or "text"
        try:
            lexer = get_lexer_by_name(language, stripall=True)
        except ValueError:
            # Unknown language tag: fall back to the plain-text lexer.
            lexer = get_lexer_by_name("text", stripall=True)
        body = highlight(found.group(2), lexer, HtmlFormatter())
        return f'<pre><code class="{language}">{body}</code></pre>'

    with_highlight = re.sub(fenced_block, render_block, md_str, flags=re.MULTILINE)
    return markdown(with_highlight)
def normalize_markdown(md_text: str) -> str:
    """Normalize blank lines around Markdown list items.

    * A blank line is inserted before a list that directly follows a
      non-blank line.
    * A blank line inside a list is dropped when the next line is another
      list item (or when it is the last line); it is kept when the next line
      is not a list item.

    Args:
        md_text: Markdown source text.

    Returns:
        The text with list spacing adjusted, joined with ``"\\n"``.
    """
    list_item = re.compile(r"^(\d+\.|-|\*|\+)\s")

    def is_item(text):
        return list_item.match(text.strip()) is not None

    source = md_text.split("\n")
    last_index = len(source) - 1
    output = []
    in_list = False

    for pos, current in enumerate(source):
        if is_item(current):
            starts_after_text = (
                not in_list and pos > 0 and source[pos - 1].strip() != ""
            )
            if starts_after_text:
                output.append("")
            in_list = True
            output.append(current)
            continue

        if in_list and current.strip() == "":
            # Still counted as "inside the list"; only keep the blank line
            # when the following line is not another list item.
            if pos < last_index and not is_item(source[pos + 1]):
                output.append(current)
            continue

        in_list = False
        output.append(current)

    return "\n".join(output)
def convert_mdtext(md_text):
    """Render Markdown text as the HTML fragment used for a chat message.

    The text is split on triple-backtick fences. Non-code parts go through
    ``normalize_markdown`` and ``markdown`` (with the ``tables`` extension);
    code parts go through ``markdown_to_html_with_syntax_highlight``.

    Args:
        md_text: Markdown source text.

    Returns:
        ``<div class="md-message">…</div>`` followed by a
        ``<div class="raw-message hideM">`` holding the HTML-escaped original
        text, followed by ``ALREADY_CONVERTED_MARK``.
    """
    fence_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
    code_blocks = fence_pattern.findall(md_text)
    # split() interleaves the captured group with the surrounding text, so
    # the even-indexed items are the non-code parts.
    non_code_parts = fence_pattern.split(md_text)[::2]

    pieces = []
    for non_code, code in zip(non_code_parts, code_blocks + [""]):
        if non_code.strip():
            normalized = normalize_markdown(non_code)
            pieces.append(markdown(normalized, extensions=["tables"]))
        if code.strip():
            # Per the original author's note: language detection via
            # detect_language() and collapsing of blank lines in code were
            # disabled for now because they caused problems with large code
            # blocks.
            fenced = f"\n```{code}\n\n```"
            pieces.append(markdown_to_html_with_syntax_highlight(fenced))

    rendered = "".join(pieces)
    raw = f'<div class="raw-message hideM">{html.escape(md_text)}</div>'
    return f'<div class="md-message">{rendered}</div>' + raw + ALREADY_CONVERTED_MARK
def convert_asis(userinput):
    """Wrap text in a pre-wrap ``<p>`` after HTML-escaping it.

    Returns:
        The paragraph HTML followed by ``ALREADY_CONVERTED_MARK``.
    """
    escaped = html.escape(userinput)
    paragraph = f'<p style="white-space:pre-wrap;">{escaped}</p>'
    return paragraph + ALREADY_CONVERTED_MARK
def detect_converted_mark(userinput):
    """Tell whether ``userinput`` already ends with ``ALREADY_CONVERTED_MARK``.

    Args:
        userinput: Usually a string; other values are tolerated.

    Returns:
        ``True`` when the mark is the suffix, ``False`` when it is not, and
        ``True`` for values that cannot be checked (no usable ``endswith``),
        so such values are treated as already converted.
    """
    try:
        return bool(userinput.endswith(ALREADY_CONVERTED_MARK))
    except (AttributeError, TypeError):
        # Non-string input (e.g. None): keep the original fallback of True,
        # but no longer swallow unrelated errors such as KeyboardInterrupt.
        return True
def detect_language(code):
    """Split the leading language tag off the body of a fenced code block.

    Args:
        code: Text found between code fences. When it starts with a newline
            there is no language tag.

    Returns:
        ``(language, code_without_language)``. ``language`` is the
        lower-cased first line (``""`` when absent); the body has the tag and
        any whitespace right after it removed. When no tag is found, the
        input is returned unchanged as the body.
    """
    if code.startswith("\n"):
        return "", code

    first_line = code.strip().split("\n", 1)[0]
    if not first_line:
        return "", code

    # Slice from the left-stripped text: the tag length was measured after
    # stripping, so slicing the raw text would cut at the wrong offset
    # whenever the input has leading whitespace.
    left_aligned = code.lstrip()
    return first_line.lower(), left_aligned[len(first_line):].lstrip()
def construct_text(role, text):
    """Build a chat message dict with ``role`` and ``content`` keys."""
    return dict(role=role, content=text)
def construct_user(text):
    """Build a chat message dict whose role is ``"user"``."""
    message = construct_text("user", text)
    return message
def construct_system(text):
    """Build a chat message dict whose role is ``"system"``."""
    message = construct_text("system", text)
    return message
def construct_assistant(text):
    """Build a chat message dict whose role is ``"assistant"``."""
    message = construct_text("assistant", text)
    return message
def save_file(filename, system, history, chatbot, user_name):
    """Save a conversation as JSON or Markdown and return the written path.

    The per-user directory ``HISTORY_DIR/<user_name>`` is created if needed.

    * ``*.json``: dumps ``{"system", "history", "chatbot"}``. When
      ``filename`` contains a path separator it is used as the path as-is;
      otherwise it is placed in the per-user directory.
    * ``*.md``: writes the system prompt followed by one ``role``/``content``
      section per history entry into the per-user directory.
    * Any other extension: nothing is written.

    Args:
        filename: Target file name (or path, for JSON).
        system: System prompt text.
        history: Iterable of dicts with ``"role"`` and ``"content"`` keys.
        chatbot: Chatbot state stored verbatim in the JSON file.
        user_name: Name of the per-user sub-directory.

    Returns:
        The path that was written (or would have been, for other extensions).
    """
    logging.debug(f"{user_name} 保存对话历史中……")
    user_dir = os.path.join(HISTORY_DIR, user_name)
    os.makedirs(user_dir, exist_ok=True)
    saved_path = os.path.join(user_dir, filename)

    if filename.endswith(".json"):
        if "/" in filename or "\\" in filename:
            # Caller supplied a path; honor it and report it back, rather
            # than returning a different, re-joined path.
            saved_path = filename
        payload = {"system": system, "history": history, "chatbot": chatbot}
        with open(saved_path, "w", encoding="utf8") as f:
            json.dump(payload, f)
    elif filename.endswith(".md"):
        sections = [f"system: \n- {system} \n"]
        sections.extend(
            f"\n{data['role']}: \n- {data['content']} \n" for data in history
        )
        with open(saved_path, "w", encoding="utf8") as f:
            f.write("".join(sections))

    logging.debug(f"{user_name} 保存对话历史完毕")
    return saved_path
def sorted_by_pinyin(list):
    """Return the items sorted by the first letter of their first pinyin syllable.

    Only that single letter is the sort key, so items sharing it keep their
    input order (``sorted`` is stable).
    """
    def first_pinyin_letter(text):
        return lazy_pinyin(text)[0][0]

    return sorted(list, key=first_pinyin_letter)
def get_file_names(dir, plain=False, filetypes=None):
    """List the files in ``dir`` whose names end with one of ``filetypes``.

    Args:
        dir: Directory to scan. A missing directory yields no files.
        plain: When true, return the list itself; otherwise return a
            ``gr.Dropdown.update`` carrying the list as ``choices``.
        filetypes: Name suffixes to accept; defaults to ``[".json"]``.

    Returns:
        The pinyin-sorted names (``[""]`` when nothing matches), either as a
        list or wrapped in a dropdown update.
    """
    if filetypes is None:
        # Default built per call instead of a shared mutable default argument.
        filetypes = [".json"]
    logging.debug(f"获取文件名列表,目录为{dir},文件类型为{filetypes},是否为纯文本列表{plain}")

    try:
        # Listed once and reused for every suffix.
        entries = os.listdir(dir)
    except FileNotFoundError:
        entries = []

    files = []
    for suffix in filetypes:
        files.extend(name for name in entries if name.endswith(suffix))
    files = sorted_by_pinyin(files)
    if not files:
        files = [""]
    logging.debug(f"files are:{files}")

    if plain:
        return files
    return gr.Dropdown.update(choices=files)
def get_history_names(plain=False, user_name=""):
    """List the history files stored for ``user_name``.

    Returns ``""`` when no user name is given and
    ``hide_history_when_not_logged_in`` is set; otherwise the result of
    ``get_file_names`` for ``HISTORY_DIR/<user_name>``.
    """
    logging.debug(f"从用户 {user_name} 中获取历史记录文件名列表")
    anonymous_hidden = user_name == "" and hide_history_when_not_logged_in
    if anonymous_hidden:
        return ""
    user_dir = os.path.join(HISTORY_DIR, user_name)
    return get_file_names(user_dir, plain)
def load_template(filename, mode=0):
    """Load a prompt template file from ``TEMPLATES_DIR``.

    ``*.json`` files are read as a list of objects with ``"act"`` and
    ``"prompt"`` keys. Any other file is read as CSV; its first row is
    skipped and columns 0 and 1 are used as name and prompt.

    Args:
        filename: File name inside ``TEMPLATES_DIR``.
        mode: ``1`` returns the pinyin-sorted names, ``2`` returns the
            name-to-prompt dict, anything else returns the dict together with
            a ``gr.Dropdown.update`` of the sorted names.

    Returns:
        See ``mode``.
    """
    logging.debug(f"加载模板文件{filename},模式为{mode}(0为返回字典和下拉菜单,1为返回下拉菜单,2为返回字典)")
    template_path = os.path.join(TEMPLATES_DIR, filename)

    if filename.endswith(".json"):
        with open(template_path, "r", encoding="utf8") as f:
            entries = json.load(f)
        lines = [[entry["act"], entry["prompt"]] for entry in entries]
    else:
        # newline="" is what the csv module requires so that newlines inside
        # quoted fields are parsed correctly.
        with open(template_path, "r", encoding="utf8", newline="") as csvfile:
            rows = list(csv.reader(csvfile))
        # Drop the header row and skip blank lines, which csv yields as [].
        lines = [row for row in rows[1:] if row]

    if mode == 1:
        return sorted_by_pinyin([row[0] for row in lines])

    prompts = {row[0]: row[1] for row in lines}
    if mode == 2:
        return prompts

    choices = sorted_by_pinyin([row[0] for row in lines])
    return prompts, gr.Dropdown.update(choices=choices)
def get_template_names(plain=False):
    """List the ``.csv`` and ``.json`` template files in ``TEMPLATES_DIR``.

    Args:
        plain: Passed through to ``get_file_names``.

    Returns:
        Whatever ``get_file_names`` returns for the templates directory.
    """
    logging.debug("获取模板文件名列表")
    # ".json" with the dot: the bare suffix "json" also matched names that
    # merely end in those letters, which would then be parsed as CSV.
    return get_file_names(TEMPLATES_DIR, plain, filetypes=[".csv", ".json"])
def get_template_content(templates, selection, original_system_prompt):
    """Look up the prompt for ``selection``, falling back to the current prompt.

    Args:
        templates: Mapping from template name to prompt text.
        selection: Name chosen by the user.
        original_system_prompt: Value returned when the lookup fails.

    Returns:
        ``templates[selection]`` when available, otherwise
        ``original_system_prompt``.
    """
    logging.debug(f"应用模板中,选择为{selection},原始系统提示为{original_system_prompt}")
    try:
        return templates[selection]
    except (LookupError, TypeError):
        # Missing key, unhashable selection, or templates not subscriptable
        # (e.g. None): keep the existing prompt.
        return original_system_prompt
def reset_textbox():
    """Return a Gradio update that sets the value to an empty string."""
    logging.debug("重置文本框")
    cleared = gr.update(value="")
    return cleared
def reset_default():
    """Reset the API host via ``shared.state`` and call ``retrieve_proxy("")``.

    Returns:
        A 3-tuple: an update carrying the default host, an update carrying an
        empty string, and a status message.
    """
    default_host = shared.state.reset_api_host()
    retrieve_proxy("")
    host_update = gr.update(value=default_host)
    proxy_update = gr.update(value="")
    return host_update, proxy_update, "API-Host 和代理已重置"
def change_api_host(host):
    """Set the API host on ``shared.state``, log the change, and return the message."""
    shared.state.set_api_host(host)
    notice = f"API-Host更改为了{host}"
    logging.info(notice)
    return notice
def change_proxy(proxy):
    """Apply a new proxy and return the status message.

    Calls ``retrieve_proxy(proxy)``, sets the ``HTTPS_PROXY`` environment
    variable to ``proxy``, and logs the change.
    """
    retrieve_proxy(proxy)
    os.environ["HTTPS_PROXY"] = proxy
    notice = f"代理更改为了{proxy}"
    logging.info(notice)
    return notice
def hide_middle_chars(s):
    """Mask everything but the first and last four characters with ``*``.

    ``None`` becomes ``""``; strings of eight characters or fewer are
    returned unchanged.
    """
    if s is None:
        return ""
    masked_count = len(s) - 8
    if masked_count <= 0:
        return s
    return s[:4] + "*" * masked_count + s[-4:]
def submit_key(key):
    """Strip the key, log a masked confirmation, and return ``(key, message)``."""
    cleaned = key.strip()
    notice = f"API密钥更改为了{hide_middle_chars(cleaned)}"
    logging.info(notice)
    return cleaned, notice
def replace_today(prompt):
    """Replace every ``{current_date}`` in ``prompt`` with today's local date (YYYY-MM-DD)."""
    stamp = datetime.datetime.today().strftime("%Y-%m-%d")
    filled = prompt.replace("{current_date}", stamp)
    return filled
def get_geoip():
    """Query ipapi.co for the caller's IP region and return a status message.

    The request is made inside ``retrieve_proxy()`` with a 5-second timeout.
    Any failure to fetch or decode the response is reported as an error
    payload rather than raised, so this is a best-effort check.

    Returns:
        A user-facing message: the detected country, a warning when the
        country is ``"China"``, or a failure notice.
    """
    try:
        with retrieve_proxy():
            response = requests.get("https://ipapi.co/json/", timeout=5)
        data = response.json()
    except Exception:
        # Best-effort lookup: report the failure, but let KeyboardInterrupt
        # and SystemExit propagate (a bare except would swallow them).
        data = {"error": True, "reason": "连接ipapi失败"}

    if "error" in data:
        logging.warning(f"无法获取IP地址信息。\n{data}")
        # Tolerate an error payload that carries no "reason" field.
        reason = data.get("reason", "")
        if reason == "RateLimited":
            return i18n("您的IP区域:未知。")
        return i18n("获取IP地理位置失败。原因:") + f"{reason}" + i18n("。你仍然可以使用聊天功能。")

    country = data["country_name"]
    if country == "China":
        text = "**您的IP区域:中国。请立即检查代理设置,在不受支持的地区使用API可能导致账号被封禁。**"
    else:
        text = i18n("您的IP区域:") + f"{country}。"
    logging.info(text)
    return text
def find_n(lst, max_num):
    """Count how many trailing items of ``lst`` sum to less than ``max_num``.

    Items are dropped from the front one at a time until the remaining sum
    falls below ``max_num``; the number of items left is returned. When the
    whole list is already below the limit its length is returned, and ``1``
    is returned if the sum never drops below the limit.
    """
    count = len(lst)
    remaining = sum(lst)
    if remaining < max_num:
        return count

    for dropped, value in enumerate(lst):
        remaining = remaining - value
        if remaining < max_num:
            return count - dropped - 1
    return 1
def start_outputing():
    """Return two button updates: the first hidden, the second visible.

    NOTE(review): per the debug message these are presumably the send and
    cancel buttons, in that order — confirm against the event wiring.
    """
    logging.debug("显示取消按钮,隐藏发送按钮")
    hidden = gr.Button.update(visible=False)
    shown = gr.Button.update(visible=True)
    return hidden, shown
def end_outputing():
    """Return two button updates: the first visible, the second hidden."""
    shown = gr.Button.update(visible=True)
    hidden = gr.Button.update(visible=False)
    return shown, hidden
def cancel_outputing():
    """Log the cancellation and call ``shared.state.interrupt()``."""
    logging.info("中止输出……")
    state = shared.state
    state.interrupt()
def transfer_input(inputs):
    """Hand the typed text on while clearing the box and swapping the buttons.

    Everything is returned in one tuple so the UI updates in a single round
    trip (per the original note: "return at once to reduce latency").

    Args:
        inputs: Current textbox content.

    Returns:
        ``(inputs, textbox_update, first_button_update, second_button_update)``
        where the last three come from ``reset_textbox()`` and
        ``start_outputing()``.
    """
    # Reuse the helper results instead of discarding them and rebuilding the
    # same updates by hand.
    textbox = reset_textbox()
    outputing = start_outputing()
    return (inputs, textbox, *outputing)
def run(command, desc=None, errdesc=None, custom_env=None, live=False):
    """Run a shell command and return its decoded standard output.

    Args:
        command: Command line, executed with ``shell=True``.
        desc: Optional text printed before running.
        errdesc: Optional prefix for the error message on failure.
        custom_env: Environment for the child; ``os.environ`` when ``None``.
        live: When true, output is not captured and ``""`` is returned.

    Returns:
        Captured stdout decoded as UTF-8 (undecodable bytes ignored), or
        ``""`` in live mode.

    Raises:
        RuntimeError: The command exited with a non-zero return code.
    """
    if desc is not None:
        print(desc)

    env = os.environ if custom_env is None else custom_env

    if live:
        completed = subprocess.run(command, shell=True, env=env)
        if completed.returncode == 0:
            return ""
        raise RuntimeError(f"""{errdesc or 'Error running command'}.
Command: {command}
Error code: {completed.returncode}""")

    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
        env=env,
    )
    if completed.returncode == 0:
        return completed.stdout.decode(encoding="utf8", errors="ignore")

    out_text = (
        completed.stdout.decode(encoding="utf8", errors="ignore")
        if len(completed.stdout) > 0
        else '<empty>'
    )
    err_text = (
        completed.stderr.decode(encoding="utf8", errors="ignore")
        if len(completed.stderr) > 0
        else '<empty>'
    )
    raise RuntimeError(f"""{errdesc or 'Error running command'}.
Command: {command}
Error code: {completed.returncode}
stdout: {out_text}
stderr: {err_text}
""")
def versions_html():
    """Build the HTML snippet listing Python, Gradio and ChuanhuChat versions.

    The commit is read with ``git rev-parse HEAD`` (the executable can be
    overridden through the ``GIT`` environment variable); when that fails the
    commit is shown as unknown.
    """
    git = os.environ.get('GIT', "git")
    python_version = ".".join(str(part) for part in sys.version_info[0:3])

    try:
        commit_hash = run(f"{git} rev-parse HEAD").strip()
    except Exception:
        commit_hash = "<none>"

    if commit_hash == "<none>":
        commit_info = "unknown \U0001F615"
    else:
        short_commit = commit_hash[0:7]
        commit_info = f'<a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT/commit/{short_commit}">{short_commit}</a>'

    return f"""
Python: <span title="{sys.version}">{python_version}</span>
•
Gradio: {gr.__version__}
•
<a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT">ChuanhuChat</a>: {commit_info}
"""
def add_source_numbers(lst, source_name = "Source", use_source = True):
    """Prefix each item with a 1-based ``[n]`` marker.

    Args:
        lst: Items to number. With ``use_source`` each item is indexable:
            ``item[0]`` is the quoted text and ``item[1]`` the source.
        source_name: Label written before the source.
        use_source: Whether to append the ``<source_name>: <source>`` line.

    Returns:
        List of formatted strings, one per item.
    """
    numbered = enumerate(lst, start=1)
    if not use_source:
        return [f'[{number}]\t "{item}"' for number, item in numbered]
    return [
        f'[{number}]\t "{item[0]}"\n{source_name}: {item[1]}'
        for number, item in numbered
    ]
def add_details(lst):
    """Wrap each text in a collapsible ``<details>`` element.

    The summary is the first 25 characters of the text with newlines
    removed, followed by ``...``.
    """
    def as_details(txt):
        brief = txt[:25].replace("\n", "")
        return f"<details><summary>{brief}...</summary><p>{txt}</p></details>"

    return [as_details(txt) for txt in lst]
def sheet_to_string(sheet, sheet_name = None):
    """Render each row of a DataFrame as ``"col: value, col: value."``.

    Args:
        sheet: pandas DataFrame to render.
        sheet_name: Accepted for interface compatibility; not used.

    Returns:
        List with one string per row.
    """
    result = []
    for _, row in sheet.iterrows():
        # join() instead of appending ", " and rstrip(", "): rstrip removes a
        # character set, so it also ate trailing commas or spaces that
        # belonged to the last cell value.
        cells = ", ".join(f"{column}: {row[column]}" for column in sheet.columns)
        result.append(cells + ".")
    return result
def excel_to_string(file_path):
    """Read every worksheet of an Excel file and render all rows as strings.

    Args:
        file_path: Path of the workbook, read with the ``openpyxl`` engine.

    Returns:
        The concatenated ``sheet_to_string`` output of all worksheets.
    """
    # sheet_name=None loads all worksheets as a {name: DataFrame} mapping.
    workbook = pd.read_excel(file_path, engine='openpyxl', sheet_name=None)

    lines = []
    for name, frame in workbook.items():
        # Render the current worksheet and append its rows to the result.
        lines.extend(sheet_to_string(frame, sheet_name=name))
    return lines
def get_last_day_of_month(any_day):
    """Return the last day of the month that contains ``any_day``."""
    # Day 28 exists in every month, and 28 + 4 days always lands in the
    # following month.
    in_next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
    # Stepping back by that date's day-of-month reaches the final day of the
    # original month.
    overshoot = datetime.timedelta(days=in_next_month.day)
    return in_next_month - overshoot
def get_model_source(model_name, alternative_source):
    """Return the Hugging Face URL for ``gpt2-medium``; ``None`` for any other name.

    ``alternative_source`` is accepted but not used.
    """
    if model_name != "gpt2-medium":
        return None
    return "https://huggingface.co/gpt2-medium"
def refresh_ui_elements_on_load(current_model, selected_model_name, user_name):
    """Bind the user to the model and return the updates applied on page load.

    Returns:
        A tuple starting with the like-button visibility update, followed by
        the items returned by ``current_model.auto_load()``.
    """
    current_model.set_user_identifier(user_name)
    like_btn_update = toggle_like_btn_visibility(selected_model_name)
    return (like_btn_update, *current_model.auto_load())
def toggle_like_btn_visibility(selected_model_name):
    """Return an update that is visible only when the model is ``"xmchat"``."""
    show = selected_model_name == "xmchat"
    return gr.update(visible=show)
def new_auto_history_filename(dirname):
    """Pick the file name for the next automatically saved history.

    Args:
        dirname: Directory holding the timestamped history files.

    Returns:
        The newest timestamped file name when that file is still empty,
        otherwise a fresh ``<YYYY-MM-DD_HH-MM-SS>.json`` name based on the
        current local time.
    """
    latest_file = get_latest_filepath(dirname)
    if latest_file:
        # Check the size on disk instead of reading and decoding the whole
        # file just to see whether it is empty.
        if os.path.getsize(os.path.join(dirname, latest_file)) == 0:
            return latest_file
    now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    return f'{now}.json'
def get_latest_filepath(dirname):
    """Find the regular file in ``dirname`` with the newest timestamp prefix.

    Only names that start with ``YYYY-MM-DD_HH-MM-SS`` are considered.
    Sub-directories and names whose prefix is not a valid date/time are
    ignored.

    Args:
        dirname: Directory to scan; it must exist.

    Returns:
        The file name (not the full path) with the latest timestamp, or
        ``None`` when no file qualifies.
    """
    pattern = re.compile(r'\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}')
    latest_time = None
    latest_file = None
    for filename in os.listdir(dirname):
        if not os.path.isfile(os.path.join(dirname, filename)):
            continue
        # match() anchors at the start, which is what comparing the first
        # search hit against the 19-character prefix achieved.
        if not pattern.match(filename):
            continue
        try:
            filetime = datetime.datetime.strptime(filename[:19], '%Y-%m-%d_%H-%M-%S')
        except ValueError:
            # Digits in the right shape but not a real date (e.g. month 13):
            # skip the file instead of aborting the scan.
            continue
        if latest_time is None or filetime > latest_time:
            latest_time = filetime
            latest_file = filename
    return latest_file
def get_history_filepath(username):
    """Return the path of the history file to use for ``username``.

    The per-user directory under ``HISTORY_DIR`` is created if needed. The
    newest timestamped file is used when one exists; otherwise a name is
    obtained from ``new_auto_history_filename``.
    """
    dirname = os.path.join(HISTORY_DIR, username)
    os.makedirs(dirname, exist_ok=True)
    chosen = get_latest_filepath(dirname)
    if not chosen:
        chosen = new_auto_history_filename(dirname)
    return os.path.join(dirname, chosen)
|