Keldos committed
Commit 4282926
Parents: d3b93fb 0d61cce

BREAKING: Merge 'expansive': code refactoring, local model support (#572)


Major new features:
- Support for more parameters
- ChatGLM support
- Local embedding support
- Local LLaMA model support

Possible issues:
- Much of the error-handling code has been removed; errors will now surface in the terminal more often
- Local embedding support for Chinese is not very good
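For context on the local-embedding item: the switch is read from config.json in modules/config.py (see that file's diff further down). A minimal sketch of how the flag is consumed, assuming a plain JSON load of config.json (the repo may use commentjson; only the key name is taken from this commit):

```python
import json

# Read config.json and pick up the flag introduced by this commit.
# Defaults to False, i.e. remote OpenAI embeddings unless opted in.
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

local_embedding = config.get("local_embedding", False)
```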

.gitignore CHANGED
@@ -133,7 +133,10 @@ dmypy.json
 # Mac system file
 **/.DS_Store
 
+# 配置文件/模型文件
 api_key.txt
 config.json
 auth.json
+models/
+lora/
 .idea
ChuanhuChatbot.py CHANGED
@@ -10,8 +10,7 @@ from modules.config import *
10
  from modules.utils import *
11
  from modules.presets import *
12
  from modules.overwrites import *
13
- from modules.chat_func import *
14
- from modules.openai_func import get_usage
15
 
16
  gr.Chatbot.postprocess = postprocess
17
  PromptHelper.compact_text_chunks = compact_text_chunks
@@ -21,16 +20,14 @@ with open("assets/custom.css", "r", encoding="utf-8") as f:
21
 
22
  with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
23
  user_name = gr.State("")
24
- history = gr.State([])
25
- token_count = gr.State([])
26
  promptTemplates = gr.State(load_template(get_template_names(plain=True)[0], mode=2))
27
- user_api_key = gr.State(my_api_key)
28
  user_question = gr.State("")
29
- outputing = gr.State(False)
 
30
  topic = gr.State("未命名对话历史记录")
31
 
32
  with gr.Row():
33
- gr.HTML(title, elem_id="app_title")
34
  status_display = gr.Markdown(get_geoip(), elem_id="status_display")
35
  with gr.Row(elem_id="float_display"):
36
  user_info = gr.Markdown(value="getting user info...", elem_id="user_info")
@@ -64,11 +61,10 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
64
  retryBtn = gr.Button("🔄 重新生成")
65
  delFirstBtn = gr.Button("🗑️ 删除最旧对话")
66
  delLastBtn = gr.Button("🗑️ 删除最新对话")
67
- reduceTokenBtn = gr.Button("♻️ 总结对话")
68
 
69
  with gr.Column():
70
  with gr.Column(min_width=50, scale=1):
71
- with gr.Tab(label="ChatGPT"):
72
  keyTxt = gr.Textbox(
73
  show_label=True,
74
  placeholder=f"OpenAI API-key...",
@@ -82,10 +78,13 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
82
  else:
83
  usageTxt = gr.Markdown("**发送消息** 或 **提交key** 以显示额度", elem_id="usage_display", elem_classes="insert_block")
84
  model_select_dropdown = gr.Dropdown(
85
- label="选择模型", choices=MODELS, multiselect=False, value=MODELS[0]
86
  )
87
  use_streaming_checkbox = gr.Checkbox(
88
- label="实时传输回答", value=True, visible=enable_streaming_option
89
  )
90
  use_websearch_checkbox = gr.Checkbox(label="使用在线搜索", value=False)
91
  language_select_dropdown = gr.Dropdown(
@@ -94,7 +93,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
94
  multiselect=False,
95
  value=REPLY_LANGUAGES[0],
96
  )
97
- index_files = gr.Files(label="上传索引文件", type="file", multiple=True)
98
  two_column = gr.Checkbox(label="双栏pdf", value=advance_docs["pdf"].get("two_column", False))
99
  # TODO: 公式ocr
100
  # formula_ocr = gr.Checkbox(label="识别公式", value=advance_docs["pdf"].get("formula_ocr", False))
@@ -104,7 +103,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
104
  show_label=True,
105
  placeholder=f"在这里输入System Prompt...",
106
  label="System prompt",
107
- value=initial_prompt,
108
  lines=10,
109
  ).style(container=False)
110
  with gr.Accordion(label="加载Prompt模板", open=True):
@@ -160,24 +159,84 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
160
 
161
  with gr.Tab(label="高级"):
162
  gr.Markdown("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置")
163
- default_btn = gr.Button("🔙 恢复默认设置")
164
- gr.HTML(appearance_switcher, elem_classes="insert_block")
165
  with gr.Accordion("参数", open=False):
166
- top_p = gr.Slider(
167
  minimum=-0,
168
  maximum=1.0,
169
  value=1.0,
170
  step=0.05,
171
  interactive=True,
172
- label="Top-p",
173
  )
174
- temperature = gr.Slider(
175
- minimum=-0,
176
  maximum=2.0,
177
- value=1.0,
178
- step=0.1,
179
  interactive=True,
180
- label="Temperature",
181
  )
182
 
183
  with gr.Accordion("网络设置", open=False):
@@ -198,27 +257,21 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
198
  lines=2,
199
  )
200
  changeProxyBtn = gr.Button("🔄 设置代理地址")
 
201
 
202
- gr.Markdown(description)
203
- gr.HTML(footer.format(versions=versions_html()), elem_id="footer")
204
  chatgpt_predict_args = dict(
205
- fn=predict,
206
  inputs=[
207
- user_api_key,
208
- systemPromptTxt,
209
- history,
210
  user_question,
211
  chatbot,
212
- token_count,
213
- top_p,
214
- temperature,
215
  use_streaming_checkbox,
216
- model_select_dropdown,
217
  use_websearch_checkbox,
218
  index_files,
219
  language_select_dropdown,
220
  ],
221
- outputs=[chatbot, history, status_display, token_count],
222
  show_progress=True,
223
  )
224
 
@@ -242,12 +295,18 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
242
  )
243
 
244
  get_usage_args = dict(
245
- fn=get_usage, inputs=[user_api_key], outputs=[usageTxt], show_progress=False
246
  )
247
 
248
 
249
  # Chatbot
250
- cancelBtn.click(cancel_outputing, [], [])
251
 
252
  user_input.submit(**transfer_input_args).then(**chatgpt_predict_args).then(**end_outputing_args)
253
  user_input.submit(**get_usage_args)
@@ -256,70 +315,49 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
256
  submitBtn.click(**get_usage_args)
257
 
258
  emptyBtn.click(
259
- reset_state,
260
- outputs=[chatbot, history, token_count, status_display],
261
  show_progress=True,
262
  )
263
  emptyBtn.click(**reset_textbox_args)
264
 
265
  retryBtn.click(**start_outputing_args).then(
266
- retry,
267
  [
268
- user_api_key,
269
- systemPromptTxt,
270
- history,
271
  chatbot,
272
- token_count,
273
- top_p,
274
- temperature,
275
  use_streaming_checkbox,
276
- model_select_dropdown,
 
277
  language_select_dropdown,
278
  ],
279
- [chatbot, history, status_display, token_count],
280
  show_progress=True,
281
  ).then(**end_outputing_args)
282
  retryBtn.click(**get_usage_args)
283
 
284
  delFirstBtn.click(
285
- delete_first_conversation,
286
- [history, token_count],
287
- [history, token_count, status_display],
288
  )
289
 
290
  delLastBtn.click(
291
- delete_last_conversation,
292
- [chatbot, history, token_count],
293
- [chatbot, history, token_count, status_display],
294
- show_progress=True,
295
- )
296
-
297
- reduceTokenBtn.click(
298
- reduce_token_size,
299
- [
300
- user_api_key,
301
- systemPromptTxt,
302
- history,
303
- chatbot,
304
- token_count,
305
- top_p,
306
- temperature,
307
- gr.State(sum(token_count.value[-4:])),
308
- model_select_dropdown,
309
- language_select_dropdown,
310
- ],
311
- [chatbot, history, status_display, token_count],
312
- show_progress=True,
313
  )
314
- reduceTokenBtn.click(**get_usage_args)
315
 
316
  two_column.change(update_doc_config, [two_column], None)
317
 
318
- # ChatGPT
319
- keyTxt.change(submit_key, keyTxt, [user_api_key, status_display]).then(**get_usage_args)
320
  keyTxt.submit(**get_usage_args)
 
 
321
 
322
  # Template
 
323
  templateRefreshBtn.click(get_template_names, None, [templateFileSelectDropdown])
324
  templateFileSelectDropdown.change(
325
  load_template,
@@ -336,32 +374,34 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
336
 
337
  # S&L
338
  saveHistoryBtn.click(
339
- save_chat_history,
340
- [saveFileName, systemPromptTxt, history, chatbot, user_name],
341
  downloadFile,
342
  show_progress=True,
343
  )
344
  saveHistoryBtn.click(get_history_names, [gr.State(False), user_name], [historyFileSelectDropdown])
345
  exportMarkdownBtn.click(
346
- export_markdown,
347
- [saveFileName, systemPromptTxt, history, chatbot, user_name],
348
  downloadFile,
349
  show_progress=True,
350
  )
351
  historyRefreshBtn.click(get_history_names, [gr.State(False), user_name], [historyFileSelectDropdown])
352
- historyFileSelectDropdown.change(
353
- load_chat_history,
354
- [historyFileSelectDropdown, systemPromptTxt, history, chatbot, user_name],
355
- [saveFileName, systemPromptTxt, history, chatbot],
356
- show_progress=True,
357
- )
358
- downloadFile.change(
359
- load_chat_history,
360
- [downloadFile, systemPromptTxt, history, chatbot, user_name],
361
- [saveFileName, systemPromptTxt, history, chatbot],
362
- )
363
 
364
  # Advanced
365
  default_btn.click(
366
  reset_default, [], [apihostTxt, proxyTxt, status_display], show_progress=True
367
  )
 
10
  from modules.utils import *
11
  from modules.presets import *
12
  from modules.overwrites import *
13
+ from modules.models import ModelManager
 
14
 
15
  gr.Chatbot.postprocess = postprocess
16
  PromptHelper.compact_text_chunks = compact_text_chunks
 
20
 
21
  with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
22
  user_name = gr.State("")
 
 
23
  promptTemplates = gr.State(load_template(get_template_names(plain=True)[0], mode=2))
 
24
  user_question = gr.State("")
25
+ current_model = gr.State(ModelManager(model_name = MODELS[DEFAULT_MODEL], access_key = my_api_key))
26
+
27
  topic = gr.State("未命名对话历史记录")
28
 
29
  with gr.Row():
30
+ gr.HTML(CHUANHU_TITLE, elem_id="app_title")
31
  status_display = gr.Markdown(get_geoip(), elem_id="status_display")
32
  with gr.Row(elem_id="float_display"):
33
  user_info = gr.Markdown(value="getting user info...", elem_id="user_info")
 
61
  retryBtn = gr.Button("🔄 重新生成")
62
  delFirstBtn = gr.Button("🗑️ 删除最旧对话")
63
  delLastBtn = gr.Button("🗑️ 删除最新对话")
 
64
 
65
  with gr.Column():
66
  with gr.Column(min_width=50, scale=1):
67
+ with gr.Tab(label="模型"):
68
  keyTxt = gr.Textbox(
69
  show_label=True,
70
  placeholder=f"OpenAI API-key...",
 
78
  else:
79
  usageTxt = gr.Markdown("**发送消息** 或 **提交key** 以显示额度", elem_id="usage_display", elem_classes="insert_block")
80
  model_select_dropdown = gr.Dropdown(
81
+ label="选择模型", choices=MODELS, multiselect=False, value=MODELS[DEFAULT_MODEL], interactive=True
82
+ )
83
+ lora_select_dropdown = gr.Dropdown(
84
+ label="选择LoRA模型", choices=[], multiselect=False, interactive=True, visible=False
85
  )
86
  use_streaming_checkbox = gr.Checkbox(
87
+ label="实时传输回答", value=True, visible=ENABLE_STREAMING_OPTION
88
  )
89
  use_websearch_checkbox = gr.Checkbox(label="使用在线搜索", value=False)
90
  language_select_dropdown = gr.Dropdown(
 
93
  multiselect=False,
94
  value=REPLY_LANGUAGES[0],
95
  )
96
+ index_files = gr.Files(label="上传索引文件", type="file")
97
  two_column = gr.Checkbox(label="双栏pdf", value=advance_docs["pdf"].get("two_column", False))
98
  # TODO: 公式ocr
99
  # formula_ocr = gr.Checkbox(label="识别公式", value=advance_docs["pdf"].get("formula_ocr", False))
 
103
  show_label=True,
104
  placeholder=f"在这里输入System Prompt...",
105
  label="System prompt",
106
+ value=INITIAL_SYSTEM_PROMPT,
107
  lines=10,
108
  ).style(container=False)
109
  with gr.Accordion(label="加载Prompt模板", open=True):
 
159
 
160
  with gr.Tab(label="高级"):
161
  gr.Markdown("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置")
162
+ gr.HTML(APPEARANCE_SWITCHER, elem_classes="insert_block")
 
163
  with gr.Accordion("参数", open=False):
164
+ temperature_slider = gr.Slider(
165
+ minimum=-0,
166
+ maximum=2.0,
167
+ value=1.0,
168
+ step=0.1,
169
+ interactive=True,
170
+ label="temperature",
171
+ )
172
+ top_p_slider = gr.Slider(
173
  minimum=-0,
174
  maximum=1.0,
175
  value=1.0,
176
  step=0.05,
177
  interactive=True,
178
+ label="top-p",
179
  )
180
+ n_choices_slider = gr.Slider(
181
+ minimum=1,
182
+ maximum=10,
183
+ value=1,
184
+ step=1,
185
+ interactive=True,
186
+ label="n choices",
187
+ )
188
+ stop_sequence_txt = gr.Textbox(
189
+ show_label=True,
190
+ placeholder=f"在这里输入停止符,用英文逗号隔开...",
191
+ label="stop",
192
+ value="",
193
+ lines=1,
194
+ )
195
+ max_context_length_slider = gr.Slider(
196
+ minimum=1,
197
+ maximum=32768,
198
+ value=2000,
199
+ step=1,
200
+ interactive=True,
201
+ label="max context",
202
+ )
203
+ max_generation_slider = gr.Slider(
204
+ minimum=1,
205
+ maximum=32768,
206
+ value=1000,
207
+ step=1,
208
+ interactive=True,
209
+ label="max generations",
210
+ )
211
+ presence_penalty_slider = gr.Slider(
212
+ minimum=-2.0,
213
  maximum=2.0,
214
+ value=0.0,
215
+ step=0.01,
216
+ interactive=True,
217
+ label="presence penalty",
218
+ )
219
+ frequency_penalty_slider = gr.Slider(
220
+ minimum=-2.0,
221
+ maximum=2.0,
222
+ value=0.0,
223
+ step=0.01,
224
  interactive=True,
225
+ label="frequency penalty",
226
+ )
227
+ logit_bias_txt = gr.Textbox(
228
+ show_label=True,
229
+ placeholder=f"word:likelihood",
230
+ label="logit bias",
231
+ value="",
232
+ lines=1,
233
+ )
234
+ user_identifier_txt = gr.Textbox(
235
+ show_label=True,
236
+ placeholder=f"用于定位滥用行为",
237
+ label="用户名",
238
+ value=user_name.value,
239
+ lines=1,
240
  )
241
 
242
  with gr.Accordion("网络设置", open=False):
 
257
  lines=2,
258
  )
259
  changeProxyBtn = gr.Button("🔄 设置代理地址")
260
+ default_btn = gr.Button("🔙 恢复默认设置")
261
 
262
+ gr.Markdown(CHUANHU_DESCRIPTION)
263
+ gr.HTML(FOOTER.format(versions=versions_html()), elem_id="footer")
264
  chatgpt_predict_args = dict(
265
+ fn=current_model.value.predict,
266
  inputs=[
267
  user_question,
268
  chatbot,
269
  use_streaming_checkbox,
 
270
  use_websearch_checkbox,
271
  index_files,
272
  language_select_dropdown,
273
  ],
274
+ outputs=[chatbot, status_display],
275
  show_progress=True,
276
  )
277
 
 
295
  )
296
 
297
  get_usage_args = dict(
298
+ fn=current_model.value.billing_info, inputs=None, outputs=[usageTxt], show_progress=False
299
+ )
300
+
301
+ load_history_from_file_args = dict(
302
+ fn=current_model.value.load_chat_history,
303
+ inputs=[historyFileSelectDropdown, chatbot, user_name],
304
+ outputs=[saveFileName, systemPromptTxt, chatbot]
305
  )
306
 
307
 
308
  # Chatbot
309
+ cancelBtn.click(current_model.value.interrupt, [], [])
310
 
311
  user_input.submit(**transfer_input_args).then(**chatgpt_predict_args).then(**end_outputing_args)
312
  user_input.submit(**get_usage_args)
 
315
  submitBtn.click(**get_usage_args)
316
 
317
  emptyBtn.click(
318
+ current_model.value.reset,
319
+ outputs=[chatbot, status_display],
320
  show_progress=True,
321
  )
322
  emptyBtn.click(**reset_textbox_args)
323
 
324
  retryBtn.click(**start_outputing_args).then(
325
+ current_model.value.retry,
326
  [
327
  chatbot,
328
  use_streaming_checkbox,
329
+ use_websearch_checkbox,
330
+ index_files,
331
  language_select_dropdown,
332
  ],
333
+ [chatbot, status_display],
334
  show_progress=True,
335
  ).then(**end_outputing_args)
336
  retryBtn.click(**get_usage_args)
337
 
338
  delFirstBtn.click(
339
+ current_model.value.delete_first_conversation,
340
+ None,
341
+ [status_display],
342
  )
343
 
344
  delLastBtn.click(
345
+ current_model.value.delete_last_conversation,
346
+ [chatbot],
347
+ [chatbot, status_display],
348
+ show_progress=False
349
  )
 
350
 
351
  two_column.change(update_doc_config, [two_column], None)
352
 
353
+ # LLM Models
354
+ keyTxt.change(current_model.value.set_key, keyTxt, [status_display]).then(**get_usage_args)
355
  keyTxt.submit(**get_usage_args)
356
+ model_select_dropdown.change(current_model.value.get_model, [model_select_dropdown, lora_select_dropdown, keyTxt, temperature_slider, top_p_slider, systemPromptTxt], [status_display, lora_select_dropdown], show_progress=True)
357
+ lora_select_dropdown.change(current_model.value.get_model, [model_select_dropdown, lora_select_dropdown, keyTxt, temperature_slider, top_p_slider, systemPromptTxt], [status_display], show_progress=True)
358
 
359
  # Template
360
+ systemPromptTxt.change(current_model.value.set_system_prompt, [systemPromptTxt], None)
361
  templateRefreshBtn.click(get_template_names, None, [templateFileSelectDropdown])
362
  templateFileSelectDropdown.change(
363
  load_template,
 
374
 
375
  # S&L
376
  saveHistoryBtn.click(
377
+ current_model.value.save_chat_history,
378
+ [saveFileName, chatbot, user_name],
379
  downloadFile,
380
  show_progress=True,
381
  )
382
  saveHistoryBtn.click(get_history_names, [gr.State(False), user_name], [historyFileSelectDropdown])
383
  exportMarkdownBtn.click(
384
+ current_model.value.export_markdown,
385
+ [saveFileName, chatbot, user_name],
386
  downloadFile,
387
  show_progress=True,
388
  )
389
  historyRefreshBtn.click(get_history_names, [gr.State(False), user_name], [historyFileSelectDropdown])
390
+ historyFileSelectDropdown.change(**load_history_from_file_args)
391
+ downloadFile.change(**load_history_from_file_args)
392
 
393
  # Advanced
394
+ max_context_length_slider.change(current_model.value.set_token_upper_limit, [max_context_length_slider], None)
395
+ temperature_slider.change(current_model.value.set_temperature, [temperature_slider], None)
396
+ top_p_slider.change(current_model.value.set_top_p, [top_p_slider], None)
397
+ n_choices_slider.change(current_model.value.set_n_choices, [n_choices_slider], None)
398
+ stop_sequence_txt.change(current_model.value.set_stop_sequence, [stop_sequence_txt], None)
399
+ max_generation_slider.change(current_model.value.set_max_tokens, [max_generation_slider], None)
400
+ presence_penalty_slider.change(current_model.value.set_presence_penalty, [presence_penalty_slider], None)
401
+ frequency_penalty_slider.change(current_model.value.set_frequency_penalty, [frequency_penalty_slider], None)
402
+ logit_bias_txt.change(current_model.value.set_logit_bias, [logit_bias_txt], None)
403
+ user_identifier_txt.change(current_model.value.set_user_identifier, [user_identifier_txt], None)
404
+
405
  default_btn.click(
406
  reset_default, [], [apihostTxt, proxyTxt, status_display], show_progress=True
407
  )
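The central UI change above: the per-callback `history`/`token_count` states are gone, and a single model object lives in a `gr.State` whose bound methods are wired directly as event callbacks. A minimal sketch of that pattern (`EchoModel` is a hypothetical stand-in for modules.models.ModelManager):

```python
import gradio as gr

class EchoModel:
    """Hypothetical stand-in for modules.models.ModelManager."""
    def predict(self, inputs, history):
        history.append((inputs, f"echo: {inputs}"))
        return history, "ok"

with gr.Blocks() as demo:
    current_model = gr.State(EchoModel())  # model object held in Gradio state
    chatbot = gr.Chatbot()
    status = gr.Markdown()
    user_input = gr.Textbox()
    # As in this commit: the bound method of the State's initial value
    # is registered as the event callback.
    user_input.submit(current_model.value.predict,
                      [user_input, chatbot], [chatbot, status])

demo.launch()
```

One consequence worth noting when reviewing: `current_model.value` is resolved once at build time, so every browser session shares the same model instance and its conversation history.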
configs/ds_config_chatbot.json ADDED
@@ -0,0 +1,17 @@
1
+ {
2
+ "fp16": {
3
+ "enabled": false
4
+ },
5
+ "bf16": {
6
+ "enabled": true
7
+ },
8
+ "comms_logger": {
9
+ "enabled": false,
10
+ "verbose": false,
11
+ "prof_all": false,
12
+ "debug": false
13
+ },
14
+ "steps_per_print": 20000000000000000,
15
+ "train_micro_batch_size_per_gpu": 1,
16
+ "wall_clock_breakdown": false
17
+ }
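This DeepSpeed config disables fp16 in favor of bf16, turns off the comms logger, and sets steps_per_print absurdly high to silence periodic logging. The commit does not show the consuming code; a hedged sketch of typical usage (the toy model stands in for a locally loaded checkpoint, and bf16 requires supporting hardware):

```python
import torch
import deepspeed

model = torch.nn.Linear(8, 8)  # stand-in; in practice a local LLaMA/ChatGLM module

# Sketch only: wrap an already-loaded model with DeepSpeed using the JSON
# config added above. Returns (engine, optimizer, dataloader, lr_scheduler).
engine, _, _, _ = deepspeed.initialize(
    model=model,
    config="configs/ds_config_chatbot.json",
)
wrapped = engine.module  # the underlying model remains accessible here
```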
modules/__init__.py ADDED
(empty file — package marker)
modules/base_model.py ADDED
@@ -0,0 +1,519 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING, List
3
+
4
+ import logging
5
+ import json
6
+ import commentjson as cjson
7
+ import os
8
+ import sys
9
+ import requests
10
+ import urllib3
11
+
12
+ from tqdm import tqdm
13
+ import colorama
14
+ from duckduckgo_search import ddg
15
+ import asyncio
16
+ import aiohttp
17
+ from enum import Enum
18
+
19
+ from .presets import *
20
+ from .llama_func import *
21
+ from .utils import *
22
+ from . import shared
23
+ from .config import retrieve_proxy
24
+
25
+
26
+ class ModelType(Enum):
27
+ Unknown = -1
28
+ OpenAI = 0
29
+ ChatGLM = 1
30
+ LLaMA = 2
31
+
32
+ @classmethod
33
+ def get_type(cls, model_name: str):
34
+ model_type = None
35
+ model_name_lower = model_name.lower()
36
+ if "gpt" in model_name_lower:
37
+ model_type = ModelType.OpenAI
38
+ elif "chatglm" in model_name_lower:
39
+ model_type = ModelType.ChatGLM
40
+ elif "llama" in model_name_lower:
41
+ model_type = ModelType.LLaMA
42
+ else:
43
+ model_type = ModelType.Unknown
44
+ return model_type
45
+
46
+
47
+ class BaseLLMModel:
48
+ def __init__(
49
+ self,
50
+ model_name,
51
+ system_prompt="",
52
+ temperature=1.0,
53
+ top_p=1.0,
54
+ n_choices=1,
55
+ stop=None,
56
+ max_generation_token=None,
57
+ presence_penalty=0,
58
+ frequency_penalty=0,
59
+ logit_bias=None,
60
+ user="",
61
+ ) -> None:
62
+ self.history = []
63
+ self.all_token_counts = []
64
+ self.model_name = model_name
65
+ self.model_type = ModelType.get_type(model_name)
66
+ try:
67
+ self.token_upper_limit = MODEL_TOKEN_LIMIT[model_name]
68
+ except KeyError:
69
+ self.token_upper_limit = DEFAULT_TOKEN_LIMIT
70
+ self.interrupted = False
71
+ self.system_prompt = system_prompt
72
+ self.api_key = None
73
+ self.need_api_key = False
74
+
75
+ self.temperature = temperature
76
+ self.top_p = top_p
77
+ self.n_choices = n_choices
78
+ self.stop_sequence = stop
79
+ self.max_generation_token = max_generation_token
80
+ self.presence_penalty = presence_penalty
81
+ self.frequency_penalty = frequency_penalty
82
+ self.logit_bias = logit_bias
83
+ self.user_identifier = user
84
+
85
+ def get_answer_stream_iter(self):
86
+ """stream predict, need to be implemented
87
+ conversations are stored in self.history, with the most recent question, in OpenAI format
88
+ should return a generator that yields the next word (str) of the answer each time
89
+ """
90
+ logging.warning("stream predict not implemented, using at once predict instead")
91
+ response, _ = self.get_answer_at_once()
92
+ yield response
93
+
94
+ def get_answer_at_once(self):
95
+ """predict at once, need to be implemented
96
+ conversations are stored in self.history, with the most recent question, in OpenAI format
97
+ Should return:
98
+ the answer (str)
99
+ total token count (int)
100
+ """
101
+ logging.warning("at once predict not implemented, using stream predict instead")
102
+ response_iter = self.get_answer_stream_iter()
103
+ count = 0
104
+ for response in response_iter:
105
+ count += 1
106
+ return response, sum(self.all_token_counts) + count
107
+
108
+ def billing_info(self):
109
+ """get billing infomation, inplement if needed"""
110
+ logging.warning("billing info not implemented, using default")
111
+ return BILLING_NOT_APPLICABLE_MSG
112
+
113
+ def count_token(self, user_input):
114
+ """get token count from input, implement if needed"""
115
+ logging.warning("token count not implemented, using default")
116
+ return len(user_input)
117
+
118
+ def stream_next_chatbot(self, inputs, chatbot, fake_input=None, display_append=""):
119
+ def get_return_value():
120
+ return chatbot, status_text
121
+
122
+ status_text = "开始实时传输回答……"
123
+ if fake_input:
124
+ chatbot.append((fake_input, ""))
125
+ else:
126
+ chatbot.append((inputs, ""))
127
+
128
+ user_token_count = self.count_token(inputs)
129
+ self.all_token_counts.append(user_token_count)
130
+ logging.debug(f"输入token计数: {user_token_count}")
131
+
132
+ stream_iter = self.get_answer_stream_iter()
133
+
134
+ for partial_text in stream_iter:
135
+ chatbot[-1] = (chatbot[-1][0], partial_text + display_append)
136
+ self.all_token_counts[-1] += 1
137
+ status_text = self.token_message()
138
+ yield get_return_value()
139
+ if self.interrupted:
140
+ self.recover()
141
+ break
142
+ self.history.append(construct_assistant(partial_text))
143
+
144
+ def next_chatbot_at_once(self, inputs, chatbot, fake_input=None, display_append=""):
145
+ if fake_input:
146
+ chatbot.append((fake_input, ""))
147
+ else:
148
+ chatbot.append((inputs, ""))
149
+ if fake_input is not None:
150
+ user_token_count = self.count_token(fake_input)
151
+ else:
152
+ user_token_count = self.count_token(inputs)
153
+ self.all_token_counts.append(user_token_count)
154
+ ai_reply, total_token_count = self.get_answer_at_once()
155
+ self.history.append(construct_assistant(ai_reply))
156
+ if fake_input is not None:
157
+ self.history[-2] = construct_user(fake_input)
158
+ chatbot[-1] = (chatbot[-1][0], ai_reply + display_append)
159
+ if fake_input is not None:
160
+ self.all_token_counts[-1] += count_token(construct_assistant(ai_reply))
161
+ else:
162
+ self.all_token_counts[-1] = total_token_count - sum(self.all_token_counts)
163
+ status_text = self.token_message()
164
+ return chatbot, status_text
165
+
166
+ def predict(
167
+ self,
168
+ inputs,
169
+ chatbot,
170
+ stream=False,
171
+ use_websearch=False,
172
+ files=None,
173
+ reply_language="中文",
174
+ should_check_token_count=True,
175
+ ): # repetition_penalty, top_k
176
+ from llama_index.indices.vector_store.base_query import GPTVectorStoreIndexQuery
177
+ from llama_index.indices.query.schema import QueryBundle
178
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
179
+ from langchain.chat_models import ChatOpenAI
180
+ from llama_index import (
181
+ GPTSimpleVectorIndex,
182
+ ServiceContext,
183
+ LangchainEmbedding,
184
+ OpenAIEmbedding,
185
+ )
186
+
187
+ logging.info(
188
+ "输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL
189
+ )
190
+ if should_check_token_count:
191
+ yield chatbot + [(inputs, "")], "开始生成回答……"
192
+ if reply_language == "跟随问题语言(不稳定)":
193
+ reply_language = "the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch."
194
+ old_inputs = None
195
+ display_reference = []
196
+ limited_context = False
197
+ if files:
198
+ limited_context = True
199
+ old_inputs = inputs
200
+ msg = "加载索引中……(这可能需要几分钟)"
201
+ logging.info(msg)
202
+ yield chatbot + [(inputs, "")], msg
203
+ index = construct_index(self.api_key, file_src=files)
204
+ assert index is not None, "索引构建失败"
205
+ msg = "索引构建完成,获取回答中……"
206
+ if local_embedding:
207
+ embed_model = LangchainEmbedding(HuggingFaceEmbeddings())
208
+ else:
209
+ embed_model = OpenAIEmbedding()
210
+ logging.info(msg)
211
+ yield chatbot + [(inputs, "")], msg
212
+ with retrieve_proxy():
213
+ prompt_helper = PromptHelper(
214
+ max_input_size=4096,
215
+ num_output=5,
216
+ max_chunk_overlap=20,
217
+ chunk_size_limit=600,
218
+ )
219
+ from llama_index import ServiceContext
220
+
221
+ service_context = ServiceContext.from_defaults(
222
+ prompt_helper=prompt_helper, embed_model=embed_model
223
+ )
224
+ query_object = GPTVectorStoreIndexQuery(
225
+ index.index_struct,
226
+ service_context=service_context,
227
+ similarity_top_k=5,
228
+ vector_store=index._vector_store,
229
+ docstore=index._docstore,
230
+ )
231
+ query_bundle = QueryBundle(inputs)
232
+ nodes = query_object.retrieve(query_bundle)
233
+ reference_results = [n.node.text for n in nodes]
234
+ reference_results = add_source_numbers(reference_results, use_source=False)
235
+ display_reference = add_details(reference_results)
236
+ display_reference = "\n\n" + "".join(display_reference)
237
+ inputs = (
238
+ replace_today(PROMPT_TEMPLATE)
239
+ .replace("{query_str}", inputs)
240
+ .replace("{context_str}", "\n\n".join(reference_results))
241
+ .replace("{reply_language}", reply_language)
242
+ )
243
+ elif use_websearch:
244
+ limited_context = True
245
+ search_results = ddg(inputs, max_results=5)
246
+ old_inputs = inputs
247
+ reference_results = []
248
+ for idx, result in enumerate(search_results):
249
+ logging.debug(f"搜索结果{idx + 1}:{result}")
250
+ domain_name = urllib3.util.parse_url(result["href"]).host
251
+ reference_results.append([result["body"], result["href"]])
252
+ display_reference.append(
253
+ f"{idx+1}. [{domain_name}]({result['href']})\n"
254
+ )
255
+ reference_results = add_source_numbers(reference_results)
256
+ display_reference = "\n\n" + "".join(display_reference)
257
+ inputs = (
258
+ replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
259
+ .replace("{query}", inputs)
260
+ .replace("{web_results}", "\n\n".join(reference_results))
261
+ .replace("{reply_language}", reply_language)
262
+ )
263
+ else:
264
+ display_reference = ""
265
+
266
+ if (
267
+ self.need_api_key and
268
+ self.api_key is None
269
+ and not shared.state.multi_api_key
270
+ ):
271
+ status_text = STANDARD_ERROR_MSG + NO_APIKEY_MSG
272
+ logging.info(status_text)
273
+ chatbot.append((inputs, ""))
274
+ if len(self.history) == 0:
275
+ self.history.append(construct_user(inputs))
276
+ self.history.append("")
277
+ self.all_token_counts.append(0)
278
+ else:
279
+ self.history[-2] = construct_user(inputs)
280
+ yield chatbot + [(inputs, "")], status_text
281
+ return
282
+ elif len(inputs.strip()) == 0:
283
+ status_text = STANDARD_ERROR_MSG + NO_INPUT_MSG
284
+ logging.info(status_text)
285
+ yield chatbot + [(inputs, "")], status_text
286
+ return
287
+
288
+ self.history.append(construct_user(inputs))
289
+
290
+ try:
291
+ if stream:
292
+ logging.debug("使用流式传输")
293
+ iter = self.stream_next_chatbot(
294
+ inputs,
295
+ chatbot,
296
+ fake_input=old_inputs,
297
+ display_append=display_reference,
298
+ )
299
+ for chatbot, status_text in iter:
300
+ yield chatbot, status_text
301
+ else:
302
+ logging.debug("不使用流式传输")
303
+ chatbot, status_text = self.next_chatbot_at_once(
304
+ inputs,
305
+ chatbot,
306
+ fake_input=old_inputs,
307
+ display_append=display_reference,
308
+ )
309
+ yield chatbot, status_text
310
+ except Exception as e:
311
+ status_text = STANDARD_ERROR_MSG + str(e)
312
+ yield chatbot, status_text
313
+
314
+ if len(self.history) > 1 and self.history[-1]["content"] != inputs:
315
+ logging.info(
316
+ "回答为:"
317
+ + colorama.Fore.BLUE
318
+ + f"{self.history[-1]['content']}"
319
+ + colorama.Style.RESET_ALL
320
+ )
321
+
322
+ if limited_context:
323
+ self.history = self.history[-4:]
324
+ self.all_token_counts = self.all_token_counts[-2:]
325
+
326
+ max_token = self.token_upper_limit - TOKEN_OFFSET
327
+
328
+ if sum(self.all_token_counts) > max_token and should_check_token_count:
329
+ count = 0
330
+ while (
331
+ sum(self.all_token_counts)
332
+ > self.token_upper_limit * REDUCE_TOKEN_FACTOR
333
+ and sum(self.all_token_counts) > 0
334
+ ):
335
+ count += 1
336
+ del self.all_token_counts[0]
337
+ del self.history[:2]
338
+ logging.info(status_text)
339
+ status_text = f"为了防止token超限,模型忘记了早期的 {count} 轮对话"
340
+ yield chatbot, status_text
341
+
342
+ def retry(
343
+ self,
344
+ chatbot,
345
+ stream=False,
346
+ use_websearch=False,
347
+ files=None,
348
+ reply_language="中文",
349
+ ):
350
+ logging.debug("重试中……")
351
+ if len(self.history) == 0:
352
+ yield chatbot, f"{STANDARD_ERROR_MSG}上下文是空的"
353
+ return
354
+
355
+ inputs = self.history[-2]["content"]
356
+ del self.history[-2:]
357
+ self.all_token_counts.pop()
358
+ iter = self.predict(
359
+ inputs,
360
+ chatbot,
361
+ stream=stream,
362
+ use_websearch=use_websearch,
363
+ files=files,
364
+ reply_language=reply_language,
365
+ )
366
+ for x in iter:
367
+ yield x
368
+ logging.debug("重试完毕")
369
+
370
+ # def reduce_token_size(self, chatbot):
371
+ # logging.info("开始减少token数量……")
372
+ # chatbot, status_text = self.next_chatbot_at_once(
373
+ # summarize_prompt,
374
+ # chatbot
375
+ # )
376
+ # max_token_count = self.token_upper_limit * REDUCE_TOKEN_FACTOR
377
+ # num_chat = find_n(self.all_token_counts, max_token_count)
378
+ # logging.info(f"previous_token_count: {self.all_token_counts}, keeping {num_chat} chats")
379
+ # chatbot = chatbot[:-1]
380
+ # self.history = self.history[-2*num_chat:] if num_chat > 0 else []
381
+ # self.all_token_counts = self.all_token_counts[-num_chat:] if num_chat > 0 else []
382
+ # msg = f"保留了最近{num_chat}轮对话"
383
+ # logging.info(msg)
384
+ # logging.info("减少token数量完毕")
385
+ # return chatbot, msg + "," + self.token_message(self.all_token_counts if len(self.all_token_counts) > 0 else [0])
386
+
387
+ def interrupt(self):
388
+ self.interrupted = True
389
+
390
+ def recover(self):
391
+ self.interrupted = False
392
+
393
+ def set_token_upper_limit(self, new_upper_limit):
394
+ self.token_upper_limit = new_upper_limit
395
+ print(f"token上限设置为{new_upper_limit}")
396
+
397
+ def set_temperature(self, new_temperature):
398
+ self.temperature = new_temperature
399
+
400
+ def set_top_p(self, new_top_p):
401
+ self.top_p = new_top_p
402
+
403
+ def set_n_choices(self, new_n_choices):
404
+ self.n_choices = new_n_choices
405
+
406
+ def set_stop_sequence(self, new_stop_sequence: str):
407
+ new_stop_sequence = new_stop_sequence.split(",")
408
+ self.stop_sequence = new_stop_sequence
409
+
410
+ def set_max_tokens(self, new_max_tokens):
411
+ self.max_generation_token = new_max_tokens
412
+
413
+ def set_presence_penalty(self, new_presence_penalty):
414
+ self.presence_penalty = new_presence_penalty
415
+
416
+ def set_frequency_penalty(self, new_frequency_penalty):
417
+ self.frequency_penalty = new_frequency_penalty
418
+
419
+ def set_logit_bias(self, logit_bias):
420
+ logit_bias = logit_bias.split()
421
+ bias_map = {}
422
+ encoding = tiktoken.get_encoding("cl100k_base")
423
+ for line in logit_bias:
424
+ word, bias_amount = line.split(":")
425
+ if word:
426
+ for token in encoding.encode(word):
427
+ bias_map[token] = float(bias_amount)
428
+ self.logit_bias = bias_map
429
+
430
+ def set_user_identifier(self, new_user_identifier):
431
+ self.user_identifier = new_user_identifier
432
+
433
+ def set_system_prompt(self, new_system_prompt):
434
+ self.system_prompt = new_system_prompt
435
+
436
+ def set_key(self, new_access_key):
437
+ self.api_key = new_access_key.strip()
438
+ msg = f"API密钥更改为了{hide_middle_chars(self.api_key)}"
439
+ logging.info(msg)
440
+ return msg
441
+
442
+ def reset(self):
443
+ self.history = []
444
+ self.all_token_counts = []
445
+ self.interrupted = False
446
+ return [], self.token_message([0])
447
+
448
+ def delete_first_conversation(self):
449
+ if self.history:
450
+ del self.history[:2]
451
+ del self.all_token_counts[0]
452
+ return self.token_message()
453
+
454
+ def delete_last_conversation(self, chatbot):
455
+ if len(chatbot) > 0 and STANDARD_ERROR_MSG in chatbot[-1][1]:
456
+ msg = "由于包含报错信息,只删除chatbot记录"
457
+ chatbot.pop()
458
+ return chatbot, msg
459
+ if len(self.history) > 0:
460
+ self.history.pop()
461
+ self.history.pop()
462
+ if len(chatbot) > 0:
463
+ msg = "删除了一组chatbot对话"
464
+ chatbot.pop()
465
+ if len(self.all_token_counts) > 0:
466
+ msg = "删除了一组对话的token计数记录"
467
+ self.all_token_counts.pop()
468
+ msg = "删除了一组对话"
469
+ return chatbot, msg
470
+
471
+ def token_message(self, token_lst=None):
472
+ if token_lst is None:
473
+ token_lst = self.all_token_counts
474
+ token_sum = 0
475
+ for i in range(len(token_lst)):
476
+ token_sum += sum(token_lst[: i + 1])
477
+ return f"Token 计数: {sum(token_lst)},本次对话累计消耗了 {token_sum} tokens"
478
+
479
+ def save_chat_history(self, filename, chatbot, user_name):
480
+ if filename == "":
481
+ return
482
+ if not filename.endswith(".json"):
483
+ filename += ".json"
484
+ return save_file(filename, self.system_prompt, self.history, chatbot, user_name)
485
+
486
+ def export_markdown(self, filename, chatbot, user_name):
487
+ if filename == "":
488
+ return
489
+ if not filename.endswith(".md"):
490
+ filename += ".md"
491
+ return save_file(filename, self.system_prompt, self.history, chatbot, user_name)
492
+
493
+ def load_chat_history(self, filename, chatbot, user_name):
494
+ logging.debug(f"{user_name} 加载对话历史中……")
495
+ if type(filename) != str:
496
+ filename = filename.name
497
+ try:
498
+ with open(os.path.join(HISTORY_DIR, user_name, filename), "r") as f:
499
+ json_s = json.load(f)
500
+ try:
501
+ if type(json_s["history"][0]) == str:
502
+ logging.info("历史记录格式为旧版,正在转换……")
503
+ new_history = []
504
+ for index, item in enumerate(json_s["history"]):
505
+ if index % 2 == 0:
506
+ new_history.append(construct_user(item))
507
+ else:
508
+ new_history.append(construct_assistant(item))
509
+ json_s["history"] = new_history
510
+ logging.info(new_history)
511
+ except:
512
+ # 没有对话历史
513
+ pass
514
+ logging.debug(f"{user_name} 加载对话历史完毕")
515
+ self.history = json_s["history"]
516
+ return filename, json_s["system"], json_s["chatbot"]
517
+ except FileNotFoundError:
518
+ logging.warning(f"{user_name} 没有找到对话历史文件,不执行任何操作")
519
+ return filename, self.system_prompt, chatbot
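The extension contract for new backends is the pair of get_answer_* methods above: a subclass implements one or both, and BaseLLMModel.predict handles history, web search, file indexing, and token bookkeeping. A minimal sketch of a custom backend (EchoLLM is illustrative, not part of the commit):

```python
from modules.base_model import BaseLLMModel

class EchoLLM(BaseLLMModel):
    """Illustrative backend that echoes the last user message."""

    def __init__(self, model_name="echo", **kwargs):
        super().__init__(model_name=model_name, **kwargs)

    def get_answer_at_once(self):
        # self.history holds OpenAI-format messages, newest question last.
        question = self.history[-1]["content"]
        answer = f"You said: {question}"
        return answer, sum(self.all_token_counts) + len(answer)

    def get_answer_stream_iter(self):
        answer, _ = self.get_answer_at_once()
        # stream_next_chatbot expects cumulative partial text, not deltas.
        for i in range(1, len(answer) + 1):
            yield answer[:i]
```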
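A worked example of the "word:likelihood" format that set_logit_bias parses (matching the logit bias textbox placeholder in the UI). Note that tiktoken is not imported explicitly in this file, so it presumably arrives via one of the star imports:

```python
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")
bias_map = {}
for pair in "hello:5 world:-5".split():    # space-separated word:likelihood pairs
    word, amount = pair.split(":")
    for token in encoding.encode(word):    # a word may span several token ids
        bias_map[token] = float(amount)

# bias_map now maps every token id of "hello" to 5.0 and of "world" to -5.0
```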
modules/chat_func.py DELETED
@@ -1,497 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- from __future__ import annotations
3
- from typing import TYPE_CHECKING, List
4
-
5
- import logging
6
- import json
7
- import os
8
- import requests
9
- import urllib3
10
-
11
- from tqdm import tqdm
12
- import colorama
13
- from duckduckgo_search import ddg
14
- import asyncio
15
- import aiohttp
16
-
17
-
18
- from modules.presets import *
19
- from modules.llama_func import *
20
- from modules.utils import *
21
- from . import shared
22
- from modules.config import retrieve_proxy
23
-
24
- # logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s")
25
-
26
- if TYPE_CHECKING:
27
- from typing import TypedDict
28
-
29
- class DataframeData(TypedDict):
30
- headers: List[str]
31
- data: List[List[str | int | bool]]
32
-
33
-
34
- initial_prompt = "You are a helpful assistant."
35
- HISTORY_DIR = "history"
36
- TEMPLATES_DIR = "templates"
37
-
38
- @shared.state.switching_api_key # 在不开启多账号模式的时候,这个装饰器不会起作用
39
- def get_response(
40
- openai_api_key, system_prompt, history, temperature, top_p, stream, selected_model
41
- ):
42
- headers = {
43
- "Content-Type": "application/json",
44
- "Authorization": f"Bearer {openai_api_key}",
45
- }
46
-
47
- history = [construct_system(system_prompt), *history]
48
-
49
- payload = {
50
- "model": selected_model,
51
- "messages": history, # [{"role": "user", "content": f"{inputs}"}],
52
- "temperature": temperature, # 1.0,
53
- "top_p": top_p, # 1.0,
54
- "n": 1,
55
- "stream": stream,
56
- "presence_penalty": 0,
57
- "frequency_penalty": 0,
58
- }
59
- if stream:
60
- timeout = timeout_streaming
61
- else:
62
- timeout = timeout_all
63
-
64
-
65
- # 如果有自定义的api-host,使用自定义host发送请求,否则使用默认设置发送请求
66
- if shared.state.completion_url != COMPLETION_URL:
67
- logging.info(f"使用自定义API URL: {shared.state.completion_url}")
68
-
69
- with retrieve_proxy():
70
- response = requests.post(
71
- shared.state.completion_url,
72
- headers=headers,
73
- json=payload,
74
- stream=True,
75
- timeout=timeout,
76
- )
77
-
78
- return response
79
-
80
-
81
- def stream_predict(
82
- openai_api_key,
83
- system_prompt,
84
- history,
85
- inputs,
86
- chatbot,
87
- all_token_counts,
88
- top_p,
89
- temperature,
90
- selected_model,
91
- fake_input=None,
92
- display_append=""
93
- ):
94
- def get_return_value():
95
- return chatbot, history, status_text, all_token_counts
96
-
97
- logging.info("实时回答模式")
98
- partial_words = ""
99
- counter = 0
100
- status_text = "开始实时传输回答……"
101
- history.append(construct_user(inputs))
102
- history.append(construct_assistant(""))
103
- if fake_input:
104
- chatbot.append((fake_input, ""))
105
- else:
106
- chatbot.append((inputs, ""))
107
- user_token_count = 0
108
- if fake_input is not None:
109
- input_token_count = count_token(construct_user(fake_input))
110
- else:
111
- input_token_count = count_token(construct_user(inputs))
112
- if len(all_token_counts) == 0:
113
- system_prompt_token_count = count_token(construct_system(system_prompt))
114
- user_token_count = (
115
- input_token_count + system_prompt_token_count
116
- )
117
- else:
118
- user_token_count = input_token_count
119
- all_token_counts.append(user_token_count)
120
- logging.info(f"输入token计数: {user_token_count}")
121
- yield get_return_value()
122
- try:
123
- response = get_response(
124
- openai_api_key,
125
- system_prompt,
126
- history,
127
- temperature,
128
- top_p,
129
- True,
130
- selected_model,
131
- )
132
- except requests.exceptions.ConnectTimeout:
133
- status_text = (
134
- standard_error_msg + connection_timeout_prompt + error_retrieve_prompt
135
- )
136
- yield get_return_value()
137
- return
138
- except requests.exceptions.ReadTimeout:
139
- status_text = standard_error_msg + read_timeout_prompt + error_retrieve_prompt
140
- yield get_return_value()
141
- return
142
-
143
- yield get_return_value()
144
- error_json_str = ""
145
-
146
- if fake_input is not None:
147
- history[-2] = construct_user(fake_input)
148
- for chunk in tqdm(response.iter_lines()):
149
- if counter == 0:
150
- counter += 1
151
- continue
152
- counter += 1
153
- # check whether each line is non-empty
154
- if chunk:
155
- chunk = chunk.decode()
156
- chunklength = len(chunk)
157
- try:
158
- chunk = json.loads(chunk[6:])
159
- except json.JSONDecodeError:
160
- logging.info(chunk)
161
- error_json_str += chunk
162
- status_text = f"JSON解析错误。请重置对话。收到的内容: {error_json_str}"
163
- yield get_return_value()
164
- continue
165
- # decode each line as response data is in bytes
166
- if chunklength > 6 and "delta" in chunk["choices"][0]:
167
- finish_reason = chunk["choices"][0]["finish_reason"]
168
- status_text = construct_token_message(all_token_counts)
169
- if finish_reason == "stop":
170
- yield get_return_value()
171
- break
172
- try:
173
- partial_words = (
174
- partial_words + chunk["choices"][0]["delta"]["content"]
175
- )
176
- except KeyError:
177
- status_text = (
178
- standard_error_msg
179
- + "API回复中找不到内容。很可能是Token计数达到上限了。请重置对话。当前Token计数: "
180
- + str(sum(all_token_counts))
181
- )
182
- yield get_return_value()
183
- break
184
- history[-1] = construct_assistant(partial_words)
185
- chatbot[-1] = (chatbot[-1][0], partial_words+display_append)
186
- all_token_counts[-1] += 1
187
- yield get_return_value()
188
-
189
-
190
- def predict_all(
191
- openai_api_key,
192
- system_prompt,
193
- history,
194
- inputs,
195
- chatbot,
196
- all_token_counts,
197
- top_p,
198
- temperature,
199
- selected_model,
200
- fake_input=None,
201
- display_append=""
202
- ):
203
- logging.info("一次性回答模式")
204
- history.append(construct_user(inputs))
205
- history.append(construct_assistant(""))
206
- if fake_input:
207
- chatbot.append((fake_input, ""))
208
- else:
209
- chatbot.append((inputs, ""))
210
- if fake_input is not None:
211
- all_token_counts.append(count_token(construct_user(fake_input)))
212
- else:
213
- all_token_counts.append(count_token(construct_user(inputs)))
214
- try:
215
- response = get_response(
216
- openai_api_key,
217
- system_prompt,
218
- history,
219
- temperature,
220
- top_p,
221
- False,
222
- selected_model,
223
- )
224
- except requests.exceptions.ConnectTimeout:
225
- status_text = (
226
- standard_error_msg + connection_timeout_prompt + error_retrieve_prompt
227
- )
228
- return chatbot, history, status_text, all_token_counts
229
- except requests.exceptions.ProxyError:
230
- status_text = standard_error_msg + proxy_error_prompt + error_retrieve_prompt
231
- return chatbot, history, status_text, all_token_counts
232
- except requests.exceptions.SSLError:
233
- status_text = standard_error_msg + ssl_error_prompt + error_retrieve_prompt
234
- return chatbot, history, status_text, all_token_counts
235
- response = json.loads(response.text)
236
- if fake_input is not None:
237
- history[-2] = construct_user(fake_input)
238
- try:
239
- content = response["choices"][0]["message"]["content"]
240
- history[-1] = construct_assistant(content)
241
- chatbot[-1] = (chatbot[-1][0], content+display_append)
242
- total_token_count = response["usage"]["total_tokens"]
243
- if fake_input is not None:
244
- all_token_counts[-1] += count_token(construct_assistant(content))
245
- else:
246
- all_token_counts[-1] = total_token_count - sum(all_token_counts)
247
- status_text = construct_token_message([total_token_count])
248
- return chatbot, history, status_text, all_token_counts
249
- except KeyError:
250
- status_text = standard_error_msg + str(response)
251
- return chatbot, history, status_text, all_token_counts
252
-
253
-
254
- def predict(
255
- openai_api_key,
256
- system_prompt,
257
- history,
258
- inputs,
259
- chatbot,
260
- all_token_counts,
261
- top_p,
262
- temperature,
263
- stream=False,
264
- selected_model=MODELS[0],
265
- use_websearch=False,
266
- files = None,
267
- reply_language="中文",
268
- should_check_token_count=True,
269
- ): # repetition_penalty, top_k
270
- from llama_index.indices.vector_store.base_query import GPTVectorStoreIndexQuery
271
- from llama_index.indices.query.schema import QueryBundle
272
- from langchain.llms import OpenAIChat
273
-
274
-
275
- logging.info("输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
276
- if should_check_token_count:
277
- yield chatbot+[(inputs, "")], history, "开始生成回答……", all_token_counts
278
- if reply_language == "跟随问题语言(不稳定)":
279
- reply_language = "the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch."
280
- old_inputs = None
281
- display_reference = []
282
- limited_context = False
283
- if files:
284
- limited_context = True
285
- old_inputs = inputs
286
- msg = "加载索引中……(这可能需要几分钟)"
287
- logging.info(msg)
288
- yield chatbot+[(inputs, "")], history, msg, all_token_counts
289
- index = construct_index(openai_api_key, file_src=files)
290
- msg = "索引构建完成,获取回答中……"
291
- logging.info(msg)
292
- yield chatbot+[(inputs, "")], history, msg, all_token_counts
293
- with retrieve_proxy():
294
- llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name=selected_model))
295
- prompt_helper = PromptHelper(max_input_size = 4096, num_output = 5, max_chunk_overlap = 20, chunk_size_limit=600)
296
- from llama_index import ServiceContext
297
- service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
298
- query_object = GPTVectorStoreIndexQuery(index.index_struct, service_context=service_context, similarity_top_k=5, vector_store=index._vector_store, docstore=index._docstore)
299
- query_bundle = QueryBundle(inputs)
300
- nodes = query_object.retrieve(query_bundle)
301
- reference_results = [n.node.text for n in nodes]
302
- reference_results = add_source_numbers(reference_results, use_source=False)
303
- display_reference = add_details(reference_results)
304
- display_reference = "\n\n" + "".join(display_reference)
305
- inputs = (
306
- replace_today(PROMPT_TEMPLATE)
307
- .replace("{query_str}", inputs)
308
- .replace("{context_str}", "\n\n".join(reference_results))
309
- .replace("{reply_language}", reply_language )
310
- )
311
- elif use_websearch:
312
- limited_context = True
313
- search_results = ddg(inputs, max_results=5)
314
- old_inputs = inputs
315
- reference_results = []
316
- for idx, result in enumerate(search_results):
317
- logging.info(f"搜索结果{idx + 1}:{result}")
318
- domain_name = urllib3.util.parse_url(result["href"]).host
319
- reference_results.append([result["body"], result["href"]])
320
- display_reference.append(f"{idx+1}. [{domain_name}]({result['href']})\n")
321
- reference_results = add_source_numbers(reference_results)
322
- display_reference = "\n\n" + "".join(display_reference)
323
- inputs = (
324
- replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
325
- .replace("{query}", inputs)
326
- .replace("{web_results}", "\n\n".join(reference_results))
327
- .replace("{reply_language}", reply_language )
328
- )
329
- else:
330
- display_reference = ""
331
-
332
- if len(openai_api_key) == 0 and not shared.state.multi_api_key:
333
- status_text = standard_error_msg + no_apikey_msg
334
- logging.info(status_text)
335
- chatbot.append((inputs, ""))
336
- if len(history) == 0:
337
- history.append(construct_user(inputs))
338
- history.append("")
339
- all_token_counts.append(0)
340
- else:
341
- history[-2] = construct_user(inputs)
342
- yield chatbot+[(inputs, "")], history, status_text, all_token_counts
343
- return
344
- elif len(inputs.strip()) == 0:
345
- status_text = standard_error_msg + no_input_msg
346
- logging.info(status_text)
347
- yield chatbot+[(inputs, "")], history, status_text, all_token_counts
348
- return
349
-
350
- if stream:
351
- logging.info("使用流式传输")
352
- iter = stream_predict(
353
- openai_api_key,
354
- system_prompt,
355
- history,
356
- inputs,
357
- chatbot,
358
- all_token_counts,
359
- top_p,
360
- temperature,
361
- selected_model,
362
- fake_input=old_inputs,
363
- display_append=display_reference
364
- )
365
- for chatbot, history, status_text, all_token_counts in iter:
366
- if shared.state.interrupted:
367
- shared.state.recover()
368
- return
369
- yield chatbot, history, status_text, all_token_counts
370
- else:
371
- logging.info("不使用流式传输")
372
- chatbot, history, status_text, all_token_counts = predict_all(
373
- openai_api_key,
374
- system_prompt,
375
- history,
376
- inputs,
377
- chatbot,
378
- all_token_counts,
379
- top_p,
380
- temperature,
381
- selected_model,
382
- fake_input=old_inputs,
383
- display_append=display_reference
384
- )
385
- yield chatbot, history, status_text, all_token_counts
386
-
387
- logging.info(f"传输完毕。当前token计数为{all_token_counts}")
388
- if len(history) > 1 and history[-1]["content"] != inputs:
389
- logging.info(
390
- "回答为:"
391
- + colorama.Fore.BLUE
392
- + f"{history[-1]['content']}"
393
- + colorama.Style.RESET_ALL
394
- )
395
-
396
- if limited_context:
397
- history = history[-4:]
398
- all_token_counts = all_token_counts[-2:]
399
- yield chatbot, history, status_text, all_token_counts
400
-
401
- if stream:
402
- max_token = MODEL_SOFT_TOKEN_LIMIT[selected_model]["streaming"]
403
- else:
404
- max_token = MODEL_SOFT_TOKEN_LIMIT[selected_model]["all"]
405
-
406
- if sum(all_token_counts) > max_token and should_check_token_count:
407
- print(all_token_counts)
408
- count = 0
409
- while sum(all_token_counts) > max_token - 500 and sum(all_token_counts) > 0:
410
- count += 1
411
- del all_token_counts[0]
412
- del history[:2]
413
- logging.info(status_text)
414
- status_text = f"为了防止token超限,模型忘记了早期的 {count} 轮对话"
415
- yield chatbot, history, status_text, all_token_counts
416
-
417
-
418
- def retry(
419
- openai_api_key,
420
- system_prompt,
421
- history,
422
- chatbot,
423
- token_count,
424
- top_p,
425
- temperature,
426
- stream=False,
427
- selected_model=MODELS[0],
428
- reply_language="中文",
429
- ):
430
- logging.info("重试中……")
431
- if len(history) == 0:
432
- yield chatbot, history, f"{standard_error_msg}上下文是空的", token_count
433
- return
434
- history.pop()
435
- inputs = history.pop()["content"]
436
- token_count.pop()
437
- iter = predict(
438
- openai_api_key,
439
- system_prompt,
440
- history,
441
- inputs,
442
- chatbot,
443
- token_count,
444
- top_p,
445
- temperature,
446
- stream=stream,
447
- selected_model=selected_model,
448
- reply_language=reply_language,
449
- )
450
- logging.info("重试中……")
451
- for x in iter:
452
- yield x
453
- logging.info("重试完毕")
454
-
455
-
456
- def reduce_token_size(
457
- openai_api_key,
458
- system_prompt,
459
- history,
460
- chatbot,
461
- token_count,
462
- top_p,
463
- temperature,
464
- max_token_count,
465
- selected_model=MODELS[0],
466
- reply_language="中文",
467
- ):
468
- logging.info("开始减少token数量……")
469
- iter = predict(
470
- openai_api_key,
471
- system_prompt,
472
- history,
473
- summarize_prompt,
474
- chatbot,
475
- token_count,
476
- top_p,
477
- temperature,
478
- selected_model=selected_model,
479
- should_check_token_count=False,
480
- reply_language=reply_language,
481
- )
482
- logging.info(f"chatbot: {chatbot}")
483
- flag = False
484
- for chatbot, history, status_text, previous_token_count in iter:
485
- num_chat = find_n(previous_token_count, max_token_count)
486
- logging.info(f"previous_token_count: {previous_token_count}, keeping {num_chat} chats")
487
- if flag:
488
- chatbot = chatbot[:-1]
489
- flag = True
490
- history = history[-2*num_chat:] if num_chat > 0 else []
491
- token_count = previous_token_count[-num_chat:] if num_chat > 0 else []
492
- msg = f"保留了最近{num_chat}轮对话"
493
- yield chatbot, history, msg + "," + construct_token_message(
494
- token_count if len(token_count) > 0 else [0],
495
- ), token_count
496
- logging.info(msg)
497
- logging.info("减少token数量完毕")
modules/config.py CHANGED
@@ -117,6 +117,8 @@ https_proxy = os.environ.get("HTTPS_PROXY", https_proxy)
 os.environ["HTTP_PROXY"] = ""
 os.environ["HTTPS_PROXY"] = ""
 
+local_embedding = config.get("local_embedding", False)  # 是否使用本地embedding
+
 @contextmanager
 def retrieve_proxy(proxy=None):
     """
modules/llama_func.py CHANGED
@@ -15,6 +15,8 @@ from tqdm import tqdm
15
 
16
  from modules.presets import *
17
  from modules.utils import *
 
 
18
 
19
  def get_index_name(file_src):
20
  file_paths = [x.name for x in file_src]
@@ -28,6 +30,7 @@ def get_index_name(file_src):
28
 
29
  return md5_hash.hexdigest()
30
 
 
31
  def block_split(text):
32
  blocks = []
33
  while len(text) > 0:
@@ -35,6 +38,7 @@ def block_split(text):
35
  text = text[1000:]
36
  return blocks
37
 
 
38
  def get_documents(file_src):
39
  documents = []
40
  logging.debug("Loading documents...")
@@ -44,40 +48,45 @@ def get_documents(file_src):
44
  filename = os.path.basename(filepath)
45
  file_type = os.path.splitext(filepath)[1]
46
  logging.info(f"loading file: {filename}")
47
- if file_type == ".pdf":
48
- logging.debug("Loading PDF...")
49
- try:
50
- from modules.pdf_func import parse_pdf
51
- from modules.config import advance_docs
52
- two_column = advance_docs["pdf"].get("two_column", False)
53
- pdftext = parse_pdf(filepath, two_column).text
54
- except:
55
- pdftext = ""
56
- with open(filepath, 'rb') as pdfFileObj:
57
- pdfReader = PyPDF2.PdfReader(pdfFileObj)
58
- for page in tqdm(pdfReader.pages):
59
- pdftext += page.extract_text()
60
- text_raw = pdftext
61
- elif file_type == ".docx":
62
- logging.debug("Loading Word...")
63
- DocxReader = download_loader("DocxReader")
64
- loader = DocxReader()
65
- text_raw = loader.load_data(file=filepath)[0].text
66
- elif file_type == ".epub":
67
- logging.debug("Loading EPUB...")
68
- EpubReader = download_loader("EpubReader")
69
- loader = EpubReader()
70
- text_raw = loader.load_data(file=filepath)[0].text
71
- elif file_type == ".xlsx":
72
- logging.debug("Loading Excel...")
73
- text_list = excel_to_string(filepath)
74
- for elem in text_list:
75
- documents.append(Document(elem))
76
- continue
77
- else:
78
- logging.debug("Loading text file...")
79
- with open(filepath, "r", encoding="utf-8") as f:
80
- text_raw = f.read()
81
  text = add_space(text_raw)
82
  # text = block_split(text)
83
  # documents += text
@@ -87,19 +96,21 @@ def get_documents(file_src):
87
 
88
 
89
  def construct_index(
90
- api_key,
91
- file_src,
92
- max_input_size=4096,
93
- num_outputs=5,
94
- max_chunk_overlap=20,
95
- chunk_size_limit=600,
96
- embedding_limit=None,
97
- separator=" "
98
  ):
99
  from langchain.chat_models import ChatOpenAI
100
- from llama_index import GPTSimpleVectorIndex, ServiceContext
 
101
 
102
- os.environ["OPENAI_API_KEY"] = api_key
 
103
  chunk_size_limit = None if chunk_size_limit == 0 else chunk_size_limit
104
  embedding_limit = None if embedding_limit == 0 else embedding_limit
105
  separator = " " if separator == "" else separator
@@ -107,7 +118,14 @@ def construct_index(
107
  llm_predictor = LLMPredictor(
108
  llm=ChatOpenAI(model_name="gpt-3.5-turbo-0301", openai_api_key=api_key)
109
  )
110
- prompt_helper = PromptHelper(max_input_size = max_input_size, num_output = num_outputs, max_chunk_overlap = max_chunk_overlap, embedding_limit=embedding_limit, chunk_size_limit=600, separator=separator)
 
 
 
 
 
 
 
111
  index_name = get_index_name(file_src)
112
  if os.path.exists(f"./index/{index_name}.json"):
113
  logging.info("找到了缓存的索引文件,加载中……")
@@ -115,11 +133,20 @@ def construct_index(
115
  else:
116
  try:
117
  documents = get_documents(file_src)
 
 
 
 
118
  logging.info("构建索引中……")
119
  with retrieve_proxy():
120
- service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit=chunk_size_limit)
 
 
 
 
 
121
  index = GPTSimpleVectorIndex.from_documents(
122
- documents, service_context=service_context
123
  )
124
  logging.debug("索引构建完成!")
125
  os.makedirs("./index", exist_ok=True)
 
15
 
16
  from modules.presets import *
17
  from modules.utils import *
18
+ from modules.config import local_embedding
19
+
20
 
21
  def get_index_name(file_src):
22
  file_paths = [x.name for x in file_src]
 
30
 
31
  return md5_hash.hexdigest()
32
 
33
+
34
  def block_split(text):
35
  blocks = []
36
  while len(text) > 0:
 
38
  text = text[1000:]
39
  return blocks
40
 
41
+
42
  def get_documents(file_src):
43
  documents = []
44
  logging.debug("Loading documents...")
 
48
  filename = os.path.basename(filepath)
49
  file_type = os.path.splitext(filepath)[1]
50
  logging.info(f"loading file: {filename}")
51
+ try:
52
+ if file_type == ".pdf":
53
+ logging.debug("Loading PDF...")
54
+ try:
55
+ from modules.pdf_func import parse_pdf
56
+ from modules.config import advance_docs
57
+
58
+ two_column = advance_docs["pdf"].get("two_column", False)
59
+ pdftext = parse_pdf(filepath, two_column).text
60
+ except:
61
+ pdftext = ""
62
+ with open(filepath, "rb") as pdfFileObj:
63
+ pdfReader = PyPDF2.PdfReader(pdfFileObj)
64
+ for page in tqdm(pdfReader.pages):
65
+ pdftext += page.extract_text()
66
+ text_raw = pdftext
67
+ elif file_type == ".docx":
68
+ logging.debug("Loading Word...")
69
+ DocxReader = download_loader("DocxReader")
70
+ loader = DocxReader()
71
+ text_raw = loader.load_data(file=filepath)[0].text
72
+ elif file_type == ".epub":
73
+ logging.debug("Loading EPUB...")
74
+ EpubReader = download_loader("EpubReader")
75
+ loader = EpubReader()
76
+ text_raw = loader.load_data(file=filepath)[0].text
77
+ elif file_type == ".xlsx":
78
+ logging.debug("Loading Excel...")
79
+ text_list = excel_to_string(filepath)
80
+ for elem in text_list:
81
+ documents.append(Document(elem))
82
+ continue
83
+ else:
84
+ logging.debug("Loading text file...")
85
+ with open(filepath, "r", encoding="utf-8") as f:
86
+ text_raw = f.read()
87
+ except Exception as e:
88
+ logging.error(f"Error loading file: {filename}")
89
+ pass
90
  text = add_space(text_raw)
91
  # text = block_split(text)
92
  # documents += text
 
96
 
97
 
98
  def construct_index(
99
+ api_key,
100
+ file_src,
101
+ max_input_size=4096,
102
+ num_outputs=5,
103
+ max_chunk_overlap=20,
104
+ chunk_size_limit=600,
105
+ embedding_limit=None,
106
+ separator=" ",
107
  ):
108
  from langchain.chat_models import ChatOpenAI
109
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
110
+ from llama_index import GPTSimpleVectorIndex, ServiceContext, LangchainEmbedding, OpenAIEmbedding
111
 
112
+ if api_key:
113
+ os.environ["OPENAI_API_KEY"] = api_key
114
  chunk_size_limit = None if chunk_size_limit == 0 else chunk_size_limit
115
  embedding_limit = None if embedding_limit == 0 else embedding_limit
116
  separator = " " if separator == "" else separator
 
118
  llm_predictor = LLMPredictor(
119
  llm=ChatOpenAI(model_name="gpt-3.5-turbo-0301", openai_api_key=api_key)
120
  )
121
+ prompt_helper = PromptHelper(
122
+ max_input_size=max_input_size,
123
+ num_output=num_outputs,
124
+ max_chunk_overlap=max_chunk_overlap,
125
+ embedding_limit=embedding_limit,
126
+ chunk_size_limit=chunk_size_limit,
127
+ separator=separator,
128
+ )
129
  index_name = get_index_name(file_src)
130
  if os.path.exists(f"./index/{index_name}.json"):
131
  logging.info("找到了缓存的索引文件,加载中……")
 
133
  else:
134
  try:
135
  documents = get_documents(file_src)
136
+ if local_embedding:
137
+ embed_model = LangchainEmbedding(HuggingFaceEmbeddings())
138
+ else:
139
+ embed_model = OpenAIEmbedding()
140
  logging.info("构建索引中……")
141
  with retrieve_proxy():
142
+ service_context = ServiceContext.from_defaults(
143
+ llm_predictor=llm_predictor,
144
+ prompt_helper=prompt_helper,
145
+ chunk_size_limit=chunk_size_limit,
146
+ embed_model=embed_model,
147
+ )
148
  index = GPTSimpleVectorIndex.from_documents(
149
+ documents, service_context=service_context
150
  )
151
  logging.debug("索引构建完成!")
152
  os.makedirs("./index", exist_ok=True)
modules/models.py ADDED
@@ -0,0 +1,568 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING, List
3
+
4
+ import logging
5
+ import json
6
+ import commentjson as cjson
7
+ import os
8
+ import sys
9
+ import requests
10
+ import urllib3
11
+ import platform
+ import datetime  # billing_info 中使用 datetime.datetime.now()
12
+
13
+ from tqdm import tqdm
14
+ import colorama
15
+ from duckduckgo_search import ddg
16
+ import asyncio
17
+ import aiohttp
18
+ from enum import Enum
19
+
20
+ from .presets import *
21
+ from .llama_func import *
22
+ from .utils import *
23
+ from . import shared
24
+ from .config import retrieve_proxy
25
+ from modules import config
26
+ from .base_model import BaseLLMModel, ModelType
27
+
28
+
29
+ class OpenAIClient(BaseLLMModel):
30
+ def __init__(
31
+ self,
32
+ model_name,
33
+ api_key,
34
+ system_prompt=INITIAL_SYSTEM_PROMPT,
35
+ temperature=1.0,
36
+ top_p=1.0,
37
+ ) -> None:
38
+ super().__init__(
39
+ model_name=model_name,
40
+ temperature=temperature,
41
+ top_p=top_p,
42
+ system_prompt=system_prompt,
43
+ )
44
+ self.api_key = api_key
45
+ self.need_api_key = True
46
+ self._refresh_header()
47
+
48
+ def get_answer_stream_iter(self):
49
+ response = self._get_response(stream=True)
50
+ if response is not None:
51
+ iter = self._decode_chat_response(response)
52
+ partial_text = ""
53
+ for i in iter:
54
+ partial_text += i
55
+ yield partial_text
56
+ else:
57
+ yield STANDARD_ERROR_MSG + GENERAL_ERROR_MSG
58
+
59
+ def get_answer_at_once(self):
60
+ response = self._get_response()
61
+ response = json.loads(response.text)
62
+ content = response["choices"][0]["message"]["content"]
63
+ total_token_count = response["usage"]["total_tokens"]
64
+ return content, total_token_count
65
+
66
+ def count_token(self, user_input):
67
+ input_token_count = count_token(construct_user(user_input))
68
+ if self.system_prompt is not None and len(self.all_token_counts) == 0:
69
+ system_prompt_token_count = count_token(
70
+ construct_system(self.system_prompt)
71
+ )
72
+ return input_token_count + system_prompt_token_count
73
+ return input_token_count
74
+
75
+ def billing_info(self):
76
+ try:
77
+ curr_time = datetime.datetime.now()
78
+ last_day_of_month = get_last_day_of_month(curr_time).strftime("%Y-%m-%d")
79
+ first_day_of_month = curr_time.replace(day=1).strftime("%Y-%m-%d")
80
+ usage_url = f"{shared.state.usage_api_url}?start_date={first_day_of_month}&end_date={last_day_of_month}"
81
+ try:
82
+ usage_data = self._get_billing_data(usage_url)
83
+ except Exception as e:
84
+ logging.error(f"获取API使用情况失败:" + str(e))
85
+ return f"**获取API使用情况失败**"
86
+ rounded_usage = "{:.5f}".format(usage_data["total_usage"] / 100)
87
+ return f"**本月使用金额** \u3000 ${rounded_usage}"
88
+ except requests.exceptions.ConnectTimeout:
89
+ status_text = (
90
+ STANDARD_ERROR_MSG + CONNECTION_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
91
+ )
92
+ return status_text
93
+ except requests.exceptions.ReadTimeout:
94
+ status_text = STANDARD_ERROR_MSG + READ_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
95
+ return status_text
96
+ except Exception as e:
97
+ logging.error(f"获取API使用情况失败:" + str(e))
98
+ return STANDARD_ERROR_MSG + ERROR_RETRIEVE_MSG
99
+
100
+ def set_token_upper_limit(self, new_upper_limit):
101
+ pass
102
+
103
+ def set_key(self, new_access_key):
104
+ self.api_key = new_access_key.strip()
105
+ self._refresh_header()
106
+ msg = f"API密钥更改为了{hide_middle_chars(self.api_key)}"
107
+ logging.info(msg)
108
+ return msg
109
+
110
+ @shared.state.switching_api_key # 在不开启多账号模式的时候,这个装饰器不会起作用
111
+ def _get_response(self, stream=False):
112
+ openai_api_key = self.api_key
113
+ system_prompt = self.system_prompt
114
+ history = self.history
115
+ logging.debug(colorama.Fore.YELLOW + f"{history}" + colorama.Fore.RESET)
116
+ headers = {
117
+ "Content-Type": "application/json",
118
+ "Authorization": f"Bearer {openai_api_key}",
119
+ }
120
+
121
+ if system_prompt is not None:
122
+ history = [construct_system(system_prompt), *history]
123
+
124
+ payload = {
125
+ "model": self.model_name,
126
+ "messages": history,
127
+ "temperature": self.temperature,
128
+ "top_p": self.top_p,
129
+ "n": self.n_choices,
130
+ "stream": stream,
131
+ "presence_penalty": self.presence_penalty,
132
+ "frequency_penalty": self.frequency_penalty,
133
+ }
134
+
135
+ if self.max_generation_token is not None:
136
+ payload["max_tokens"] = self.max_generation_token
137
+ if self.stop_sequence is not None:
138
+ payload["stop"] = self.stop_sequence
139
+ if self.logit_bias is not None:
140
+ payload["logit_bias"] = self.logit_bias
141
+ if self.user_identifier is not None:
142
+ payload["user"] = self.user_identifier
143
+
144
+ if stream:
145
+ timeout = TIMEOUT_STREAMING
146
+ else:
147
+ timeout = TIMEOUT_ALL
148
+
149
+ # 如果有自定义的api-host,使用自定义host发送请求,否则使用默认设置发送请求
150
+ if shared.state.completion_url != COMPLETION_URL:
151
+ logging.info(f"使用自定义API URL: {shared.state.completion_url}")
152
+
153
+ with retrieve_proxy():
154
+ try:
155
+ response = requests.post(
156
+ shared.state.completion_url,
157
+ headers=headers,
158
+ json=payload,
159
+ stream=stream,
160
+ timeout=timeout,
161
+ )
162
+ except:
163
+ return None
164
+ return response
165
+
166
+ def _refresh_header(self):
167
+ self.headers = {
168
+ "Content-Type": "application/json",
169
+ "Authorization": f"Bearer {self.api_key}",
170
+ }
171
+
172
+ def _get_billing_data(self, billing_url):
173
+ with retrieve_proxy():
174
+ response = requests.get(
175
+ billing_url,
176
+ headers=self.headers,
177
+ timeout=TIMEOUT_ALL,
178
+ )
179
+
180
+ if response.status_code == 200:
181
+ data = response.json()
182
+ return data
183
+ else:
184
+ raise Exception(
185
+ f"API request failed with status code {response.status_code}: {response.text}"
186
+ )
187
+
188
+ def _decode_chat_response(self, response):
189
+ error_msg = ""
190
+ for chunk in response.iter_lines():
191
+ if chunk:
192
+ chunk = chunk.decode()
193
+ chunk_length = len(chunk)
194
+ try:
195
+ chunk = json.loads(chunk[6:])
196
+ except json.JSONDecodeError:
197
+ print(f"JSON解析错误,收到的内容: {chunk}")
198
+ error_msg += chunk
199
+ continue
200
+ if chunk_length > 6 and "delta" in chunk["choices"][0]:
201
+ if chunk["choices"][0]["finish_reason"] == "stop":
202
+ break
203
+ try:
204
+ yield chunk["choices"][0]["delta"]["content"]
205
+ except Exception as e:
206
+ # logging.error(f"Error: {e}")
207
+ continue
208
+ if error_msg:
209
+ raise Exception(error_msg)
210
+
211
+
212
+ class ChatGLM_Client(BaseLLMModel):
213
+ def __init__(self, model_name) -> None:
214
+ super().__init__(model_name=model_name)
215
+ from transformers import AutoTokenizer, AutoModel
216
+ import torch
217
+
218
+ system_name = platform.system()
219
+ model_path = None
220
+ if os.path.exists("models"):
221
+ model_dirs = os.listdir("models")
222
+ if model_name in model_dirs:
223
+ model_path = f"models/{model_name}"
224
+ if model_path is not None:
225
+ model_source = model_path
226
+ else:
227
+ model_source = f"THUDM/{model_name}"
228
+ self.tokenizer = AutoTokenizer.from_pretrained(
229
+ model_source, trust_remote_code=True
230
+ )
231
+ quantified = False
232
+ if "int4" in model_name:
233
+ quantified = True
234
+ if quantified:
235
+ model = AutoModel.from_pretrained(
236
+ model_source, trust_remote_code=True
237
+ ).float()
238
+ else:
239
+ model = AutoModel.from_pretrained(
240
+ model_source, trust_remote_code=True
241
+ ).half()
242
+ if torch.cuda.is_available():
243
+ # run on CUDA
244
+ logging.info("CUDA is available, using CUDA")
245
+ model = model.cuda()
246
+ # 仅在 macOS 上、模型已下载到本地且未量化时才使用 MPS(MPS 加速仍存在一些问题)
247
+ elif system_name == "Darwin" and model_path is not None and not quantified:
248
+ logging.info("Running on macOS, using MPS")
249
+ # running on macOS and model already downloaded
250
+ model = model.to("mps")
251
+ else:
252
+ logging.info("GPU is not available, using CPU")
253
+ model = model.eval()
254
+ self.model = model
255
+
256
+ def _get_glm_style_input(self):
257
+ history = [x["content"] for x in self.history]
258
+ query = history.pop()
259
+ logging.debug(colorama.Fore.YELLOW + f"{history}" + colorama.Fore.RESET)
260
+ assert (
261
+ len(history) % 2 == 0
262
+ ), f"History should be even length. current history is: {history}"
263
+ history = [[history[i], history[i + 1]] for i in range(0, len(history), 2)]
264
+ return history, query
265
+
266
+ def get_answer_at_once(self):
267
+ history, query = self._get_glm_style_input()
268
+ response, _ = self.model.chat(self.tokenizer, query, history=history)
269
+ return response, len(response)
270
+
271
+ def get_answer_stream_iter(self):
272
+ history, query = self._get_glm_style_input()
273
+ for response, history in self.model.stream_chat(
274
+ self.tokenizer,
275
+ query,
276
+ history,
277
+ max_length=self.token_upper_limit,
278
+ top_p=self.top_p,
279
+ temperature=self.temperature,
280
+ ):
281
+ yield response
282
+
283
+
284
+ class LLaMA_Client(BaseLLMModel):
285
+ def __init__(
286
+ self,
287
+ model_name,
288
+ lora_path=None,
289
+ ) -> None:
290
+ super().__init__(model_name=model_name)
291
+ from lmflow.datasets.dataset import Dataset
292
+ from lmflow.pipeline.auto_pipeline import AutoPipeline
293
+ from lmflow.models.auto_model import AutoModel
294
+ from lmflow.args import ModelArguments, DatasetArguments, InferencerArguments
295
+ model_path = None
296
+ if os.path.exists("models"):
297
+ model_dirs = os.listdir("models")
298
+ if model_name in model_dirs:
299
+ model_path = f"models/{model_name}"
300
+ if model_path is not None:
301
+ model_source = model_path
302
+ else:
303
+ raise Exception(f"models目录下没有这个模型: {model_name}")
304
+ if lora_path is not None:
305
+ lora_path = f"lora/{lora_path}"
306
+ self.max_generation_token = 1000
307
+ pipeline_name = "inferencer"
308
+ model_args = ModelArguments(
+ model_name_or_path=model_source, lora_model_path=lora_path,
+ model_type=None, config_overrides=None, config_name=None,
+ tokenizer_name=None, cache_dir=None, use_fast_tokenizer=True,
+ model_revision='main', use_auth_token=False, torch_dtype=None,
+ use_lora=False, lora_r=8, lora_alpha=32, lora_dropout=0.1,
+ use_ram_optimized_load=True)
309
+ pipeline_args = InferencerArguments(local_rank=0, random_seed=1, deepspeed='configs/ds_config_chatbot.json', mixed_precision='bf16')
310
+
311
+ with open(pipeline_args.deepspeed, "r") as f:
312
+ ds_config = json.load(f)
313
+
314
+ self.model = AutoModel.get_model(
315
+ model_args,
316
+ tune_strategy="none",
317
+ ds_config=ds_config,
318
+ )
319
+
320
+ # We don't need input data
321
+ data_args = DatasetArguments(dataset_path=None)
322
+ self.dataset = Dataset(data_args)
323
+
324
+ self.inferencer = AutoPipeline.get_pipeline(
325
+ pipeline_name=pipeline_name,
326
+ model_args=model_args,
327
+ data_args=data_args,
328
+ pipeline_args=pipeline_args,
329
+ )
330
+
331
+ # Chats
332
+ model_name = model_args.model_name_or_path
333
+ if model_args.lora_model_path is not None:
334
+ model_name += f" + {model_args.lora_model_path}"
335
+
336
+ # context = (
337
+ # "You are a helpful assistant who follows the given instructions"
338
+ # " unconditionally."
339
+ # )
340
+ self.end_string = "\n\n"
341
+
342
+ def _get_llama_style_input(self):
343
+ history = [x["content"] for x in self.history]
344
+ context = "\n".join(history)
345
+ return context
346
+
347
+ def get_answer_at_once(self):
348
+ context = self._get_llama_style_input()
349
+
350
+ input_dataset = self.dataset.from_dict(
351
+ {"type": "text_only", "instances": [{"text": context}]}
352
+ )
353
+
354
+ output_dataset = self.inferencer.inference(
355
+ model=self.model,
356
+ dataset=input_dataset,
357
+ max_new_tokens=self.max_generation_token,
358
+ temperature=self.temperature,
359
+ )
360
+
361
+ response = output_dataset.to_dict()["instances"][0]["text"]
362
+
363
+ try:
364
+ index = response.index(self.end_string)
365
+ except ValueError:
366
+ response += self.end_string
367
+ index = response.index(self.end_string)
368
+
369
+ response = response[: index + 1]
370
+ return response, len(response)
371
+
372
+ def get_answer_stream_iter(self):
373
+ context = self._get_llama_style_input()
374
+
375
+ input_dataset = self.dataset.from_dict(
376
+ {"type": "text_only", "instances": [{"text": context}]}
377
+ )
378
+
379
+ output_dataset = self.inferencer.inference(
380
+ model=self.model,
381
+ dataset=input_dataset,
382
+ max_new_tokens=self.max_generation_token,
383
+ temperature=self.temperature,
384
+ )
385
+
386
+ response = output_dataset.to_dict()["instances"][0]["text"]
387
+
388
+ try:
389
+ index = response.index(self.end_string)
390
+ except ValueError:
391
+ response += self.end_string
392
+ index = response.index(self.end_string)
393
+
394
+ response = response[: index + 1]
395
+ yield response
396
+
397
+
398
+ class ModelManager:
399
+ def __init__(self, **kwargs) -> None:
400
+ self.get_model(**kwargs)
401
+
402
+ def get_model(
403
+ self,
404
+ model_name,
405
+ lora_model_path=None,
406
+ access_key=None,
407
+ temperature=None,
408
+ top_p=None,
409
+ system_prompt=None,
410
+ ) -> BaseLLMModel:
411
+ msg = f"模型设置为了: {model_name}"
412
+ model_type = ModelType.get_type(model_name)
413
+ lora_selector_visibility = False
414
+ lora_choices = []
415
+ dont_change_lora_selector = False
416
+ if model_type != ModelType.OpenAI:
417
+ config.local_embedding = True
418
+ model = None
419
+ try:
420
+ if model_type == ModelType.OpenAI:
421
+ model = OpenAIClient(
422
+ model_name=model_name,
423
+ api_key=access_key,
424
+ system_prompt=system_prompt,
425
+ temperature=temperature,
426
+ top_p=top_p,
427
+ )
428
+ elif model_type == ModelType.ChatGLM:
429
+ model = ChatGLM_Client(model_name)
430
+ elif model_type == ModelType.LLaMA and lora_model_path == "":
431
+ msg = "现在请选择LoRA模型"
432
+ logging.info(msg)
433
+ lora_selector_visibility = True
434
+ if os.path.isdir("lora"):
435
+ lora_choices = get_file_names("lora", plain=True, filetypes=[""])
436
+ lora_choices = ["No LoRA"] + lora_choices
437
+ elif model_type == ModelType.LLaMA and lora_model_path != "":
438
+ dont_change_lora_selector = True
439
+ if lora_model_path == "No LoRA":
440
+ lora_model_path = None
441
+ msg += " + No LoRA"
442
+ else:
443
+ msg += f" + {lora_model_path}"
444
+ model = LLaMA_Client(model_name, lora_model_path)
446
+ elif model_type == ModelType.Unknown:
447
+ raise ValueError(f"未知模型: {model_name}")
448
+ logging.info(msg)
449
+ except Exception as e:
450
+ logging.error(e)
451
+ msg = f"{STANDARD_ERROR_MSG}: {e}"
452
+ if model is not None:
453
+ self.model = model
454
+ if dont_change_lora_selector:
455
+ return msg
456
+ else:
457
+ return msg, gr.Dropdown.update(choices=lora_choices, visible=lora_selector_visibility)
458
+
459
+ def predict(self, *args):
460
+ iter = self.model.predict(*args)
461
+ for i in iter:
462
+ yield i
463
+
464
+ def billing_info(self):
465
+ return self.model.billing_info()
466
+
467
+ def set_key(self, *args):
468
+ return self.model.set_key(*args)
469
+
470
+ def load_chat_history(self, *args):
471
+ return self.model.load_chat_history(*args)
472
+
473
+ def interrupt(self, *args):
474
+ return self.model.interrupt(*args)
475
+
476
+ def reset(self, *args):
477
+ return self.model.reset(*args)
478
+
479
+ def retry(self, *args):
480
+ iter = self.model.retry(*args)
481
+ for i in iter:
482
+ yield i
483
+
484
+ def delete_first_conversation(self, *args):
485
+ return self.model.delete_first_conversation(*args)
486
+
487
+ def delete_last_conversation(self, *args):
488
+ return self.model.delete_last_conversation(*args)
489
+
490
+ def set_system_prompt(self, *args):
491
+ return self.model.set_system_prompt(*args)
492
+
493
+ def save_chat_history(self, *args):
494
+ return self.model.save_chat_history(*args)
495
+
496
+ def export_markdown(self, *args):
497
+ return self.model.export_markdown(*args)
498
+
502
+ def set_token_upper_limit(self, *args):
503
+ return self.model.set_token_upper_limit(*args)
504
+
505
+ def set_temperature(self, *args):
506
+ self.model.set_temperature(*args)
507
+
508
+ def set_top_p(self, *args):
509
+ self.model.set_top_p(*args)
510
+
511
+ def set_n_choices(self, *args):
512
+ self.model.set_n_choices(*args)
513
+
514
+ def set_stop_sequence(self, *args):
515
+ self.model.set_stop_sequence(*args)
516
+
517
+ def set_max_tokens(self, *args):
518
+ self.model.set_max_tokens(*args)
519
+
520
+ def set_presence_penalty(self, *args):
521
+ self.model.set_presence_penalty(*args)
522
+
523
+ def set_frequency_penalty(self, *args):
524
+ self.model.set_frequency_penalty(*args)
525
+
526
+ def set_logit_bias(self, *args):
527
+ self.model.set_logit_bias(*args)
528
+
529
+ def set_user_identifier(self, *args):
530
+ self.model.set_user_identifier(*args)
531
+
532
+
533
+
534
+
535
+ if __name__ == "__main__":
536
+ with open("config.json", "r") as f:
537
+ openai_api_key = cjson.load(f)["openai_api_key"]
538
+ # set logging level to debug
539
+ logging.basicConfig(level=logging.DEBUG)
540
+ # client = ModelManager(model_name="gpt-3.5-turbo", access_key=openai_api_key)
541
+ client = ModelManager(model_name="chatglm-6b-int4")
542
+ chatbot = []
543
+ stream = False
544
+ # 测试账单功能
545
+ logging.info(colorama.Back.GREEN + "测试账单功能" + colorama.Back.RESET)
546
+ logging.info(client.billing_info())
547
+ # 测试问答
548
+ logging.info(colorama.Back.GREEN + "测试问答" + colorama.Back.RESET)
549
+ question = "巴黎是中国的首都吗?"
550
+ for i in client.predict(inputs=question, chatbot=chatbot, stream=stream):
551
+ logging.info(i)
552
+ logging.info(f"测试问答后history : {client.history}")
553
+ # 测试记忆力
554
+ logging.info(colorama.Back.GREEN + "测试记忆力" + colorama.Back.RESET)
555
+ question = "我刚刚问了你什么问题?"
556
+ for i in client.predict(inputs=question, chatbot=chatbot, stream=stream):
557
+ logging.info(i)
558
+ logging.info(f"测试记忆力后history : {client.history}")
559
+ # 测试重试功能
560
+ logging.info(colorama.Back.GREEN + "测试重试功能" + colorama.Back.RESET)
561
+ for i in client.retry(chatbot=chatbot, stream=stream):
562
+ logging.info(i)
563
+ logging.info(f"重试后history : {client.history}")
564
+ # # 测试总结功能
565
+ # print(colorama.Back.GREEN + "测试总结功能" + colorama.Back.RESET)
566
+ # chatbot, msg = client.reduce_token_size(chatbot=chatbot)
567
+ # print(chatbot, msg)
568
+ # print(f"总结后history: {client.history}")
modules/openai_func.py DELETED
@@ -1,65 +0,0 @@
1
- import requests
2
- import logging
3
- from modules.presets import (
4
- timeout_all,
5
- USAGE_API_URL,
6
- BALANCE_API_URL,
7
- standard_error_msg,
8
- connection_timeout_prompt,
9
- error_retrieve_prompt,
10
- read_timeout_prompt
11
- )
12
-
13
- from . import shared
14
- from modules.config import retrieve_proxy
15
- import os, datetime
16
-
17
- def get_billing_data(openai_api_key, billing_url):
18
- headers = {
19
- "Content-Type": "application/json",
20
- "Authorization": f"Bearer {openai_api_key}"
21
- }
22
-
23
- timeout = timeout_all
24
- with retrieve_proxy():
25
- response = requests.get(
26
- billing_url,
27
- headers=headers,
28
- timeout=timeout,
29
- )
30
-
31
- if response.status_code == 200:
32
- data = response.json()
33
- return data
34
- else:
35
- raise Exception(f"API request failed with status code {response.status_code}: {response.text}")
36
-
37
-
38
- def get_usage(openai_api_key):
39
- try:
40
- curr_time = datetime.datetime.now()
41
- last_day_of_month = get_last_day_of_month(curr_time).strftime("%Y-%m-%d")
42
- first_day_of_month = curr_time.replace(day=1).strftime("%Y-%m-%d")
43
- usage_url = f"{shared.state.usage_api_url}?start_date={first_day_of_month}&end_date={last_day_of_month}"
44
- try:
45
- usage_data = get_billing_data(openai_api_key, usage_url)
46
- except Exception as e:
47
- logging.error(f"获取API使用情况失败:"+str(e))
48
- return f"**获取API使用情况失败**"
49
- rounded_usage = "{:.5f}".format(usage_data['total_usage']/100)
50
- return f"**本月使用金额** \u3000 ${rounded_usage}"
51
- except requests.exceptions.ConnectTimeout:
52
- status_text = standard_error_msg + connection_timeout_prompt + error_retrieve_prompt
53
- return status_text
54
- except requests.exceptions.ReadTimeout:
55
- status_text = standard_error_msg + read_timeout_prompt + error_retrieve_prompt
56
- return status_text
57
- except Exception as e:
58
- logging.error(f"获取API使用情况失败:"+str(e))
59
- return standard_error_msg + error_retrieve_prompt
60
-
61
- def get_last_day_of_month(any_day):
62
- # The day 28 exists in every month. 4 days later, it's always next month
63
- next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
64
- # subtracting the number of the current day brings us back one month
65
- return next_month - datetime.timedelta(days=next_month.day)
 
modules/presets.py CHANGED
@@ -3,48 +3,50 @@ import gradio as gr
3
  from pathlib import Path
4
 
5
  # ChatGPT 设置
6
- initial_prompt = "You are a helpful assistant."
7
  API_HOST = "api.openai.com"
8
  COMPLETION_URL = "https://api.openai.com/v1/chat/completions"
9
  BALANCE_API_URL="https://api.openai.com/dashboard/billing/credit_grants"
10
  USAGE_API_URL="https://api.openai.com/dashboard/billing/usage"
11
  HISTORY_DIR = Path("history")
 
12
  TEMPLATES_DIR = "templates"
13
 
14
  # 错误信息
15
- standard_error_msg = "☹️发生了错误:" # 错误信息的标准前缀
16
- error_retrieve_prompt = "请检查网络连接,或者API-Key是否有效。" # 获取对话时发生错误
17
- connection_timeout_prompt = "连接超时,无法获取对话。" # 连接超时
18
- read_timeout_prompt = "读取超时,无法获取对话。" # 读取超时
19
- proxy_error_prompt = "代理错误,无法获取对话。" # 代理错误
20
- ssl_error_prompt = "SSL错误,无法获取对话。" # SSL 错误
21
- no_apikey_msg = "API key长度不是51位,请检查是否输入正确。" # API key 长度不足 51 位
22
- no_input_msg = "请输入对话内容。" # 未输入对话内容
23
-
24
- timeout_streaming = 60 # 流式对话时的超时时间
25
- timeout_all = 200 # 非流式对话时的超时时间
26
- enable_streaming_option = True # 是否启用选择选择是否实时显示回答的勾选框
 
 
27
  HIDE_MY_KEY = False # 如果你想在UI中隐藏你的 API 密钥,将此值设置为 True
28
  CONCURRENT_COUNT = 100 # 允许同时使用的用户数量
29
 
30
  SIM_K = 5
31
  INDEX_QUERY_TEMPRATURE = 1.0
32
 
33
- title = """<h1 align="left">川虎ChatGPT 🚀</h1>"""
34
- description = """\
35
  <div align="center" style="margin:16px 0">
36
 
37
  由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536) 和 [明昭MZhao](https://space.bilibili.com/24807452)开发
38
 
39
  访问川虎ChatGPT的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本
40
 
41
- 此App使用 `gpt-3.5-turbo` 大语言模型
42
  </div>
43
  """
44
 
45
- footer = """<div class="versions">{versions}</div>"""
46
 
47
- appearance_switcher = """
48
  <div style="display: flex; justify-content: space-between;">
49
  <span style="margin-top: 4px !important;">切换亮暗色主题</span>
50
  <span><label class="apSwitch" for="checkbox">
@@ -53,7 +55,8 @@ appearance_switcher = """
53
  </label></span>
54
  </div>
55
  """
56
- summarize_prompt = "你是谁?我们刚才聊了什么?" # 总结对话时的 prompt
 
57
 
58
  MODELS = [
59
  "gpt-3.5-turbo",
@@ -62,35 +65,34 @@ MODELS = [
62
  "gpt-4-0314",
63
  "gpt-4-32k",
64
  "gpt-4-32k-0314",
 
65
  ] # 可选的模型
66
 
67
- MODEL_SOFT_TOKEN_LIMIT = {
68
- "gpt-3.5-turbo": {
69
- "streaming": 3500,
70
- "all": 3500
71
- },
72
- "gpt-3.5-turbo-0301": {
73
- "streaming": 3500,
74
- "all": 3500
75
- },
76
- "gpt-4": {
77
- "streaming": 7500,
78
- "all": 7500
79
- },
80
- "gpt-4-0314": {
81
- "streaming": 7500,
82
- "all": 7500
83
- },
84
- "gpt-4-32k": {
85
- "streaming": 31000,
86
- "all": 31000
87
- },
88
- "gpt-4-32k-0314": {
89
- "streaming": 31000,
90
- "all": 31000
91
- }
92
  }
93
 
 
 
 
 
94
  REPLY_LANGUAGES = [
95
  "简体中文",
96
  "繁體中文",
 
3
  from pathlib import Path
4
 
5
  # ChatGPT 设置
6
+ INITIAL_SYSTEM_PROMPT = "You are a helpful assistant."
7
  API_HOST = "api.openai.com"
8
  COMPLETION_URL = "https://api.openai.com/v1/chat/completions"
9
  BALANCE_API_URL="https://api.openai.com/dashboard/billing/credit_grants"
10
  USAGE_API_URL="https://api.openai.com/dashboard/billing/usage"
11
  HISTORY_DIR = Path("history")
12
+ HISTORY_DIR = "history"
13
  TEMPLATES_DIR = "templates"
14
 
15
  # 错误信息
16
+ STANDARD_ERROR_MSG = "☹️发生了错误:" # 错误信息的标准前缀
17
+ GENERAL_ERROR_MSG = "获取对话时发生错误,请查看后台日志"
18
+ ERROR_RETRIEVE_MSG = "请检查网络连接,或者API-Key是否有效。"
19
+ CONNECTION_TIMEOUT_MSG = "连接超时,无法获取对话。" # 连接超时
20
+ READ_TIMEOUT_MSG = "读取超时,无法获取对话。" # 读取超时
21
+ PROXY_ERROR_MSG = "代理错误,无法获取对话。" # 代理错误
22
+ SSL_ERROR_PROMPT = "SSL错误,无法获取对话。" # SSL 错误
23
+ NO_APIKEY_MSG = "API key为空,请检查是否输入正确。" # API key 为空
24
+ NO_INPUT_MSG = "请输入对话内容。" # 未输入对话内容
25
+ BILLING_NOT_APPLICABLE_MSG = "模型本地运行中" # 本地运行的模型返回的账单信息
26
+
27
+ TIMEOUT_STREAMING = 60 # 流式对话时的超时时间
28
+ TIMEOUT_ALL = 200 # 非流式对话时的超时时间
29
+ ENABLE_STREAMING_OPTION = True # 是否启用“实时显示回答”的勾选框
30
  HIDE_MY_KEY = False # 如果你想在UI中隐藏你的 API 密钥,将此值设置为 True
31
  CONCURRENT_COUNT = 100 # 允许同时使用的用户数量
32
 
33
  SIM_K = 5
34
  INDEX_QUERY_TEMPRATURE = 1.0
35
 
36
+ CHUANHU_TITLE = """<h1 align="left">川虎ChatGPT 🚀</h1>"""
37
+ CHUANHU_DESCRIPTION = """\
38
  <div align="center" style="margin:16px 0">
39
 
40
  由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536) 和 [明昭MZhao](https://space.bilibili.com/24807452)开发
41
 
42
  访问川虎ChatGPT的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本
43
 
 
44
  </div>
45
  """
46
 
47
+ FOOTER = """<div class="versions">{versions}</div>"""
48
 
49
+ APPEARANCE_SWITCHER = """
50
  <div style="display: flex; justify-content: space-between;">
51
  <span style="margin-top: 4px !important;">切换亮暗色主题</span>
52
  <span><label class="apSwitch" for="checkbox">
 
55
  </label></span>
56
  </div>
57
  """
58
+
59
+ SUMMARIZE_PROMPT = "你是谁?我们刚才聊了什么?" # 总结对话时的 prompt
60
 
61
  MODELS = [
62
  "gpt-3.5-turbo",
 
65
  "gpt-4-0314",
66
  "gpt-4-32k",
67
  "gpt-4-32k-0314",
68
+ "chatglm-6b",
69
+ "chatglm-6b-int4",
70
+ "chatglm-6b-int4-qe",
71
+ "llama-7b-hf",
72
+ "llama-7b-hf-int4",
73
+ "llama-7b-hf-int8",
74
+ "llama-13b-hf",
75
+ "llama-13b-hf-int4",
76
+ "llama-30b-hf",
77
+ "llama-30b-hf-int4",
78
+ "llama-65b-hf",
79
  ] # 可选的模型
80
 
81
+ DEFAULT_MODEL = 0 # 默认的模型在MODELS中的序号,从0开始数
82
+
83
+ MODEL_TOKEN_LIMIT = {
84
+ "gpt-3.5-turbo": 4096,
85
+ "gpt-3.5-turbo-0301": 4096,
86
+ "gpt-4": 8192,
87
+ "gpt-4-0314": 8192,
88
+ "gpt-4-32k": 32768,
89
+ "gpt-4-32k-0314": 32768
 
90
  }
91
 
92
+ TOKEN_OFFSET = 1000 # 模型的token上限减去这个值,得到软上限。到达软上限之后,自动尝试减少token占用。
93
+ DEFAULT_TOKEN_LIMIT = 3000 # 默认的token上限
94
+ REDUCE_TOKEN_FACTOR = 0.5 # 与模型token上限相乘,得到目标token数。减少token占用时,将token占用减少到目标token数以下。
95
+
96
  REPLY_LANGUAGES = [
97
  "简体中文",
98
  "繁體中文",
modules/utils.py CHANGED
@@ -153,107 +153,22 @@ def construct_assistant(text):
153
  return construct_text("assistant", text)
154
 
155
 
156
- def construct_token_message(tokens: List[int]):
157
- token_sum = 0
158
- for i in range(len(tokens)):
159
- token_sum += sum(tokens[: i + 1])
160
- return f"Token 计数: {sum(tokens)},本次对话累计消耗了 {token_sum} tokens"
161
-
162
-
163
- def delete_first_conversation(history, previous_token_count):
164
- if history:
165
- del history[:2]
166
- del previous_token_count[0]
167
- return (
168
- history,
169
- previous_token_count,
170
- construct_token_message(previous_token_count),
171
- )
172
-
173
-
174
- def delete_last_conversation(chatbot, history, previous_token_count):
175
- if len(chatbot) > 0 and standard_error_msg in chatbot[-1][1]:
176
- logging.info("由于包含报错信息,只删除chatbot记录")
177
- chatbot.pop()
178
- return chatbot, history
179
- if len(history) > 0:
180
- logging.info("删除了一组对话历史")
181
- history.pop()
182
- history.pop()
183
- if len(chatbot) > 0:
184
- logging.info("删除了一组chatbot对话")
185
- chatbot.pop()
186
- if len(previous_token_count) > 0:
187
- logging.info("删除了一组对话的token计数记录")
188
- previous_token_count.pop()
189
- return (
190
- chatbot,
191
- history,
192
- previous_token_count,
193
- construct_token_message(previous_token_count),
194
- )
195
-
196
-
197
  def save_file(filename, system, history, chatbot, user_name):
198
- logging.info(f"{user_name} 保存对话历史中……")
199
- os.makedirs(HISTORY_DIR / user_name, exist_ok=True)
200
  if filename.endswith(".json"):
201
  json_s = {"system": system, "history": history, "chatbot": chatbot}
202
  print(json_s)
203
- with open(os.path.join(HISTORY_DIR / user_name, filename), "w") as f:
204
  json.dump(json_s, f)
205
  elif filename.endswith(".md"):
206
  md_s = f"system: \n- {system} \n"
207
  for data in history:
208
  md_s += f"\n{data['role']}: \n- {data['content']} \n"
209
- with open(os.path.join(HISTORY_DIR / user_name, filename), "w", encoding="utf8") as f:
210
  f.write(md_s)
211
- logging.info(f"{user_name} 保存对话历史完毕")
212
- return os.path.join(HISTORY_DIR / user_name, filename)
213
-
214
-
215
- def save_chat_history(filename, system, history, chatbot, user_name):
216
- if filename == "":
217
- return
218
- if not filename.endswith(".json"):
219
- filename += ".json"
220
- return save_file(filename, system, history, chatbot, user_name)
221
-
222
-
223
- def export_markdown(filename, system, history, chatbot, user_name):
224
- if filename == "":
225
- return
226
- if not filename.endswith(".md"):
227
- filename += ".md"
228
- return save_file(filename, system, history, chatbot, user_name)
229
-
230
-
231
- def load_chat_history(filename, system, history, chatbot, user_name):
232
- logging.info(f"{user_name} 加载对话历史中……")
233
- if type(filename) != str:
234
- filename = filename.name
235
- try:
236
- with open(os.path.join(HISTORY_DIR / user_name, filename), "r") as f:
237
- json_s = json.load(f)
238
- try:
239
- if type(json_s["history"][0]) == str:
240
- logging.info("历史记录格式为旧版,正在转换……")
241
- new_history = []
242
- for index, item in enumerate(json_s["history"]):
243
- if index % 2 == 0:
244
- new_history.append(construct_user(item))
245
- else:
246
- new_history.append(construct_assistant(item))
247
- json_s["history"] = new_history
248
- logging.info(new_history)
249
- except:
250
- # 没有对话历史
251
- pass
252
- logging.info(f"{user_name} 加载对话历史完毕")
253
- return filename, json_s["system"], json_s["history"], json_s["chatbot"]
254
- except FileNotFoundError:
255
- logging.info(f"{user_name} 没有找到对话历史文件,不执行任何操作")
256
- return filename, system, history, chatbot
257
 
258
 
259
  def sorted_by_pinyin(list):
@@ -261,7 +176,7 @@ def sorted_by_pinyin(list):
261
 
262
 
263
  def get_file_names(dir, plain=False, filetypes=[".json"]):
264
- logging.info(f"获取文件名列表,目录为{dir},文件类型为{filetypes},是否为纯文本列表{plain}")
265
  files = []
266
  try:
267
  for type in filetypes:
@@ -279,14 +194,13 @@ def get_file_names(dir, plain=False, filetypes=[".json"]):
279
 
280
 
281
  def get_history_names(plain=False, user_name=""):
282
- logging.info(f"从用户 {user_name} 中获取历史记录文件名列表")
283
- return get_file_names(HISTORY_DIR / user_name, plain)
284
 
285
 
286
  def load_template(filename, mode=0):
287
- logging.info(f"加载模板文件{filename},模式为{mode}(0为返回字典和下拉菜单,1为返回下拉菜单,2为返回字典)")
288
  lines = []
289
- logging.info("Loading template...")
290
  if filename.endswith(".json"):
291
  with open(os.path.join(TEMPLATES_DIR, filename), "r", encoding="utf8") as f:
292
  lines = json.load(f)
@@ -310,23 +224,18 @@ def load_template(filename, mode=0):
310
 
311
 
312
  def get_template_names(plain=False):
313
- logging.info("获取模板文件名列表")
314
  return get_file_names(TEMPLATES_DIR, plain, filetypes=[".csv", "json"])
315
 
316
 
317
  def get_template_content(templates, selection, original_system_prompt):
318
- logging.info(f"应用模板中,选择为{selection},原始系统提示为{original_system_prompt}")
319
  try:
320
  return templates[selection]
321
  except:
322
  return original_system_prompt
323
 
324
 
325
- def reset_state():
326
- logging.info("重置状态")
327
- return [], [], [], construct_token_message([0])
328
-
329
-
330
  def reset_textbox():
331
  logging.debug("重置文本框")
332
  return gr.update(value="")
@@ -530,3 +439,13 @@ def excel_to_string(file_path):
530
 
531
 
532
  return result
 
153
  return construct_text("assistant", text)
154
 
155
 
156
  def save_file(filename, system, history, chatbot, user_name):
157
+ logging.debug(f"{user_name} 保存对话历史中……")
158
+ os.makedirs(os.path.join(HISTORY_DIR, user_name), exist_ok=True)
159
  if filename.endswith(".json"):
160
  json_s = {"system": system, "history": history, "chatbot": chatbot}
161
  print(json_s)
162
+ with open(os.path.join(HISTORY_DIR, user_name, filename), "w") as f:
163
  json.dump(json_s, f)
164
  elif filename.endswith(".md"):
165
  md_s = f"system: \n- {system} \n"
166
  for data in history:
167
  md_s += f"\n{data['role']}: \n- {data['content']} \n"
168
+ with open(os.path.join(HISTORY_DIR, user_name, filename), "w", encoding="utf8") as f:
169
  f.write(md_s)
170
+ logging.debug(f"{user_name} 保存对话历史完毕")
171
+ return os.path.join(HISTORY_DIR, user_name, filename)
172
 
173
 
174
  def sorted_by_pinyin(list):
 
176
 
177
 
178
  def get_file_names(dir, plain=False, filetypes=[".json"]):
179
+ logging.debug(f"获取文件名列表,目录为{dir},文件类型为{filetypes},是否为纯文本列表{plain}")
180
  files = []
181
  try:
182
  for type in filetypes:
 
194
 
195
 
196
  def get_history_names(plain=False, user_name=""):
197
+ logging.debug(f"从用户 {user_name} 中获取历史记录文件名列表")
198
+ return get_file_names(os.path.join(HISTORY_DIR, user_name), plain)
199
 
200
 
201
  def load_template(filename, mode=0):
202
+ logging.debug(f"加载模板文件{filename},模式为{mode}(0为返回字典和下拉菜单,1为返回下拉菜单,2为返回字典)")
203
  lines = []
 
204
  if filename.endswith(".json"):
205
  with open(os.path.join(TEMPLATES_DIR, filename), "r", encoding="utf8") as f:
206
  lines = json.load(f)
 
224
 
225
 
226
  def get_template_names(plain=False):
227
+ logging.debug("获取模板文件名列表")
228
  return get_file_names(TEMPLATES_DIR, plain, filetypes=[".csv", "json"])
229
 
230
 
231
  def get_template_content(templates, selection, original_system_prompt):
232
+ logging.debug(f"应用模板中,选择为{selection},原始系统提示为{original_system_prompt}")
233
  try:
234
  return templates[selection]
235
  except:
236
  return original_system_prompt
237
 
238
 
239
  def reset_textbox():
240
  logging.debug("重置文本框")
241
  return gr.update(value="")
 
439
 
440
 
441
  return result
442
+
443
+ def get_last_day_of_month(any_day):
444
+ # The day 28 exists in every month. 4 days later, it's always next month
445
+ next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
446
+ # subtracting the number of the current day brings us back one month
447
+ return next_month - datetime.timedelta(days=next_month.day)
448
+
449
+ def get_model_source(model_name, alternative_source):
450
+ if model_name == "gpt2-medium":
451
+ return "https://huggingface.co/gpt2-medium"
requirements_advanced.txt ADDED
@@ -0,0 +1,7 @@
1
+ transformers
2
+ torch
3
+ icetk
4
+ protobuf==3.19.0
5
+ git+https://github.com/OptimalScale/LMFlow.git#egg=lmflow
6
+ cpm-kernels
7
+ sentence_transformers
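
These extras back the new local-model paths and are kept out of the base requirements so API-only deployments stay light: transformers, torch, icetk and cpm-kernels serve ChatGLM (including the int4 variants), the LMFlow git dependency drives LLaMA_Client, sentence_transformers provides the local embedding backend, and the protobuf pin works around version conflicts common with these packages at the time. Installing them is a separate, optional step: pip install -r requirements_advanced.txt.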