Tuchuanhuhuhu committed
Commit 77f2c42
1 Parent(s): 64eb375

Remove the chat_func file; switch to class-based control of models (去除chat_func文件,改用类控制模型)

ChuanhuChatbot.py CHANGED
@@ -10,8 +10,7 @@ from modules.config import *
from modules.utils import *
from modules.presets import *
from modules.overwrites import *
- from modules.chat_func import *
- from modules.openai_func import get_usage
+ from modules.models import get_model

gr.Chatbot.postprocess = postprocess
PromptHelper.compact_text_chunks = compact_text_chunks
@@ -21,12 +20,11 @@ with open("assets/custom.css", "r", encoding="utf-8") as f:

with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
user_name = gr.State("")
- history = gr.State([])
- token_count = gr.State([])
promptTemplates = gr.State(load_template(get_template_names(plain=True)[0], mode=2))
user_api_key = gr.State(my_api_key)
user_question = gr.State("")
- outputing = gr.State(False)
+ current_model = gr.State(get_model(MODELS[0], my_api_key))
+
topic = gr.State("未命名对话历史记录")

with gr.Row():
@@ -64,7 +62,6 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
retryBtn = gr.Button("🔄 重新生成")
delFirstBtn = gr.Button("🗑️ 删除最旧对话")
delLastBtn = gr.Button("🗑️ 删除最新对话")
- reduceTokenBtn = gr.Button("♻️ 总结对话")

with gr.Column():
with gr.Column(min_width=50, scale=1):
@@ -94,7 +91,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
multiselect=False,
value=REPLY_LANGUAGES[0],
)
- index_files = gr.Files(label="上传索引文件", type="file", multiple=True)
+ index_files = gr.Files(label="上传索引文件", type="file")
two_column = gr.Checkbox(label="双栏pdf", value=advance_docs["pdf"].get("two_column", False))
# TODO: 公式ocr
# formula_ocr = gr.Checkbox(label="识别公式", value=advance_docs["pdf"].get("formula_ocr", False))
@@ -104,7 +101,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
show_label=True,
placeholder=f"在这里输入System Prompt...",
label="System prompt",
- value=initial_prompt,
+ value=INITIAL_SYSTEM_PROMPT,
lines=10,
).style(container=False)
with gr.Accordion(label="加载Prompt模板", open=True):
@@ -202,23 +199,16 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
gr.Markdown(description)
gr.HTML(footer.format(versions=versions_html()), elem_id="footer")
chatgpt_predict_args = dict(
- fn=predict,
+ fn=current_model.value.predict,
inputs=[
- user_api_key,
- systemPromptTxt,
- history,
user_question,
chatbot,
- token_count,
- top_p,
- temperature,
use_streaming_checkbox,
- model_select_dropdown,
use_websearch_checkbox,
index_files,
language_select_dropdown,
],
- outputs=[chatbot, history, status_display, token_count],
+ outputs=[chatbot, status_display],
show_progress=True,
)

@@ -242,12 +232,18 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
)

get_usage_args = dict(
- fn=get_usage, inputs=[user_api_key], outputs=[usageTxt], show_progress=False
+ fn=current_model.value.billing_info, inputs=None, outputs=[usageTxt], show_progress=False
+ )
+
+ load_history_from_file_args = dict(
+ fn=current_model.value.load_chat_history,
+ inputs=[historyFileSelectDropdown, chatbot, user_name],
+ outputs=[saveFileName, systemPromptTxt, chatbot]
)


# Chatbot
- cancelBtn.click(cancel_outputing, [], [])
+ cancelBtn.click(current_model.value.interrupt, [], [])

user_input.submit(**transfer_input_args).then(**chatgpt_predict_args).then(**end_outputing_args)
user_input.submit(**get_usage_args)
@@ -256,63 +252,39 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
submitBtn.click(**get_usage_args)

emptyBtn.click(
- reset_state,
- outputs=[chatbot, history, token_count, status_display],
+ current_model.value.reset,
+ outputs=[chatbot, status_display],
show_progress=True,
)
emptyBtn.click(**reset_textbox_args)

retryBtn.click(**start_outputing_args).then(
- retry,
+ current_model.value.retry,
[
- user_api_key,
- systemPromptTxt,
- history,
chatbot,
- token_count,
- top_p,
- temperature,
use_streaming_checkbox,
- model_select_dropdown,
+ use_websearch_checkbox,
+ index_files,
language_select_dropdown,
],
- [chatbot, history, status_display, token_count],
+ [chatbot, status_display],
show_progress=True,
).then(**end_outputing_args)
retryBtn.click(**get_usage_args)

delFirstBtn.click(
- delete_first_conversation,
- [history, token_count],
- [history, token_count, status_display],
+ current_model.value.delete_first_conversation,
+ None,
+ [status_display],
)

delLastBtn.click(
- delete_last_conversation,
- [chatbot, history, token_count],
- [chatbot, history, token_count, status_display],
- show_progress=True,
+ current_model.value.delete_last_conversation,
+ [chatbot],
+ [chatbot, status_display],
+ show_progress=False
)

- reduceTokenBtn.click(
- reduce_token_size,
- [
- user_api_key,
- systemPromptTxt,
- history,
- chatbot,
- token_count,
- top_p,
- temperature,
- gr.State(sum(token_count.value[-4:])),
- model_select_dropdown,
- language_select_dropdown,
- ],
- [chatbot, history, status_display, token_count],
- show_progress=True,
- )
- reduceTokenBtn.click(**get_usage_args)
-
two_column.change(update_doc_config, [two_column], None)

# ChatGPT
@@ -336,30 +308,21 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:

# S&L
saveHistoryBtn.click(
- save_chat_history,
- [saveFileName, systemPromptTxt, history, chatbot, user_name],
+ current_model.value.save_chat_history,
+ [saveFileName, chatbot, user_name],
downloadFile,
show_progress=True,
)
saveHistoryBtn.click(get_history_names, [gr.State(False), user_name], [historyFileSelectDropdown])
exportMarkdownBtn.click(
- export_markdown,
- [saveFileName, systemPromptTxt, history, chatbot, user_name],
+ current_model.value.export_markdown,
+ [saveFileName, chatbot, user_name],
downloadFile,
show_progress=True,
)
historyRefreshBtn.click(get_history_names, [gr.State(False), user_name], [historyFileSelectDropdown])
- historyFileSelectDropdown.change(
- load_chat_history,
- [historyFileSelectDropdown, systemPromptTxt, history, chatbot, user_name],
- [saveFileName, systemPromptTxt, history, chatbot],
- show_progress=True,
- )
- downloadFile.change(
- load_chat_history,
- [downloadFile, systemPromptTxt, history, chatbot, user_name],
- [saveFileName, systemPromptTxt, history, chatbot],
- )
+ historyFileSelectDropdown.change(**load_history_from_file_args)
+ downloadFile.change(**load_history_from_file_args)

# Advanced
default_btn.click(
modules/__init__.py ADDED
File without changes
modules/base_model.py ADDED
@@ -0,0 +1,427 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING, List
3
+
4
+ import logging
5
+ import json
6
+ import commentjson as cjson
7
+ import os
8
+ import sys
9
+ import requests
10
+ import urllib3
11
+
12
+ from tqdm import tqdm
13
+ import colorama
14
+ from duckduckgo_search import ddg
15
+ import asyncio
16
+ import aiohttp
17
+ from enum import Enum
18
+
19
+ from .presets import *
20
+ from .llama_func import *
21
+ from .utils import *
22
+ from . import shared
23
+ from .config import retrieve_proxy
24
+
25
+
26
+ class ModelType(Enum):
27
+ OpenAI = 0
28
+ ChatGLM = 1
29
+ LLaMA = 2
30
+
31
+ @classmethod
32
+ def get_type(cls, model_name: str):
33
+ model_type = None
34
+ if "gpt" in model_name.lower():
35
+ model_type = ModelType.OpenAI
36
+ elif "chatglm" in model_name.upper():
37
+ model_type = ModelType.ChatGLM
38
+ else:
39
+ model_type = ModelType.LLaMA
40
+ return model_type
41
+
42
+
43
+ class BaseLLMModel:
44
+ def __init__(self, model_name, temperature=1.0, top_p=1.0, max_generation_token=None, system_prompt="") -> None:
45
+ self.history = []
46
+ self.all_token_counts = []
47
+ self.model_name = model_name
48
+ self.model_type = ModelType.get_type(model_name)
49
+ self.api_key = None
50
+ self.token_upper_limit = MODEL_TOKEN_LIMIT[model_name]
51
+ self.max_generation_token = max_generation_token if max_generation_token is not None else self.token_upper_limit
52
+ self.interrupted = False
53
+ self.temperature = temperature
54
+ self.top_p = top_p
55
+ self.system_prompt = system_prompt
56
+
57
+
58
+ def get_answer_stream_iter(self):
59
+ """stream predict, need to be implemented
60
+ conversations are stored in self.history, with the most recent question, in OpenAI format
61
+ should return a generator, each time give the next word (str) in the answer
62
+ """
63
+ pass
64
+
65
+ def get_answer_at_once(self):
66
+ """predict at once, need to be implemented
67
+ conversations are stored in self.history, with the most recent question, in OpenAI format
68
+ Should return:
69
+ the answer (str)
70
+ total token count (int)
71
+ """
72
+ pass
73
+
74
+ def billing_info(self):
75
+ """get billing infomation, inplement if needed"""
76
+ return billing_not_applicable_msg
77
+
78
+
79
+ def count_token(self, user_input):
80
+ """get token count from input, implement if needed
81
+ """
82
+ return 0
83
+
84
+ def stream_next_chatbot(
85
+ self, inputs, chatbot, fake_input=None, display_append=""
86
+ ):
87
+ def get_return_value():
88
+ return chatbot, status_text
89
+
90
+ status_text = "开始实时传输回答……"
91
+ if fake_input:
92
+ chatbot.append((fake_input, ""))
93
+ else:
94
+ chatbot.append((inputs, ""))
95
+
96
+ user_token_count = self.count_token(inputs)
97
+ self.all_token_counts.append(user_token_count)
98
+ logging.debug(f"输入token计数: {user_token_count}")
99
+
100
+ stream_iter = self.get_answer_stream_iter()
101
+
102
+ for partial_text in stream_iter:
103
+ self.history[-1] = construct_assistant(partial_text)
104
+ chatbot[-1] = (chatbot[-1][0], partial_text + display_append)
105
+ self.all_token_counts[-1] += 1
106
+ status_text = self.token_message()
107
+ yield get_return_value()
108
+
109
+ def next_chatbot_at_once(
110
+ self, inputs, chatbot, fake_input=None, display_append=""
111
+ ):
112
+ if fake_input:
113
+ chatbot.append((fake_input, ""))
114
+ else:
115
+ chatbot.append((inputs, ""))
116
+ if fake_input is not None:
117
+ user_token_count = self.count_token(fake_input)
118
+ else:
119
+ user_token_count = self.count_token(inputs)
120
+ self.all_token_counts.append(user_token_count)
121
+ ai_reply, total_token_count = self.get_answer_at_once()
122
+ if fake_input is not None:
123
+ self.history[-2] = construct_user(fake_input)
124
+ self.history[-1] = construct_assistant(ai_reply)
125
+ chatbot[-1] = (chatbot[-1][0], ai_reply+display_append)
126
+ if fake_input is not None:
127
+ self.all_token_counts[-1] += count_token(construct_assistant(ai_reply))
128
+ else:
129
+ self.all_token_counts[-1] = total_token_count - sum(self.all_token_counts)
130
+ status_text = self.token_message()
131
+ return chatbot, status_text
132
+
133
+ def predict(
134
+ self,
135
+ inputs,
136
+ chatbot,
137
+ stream=False,
138
+ use_websearch=False,
139
+ files=None,
140
+ reply_language="中文",
141
+ should_check_token_count=True,
142
+ ): # repetition_penalty, top_k
143
+ from llama_index.indices.vector_store.base_query import GPTVectorStoreIndexQuery
144
+ from llama_index.indices.query.schema import QueryBundle
145
+ from langchain.llms import OpenAIChat
146
+
147
+ logging.info(
148
+ "输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL
149
+ )
150
+ if should_check_token_count:
151
+ yield chatbot + [(inputs, "")], "开始生成回答……"
152
+ if reply_language == "跟随问题语言(不稳定)":
153
+ reply_language = "the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch."
154
+ old_inputs = None
155
+ display_reference = []
156
+ limited_context = False
157
+ if files and self.api_key:
158
+ limited_context = True
159
+ old_inputs = inputs
160
+ msg = "加载索引中……(这可能需要几分钟)"
161
+ logging.info(msg)
162
+ yield chatbot + [(inputs, "")], msg
163
+ index = construct_index(self.api_key, file_src=files)
164
+ msg = "索引构建完成,获取回答中……"
165
+ logging.info(msg)
166
+ yield chatbot + [(inputs, "")], msg
167
+ with retrieve_proxy():
168
+ llm_predictor = LLMPredictor(
169
+ llm=OpenAIChat(temperature=0, model_name=self.model_name)
170
+ )
171
+ prompt_helper = PromptHelper(
172
+ max_input_size=4096,
173
+ num_output=5,
174
+ max_chunk_overlap=20,
175
+ chunk_size_limit=600,
176
+ )
177
+ from llama_index import ServiceContext
178
+
179
+ service_context = ServiceContext.from_defaults(
180
+ llm_predictor=llm_predictor, prompt_helper=prompt_helper
181
+ )
182
+ query_object = GPTVectorStoreIndexQuery(
183
+ index.index_struct,
184
+ service_context=service_context,
185
+ similarity_top_k=5,
186
+ vector_store=index._vector_store,
187
+ docstore=index._docstore,
188
+ )
189
+ query_bundle = QueryBundle(inputs)
190
+ nodes = query_object.retrieve(query_bundle)
191
+ reference_results = [n.node.text for n in nodes]
192
+ reference_results = add_source_numbers(reference_results, use_source=False)
193
+ display_reference = add_details(reference_results)
194
+ display_reference = "\n\n" + "".join(display_reference)
195
+ inputs = (
196
+ replace_today(PROMPT_TEMPLATE)
197
+ .replace("{query_str}", inputs)
198
+ .replace("{context_str}", "\n\n".join(reference_results))
199
+ .replace("{reply_language}", reply_language)
200
+ )
201
+ elif use_websearch:
202
+ limited_context = True
203
+ search_results = ddg(inputs, max_results=5)
204
+ old_inputs = inputs
205
+ reference_results = []
206
+ for idx, result in enumerate(search_results):
207
+ logging.debug(f"搜索结果{idx + 1}:{result}")
208
+ domain_name = urllib3.util.parse_url(result["href"]).host
209
+ reference_results.append([result["body"], result["href"]])
210
+ display_reference.append(
211
+ f"{idx+1}. [{domain_name}]({result['href']})\n"
212
+ )
213
+ reference_results = add_source_numbers(reference_results)
214
+ display_reference = "\n\n" + "".join(display_reference)
215
+ inputs = (
216
+ replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
217
+ .replace("{query}", inputs)
218
+ .replace("{web_results}", "\n\n".join(reference_results))
219
+ .replace("{reply_language}", reply_language)
220
+ )
221
+ else:
222
+ display_reference = ""
223
+
224
+ if len(self.api_key) == 0 and not shared.state.multi_api_key:
225
+ status_text = standard_error_msg + no_apikey_msg
226
+ logging.info(status_text)
227
+ chatbot.append((inputs, ""))
228
+ if len(self.history) == 0:
229
+ self.history.append(construct_user(inputs))
230
+ self.history.append("")
231
+ self.all_token_counts.append(0)
232
+ else:
233
+ self.history[-2] = construct_user(inputs)
234
+ yield chatbot + [(inputs, "")], status_text
235
+ return
236
+ elif len(inputs.strip()) == 0:
237
+ status_text = standard_error_msg + no_input_msg
238
+ logging.info(status_text)
239
+ yield chatbot + [(inputs, "")], status_text
240
+ return
241
+
242
+ self.history.append(construct_user(inputs))
243
+ self.history.append(construct_assistant(""))
244
+
245
+ if stream:
246
+ logging.debug("使用流式传输")
247
+ iter = self.stream_next_chatbot(
248
+ inputs,
249
+ chatbot,
250
+ fake_input=old_inputs,
251
+ display_append=display_reference,
252
+ )
253
+ for chatbot, status_text in iter:
254
+ yield chatbot, status_text
255
+ if self.interrupted:
256
+ self.recover()
257
+ break
258
+ else:
259
+ logging.debug("不使用流式传输")
260
+ chatbot, status_text = self.next_chatbot_at_once(
261
+ inputs,
262
+ chatbot,
263
+ fake_input=old_inputs,
264
+ display_append=display_reference,
265
+ )
266
+ yield chatbot, status_text
267
+
268
+ if len(self.history) > 1 and self.history[-1]["content"] != inputs:
269
+ logging.info(
270
+ "回答为:"
271
+ + colorama.Fore.BLUE
272
+ + f"{self.history[-1]['content']}"
273
+ + colorama.Style.RESET_ALL
274
+ )
275
+
276
+ if limited_context:
277
+ self.history = self.history[-4:]
278
+ self.all_token_counts = self.all_token_counts[-2:]
279
+
280
+
281
+ max_token = self.token_upper_limit - TOKEN_OFFSET
282
+
283
+ if sum(self.all_token_counts) > max_token and should_check_token_count:
284
+ count = 0
285
+ while sum(self.all_token_counts) > self.token_upper_limit * REDUCE_TOKEN_FACTOR and sum(self.all_token_counts) > 0:
286
+ count += 1
287
+ del self.all_token_counts[0]
288
+ del self.history[:2]
289
+ logging.info(status_text)
290
+ status_text = f"为了防止token超限,模型忘记了早期的 {count} 轮对话"
291
+ yield chatbot, status_text
292
+
293
+ def retry(
294
+ self,
295
+ chatbot,
296
+ stream=False,
297
+ use_websearch=False,
298
+ files=None,
299
+ reply_language="中文",
300
+ ):
301
+ logging.info("重试中……")
302
+ if len(self.history) == 0:
303
+ yield chatbot, f"{standard_error_msg}上下文是空的"
304
+ return
305
+
306
+ del self.history[-2:]
307
+ inputs = chatbot[-1][0]
308
+ self.all_token_counts.pop()
309
+ iter = self.predict(
310
+ inputs,
311
+ chatbot,
312
+ stream=stream,
313
+ use_websearch=use_websearch,
314
+ files=files,
315
+ reply_language=reply_language,
316
+ )
317
+ for x in iter:
318
+ yield x
319
+ logging.info("重试完毕")
320
+
321
+ # def reduce_token_size(self, chatbot):
322
+ # logging.info("开始减少token数量……")
323
+ # chatbot, status_text = self.next_chatbot_at_once(
324
+ # summarize_prompt,
325
+ # chatbot
326
+ # )
327
+ # max_token_count = self.token_upper_limit * REDUCE_TOKEN_FACTOR
328
+ # num_chat = find_n(self.all_token_counts, max_token_count)
329
+ # logging.info(f"previous_token_count: {self.all_token_counts}, keeping {num_chat} chats")
330
+ # chatbot = chatbot[:-1]
331
+ # self.history = self.history[-2*num_chat:] if num_chat > 0 else []
332
+ # self.all_token_counts = self.all_token_counts[-num_chat:] if num_chat > 0 else []
333
+ # msg = f"保留了最近{num_chat}轮对话"
334
+ # logging.info(msg)
335
+ # logging.info("减少token数量完毕")
336
+ # return chatbot, msg + "," + self.token_message(self.all_token_counts if len(self.all_token_counts) > 0 else [0])
337
+
338
+ def interrupt(self):
339
+ self.interrupted = True
340
+
341
+ def recover(self):
342
+ self.interrupted = False
343
+
344
+ def set_temprature(self, new_temprature):
345
+ self.temperature = new_temprature
346
+
347
+ def set_top_p(self, new_top_p):
348
+ self.top_p = new_top_p
349
+
350
+ def reset(self):
351
+ self.history = []
352
+ self.all_token_counts = []
353
+ self.interrupted = False
354
+ return [], self.token_message([0])
355
+
356
+ def delete_first_conversation(self):
357
+ if self.history:
358
+ del self.history[:2]
359
+ del self.all_token_counts[0]
360
+ return self.token_message()
361
+
362
+ def delete_last_conversation(self, chatbot):
363
+ if len(chatbot) > 0 and standard_error_msg in chatbot[-1][1]:
364
+ msg = "由于包含报错信息,只删除chatbot记录"
365
+ chatbot.pop()
366
+ return chatbot, self.history
367
+ if len(self.history) > 0:
368
+ self.history.pop()
369
+ self.history.pop()
370
+ if len(chatbot) > 0:
371
+ msg = "删除了一组chatbot对话"
372
+ chatbot.pop()
373
+ if len(self.all_token_counts) > 0:
374
+ msg = "删除了一组对话的token计数记录"
375
+ self.all_token_counts.pop()
376
+ msg = "删除了一组对话"
377
+ return chatbot, msg
378
+
379
+ def token_message(self, token_lst = None):
380
+ if token_lst is None:
381
+ token_lst = self.all_token_counts
382
+ token_sum = 0
383
+ for i in range(len(token_lst)):
384
+ token_sum += sum(token_lst[: i + 1])
385
+ return f"Token 计数: {sum(token_lst)},本次对话累计消耗了 {token_sum} tokens"
386
+
387
+ def save_chat_history(self, filename, chatbot, user_name):
388
+ if filename == "":
389
+ return
390
+ if not filename.endswith(".json"):
391
+ filename += ".json"
392
+ return save_file(filename, self.system_prompt, self.history, chatbot, user_name)
393
+
394
+ def export_markdown(self, filename, chatbot, user_name):
395
+ if filename == "":
396
+ return
397
+ if not filename.endswith(".md"):
398
+ filename += ".md"
399
+ return save_file(filename, self.system_prompt, self.history, chatbot, user_name)
400
+
401
+ def load_chat_history(self, filename, chatbot, user_name):
402
+ logging.info(f"{user_name} 加载对话历史中……")
403
+ if type(filename) != str:
404
+ filename = filename.name
405
+ try:
406
+ with open(os.path.join(HISTORY_DIR / user_name, filename), "r") as f:
407
+ json_s = json.load(f)
408
+ try:
409
+ if type(json_s["history"][0]) == str:
410
+ logging.info("历史记录格式为旧版,正在转换……")
411
+ new_history = []
412
+ for index, item in enumerate(json_s["history"]):
413
+ if index % 2 == 0:
414
+ new_history.append(construct_user(item))
415
+ else:
416
+ new_history.append(construct_assistant(item))
417
+ json_s["history"] = new_history
418
+ logging.info(new_history)
419
+ except:
420
+ # 没有对话历史
421
+ pass
422
+ logging.info(f"{user_name} 加载对话历史完毕")
423
+ self.history = json_s["history"]
424
+ return filename, json_s["system"], json_s["chatbot"]
425
+ except FileNotFoundError:
426
+ logging.info(f"{user_name} 没有找到对话历史文件,不执行任何操作")
427
+ return filename, self.system_prompt, chatbot
modules/chat_func.py DELETED
@@ -1,497 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- from __future__ import annotations
3
- from typing import TYPE_CHECKING, List
4
-
5
- import logging
6
- import json
7
- import os
8
- import requests
9
- import urllib3
10
-
11
- from tqdm import tqdm
12
- import colorama
13
- from duckduckgo_search import ddg
14
- import asyncio
15
- import aiohttp
16
-
17
-
18
- from modules.presets import *
19
- from modules.llama_func import *
20
- from modules.utils import *
21
- from . import shared
22
- from modules.config import retrieve_proxy
23
-
24
- # logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s")
25
-
26
- if TYPE_CHECKING:
27
- from typing import TypedDict
28
-
29
- class DataframeData(TypedDict):
30
- headers: List[str]
31
- data: List[List[str | int | bool]]
32
-
33
-
34
- initial_prompt = "You are a helpful assistant."
35
- HISTORY_DIR = "history"
36
- TEMPLATES_DIR = "templates"
37
-
38
- @shared.state.switching_api_key # 在不开启多账号模式的时候,这个装饰器不会起作用
39
- def get_response(
40
- openai_api_key, system_prompt, history, temperature, top_p, stream, selected_model
41
- ):
42
- headers = {
43
- "Content-Type": "application/json",
44
- "Authorization": f"Bearer {openai_api_key}",
45
- }
46
-
47
- history = [construct_system(system_prompt), *history]
48
-
49
- payload = {
50
- "model": selected_model,
51
- "messages": history, # [{"role": "user", "content": f"{inputs}"}],
52
- "temperature": temperature, # 1.0,
53
- "top_p": top_p, # 1.0,
54
- "n": 1,
55
- "stream": stream,
56
- "presence_penalty": 0,
57
- "frequency_penalty": 0,
58
- }
59
- if stream:
60
- timeout = timeout_streaming
61
- else:
62
- timeout = timeout_all
63
-
64
-
65
- # 如果有自定义的api-host,使用自定义host发送请求,否则使用默认设置发送请求
66
- if shared.state.completion_url != COMPLETION_URL:
67
- logging.info(f"使用自定义API URL: {shared.state.completion_url}")
68
-
69
- with retrieve_proxy():
70
- response = requests.post(
71
- shared.state.completion_url,
72
- headers=headers,
73
- json=payload,
74
- stream=True,
75
- timeout=timeout,
76
- )
77
-
78
- return response
79
-
80
-
81
- def stream_predict(
82
- openai_api_key,
83
- system_prompt,
84
- history,
85
- inputs,
86
- chatbot,
87
- all_token_counts,
88
- top_p,
89
- temperature,
90
- selected_model,
91
- fake_input=None,
92
- display_append=""
93
- ):
94
- def get_return_value():
95
- return chatbot, history, status_text, all_token_counts
96
-
97
- logging.info("实时回答模式")
98
- partial_words = ""
99
- counter = 0
100
- status_text = "开始实时传输回答……"
101
- history.append(construct_user(inputs))
102
- history.append(construct_assistant(""))
103
- if fake_input:
104
- chatbot.append((fake_input, ""))
105
- else:
106
- chatbot.append((inputs, ""))
107
- user_token_count = 0
108
- if fake_input is not None:
109
- input_token_count = count_token(construct_user(fake_input))
110
- else:
111
- input_token_count = count_token(construct_user(inputs))
112
- if len(all_token_counts) == 0:
113
- system_prompt_token_count = count_token(construct_system(system_prompt))
114
- user_token_count = (
115
- input_token_count + system_prompt_token_count
116
- )
117
- else:
118
- user_token_count = input_token_count
119
- all_token_counts.append(user_token_count)
120
- logging.info(f"输入token计数: {user_token_count}")
121
- yield get_return_value()
122
- try:
123
- response = get_response(
124
- openai_api_key,
125
- system_prompt,
126
- history,
127
- temperature,
128
- top_p,
129
- True,
130
- selected_model,
131
- )
132
- except requests.exceptions.ConnectTimeout:
133
- status_text = (
134
- standard_error_msg + connection_timeout_prompt + error_retrieve_prompt
135
- )
136
- yield get_return_value()
137
- return
138
- except requests.exceptions.ReadTimeout:
139
- status_text = standard_error_msg + read_timeout_prompt + error_retrieve_prompt
140
- yield get_return_value()
141
- return
142
-
143
- yield get_return_value()
144
- error_json_str = ""
145
-
146
- if fake_input is not None:
147
- history[-2] = construct_user(fake_input)
148
- for chunk in tqdm(response.iter_lines()):
149
- if counter == 0:
150
- counter += 1
151
- continue
152
- counter += 1
153
- # check whether each line is non-empty
154
- if chunk:
155
- chunk = chunk.decode()
156
- chunklength = len(chunk)
157
- try:
158
- chunk = json.loads(chunk[6:])
159
- except json.JSONDecodeError:
160
- logging.info(chunk)
161
- error_json_str += chunk
162
- status_text = f"JSON解析错误。请重置对话。收到的内容: {error_json_str}"
163
- yield get_return_value()
164
- continue
165
- # decode each line as response data is in bytes
166
- if chunklength > 6 and "delta" in chunk["choices"][0]:
167
- finish_reason = chunk["choices"][0]["finish_reason"]
168
- status_text = construct_token_message(all_token_counts)
169
- if finish_reason == "stop":
170
- yield get_return_value()
171
- break
172
- try:
173
- partial_words = (
174
- partial_words + chunk["choices"][0]["delta"]["content"]
175
- )
176
- except KeyError:
177
- status_text = (
178
- standard_error_msg
179
- + "API回复中找不到内容。很可能是Token计数达到上限了。请重置对话。当前Token计数: "
180
- + str(sum(all_token_counts))
181
- )
182
- yield get_return_value()
183
- break
184
- history[-1] = construct_assistant(partial_words)
185
- chatbot[-1] = (chatbot[-1][0], partial_words+display_append)
186
- all_token_counts[-1] += 1
187
- yield get_return_value()
188
-
189
-
190
- def predict_all(
191
- openai_api_key,
192
- system_prompt,
193
- history,
194
- inputs,
195
- chatbot,
196
- all_token_counts,
197
- top_p,
198
- temperature,
199
- selected_model,
200
- fake_input=None,
201
- display_append=""
202
- ):
203
- logging.info("一次性回答模式")
204
- history.append(construct_user(inputs))
205
- history.append(construct_assistant(""))
206
- if fake_input:
207
- chatbot.append((fake_input, ""))
208
- else:
209
- chatbot.append((inputs, ""))
210
- if fake_input is not None:
211
- all_token_counts.append(count_token(construct_user(fake_input)))
212
- else:
213
- all_token_counts.append(count_token(construct_user(inputs)))
214
- try:
215
- response = get_response(
216
- openai_api_key,
217
- system_prompt,
218
- history,
219
- temperature,
220
- top_p,
221
- False,
222
- selected_model,
223
- )
224
- except requests.exceptions.ConnectTimeout:
225
- status_text = (
226
- standard_error_msg + connection_timeout_prompt + error_retrieve_prompt
227
- )
228
- return chatbot, history, status_text, all_token_counts
229
- except requests.exceptions.ProxyError:
230
- status_text = standard_error_msg + proxy_error_prompt + error_retrieve_prompt
231
- return chatbot, history, status_text, all_token_counts
232
- except requests.exceptions.SSLError:
233
- status_text = standard_error_msg + ssl_error_prompt + error_retrieve_prompt
234
- return chatbot, history, status_text, all_token_counts
235
- response = json.loads(response.text)
236
- if fake_input is not None:
237
- history[-2] = construct_user(fake_input)
238
- try:
239
- content = response["choices"][0]["message"]["content"]
240
- history[-1] = construct_assistant(content)
241
- chatbot[-1] = (chatbot[-1][0], content+display_append)
242
- total_token_count = response["usage"]["total_tokens"]
243
- if fake_input is not None:
244
- all_token_counts[-1] += count_token(construct_assistant(content))
245
- else:
246
- all_token_counts[-1] = total_token_count - sum(all_token_counts)
247
- status_text = construct_token_message(total_token_count)
248
- return chatbot, history, status_text, all_token_counts
249
- except KeyError:
250
- status_text = standard_error_msg + str(response)
251
- return chatbot, history, status_text, all_token_counts
252
-
253
-
254
- def predict(
255
- openai_api_key,
256
- system_prompt,
257
- history,
258
- inputs,
259
- chatbot,
260
- all_token_counts,
261
- top_p,
262
- temperature,
263
- stream=False,
264
- selected_model=MODELS[0],
265
- use_websearch=False,
266
- files = None,
267
- reply_language="中文",
268
- should_check_token_count=True,
269
- ): # repetition_penalty, top_k
270
- from llama_index.indices.vector_store.base_query import GPTVectorStoreIndexQuery
271
- from llama_index.indices.query.schema import QueryBundle
272
- from langchain.llms import OpenAIChat
273
-
274
-
275
- logging.info("输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
276
- if should_check_token_count:
277
- yield chatbot+[(inputs, "")], history, "开始生成回答……", all_token_counts
278
- if reply_language == "跟随问题语言(不稳定)":
279
- reply_language = "the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch."
280
- old_inputs = None
281
- display_reference = []
282
- limited_context = False
283
- if files:
284
- limited_context = True
285
- old_inputs = inputs
286
- msg = "加载索引中……(这可能需要几分钟)"
287
- logging.info(msg)
288
- yield chatbot+[(inputs, "")], history, msg, all_token_counts
289
- index = construct_index(openai_api_key, file_src=files)
290
- msg = "索引构建完成,获取回答中……"
291
- logging.info(msg)
292
- yield chatbot+[(inputs, "")], history, msg, all_token_counts
293
- with retrieve_proxy():
294
- llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name=selected_model))
295
- prompt_helper = PromptHelper(max_input_size = 4096, num_output = 5, max_chunk_overlap = 20, chunk_size_limit=600)
296
- from llama_index import ServiceContext
297
- service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
298
- query_object = GPTVectorStoreIndexQuery(index.index_struct, service_context=service_context, similarity_top_k=5, vector_store=index._vector_store, docstore=index._docstore)
299
- query_bundle = QueryBundle(inputs)
300
- nodes = query_object.retrieve(query_bundle)
301
- reference_results = [n.node.text for n in nodes]
302
- reference_results = add_source_numbers(reference_results, use_source=False)
303
- display_reference = add_details(reference_results)
304
- display_reference = "\n\n" + "".join(display_reference)
305
- inputs = (
306
- replace_today(PROMPT_TEMPLATE)
307
- .replace("{query_str}", inputs)
308
- .replace("{context_str}", "\n\n".join(reference_results))
309
- .replace("{reply_language}", reply_language )
310
- )
311
- elif use_websearch:
312
- limited_context = True
313
- search_results = ddg(inputs, max_results=5)
314
- old_inputs = inputs
315
- reference_results = []
316
- for idx, result in enumerate(search_results):
317
- logging.info(f"搜索结果{idx + 1}:{result}")
318
- domain_name = urllib3.util.parse_url(result["href"]).host
319
- reference_results.append([result["body"], result["href"]])
320
- display_reference.append(f"{idx+1}. [{domain_name}]({result['href']})\n")
321
- reference_results = add_source_numbers(reference_results)
322
- display_reference = "\n\n" + "".join(display_reference)
323
- inputs = (
324
- replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
325
- .replace("{query}", inputs)
326
- .replace("{web_results}", "\n\n".join(reference_results))
327
- .replace("{reply_language}", reply_language )
328
- )
329
- else:
330
- display_reference = ""
331
-
332
- if len(openai_api_key) == 0 and not shared.state.multi_api_key:
333
- status_text = standard_error_msg + no_apikey_msg
334
- logging.info(status_text)
335
- chatbot.append((inputs, ""))
336
- if len(history) == 0:
337
- history.append(construct_user(inputs))
338
- history.append("")
339
- all_token_counts.append(0)
340
- else:
341
- history[-2] = construct_user(inputs)
342
- yield chatbot+[(inputs, "")], history, status_text, all_token_counts
343
- return
344
- elif len(inputs.strip()) == 0:
345
- status_text = standard_error_msg + no_input_msg
346
- logging.info(status_text)
347
- yield chatbot+[(inputs, "")], history, status_text, all_token_counts
348
- return
349
-
350
- if stream:
351
- logging.info("使用流式传输")
352
- iter = stream_predict(
353
- openai_api_key,
354
- system_prompt,
355
- history,
356
- inputs,
357
- chatbot,
358
- all_token_counts,
359
- top_p,
360
- temperature,
361
- selected_model,
362
- fake_input=old_inputs,
363
- display_append=display_reference
364
- )
365
- for chatbot, history, status_text, all_token_counts in iter:
366
- if shared.state.interrupted:
367
- shared.state.recover()
368
- return
369
- yield chatbot, history, status_text, all_token_counts
370
- else:
371
- logging.info("不使用流式传输")
372
- chatbot, history, status_text, all_token_counts = predict_all(
373
- openai_api_key,
374
- system_prompt,
375
- history,
376
- inputs,
377
- chatbot,
378
- all_token_counts,
379
- top_p,
380
- temperature,
381
- selected_model,
382
- fake_input=old_inputs,
383
- display_append=display_reference
384
- )
385
- yield chatbot, history, status_text, all_token_counts
386
-
387
- logging.info(f"传输完毕。当前token计数为{all_token_counts}")
388
- if len(history) > 1 and history[-1]["content"] != inputs:
389
- logging.info(
390
- "回答为:"
391
- + colorama.Fore.BLUE
392
- + f"{history[-1]['content']}"
393
- + colorama.Style.RESET_ALL
394
- )
395
-
396
- if limited_context:
397
- history = history[-4:]
398
- all_token_counts = all_token_counts[-2:]
399
- yield chatbot, history, status_text, all_token_counts
400
-
401
- if stream:
402
- max_token = MODEL_SOFT_TOKEN_LIMIT[selected_model]["streaming"]
403
- else:
404
- max_token = MODEL_SOFT_TOKEN_LIMIT[selected_model]["all"]
405
-
406
- if sum(all_token_counts) > max_token and should_check_token_count:
407
- print(all_token_counts)
408
- count = 0
409
- while sum(all_token_counts) > max_token - 500 and sum(all_token_counts) > 0:
410
- count += 1
411
- del all_token_counts[0]
412
- del history[:2]
413
- logging.info(status_text)
414
- status_text = f"为了防止token超限,模型忘记了早期的 {count} 轮对话"
415
- yield chatbot, history, status_text, all_token_counts
416
-
417
-
418
- def retry(
419
- openai_api_key,
420
- system_prompt,
421
- history,
422
- chatbot,
423
- token_count,
424
- top_p,
425
- temperature,
426
- stream=False,
427
- selected_model=MODELS[0],
428
- reply_language="中文",
429
- ):
430
- logging.info("重试中……")
431
- if len(history) == 0:
432
- yield chatbot, history, f"{standard_error_msg}上下文是空的", token_count
433
- return
434
- history.pop()
435
- inputs = history.pop()["content"]
436
- token_count.pop()
437
- iter = predict(
438
- openai_api_key,
439
- system_prompt,
440
- history,
441
- inputs,
442
- chatbot,
443
- token_count,
444
- top_p,
445
- temperature,
446
- stream=stream,
447
- selected_model=selected_model,
448
- reply_language=reply_language,
449
- )
450
- logging.info("重试中……")
451
- for x in iter:
452
- yield x
453
- logging.info("重试完毕")
454
-
455
-
456
- def reduce_token_size(
457
- openai_api_key,
458
- system_prompt,
459
- history,
460
- chatbot,
461
- token_count,
462
- top_p,
463
- temperature,
464
- max_token_count,
465
- selected_model=MODELS[0],
466
- reply_language="中文",
467
- ):
468
- logging.info("开始减少token数量……")
469
- iter = predict(
470
- openai_api_key,
471
- system_prompt,
472
- history,
473
- summarize_prompt,
474
- chatbot,
475
- token_count,
476
- top_p,
477
- temperature,
478
- selected_model=selected_model,
479
- should_check_token_count=False,
480
- reply_language=reply_language,
481
- )
482
- logging.info(f"chatbot: {chatbot}")
483
- flag = False
484
- for chatbot, history, status_text, previous_token_count in iter:
485
- num_chat = find_n(previous_token_count, max_token_count)
486
- logging.info(f"previous_token_count: {previous_token_count}, keeping {num_chat} chats")
487
- if flag:
488
- chatbot = chatbot[:-1]
489
- flag = True
490
- history = history[-2*num_chat:] if num_chat > 0 else []
491
- token_count = previous_token_count[-num_chat:] if num_chat > 0 else []
492
- msg = f"保留了最近{num_chat}轮对话"
493
- yield chatbot, history, msg + "," + construct_token_message(
494
- token_count if len(token_count) > 0 else [0],
495
- ), token_count
496
- logging.info(msg)
497
- logging.info("减少token数量完毕")
modules/config.py CHANGED
@@ -3,7 +3,7 @@ from contextlib import contextmanager
import os
import logging
import sys
- import json
+ import commentjson as json

from . import shared

modules/llama_func.py CHANGED
@@ -44,40 +44,44 @@ def get_documents(file_src):
filename = os.path.basename(filepath)
file_type = os.path.splitext(filepath)[1]
logging.info(f"loading file: {filename}")
- if file_type == ".pdf":
- logging.debug("Loading PDF...")
- try:
- from modules.pdf_func import parse_pdf
- from modules.config import advance_docs
- two_column = advance_docs["pdf"].get("two_column", False)
- pdftext = parse_pdf(filepath, two_column).text
- except:
- pdftext = ""
- with open(filepath, 'rb') as pdfFileObj:
- pdfReader = PyPDF2.PdfReader(pdfFileObj)
- for page in tqdm(pdfReader.pages):
- pdftext += page.extract_text()
- text_raw = pdftext
- elif file_type == ".docx":
- logging.debug("Loading Word...")
- DocxReader = download_loader("DocxReader")
- loader = DocxReader()
- text_raw = loader.load_data(file=filepath)[0].text
- elif file_type == ".epub":
- logging.debug("Loading EPUB...")
- EpubReader = download_loader("EpubReader")
- loader = EpubReader()
- text_raw = loader.load_data(file=filepath)[0].text
- elif file_type == ".xlsx":
- logging.debug("Loading Excel...")
- text_list = excel_to_string(filepath)
- for elem in text_list:
- documents.append(Document(elem))
- continue
- else:
- logging.debug("Loading text file...")
- with open(filepath, "r", encoding="utf-8") as f:
- text_raw = f.read()
+ try:
+ if file_type == ".pdf":
+ logging.debug("Loading PDF...")
+ try:
+ from modules.pdf_func import parse_pdf
+ from modules.config import advance_docs
+ two_column = advance_docs["pdf"].get("two_column", False)
+ pdftext = parse_pdf(filepath, two_column).text
+ except:
+ pdftext = ""
+ with open(filepath, 'rb') as pdfFileObj:
+ pdfReader = PyPDF2.PdfReader(pdfFileObj)
+ for page in tqdm(pdfReader.pages):
+ pdftext += page.extract_text()
+ text_raw = pdftext
+ elif file_type == ".docx":
+ logging.debug("Loading Word...")
+ DocxReader = download_loader("DocxReader")
+ loader = DocxReader()
+ text_raw = loader.load_data(file=filepath)[0].text
+ elif file_type == ".epub":
+ logging.debug("Loading EPUB...")
+ EpubReader = download_loader("EpubReader")
+ loader = EpubReader()
+ text_raw = loader.load_data(file=filepath)[0].text
+ elif file_type == ".xlsx":
+ logging.debug("Loading Excel...")
+ text_list = excel_to_string(filepath)
+ for elem in text_list:
+ documents.append(Document(elem))
+ continue
+ else:
+ logging.debug("Loading text file...")
+ with open(filepath, "r", encoding="utf-8") as f:
+ text_raw = f.read()
+ except Exception as e:
+ logging.error(f"Error loading file: {filename}")
+ pass
text = add_space(text_raw)
# text = block_split(text)
# documents += text
modules/models.py ADDED
@@ -0,0 +1,210 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING, List
3
+
4
+ import logging
5
+ import json
6
+ import commentjson as cjson
7
+ import os
8
+ import sys
9
+ import requests
10
+ import urllib3
11
+
12
+ from tqdm import tqdm
13
+ import colorama
14
+ from duckduckgo_search import ddg
15
+ import asyncio
16
+ import aiohttp
17
+ from enum import Enum
18
+
19
+ from .presets import *
20
+ from .llama_func import *
21
+ from .utils import *
22
+ from . import shared
23
+ from .config import retrieve_proxy
24
+ from .base_model import BaseLLMModel, ModelType
25
+
26
+
27
+ class OpenAIClient(BaseLLMModel):
28
+ def __init__(
29
+ self, model_name, api_key, system_prompt=INITIAL_SYSTEM_PROMPT, temperature=1.0, top_p=1.0
30
+ ) -> None:
31
+ super().__init__(model_name=model_name, temperature=temperature, top_p=top_p, system_prompt=system_prompt)
32
+ self.api_key = api_key
33
+ self.completion_url = shared.state.completion_url
34
+ self.usage_api_url = shared.state.usage_api_url
35
+ self.headers = {
36
+ "Content-Type": "application/json",
37
+ "Authorization": f"Bearer {self.api_key}",
38
+ }
39
+
40
+
41
+ def get_answer_stream_iter(self):
42
+ response = self._get_response(stream=True)
43
+ if response is not None:
44
+ iter = self._decode_chat_response(response)
45
+ partial_text = ""
46
+ for i in iter:
47
+ partial_text += i
48
+ yield partial_text
49
+ else:
50
+ yield standard_error_msg + general_error_msg
51
+
52
+ def get_answer_at_once(self):
53
+ response = self._get_response()
54
+ response = json.loads(response.text)
55
+ content = response["choices"][0]["message"]["content"]
56
+ total_token_count = response["usage"]["total_tokens"]
57
+ return content, total_token_count
58
+
59
+ def count_token(self, user_input):
60
+ input_token_count = count_token(construct_user(user_input))
61
+ if self.system_prompt is not None and len(self.all_token_counts) == 0:
62
+ system_prompt_token_count = count_token(construct_system(self.system_prompt))
63
+ return input_token_count + system_prompt_token_count
64
+ return input_token_count
65
+
66
+ def set_system_prompt(self, new_system_prompt):
67
+ self.system_prompt = new_system_prompt
68
+
69
+ def billing_info(self):
70
+ try:
71
+ curr_time = datetime.datetime.now()
72
+ last_day_of_month = get_last_day_of_month(curr_time).strftime("%Y-%m-%d")
73
+ first_day_of_month = curr_time.replace(day=1).strftime("%Y-%m-%d")
74
+ usage_url = f"{self.usage_api_url}?start_date={first_day_of_month}&end_date={last_day_of_month}"
75
+ try:
76
+ usage_data = self._get_billing_data(usage_url)
77
+ except Exception as e:
78
+ logging.error(f"获取API使用情况失败:"+str(e))
79
+ return f"**获取API使用情况失败**"
80
+ rounded_usage = "{:.5f}".format(usage_data['total_usage']/100)
81
+ return f"**本月使用金额** \u3000 ${rounded_usage}"
82
+ except requests.exceptions.ConnectTimeout:
83
+ status_text = standard_error_msg + connection_timeout_prompt + error_retrieve_prompt
84
+ return status_text
85
+ except requests.exceptions.ReadTimeout:
86
+ status_text = standard_error_msg + read_timeout_prompt + error_retrieve_prompt
87
+ return status_text
88
+ except Exception as e:
89
+ logging.error(f"获取API使用情况失败:"+str(e))
90
+ return standard_error_msg + error_retrieve_prompt
91
+
92
+ @shared.state.switching_api_key # 在不开启多账号模式的时候,这个装饰器不会起作用
93
+ def _get_response(self, stream=False):
94
+ openai_api_key = self.api_key
95
+ system_prompt = self.system_prompt
96
+ history = self.history
97
+ logging.debug(colorama.Fore.YELLOW + f"{history}" + colorama.Fore.RESET)
98
+ temperature = self.temperature
99
+ top_p = self.top_p
100
+ selected_model = self.model_name
101
+ headers = {
102
+ "Content-Type": "application/json",
103
+ "Authorization": f"Bearer {openai_api_key}",
104
+ }
105
+
106
+ if system_prompt is not None:
107
+ history = [construct_system(system_prompt), *history]
108
+
109
+ payload = {
110
+ "model": selected_model,
111
+ "messages": history, # [{"role": "user", "content": f"{inputs}"}],
112
+ "temperature": temperature, # 1.0,
113
+ "top_p": top_p, # 1.0,
114
+ "n": 1,
115
+ "stream": stream,
116
+ "presence_penalty": 0,
117
+ "frequency_penalty": 0,
118
+ }
119
+ if stream:
120
+ timeout = timeout_streaming
121
+ else:
122
+ timeout = TIMEOUT_ALL
123
+
124
+ # 如果有自定义的api-host,使用自定义host发送请求,否则使用默认设置发送请求
125
+ if shared.state.completion_url != COMPLETION_URL:
126
+ logging.info(f"使用自定义API URL: {shared.state.completion_url}")
127
+
128
+ with retrieve_proxy():
129
+ try:
130
+ response = requests.post(
131
+ shared.state.completion_url,
132
+ headers=headers,
133
+ json=payload,
134
+ stream=stream,
135
+ timeout=timeout,
136
+ )
137
+ except:
138
+ return None
139
+ return response
140
+
141
+ def _get_billing_data(self, usage_url):
142
+ with retrieve_proxy():
143
+ response = requests.get(
144
+ usage_url,
145
+ headers=self.headers,
146
+ timeout=TIMEOUT_ALL,
147
+ )
148
+
149
+ if response.status_code == 200:
150
+ data = response.json()
151
+ return data
152
+ else:
153
+ raise Exception(f"API request failed with status code {response.status_code}: {response.text}")
154
+
155
+ def _decode_chat_response(self, response):
156
+ for chunk in response.iter_lines():
157
+ if chunk:
158
+ chunk = chunk.decode()
159
+ chunk_length = len(chunk)
160
+ try:
161
+ chunk = json.loads(chunk[6:])
162
+ except json.JSONDecodeError:
163
+ print(f"JSON解析错误,收到的内容: {chunk}")
164
+ continue
165
+ if chunk_length > 6 and "delta" in chunk["choices"][0]:
166
+ if chunk["choices"][0]["finish_reason"] == "stop":
167
+ break
168
+ try:
169
+ yield chunk["choices"][0]["delta"]["content"]
170
+ except Exception as e:
171
+ # logging.error(f"Error: {e}")
172
+ continue
173
+
174
+ def get_model(model_name, access_key=None, temprature=None, top_p=None, system_prompt = None) -> BaseLLMModel:
175
+ model_type = ModelType.get_type(model_name)
176
+ if model_type == ModelType.OpenAI:
177
+ model = OpenAIClient(model_name, access_key, system_prompt, temprature, top_p)
178
+ return model
179
+
180
+ if __name__=="__main__":
181
+ with open("config.json", "r") as f:
182
+ openai_api_key = cjson.load(f)["openai_api_key"]
183
+ client = OpenAIClient("gpt-3.5-turbo", openai_api_key)
184
+ chatbot = []
185
+ stream = False
186
+ # 测试账单功能
187
+ print(colorama.Back.GREEN + "测试账单功能" + colorama.Back.RESET)
188
+ print(client.billing_info())
189
+ # 测试问答
190
+ print(colorama.Back.GREEN + "测试问答" + colorama.Back.RESET)
191
+ question = "巴黎是中国的首都吗?"
192
+ for i in client.predict(inputs=question, chatbot=chatbot, stream=stream):
193
+ print(i)
194
+ print(f"测试问答后history : {client.history}")
195
+ # 测试记忆力
196
+ print(colorama.Back.GREEN + "测试记忆力" + colorama.Back.RESET)
197
+ question = "我刚刚问了你什么问题?"
198
+ for i in client.predict(inputs=question, chatbot=chatbot, stream=stream):
199
+ print(i)
200
+ print(f"测试记忆力后history : {client.history}")
201
+ # 测试重试功能
202
+ print(colorama.Back.GREEN + "测试重试功能" + colorama.Back.RESET)
203
+ for i in client.retry(chatbot=chatbot, stream=stream):
204
+ print(i)
205
+ print(f"重试后history : {client.history}")
206
+ # # 测试总结功能
207
+ # print(colorama.Back.GREEN + "测试总结功能" + colorama.Back.RESET)
208
+ # chatbot, msg = client.reduce_token_size(chatbot=chatbot)
209
+ # print(chatbot, msg)
210
+ # print(f"总结后history: {client.history}")
modules/openai_func.py DELETED
@@ -1,65 +0,0 @@
1
- import requests
2
- import logging
3
- from modules.presets import (
4
- timeout_all,
5
- USAGE_API_URL,
6
- BALANCE_API_URL,
7
- standard_error_msg,
8
- connection_timeout_prompt,
9
- error_retrieve_prompt,
10
- read_timeout_prompt
11
- )
12
-
13
- from . import shared
14
- from modules.config import retrieve_proxy
15
- import os, datetime
16
-
17
- def get_billing_data(openai_api_key, billing_url):
18
- headers = {
19
- "Content-Type": "application/json",
20
- "Authorization": f"Bearer {openai_api_key}"
21
- }
22
-
23
- timeout = timeout_all
24
- with retrieve_proxy():
25
- response = requests.get(
26
- billing_url,
27
- headers=headers,
28
- timeout=timeout,
29
- )
30
-
31
- if response.status_code == 200:
32
- data = response.json()
33
- return data
34
- else:
35
- raise Exception(f"API request failed with status code {response.status_code}: {response.text}")
36
-
37
-
38
- def get_usage(openai_api_key):
39
- try:
40
- curr_time = datetime.datetime.now()
41
- last_day_of_month = get_last_day_of_month(curr_time).strftime("%Y-%m-%d")
42
- first_day_of_month = curr_time.replace(day=1).strftime("%Y-%m-%d")
43
- usage_url = f"{shared.state.usage_api_url}?start_date={first_day_of_month}&end_date={last_day_of_month}"
44
- try:
45
- usage_data = get_billing_data(openai_api_key, usage_url)
46
- except Exception as e:
47
- logging.error(f"获取API使用情况失败:"+str(e))
48
- return f"**获取API使用情况失败**"
49
- rounded_usage = "{:.5f}".format(usage_data['total_usage']/100)
50
- return f"**本月使用金额** \u3000 ${rounded_usage}"
51
- except requests.exceptions.ConnectTimeout:
52
- status_text = standard_error_msg + connection_timeout_prompt + error_retrieve_prompt
53
- return status_text
54
- except requests.exceptions.ReadTimeout:
55
- status_text = standard_error_msg + read_timeout_prompt + error_retrieve_prompt
56
- return status_text
57
- except Exception as e:
58
- logging.error(f"获取API使用情况失败:"+str(e))
59
- return standard_error_msg + error_retrieve_prompt
60
-
61
- def get_last_day_of_month(any_day):
62
- # The day 28 exists in every month. 4 days later, it's always next month
63
- next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
64
- # subtracting the number of the current day brings us back one month
65
- return next_month - datetime.timedelta(days=next_month.day)
modules/presets.py CHANGED
@@ -3,26 +3,29 @@ import gradio as gr
from pathlib import Path

# ChatGPT 设置
- initial_prompt = "You are a helpful assistant."
+ INITIAL_SYSTEM_PROMPT = "You are a helpful assistant."
API_HOST = "api.openai.com"
COMPLETION_URL = "https://api.openai.com/v1/chat/completions"
BALANCE_API_URL="https://api.openai.com/dashboard/billing/credit_grants"
USAGE_API_URL="https://api.openai.com/dashboard/billing/usage"
HISTORY_DIR = Path("history")
+ HISTORY_DIR = "history"
TEMPLATES_DIR = "templates"

# 错误信息
standard_error_msg = "☹️发生了错误:" # 错误信息的标准前缀
- error_retrieve_prompt = "请检查网络连接,或者API-Key是否有效。" # 获取对话时发生错误
+ general_error_msg = "获取对话时发生错误,请查看后台日志"
+ error_retrieve_prompt = "请检查网络连接,或者API-Key是否有效。"
connection_timeout_prompt = "连接超时,无法获取对话。" # 连接超时
read_timeout_prompt = "读取超时,无法获取对话。" # 读取超时
proxy_error_prompt = "代理错误,无法获取对话。" # 代理错误
ssl_error_prompt = "SSL错误,无法获取对话。" # SSL 错误
no_apikey_msg = "API key长度不是51位,请检查是否输入正确。" # API key 长度不足 51 位
no_input_msg = "请输入对话内容。" # 未输入对话内容
+ billing_not_applicable_msg = "模型本地运行中" # 本地运行的模型返回的账单信息

timeout_streaming = 10 # 流式对话时的超时时间
- timeout_all = 200 # 非流式对话时的超时时间
+ TIMEOUT_ALL = 200 # 非流式对话时的超时时间
enable_streaming_option = True # 是否启用选择选择是否实时显示回答的勾选框
HIDE_MY_KEY = False # 如果你想在UI中隐藏你的 API 密钥,将此值设置为 True
CONCURRENT_COUNT = 100 # 允许同时使用的用户数量
@@ -57,33 +60,18 @@ MODELS = [
"gpt-4-32k-0314",
] # 可选的模型

- MODEL_SOFT_TOKEN_LIMIT = {
- "gpt-3.5-turbo": {
- "streaming": 3500,
- "all": 3500
- },
- "gpt-3.5-turbo-0301": {
- "streaming": 3500,
- "all": 3500
- },
- "gpt-4": {
- "streaming": 7500,
- "all": 7500
- },
- "gpt-4-0314": {
- "streaming": 7500,
- "all": 7500
- },
- "gpt-4-32k": {
- "streaming": 31000,
- "all": 31000
- },
- "gpt-4-32k-0314": {
- "streaming": 31000,
- "all": 31000
- }
+ MODEL_TOKEN_LIMIT = {
+ "gpt-3.5-turbo": 4096,
+ "gpt-3.5-turbo-0301": 4096,
+ "gpt-4": 8192,
+ "gpt-4-0314": 8192,
+ "gpt-4-32k": 32768,
+ "gpt-4-32k-0314": 32768
}

+ TOKEN_OFFSET = 1000 # 模型的token上限减去这个值,得到软上限。到达软上限之后,自动尝试减少token占用。
+ REDUCE_TOKEN_FACTOR = 0.5 # 与模型token上限想乘,得到目标token数。减少token占用时,将token占用减少到目标token数以下。
+
REPLY_LANGUAGES = [
"简体中文",
"繁體中文",
modules/utils.py CHANGED
@@ -153,47 +153,6 @@ def construct_assistant(text):
      return construct_text("assistant", text)
 
 
- def construct_token_message(tokens: List[int]):
-     token_sum = 0
-     for i in range(len(tokens)):
-         token_sum += sum(tokens[: i + 1])
-     return f"Token 计数: {sum(tokens)},本次对话累计消耗了 {token_sum} tokens"
-
-
- def delete_first_conversation(history, previous_token_count):
-     if history:
-         del history[:2]
-         del previous_token_count[0]
-     return (
-         history,
-         previous_token_count,
-         construct_token_message(previous_token_count),
-     )
-
-
- def delete_last_conversation(chatbot, history, previous_token_count):
-     if len(chatbot) > 0 and standard_error_msg in chatbot[-1][1]:
-         logging.info("由于包含报错信息,只删除chatbot记录")
-         chatbot.pop()
-         return chatbot, history
-     if len(history) > 0:
-         logging.info("删除了一组对话历史")
-         history.pop()
-         history.pop()
-     if len(chatbot) > 0:
-         logging.info("删除了一组chatbot对话")
-         chatbot.pop()
-     if len(previous_token_count) > 0:
-         logging.info("删除了一组对话的token计数记录")
-         previous_token_count.pop()
-     return (
-         chatbot,
-         history,
-         previous_token_count,
-         construct_token_message(previous_token_count),
-     )
-
-
  def save_file(filename, system, history, chatbot, user_name):
      logging.info(f"{user_name} 保存对话历史中……")
      os.makedirs(HISTORY_DIR / user_name, exist_ok=True)
@@ -212,56 +171,12 @@ def save_file(filename, system, history, chatbot, user_name):
      return os.path.join(HISTORY_DIR / user_name, filename)
 
 
- def save_chat_history(filename, system, history, chatbot, user_name):
-     if filename == "":
-         return
-     if not filename.endswith(".json"):
-         filename += ".json"
-     return save_file(filename, system, history, chatbot, user_name)
-
-
- def export_markdown(filename, system, history, chatbot, user_name):
-     if filename == "":
-         return
-     if not filename.endswith(".md"):
-         filename += ".md"
-     return save_file(filename, system, history, chatbot, user_name)
-
-
- def load_chat_history(filename, system, history, chatbot, user_name):
-     logging.info(f"{user_name} 加载对话历史中……")
-     if type(filename) != str:
-         filename = filename.name
-     try:
-         with open(os.path.join(HISTORY_DIR / user_name, filename), "r") as f:
-             json_s = json.load(f)
-         try:
-             if type(json_s["history"][0]) == str:
-                 logging.info("历史记录格式为旧版,正在转换……")
-                 new_history = []
-                 for index, item in enumerate(json_s["history"]):
-                     if index % 2 == 0:
-                         new_history.append(construct_user(item))
-                     else:
-                         new_history.append(construct_assistant(item))
-                 json_s["history"] = new_history
-                 logging.info(new_history)
-         except:
-             # 没有对话历史
-             pass
-         logging.info(f"{user_name} 加载对话历史完毕")
-         return filename, json_s["system"], json_s["history"], json_s["chatbot"]
-     except FileNotFoundError:
-         logging.info(f"{user_name} 没有找到对话历史文件,不执行任何操作")
-         return filename, system, history, chatbot
-
-
  def sorted_by_pinyin(list):
      return sorted(list, key=lambda char: lazy_pinyin(char)[0][0])
 
 
  def get_file_names(dir, plain=False, filetypes=[".json"]):
-     logging.info(f"获取文件名列表,目录为{dir},文件类型为{filetypes},是否为纯文本列表{plain}")
+     logging.debug(f"获取文件名列表,目录为{dir},文件类型为{filetypes},是否为纯文本列表{plain}")
      files = []
      try:
          for type in filetypes:
@@ -279,14 +194,13 @@ def get_file_names(dir, plain=False, filetypes=[".json"]):
 
 
  def get_history_names(plain=False, user_name=""):
-     logging.info(f"从用户 {user_name} 中获取历史记录文件名列表")
-     return get_file_names(HISTORY_DIR / user_name, plain)
+     logging.debug(f"从用户 {user_name} 中获取历史记录文件名列表")
+     return get_file_names(os.path.join(HISTORY_DIR, user_name), plain)
 
 
  def load_template(filename, mode=0):
-     logging.info(f"加载模板文件{filename},模式为{mode}(0为返回字典和下拉菜单,1为返回下拉菜单,2为返回字典)")
+     logging.debug(f"加载模板文件{filename},模式为{mode}(0为返回字典和下拉菜单,1为返回下拉菜单,2为返回字典)")
      lines = []
-     logging.info("Loading template...")
      if filename.endswith(".json"):
          with open(os.path.join(TEMPLATES_DIR, filename), "r", encoding="utf8") as f:
              lines = json.load(f)
@@ -310,7 +224,7 @@ def load_template(filename, mode=0):
 
 
  def get_template_names(plain=False):
-     logging.info("获取模板文件名列表")
+     logging.debug("获取模板文件名列表")
      return get_file_names(TEMPLATES_DIR, plain, filetypes=[".csv", "json"])
 
 
@@ -322,11 +236,6 @@ def get_template_content(templates, selection, original_system_prompt):
      return original_system_prompt
 
 
- def reset_state():
-     logging.info("重置状态")
-     return [], [], [], construct_token_message([0])
-
-
  def reset_textbox():
      logging.debug("重置文本框")
      return gr.update(value="")
@@ -530,3 +439,9 @@ def excel_to_string(file_path):
 
 
      return result
+
+ def get_last_day_of_month(any_day):
+     # The day 28 exists in every month. 4 days later, it's always next month
+     next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
+     # subtracting the number of the current day brings us back one month
+     return next_month - datetime.timedelta(days=next_month.day)
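Note: the day-28 trick in the relocated get_last_day_of_month works for every month length, including leap-year February, because day 28 exists in all months and adding four days always lands in the following month. A quick standalone check; the dates are examples only:

import datetime

def get_last_day_of_month(any_day):
    # Day 28 exists in every month, so +4 days is always in the next month;
    # subtracting that date's day-of-month steps back to the last day of the original month.
    next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
    return next_month - datetime.timedelta(days=next_month.day)

print(get_last_day_of_month(datetime.date(2024, 2, 10)))  # 2024-02-29 (leap year)
print(get_last_day_of_month(datetime.date(2023, 4, 5)))   # 2023-04-30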