Upload 243 files
This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitattributes +3 -0
- img/docker_logs.png +0 -0
- img/langchain+chatglm.png +3 -0
- img/langchain+chatglm2.png +0 -0
- img/qr_code_36.jpg +0 -0
- img/qr_code_37.jpg +0 -0
- img/qr_code_38.jpg +0 -0
- img/qr_code_39.jpg +0 -0
- img/vue_0521_0.png +0 -0
- img/vue_0521_1.png +3 -0
- img/vue_0521_2.png +3 -0
- img/webui_0419.png +0 -0
- img/webui_0510_0.png +0 -0
- img/webui_0510_1.png +0 -0
- img/webui_0510_2.png +0 -0
- img/webui_0521_0.png +0 -0
- loader/RSS_loader.py +54 -0
- loader/__init__.py +14 -0
- loader/__pycache__/__init__.cpython-310.pyc +0 -0
- loader/__pycache__/__init__.cpython-311.pyc +0 -0
- loader/__pycache__/dialogue.cpython-310.pyc +0 -0
- loader/__pycache__/image_loader.cpython-310.pyc +0 -0
- loader/__pycache__/image_loader.cpython-311.pyc +0 -0
- loader/__pycache__/pdf_loader.cpython-310.pyc +0 -0
- loader/dialogue.py +131 -0
- loader/image_loader.py +42 -0
- loader/pdf_loader.py +58 -0
- models/__init__.py +4 -0
- models/__pycache__/__init__.cpython-310.pyc +0 -0
- models/__pycache__/chatglm_llm.cpython-310.pyc +0 -0
- models/__pycache__/fastchat_openai_llm.cpython-310.pyc +0 -0
- models/__pycache__/llama_llm.cpython-310.pyc +0 -0
- models/__pycache__/moss_llm.cpython-310.pyc +0 -0
- models/__pycache__/shared.cpython-310.pyc +0 -0
- models/base/__init__.py +13 -0
- models/base/__pycache__/__init__.cpython-310.pyc +0 -0
- models/base/__pycache__/base.cpython-310.pyc +0 -0
- models/base/__pycache__/remote_rpc_model.cpython-310.pyc +0 -0
- models/base/base.py +41 -0
- models/base/lavis_blip2_multimodel.py +26 -0
- models/base/remote_rpc_model.py +33 -0
- models/chatglm_llm.py +83 -0
- models/fastchat_openai_llm.py +137 -0
- models/llama_llm.py +185 -0
- models/loader/__init__.py +2 -0
- models/loader/__pycache__/__init__.cpython-310.pyc +0 -0
- models/loader/__pycache__/args.cpython-310.pyc +0 -0
- models/loader/__pycache__/loader.cpython-310.pyc +0 -0
- models/loader/args.py +55 -0
- models/loader/loader.py +447 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+img/langchain+chatglm.png filter=lfs diff=lfs merge=lfs -text
+img/vue_0521_1.png filter=lfs diff=lfs merge=lfs -text
+img/vue_0521_2.png filter=lfs diff=lfs merge=lfs -text
img/docker_logs.png
ADDED
img/langchain+chatglm.png
ADDED
img/langchain+chatglm2.png
ADDED
img/qr_code_36.jpg
ADDED
img/qr_code_37.jpg
ADDED
img/qr_code_38.jpg
ADDED
img/qr_code_39.jpg
ADDED
img/vue_0521_0.png
ADDED
img/vue_0521_1.png
ADDED
img/vue_0521_2.png
ADDED
img/webui_0419.png
ADDED
img/webui_0510_0.png
ADDED
img/webui_0510_1.png
ADDED
img/webui_0510_2.png
ADDED
img/webui_0521_0.png
ADDED
loader/RSS_loader.py
ADDED
@@ -0,0 +1,54 @@
from langchain.docstore.document import Document
import feedparser
import html2text
import ssl
import time


class RSS_Url_loader:
    def __init__(self, urls=None, interval=60):
        '''Accepts either a list of urls or a single url string.'''
        self.urls = []
        self.interval = interval
        if urls is not None:
            try:
                if isinstance(urls, str):
                    urls = [urls]
                elif isinstance(urls, list):
                    pass
                else:
                    raise TypeError('urls must be a list or a string.')
                self.urls = urls
            except:
                Warning('urls must be a list or a string.')

    # The scheduled version may need to pull in other classes; not exposed for now.
    def scheduled_execution(self):
        while True:
            docs = self.load()
            return docs
            time.sleep(self.interval)

    def load(self):
        if hasattr(ssl, '_create_unverified_context'):
            ssl._create_default_https_context = ssl._create_unverified_context
        documents = []
        for url in self.urls:
            parsed = feedparser.parse(url)
            for entry in parsed.entries:
                if "content" in entry:
                    data = entry.content[0].value
                else:
                    data = entry.description or entry.summary
                data = html2text.html2text(data)
                metadata = {"title": entry.title, "link": entry.link}
                documents.append(Document(page_content=data, metadata=metadata))
        return documents

if __name__ == "__main__":
    # The urls should eventually come from the config file or from the user interface.
    urls = ["https://www.zhihu.com/rss", "https://www.36kr.com/feed"]
    loader = RSS_Url_loader(urls)
    docs = loader.load()
    for doc in docs:
        print(doc)
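One thing to note: scheduled_execution above returns after the first load(), so the time.sleep(self.interval) line is never reached. A minimal periodic-polling sketch (a hypothetical helper, not part of this commit) that hands each batch of documents to a caller-supplied callback:

import time

def poll_rss(loader, handle_docs, interval=60):
    """Repeatedly load the configured feeds and pass the documents to a callback."""
    while True:
        handle_docs(loader.load())
        time.sleep(interval)

# usage: poll_rss(RSS_Url_loader("https://www.36kr.com/feed"), print, interval=300)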
loader/__init__.py
ADDED
@@ -0,0 +1,14 @@
from .image_loader import UnstructuredPaddleImageLoader
from .pdf_loader import UnstructuredPaddlePDFLoader
from .dialogue import (
    Person,
    Dialogue,
    Turn,
    DialogueLoader
)

__all__ = [
    "UnstructuredPaddleImageLoader",
    "UnstructuredPaddlePDFLoader",
    "DialogueLoader",
]
loader/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (414 Bytes)
loader/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (531 Bytes)
loader/__pycache__/dialogue.cpython-310.pyc
ADDED
Binary file (4.95 kB)
loader/__pycache__/image_loader.cpython-310.pyc
ADDED
Binary file (2.23 kB)
loader/__pycache__/image_loader.cpython-311.pyc
ADDED
Binary file (3.94 kB)
loader/__pycache__/pdf_loader.cpython-310.pyc
ADDED
Binary file (2.57 kB)
loader/dialogue.py
ADDED
@@ -0,0 +1,131 @@
import json
from abc import ABC
from typing import List
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader


class Person:
    def __init__(self, name, age):
        self.name = name
        self.age = age


class Dialogue:
    """
    Build an abstract dialogue model using classes and methods to represent different dialogue elements.
    This class serves as a fundamental framework for constructing dialogue models.
    """

    def __init__(self, file_path: str):
        self.file_path = file_path
        self.turns = []

    def add_turn(self, turn):
        """
        Create an instance of a conversation participant
        :param turn:
        :return:
        """
        self.turns.append(turn)

    def parse_dialogue(self):
        """
        The parse_dialogue function reads the specified dialogue file and parses each dialogue turn line by line.
        For each turn, the function extracts the name of the speaker and the message content from the text,
        creating a Turn instance. If the speaker is not already present in the participants dictionary,
        a new Person instance is created. Finally, the parsed Turn instance is added to the Dialogue object.

        Please note that this sample code assumes that each line in the file follows a specific format:
        <speaker>:\r\n<message>\r\n\r\n. If your file has a different format or includes other metadata,
        you may need to adjust the parsing logic accordingly.
        """
        participants = {}
        speaker_name = None
        message = None

        with open(self.file_path, encoding='utf-8') as file:
            lines = file.readlines()
            for i, line in enumerate(lines):
                line = line.strip()
                if not line:
                    continue

                if speaker_name is None:
                    speaker_name, _ = line.split(':', 1)
                elif message is None:
                    message = line
                    if speaker_name not in participants:
                        participants[speaker_name] = Person(speaker_name, None)

                    speaker = participants[speaker_name]
                    turn = Turn(speaker, message)
                    self.add_turn(turn)

                    # Reset speaker_name and message for the next turn
                    speaker_name = None
                    message = None

    def display(self):
        for turn in self.turns:
            print(f"{turn.speaker.name}: {turn.message}")

    def export_to_file(self, file_path):
        with open(file_path, 'w', encoding='utf-8') as file:
            for turn in self.turns:
                file.write(f"{turn.speaker.name}: {turn.message}\n")

    def to_dict(self):
        dialogue_dict = {"turns": []}
        for turn in self.turns:
            turn_dict = {
                "speaker": turn.speaker.name,
                "message": turn.message
            }
            dialogue_dict["turns"].append(turn_dict)
        return dialogue_dict

    def to_json(self):
        dialogue_dict = self.to_dict()
        return json.dumps(dialogue_dict, ensure_ascii=False, indent=2)

    def participants_to_export(self):
        """
        participants_to_export
        :return:
        """
        participants = set()
        for turn in self.turns:
            participants.add(turn.speaker.name)
        return ', '.join(participants)


class Turn:
    def __init__(self, speaker, message):
        self.speaker = speaker
        self.message = message


class DialogueLoader(BaseLoader, ABC):
    """Load dialogue."""

    def __init__(self, file_path: str):
        """Initialize with dialogue."""
        self.file_path = file_path
        dialogue = Dialogue(file_path=file_path)
        dialogue.parse_dialogue()
        self.dialogue = dialogue

    def load(self) -> List[Document]:
        """Load from dialogue."""
        documents = []
        participants = self.dialogue.participants_to_export()

        for turn in self.dialogue.turns:
            metadata = {"source": f"Dialogue File:{self.dialogue.file_path},"
                                  f"speaker:{turn.speaker.name},"
                                  f"participant:{participants}"}
            turn_document = Document(page_content=turn.message, metadata=metadata.copy())
            documents.append(turn_document)

        return documents
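As the parse_dialogue docstring notes, the loader expects alternating speaker and message lines separated by blank lines. A small sketch of a compatible input file and the resulting documents, assuming the loader package's dependencies are installed (file name and contents are illustrative only):

from loader.dialogue import DialogueLoader

sample = (
    "Alice:\n"
    "How do I rebuild the vector store?\n"
    "\n"
    "Bob:\n"
    "Delete the index folder and re-run the ingest script.\n"
    "\n"
)
with open("sample_dialogue.txt", "w", encoding="utf-8") as f:
    f.write(sample)

loader = DialogueLoader("sample_dialogue.txt")
for doc in loader.load():
    # each turn becomes one Document; metadata records the file, the speaker and all participants
    print(doc.metadata["source"], "->", doc.page_content)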
loader/image_loader.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Loader that loads image files."""
|
2 |
+
from typing import List
|
3 |
+
|
4 |
+
from langchain.document_loaders.unstructured import UnstructuredFileLoader
|
5 |
+
from paddleocr import PaddleOCR
|
6 |
+
import os
|
7 |
+
import nltk
|
8 |
+
from configs.model_config import NLTK_DATA_PATH
|
9 |
+
|
10 |
+
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
|
11 |
+
|
12 |
+
class UnstructuredPaddleImageLoader(UnstructuredFileLoader):
|
13 |
+
"""Loader that uses unstructured to load image files, such as PNGs and JPGs."""
|
14 |
+
|
15 |
+
def _get_elements(self) -> List:
|
16 |
+
def image_ocr_txt(filepath, dir_path="tmp_files"):
|
17 |
+
full_dir_path = os.path.join(os.path.dirname(filepath), dir_path)
|
18 |
+
if not os.path.exists(full_dir_path):
|
19 |
+
os.makedirs(full_dir_path)
|
20 |
+
filename = os.path.split(filepath)[-1]
|
21 |
+
ocr = PaddleOCR(use_angle_cls=True, lang="ch", use_gpu=False, show_log=False)
|
22 |
+
result = ocr.ocr(img=filepath)
|
23 |
+
|
24 |
+
ocr_result = [i[1][0] for line in result for i in line]
|
25 |
+
txt_file_path = os.path.join(full_dir_path, "%s.txt" % (filename))
|
26 |
+
with open(txt_file_path, 'w', encoding='utf-8') as fout:
|
27 |
+
fout.write("\n".join(ocr_result))
|
28 |
+
return txt_file_path
|
29 |
+
|
30 |
+
txt_file_path = image_ocr_txt(self.file_path)
|
31 |
+
from unstructured.partition.text import partition_text
|
32 |
+
return partition_text(filename=txt_file_path, **self.unstructured_kwargs)
|
33 |
+
|
34 |
+
|
35 |
+
if __name__ == "__main__":
|
36 |
+
import sys
|
37 |
+
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
38 |
+
filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base", "samples", "content", "test.jpg")
|
39 |
+
loader = UnstructuredPaddleImageLoader(filepath, mode="elements")
|
40 |
+
docs = loader.load()
|
41 |
+
for doc in docs:
|
42 |
+
print(doc)
|
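The comprehension `[i[1][0] for line in result for i in line]` assumes PaddleOCR returns one list per image, where each detection is a pair of (bounding box, (text, confidence)); the comprehension keeps only the recognized text. A small sketch of that unpacking on a made-up, PaddleOCR-style result value:

# A made-up PaddleOCR-style result: one image, two detected text boxes.
result = [
    [
        [[[10, 10], [80, 10], [80, 30], [10, 30]], ("knowledge base", 0.98)],
        [[[10, 40], [120, 40], [120, 60], [10, 60]], ("langchain-ChatGLM", 0.95)],
    ]
]
ocr_result = [i[1][0] for line in result for i in line]
print(ocr_result)  # ['knowledge base', 'langchain-ChatGLM']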
loader/pdf_loader.py
ADDED
@@ -0,0 +1,58 @@
"""Loader that loads PDF files."""
from typing import List

from langchain.document_loaders.unstructured import UnstructuredFileLoader
from paddleocr import PaddleOCR
import os
import fitz
import nltk
from configs.model_config import NLTK_DATA_PATH

nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path

class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
    """Loader that uses unstructured to load PDF files, OCRing embedded images with PaddleOCR."""

    def _get_elements(self) -> List:
        def pdf_ocr_txt(filepath, dir_path="tmp_files"):
            full_dir_path = os.path.join(os.path.dirname(filepath), dir_path)
            if not os.path.exists(full_dir_path):
                os.makedirs(full_dir_path)
            ocr = PaddleOCR(use_angle_cls=True, lang="ch", use_gpu=False, show_log=False)
            doc = fitz.open(filepath)
            txt_file_path = os.path.join(full_dir_path, f"{os.path.split(filepath)[-1]}.txt")
            img_name = os.path.join(full_dir_path, 'tmp.png')
            with open(txt_file_path, 'w', encoding='utf-8') as fout:
                for i in range(doc.page_count):
                    page = doc[i]
                    text = page.get_text("")
                    fout.write(text)
                    fout.write("\n")

                    img_list = page.get_images()
                    for img in img_list:
                        pix = fitz.Pixmap(doc, img[0])
                        if pix.n - pix.alpha >= 4:
                            pix = fitz.Pixmap(fitz.csRGB, pix)
                        pix.save(img_name)

                        result = ocr.ocr(img_name)
                        ocr_result = [i[1][0] for line in result for i in line]
                        fout.write("\n".join(ocr_result))
            if os.path.exists(img_name):
                os.remove(img_name)
            return txt_file_path

        txt_file_path = pdf_ocr_txt(self.file_path)
        from unstructured.partition.text import partition_text
        return partition_text(filename=txt_file_path, **self.unstructured_kwargs)


if __name__ == "__main__":
    import sys
    sys.path.append(os.path.dirname(os.path.dirname(__file__)))
    filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base", "samples", "content", "test.pdf")
    loader = UnstructuredPaddlePDFLoader(filepath, mode="elements")
    docs = loader.load()
    for doc in docs:
        print(doc)
models/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .chatglm_llm import ChatGLM
from .llama_llm import LLamaLLM
from .moss_llm import MOSSLLM
from .fastchat_openai_llm import FastChatOpenAILLM
models/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (338 Bytes)
models/__pycache__/chatglm_llm.cpython-310.pyc
ADDED
Binary file (2.66 kB)
models/__pycache__/fastchat_openai_llm.cpython-310.pyc
ADDED
Binary file (4.45 kB)
models/__pycache__/llama_llm.cpython-310.pyc
ADDED
Binary file (6.45 kB)
models/__pycache__/moss_llm.cpython-310.pyc
ADDED
Binary file (3.88 kB)
models/__pycache__/shared.cpython-310.pyc
ADDED
Binary file (1.48 kB)
models/base/__init__.py
ADDED
@@ -0,0 +1,13 @@
from models.base.base import (
    AnswerResult,
    BaseAnswer
)
from models.base.remote_rpc_model import (
    RemoteRpcModel
)

__all__ = [
    "AnswerResult",
    "BaseAnswer",
    "RemoteRpcModel",
]
models/base/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (334 Bytes)
models/base/__pycache__/base.cpython-310.pyc
ADDED
Binary file (1.79 kB)
models/base/__pycache__/remote_rpc_model.cpython-310.pyc
ADDED
Binary file (1.59 kB)
models/base/base.py
ADDED
@@ -0,0 +1,41 @@
from abc import ABC, abstractmethod
from typing import Optional, List
import traceback
from collections import deque
from queue import Queue
from threading import Thread

import torch
import transformers
from models.loader import LoaderCheckPoint


class AnswerResult:
    """
    Message entity
    """
    history: List[List[str]] = []
    llm_output: Optional[dict] = None


class BaseAnswer(ABC):
    """Upper-level wrapper that gives result generation a unified API."""

    @property
    @abstractmethod
    def _check_point(self) -> LoaderCheckPoint:
        """Return _check_point of llm."""

    @property
    @abstractmethod
    def _history_len(self) -> int:
        """Return _history_len of llm."""

    @abstractmethod
    def set_history_len(self, history_len: int) -> None:
        """Set _history_len of llm."""

    def generatorAnswer(self, prompt: str,
                        history: List[List[str]] = [],
                        streaming: bool = False):
        pass
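BaseAnswer only fixes the interface; each backend implements generatorAnswer as a generator that yields AnswerResult objects. A minimal in-memory stub (illustrative only, not part of this commit, and assuming the repo's packages are importable) shows the expected contract:

from models.base import BaseAnswer, AnswerResult

class EchoLLM(BaseAnswer):
    """Toy backend that echoes the prompt; illustrates the generatorAnswer contract."""

    def __init__(self):
        self._len = 3

    @property
    def _check_point(self):
        return None  # no real LoaderCheckPoint behind this stub

    @property
    def _history_len(self) -> int:
        return self._len

    def set_history_len(self, history_len: int) -> None:
        self._len = history_len

    def generatorAnswer(self, prompt, history=[], streaming=False):
        result = AnswerResult()
        result.history = history + [[prompt, prompt]]
        result.llm_output = {"answer": prompt}
        yield result

for r in EchoLLM().generatorAnswer("ping"):
    print(r.llm_output["answer"])  # -> ping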
models/base/lavis_blip2_multimodel.py
ADDED
@@ -0,0 +1,26 @@
from abc import ABC, abstractmethod
import torch

from models.base import (BaseAnswer,
                         AnswerResult)


class MultimodalAnswerResult(AnswerResult):
    image: str = None


class LavisBlip2Multimodal(BaseAnswer, ABC):

    @property
    @abstractmethod
    def _blip2_instruct(self) -> any:
        """Return _blip2_instruct of blip2."""

    @property
    @abstractmethod
    def _image_blip2_vis_processors(self) -> dict:
        """Return _image_blip2_vis_processors of blip2 image processors."""

    @abstractmethod
    def set_image_path(self, image_path: str):
        """Set image_path."""
models/base/remote_rpc_model.py
ADDED
@@ -0,0 +1,33 @@
from abc import ABC, abstractmethod
import torch

from models.base import (BaseAnswer,
                         AnswerResult)


class MultimodalAnswerResult(AnswerResult):
    image: str = None


class RemoteRpcModel(BaseAnswer, ABC):

    @property
    @abstractmethod
    def _api_key(self) -> str:
        """Return _api_key of client."""

    @property
    @abstractmethod
    def _api_base_url(self) -> str:
        """Return _api_base_url of the client host."""

    @abstractmethod
    def set_api_key(self, api_key: str):
        """Set api_key."""

    @abstractmethod
    def set_api_base_url(self, api_base_url: str):
        """Set api_base_url."""

    @abstractmethod
    def call_model_name(self, model_name):
        """Call model name of client."""
models/chatglm_llm.py
ADDED
@@ -0,0 +1,83 @@
from abc import ABC
from langchain.llms.base import LLM
from typing import Optional, List
from models.loader import LoaderCheckPoint
from models.base import (BaseAnswer,
                         AnswerResult)


class ChatGLM(BaseAnswer, LLM, ABC):
    max_token: int = 10000
    temperature: float = 0.01
    top_p = 0.9
    checkPoint: LoaderCheckPoint = None
    # history = []
    history_len: int = 10

    def __init__(self, checkPoint: LoaderCheckPoint = None):
        super().__init__()
        self.checkPoint = checkPoint

    @property
    def _llm_type(self) -> str:
        return "ChatGLM"

    @property
    def _check_point(self) -> LoaderCheckPoint:
        return self.checkPoint

    @property
    def _history_len(self) -> int:
        return self.history_len

    def set_history_len(self, history_len: int = 10) -> None:
        self.history_len = history_len

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        print(f"__call:{prompt}")
        response, _ = self.checkPoint.model.chat(
            self.checkPoint.tokenizer,
            prompt,
            history=[],
            max_length=self.max_token,
            temperature=self.temperature
        )
        print(f"response:{response}")
        print(f"+++++++++++++++++++++++++++++++++++")
        return response

    def generatorAnswer(self, prompt: str,
                        history: List[List[str]] = [],
                        streaming: bool = False):

        if streaming:
            history += [[]]
            for inum, (stream_resp, _) in enumerate(self.checkPoint.model.stream_chat(
                    self.checkPoint.tokenizer,
                    prompt,
                    history=history[-self.history_len:-1] if self.history_len > 1 else [],
                    max_length=self.max_token,
                    temperature=self.temperature
            )):
                # self.checkPoint.clear_torch_cache()
                history[-1] = [prompt, stream_resp]
                answer_result = AnswerResult()
                answer_result.history = history
                answer_result.llm_output = {"answer": stream_resp}
                yield answer_result
        else:
            response, _ = self.checkPoint.model.chat(
                self.checkPoint.tokenizer,
                prompt,
                history=history[-self.history_len:] if self.history_len > 0 else [],
                max_length=self.max_token,
                temperature=self.temperature
            )
            self.checkPoint.clear_torch_cache()
            history += [[prompt, response]]
            answer_result = AnswerResult()
            answer_result.history = history
            answer_result.llm_output = {"answer": response}
            yield answer_result
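When streaming=True, generatorAnswer yields one AnswerResult per partial response, each carrying the accumulated history. A hedged sketch of how this class is typically driven (it assumes a LoaderCheckPoint whose reload_model() can actually find ChatGLM weights; the path is a placeholder):

from models.loader import LoaderCheckPoint
from models.chatglm_llm import ChatGLM

checkpoint = LoaderCheckPoint({"model_name": "chatglm-6b",
                               "model_path": "/path/to/chatglm-6b"})
checkpoint.reload_model()            # loads config, weights and tokenizer

llm = ChatGLM(checkPoint=checkpoint)
llm.set_history_len(5)

history = []
for result in llm.generatorAnswer("Hello", history=history, streaming=True):
    # each yield carries the accumulated history plus the latest partial answer
    print(result.llm_output["answer"])
history = result.history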
models/fastchat_openai_llm.py
ADDED
@@ -0,0 +1,137 @@
from abc import ABC
import requests
from typing import Optional, List
from langchain.llms.base import LLM

from models.loader import LoaderCheckPoint
from models.base import (RemoteRpcModel,
                         AnswerResult)
from typing import (
    Collection,
    Dict
)


def _build_message_template() -> Dict[str, str]:
    """
    :return: message structure
    """
    return {
        "role": "",
        "content": "",
    }


class FastChatOpenAILLM(RemoteRpcModel, LLM, ABC):
    api_base_url: str = "http://localhost:8000/v1"
    model_name: str = "chatglm-6b"
    max_token: int = 10000
    temperature: float = 0.01
    top_p = 0.9
    checkPoint: LoaderCheckPoint = None
    history = []
    history_len: int = 10

    def __init__(self, checkPoint: LoaderCheckPoint = None):
        super().__init__()
        self.checkPoint = checkPoint

    @property
    def _llm_type(self) -> str:
        return "FastChat"

    @property
    def _check_point(self) -> LoaderCheckPoint:
        return self.checkPoint

    @property
    def _history_len(self) -> int:
        return self.history_len

    def set_history_len(self, history_len: int = 10) -> None:
        self.history_len = history_len

    @property
    def _api_key(self) -> str:
        pass

    @property
    def _api_base_url(self) -> str:
        return self.api_base_url

    def set_api_key(self, api_key: str):
        pass

    def set_api_base_url(self, api_base_url: str):
        self.api_base_url = api_base_url

    def call_model_name(self, model_name):
        self.model_name = model_name

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        print(f"__call:{prompt}")
        try:
            import openai
            # Not support yet
            openai.api_key = "EMPTY"
            openai.api_base = self.api_base_url
        except ImportError:
            raise ValueError(
                "Could not import openai python package. "
                "Please install it with `pip install openai`."
            )
        # create a chat completion
        completion = openai.ChatCompletion.create(
            model=self.model_name,
            messages=self.build_message_list(prompt)
        )
        print(f"response:{completion.choices[0].message.content}")
        print(f"+++++++++++++++++++++++++++++++++++")
        return completion.choices[0].message.content

    # Convert the conversation history array into the message format
    def build_message_list(self, query) -> Collection[Dict[str, str]]:
        build_message_list: Collection[Dict[str, str]] = []
        history = self.history[-self.history_len:] if self.history_len > 0 else []
        for i, (old_query, response) in enumerate(history):
            user_build_message = _build_message_template()
            user_build_message['role'] = 'user'
            user_build_message['content'] = old_query
            system_build_message = _build_message_template()
            system_build_message['role'] = 'system'
            system_build_message['content'] = response
            build_message_list.append(user_build_message)
            build_message_list.append(system_build_message)

        user_build_message = _build_message_template()
        user_build_message['role'] = 'user'
        user_build_message['content'] = query
        build_message_list.append(user_build_message)
        return build_message_list

    def generatorAnswer(self, prompt: str,
                        history: List[List[str]] = [],
                        streaming: bool = False):

        try:
            import openai
            # Not support yet
            openai.api_key = "EMPTY"
            openai.api_base = self.api_base_url
        except ImportError:
            raise ValueError(
                "Could not import openai python package. "
                "Please install it with `pip install openai`."
            )
        # create a chat completion
        completion = openai.ChatCompletion.create(
            model=self.model_name,
            messages=self.build_message_list(prompt)
        )

        history += [[prompt, completion.choices[0].message.content]]
        answer_result = AnswerResult()
        answer_result.history = history
        answer_result.llm_output = {"answer": completion.choices[0].message.content}

        yield answer_result
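This wrapper does not load any weights itself; it talks to a FastChat server that exposes the OpenAI-compatible /v1 API. A minimal sketch of wiring it up, assuming such a server is already running on localhost:8000 and serves a model registered as "chatglm-6b":

from models.fastchat_openai_llm import FastChatOpenAILLM

llm = FastChatOpenAILLM()
llm.set_api_base_url("http://localhost:8000/v1")   # FastChat's OpenAI-compatible endpoint
llm.call_model_name("chatglm-6b")                  # model name registered with the FastChat controller

for result in llm.generatorAnswer("What does this project do?", history=[], streaming=False):
    print(result.llm_output["answer"])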
models/llama_llm.py
ADDED
@@ -0,0 +1,185 @@
from abc import ABC

from langchain.llms.base import LLM
import random
import torch
import transformers
from transformers.generation.logits_process import LogitsProcessor
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList
from typing import Optional, List, Dict, Any
from models.loader import LoaderCheckPoint
from models.base import (BaseAnswer,
                         AnswerResult)


class InvalidScoreLogitsProcessor(LogitsProcessor):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        if torch.isnan(scores).any() or torch.isinf(scores).any():
            scores.zero_()
            scores[..., 5] = 5e4
        return scores


class LLamaLLM(BaseAnswer, LLM, ABC):
    checkPoint: LoaderCheckPoint = None
    # history = []
    history_len: int = 3
    max_new_tokens: int = 500
    num_beams: int = 1
    temperature: float = 0.5
    top_p: float = 0.4
    top_k: int = 10
    repetition_penalty: float = 1.2
    encoder_repetition_penalty: int = 1
    min_length: int = 0
    logits_processor: LogitsProcessorList = None
    stopping_criteria: Optional[StoppingCriteriaList] = None
    eos_token_id: Optional[int] = [2]

    state: object = {'max_new_tokens': 50,
                     'seed': 1,
                     'temperature': 0, 'top_p': 0.1,
                     'top_k': 40, 'typical_p': 1,
                     'repetition_penalty': 1.2,
                     'encoder_repetition_penalty': 1,
                     'no_repeat_ngram_size': 0,
                     'min_length': 0,
                     'penalty_alpha': 0,
                     'num_beams': 1,
                     'length_penalty': 1,
                     'early_stopping': False, 'add_bos_token': True, 'ban_eos_token': False,
                     'truncation_length': 2048, 'custom_stopping_strings': '',
                     'cpu_memory': 0, 'auto_devices': False, 'disk': False, 'cpu': False, 'bf16': False,
                     'load_in_8bit': False, 'wbits': 'None', 'groupsize': 'None', 'model_type': 'None',
                     'pre_layer': 0, 'gpu_memory_0': 0}

    def __init__(self, checkPoint: LoaderCheckPoint = None):
        super().__init__()
        self.checkPoint = checkPoint

    @property
    def _llm_type(self) -> str:
        return "LLamaLLM"

    @property
    def _check_point(self) -> LoaderCheckPoint:
        return self.checkPoint

    def encode(self, prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None):
        input_ids = self.checkPoint.tokenizer.encode(str(prompt), return_tensors='pt',
                                                     add_special_tokens=add_special_tokens)
        # This is a hack for making replies more creative.
        if not add_bos_token and input_ids[0][0] == self.checkPoint.tokenizer.bos_token_id:
            input_ids = input_ids[:, 1:]

        # Llama adds this extra token when the first character is '\n', and this
        # compromises the stopping criteria, so we just remove it
        if type(self.checkPoint.tokenizer) is transformers.LlamaTokenizer and input_ids[0][0] == 29871:
            input_ids = input_ids[:, 1:]

        # Handling truncation
        if truncation_length is not None:
            input_ids = input_ids[:, -truncation_length:]

        return input_ids.cuda()

    def decode(self, output_ids):
        reply = self.checkPoint.tokenizer.decode(output_ids, skip_special_tokens=True)
        return reply

    # Convert the conversation history array into text format
    def history_to_text(self, query, history):
        """
        Soft prompt built from the conversation history.
        history_to_text converts the history array into the required text format; the formatted
        history text is then turned into a vector representation with self.encode, and the history
        vector is concatenated with the vector of the current input.
        :return:
        """
        formatted_history = ''
        history = history[-self.history_len:] if self.history_len > 0 else []
        if len(history) > 0:
            for i, (old_query, response) in enumerate(history):
                formatted_history += "### Human:{}\n### Assistant:{}\n".format(old_query, response)
        formatted_history += "### Human:{}\n### Assistant:".format(query)
        return formatted_history

    def prepare_inputs_for_generation(self,
                                      input_ids: torch.LongTensor):
        """
        Pre-build the attention mask and the tensor of position indices for the input sequence.
        # TODO no approach worked out yet
        :return:
        """

        mask_positions = torch.zeros((1, input_ids.shape[1]), dtype=input_ids.dtype).to(self.checkPoint.model.device)

        attention_mask = self.get_masks(input_ids, input_ids.device)

        position_ids = self.get_position_ids(
            input_ids,
            device=input_ids.device,
            mask_positions=mask_positions
        )

        return input_ids, position_ids, attention_mask

    @property
    def _history_len(self) -> int:
        return self.history_len

    def set_history_len(self, history_len: int = 10) -> None:
        self.history_len = history_len

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        print(f"__call:{prompt}")
        if self.logits_processor is None:
            self.logits_processor = LogitsProcessorList()
            self.logits_processor.append(InvalidScoreLogitsProcessor())

        gen_kwargs = {
            "max_new_tokens": self.max_new_tokens,
            "num_beams": self.num_beams,
            "top_p": self.top_p,
            "do_sample": True,
            "top_k": self.top_k,
            "repetition_penalty": self.repetition_penalty,
            "encoder_repetition_penalty": self.encoder_repetition_penalty,
            "min_length": self.min_length,
            "temperature": self.temperature,
            "eos_token_id": self.checkPoint.tokenizer.eos_token_id,
            "logits_processor": self.logits_processor}

        # Encode the prompt into token ids
        input_ids = self.encode(prompt, add_bos_token=self.state['add_bos_token'], truncation_length=self.max_new_tokens)
        # input_ids, position_ids, attention_mask = self.prepare_inputs_for_generation(input_ids=filler_input_ids)

        gen_kwargs.update({'inputs': input_ids})
        # Attention mask
        # gen_kwargs.update({'attention_mask': attention_mask})
        # gen_kwargs.update({'position_ids': position_ids})
        if self.stopping_criteria is None:
            self.stopping_criteria = transformers.StoppingCriteriaList()
        # Observe the output
        gen_kwargs.update({'stopping_criteria': self.stopping_criteria})

        output_ids = self.checkPoint.model.generate(**gen_kwargs)
        new_tokens = len(output_ids[0]) - len(input_ids[0])
        reply = self.decode(output_ids[0][-new_tokens:])
        print(f"response:{reply}")
        print(f"+++++++++++++++++++++++++++++++++++")
        return reply

    def generatorAnswer(self, prompt: str,
                        history: List[List[str]] = [],
                        streaming: bool = False):

        # TODO: a chat module and attention handling still need to be implemented. _call is the
        # API exposed through langchain's LLM extension and defaults to no-prompt mode; to work
        # with the attention model, refer to the chat_glm implementation.
        softprompt = self.history_to_text(prompt, history=history)
        response = self._call(prompt=softprompt, stop=['\n###'])

        answer_result = AnswerResult()
        answer_result.history = history + [[prompt, response]]
        answer_result.llm_output = {"answer": response}
        yield answer_result
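history_to_text produces the "### Human / ### Assistant" soft prompt that _call feeds to the model. A small sketch of the string it builds, assuming the class can be instantiated without a loaded checkpoint just to inspect the prompt format (the example questions are illustrative):

from models.llama_llm import LLamaLLM

llm = LLamaLLM(checkPoint=None)      # no weights are needed just to inspect the prompt format
llm.set_history_len(3)

history = [["What is LangChain?", "A framework for building LLM applications."]]
print(llm.history_to_text("And what does this repo add?", history=history))
# ### Human:What is LangChain?
# ### Assistant:A framework for building LLM applications.
# ### Human:And what does this repo add?
# ### Assistant: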
models/loader/__init__.py
ADDED
@@ -0,0 +1,2 @@
from .loader import *
models/loader/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (182 Bytes)
models/loader/__pycache__/args.cpython-310.pyc
ADDED
Binary file (1.73 kB)
models/loader/__pycache__/loader.cpython-310.pyc
ADDED
Binary file (11.1 kB)
models/loader/args.py
ADDED
@@ -0,0 +1,55 @@
import argparse
import os
from configs.model_config import *


# Additional argparse types
def path(string):
    if not string:
        return ''
    s = os.path.expanduser(string)
    if not os.path.exists(s):
        raise argparse.ArgumentTypeError(f'No such file or directory: "{string}"')
    return s


def file_path(string):
    if not string:
        return ''
    s = os.path.expanduser(string)
    if not os.path.isfile(s):
        raise argparse.ArgumentTypeError(f'No such file: "{string}"')
    return s


def dir_path(string):
    if not string:
        return ''
    s = os.path.expanduser(string)
    if not os.path.isdir(s):
        raise argparse.ArgumentTypeError(f'No such directory: "{string}"')
    return s


parser = argparse.ArgumentParser(prog='langchain-ChatGLM',
                                 description='About langchain-ChatGLM, local knowledge based ChatGLM with langchain | '
                                             '基于本地知识库的 ChatGLM 问答')

parser.add_argument('--no-remote-model', action='store_true', help='Do not fetch a remote model at loader checkpoint; '
                                                                    'add `--no-remote-model` when loading a local model.')
parser.add_argument('--model-name', type=str, default=LLM_MODEL, help='Name of the model to load by default.')
parser.add_argument('--lora', type=str, help='Name of the LoRA to apply to the model by default.')
parser.add_argument("--lora-dir", type=str, default=LORA_DIR, help="Path to directory with all the loras")

# Accelerate/transformers
parser.add_argument('--load-in-8bit', action='store_true', default=LOAD_IN_8BIT,
                    help='Load the model with 8-bit precision.')
parser.add_argument('--bf16', action='store_true', default=BF16,
                    help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')

args = parser.parse_args([])
# Generates a dict with a default value for each argument
DEFAULT_ARGS = vars(args)
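Because parse_args is called with an empty list, DEFAULT_ARGS is simply the dict of default values, and other modules can copy and override it before handing it to the loader. A short sketch of that pattern (key names follow the flags defined above):

from models.loader.args import DEFAULT_ARGS

params = dict(DEFAULT_ARGS)                  # copy of the argparse defaults
params.update({"no_remote_model": True,      # prefer a locally configured model path
               "load_in_8bit": False})
print(params["model_name"], params["lora_dir"])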
models/loader/loader.py
ADDED
@@ -0,0 +1,447 @@
import gc
import json
import os
import re
import time
from pathlib import Path
from typing import Optional, List, Dict, Tuple, Union
import torch
import transformers
from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM,
                          AutoTokenizer, LlamaTokenizer)
from configs.model_config import LLM_DEVICE


class LoaderCheckPoint:
    """
    Load a custom model CheckPoint.
    """
    # do not fetch a remote model at loader checkpoint
    no_remote_model: bool = False
    # model name
    model_name: str = None
    tokenizer: object = None
    # full path to the model
    model_path: str = None
    model: object = None
    model_config: object = None
    lora_names: set = []
    lora_dir: str = None
    ptuning_dir: str = None
    use_ptuning_v2: bool = False
    # If the project fails to start with 8-bit quantized loading enabled, pick a matching cuda version,
    # see https://github.com/TimDettmers/bitsandbytes/issues/156
    # Another possible cause is that bitsandbytes was installed against a cuda version from PATH that
    # does not match the one torch depends on, e.g. PATH contains cuda 10.2 and cuda 11.2, bitsandbytes
    # picked 10.2 while torch and friends were installed against 11.2.
    # The fix is to clean the mismatched cuda version out of PATH; a once-and-for-all recipe:
    # 0. run `pip uninstall bitsandbytes`
    # 1. delete the PATH entries from .bashrc
    # 2. run `echo $PATH >> .bashrc`
    # 3. remove the mismatched cuda paths from PATH in .bashrc
    # 4. run `source .bashrc`
    # 5. run `pip install bitsandbytes` again

    load_in_8bit: bool = False
    is_llamacpp: bool = False
    bf16: bool = False
    params: object = None
    # custom device map
    device_map: Optional[Dict[str, int]] = None
    # defaults to cuda; falls back to multi-GPU if cuda is unsupported, and to cpu if multi-GPU is unsupported
    llm_device = LLM_DEVICE

    def __init__(self, params: dict = None):
        """
        Model initialization.
        :param params:
        """
        self.model = None
        self.tokenizer = None
        self.params = params or {}
        self.model_name = params.get('model_name', False)
        self.model_path = params.get('model_path', None)
        self.no_remote_model = params.get('no_remote_model', False)
        self.lora = params.get('lora', '')
        self.use_ptuning_v2 = params.get('use_ptuning_v2', False)
        self.lora_dir = params.get('lora_dir', '')
        self.ptuning_dir = params.get('ptuning_dir', 'ptuning-v2')
        self.load_in_8bit = params.get('load_in_8bit', False)
        self.bf16 = params.get('bf16', False)

    def _load_model_config(self, model_name):

        if self.model_path:
            checkpoint = Path(f'{self.model_path}')
        else:
            if not self.no_remote_model:
                checkpoint = model_name
            else:
                raise ValueError(
                    "本地模型local_model_path未配置路径"
                )

        model_config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)

        return model_config

    def _load_model(self, model_name):
        """
        Load the model from a custom location.
        :param model_name:
        :return:
        """
        print(f"Loading {model_name}...")
        t0 = time.time()

        if self.model_path:
            checkpoint = Path(f'{self.model_path}')
        else:
            if not self.no_remote_model:
                checkpoint = model_name
            else:
                raise ValueError(
                    "本地模型local_model_path未配置路径"
                )

        self.is_llamacpp = len(list(Path(f'{checkpoint}').glob('ggml*.bin'))) > 0
        if 'chatglm' in model_name.lower():
            LoaderClass = AutoModel
        else:
            LoaderClass = AutoModelForCausalLM

        # Load the model in simple 16-bit mode by default
        # If loading succeeds but inference raises
        # RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`,
        # GPU memory is still insufficient; consider --load-in-8bit or set the default model to `chatglm-6b-int8`
        if not any([self.llm_device.lower() == "cpu",
                    self.load_in_8bit, self.is_llamacpp]):

            if torch.cuda.is_available() and self.llm_device.lower().startswith("cuda"):
                # Decide whether to use multi-GPU deployment based on the number of GPUs on this machine
                num_gpus = torch.cuda.device_count()
                if num_gpus < 2 and self.device_map is None:
                    model = (
                        LoaderClass.from_pretrained(checkpoint,
                                                    config=self.model_config,
                                                    torch_dtype=torch.bfloat16 if self.bf16 else torch.float16,
                                                    trust_remote_code=True)
                        .half()
                        .cuda()
                    )
                else:
                    from accelerate import dispatch_model

                    model = LoaderClass.from_pretrained(checkpoint,
                                                        config=self.model_config,
                                                        torch_dtype=torch.bfloat16 if self.bf16 else torch.float16,
                                                        trust_remote_code=True).half()
                    # device_map can be passed in to customize the placement on each card
                    if self.device_map is None:
                        if 'chatglm' in model_name.lower():
                            self.device_map = self.chatglm_auto_configure_device_map(num_gpus)
                        elif 'moss' in model_name.lower():
                            self.device_map = self.moss_auto_configure_device_map(num_gpus, model_name)
                        else:
                            self.device_map = self.chatglm_auto_configure_device_map(num_gpus)

                    model = dispatch_model(model, device_map=self.device_map)
            else:
                model = (
                    LoaderClass.from_pretrained(
                        checkpoint,
                        config=self.model_config,
                        trust_remote_code=True)
                    .float()
                    .to(self.llm_device)
                )

        elif self.is_llamacpp:

            try:
                from models.extensions.llamacpp_model_alternative import LlamaCppModel

            except ImportError as exc:
                raise ValueError(
                    "Could not import depend python package "
                    "Please install it with `pip install llama-cpp-python`."
                ) from exc

            model_file = list(checkpoint.glob('ggml*.bin'))[0]
            print(f"llama.cpp weights detected: {model_file}\n")

            model, tokenizer = LlamaCppModel.from_pretrained(model_file)
            return model, tokenizer

        elif self.load_in_8bit:
            try:
                from accelerate import init_empty_weights
                from accelerate.utils import get_balanced_memory, infer_auto_device_map
                from transformers import BitsAndBytesConfig

            except ImportError as exc:
                raise ValueError(
                    "Could not import depend python package "
                    "Please install it with `pip install transformers` "
                    "`pip install bitsandbytes``pip install accelerate`."
                ) from exc

            params = {"low_cpu_mem_usage": True}

            if not self.llm_device.lower().startswith("cuda"):
                raise SystemError("8bit 模型需要 CUDA 支持,或者改用量化后模型!")
            else:
                params["device_map"] = 'auto'
                params["trust_remote_code"] = True
                params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True,
                                                                   llm_int8_enable_fp32_cpu_offload=False)

            with init_empty_weights():
                model = LoaderClass.from_config(self.model_config, trust_remote_code=True)
            model.tie_weights()
            if self.device_map is not None:
                params['device_map'] = self.device_map
            else:
                params['device_map'] = infer_auto_device_map(
                    model,
                    dtype=torch.int8,
                    no_split_module_classes=model._no_split_modules
                )
            try:

                model = LoaderClass.from_pretrained(checkpoint, **params)
            except ImportError as exc:
                raise ValueError(
                    "如果开启了8bit量化加载,项目无法启动,参考此位置,选择合适的cuda版本,https://github.com/TimDettmers/bitsandbytes/issues/156"
                ) from exc
        # Custom
        else:

            print(
                "Warning: self.llm_device is False.\nThis means that no use GPU bring to be load CPU mode\n")
            params = {"low_cpu_mem_usage": True, "torch_dtype": torch.float32, "trust_remote_code": True}
            model = LoaderClass.from_pretrained(checkpoint, **params).to(self.llm_device, dtype=float)

        # Loading the tokenizer
        if type(model) is transformers.LlamaForCausalLM:
            tokenizer = LlamaTokenizer.from_pretrained(checkpoint, clean_up_tokenization_spaces=True)
            # Leaving this here until the LLaMA tokenizer gets figured out.
            # For some people this fixes things, for others it causes an error.
            try:
                tokenizer.eos_token_id = 2
                tokenizer.bos_token_id = 1
                tokenizer.pad_token_id = 0
            except Exception as e:
                print(e)
                pass
        else:
            tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

        print(f"Loaded the model in {(time.time() - t0):.2f} seconds.")
        return model, tokenizer

    def chatglm_auto_configure_device_map(self, num_gpus: int) -> Dict[str, int]:
        # transformer.word_embeddings takes 1 layer
        # transformer.final_layernorm and lm_head take 1 layer
        # transformer.layers take 28 layers
        # 30 layers in total, distributed across num_gpus cards
        num_trans_layers = 28
        per_gpu_layers = 30 / num_gpus

        # bugfix: PEFT-loaded LoRA models name the layers differently
        if self.lora:
            layer_prefix = 'base_model.model.transformer'
        else:
            layer_prefix = 'transformer'

        # bugfix: on linux, torch.embedding receives weight and input on different devices, causing a RuntimeError
        # on windows, model.device is set to transformer.word_embeddings.device
        # on linux, model.device is set to lm_head.device
        # when chat or stream_chat is called, input_ids is placed on model.device
        # if transformer.word_embeddings.device differs from model.device, a RuntimeError is raised
        # so transformer.word_embeddings, transformer.final_layernorm and lm_head are all placed on the first card

        encode = ""
        if 'chatglm2' in self.model_name:
            device_map = {
                f"{layer_prefix}.embedding.word_embeddings": 0,
                f"{layer_prefix}.rotary_pos_emb": 0,
                f"{layer_prefix}.output_layer": 0,
                f"{layer_prefix}.encoder.final_layernorm": 0,
                f"base_model.model.output_layer": 0
            }
            encode = ".encoder"
        else:
            device_map = {f'{layer_prefix}.word_embeddings': 0,
                          f'{layer_prefix}.final_layernorm': 0, 'lm_head': 0,
                          f'base_model.model.lm_head': 0, }
        used = 2
        gpu_target = 0
        for i in range(num_trans_layers):
            if used >= per_gpu_layers:
                gpu_target += 1
                used = 0
            assert gpu_target < num_gpus
            device_map[f'{layer_prefix}{encode}.layers.{i}'] = gpu_target
            used += 1

        return device_map

    def moss_auto_configure_device_map(self, num_gpus: int, model_name) -> Dict[str, int]:
        try:

            from accelerate import init_empty_weights
            from accelerate.utils import get_balanced_memory, infer_auto_device_map
            from transformers.dynamic_module_utils import get_class_from_dynamic_module
            from transformers.modeling_utils import no_init_weights
            from transformers.utils import ContextManagers
        except ImportError as exc:
            raise ValueError(
                "Could not import depend python package "
                "Please install it with `pip install transformers` "
                "`pip install bitsandbytes``pip install accelerate`."
            ) from exc

        if self.model_path:
            checkpoint = Path(f'{self.model_path}')
        else:
            if not self.no_remote_model:
                checkpoint = model_name
            else:
                raise ValueError(
                    "本地模型local_model_path未配置路径"
                )

        cls = get_class_from_dynamic_module(class_reference="fnlp/moss-moon-003-sft--modeling_moss.MossForCausalLM",
                                            pretrained_model_name_or_path=checkpoint)

        with ContextManagers([no_init_weights(_enable=True), init_empty_weights()]):
            model = cls(self.model_config)
        max_memory = get_balanced_memory(model, dtype=torch.int8 if self.load_in_8bit else None,
                                         low_zero=False, no_split_module_classes=model._no_split_modules)
        device_map = infer_auto_device_map(
            model, dtype=torch.float16 if not self.load_in_8bit else torch.int8, max_memory=max_memory,
            no_split_module_classes=model._no_split_modules)
        device_map["transformer.wte"] = 0
        device_map["transformer.drop"] = 0
        device_map["transformer.ln_f"] = 0
        device_map["lm_head"] = 0
        return device_map

    def _add_lora_to_model(self, lora_names):

        try:

            from peft import PeftModel

        except ImportError as exc:
            raise ValueError(
                "Could not import depend python package. "
                "Please install it with `pip install peft``pip install accelerate`."
            ) from exc
        # LoRAs currently loaded
        prior_set = set(self.lora_names)
        # LoRAs that need to be loaded
        added_set = set(lora_names) - prior_set
        # LoRAs to remove
        removed_set = prior_set - set(lora_names)
        self.lora_names = list(lora_names)

        # Nothing to do = skip.
        if len(added_set) == 0 and len(removed_set) == 0:
            return

        # Only adding, and already peft? Do it the easy way.
        if len(removed_set) == 0 and len(prior_set) > 0:
            print(f"Adding the LoRA(s) named {added_set} to the model...")
            for lora in added_set:
                self.model.load_adapter(Path(f"{self.lora_dir}/{lora}"), lora)
            return

        # If removing anything, disable all and re-add.
        if len(removed_set) > 0:
            self.model.disable_adapter()

        if len(lora_names) > 0:
            print("Applying the following LoRAs to {}: {}".format(self.model_name, ', '.join(lora_names)))
            params = {}
            if self.llm_device.lower() != "cpu":
                params['dtype'] = self.model.dtype
                if hasattr(self.model, "hf_device_map"):
                    params['device_map'] = {"base_model.model." + k: v for k, v in self.model.hf_device_map.items()}
                elif self.load_in_8bit:
                    params['device_map'] = {'': 0}
                self.model.resize_token_embeddings(len(self.tokenizer))

            self.model = PeftModel.from_pretrained(self.model, Path(f"{self.lora_dir}/{lora_names[0]}"), **params)

            for lora in lora_names[1:]:
                self.model.load_adapter(Path(f"{self.lora_dir}/{lora}"), lora)

            if not self.load_in_8bit and self.llm_device.lower() != "cpu":

                if not hasattr(self.model, "hf_device_map"):
                    if torch.has_mps:
                        device = torch.device('mps')
                        self.model = self.model.to(device)
                    else:
                        self.model = self.model.cuda()

    def clear_torch_cache(self):
        gc.collect()
        if self.llm_device.lower() != "cpu":
            if torch.has_mps:
                try:
                    from torch.mps import empty_cache
                    empty_cache()
                except Exception as e:
                    print(e)
                    print(
                        "如果您使用的是 macOS 建议将 pytorch 版本升级至 2.0.0 或更高版本,以支持及时清理 torch 产生的内存占用。")
            elif torch.has_cuda:
                device_id = "0" if torch.cuda.is_available() else None
                CUDA_DEVICE = f"{self.llm_device}:{device_id}" if device_id else self.llm_device
                with torch.cuda.device(CUDA_DEVICE):
                    torch.cuda.empty_cache()
                    torch.cuda.ipc_collect()
            else:
                print("未检测到 cuda 或 mps,暂不支持清理显存")

    def unload_model(self):
        del self.model
        del self.tokenizer
        self.model = self.tokenizer = None
        self.clear_torch_cache()

    def set_model_path(self, model_path):
        self.model_path = model_path

    def reload_model(self):
        self.unload_model()
        self.model_config = self._load_model_config(self.model_name)

        if self.use_ptuning_v2:
            try:
                prefix_encoder_file = open(Path(f'{self.ptuning_dir}/config.json'), 'r')
                prefix_encoder_config = json.loads(prefix_encoder_file.read())
                prefix_encoder_file.close()
                self.model_config.pre_seq_len = prefix_encoder_config['pre_seq_len']
                self.model_config.prefix_projection = prefix_encoder_config['prefix_projection']
            except Exception as e:
                print("加载PrefixEncoder config.json失败")

        self.model, self.tokenizer = self._load_model(self.model_name)

        if self.lora:
            self._add_lora_to_model([self.lora])

        if self.use_ptuning_v2:
            try:
                prefix_state_dict = torch.load(Path(f'{self.ptuning_dir}/pytorch_model.bin'))
                new_prefix_state_dict = {}
                for k, v in prefix_state_dict.items():
                    if k.startswith("transformer.prefix_encoder."):
                        new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
                self.model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
                self.model.transformer.prefix_encoder.float()
            except Exception as e:
                print("加载PrefixEncoder模型参数失败")

        self.model = self.model.eval()
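chatglm_auto_configure_device_map spreads the 28 transformer layers plus the embedding and output modules over the available cards, pinning the embeddings, final layernorm and lm_head to GPU 0 so that input_ids and model.device stay on the same card. A worked sketch of what the loop produces for two GPUs on a non-chatglm2, non-LoRA checkpoint (traced by hand from the code above; treat it as illustrative):

# For a 2-GPU machine and a plain ChatGLM checkpoint the method yields, in effect:
device_map = {
    "transformer.word_embeddings": 0,
    "transformer.final_layernorm": 0,
    "lm_head": 0,
    "base_model.model.lm_head": 0,
}
device_map.update({f"transformer.layers.{i}": 0 for i in range(0, 13)})   # layers 0-12 on GPU 0
device_map.update({f"transformer.layers.{i}": 1 for i in range(13, 28)})  # layers 13-27 on GPU 1
# GPU 0 ends up with the embeddings, final layernorm and lm_head plus 13 layers,
# GPU 1 with the remaining 15 layers, matching per_gpu_layers = 30 / 2.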