Tuchuanhuhuhu committed
Commit ee70a9d
Parent(s): c4a5dd4
Update the langchain version, stop auto-summarizing by default, and replace duckduckgo with google.
Files changed:
- modules/index_func.py  +1 -1
- modules/models/base_model.py  +18 -18
- requirements.txt  +2 -2
modules/index_func.py
CHANGED
@@ -83,7 +83,7 @@ def get_documents(file_src):
             logging.error(f"Error loading file: {filename}")
             traceback.print_exc()
 
-        texts = text_splitter.split_documents(
+        texts = text_splitter.split_documents(texts)
         documents.extend(texts)
     logging.debug("Documents loaded.")
     return documents
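For context, langchain's text splitters operate on lists of Document objects, and the loaders in this file return such lists, which is why the call site can now pass texts straight through. A minimal sketch of the pattern, assuming langchain 0.0.180; the sample text, source path, and chunk sizes are illustrative, not the app's real settings:

    from langchain.docstore.document import Document
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # Illustrative splitter settings; the real values live in modules/index_func.py.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=30)

    # Loaders in langchain 0.0.x return a list of Document objects.
    texts = [Document(page_content="some long extracted text...",
                      metadata={"source": "example.pdf"})]

    # split_documents takes that list and returns a new list of smaller chunks.
    chunks = text_splitter.split_documents(texts)
    documents = []
    documents.extend(chunks)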
modules/models/base_model.py
CHANGED
@@ -13,7 +13,7 @@ import pathlib
 
 from tqdm import tqdm
 import colorama
-from duckduckgo_search import ddg
+from googlesearch import search
 import asyncio
 import aiohttp
 from enum import Enum
@@ -264,19 +264,19 @@ class BaseLLMModel:
         index = construct_index(self.api_key, file_src=files)
         status = i18n("索引构建完成")
         # Summarize the document
-        logging.info(i18n("生成内容总结中……"))
-        os.environ["OPENAI_API_KEY"] = self.api_key
-        from langchain.chains.summarize import load_summarize_chain
-        from langchain.prompts import PromptTemplate
-        from langchain.chat_models import ChatOpenAI
-        from langchain.callbacks import StdOutCallbackHandler
-        prompt_template = "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN " + language + ":"
-        PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
-        llm = ChatOpenAI()
-        chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
-        summary = chain({"input_documents": list(index.docstore.__dict__["_dict"].values())}, return_only_outputs=True)["output_text"]
-        print(i18n("总结") + f": {summary}")
-        chatbot.append([i18n("上传了")+len(files)+"个文件", summary])
+        # logging.info(i18n("生成内容总结中……"))
+        # os.environ["OPENAI_API_KEY"] = self.api_key
+        # from langchain.chains.summarize import load_summarize_chain
+        # from langchain.prompts import PromptTemplate
+        # from langchain.chat_models import ChatOpenAI
+        # from langchain.callbacks import StdOutCallbackHandler
+        # prompt_template = "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN " + language + ":"
+        # PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
+        # llm = ChatOpenAI()
+        # chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
+        # summary = chain({"input_documents": list(index.docstore.__dict__["_dict"].values())}, return_only_outputs=True)["output_text"]
+        # print(i18n("总结") + f": {summary}")
+        # chatbot.append([i18n("上传了")+str(len(files))+"个文件", summary])
         return gr.Files.update(), chatbot, status
 
     def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot):
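This hunk disables the automatic document summary by commenting it out rather than deleting it; the commented replacement also fixes a latent TypeError in the last line, where len(files), an int, was concatenated onto a str. For reference, a minimal standalone sketch of the same map_reduce summarization pattern against langchain 0.0.180; the API key, language value, and documents below are placeholders:

    import os
    from langchain.chains.summarize import load_summarize_chain
    from langchain.chat_models import ChatOpenAI
    from langchain.docstore.document import Document
    from langchain.prompts import PromptTemplate

    os.environ["OPENAI_API_KEY"] = "sk-..."  # placeholder key

    language = "English"  # placeholder; the app passes the user's reply language
    prompt_template = (
        "Write a concise summary of the following:\n\n{text}\n\n"
        "CONCISE SUMMARY IN " + language + ":"
    )
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

    docs = [Document(page_content="chunk one ..."),
            Document(page_content="chunk two ...")]

    # map_reduce: summarize each chunk with map_prompt, then combine the
    # partial summaries with combine_prompt into one final summary.
    chain = load_summarize_chain(
        ChatOpenAI(),
        chain_type="map_reduce",
        return_intermediate_steps=True,
        map_prompt=PROMPT,
        combine_prompt=PROMPT,
    )
    summary = chain({"input_documents": docs},
                    return_only_outputs=True)["output_text"]
    print(summary)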
@@ -309,15 +309,15 @@ class BaseLLMModel:
             )
         elif use_websearch:
             limited_context = True
-            search_results = ddg(real_inputs, max_results=5)
+            search_results = [i for i in search(real_inputs, advanced=True)]
             reference_results = []
             for idx, result in enumerate(search_results):
                 logging.debug(f"搜索结果{idx + 1}:{result}")
-                domain_name = urllib3.util.parse_url(result['href']).host
-                reference_results.append([result['body'], result['href']])
+                domain_name = urllib3.util.parse_url(result.url).host
+                reference_results.append([result.description, result.url])
                 display_append.append(
                     # f"{idx+1}. [{domain_name}]({result['href']})\n"
-                    f"<li><a href=\"{result['href']}\" target=\"_blank\">{domain_name}</a></li>\n"
+                    f"<li><a href=\"{result.url}\" target=\"_blank\">{domain_name}</a></li>\n"
                 )
             reference_results = add_source_numbers(reference_results)
             display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
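The replacement backend is the googlesearch-python package: its search() yields bare URL strings by default, and with advanced=True it yields result objects carrying url, title, and description attributes, which is what the attribute accesses in this hunk rely on. A small usage sketch under that assumption; the query string is illustrative:

    import urllib3
    from googlesearch import search

    # advanced=True makes search() yield result objects instead of URL strings.
    results = [r for r in search("what is retrieval augmented generation",
                                 advanced=True)]

    for idx, result in enumerate(results):
        # Same host extraction the diff uses for the reference list.
        domain_name = urllib3.util.parse_url(result.url).host
        print(f"{idx + 1}. {domain_name}: {result.title}")
        print(f"   {result.description}")

Note that search() performs live HTTP requests to Google, so unlike ddg() it needs no API key but is subject to rate limiting.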
requirements.txt
CHANGED
@@ -6,9 +6,9 @@ tiktoken
 socksio
 tqdm
 colorama
-duckduckgo_search
+googlesearch-python
 Pygments
-langchain==0.0.
+langchain==0.0.180
 markdown
 PyPDF2
 pdfplumber