Tuchuanhuhuhu commited on
Commit
c4727d5
1 Parent(s): 0f39a35

feat: 搜索使用 duckduckgo搜索

Browse files
modules/models/ChuanhuAgent.py CHANGED
@@ -14,7 +14,8 @@ from langchain.tools import BaseTool, StructuredTool, Tool, tool
14
  from langchain.callbacks.stdout import StdOutCallbackHandler
15
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
16
  from langchain.callbacks.manager import BaseCallbackManager
17
- from googlesearch import search
 
18
 
19
  from typing import Any, Dict, List, Optional, Union
20
 
@@ -93,7 +94,15 @@ class ChuanhuAgent_Client(BaseLLMModel):
93
  )
94
 
95
  def google_search_simple(self, query):
96
- results = [{"title": i.title, "link": i.url, "snippet": i.description} for i in search(query, advanced=True)]
 
 
 
 
 
 
 
 
97
  return str(results)
98
 
99
  def handle_file_upload(self, files, chatbot, language):
 
14
  from langchain.callbacks.stdout import StdOutCallbackHandler
15
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
16
  from langchain.callbacks.manager import BaseCallbackManager
17
+ from duckduckgo_search import DDGS
18
+ from itertools import islice
19
 
20
  from typing import Any, Dict, List, Optional, Union
21
 
 
94
  )
95
 
96
def google_search_simple(self, query):
    """Run a DuckDuckGo text search for *query* and return the results as a string.

    Uses the "lite" backend of duckduckgo_search and takes at most the first
    10 results. Each result is reshaped into a dict with the keys "title",
    "link" and "snippet" (mapped from DDGS's "title"/"href"/"body" fields),
    and the list of dicts is stringified with str() for the caller.

    Args:
        query: The search phrase to send to DuckDuckGo.

    Returns:
        str: str() of a list of {"title", "link", "snippet"} dicts.
    """
    results = []
    with DDGS() as ddgs:
        # BUG FIX: the original passed the hard-coded debug phrase
        # "notes from a dead house" instead of the caller's query,
        # so every search returned results for that fixed string.
        ddgs_gen = ddgs.text(query, backend="lite")
        # islice caps the (lazy) result generator at 10 entries.
        for r in islice(ddgs_gen, 10):
            results.append({
                "title": r["title"],
                "link": r["href"],
                "snippet": r["body"],
            })
    return str(results)
107
 
108
  def handle_file_upload(self, files, chatbot, language):
modules/models/base_model.py CHANGED
@@ -13,7 +13,8 @@ import pathlib
13
 
14
  from tqdm import tqdm
15
  import colorama
16
- from googlesearch import search
 
17
  import asyncio
18
  import aiohttp
19
  from enum import Enum
@@ -335,16 +336,19 @@ class BaseLLMModel:
335
  .replace("{reply_language}", reply_language)
336
  )
337
  elif use_websearch:
338
- limited_context = True
339
- search_results = [i for i in search(real_inputs, advanced=True)]
 
 
 
340
  reference_results = []
341
  for idx, result in enumerate(search_results):
342
  logging.debug(f"搜索结果{idx + 1}:{result}")
343
- domain_name = urllib3.util.parse_url(result.url).host
344
- reference_results.append([result.description, result.url])
345
  display_append.append(
346
  # f"{idx+1}. [{domain_name}]({result['href']})\n"
347
- f"<li><a href=\"{result.url}\" target=\"_blank\">{domain_name}</a></li>\n"
348
  )
349
  reference_results = add_source_numbers(reference_results)
350
  display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
 
13
 
14
  from tqdm import tqdm
15
  import colorama
16
+ from duckduckgo_search import DDGS
17
+ from itertools import islice
18
  import asyncio
19
  import aiohttp
20
  from enum import Enum
 
336
  .replace("{reply_language}", reply_language)
337
  )
338
  elif use_websearch:
339
+ search_results = []
340
+ with DDGS() as ddgs:
341
+ ddgs_gen = ddgs.text(real_inputs, backend="lite")
342
+ for r in islice(ddgs_gen, 10):
343
+ search_results.append(r)
344
  reference_results = []
345
  for idx, result in enumerate(search_results):
346
  logging.debug(f"搜索结果{idx + 1}:{result}")
347
+ domain_name = urllib3.util.parse_url(result['href']).host
348
+ reference_results.append([result['body'], result['href']])
349
  display_append.append(
350
  # f"{idx+1}. [{domain_name}]({result['href']})\n"
351
+ f"<li><a href=\"{result['href']}\" target=\"_blank\">{result['title']}</a></li>\n"
352
  )
353
  reference_results = add_source_numbers(reference_results)
354
  display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
requirements.txt CHANGED
@@ -18,7 +18,7 @@ openpyxl
18
  pandoc
19
  wolframalpha
20
  faiss-cpu
21
- google-search-results
22
  arxiv
23
  wikipedia
24
  google.generativeai
 
18
  pandoc
19
  wolframalpha
20
  faiss-cpu
21
+ duckduckgo-search
22
  arxiv
23
  wikipedia
24
  google.generativeai