File size: 3,516 Bytes
d727a17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
import chainlit as cl
from PIL import Image
from io import BytesIO
from bs4 import BeautifulSoup
import requests
from langchain.document_loaders import WebBaseLoader
from utils import get_completion,model_info,model_load
from duckduckgo_search import ddg
from file_handle import upload_chroma,get_qa_chain_answers_llm
# Model this app expects to be active.
model_name = "medalpaca/medalpaca-7b"

# Ask which model is currently reported by model_info() and show it.
model_current = model_info()
print("Current model: ", model_current)

# When the reported model differs from the wanted one, request a load via
# model_load() and then report the model again.
if model_current != model_name:
    print("Loading model: ", model_name)
    model_load(model_name)
    model_current = model_info()
    print("Current model: ", model_current)
def get_sources(query):
    """Search the web for *query* and return one snippet of page text.

    Calls ``ddg(query, max_results=4)``, loads every hit's ``href`` with
    ``WebBaseLoader``, removes newlines from the first loaded document of
    each page, and keeps the texts that are longer than 100 characters.
    Shorter texts are printed instead of being kept.

    Args:
        query: Search string handed to ``ddg``.

    Returns:
        A string of at most 890 characters: the first kept page when it is
        longer than 892 characters, otherwise the second kept page when one
        exists, otherwise the first kept page. An empty string is returned
        when no page produced usable text.
    """
    min_page_chars = 100   # pages at or below this length are discarded
    long_page_chars = 892  # first page is preferred only above this length
    snippet_chars = 890    # upper bound on the returned snippet

    # NOTE(review): assumes ddg may return None or an empty list when the
    # search finds nothing -- confirm against the installed library version.
    results = ddg(query, max_results=4) or []

    sources = []
    for link in results:
        # Load the page behind *this* hit (not always the first result).
        loader = WebBaseLoader(link['href'])
        data = loader.load()
        if not data:
            # Nothing was loaded for this URL; move on to the next hit.
            continue
        text = data[0].page_content.replace("\n", "")
        if len(text) > min_page_chars:
            sources.append(text)
        else:
            print(text)

    if not sources:
        return ""
    # Fall back to the first page when there is no second page to offer.
    if len(sources[0]) > long_page_chars or len(sources) == 1:
        return sources[0][:snippet_chars]
    return sources[1][:snippet_chars]
# Recursive splitter configured with chunk_size=1000 and chunk_overlap=100.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=1000)

# System prompt: instructs the model to answer from the supplied context and
# to always include a "SOURCES" section. {summaries} is the template slot.
system_template = """Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.
Example of your response should be:
```
The answer is foo
SOURCES: xyz
```
Begin!
----------------
{summaries}"""

# Chat prompt = the system template followed by the human "{question}" slot.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)

# Keyword arguments bundle carrying the prompt under the "prompt" key.
chain_type_kwargs = dict(prompt=prompt)
@cl.on_chat_start
async def init():
    """Ask the user for a file when a chat starts and pass it to upload_chroma.

    Keeps sending a ``cl.AskFileMessage`` until it yields at least one file,
    wraps the first file's content in a ``BytesIO`` stream, and calls
    ``upload_chroma(stream, file.name, "test")``. A progress message is sent
    before that call and updated after it returns.
    """
    files = None
    # Wait for the user to upload a file. `not files` also covers an empty
    # list, which would otherwise make `files[0]` below raise IndexError.
    while not files:
        files = await cl.AskFileMessage(
            content="Upload your medical reports and files to begin!",
            accept={
                'image/png': ['.png', '.jpg', '.jpeg'],
                'text/html': ['.html', '.htm'],
                "application/pdf": ['.pdf'],
            },
        ).send()

    # Only the first uploaded file is processed.
    file = files[0]
    stream = BytesIO(file.content)

    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    upload_chroma(stream, file.name, "test")

    # Reuse the same message object to report completion.
    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()
@cl.on_message
async def main(message):
    """Reply to an incoming chat message.

    Passes ``message`` and the label ``"test"`` to
    ``get_qa_chain_answers_llm``, prints the result, and sends its string
    form back as a chat message.
    """
    answer = get_qa_chain_answers_llm(message, "test")
    print(answer)
    reply = cl.Message(content=f"{answer}")
    await reply.send()