File size: 3,516 Bytes
d727a17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
import chainlit as cl

from PIL import Image
from io import BytesIO
from bs4 import BeautifulSoup
import requests
from langchain.document_loaders import WebBaseLoader
from utils import get_completion,model_info,model_load

from duckduckgo_search import ddg
from file_handle import upload_chroma,get_qa_chain_answers_llm

model_name = "medalpaca/medalpaca-7b"


def _ensure_model(wanted):
    """Report the active model, load ``wanted`` if it differs, and return the result.

    The active model is whatever ``model_info()`` returns; it is queried and
    printed once up front and once more after ``model_load`` has been called.
    """
    active = model_info()
    print("Current model: ", active)
    if active != wanted:
        print("Loading model: ", wanted)
        model_load(wanted)
        # Re-query so the value we keep reflects what is reported after loading.
        active = model_info()
        print("Current model: ", active)
    return active


# Executed at import time so the model is checked before any chat starts.
model_current = _ensure_model(model_name)
    

def get_sources(query):
    """Return a short web snippet for ``query`` taken from DuckDuckGo results.

    Up to four search hits are loaded with ``WebBaseLoader``.  Pages whose
    newline-stripped text is 100 characters or shorter are printed and
    skipped.  The first usable page is preferred when it is longer than 892
    characters; otherwise the second usable page is used, falling back to the
    first one when it is the only usable page.

    Args:
        query: Search string passed to ``ddg``.

    Returns:
        At most 890 characters of page text, or ``""`` when no search result
        yielded usable text.
    """
    # ddg may hand back nothing at all; treat that as an empty result list.
    results = ddg(query, max_results=4) or []
    sources = []

    for link in results:
        # Load the page for *this* hit (previously every iteration re-read
        # results[0]).
        try:
            data = WebBaseLoader(link['href']).load()
        except requests.RequestException as err:
            # One unreachable page should not abort the whole lookup.
            print(f"Could not load {link['href']}: {err}")
            continue
        if not data:
            continue

        text = data[0].page_content.replace("\n", "")
        if len(text) <= 100:
            # Too short to be useful; log it and try the next hit.
            print(text)
            continue
        sources.append(text)

        # Only the first two usable pages can ever be returned, so stop
        # fetching as soon as the choice below is already determined.
        if len(sources[0]) > 892 or len(sources) == 2:
            break

    if not sources:
        return ""
    if len(sources[0]) > 892 or len(sources) == 1:
        return sources[0][:890]
    return sources[1][:890]
  

# Splitter settings: 1000-character chunks with a 100-character overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=1000,
)

# System prompt; the `{summaries}` placeholder at the end is filled in when
# the prompt is formatted.
system_template = """Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.

Example of your response should be:

```
The answer is foo
SOURCES: xyz
```

Begin!
----------------
{summaries}"""

# Chat prompt: the system instructions first, then the user's question.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)
chain_type_kwargs = dict(prompt=prompt)


@cl.on_chat_start
async def init():
    """Ask the user for a file at chat start and hand it to ``upload_chroma``.

    The prompt is repeated until ``AskFileMessage`` yields a result.  Only the
    first returned file is used: its content is wrapped in a ``BytesIO`` and
    passed to ``upload_chroma`` together with the file name and the fixed
    name ``"test"``.  A status message is shown while this runs and updated
    once it has finished.
    """
    files = None

    # Wait for the user to upload a file; keep asking while nothing came back
    # (presumably the prompt timing out -- confirm against the Chainlit docs).
    while files is None:
        files = await cl.AskFileMessage(
            content="Upload your medical reports and files to begin!",
            # Maps MIME type -> file extensions offered for upload.
            accept={
                'image/png': ['.png', '.jpg', '.jpeg'],
                'text/html': ['.html', '.htm'],
                "application/pdf": ['.pdf'],
            },
        ).send()

    file = files[0]
    stream = BytesIO(file.content)
    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    # "test" is the same fixed name the message handler passes when answering.
    upload_chroma(stream, file.name, "test")

    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()



@cl.on_message
async def main(message):
    """Answer an incoming chat message and send the answer back to the user.

    The message is passed to ``get_qa_chain_answers_llm`` together with the
    fixed name ``"test"``; the result is printed and then sent as a chat
    message.
    """
    answer = get_qa_chain_answers_llm(message, "test")
    print(answer)
    reply = cl.Message(content=f"{answer}")
    await reply.send()