Spaces:
Sleeping
Sleeping
apahilaj
commited on
Commit
•
3bb6107
1
Parent(s):
75f200e
pls chat
Browse files
app.py
CHANGED
@@ -1,30 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
-
import
|
|
|
3 |
from langchain.embeddings import HuggingFaceEmbeddings
|
4 |
-
from langchain.vectorstores import
|
5 |
-
from langchain_community.llms import
|
6 |
-
from langchain.chains import
|
7 |
-
from langchain_community.document_loaders.csv_loader import CSVLoader
|
8 |
from langchain_community.document_loaders import PyPDFLoader
|
9 |
-
from langchain.text_splitter import
|
10 |
-
from langchain_community.document_loaders import TextLoader
|
11 |
-
from langchain_community import vectorstores
|
12 |
from langchain.prompts import PromptTemplate
|
13 |
-
from langchain.chains import RetrievalQA
|
14 |
-
from langchain.memory import ConversationBufferMemory
|
15 |
-
from langchain.chains import ConversationalRetrievalChain
|
16 |
-
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
|
17 |
-
from langchain.vectorstores import DocArrayInMemorySearch
|
18 |
-
from langchain.document_loaders import TextLoader
|
19 |
-
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
|
20 |
-
from langchain.memory import ConversationBufferMemory
|
21 |
-
from langchain.chat_models import ChatOpenAI
|
22 |
-
from langchain.document_loaders import TextLoader
|
23 |
-
from langchain.document_loaders import PyPDFLoader
|
24 |
-
import panel as pn
|
25 |
-
import param
|
26 |
-
import re
|
27 |
-
import os
|
28 |
|
29 |
api_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
|
30 |
|
@@ -34,6 +105,7 @@ model = HuggingFaceHub(
|
|
34 |
task="conversational",
|
35 |
model_kwargs={"temperature": 0.8, "max_length": 1000},
|
36 |
)
|
|
|
37 |
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
|
38 |
{context}
|
39 |
Question: {question}
|
@@ -41,21 +113,13 @@ Helpful Answer:"""
|
|
41 |
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
|
42 |
|
43 |
def load_db(file, k):
|
44 |
-
# load documents
|
45 |
loader = PyPDFLoader(file)
|
46 |
documents = loader.load()
|
47 |
-
# split documents
|
48 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
|
49 |
docs = text_splitter.split_documents(documents)
|
50 |
-
# define embedding
|
51 |
embeddings = HuggingFaceEmbeddings()
|
52 |
-
|
53 |
-
db = vectorstores.FAISS.from_documents(docs, embeddings)
|
54 |
-
# define retriever
|
55 |
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
|
56 |
-
# create a chatbot chain. Memory is managed externally.
|
57 |
-
question_generator_chain = LLMChain(llm=model, prompt=QA_CHAIN_PROMPT)
|
58 |
-
|
59 |
qa = ConversationalRetrievalChain.from_llm(
|
60 |
llm=model,
|
61 |
chain_type="stuff",
|
@@ -63,23 +127,27 @@ def load_db(file, k):
|
|
63 |
return_source_documents=True,
|
64 |
return_generated_question=True,
|
65 |
)
|
66 |
-
|
67 |
return qa
|
68 |
|
69 |
chat_history = [] # initialize chat history
|
70 |
|
71 |
-
def greet(
|
72 |
global chat_history
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
if
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
85 |
iface.launch(share=True)
|
|
|
1 |
+
# import gradio as gr
|
2 |
+
# import pandas as pd
|
3 |
+
# from langchain.embeddings import HuggingFaceEmbeddings
|
4 |
+
# from langchain.vectorstores import Chroma, faiss
|
5 |
+
# from langchain_community.llms import HuggingFaceEndpoint, HuggingFaceHub
|
6 |
+
# from langchain.chains import LLMChain
|
7 |
+
# from langchain_community.document_loaders.csv_loader import CSVLoader
|
8 |
+
# from langchain_community.document_loaders import PyPDFLoader
|
9 |
+
# from langchain.text_splitter import CharacterTextSplitter
|
10 |
+
# from langchain_community.document_loaders import TextLoader
|
11 |
+
# from langchain_community import vectorstores
|
12 |
+
# from langchain.prompts import PromptTemplate
|
13 |
+
# from langchain.chains import RetrievalQA
|
14 |
+
# from langchain.memory import ConversationBufferMemory
|
15 |
+
# from langchain.chains import ConversationalRetrievalChain
|
16 |
+
# from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
|
17 |
+
# from langchain.vectorstores import DocArrayInMemorySearch
|
18 |
+
# from langchain.document_loaders import TextLoader
|
19 |
+
# from langchain.chains import RetrievalQA, ConversationalRetrievalChain
|
20 |
+
# from langchain.memory import ConversationBufferMemory
|
21 |
+
# from langchain.chat_models import ChatOpenAI
|
22 |
+
# from langchain.document_loaders import TextLoader
|
23 |
+
# from langchain.document_loaders import PyPDFLoader
|
24 |
+
# import panel as pn
|
25 |
+
# import param
|
26 |
+
# import re
|
27 |
+
# import os
|
28 |
+
|
29 |
+
# api_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
|
30 |
+
|
31 |
+
# model = HuggingFaceHub(
|
32 |
+
# huggingfacehub_api_token=api_token,
|
33 |
+
# repo_id="mistralai/Mistral-7B-Instruct-v0.2",
|
34 |
+
# task="conversational",
|
35 |
+
# model_kwargs={"temperature": 0.8, "max_length": 1000},
|
36 |
+
# )
|
37 |
+
# template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
|
38 |
+
# {context}
|
39 |
+
# Question: {question}
|
40 |
+
# Helpful Answer:"""
|
41 |
+
# QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
|
42 |
+
|
43 |
+
# def load_db(file, k):
|
44 |
+
# # load documents
|
45 |
+
# loader = PyPDFLoader(file)
|
46 |
+
# documents = loader.load()
|
47 |
+
# # split documents
|
48 |
+
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
|
49 |
+
# docs = text_splitter.split_documents(documents)
|
50 |
+
# # define embedding
|
51 |
+
# embeddings = HuggingFaceEmbeddings()
|
52 |
+
# # create vector database from data
|
53 |
+
# db = vectorstores.FAISS.from_documents(docs, embeddings)
|
54 |
+
# # define retriever
|
55 |
+
# retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
|
56 |
+
# # create a chatbot chain. Memory is managed externally.
|
57 |
+
# question_generator_chain = LLMChain(llm=model, prompt=QA_CHAIN_PROMPT)
|
58 |
+
|
59 |
+
# qa = ConversationalRetrievalChain.from_llm(
|
60 |
+
# llm=model,
|
61 |
+
# chain_type="stuff",
|
62 |
+
# retriever=retriever,
|
63 |
+
# return_source_documents=True,
|
64 |
+
# return_generated_question=True,
|
65 |
+
# )
|
66 |
+
|
67 |
+
# return qa
|
68 |
+
|
69 |
+
# chat_history = [] # initialize chat history
|
70 |
+
|
71 |
+
# def greet(question, pdf_file):
|
72 |
+
# global chat_history
|
73 |
+
# a = load_db(pdf_file, 3)
|
74 |
+
# r = a.invoke({"question": question, "chat_history": chat_history})
|
75 |
+
# match = re.search(r'Helpful Answer:(.*)', r['answer'])
|
76 |
+
# if match:
|
77 |
+
# helpful_answer = match.group(1).strip()
|
78 |
+
# # Extend chat history with the current question and answer
|
79 |
+
# chat_history.extend([(question, helpful_answer)])
|
80 |
+
# return helpful_answer
|
81 |
+
# else:
|
82 |
+
# return "No helpful answer found."
|
83 |
+
|
84 |
+
# iface = gr.Interface(fn=greet, inputs=["text", "file"], outputs="text")
|
85 |
+
# iface.launch(share=True)
|
86 |
+
|
87 |
+
|
88 |
+
|
89 |
import gradio as gr
|
90 |
+
import os
|
91 |
+
import re
|
92 |
from langchain.embeddings import HuggingFaceEmbeddings
|
93 |
+
from langchain.vectorstores import faiss
|
94 |
+
from langchain_community.llms import HuggingFaceHub
|
95 |
+
from langchain.chains import ConversationalRetrievalChain
|
|
|
96 |
from langchain_community.document_loaders import PyPDFLoader
|
97 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
|
|
98 |
from langchain.prompts import PromptTemplate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
api_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
|
101 |
|
|
|
105 |
task="conversational",
|
106 |
model_kwargs={"temperature": 0.8, "max_length": 1000},
|
107 |
)
|
108 |
+
|
109 |
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
|
110 |
{context}
|
111 |
Question: {question}
|
|
|
113 |
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
|
114 |
|
115 |
def load_db(file, k):
|
|
|
116 |
loader = PyPDFLoader(file)
|
117 |
documents = loader.load()
|
|
|
118 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
|
119 |
docs = text_splitter.split_documents(documents)
|
|
|
120 |
embeddings = HuggingFaceEmbeddings()
|
121 |
+
db = faiss.FAISS.from_documents(docs, embeddings)
|
|
|
|
|
122 |
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
|
|
|
|
|
|
|
123 |
qa = ConversationalRetrievalChain.from_llm(
|
124 |
llm=model,
|
125 |
chain_type="stuff",
|
|
|
127 |
return_source_documents=True,
|
128 |
return_generated_question=True,
|
129 |
)
|
|
|
130 |
return qa
|
131 |
|
132 |
chat_history = [] # initialize chat history
|
133 |
|
134 |
+
def greet(conversation):
|
135 |
global chat_history
|
136 |
+
user_input = conversation['user']
|
137 |
+
pdf_file = conversation['file'][0] if 'file' in conversation else None
|
138 |
+
|
139 |
+
if user_input:
|
140 |
+
a = load_db(pdf_file, 3)
|
141 |
+
r = a.invoke({"question": user_input, "chat_history": chat_history})
|
142 |
+
match = re.search(r'Helpful Answer:(.*)', r['answer'])
|
143 |
+
if match:
|
144 |
+
helpful_answer = match.group(1).strip()
|
145 |
+
# Extend chat history with the current question and answer
|
146 |
+
chat_history.extend([(user_input, helpful_answer)])
|
147 |
+
return {"system": "", "user": user_input, "assistant": helpful_answer}
|
148 |
+
else:
|
149 |
+
return {"system": "", "user": user_input, "assistant": "No helpful answer found."}
|
150 |
+
return {"system": "", "user": "", "assistant": ""}
|
151 |
+
|
152 |
+
iface = gr.Interface(fn=greet, inputs=gr.Chat(), outputs=gr.Chat())
|
153 |
iface.launch(share=True)
|