arborvitae committed • Commit fb7d447 • 1 parent: a3fb2ca
Update app.py
app.py
CHANGED
@@ -1,17 +1,17 @@
-import streamlit as st
+import streamlit as st
 import os
 import base64
 import time
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from transformers import pipeline
-import torch
-import textwrap
-from langchain.document_loaders import PyPDFLoader, DirectoryLoader, PDFMinerLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import SentenceTransformerEmbeddings
-from langchain.vectorstores import Chroma
+import torch
+import textwrap
+from langchain.document_loaders import PyPDFLoader, DirectoryLoader, PDFMinerLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import SentenceTransformerEmbeddings
+from langchain.vectorstores import Chroma
 from langchain.llms import HuggingFacePipeline
-from langchain.chains import RetrievalQA
+from langchain.chains import RetrievalQA
 from constants import CHROMA_SETTINGS
 from streamlit_chat import message
 
@@ -28,16 +28,6 @@ base_model = AutoModelForSeq2SeqLM.from_pretrained(
     torch_dtype=torch.float32
 )
 
-
-# checkpoint = "LaMini-T5-738M"
-# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-# base_model = AutoModelForSeq2SeqLM.from_pretrained(
-#     checkpoint,
-#     device_map="auto",
-#     torch_dtype = torch.float32,
-#     from_tf=True
-# )
-
 persist_directory = "db"
 
 @st.cache_resource
@@ -55,7 +45,7 @@ def data_ingestion():
     #create vector store here
     db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory, client_settings=CHROMA_SETTINGS)
     db.persist()
-    db=None
+    db=None
 
 @st.cache_resource
 def llm_pipeline():
@@ -100,8 +90,11 @@ def get_file_size(file):
     file.seek(0)
     return file_size
 
+# Specify the path to your PDF document directly
+filepath = "removed_null.pdf"
+
 @st.cache_data
-#function to display the PDF of a given file
+#function to display the PDF of a given file
 def displayPDF(file):
     # Opening file from file path
     with open(file, "rb") as f:
@@ -125,57 +118,40 @@ def main():
 
     st.markdown("<h2 style='text-align: center; color:red;'>Upload your Legal Document</h2>", unsafe_allow_html=True)
 
-
-
-
-
-
-
-
-
-    with
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    st.session_state["past"] = ["Hey there!"]
-
-    # Search the database for a response based on user input and update session state
-    if user_input:
-        answer = process_answer({'query': user_input})
-        st.session_state["past"].append(user_input)
-        response = answer
-        st.session_state["generated"].append(response)
-
-    # Display conversation history using Streamlit messages
-    if st.session_state["generated"]:
-        display_conversation(st.session_state)
-
-
-
-
-
-
+    col1, col2 = st.columns([1, 2])
+    with col1:
+        st.markdown("<h4 style='color:black;'>File details</h4>", unsafe_allow_html=True)
+        # You can display any additional file details here if needed
+        st.markdown("<h4 style='color:black;'>File preview</h4>", unsafe_allow_html=True)
+        pdf_view = displayPDF(filepath)
+
+    with col2:
+        with st.spinner('Embeddings are in process...'):
+            ingested_data = data_ingestion()
+        st.success('Embeddings are created successfully!')
+        st.markdown("<h4 style='color:black;'>Chat Here</h4>", unsafe_allow_html=True)
+
+        user_input = st.text_input("", key="input")
+
+        # Initialize session state for generated responses and past messages
+        if "generated" not in st.session_state:
+            st.session_state["generated"] = ["I am ready to help you"]
+        if "past" not in st.session_state:
+            st.session_state["past"] = ["Hey there!"]
+
+        # Search the database for a response based on user input and update session state
+        if user_input:
+            answer = process_answer({'query': user_input})
+            st.session_state["past"].append(user_input)
+            response = answer
+            st.session_state["generated"].append(response)
+
+        # Display conversation history using Streamlit messages
+        if st.session_state["generated"]:
+            display_conversation(st.session_state)
 
 if __name__ == "__main__":
     main()
 
 
+
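Note: the diff only shows the opening lines of displayPDF as context. For reference, a minimal sketch of the usual completion, assuming the common base64/iframe pattern (the actual body is not part of this commit):

def displayPDF(file):
    # Opening file from file path
    with open(file, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode("utf-8")

    # Embed the PDF in an iframe via a data: URI
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'

    # Render the iframe with st.markdown
    st.markdown(pdf_display, unsafe_allow_html=True)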
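Similarly, only the def llm_pipeline(): signature appears in the hunks above. A minimal sketch of what such a function typically contains, assuming the module-level base_model and tokenizer from app.py; the generation parameters here are illustrative assumptions, not values from the commit:

@st.cache_resource
def llm_pipeline():
    # Wrap the seq2seq checkpoint in a transformers text2text pipeline
    pipe = pipeline(
        "text2text-generation",
        model=base_model,
        tokenizer=tokenizer,
        max_length=256,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
    )
    # Expose the local pipeline to LangChain
    return HuggingFacePipeline(pipeline=pipe)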
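process_answer({'query': user_input}) is called in main() but not defined in any hunk shown here. A hedged sketch of how it is usually wired, assuming a RetrievalQA chain over the persisted Chroma store; the helper name qa_llm and the embedding model name are assumptions:

@st.cache_resource
def qa_llm():
    # Assumed helper: rebuild the retriever over the persisted "db" directory
    llm = llm_pipeline()
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(),
        return_source_documents=True,
    )
    return qa

def process_answer(instruction):
    # RetrievalQA accepts {'query': ...} and returns a dict with a 'result' key
    qa = qa_llm()
    generated_text = qa(instruction)
    return generated_text["result"]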
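Likewise, display_conversation(st.session_state) relies on the message import from streamlit_chat. A minimal sketch consistent with the "past"/"generated" session keys used in main(); the function body is assumed, not part of this commit:

def display_conversation(history):
    # Render user/bot turns in order; keys must be unique per rendered message
    for i in range(len(history["generated"])):
        message(history["past"][i], is_user=True, key=str(i) + "_user")
        message(history["generated"][i], key=str(i))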