import os
import streamlit as st
from pathlib import Path
# For plain-text files
from langchain.document_loaders import TextLoader
# Text splitter
from langchain.text_splitter import CharacterTextSplitter
# For using Hugging Face models & embeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import HuggingFaceHub
# Vectorstores: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
# FAISS (Facebook AI Similarity Search) in-memory vectorstore
from langchain.vectorstores import FAISS
# Question-answering chain over retrieved documents
from langchain.chains.question_answering import load_qa_chain
# Build a vectorstore index (Chroma by default) directly from document loaders
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
# PDF loader
from langchain.document_loaders import UnstructuredPDFLoader

# Hugging Face Hub API token is read from Streamlit secrets
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["hf_api_key"]
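
# Initialise the shared components used by both file handlers:
# the embeddings model, the Hub-hosted LLM and a simple "stuff" QA chain.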
def init():
    global embeddings, llm, chain
    # Embeddings
    embeddings = HuggingFaceEmbeddings()
    # Instruction-tuned LLM hosted on the Hugging Face Hub
    llm = HuggingFaceHub(repo_id="declare-lab/flan-alpaca-large",
                         model_kwargs={"temperature": 0, "max_length": 512})
    # "stuff" chain: all retrieved documents are passed into a single prompt
    chain = load_qa_chain(llm, chain_type="stuff")
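
# PDF handler: persist the upload to disk, build a vectorstore index over it,
# and answer queries with a RetrievalQA chain.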
def pdf_file(txtFileObj):
    st.subheader('Uploaded PDF File:')
    st.write(txtFileObj.name)

    # Save the uploaded file to disk so the loader can read it
    with open(txtFileObj.name, "wb") as f:
        f.write(txtFileObj.getbuffer())

    loaders = [UnstructuredPDFLoader(txtFileObj.name)]
    index = VectorstoreIndexCreator(
        embedding=embeddings,
        text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)).from_loaders(loaders)

    chain = RetrievalQA.from_chain_type(llm=llm,
                                        chain_type="stuff",
                                        retriever=index.vectorstore.as_retriever(),
                                        input_key="question")

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        answer = chain.run(question=query)
        st.subheader('Answer')
        st.write(answer)
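
# Text handler: persist the upload, split it into chunks, embed the chunks into
# a FAISS index, and answer queries by stuffing the most similar chunks into the QA chain.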
def text_file(txtFileObj):
    st.subheader('Uploaded Text File:')
    st.write(txtFileObj.name)

    # Save the uploaded file to disk so the loader can read it
    with open(txtFileObj.name, "wb") as f:
        f.write(txtFileObj.getbuffer())

    loader = TextLoader(txtFileObj.name)
    documents = loader.load()

    # Split the text into chunks before embedding
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    docs = text_splitter.split_documents(documents)
    db = FAISS.from_documents(docs, embeddings)

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        docs = db.similarity_search(query)
        answer = chain.run(input_documents=docs, question=query)
        st.subheader('Answer')
        st.write(answer)
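
# Streamlit page layout: title, one-time initialisation, sidebar uploader and file info.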
st.title('Document Q&A - Ask anything in your Document')
st.subheader('This application lets you upload text (.txt) and PDF (.pdf) files and ask questions about their contents.')

init()

st.sidebar.subheader('Upload document')
uploaded_file = st.sidebar.file_uploader("Upload File", type=['txt', 'pdf'])

if uploaded_file and Path(uploaded_file.name).suffix == '.txt':
    st.sidebar.info(uploaded_file.name)
    text_file(uploaded_file)

if uploaded_file and Path(uploaded_file.name).suffix == '.pdf':
    pdf_file(uploaded_file)

with st.sidebar.expander('File'):
    if uploaded_file:
        st.info(uploaded_file.name)
    if os.path.exists('/content/'):
        st.info(os.listdir('/content/'))
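
# Typical local setup (assuming Streamlit and LangChain are installed; on a Space
# the entry point is conventionally app.py):
#   streamlit run app.py
# st.secrets reads the Hugging Face token from the Space's secrets or, locally,
# from .streamlit/secrets.toml with an entry such as: hf_api_key = "...".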