ashok2216 committed on
Commit
e5d1312
1 Parent(s): 52da32c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -14
app.py CHANGED
@@ -3,16 +3,15 @@ from chromadb.utils import embedding_functions
3
  from chromadb.config import Settings
4
  from transformers import pipeline
5
  import streamlit as st
6
- import fitz # PyMuPDF for PDF parsing
7
  from PIL import Image
8
 
9
- # Configure ChromaDB with persistent SQLite database
10
  config = Settings(
11
  persist_directory="./chromadb_data",
12
  chroma_db_impl="sqlite",
13
  )
14
 
15
- # Initialize persistent client with SQLite
16
  def setup_chromadb():
17
  client = chromadb.PersistentClient(path="./chromadb_data")
18
  collection = client.get_or_create_collection(
@@ -23,9 +22,8 @@ def setup_chromadb():
23
  )
24
  return client, collection
25
 
26
- # Clear the collection
27
  def clear_collection(client, collection_name):
28
- # Delete the collection and recreate it
29
  client.delete_collection(name=collection_name)
30
  return client.get_or_create_collection(
31
  name=collection_name,
@@ -42,9 +40,9 @@ def extract_text_from_pdf(uploaded_file):
42
  return text
43
 
44
  def add_pdf_text_to_db(collection, pdf_text):
45
- sentences = pdf_text.split("\n") # Split text into lines for granularity
46
  for idx, sentence in enumerate(sentences):
47
- if sentence.strip(): # Avoid empty lines
48
  collection.add(
49
  ids=[f"pdf_text_{idx}"],
50
  documents=[sentence],
@@ -61,27 +59,27 @@ def query_pdf_data(collection, query, retriever_model):
61
  answer = retriever_model(f"Context: {context}\nQuestion: {query}")
62
  return answer, results["metadatas"]
63
 
64
- # Streamlit Interface
65
  def main():
66
  image = Image.open('LOGO.PNG')
67
  st.image(
68
  image, width=250)
69
- st.title("PDF Chatbot with Retrieval-Augmented Generation")
 
 
70
  st.write("Upload a PDF, and ask questions about its content!")
71
 
72
- # Initialize components
73
  client, collection = setup_chromadb()
74
- retriever_model = pipeline("text2text-generation", model="google/flan-t5-small") # Free LLM
75
 
76
  # File upload
77
  uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
78
  if uploaded_file:
79
  try:
80
- # Clear existing data
81
  collection = clear_collection(client, "pdf_data")
82
  st.info("Existing data cleared from the database.")
83
 
84
- # Extract and add new data
85
  pdf_text = extract_text_from_pdf(uploaded_file)
86
  st.success("Text extracted successfully!")
87
  st.text_area("Extracted Text:", pdf_text, height=300)
@@ -108,4 +106,3 @@ def main():
108
  if __name__ == "__main__":
109
  main()
110
 
111
-
 
3
  from chromadb.config import Settings
4
  from transformers import pipeline
5
  import streamlit as st
6
+ import fitz
7
  from PIL import Image
8
 
9
+
10
  config = Settings(
11
  persist_directory="./chromadb_data",
12
  chroma_db_impl="sqlite",
13
  )
14
 
 
15
  def setup_chromadb():
16
  client = chromadb.PersistentClient(path="./chromadb_data")
17
  collection = client.get_or_create_collection(
 
22
  )
23
  return client, collection
24
 
25
+
26
  def clear_collection(client, collection_name):
 
27
  client.delete_collection(name=collection_name)
28
  return client.get_or_create_collection(
29
  name=collection_name,
 
40
  return text
41
 
42
  def add_pdf_text_to_db(collection, pdf_text):
43
+ sentences = pdf_text.split("\n")
44
  for idx, sentence in enumerate(sentences):
45
+ if sentence.strip():
46
  collection.add(
47
  ids=[f"pdf_text_{idx}"],
48
  documents=[sentence],
 
59
  answer = retriever_model(f"Context: {context}\nQuestion: {query}")
60
  return answer, results["metadatas"]
61
 
62
+
63
  def main():
64
  image = Image.open('LOGO.PNG')
65
  st.image(
66
  image, width=250)
67
+ st.title("PDF Chatbot with RAG")
68
+ st.markdown("Google Flan-T5-Small + ChromaDB")
69
+ st.header('', divider='rainbow')
70
  st.write("Upload a PDF, and ask questions about its content!")
71
 
72
+
73
  client, collection = setup_chromadb()
74
+ retriever_model = pipeline("text2text-generation", model="google/flan-t5-small")
75
 
76
  # File upload
77
  uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
78
  if uploaded_file:
79
  try:
 
80
  collection = clear_collection(client, "pdf_data")
81
  st.info("Existing data cleared from the database.")
82
 
 
83
  pdf_text = extract_text_from_pdf(uploaded_file)
84
  st.success("Text extracted successfully!")
85
  st.text_area("Extracted Text:", pdf_text, height=300)
 
106
  if __name__ == "__main__":
107
  main()
108