Shanulhaq committed on
Commit
acb9e8b
1 Parent(s): 4414faa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -13
app.py CHANGED
@@ -6,7 +6,7 @@ import PyPDF2
6
  import pandas as pd
7
  import streamlit as st
8
 
9
- # Function to extract text from a single PDF
10
  def extract_text_from_pdf(uploaded_file):
11
  pdf_text = ""
12
  reader = PyPDF2.PdfReader(uploaded_file)
@@ -15,13 +15,13 @@ def extract_text_from_pdf(uploaded_file):
15
  pdf_text += page.extract_text()
16
  return pdf_text
17
 
18
- # Function to extract text from a single CSV
19
  def extract_text_from_csv(uploaded_file):
20
  df = pd.read_csv(uploaded_file)
21
  csv_text = df.to_string(index=False)
22
  return csv_text
23
 
24
- # Initialize the tokenizer and model on CPU first
25
  tokenizer = AutoTokenizer.from_pretrained("ricepaper/vi-gemma-2b-RAG")
26
 
27
  model = AutoModelForCausalLM.from_pretrained(
@@ -64,31 +64,36 @@ def generate_answer(context, query):
64
  return answer
65
 
66
  # Streamlit App
67
- st.title("RAG-Based PDF and CSV Question Answering Application")
68
 
69
- # Upload PDF and CSV files
70
- uploaded_files = st.file_uploader("Upload PDF or CSV files", type=["pdf", "csv"], accept_multiple_files=True)
71
 
72
  if uploaded_files:
73
  combined_text = ""
74
 
 
75
  for uploaded_file in uploaded_files:
76
  if uploaded_file.type == "application/pdf":
 
77
  pdf_text = extract_text_from_pdf(uploaded_file)
78
- combined_text += pdf_text + "\n\n"
 
 
79
  elif uploaded_file.type == "text/csv":
 
80
  csv_text = extract_text_from_csv(uploaded_file)
81
- combined_text += csv_text + "\n\n"
82
-
83
- st.write("Combined extracted text from PDFs and CSVs:")
84
- st.text_area("Document Content", combined_text, height=200)
85
 
86
  # User inputs their question
87
- query = st.text_input("Enter your question about the uploaded documents:")
88
 
89
  if st.button("Get Answer"):
90
  if query.strip() != "":
91
- # Generate answer based on extracted document text and the query
92
  answer = generate_answer(combined_text, query)
93
  st.write("Answer:", answer)
94
  else:
 
6
  import pandas as pd
7
  import streamlit as st
8
 
9
+ # Function to extract text from PDF
10
  def extract_text_from_pdf(uploaded_file):
11
  pdf_text = ""
12
  reader = PyPDF2.PdfReader(uploaded_file)
 
15
  pdf_text += page.extract_text()
16
  return pdf_text
17
 
18
+ # Function to extract text from CSV
19
  def extract_text_from_csv(uploaded_file):
20
  df = pd.read_csv(uploaded_file)
21
  csv_text = df.to_string(index=False)
22
  return csv_text
23
 
24
+ # Initialize the tokenizer and model
25
  tokenizer = AutoTokenizer.from_pretrained("ricepaper/vi-gemma-2b-RAG")
26
 
27
  model = AutoModelForCausalLM.from_pretrained(
 
64
  return answer
65
 
66
  # Streamlit App
67
+ st.title("RAG-Based Multi-File Question Answering Application")
68
 
69
+ # Upload PDF or CSV
70
+ uploaded_files = st.file_uploader("Upload PDF or CSV files", type=['pdf', 'csv'], accept_multiple_files=True)
71
 
72
  if uploaded_files:
73
  combined_text = ""
74
 
75
+ # Process each uploaded file
76
  for uploaded_file in uploaded_files:
77
  if uploaded_file.type == "application/pdf":
78
+ # Extract text from PDF
79
  pdf_text = extract_text_from_pdf(uploaded_file)
80
+ combined_text += pdf_text + "\n"
81
+ st.write(f"Extracted text from PDF: {uploaded_file.name}")
82
+
83
  elif uploaded_file.type == "text/csv":
84
+ # Extract text from CSV
85
  csv_text = extract_text_from_csv(uploaded_file)
86
+ combined_text += csv_text + "\n"
87
+ st.write(f"Extracted text from CSV: {uploaded_file.name}")
88
+
89
+ st.text_area("Combined File Content", combined_text, height=200)
90
 
91
  # User inputs their question
92
+ query = st.text_input("Enter your question about the uploaded content:")
93
 
94
  if st.button("Get Answer"):
95
  if query.strip() != "":
96
+ # Generate answer based on combined extracted text and the query
97
  answer = generate_answer(combined_text, query)
98
  st.write("Answer:", answer)
99
  else: