Spaces:

Shanulhaq
/

Doctore-AI

Sleeping

App Files Files Community

Shanulhaq committed on Sep 1

Commit

acb9e8b

•

1 Parent(s): 4414faa

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -13

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import PyPDF2
 import pandas as pd
 import streamlit as st
-# Function to extract text from a single PDF
 def extract_text_from_pdf(uploaded_file):
     pdf_text = ""
     reader = PyPDF2.PdfReader(uploaded_file)
@@ -15,13 +15,13 @@ def extract_text_from_pdf(uploaded_file):
         pdf_text += page.extract_text()
     return pdf_text
-# Function to extract text from a single CSV
 def extract_text_from_csv(uploaded_file):
     """Return the contents of an uploaded CSV file as one plain-text table.

     Args:
         uploaded_file: Anything ``pandas.read_csv`` accepts; the app below
             passes the objects returned by ``st.file_uploader``.

     Returns:
         The parsed table as rendered by ``DataFrame.to_string`` with the
         index column omitted.
     """
     # Parse with pandas' default settings (no explicit delimiter/encoding).
     df = pd.read_csv(uploaded_file)
     # index=False keeps the row-number column out of the generated text.
     csv_text = df.to_string(index=False)
     return csv_text
-# Initialize the tokenizer and model on CPU first
 tokenizer = AutoTokenizer.from_pretrained("ricepaper/vi-gemma-2b-RAG")
 model = AutoModelForCausalLM.from_pretrained(
@@ -64,31 +64,36 @@ def generate_answer(context, query):
     return answer
 # Streamlit App
-st.title("RAG-Based PDF and CSV Question Answering Application")
-# Upload PDF and CSV files
-uploaded_files = st.file_uploader("Upload PDF or CSV files", type=["pdf", "csv"], accept_multiple_files=True)
 if uploaded_files:
     combined_text = ""
     for uploaded_file in uploaded_files:
         if uploaded_file.type == "application/pdf":
             pdf_text = extract_text_from_pdf(uploaded_file)
-            combined_text += pdf_text + "\n\n"
         elif uploaded_file.type == "text/csv":
             csv_text = extract_text_from_csv(uploaded_file)
-            combined_text += csv_text + "\n\n"
-    st.write("Combined extracted text from PDFs and CSVs:")
-    st.text_area("Document Content", combined_text, height=200)
     # User inputs their question
-    query = st.text_input("Enter your question about the uploaded documents:")
     if st.button("Get Answer"):
         if query.strip() != "":
-            # Generate answer based on extracted document text and the query
             answer = generate_answer(combined_text, query)
             st.write("Answer:", answer)
         else:

 import pandas as pd
 import streamlit as st
+# Function to extract text from PDF
 def extract_text_from_pdf(uploaded_file):
     pdf_text = ""
     reader = PyPDF2.PdfReader(uploaded_file)
         pdf_text += page.extract_text()
     return pdf_text
+# Function to extract text from CSV
 def extract_text_from_csv(uploaded_file):
     """Return the contents of an uploaded CSV file as one plain-text table.

     Args:
         uploaded_file: Anything ``pandas.read_csv`` accepts; the app below
             passes the objects returned by ``st.file_uploader``.

     Returns:
         The parsed table as rendered by ``DataFrame.to_string`` with the
         index column omitted.
     """
     # Parse with pandas' default settings (no explicit delimiter/encoding).
     df = pd.read_csv(uploaded_file)
     # index=False keeps the row-number column out of the generated text.
     csv_text = df.to_string(index=False)
     return csv_text
+# Initialize the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("ricepaper/vi-gemma-2b-RAG")
 model = AutoModelForCausalLM.from_pretrained(
     return answer
 # Streamlit App
+st.title("RAG-Based Multi-File Question Answering Application")
+# Upload PDF or CSV
+uploaded_files = st.file_uploader("Upload PDF or CSV files", type=['pdf', 'csv'], accept_multiple_files=True)
 if uploaded_files:
     combined_text = ""
+    # Process each uploaded file
     for uploaded_file in uploaded_files:
         if uploaded_file.type == "application/pdf":
+            # Extract text from PDF
             pdf_text = extract_text_from_pdf(uploaded_file)
+            combined_text += pdf_text + "\n"
+            st.write(f"Extracted text from PDF: {uploaded_file.name}")
         elif uploaded_file.type == "text/csv":
+            # Extract text from CSV
             csv_text = extract_text_from_csv(uploaded_file)
+            combined_text += csv_text + "\n"
+            st.write(f"Extracted text from CSV: {uploaded_file.name}")
+    st.text_area("Combined File Content", combined_text, height=200)
     # User inputs their question
+    query = st.text_input("Enter your question about the uploaded content:")
     if st.button("Get Answer"):
         if query.strip() != "":
+            # Generate answer based on combined extracted text and the query
             answer = generate_answer(combined_text, query)
             st.write("Answer:", answer)
         else: