pdf-chatbot-opensource-llm

Runtime error

App Files Community

Ubai committed on Feb 26

Commit

af00b58

•

1 Parent(s): 7da7eb1

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -116

app.py CHANGED Viewed

@@ -1,136 +1,93 @@
 import gradio as gr
 import os
 from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
 from langchain.chains import ConversationalRetrievalChain
 from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.llms import HuggingFaceHub
 from pathlib import Path
 import chromadb
-# List of available LLM models
-list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1",
-            "google/gemma-7b-it", "google/gemma-2b-it",
-            "HuggingFaceH4/zephyr-7b-beta", "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2",
-            "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct",
-            "google/flan-t5-xxl"
-]
-list_llm_simple = [os.path.basename(llm) for llm in list_llm]
-# Load PDF document and create doc splits
-def load_doc(list_file_path, chunk_size, chunk_overlap):
-    loaders = [PyPDFLoader(x) for x in list_file_path]
-    pages = []
-    for loader in loaders:
-        pages.extend(loader.load())
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
-    doc_splits = text_splitter.split_documents(pages)
-    return doc_splits
-# Create vector database
-def create_db(splits, collection_name):
-    embedding = HuggingFaceEmbeddings()
-    new_client = chromadb.EphemeralClient()
-    vectordb = Chroma.from_documents(
-        documents=splits,
-        embedding=embedding,
-        client=new_client,
-        collection_name=collection_name
-    )
-    return vectordb
-# Initialize langchain LLM chain
-def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
-    if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
-        model_kwargs = {"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
-    elif llm_model == "microsoft/phi-2":
-        raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
-    elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-        model_kwargs = {"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-    else:
-        model_kwargs = {"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-    llm = HuggingFaceHub(
-        repo_id=llm_model,
-        model_kwargs=model_kwargs
-    )
-    memory = ConversationBufferMemory(
-        memory_key="chat_history",
-        output_key='answer',
-        return_messages=True
-    )
-    retriever = vector_db.as_retriever()
-    qa_chain = ConversationalRetrievalChain.from_llm(
-        llm,
-        retriever=retriever,
-        chain_type="stuff",
-        memory=memory,
-        return_source_documents=True,
-        verbose=False
-    )
-    progress(0.9, desc="Done!")
-    return qa_chain
-def initialize_demo(list_file_obj, chunk_size, chunk_overlap, db_progress):
-    list_file_path = [file.name for file in list_file_obj if file is not None]
-    collection_name = Path(list_file_path[0]).stem.replace(" ", "-")[:50]
-    doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
-    vector_db = create_db(doc_splits, collection_name)
-    qa_chain = initialize_llmchain(
-        list_llm[0],  # Using Mistral-7B-Instruct-v0.2 as the LLM model
-        0.7,  # Temperature
-        1024,  # Max Tokens
-        3,  # Top K
-        vector_db,
-        db_progress
-    )
-    return vector_db, collection_name, qa_chain, "Complete!"
-def upload_file(file_obj):
-    list_file_path = []
-    for file in file_obj:
-        if file is not None:
-            file_path = file.name
-            list_file_path.append(file_path)
-    return list_file_path
 def demo():
     with gr.Blocks(theme="base") as demo:
-        vector_db = gr.State()
         collection_name = gr.State()
-        qa_chain = gr.State()
-        with gr.Tab("Step 1 - Document pre-processing"):
-            document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
-            slider_chunk_size = gr.Slider(minimum=100, maximum=1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
-            slider_chunk_overlap = gr.Slider(minimum=10, maximum=200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
-            db_progress = gr.Textbox(label="Vector database initialization", value="None")
-            db_btn = gr.Button("Generate vector database...")
-        with gr.Tab("Step 2 - QA chain initialization"):
-            llm_progress = gr.Textbox(value="None", label="QA chain initialization")
-            qachain_btn = gr.Button("Initialize question-answering chain...")
-        with gr.Tab("Step 3 - Conversation with chatbot"):
             chatbot = gr.Chatbot(height=300)
-            doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
-            source1_page = gr.Number(label="Page", scale=1)
-            doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
-            source2_page = gr.Number(label="Page", scale=1)
-            doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
-            source3_page = gr.Number(label="Page", scale=1)
             msg = gr.Textbox(placeholder="Type message", container=True)
             submit_btn = gr.Button("Submit")
             clear_btn = gr.ClearButton([msg, chatbot])
-        document.upload(initialize_demo, inputs=[document, slider_chunk_size, slider_chunk_overlap, db_progress], outputs=[vector_db, collection_name, qa_chain, db_progress])
-        qachain_btn.click(initialize_llmchain, inputs=[qa_chain, llm_progress], outputs=[qa_chain, llm_progress])
-        submit_btn.click(lambda: None, inputs=None, outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page])

 import gradio as gr
 import os
 from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
 from langchain.chains import ConversationalRetrievalChain
 from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.llms import HuggingFacePipeline, HuggingFaceHub
+from langchain.chains import ConversationChain
+from langchain.memory import ConversationBufferMemory
 from pathlib import Path
 import chromadb
+from transformers import AutoTokenizer
+import transformers
+import torch
+import tqdm
+import accelerate
+# Default LLM model
+chosen_llm_model = "mistralai/Mistral-7B-Instruct-v0.2"
+# Default chunk size and overlap
+chunk_size = 600
+chunk_overlap = 40
+# Default model configuration
+llm_temperature = 0.7
+max_tokens = 1024
+top_k = 3
+# Initialize vector database in background
+accelerated(initialize_database)()  # Run in background with Accelerate
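+# Function definitions belong here (placeholder only in this revision).
+# NOTE: initialize_database, initialize_LLM and conversation are referenced in this file but are not defined in this revision as shown.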
 def demo():
     with gr.Blocks(theme="base") as demo:
+        qa_chain = gr.State()  # Store the initialized QA chain
         collection_name = gr.State()
+        gr.Markdown(
+            """
+            <center><h2>PDF-based chatbot (powered by LangChain and open-source LLMs)</center></h2>
+            <h3>Ask any questions about your PDF documents, along with follow-ups</h3>
+            <b>Note:</b> This AI assistant performs retrieval-augmented generation from your PDF documents. \
+            When generating answers, it takes past questions into account (via conversational memory), and includes document references for clarity purposes.</i>
+            <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate an output.<br>
+            """
+        )
+        with gr.Row():
+            document = gr.Files(
+                height=100,
+                file_count="multiple",
+                file_types=["pdf"],
+                interactive=True,
+                label="Upload your PDF documents (single or multiple)",
+            )
+        with gr.Row():
             chatbot = gr.Chatbot(height=300)
+        with gr.Accordion("Advanced - Document references", open=False):
+            with gr.Row():
+                doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
+                source1_page = gr.Number(label="Page", scale=1)
+            with gr.Row():
+                doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
+                source2_page = gr.Number(label="Page", scale=1)
+            with gr.Row():
+                doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
+                source3_page = gr.Number(label="Page", scale=1)
+        with gr.Row():
             msg = gr.Textbox(placeholder="Type message", container=True)
+        with gr.Row():
             submit_btn = gr.Button("Submit")
             clear_btn = gr.ClearButton([msg, chatbot])
+        # Initialize default QA chain when documents are uploaded
+        document.uploaded(initialize_LLM, inputs=[chosen_llm_model])
+        # Chatbot events
+        msg.submit(conversation, inputs=[qa_chain, msg, chatbot])
+        submit_btn.click(conversation, inputs=[qa_chain, msg, chatbot])
+        clear_btn.click(lambda: [None, "", 0, "", 0, "", 0], inputs=None, outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page])
+    demo.launch(debug=True)
+if __name__ == "__main__":
+    demo()