"""Gradio chat app for question answering over local PDFs with a RAG pipeline.

PDFs in the ``data`` directory are embedded and indexed with LlamaIndex, and a
quantized Llama 3.2 model running through llama.cpp generates the answers.
"""

import os

import gradio as gr
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
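
# Assumed dependencies (PyPI package names): gradio, llama-index,
# llama-index-llms-llama-cpp, and llama-index-embeddings-huggingface.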

# Quantized Llama 3.2 3B Instruct model in GGUF format, run locally via llama.cpp.
model_url = "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf"

llm = LlamaCPP(
    model_url=model_url,
    temperature=0.1,
    max_new_tokens=256,
    context_window=2048,
    generate_kwargs={},
    # Offload one layer to the GPU if available; use -1 for all layers, 0 for CPU-only.
    model_kwargs={"n_gpu_layers": 1},
    # Convert chat messages and completions into the Llama prompt format.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

# Local embedding model used to vectorize the PDF chunks for retrieval.
embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")


def initialize_index():
    """Initialize the vector store index from the PDF files in the data directory."""
    loader = SimpleDirectoryReader(
        input_dir="data",
        required_exts=[".pdf"],
    )
    documents = loader.load_data()

    # Embed the documents and build an in-memory vector index.
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embeddings,
    )

    # Return a query engine that retrieves relevant chunks and answers with the local LLM.
    return index.as_query_engine(llm=llm)


query_engine = initialize_index()


def process_query(
    message: str,
    history: list[tuple[str, str]],  # supplied by gr.ChatInterface, unused here
) -> str:
    """Process a query using the RAG system."""
    try:
        response = query_engine.query(message)
        return str(response)
    except Exception as e:
        return f"Error processing query: {e}"


demo = gr.ChatInterface(
    process_query,
    title="PDF Question Answering with RAG + Llama",
    description="Ask questions about the content of the loaded PDF documents using a local Llama model.",
)

if __name__ == "__main__":
    # debug=True blocks the main thread and prints errors to the console.
    demo.launch(debug=True)