import gradio as gr
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
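# Assumed dependency set (not pinned anywhere in the source): the llama-index
# core package plus its llama.cpp and HuggingFace embedding integrations, and
# Gradio, e.g.:
#   pip install llama-index llama-index-llms-llama-cpp \
#       llama-index-embeddings-huggingface gradio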
model_url = 'https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf'

llm = LlamaCPP(
    # You can pass in the URL to a GGUF model to download it automatically
    model_url=model_url,
    temperature=0.1,
    max_new_tokens=256,
    context_window=2048,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set n_gpu_layers to at least 1 to offload layers to the GPU
    model_kwargs={"n_gpu_layers": 1},
    # NOTE: these llama_utils helpers emit Llama 2's [INST] chat format, not the
    # Llama 3 header format this model was trained on; see the Llama 3-style
    # sketch below for an alternative.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
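# A minimal sketch of Llama 3-style prompt formatters, assuming the GGUF follows
# Meta's Llama 3 header template. The function names here are illustrative, not
# part of llama_index; pass them to LlamaCPP in place of the llama_utils pair if
# the [INST]-style prompts degrade answer quality.
def llama3_messages_to_prompt(messages):
    """Render a list of ChatMessage objects in Llama 3's header-delimited format."""
    prompt = "<|begin_of_text|>"
    for message in messages:
        prompt += (
            f"<|start_header_id|>{message.role.value}<|end_header_id|>\n\n"
            f"{message.content}<|eot_id|>"
        )
    # Cue the model to respond as the assistant
    prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
    return prompt

def llama3_completion_to_prompt(completion):
    """Wrap a bare completion string as a single-turn user message."""
    return (
        "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
        f"{completion}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    )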
# Initialize the embedding model used for indexing and querying
embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
def initialize_index():
    """Build a vector store index from the PDF files in the data directory."""
    # Load documents from the data directory
    loader = SimpleDirectoryReader(
        input_dir="data",
        required_exts=[".pdf"],
    )
    documents = loader.load_data()

    # Embed the documents and build an in-memory vector index
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embeddings,
    )

    # Return a query engine that answers with the Llama model
    return index.as_query_engine(llm=llm)
# Initialize the query engine at startup
query_engine = initialize_index()
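# Re-embedding every PDF on each startup is wasteful for large corpora. A minimal
# sketch of persisting the index inside initialize_index and reloading it on later
# runs, assuming a writable "storage" directory (the directory name is illustrative):
#
#     import os
#     from llama_index.core import StorageContext, load_index_from_storage
#
#     if os.path.isdir("storage"):
#         storage_context = StorageContext.from_defaults(persist_dir="storage")
#         index = load_index_from_storage(storage_context, embed_model=embeddings)
#     else:
#         index = VectorStoreIndex.from_documents(documents, embed_model=embeddings)
#         index.storage_context.persist(persist_dir="storage")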
def process_query(
    message: str,
    history: list[tuple[str, str]],
) -> str:
    """Process a single chat message with the RAG query engine."""
    try:
        # Get a response from the query engine (for token streaming, see the
        # sketch below)
        response = query_engine.query(message)
        return str(response)
    except Exception as e:
        return f"Error processing query: {e}"
# Create the Gradio chat interface
demo = gr.ChatInterface(
    process_query,
    title="PDF Question Answering with RAG + Llama",
    description="Ask questions about the content of the loaded PDF documents using a local Llama model.",
)

if __name__ == "__main__":
    demo.launch(debug=True)