"""Gradio chat app for question answering over local PDFs with a RAG pipeline.

PDFs in the ``data`` directory are embedded and indexed with LlamaIndex, and a
quantized Llama 3.2 model running through llama.cpp generates the answers.
"""

import os

import gradio as gr
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
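
# Assumed dependencies (PyPI package names): gradio, llama-index,
# llama-index-llms-llama-cpp, and llama-index-embeddings-huggingface.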

# Quantized Llama 3.2 3B Instruct model in GGUF format, run locally via llama.cpp.
model_url = "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf"

llm = LlamaCPP(
    model_url=model_url,
    temperature=0.1,
    max_new_tokens=256,
    context_window=2048,
    generate_kwargs={},
    # Offload one layer to the GPU if available; use -1 for all layers, 0 for CPU-only.
    model_kwargs={"n_gpu_layers": 1},
    # Convert chat messages and completions into the Llama prompt format.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

# Local embedding model used to vectorize the PDF chunks for retrieval.
embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")


def initialize_index():
    """Initialize the vector store index from the PDF files in the data directory."""
    loader = SimpleDirectoryReader(
        input_dir="data",
        required_exts=[".pdf"],
    )
    documents = loader.load_data()

    # Embed the documents and build an in-memory vector index.
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embeddings,
    )

    # Return a query engine that retrieves relevant chunks and answers with the local LLM.
    return index.as_query_engine(llm=llm)


query_engine = initialize_index()


def process_query(
    message: str,
    history: list[tuple[str, str]],  # supplied by gr.ChatInterface, unused here
) -> str:
    """Process a query using the RAG system."""
    try:
        response = query_engine.query(message)
        return str(response)
    except Exception as e:
        return f"Error processing query: {e}"


demo = gr.ChatInterface(
    process_query,
    title="PDF Question Answering with RAG + Llama",
    description="Ask questions about the content of the loaded PDF documents using a local Llama model.",
)

if __name__ == "__main__":
    # debug=True blocks the main thread and prints errors to the console.
    demo.launch(debug=True)