# ragtest-sakimilo / evaluate_model.py
# Author: lingyit1108 — "to create RAGAs result with triad of metrics"
# NOTE(review): the original paste carried Hugging Face file-viewer chrome
# (commit b580d80, "raw / history / blame / contribute / delete", 2.83 kB);
# preserved here as a comment so the file parses as Python.
import os, time
import pandas as pd
from tqdm import tqdm
import chromadb
import openai
from llama_index import (
SimpleDirectoryReader,
StorageContext,
Document,
VectorStoreIndex,
ServiceContext
)
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms import OpenAI
from llama_index.embeddings import HuggingFaceEmbedding
from trulens_eval import Tru
import utils
from utils import get_prebuilt_trulens_recorder
# Configure the OpenAI client at import time using the project's utils helper
# (presumably reads the key from env/config — see utils.get_openai_api_key).
openai.api_key = utils.get_openai_api_key()
def main():
    """Evaluate a RAG query engine over a set of QnA questions with TruLens.

    Pipeline:
      1. Build a query engine from a persisted Chroma vector store, using a
         fine-tuned local embedding model and gpt-3.5-turbo-1106 as the LLM.
      2. Assemble eval questions from ``mock_qna_source.csv`` plus
         ``eval_questions.txt`` (blank-line separated).
      3. Send every question through the engine under a TruLens recorder.
      4. Dump the recorded results to ``./results/records.csv`` and write the
         elapsed time to ``./results/time_cost.txt``.

    Side effects: reads/writes local files, calls the OpenAI API, and writes
    to the TruLens SQLite database at ``./models/trulens_eval.sqlite``.
    """
    start_time = time.time()

    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
    # "local:" prefix tells llama_index to load embeddings from a local path.
    fine_tuned_path = "local:./models/fine-tuned-embeddings"

    db = chromadb.PersistentClient(path="./models/chroma_db")
    chroma_collection = db.get_or_create_collection("quickstart")

    # assign chroma as the vector_store to the context
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(llm=llm, embed_model=fine_tuned_path)

    print("Loading embeddings from vector store..")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        storage_context=storage_context,
        service_context=service_context,
    )
    query_engine = index.as_query_engine()

    # Questions from the curated CSV; drop rows with no question text.
    mock_qna_source = pd.read_csv("./database/mock_qna_source.csv")
    mock_qna_source = mock_qna_source[mock_qna_source["question"].notnull()]
    print("mock_qna_source.shape", mock_qna_source.shape)

    # Questions from the free-text file, one per blank-line-separated chunk.
    with open("./raw_documents/eval_questions.txt", "r", encoding="utf-8") as fp:
        questions_content = fp.read()
    # Fix: a naive split("\n\n") yields empty strings for trailing/duplicate
    # blank lines, which would then be sent to the LLM as "questions".
    questions_content_ls = [
        q.strip() for q in questions_content.split("\n\n") if q.strip()
    ]

    eval_questions = mock_qna_source["question"].tolist() + questions_content_ls

    # Smoke-test a single query before running the full (paid) evaluation loop.
    response = query_engine.query(eval_questions[0])
    print(str(response))

    tru = Tru(database_file="./models/trulens_eval.sqlite")
    tru_recorder = get_prebuilt_trulens_recorder(
        query_engine, app_id="Direct Query Engine"
    )

    print("Sending each question to llm..")
    # Responses are captured by the TruLens recorder; the return values are
    # intentionally discarded here.
    with tru_recorder:
        for question in tqdm(eval_questions):
            query_engine.query(question)

    # app_ids=[] fetches records for all apps in the TruLens database.
    records, _feedback = tru.get_records_and_feedback(app_ids=[])
    os.makedirs("./results", exist_ok=True)
    records.to_csv("./results/records.csv", index=False)
    print(tru.db.engine.url.render_as_string(hide_password=False))

    end_time = time.time()
    time_spent_mins = (end_time - start_time) / 60
    with open("./results/time_cost.txt", "w", encoding="utf-8") as fp:
        fp.write(f"Takes {int(time_spent_mins)} mins to create llm evaluation.")
# Script entry point: run the full evaluation only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()