import os
import time

import chromadb
import openai
import pandas as pd
from tqdm import tqdm
from llama_index import (
    Document,
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import OpenAI
from llama_index.vector_stores.chroma import ChromaVectorStore
from trulens_eval import Tru

import utils
from utils import get_prebuilt_trulens_recorder

openai.api_key = utils.get_openai_api_key()


def _build_query_engine():
    """Build a query engine over the persisted Chroma vector store.

    Uses gpt-3.5-turbo-1106 at temperature 0 as the LLM and the locally
    fine-tuned embedding model for retrieval.

    Returns:
        A LlamaIndex query engine backed by the ``quickstart`` collection.
    """
    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
    fine_tuned_path = "local:./models/fine-tuned-embeddings"

    db = chromadb.PersistentClient(path="./models/chroma_db")
    chroma_collection = db.get_or_create_collection("quickstart")

    # Assign chroma as the vector_store to the context.
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(
        llm=llm, embed_model=fine_tuned_path
    )

    print("Loading embeddings from vector store..")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        storage_context=storage_context,
        service_context=service_context,
    )
    return index.as_query_engine()


def _load_eval_questions():
    """Collect evaluation questions from both question sources.

    Combines the non-null ``question`` column of the mock Q&A CSV with the
    blank-line-separated entries of ``eval_questions.txt``.

    Returns:
        list[str]: all evaluation questions, CSV questions first.
    """
    mock_qna_source = pd.read_csv("./database/mock_qna_source.csv")
    mock_qna_source = mock_qna_source[
        mock_qna_source["question"].notnull()
    ]
    print("mock_qna_source.shape", mock_qna_source.shape)

    with open("./raw_documents/eval_questions.txt", "r") as fp:
        questions_content = fp.read()
    # Questions in the text file are separated by blank lines.
    questions_content_ls = questions_content.split("\n\n")

    return mock_qna_source["question"].tolist() + questions_content_ls


def main():
    """Run every evaluation question through the query engine under a
    TruLens recorder, then export the recorded results.

    Side effects: writes ``./results/records.csv`` and
    ``./results/time_cost.txt``; appends to the TruLens SQLite database at
    ``./models/trulens_eval.sqlite``.
    """
    start_time = time.time()

    query_engine = _build_query_engine()
    eval_questions = _load_eval_questions()

    # Smoke-test the engine on the first question before the full run.
    response = query_engine.query(eval_questions[0])
    print(str(response))

    tru = Tru(database_file="./models/trulens_eval.sqlite")
    tru_recorder = get_prebuilt_trulens_recorder(
        query_engine, app_id="Direct Query Engine"
    )

    print("Sending each question to llm..")
    with tru_recorder as recording:
        # Responses are captured by the recorder; the return value itself
        # is not needed here.
        for question in tqdm(eval_questions):
            response = query_engine.query(question)

    records, feedback = tru.get_records_and_feedback(app_ids=[])
    os.makedirs("./results", exist_ok=True)
    records.to_csv("./results/records.csv", index=False)
    print(tru.db.engine.url.render_as_string(hide_password=False))

    end_time = time.time()
    time_spent_mins = (end_time - start_time) / 60
    with open("./results/time_cost.txt", "w") as fp:
        fp.write(f"Takes {int(time_spent_mins)} mins to create llm evaluation.")


if __name__ == "__main__":
    main()