import os

import openai
from llama_index import (
    Document,
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
)
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import OpenAI
from trulens_eval import Tru

import utils
from utils import get_prebuilt_trulens_recorder

openai.api_key = utils.get_openai_api_key()


def _load_eval_questions(path, separator="\n\n"):
    """Read evaluation questions from *path*.

    Questions are separated by *separator* (blank lines by default).
    Blank chunks produced by trailing or doubled separators are skipped
    so no empty question is ever sent to the query engine.
    """
    # encoding pinned so the read does not depend on the platform locale
    with open(path, "r", encoding="utf-8") as file:
        content = file.read()
    questions = []
    for question in content.split(separator):
        question = question.strip()
        if question:
            print(question)
            print(separator)
            questions.append(question)
    return questions


def main():
    """Build a vector index over the knowledge-base PDF, answer the eval
    questions through a TruLens-instrumented query engine, and write the
    feedback records to ./results/records.csv.

    Skipped entirely when ./default.sqlite already exists (TruLens database
    left by a previous run).
    """
    if os.path.exists("./default.sqlite"):
        return  # earlier run already recorded results

    documents = SimpleDirectoryReader(
        input_files=["./raw_documents/HI_knowledge_base.pdf"]
    ).load_data()
    # Merge all pages into one Document so the index chunks the whole
    # corpus uniformly instead of per-page.
    document = Document(text="\n\n".join(doc.text for doc in documents))

    ### gpt-4-1106-preview
    ### gpt-3.5-turbo-1106 / gpt-3.5-turbo
    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

    index = VectorStoreIndex.from_documents([document], service_context=service_context)
    query_engine = index.as_query_engine()

    eval_questions = _load_eval_questions("./raw_documents/eval_questions.txt")

    # Smoke-test the engine once before instrumenting it.
    response = query_engine.query(eval_questions[0])
    print(str(response))

    tru = Tru()
    # tru.reset_database()

    tru_recorder = get_prebuilt_trulens_recorder(
        query_engine, app_id="Direct Query Engine"
    )
    with tru_recorder as recording:
        for question in eval_questions:
            query_engine.query(question)

    # feedback column metadata is unused here; only the records are saved
    records, _feedback = tru.get_records_and_feedback(app_ids=[])
    os.makedirs("./results", exist_ok=True)
    records.to_csv("./results/records.csv", index=False)

    print(tru.db.engine.url.render_as_string(hide_password=False))
    # tru.run_dashboard()


if __name__ == "__main__":
    main()