File size: 2,828 Bytes
b580d80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Stdlib
import os, time
# Third-party: dataframes + progress bar
import pandas as pd
from tqdm import tqdm

# Vector store backend and LLM client
import chromadb
import openai
from llama_index import (
            SimpleDirectoryReader,
            StorageContext,
            Document,
            VectorStoreIndex,
            ServiceContext
)

from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms import OpenAI
from llama_index.embeddings import HuggingFaceEmbedding
from trulens_eval import Tru

# Project-local helpers (API key lookup, TruLens recorder factory)
import utils
from utils import get_prebuilt_trulens_recorder

# Module-level side effect: configure the OpenAI client before main() runs.
openai.api_key = utils.get_openai_api_key()

def main():
    """Run a TruLens evaluation pass over a Chroma-backed llama_index engine.

    Loads the persisted Chroma collection with fine-tuned local embeddings,
    builds a query engine, sends every evaluation question (CSV + text file)
    through it while TruLens records feedback, then writes the record table
    and total wall-clock time to ./results.
    """
    start_time = time.time()

    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.0)
    # "local:" prefix tells llama_index to load embeddings from a local path.
    fine_tuned_path = "local:./models/fine-tuned-embeddings"

    db = chromadb.PersistentClient(path="./models/chroma_db")
    chroma_collection = db.get_or_create_collection("quickstart")

    # assign chroma as the vector_store to the context
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(llm=llm, embed_model=fine_tuned_path)

    print("Loading embeddings from vector store..")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        storage_context=storage_context,
        service_context=service_context
    )
    query_engine = index.as_query_engine()

    mock_qna_source = pd.read_csv("./database/mock_qna_source.csv")
    mock_qna_source = mock_qna_source[ mock_qna_source["question"].notnull() ]
    print("mock_qna_source.shape", mock_qna_source.shape)

    with open("./raw_documents/eval_questions.txt", "r", encoding="utf-8") as fp:
        questions_content = fp.read()
    # Questions are separated by blank lines. Strip each fragment and drop
    # empties — previously a trailing newline produced an empty string that
    # was sent to the query engine as a "question".
    questions_content_ls = [q.strip() for q in questions_content.split("\n\n") if q.strip()]

    eval_questions = mock_qna_source["question"].tolist() + questions_content_ls
    # Smoke-test the engine on one question before the recorded full run.
    response = query_engine.query(eval_questions[0])
    print(str(response))

    tru = Tru(database_file="./models/trulens_eval.sqlite")
    tru_recorder = get_prebuilt_trulens_recorder(query_engine,
                                                 app_id="Direct Query Engine")

    print("Sending each question to llm..")
    # The recorder context captures every query made inside the block;
    # responses themselves are not needed here, only the recorded traces.
    with tru_recorder:
        for question in tqdm(eval_questions):
            query_engine.query(question)

    # Feedback column metadata is unused; only the record table is persisted.
    records, _feedback = tru.get_records_and_feedback(app_ids=[])

    os.makedirs("./results", exist_ok=True)
    records.to_csv("./results/records.csv", index=False)

    # Print the resolved DB URL (password included) for debugging/traceability.
    print(tru.db.engine.url.render_as_string(hide_password=False))

    end_time = time.time()
    time_spent_mins = (end_time - start_time) / 60
    with open("./results/time_cost.txt", "w", encoding="utf-8") as fp:
        fp.write(f"Takes {int(time_spent_mins)} mins to create llm evaluation.")

# Script entry point.
if __name__ == "__main__":
    main()