# %%
# Cell 1: load environment variables from a .env file before anything else
# reads them.
from dotenv import load_dotenv

load_dotenv()

# The bare string literal below is never executed: it parks the one-off data
# preparation cells (building ./data/corpus.parquet and ./data/qa.parquet)
# so they do not run again with the rest of the script.
"""
# %%
import pandas as pd

df = pd.read_parquet(
    "../raw_data/dale_carnegie/how_to_win_friends_and_influence_people.parquet"
)
df.head()

# %%
from langchain.schema import Document

documents = []
for index, row in df.iterrows():
    doc = Document(page_content=row["text"])
    documents.append(doc)
documents

# %%
from autorag.utils import cast_corpus_dataset
from autorag.data.corpus import langchain_documents_to_parquet

corpus_df = langchain_documents_to_parquet(documents)
corpus_df = cast_corpus_dataset(corpus_df)
corpus_df.to_parquet("./data/corpus.parquet")

# %%
import nest_asyncio

nest_asyncio.apply()

import os
from llama_index.llms.openai import OpenAI
from autorag.data.qacreation import generate_qa_llama_index, make_single_content_qa

llm = OpenAI(
    api_base=os.getenv("OPENAI_BASE_URL"),
    model="gpt-4o",
)
qa_df = make_single_content_qa(
    corpus_df,
    content_size=49,
    qa_creation_func=generate_qa_llama_index,
    llm=llm,
    question_num_per_content=1,
)
qa_df.to_parquet("./data/qa.parquet")
"""

# %%
# Cell 2: register the candidate embedding models and run the benchmark trial.
import nest_asyncio

nest_asyncio.apply()

import autorag as ag
from autorag.evaluator import Evaluator
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# (registry key, Hugging Face model id) pairs, registered in this order so the
# names are available to the trial config.
HF_EMBEDDING_CANDIDATES = (
    ("huggingface_baai_llm_embedder", "BAAI/llm-embedder"),
    ("huggingface_baai_bge_large_en", "BAAI/bge-large-en-v1.5"),
    ("huggingface_baai_bge_base_en", "BAAI/bge-base-en-v1.5"),
    ("huggingface_baai_bge_small_en", "BAAI/bge-small-en-v1.5"),
    ("huggingface_baai_bge_m3", "BAAI/bge-m3"),
)

for registry_key, hf_model_id in HF_EMBEDDING_CANDIDATES:
    # Build each embedding, then register it, one model at a time.
    ag.embedding_models[registry_key] = HuggingFaceEmbedding(hf_model_id)

# The evaluator reads the QA and corpus parquet files and uses ./benchmark as
# its project directory.
evaluator = Evaluator(
    qa_data_path="./data/qa.parquet",
    corpus_data_path="./data/corpus.parquet",
    project_dir="./benchmark",
)
evaluator.start_trial("./config/config_small.yaml")

# %%