# project/test.py
import os
import unittest
from timeit import default_timer as timer
from typing import Optional

from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import HumanMessage

from app_modules.llm_loader import LLMLoader
from app_modules.utils import *

user_question = "What's the capital city of Malaysia?"

# Number of CPU threads for inference; falls back to 4 when the
# NUMBER_OF_CPU_CORES environment variable is unset.
n_threds = int(os.environ.get("NUMBER_OF_CPU_CORES") or "4")

# get_device_types() comes from app_modules.utils (star import above).
hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")


class MyCustomHandler(BaseCallbackHandler):
    """Callback handler that records LLM outputs for later inspection."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.texts = []

    def get_standalone_question(self) -> Optional[str]:
        return self.texts[0].strip() if len(self.texts) > 0 else None

    def on_llm_end(self, response, **kwargs) -> None:
        """Run when the LLM ends running."""
        print("\non_llm_end - response:")
        print(response)
        self.texts.append(response.generations[0][0].text)


class TestLLMLoader(unittest.TestCase):
    def run_test_case(self, llm_model_type, query):
        llm_loader = LLMLoader(llm_model_type)

        start = timer()
        llm_loader.init(
            n_threds=n_threds, hf_pipeline_device_type=hf_pipeline_device_type
        )
        end = timer()
        print(f"Model loaded in {end - start:.3f}s")

        # OpenAI chat models expect a list of messages; the other loaders
        # take the raw prompt string.
        result = llm_loader.llm(
            [HumanMessage(content=query)] if llm_model_type == "openai" else query
        )
        end2 = timer()
        print(f"Inference completed in {end2 - end:.3f}s")
        print(result)

    def test_openai(self):
        self.run_test_case("openai", user_question)

    def test_llamacpp(self):
        self.run_test_case("llamacpp", user_question)

    def test_gpt4all_j(self):
        self.run_test_case("gpt4all-j", user_question)

    def test_huggingface(self):
        self.run_test_case("huggingface", user_question)


if __name__ == "__main__":
    unittest.main()