Makefile CHANGED
@@ -2,12 +2,10 @@
 
 
 style:
-	python -m black --line-length 119 .
-	python -m isort .
 	ruff check --fix .
+	ruff format .
 
 
 quality:
-	python -m black --check --line-length 119 .
-	python -m isort --check-only .
 	ruff check .
+	ruff format --check .
app.py CHANGED
@@ -1,32 +1,31 @@
 import logging
-from apscheduler.schedulers.background import BackgroundScheduler
-
-from src.logging import configure_root_logger
-
-logging.getLogger("httpx").setLevel(logging.WARNING)
-logging.getLogger("numexpr").setLevel(logging.WARNING)
-logging.getLogger("absl").setLevel(logging.WARNING)
-configure_root_logger()
-
 from functools import partial
 
 import gradio as gr
+from apscheduler.schedulers.background import BackgroundScheduler
+
 # Choose ligtheval or harness backend
+# from main_backend_harness import run_auto_eval
 from main_backend_lighteval import run_auto_eval
-#from main_backend_harness import run_auto_eval
 
-from src.display.log_visualizer import log_file_to_html_string
 from src.display.css_html_js import dark_mode_gradio_js
-from src.envs import REFRESH_RATE, REPO_ID, QUEUE_REPO, RESULTS_REPO
-from src.logging import setup_logger, log_file
+from src.display.log_visualizer import log_file_to_html_string
+from src.envs import QUEUE_REPO, REFRESH_RATE, REPO_ID, RESULTS_REPO
+from src.logging import configure_root_logger, log_file, setup_logger
+
+
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("numexpr").setLevel(logging.WARNING)
+logging.getLogger("absl").setLevel(logging.WARNING)
+configure_root_logger()
 
 logging.basicConfig(level=logging.INFO)
 logger = setup_logger(__name__)
 
 
-intro_md = f"""
+intro_md = """
 # Intro
-This is a visual for the auto evaluator.
+This is a visual for the auto evaluator.
 """
 
 links_md = f"""
@@ -39,6 +38,7 @@ links_md = f"""
 | Results Repo | [{RESULTS_REPO}](https://huggingface.co/datasets/{RESULTS_REPO}) |
 """
 
+
 def auto_eval():
     logger.info("Triggering Auto Eval")
     run_auto_eval()
@@ -52,20 +52,18 @@ with gr.Blocks(js=dark_mode_gradio_js) as demo:
     output_html = gr.HTML(partial(log_file_to_html_string, reverse=reverse_order_checkbox), every=1)
     with gr.Row():
         download_button = gr.DownloadButton("Download Log File", value=log_file)
-        with gr.Accordion('Log View Configuration', open=False):
+        with gr.Accordion("Log View Configuration", open=False):
             reverse_order_checkbox.render()
     # Add a button that when pressed, triggers run_auto_eval
     button = gr.Button("Manually Run Evaluation")
    gr.Markdown(links_md)
 
-    #dummy = gr.Markdown(auto_eval, every=REFRESH_RATE, visible=False)
+    # dummy = gr.Markdown(auto_eval, every=REFRESH_RATE, visible=False)
 
     button.click(fn=auto_eval, inputs=[], outputs=[])
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     scheduler = BackgroundScheduler()
     scheduler.add_job(auto_eval, "interval", seconds=REFRESH_RATE)
     scheduler.start()
-    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
-                                                    show_error=True,
-                                                    server_port=7860)
+    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0", show_error=True, server_port=7860)
custom_tasks.py CHANGED
@@ -6,6 +6,7 @@ This file generally create just a TASKS_TABLE and TASKS_GROUPS which are then im
 
 Author:
 """
+
 from lighteval.tasks.lighteval_task import LightevalTaskConfig
 from lighteval.tasks.requests import Doc
 from lighteval.tasks.tasks_prompt_formatting import LETTER_INDICES
main_backend_harness.py CHANGED
@@ -3,25 +3,56 @@ import pprint
 
 from huggingface_hub import snapshot_download
 
-logging.getLogger("openai").setLevel(logging.WARNING)
-
+from src.backend.manage_requests import (
+    FAILED_STATUS,
+    FINISHED_STATUS,
+    PENDING_STATUS,
+    RUNNING_STATUS,
+    check_completed_evals,
+    get_eval_requests,
+    set_eval_request,
+)
 from src.backend.run_eval_suite_harness import run_evaluation
-from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request, PENDING_STATUS, RUNNING_STATUS, FINISHED_STATUS, FAILED_STATUS
 from src.backend.sort_queue import sort_models_by_priority
-
-from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, LIMIT, TOKEN
-from src.envs import TASKS_HARNESS, NUM_FEWSHOT
+from src.envs import (
+    API,
+    DEVICE,
+    EVAL_REQUESTS_PATH_BACKEND,
+    EVAL_RESULTS_PATH_BACKEND,
+    LIMIT,
+    NUM_FEWSHOT,
+    QUEUE_REPO,
+    RESULTS_REPO,
+    TASKS_HARNESS,
+    TOKEN,
+)
 from src.logging import setup_logger
 
 
+logging.getLogger("openai").setLevel(logging.WARNING)
 
 # logging.basicConfig(level=logging.ERROR)
 logger = setup_logger(__name__)
 pp = pprint.PrettyPrinter(width=80)
 
 
-snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
-snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
+snapshot_download(
+    repo_id=RESULTS_REPO,
+    revision="main",
+    local_dir=EVAL_RESULTS_PATH_BACKEND,
+    repo_type="dataset",
+    max_workers=60,
+    token=TOKEN,
+)
+snapshot_download(
+    repo_id=QUEUE_REPO,
+    revision="main",
+    local_dir=EVAL_REQUESTS_PATH_BACKEND,
+    repo_type="dataset",
+    max_workers=60,
+    token=TOKEN,
+)
+
 
 def run_auto_eval():
     current_pending_status = [PENDING_STATUS]
@@ -36,11 +67,13 @@ def run_auto_eval():
         hf_repo=QUEUE_REPO,
         local_dir=EVAL_REQUESTS_PATH_BACKEND,
         hf_repo_results=RESULTS_REPO,
-        local_dir_results=EVAL_RESULTS_PATH_BACKEND
+        local_dir_results=EVAL_RESULTS_PATH_BACKEND,
     )
 
     # Get all eval request that are PENDING, if you want to run other evals, change this parameter
-    eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
+    eval_requests = get_eval_requests(
+        job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND
+    )
     # Sort the evals by priority (first submitted first run)
     eval_requests = sort_models_by_priority(api=API, models=eval_requests)
 
@@ -61,16 +94,16 @@ def run_auto_eval():
     )
 
     run_evaluation(
-        eval_request=eval_request,
-        task_names=TASKS_HARNESS,
-        num_fewshot=NUM_FEWSHOT,
+        eval_request=eval_request,
+        task_names=TASKS_HARNESS,
+        num_fewshot=NUM_FEWSHOT,
         local_dir=EVAL_RESULTS_PATH_BACKEND,
         results_repo=RESULTS_REPO,
         batch_size="auto",
-        device=DEVICE,
-        limit=LIMIT
-    )
+        device=DEVICE,
+        limit=LIMIT,
+    )
 
 
 if __name__ == "__main__":
-    run_auto_eval()
+    run_auto_eval()
main_backend_lighteval.py CHANGED
@@ -3,22 +3,57 @@ import pprint
 
 from huggingface_hub import snapshot_download
 
-logging.getLogger("openai").setLevel(logging.WARNING)
-
+from src.backend.manage_requests import (
+    FAILED_STATUS,
+    FINISHED_STATUS,
+    PENDING_STATUS,
+    RUNNING_STATUS,
+    check_completed_evals,
+    get_eval_requests,
+    set_eval_request,
+)
 from src.backend.run_eval_suite_lighteval import run_evaluation
-from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request, PENDING_STATUS, RUNNING_STATUS, FINISHED_STATUS, FAILED_STATUS
 from src.backend.sort_queue import sort_models_by_priority
-
-from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, API, LIMIT, TOKEN, ACCELERATOR, VENDOR, REGION, TASKS_LIGHTEVAL
+from src.envs import (
+    ACCELERATOR,
+    API,
+    EVAL_REQUESTS_PATH_BACKEND,
+    EVAL_RESULTS_PATH_BACKEND,
+    LIMIT,
+    QUEUE_REPO,
+    REGION,
+    RESULTS_REPO,
+    TASKS_LIGHTEVAL,
+    TOKEN,
+    VENDOR,
+)
 from src.logging import setup_logger
 
+
+logging.getLogger("openai").setLevel(logging.WARNING)
+
 logger = setup_logger(__name__)
 
 # logging.basicConfig(level=logging.ERROR)
 pp = pprint.PrettyPrinter(width=80)
 
-snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
-snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
+snapshot_download(
+    repo_id=RESULTS_REPO,
+    revision="main",
+    local_dir=EVAL_RESULTS_PATH_BACKEND,
+    repo_type="dataset",
+    max_workers=60,
+    token=TOKEN,
+)
+snapshot_download(
+    repo_id=QUEUE_REPO,
+    revision="main",
+    local_dir=EVAL_REQUESTS_PATH_BACKEND,
+    repo_type="dataset",
+    max_workers=60,
+    token=TOKEN,
+)
+
 
 def run_auto_eval():
     current_pending_status = [PENDING_STATUS]
@@ -33,11 +68,13 @@ def run_auto_eval():
         hf_repo=QUEUE_REPO,
         local_dir=EVAL_REQUESTS_PATH_BACKEND,
         hf_repo_results=RESULTS_REPO,
-        local_dir_results=EVAL_RESULTS_PATH_BACKEND
+        local_dir_results=EVAL_RESULTS_PATH_BACKEND,
    )
 
     # Get all eval request that are PENDING, if you want to run other evals, change this parameter
-    eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
+    eval_requests = get_eval_requests(
+        job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND
+    )
     # Sort the evals by priority (first submitted first run)
     eval_requests = sort_models_by_priority(api=API, models=eval_requests)
 
@@ -49,7 +86,6 @@ def run_auto_eval():
     eval_request = eval_requests[0]
     logger.info(pp.pformat(eval_request))
 
-
     set_eval_request(
         api=API,
         eval_request=eval_request,
@@ -59,29 +95,33 @@ def run_auto_eval():
     )
 
     # This needs to be done
-    #instance_size, instance_type = get_instance_for_model(eval_request)
+    # instance_size, instance_type = get_instance_for_model(eval_request)
     # For GPU
-    # instance_size, instance_type = "small", "g4dn.xlarge"
+    # instance_size, instance_type = "small", "g4dn.xlarge"
     # For CPU
     # Updated naming available at https://huggingface.co/docs/inference-endpoints/pricing
     instance_size, instance_type = "x4", "intel-icl"
-    logger.info(f'Starting Evaluation of {eval_request.json_filepath} on Inference endpoints: {instance_size} {instance_type}')
+    logger.info(
+        f"Starting Evaluation of {eval_request.json_filepath} on Inference endpoints: {instance_size} {instance_type}"
+    )
 
     run_evaluation(
-        eval_request=eval_request,
-        task_names=TASKS_LIGHTEVAL,
+        eval_request=eval_request,
+        task_names=TASKS_LIGHTEVAL,
         local_dir=EVAL_RESULTS_PATH_BACKEND,
-        batch_size=1,
-        accelerator=ACCELERATOR,
-        region=REGION,
-        vendor=VENDOR,
-        instance_size=instance_size,
-        instance_type=instance_type,
-        limit=LIMIT
-    )
+        batch_size=1,
+        accelerator=ACCELERATOR,
+        region=REGION,
+        vendor=VENDOR,
+        instance_size=instance_size,
+        instance_type=instance_type,
+        limit=LIMIT,
+    )
 
-    logger.info(f'Completed Evaluation of {eval_request.json_filepath} on Inference endpoints: {instance_size} {instance_type}')
+    logger.info(
+        f"Completed Evaluation of {eval_request.json_filepath} on Inference endpoints: {instance_size} {instance_type}"
+    )
 
 
 if __name__ == "__main__":
-    run_auto_eval()
+    run_auto_eval()
pyproject.toml CHANGED
@@ -1,13 +1,10 @@
 [tool.ruff]
-# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
-select = ["E", "F"]
-ignore = ["E501"] # line too long (black is taking care of this)
 line-length = 119
-fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
 
-[tool.isort]
-profile = "black"
-line_length = 119
+[tool.ruff.lint]
+select = ["C", "E", "F", "I", "W"]
+ignore = ["E501"] # line too long (the formatter is taking care of this)
 
-[tool.black]
-line-length = 119
+[tool.ruff.lint.isort]
+lines-after-imports = 2
+known-local-folder = ["src"]
requirements.txt CHANGED
@@ -1,5 +1,4 @@
 APScheduler==3.10.1
-black==23.11.0
 click==8.1.3
 huggingface-hub>=0.18.0
 python-dateutil==2.8.2
scripts/create_request_file.py CHANGED
@@ -7,7 +7,9 @@ from datetime import datetime, timezone
 import click
 from colorama import Fore
 from huggingface_hub import HfApi, snapshot_download
-from src.envs import TOKEN, EVAL_REQUESTS_PATH, QUEUE_REPO
+
+from src.envs import EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
+
 
 precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ", "float32")
 model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")
@@ -34,7 +36,9 @@ def get_model_size(model_info, precision: str):
 def main():
     api = HfApi()
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-    snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", token=TOKEN)
+    snapshot_download(
+        repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", token=TOKEN
+    )
 
     model_name = click.prompt("Enter model name")
     revision = click.prompt("Enter revision", default="main")
scripts/fix_harness_import.py CHANGED
@@ -2,10 +2,12 @@
 It creates a folder not ported during harness package creation (as they don't use a Manifest file atm and it ignore `.json` files).
 It will need to be updated if we want to use the harness' version of big bench to actually copy the json files.
 """
+
 import os
 
 import lm_eval
 
+
 if __name__ == "__main__":
     lm_eval_path = lm_eval.__path__[0]
-    os.makedirs(os.path.join(lm_eval_path, "datasets", "bigbench_resources"), exist_ok=True)
+    os.makedirs(os.path.join(lm_eval_path, "datasets", "bigbench_resources"), exist_ok=True)
src/backend/manage_requests.py CHANGED
@@ -4,9 +4,11 @@ from dataclasses import dataclass
 from typing import Optional
 
 from huggingface_hub import HfApi, snapshot_download
+
 from src.envs import TOKEN
 from src.logging import setup_logger
 
+
 logger = setup_logger(__name__)
 
 PENDING_STATUS = "PENDING"
@@ -14,27 +16,30 @@ RUNNING_STATUS = "RUNNING"
 FINISHED_STATUS = "FINISHED"
 FAILED_STATUS = "FAILED"
 
+
 @dataclass
 class EvalRequest:
-    """This class represents one evaluation request file.
-    """
+    """This class represents one evaluation request file."""
+
     model: str
     status: str
     json_filepath: str
     weight_type: str = "Original"
     model_type: str = ""  # pretrained, finetuned, with RL
     precision: str = ""  # float16, bfloat16
-    revision: str = "main" # commit hash
-    submitted_time: Optional[str] = "2022-05-18T11:40:22.519222" # random date just so that we can still order requests by date
-    model_type: Optional[str] = None # pretrained, fine-tuned, etc - define your own categories in
+    revision: str = "main"  # commit hash
+    submitted_time: Optional[str] = (
+        "2022-05-18T11:40:22.519222"  # random date just so that we can still order requests by date
+    )
+    model_type: Optional[str] = None  # pretrained, fine-tuned, etc - define your own categories in
     likes: Optional[int] = 0
     params: Optional[int] = None
     license: Optional[str] = ""
     base_model: Optional[str] = ""
     private: Optional[bool] = False
-
+
     def get_model_args(self):
-        """Edit this function if you want to manage more complex quantization issues. You'll need to map it to
+        """Edit this function if you want to manage more complex quantization issues. You'll need to map it to
         the evaluation suite you chose.
         """
         model_args = f"pretrained={self.model},revision={self.revision}"
@@ -45,7 +50,7 @@ class EvalRequest:
         # Quantized models need some added config, the install of bits and bytes, etc
         else:
             raise Exception(f"Unknown precision {self.precision}.")
-
+
         return model_args
 
 
@@ -77,7 +82,9 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[Ev
     Returns:
         `list[EvalRequest]`: a list of model info dicts.
     """
-    snapshot_download(repo_id=hf_repo, revision="main", local_dir=local_dir, repo_type="dataset", max_workers=60, token=TOKEN)
+    snapshot_download(
+        repo_id=hf_repo, revision="main", local_dir=local_dir, repo_type="dataset", max_workers=60, token=TOKEN
+    )
     json_files = glob.glob(f"{local_dir}/**/*.json", recursive=True)
 
     eval_requests = []
@@ -102,6 +109,7 @@ def eval_was_running(eval_request: EvalRequest):
     status = data["status"]
     return status == RUNNING_STATUS
 
+
 def check_completed_evals(
     api: HfApi,
     hf_repo: str,
@@ -114,12 +122,12 @@ def check_completed_evals(
 ):
     """Checks if the currently running evals are completed, if yes, update their status on the hub."""
     snapshot_download(
-        repo_id=hf_repo_results,
-        revision="main",
-        local_dir=local_dir_results,
-        repo_type="dataset",
-        max_workers=60,
-        token=TOKEN
+        repo_id=hf_repo_results,
+        revision="main",
+        local_dir=local_dir_results,
+        repo_type="dataset",
+        max_workers=60,
+        token=TOKEN,
     )
 
     running_evals = get_eval_requests(checked_status, hf_repo=hf_repo, local_dir=local_dir)
@@ -134,13 +142,9 @@ def check_completed_evals(
         output_file_exists = len(glob.glob(output_file)) > 0
 
         if output_file_exists:
-            logger.info(
-                f"EXISTS output file exists for {model} setting it to {completed_status}"
-            )
+            logger.info(f"EXISTS output file exists for {model} setting it to {completed_status}")
             set_eval_request(api, eval_request, completed_status, hf_repo, local_dir)
         else:
             if eval_was_running(eval_request=eval_request):
-                logger.info(
-                    f"No result file found for {model} setting it to {failed_status}"
-                )
+                logger.info(f"No result file found for {model} setting it to {failed_status}")
                 set_eval_request(api, eval_request, failed_status, hf_repo, local_dir)
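Note: a minimal usage sketch of the EvalRequest dataclass changed above, assuming the repository root is on PYTHONPATH; the model name and file path below are hypothetical, and the precision-specific handling inside get_model_args is not shown in this diff.

from src.backend.manage_requests import PENDING_STATUS, EvalRequest

# Hypothetical request, for illustration only
request = EvalRequest(
    model="org/some-model",
    status=PENDING_STATUS,
    json_filepath="eval-queue/org/some-model_eval_request.json",
    precision="float16",
)
# Builds the model arguments string, starting with "pretrained=org/some-model,revision=main"
print(request.get_model_args())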
src/backend/run_eval_suite_harness.py CHANGED
@@ -1,21 +1,32 @@
 import json
-import os
 import logging
+import os
 from datetime import datetime
+from typing import Union
 
-from lm_eval import tasks, evaluator, utils
+from lm_eval import evaluator, utils
 from lm_eval.tasks import TaskManager
 
-from src.envs import RESULTS_REPO, API
 from src.backend.manage_requests import EvalRequest
+from src.envs import API
 from src.logging import setup_logger
 
-from typing import Union
 
 logging.getLogger("openai").setLevel(logging.WARNING)
 logger = setup_logger(__name__)
 
-def run_evaluation(eval_request: EvalRequest, task_names: list, num_fewshot: int, batch_size: Union[int, str], device: str, local_dir: str, results_repo: str, no_cache: bool =True, limit: int =None):
+
+def run_evaluation(
+    eval_request: EvalRequest,
+    task_names: list,
+    num_fewshot: int,
+    batch_size: Union[int, str],
+    device: str,
+    local_dir: str,
+    results_repo: str,
+    no_cache: bool = True,
+    limit: int = None,
+):
     """Runs one evaluation for the current evaluation request file, then pushes the results to the hub.
 
     Args:
@@ -51,7 +62,7 @@ def run_evaluation(eval_request: EvalRequest, task_names: list, num_fewshot: int
         batch_size=batch_size,
         device=device,
         limit=limit,
-        write_out=True # Whether to write out an example document and model input, for checking task integrity
+        write_out=True,  # Whether to write out an example document and model input, for checking task integrity
     )
 
     results["config"]["model_dtype"] = eval_request.precision
src/backend/run_eval_suite_lighteval.py CHANGED
@@ -1,23 +1,31 @@
 import json
-import argparse
 import logging
-from datetime import datetime
 
-import lighteval
 from lighteval.logging.evaluation_tracker import EvaluationTracker
 from lighteval.models.model_config import InferenceEndpointModelConfig
 from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
 
-from lighteval.main_accelerate import main, EnvConfig, create_model_config
-
-from src.envs import RESULTS_REPO, CACHE_PATH, TOKEN
 from src.backend.manage_requests import EvalRequest
+from src.envs import RESULTS_REPO
 from src.logging import setup_logger
 
+
 logging.getLogger("openai").setLevel(logging.WARNING)
 logger = setup_logger(__name__)
 
-def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int, local_dir: str, accelerator: str, region: str, vendor: str, instance_size: str, instance_type: str, limit=None):
+
+def run_evaluation(
+    eval_request: EvalRequest,
+    task_names: str,
+    batch_size: int,
+    local_dir: str,
+    accelerator: str,
+    region: str,
+    vendor: str,
+    instance_size: str,
+    instance_type: str,
+    limit=None,
+):
     """Runs one evaluation for the current evaluation request file using lighteval, then pushes the results to the hub.
 
     Args:
@@ -32,18 +40,20 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
         local_dir (str): Where to save the results locally
         no_cache (bool, optional): Whether to use a cache or not.
         limit (int, optional): Whether to use a number of samples only for the evaluation - only for debugging
-    """
+    """
 
     if limit:
-        logger.info("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
+        logger.info(
+            "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
+        )
 
     evaluation_tracker = EvaluationTracker(
         output_dir="./results",
-        save_details = True,
-        push_to_hub = True,
-        push_to_tensorboard = False,
-        hub_results_org= RESULTS_REPO,
-        public = False,
+        save_details=True,
+        push_to_hub=True,
+        push_to_tensorboard=False,
+        hub_results_org=RESULTS_REPO,
+        public=False,
     )
 
     pipeline_params = PipelineParameters(
@@ -52,21 +62,21 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
         max_samples=limit,
         use_chat_template=False,
         system_prompt=None,
-        custom_tasks_directory="custom_tasks.py", # if using a custom task
+        custom_tasks_directory="custom_tasks.py",  # if using a custom task
     )
 
     model_config = InferenceEndpointModelConfig(
         # Endpoint parameters
-        name = eval_request.model.replace(".", "-").lower(),
-        repository = eval_request.model,
-        accelerator = accelerator,
-        vendor= vendor,
-        region= region,
-        instance_size= instance_size,
-        instance_type= instance_type,
-        should_reuse_existing= False,
-        model_dtype= eval_request.precision,
-        revision= eval_request.revision,
+        name=eval_request.model.replace(".", "-").lower(),
+        repository=eval_request.model,
+        accelerator=accelerator,
+        vendor=vendor,
+        region=region,
+        instance_size=instance_size,
+        instance_type=instance_type,
+        should_reuse_existing=False,
+        model_dtype=eval_request.precision,
+        revision=eval_request.revision,
     )
 
     pipeline = Pipeline(
@@ -85,7 +95,7 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
         dumped = json.dumps(results, indent=2)
         logger.info(dumped)
 
-    except Exception as e: # if eval failed, we force a cleanup
+    except Exception:  # if eval failed, we force a cleanup
         pipeline.model.cleanup()
 
     return results
src/backend/sort_queue.py CHANGED
@@ -1,4 +1,3 @@
-import re
 from dataclasses import dataclass
 
 from huggingface_hub import HfApi
@@ -11,6 +10,7 @@ class ModelMetadata:
     likes: int = 0
     size: int = 15
 
+
 # All the functions below sort the models in the queue based on different parameters
 def sort_models_by_priority(api: HfApi, models: list[EvalRequest]) -> list[EvalRequest]:
     private_models = [model for model in models if model.private]
@@ -18,11 +18,14 @@
 
     return sort_by_submit_date(private_models) + sort_by_submit_date(public_models)
 
+
 def sort_by_submit_date(eval_requests: list[EvalRequest]) -> list[EvalRequest]:
     return sorted(eval_requests, key=lambda x: x.submitted_time, reverse=False)
 
+
 def sort_by_size(eval_requests: list[EvalRequest]) -> list[EvalRequest]:
     return sorted(eval_requests, key=lambda x: x.params, reverse=False)
 
+
 def sort_by_likes(eval_requests: list[EvalRequest]) -> list[EvalRequest]:
-    return sorted(eval_requests, key=lambda x: x.likes, reverse=False)
+    return sorted(eval_requests, key=lambda x: x.likes, reverse=False)
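Note: a small sketch of the queue ordering these helpers implement, with invented requests; sort_models_by_priority puts private models first, and each group is ordered by submitted_time, oldest first.

from src.backend.manage_requests import EvalRequest
from src.backend.sort_queue import sort_by_submit_date

# Invented requests, for illustration only
older = EvalRequest(model="org/model-a", status="PENDING", json_filepath="a.json", submitted_time="2024-01-01T00:00:00")
newer = EvalRequest(model="org/model-b", status="PENDING", json_filepath="b.json", submitted_time="2024-02-01T00:00:00")
# Oldest submission comes out first
assert [r.model for r in sort_by_submit_date([newer, older])] == ["org/model-a", "org/model-b"]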
src/display/log_visualizer.py CHANGED
@@ -1,5 +1,4 @@
 from io import StringIO
-from pathlib import Path
 
 from bs4 import BeautifulSoup
 from rich.console import Console
@@ -12,8 +11,8 @@ from src.logging import log_file
 
 def log_file_to_html_string(reverse=True):
     with open(log_file, "rt") as f:
-        lines = f.readlines()
-        lines = lines[-NUM_LINES_VISUALIZE:]
+        lines = f.readlines()
+        lines = lines[-NUM_LINES_VISUALIZE:]
 
     if reverse:
         lines = reversed(lines)
@@ -26,12 +25,12 @@ def log_file_to_html_string(reverse=True):
     html_content = console.export_html(inline_styles=True)
 
     # Parse the HTML content using BeautifulSoup
-    soup = BeautifulSoup(html_content, 'lxml')
+    soup = BeautifulSoup(html_content, "lxml")
 
     # Modify the <pre> tag and add custom styles
     pre_tag = soup.pre
-    pre_tag['class'] = 'scrollable'
-    del pre_tag['style']
+    pre_tag["class"] = "scrollable"
+    del pre_tag["style"]
 
     # Add your custom styles and the .scrollable CSS to the <style> tag
     style_tag = soup.style
src/envs.py CHANGED
@@ -2,23 +2,24 @@ import os
 
 from huggingface_hub import HfApi
 
+
 # Info to change for your repository
 # ----------------------------------
-TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
+TOKEN = os.environ.get("HF_TOKEN")  # A read/write token for your org
 
-OWNER = "demo-leaderboard-backend" # Change to your org - don't forget to create a results and request dataset
+OWNER = "demo-leaderboard-backend"  # Change to your org - don't forget to create a results and request dataset
 
 # For harness evaluations
-DEVICE = "cpu" # "cuda:0" if you add compute, for harness evaluations
-LIMIT = 20 # !!!! For testing, should be None for actual evaluations!!!
-NUM_FEWSHOT = 0 # Change with your few shot for the Harness evaluations
+DEVICE = "cpu"  # "cuda:0" if you add compute, for harness evaluations
+LIMIT = 20  # !!!! For testing, should be None for actual evaluations!!!
+NUM_FEWSHOT = 0  # Change with your few shot for the Harness evaluations
 TASKS_HARNESS = ["anli_r1", "logiqa"]
 
 # For lighteval evaluations
 ACCELERATOR = "cpu"
 REGION = "us-east-1"
 VENDOR = "aws"
-TASKS_LIGHTEVAL = "lighteval|anli:r1|0|0,lighteval|logiqa|0|0"
+TASKS_LIGHTEVAL = "lighteval|anli:r1|0|0,lighteval|logiqa|0|0"
 # To add your own tasks, edit the custom file and launch it with `custom|myothertask|0|0``
 
 # ---------------------------------------------------
@@ -27,7 +28,7 @@ QUEUE_REPO = f"{OWNER}/requests"
 RESULTS_REPO = f"{OWNER}/results"
 
 # If you setup a cache later, just change HF_HOME
-CACHE_PATH=os.getenv("HF_HOME", ".")
+CACHE_PATH = os.getenv("HF_HOME", ".")
 
 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
@@ -39,4 +40,3 @@ REFRESH_RATE = 10 * 60 # 10 min
 NUM_LINES_VISUALIZE = 300
 
 API = HfApi(token=TOKEN)
-
src/logging.py CHANGED
@@ -1,19 +1,17 @@
-import sys
+import logging
 from pathlib import Path
 
-proj_dir = Path(__file__).parents[1]
-
-log_file = proj_dir/"output.log"
 
+proj_dir = Path(__file__).parents[1]
 
-import logging
+log_file = proj_dir / "output.log"
 
 
 def setup_logger(name: str):
     logger = logging.getLogger(name)
     logger.setLevel(logging.INFO)
 
-    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 
     # Create a file handler to write logs to a file
     file_handler = logging.FileHandler(log_file)
@@ -29,10 +27,10 @@ def configure_root_logger():
     logging.basicConfig(level=logging.INFO)
     root_logger = logging.getLogger()
 
-    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 
     file_handler = logging.FileHandler(log_file)
     file_handler.setLevel(logging.INFO)
     file_handler.setFormatter(formatter)
 
-    root_logger.addHandler(file_handler)
+    root_logger.addHandler(file_handler)
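Note: a minimal sketch of how these logging helpers are used by the rest of this change (see app.py and the backend entry points), assuming the repository root is on PYTHONPATH.

from src.logging import configure_root_logger, setup_logger

configure_root_logger()          # route root-logger records to the shared output.log file
logger = setup_logger(__name__)  # per-module logger writing to the same output.log
logger.info("backend started")   # hypothetical message, for illustration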