Clémentine committed on
Commit
b93d1b1
1 Parent(s): 27511e0

Added restrictor on model cards and licenses

Browse files
Files changed (2) hide show
  1. app.py +19 -9
  2. src/display_models/modelcard_filter.py +25 -0
app.py CHANGED
@@ -17,6 +17,7 @@ from src.assets.text_content import (
17
  TITLE,
18
  )
19
  from src.display_models.get_model_metadata import DO_NOT_SUBMIT_MODELS, ModelType
 
20
  from src.display_models.utils import (
21
  AutoEvalColumn,
22
  EvalQueueColumn,
@@ -121,6 +122,10 @@ def add_new_eval(
121
  precision = precision.split(" ")[0]
122
  current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
123
 
 
 
 
 
124
  num_models_submitted_in_period = user_submission_permission(model, users_to_submission_dates, RATE_LIMIT_PERIOD)
125
  if num_models_submitted_in_period > RATE_LIMIT_QUOTA:
126
  error_msg = f"Organisation or user `{model.split('/')[0]}`"
@@ -129,10 +134,11 @@ def add_new_eval(
129
  error_msg += "Please wait a couple of days before resubmitting, so that everybody can enjoy using the leaderboard 🤗"
130
  return styled_error(error_msg)
131
 
132
- if model_type is None or model_type == "":
133
- return styled_error("Please select a model type.")
 
134
 
135
- # check the model actually exists before adding the eval
136
  if revision == "":
137
  revision = "main"
138
 
@@ -145,8 +151,14 @@ def add_new_eval(
145
  model_on_hub, error = is_model_on_hub(model, revision)
146
  if not model_on_hub:
147
  return styled_error(f'Model "{model}" {error}')
 
 
 
 
 
148
 
149
- print("adding new eval")
 
150
 
151
  eval_entry = {
152
  "model": model,
@@ -166,14 +178,11 @@ def add_new_eval(
166
  user_name = model.split("/")[0]
167
  model_path = model.split("/")[1]
168
 
 
169
  OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
170
  os.makedirs(OUT_DIR, exist_ok=True)
171
  out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
172
 
173
- # Check if the model has been forbidden:
174
- if out_path.split("eval-queue/")[1] in DO_NOT_SUBMIT_MODELS:
175
- return styled_warning("Model authors have requested that their model be not submitted on the leaderboard.")
176
-
177
  # Check for duplicate submission
178
  if f"{model}_{revision}_{precision}" in requested_models:
179
  return styled_warning("This model has been already submitted.")
@@ -181,6 +190,7 @@ def add_new_eval(
181
  with open(out_path, "w") as f:
182
  f.write(json.dumps(eval_entry))
183
 
 
184
  api.upload_file(
185
  path_or_fileobj=out_path,
186
  path_in_repo=out_path.split("eval-queue/")[1],
@@ -189,7 +199,7 @@ def add_new_eval(
189
  commit_message=f"Add {model} to eval queue",
190
  )
191
 
192
- # remove the local file
193
  os.remove(out_path)
194
 
195
  return styled_message(
 
17
  TITLE,
18
  )
19
  from src.display_models.get_model_metadata import DO_NOT_SUBMIT_MODELS, ModelType
20
+ from src.display_models.modelcard_filter import check_model_card
21
  from src.display_models.utils import (
22
  AutoEvalColumn,
23
  EvalQueueColumn,
 
122
  precision = precision.split(" ")[0]
123
  current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
124
 
125
+ if model_type is None or model_type == "":
126
+ return styled_error("Please select a model type.")
127
+
128
+ # Is the user rate limited?
129
  num_models_submitted_in_period = user_submission_permission(model, users_to_submission_dates, RATE_LIMIT_PERIOD)
130
  if num_models_submitted_in_period > RATE_LIMIT_QUOTA:
131
  error_msg = f"Organisation or user `{model.split('/')[0]}`"
 
134
  error_msg += "Please wait a couple of days before resubmitting, so that everybody can enjoy using the leaderboard 🤗"
135
  return styled_error(error_msg)
136
 
137
+ # Did the model authors forbid its submission to the leaderboard?
138
+ if model in DO_NOT_SUBMIT_MODELS or base_model in DO_NOT_SUBMIT_MODELS:
139
+ return styled_warning("Model authors have requested that their model be not submitted on the leaderboard.")
140
 
141
+ # Does the model actually exist?
142
  if revision == "":
143
  revision = "main"
144
 
 
151
  model_on_hub, error = is_model_on_hub(model, revision)
152
  if not model_on_hub:
153
  return styled_error(f'Model "{model}" {error}')
154
+
155
+ # Were the model card and license filled?
156
+ modelcard_OK, error_msg = check_model_card(model)
157
+ if not modelcard_OK:
158
+ return styled_error(error_msg)
159
 
160
+ # Seems good, creating the eval
161
+ print("Adding new eval")
162
 
163
  eval_entry = {
164
  "model": model,
 
178
  user_name = model.split("/")[0]
179
  model_path = model.split("/")[1]
180
 
181
+ print("Creating eval file")
182
  OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
183
  os.makedirs(OUT_DIR, exist_ok=True)
184
  out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
185
 
 
 
 
 
186
  # Check for duplicate submission
187
  if f"{model}_{revision}_{precision}" in requested_models:
188
  return styled_warning("This model has been already submitted.")
 
190
  with open(out_path, "w") as f:
191
  f.write(json.dumps(eval_entry))
192
 
193
+ print("Uploading eval file")
194
  api.upload_file(
195
  path_or_fileobj=out_path,
196
  path_in_repo=out_path.split("eval-queue/")[1],
 
199
  commit_message=f"Add {model} to eval queue",
200
  )
201
 
202
+ # Remove the local file
203
  os.remove(out_path)
204
 
205
  return styled_message(
src/display_models/modelcard_filter.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import huggingface_hub
2
+ from huggingface_hub import ModelCard
3
+
4
+ # ht to @Wauplin, thank you for the snippet!
5
+ # See https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/317
6
def check_model_card(repo_id: str) -> tuple[bool, str]:
    """Check that a model repository has a model card with a license and a description.

    The card is loaded with ``ModelCard.load(repo_id)`` and must pass, in order:

    1. The card can be loaded (an ``EntryNotFoundError`` is reported as a
       missing model card).
    2. License metadata is present: either ``license`` is set, or both
       ``license_name`` and ``license_link`` are set to non-empty values.
    3. The card text (ignoring surrounding whitespace) is at least 200
       characters long.

    Args:
        repo_id: Identifier of the model repository whose card is checked.

    Returns:
        A ``(status, error_message)`` pair. ``status`` is ``True`` and the
        message is empty when every check passes; otherwise ``status`` is
        ``False`` and the message explains what the submitter should fix.

    Note:
        Only ``EntryNotFoundError`` is handled here; any other exception
        raised while loading the card propagates to the caller.
    """
    try:
        card = ModelCard.load(repo_id)
    except huggingface_hub.utils.EntryNotFoundError:
        return False, "Please add a model card to your model to explain how you trained/fine-tuned it."

    # Enforce license metadata
    if card.data.license is None:
        # Read the fallback pair by attribute and require truthy values, so
        # that keys declared with an empty value do not count as a license.
        license_name = getattr(card.data, "license_name", None)
        license_link = getattr(card.data, "license_link", None)
        if not (license_name and license_link):
            return False, (
                "License not found. Please add a license to your model card using the `license` metadata or a"
                " `license_name`/`license_link` pair."
            )

    # Enforce card content (whitespace padding does not count as description)
    if len(card.text.strip()) < 200:
        return False, "Please add a description to your model card, it is too short."

    return True, ""