GSK-2434-Fix-bugs-mask-token-and-feature-mapping

#19
Files changed (2)
  1. io_utils.py +7 -3
  2. text_classification_ui_helpers.py +2 -3
io_utils.py CHANGED
@@ -120,7 +120,6 @@ def pop_job_from_pipe():
120
  job = f.readline().strip()
121
  remaining = f.readlines()
122
  f.close()
123
- print(job, remaining, ">>>>")
124
  with open(PIPE_PATH, "w") as f:
125
  f.write("\n".join(remaining))
126
  f.close()
@@ -130,10 +129,13 @@ def pop_job_from_pipe():
130
  if len(job_info) != 2:
131
  raise ValueError("Invalid job info: ", job_info)
132
 
133
- write_log_to_user_file(job_info[0], f"Running job {job_info}")
134
  command = job_info[1].split(",")
 
 
 
 
135
 
136
- write_log_to_user_file(job_info[0], f"Running command {command}")
137
  log_file = open(f"./tmp/{job_info[0]}_log", "a")
138
  subprocess.Popen(
139
  command,
@@ -141,3 +143,5 @@ def pop_job_from_pipe():
141
  stdout=log_file,
142
  stderr=log_file,
143
  )
 
 
 
120
  job = f.readline().strip()
121
  remaining = f.readlines()
122
  f.close()
 
123
  with open(PIPE_PATH, "w") as f:
124
  f.write("\n".join(remaining))
125
  f.close()
 
129
  if len(job_info) != 2:
130
  raise ValueError("Invalid job info: ", job_info)
131
 
132
+ write_log_to_user_file(job_info[0], f"Running job id {job_info[0]}\n")
133
  command = job_info[1].split(",")
134
+ masked_command = command.copy()
135
+ hf_token_index = masked_command.index("--hf_token")
136
+ masked_command[hf_token_index + 1] = "hf_********"
137
+ write_log_to_user_file(job_info[0], f"Running command {masked_command}\n")
138
 
 
139
  log_file = open(f"./tmp/{job_info[0]}_log", "a")
140
  subprocess.Popen(
141
  command,
 
143
  stdout=log_file,
144
  stderr=log_file,
145
  )
146
+
147
+
text_classification_ui_helpers.py CHANGED
@@ -67,19 +67,18 @@ def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *l
67
  for i, label in enumerate(labels[:MAX_LABELS]):
68
  if label:
69
  all_mappings["labels"][label] = ds_labels[i]
70
-
71
  if "features" not in all_mappings.keys():
72
  all_mappings["features"] = dict()
73
  for i, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
74
  if feat:
75
- all_mappings["features"][feat] = ds_features[i]
 
76
  write_column_mapping(all_mappings)
77
 
78
 
79
  def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
80
  model_labels = list(model_id2label.values())
81
  len_model_labels = len(model_labels)
82
- print(model_labels, model_id2label, 3 % len_model_labels)
83
  lables = [
84
  gr.Dropdown(
85
  label=f"{label}",
 
67
  for i, label in enumerate(labels[:MAX_LABELS]):
68
  if label:
69
  all_mappings["labels"][label] = ds_labels[i]
 
70
  if "features" not in all_mappings.keys():
71
  all_mappings["features"] = dict()
72
  for i, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
73
  if feat:
74
+ # TODO: Substitute 'text' with more features for zero-shot
75
+ all_mappings["features"]["text"] = feat
76
  write_column_mapping(all_mappings)
77
 
78
 
79
  def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
80
  model_labels = list(model_id2label.values())
81
  len_model_labels = len(model_labels)
 
82
  lables = [
83
  gr.Dropdown(
84
  label=f"{label}",