m-ric (HF staff) committed
Commit 7a37cfb
• 1 Parent(s): 124b7c9

Update app.py

Files changed (1):
  1. app.py +16 -8
app.py CHANGED
```diff
@@ -1,8 +1,8 @@
 import torch
-import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import numpy as np
 import gradio as gr
+import spaces
 
 print(f"Is CUDA available: {torch.cuda.is_available()}")
 # True
@@ -122,6 +122,7 @@ model = AutoModelForCausalLM.from_pretrained("gpt2")
 tokenizer.pad_token_id = tokenizer.eos_token_id
 print("Loading finished.")
 
+
 def generate_html(token, node):
     """Recursively generate HTML for the tree."""
 
@@ -133,18 +134,17 @@ def generate_html(token, node):
         for token, subnode in node["children"].items():
             html_content += generate_html(token, subnode)
         html_content += "</ul>"
-
     html_content += "</li>"
-
     return html_content
 
 
-def generate_markdown_table(scores, top_k=4, chosen_tokens=None):
+def generate_markdown_table(scores, sequence_prob, top_k=4, chosen_tokens=None):
     markdown_table = """
     <table>
         <tr>
             <th><b>Token</b></th>
-            <th><b>Score</b></th>
+            <th><b>Step score</b></th>
+            <th><b>Cumulative score</b></th>
         </tr>"""
     for token_idx in np.argsort(scores)[-top_k:]:
         token = tokenizer.decode([token_idx])
@@ -155,6 +155,7 @@ def generate_markdown_table(scores, top_k=4, chosen_tokens=None):
         <tr style={style}>
             <td>{token}</td>
             <td>{scores[token_idx]:.4f}</td>
+            <td>{scores[token_idx] + sequence_prob:.4f}</td>
         </tr>"""
     markdown_table += """
     </table>"""
@@ -169,9 +170,11 @@ def display_tree(start_sentence, scores, sequences, beam_indices):
     print(tokenizer.batch_decode(sequences))
     original_tree = {"table": None, "children": {}}
     for sequence_ix in range(len(sequences)):
+        current_sequence_score = 0
         current_tree = original_tree
         for step, step_scores in enumerate(scores):
-            current_token_choice = tokenizer.decode([sequences[sequence_ix, step]])
+            current_token_choice_ix = sequences[sequence_ix, step]
+            current_token_choice = tokenizer.decode([current_token_choice_ix])
             current_beam = beam_indices[sequence_ix, step]
 
             if current_token_choice not in current_tree["children"]:
@@ -182,13 +185,16 @@ def display_tree(start_sentence, scores, sequences, beam_indices):
 
             # Rewrite the probs table even if it was there before, since new chosen nodes have appeared in the children of current tree
             markdown_table = generate_markdown_table(
-                step_scores[current_beam, :],
+                step_scores[current_beam, :], current_sequence_score,
                 chosen_tokens=current_tree["children"].keys(),
             )
             current_tree["table"] = markdown_table
 
             current_tree = current_tree["children"][current_token_choice]
 
+            # Keep up to date the current cumulative score
+            current_sequence_score += step_scores[current_beam, current_token_choice_ix]
+
     display += generate_html(start_sentence, original_tree)
 
     display += """
@@ -213,6 +219,7 @@ def get_tables(input_text, number_steps, number_beams):
         temperature=1.0,
         do_sample=True,
     )
+    print(outputs.sequences_scores)
 
     tables = display_tree(
         input_text,
@@ -221,7 +228,8 @@ def get_tables(input_text, number_steps, number_beams):
         outputs.beam_indices[:, : -len(inputs)],
    )
     return tables
-
+
+
 with gr.Blocks(
     theme=gr.themes.Soft(
         text_size="lg", font=["monospace"], primary_hue=gr.themes.colors.green
```
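For context on what the new bookkeeping computes: with `num_beams > 1`, `output_scores=True`, and `return_dict_in_generate=True`, transformers' `generate()` returns the final `sequences`, per-step `scores` (log-probabilities over the vocabulary for each beam), and the `beam_indices` each returned sequence followed. The added `current_sequence_score` sums the chosen token's log-probability along that beam path, which feeds the new "Cumulative score" column. A minimal standalone sketch of the same accumulation (the prompt and generation parameters below are illustrative, not taken from the Space):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The quick brown fox", return_tensors="pt")
outputs = model.generate(
    **inputs,
    max_new_tokens=4,
    num_beams=3,
    num_return_sequences=3,
    return_dict_in_generate=True,
    output_scores=True,
    pad_token_id=tokenizer.eos_token_id,
)

n_input = inputs.input_ids.shape[-1]
generated = outputs.sequences[:, n_input:]  # generated tokens only

for seq_ix in range(generated.shape[0]):
    cumulative_score = 0.0
    for step, step_scores in enumerate(outputs.scores):
        beam = outputs.beam_indices[seq_ix, step]
        if beam < 0:  # beam_indices is padded with -1 once a sequence finishes
            break
        token_ix = generated[seq_ix, step]
        # step_scores[beam] is the log-probability distribution over the
        # vocabulary for that beam at this step; pick the chosen token.
        cumulative_score += step_scores[beam, token_ix].item()
    print(tokenizer.decode(generated[seq_ix]), f"{cumulative_score:.4f}")
```

As a cross-check, `model.compute_transition_scores(outputs.sequences, outputs.scores, outputs.beam_indices)` returns the same per-step log-probabilities; with the default `length_penalty=1.0`, their per-sequence sum divided by the generated length should recover the `outputs.sequences_scores` that the commit's new print statement logs.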