m-ric (HF staff) committed
Commit: aaaec2a
Parent(s): a9a7993

Update app.py

Files changed (1):
  app.py: +5 -7
app.py CHANGED
@@ -300,8 +300,7 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
         token_scores,
     ) = ([], [], [], [], [], [])
 
-    score_idx = 0
-    for beam_ix in range(len(beam_trees)):
+    for beam_ix in range(len(beam_trees)):  # not range(n_beams) since there might be more ongoing trees.
         current_beam = beam_trees[beam_ix]
 
         # skip if the beam is already final
@@ -310,18 +309,17 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
 
         # Get top cumulative scores for the current beam
         current_top_token_indexes = list(
-            np.array(scores[step][score_idx].argsort()[-n_beams:])[::-1]
+            np.array(scores[step][beam_ix].argsort()[-n_beams:])[::-1]
         )
         top_token_indexes += current_top_token_indexes
-        token_scores += list(np.array(scores[step][score_idx][current_top_token_indexes]))
+        token_scores += list(np.array(scores[step][beam_ix][current_top_token_indexes]))
         top_cumulative_scores += list(
-            np.array(scores[step][score_idx][current_top_token_indexes])
+            np.array(scores[step][beam_ix][current_top_token_indexes])
             + current_beam.cumulative_score
         )
         beam_indexes += [beam_ix] * n_beams
         current_sequence += [beam_trees[beam_ix].current_sequence] * n_beams
         top_tokens += [tokenizer.decode([el]) for el in current_top_token_indexes]
-        score_idx += 1
 
     top_df = pd.DataFrame.from_dict(
         {
@@ -358,7 +356,7 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
             break
     top_df_selected_filtered = top_df_selected.iloc[:beams_to_keep]
 
-    # Write the scores table - one per beam source
+    # Write the scores table in each beam tree
     score_idx = 0
     for beam_ix in range(len(beam_trees)):
         current_beam = beam_trees[beam_ix]
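
For context, a minimal standalone sketch of the indexing pattern this commit switches to: each ongoing beam tree reads its own row of the step's score matrix directly via beam_ix, so the separately tracked score_idx counter is no longer needed. The toy shapes and variable names below (step_scores, top_candidates) are assumptions for illustration, not code from this repo.

import numpy as np

# Assumed toy setup: at one generation step, `step_scores` holds one row of
# log-probabilities per ongoing beam tree (rows may outnumber n_beams).
n_beams = 2
vocab_size = 5
rng = np.random.default_rng(0)
step_scores = rng.standard_normal((3, vocab_size))  # 3 ongoing trees

top_candidates = []
for beam_ix in range(step_scores.shape[0]):  # iterate every ongoing tree
    # Same pattern as the updated code: take the n_beams highest-scoring token
    # ids for this beam, indexing the score row by beam_ix directly.
    current_top_token_indexes = list(step_scores[beam_ix].argsort()[-n_beams:][::-1])
    token_scores = list(step_scores[beam_ix][current_top_token_indexes])
    top_candidates.append(list(zip(current_top_token_indexes, token_scores)))

print(top_candidates)

Under these assumptions, each entry pairs a candidate token id with its per-beam score, mirroring what the loop in generate_beams accumulates into top_token_indexes and token_scores.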