Taranosaurus committed 507d429
1 Parent(s): 7dae6b7

Changed "Unknown Token" token to "Token 0"

Some models have the unknown (UNK) token at a non-zero index in the vocab.
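For context on that message: the token stored at index 0 of a vocabulary is not necessarily the unknown token. A minimal sketch, assuming the Space loads tokenizers through Hugging Face transformers (the loading code is not part of this diff) and using bert-base-uncased purely as an example checkpoint:

```python
from transformers import AutoTokenizer

# bert-base-uncased keeps [PAD] at index 0 and [UNK] at index 100,
# so labelling whatever sits at index 0 as "Unknown Token" would be wrong here.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

print(tokenizer.unk_token, tokenizer.unk_token_id)  # [UNK] 100
print(tokenizer.convert_ids_to_tokens(0))           # [PAD]
```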
app.py
CHANGED
@@ -128,13 +128,13 @@ with gr.Blocks() as frontend:
     gr.Markdown("### 🎲 Tokenizer Data")
     output_checkpoint = gr.Textbox(visible=False)
     output_vocab_count = gr.Number(label="Vocab Size", interactive=False)
-
+    output_token_zero = gr.Textbox(label="Token 0", interactive=False)
     output_vocab = gr.Code(label="Vocabulary IDs")
 
-    input_checkpoint.change(fn=load_vocab, inputs=[input_checkpoint, output_checkpoint], outputs=[output_checkpoint, output_vocab_count,
+    input_checkpoint.change(fn=load_vocab, inputs=[input_checkpoint, output_checkpoint], outputs=[output_checkpoint, output_vocab_count, output_token_zero, output_vocab], queue=True)
     btn_tokenize.click(fn=tokenize_er, inputs=[input_checkpoint, input_sequence], outputs=[token_id_pair], queue=True)
     btn_random_seq.click(fn=randomize_sequence, inputs=[], outputs=[input_sequence])
     btn_decode.click(fn=de_tokenize_er, inputs=[input_checkpoint, token_id_pair], outputs=[output_decoded_token_ids,output_decoded_tokens, output_decoded_ids], queue=True)
-    frontend.load(fn=load_vocab, inputs=[input_checkpoint, output_checkpoint], outputs=[output_checkpoint, output_vocab_count,
+    frontend.load(fn=load_vocab, inputs=[input_checkpoint, output_checkpoint], outputs=[output_checkpoint, output_vocab_count, output_token_zero, output_vocab], queue=True)
 
 frontend.launch()
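load_vocab itself is unchanged in this commit, so the sketch below is only a hypothetical reconstruction, assuming transformers' AutoTokenizer and that the hidden output_checkpoint textbox simply caches the last loaded checkpoint. It illustrates the constraint the new wiring imposes: Gradio assigns a handler's return values to its outputs list positionally, so adding output_token_zero to the outputs means the function has to return a matching extra value in that slot.

```python
import json
from transformers import AutoTokenizer

# Hypothetical sketch only; the real load_vocab is not shown in this commit.
def load_vocab(checkpoint, previous_checkpoint):
    # previous_checkpoint comes from the hidden output_checkpoint textbox;
    # it is unused in this sketch.
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    vocab = tokenizer.get_vocab()  # token string -> id
    return (
        checkpoint,                          # -> output_checkpoint (hidden cache)
        len(vocab),                          # -> output_vocab_count ("Vocab Size")
        tokenizer.convert_ids_to_tokens(0),  # -> output_token_zero ("Token 0")
        json.dumps(vocab, indent=2),         # -> output_vocab ("Vocabulary IDs")
    )
```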