|
from functools import lru_cache

import gradio as gr
from transformers import AutoTokenizer
|
|
|
|
|
@lru_cache(maxsize=8)
def _load_tokenizer(tokenizer_name):
    """Load and cache a tokenizer by Hub id.

    `AutoTokenizer.from_pretrained` resolves files on the Hub / local cache on
    every call; caching the loaded object makes repeated UI requests with the
    same tokenizer effectively free.
    """
    return AutoTokenizer.from_pretrained(tokenizer_name)


def tokenize_text(text, tokenizer_name):
    """Tokenize ``text`` with the selected tokenizer and format the result.

    Parameters
    ----------
    text : str
        Raw input text entered by the user.
    tokenizer_name : str
        Hugging Face Hub model id of the tokenizer to use.

    Returns
    -------
    str
        Two-line summary: the token strings, then their integer input ids.
    """
    tokenizer = _load_tokenizer(tokenizer_name)
    tokenized_text = tokenizer.tokenize(text)
    input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
    return f"Tokenized Text: {tokenized_text}\nInput IDs: {input_ids}"
|
|
|
|
|
# Hub ids of the ArabianGPT tokenizers selectable in the dropdown.
tokenizer_names = [
    f"riotu-lab/ArabianGPT-{size}"
    for size in ("01B", "03B", "08B")
]
|
|
|
|
|
# Build the demo UI: a text box plus a tokenizer picker feeding tokenize_text.
text_input = gr.Textbox(label="Enter Text")
tokenizer_picker = gr.Dropdown(choices=tokenizer_names, label="Select Tokenizer")

iface = gr.Interface(
    fn=tokenize_text,
    inputs=[text_input, tokenizer_picker],
    outputs="text",
    title="Hugging Face Tokenizer Demo",
    description="Try different tokenizers and see the tokenized form with input IDs.",
)
|
|
|
|
|
iface.launch() |