Kalemat / app.py
Omartificial-Intelligence-Space's picture
Upload files
9715926 verified
raw
history blame
940 Bytes
import gradio as gr
from transformers import AutoTokenizer
# Tokenizers already loaded in this process, keyed by tokenizer name, so that
# repeated requests do not call AutoTokenizer.from_pretrained again.
_TOKENIZER_CACHE = {}


def tokenize_text(text, tokenizer_name):
    """Tokenize ``text`` with the selected pretrained tokenizer.

    Args:
        text: The text to tokenize. ``None`` is treated as an empty string.
        tokenizer_name: Name passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        A two-line string with the token list and the matching input IDs,
        or a short prompt when no tokenizer name was supplied.
    """
    # Guard against a missing selection instead of handing None/"" to
    # from_pretrained, which would fail.
    if not tokenizer_name:
        return "Please select a tokenizer."

    tokenizer = _TOKENIZER_CACHE.get(tokenizer_name)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        _TOKENIZER_CACHE[tokenizer_name] = tokenizer

    tokenized_text = tokenizer.tokenize(text or "")
    input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
    return f"Tokenized Text: {tokenized_text}\nInput IDs: {input_ids}"
# Tokenizer names offered in the dropdown: one entry per ArabianGPT size.
# Further tokenizer names can be appended to this list.
tokenizer_names = [
    f"riotu-lab/ArabianGPT-{size}"
    for size in ("01B", "03B", "08B")
]
# Build the Gradio interface: a text box and a tokenizer dropdown feed
# tokenize_text, whose string result is shown as plain text.
iface = gr.Interface(
    fn=tokenize_text,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(
            choices=tokenizer_names,
            # Preselect the first tokenizer so a submission always carries a
            # tokenizer name rather than an empty selection.
            value=tokenizer_names[0] if tokenizer_names else None,
            label="Select Tokenizer",
        ),
    ],
    outputs="text",
    title="Hugging Face Tokenizer Demo",
    description="Try different tokenizers and see the tokenized form with input IDs.",
)
# Launch the app
iface.launch()