# Repository page header captured together with the file (not part of the program):
# tokenisation / app.py
# Constantin Orasan
# Trying to improve the display
# 7328020
# raw
# history blame
# 734 Bytes
import gradio as gr
import sentencepiece as spm
# Sample sentences offered as one-click inputs in the demo UI.
examples = [
    "Hello, world!",
    "This is a test.",
    "The Page title option has a text input box for changing the title of the image gallery page.",
    "Algoritmos de diseño de árboles",
]
# Tokenizer shared by every call to greet(). It is created lazily so that the
# model file is read once instead of on every request.
_processor = None


def _get_processor():
    """Return the shared SentencePiece processor, loading 'bpe.model' on first use."""
    global _processor
    if _processor is None:
        sp = spm.SentencePieceProcessor()
        sp.load('bpe.model')
        # Publish only after load() has returned, so a failed load is retried
        # on the next call rather than caching an unloaded processor.
        _processor = sp
    # NOTE(review): the processor is now shared between concurrent requests;
    # encoding with an already-loaded processor is expected to be safe for
    # concurrent use -- confirm against the SentencePiece documentation.
    return _processor


def greet(sentence):
    """Tokenise *sentence* and return its pieces joined by " • ".

    Args:
        sentence: Text to split into SentencePiece pieces. ``None`` or an
            empty string is accepted and produces an empty result.

    Returns:
        The pieces returned by ``encode_as_pieces`` separated by " • ",
        or "" when there is nothing to tokenise.
    """
    # Nothing to encode: skip the model entirely (also guards against None).
    if not sentence:
        return ""
    return " • ".join(_get_processor().encode_as_pieces(sentence))
# Build the web UI: a text input fed to greet(), with the result shown as text.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
    title="SentencePiece BPE",
    description="Demo for SentencePiece BPE.",
    examples=examples,
    cache_examples="lazy",
    concurrency_limit=30,
    # Enlarge the text areas so the separated pieces are easier to read.
    css="textarea {font-size: 150%;}",
)

# Start serving the interface.
demo.launch()