# tokenisation / app.py
# Scraped Hugging Face file-view header, kept for provenance:
#   uploader: corasan; last commit: "Update app.py" (9fc7882, verified); size: 835 bytes.
import html

import gradio as gr
import sentencepiece as spm
# Sample sentences passed to the interface as its `examples`.
examples = [
    "Hello, world!",
    "This is a test.",
    "The Page title option has a text input box for changing the title of the image gallery page.",
    "Algoritmos de diseño de árboles",
]
# Lazily-created SentencePiece processor, reused by every call to greet().
_processor = None


def _get_processor():
    """Return the shared SentencePiece processor, loading 'bpe.model' on first use."""
    global _processor
    if _processor is None:
        sp = spm.SentencePieceProcessor()
        sp.load('bpe.model')
        # Publish only after a successful load so callers never observe a
        # processor without a model.
        # NOTE(review): the instance is shared by concurrent requests; encoding
        # is expected to be read-only on the loaded model - confirm for the
        # installed sentencepiece version.
        _processor = sp
    return _processor


def greet(sentence):
    """Tokenise *sentence* with the BPE model and render the pieces as HTML.

    Args:
        sentence: Text to split into SentencePiece pieces.

    Returns:
        An HTML string: a ``<div class='output'>`` containing the pieces,
        separated by ``<span class='yellow'> • </span>`` markers.
    """
    pieces = _get_processor().encode_as_pieces(sentence)
    separator = "<span class='yellow'> • </span>"
    # The pieces come straight from user input and the result is rendered as
    # HTML, so escape them to keep markup in the input from being injected.
    body = separator.join(html.escape(piece) for piece in pieces)
    return "<div class='output'>" + body + "</div>"
# Build the web UI: a text box in, greet()'s HTML out, with the sample
# sentences offered as examples.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="html",
    examples=examples,
    title="SentencePiece BPE",
    description="Demo for SentencePiece BPE.",
    cache_examples="lazy",
    concurrency_limit=30,
    # The two rules must be separated by whitespace only: a stray ';' between
    # them is parsed as part of the next selector, which invalidates the
    # .yellow rule and leaves the piece separators unhighlighted.
    css=".output {font-size: 150%;} .yellow {background-color: yellow; }",
)
demo.launch()