Update chat template to allow the use of the assistant_mask
#45
by
Butanium
- opened
Using return_assistant_tokens_mask=True lets you get a mask showing which tokens come from the assistant. This is useful for fine-tuning on chat data.
# Demo: tokenize a chat with the updated template and render every token as
# HTML, drawing the tokens flagged by the assistant mask in a red box.
import html

from IPython.display import HTML, display

# With return_dict=True and return_assistant_tokens_mask=True, the result is
# read below through its "input_ids" and "assistant_masks" entries.
tokens = tokenizer.apply_chat_template(
    chat,
    tokenize=True,
    return_assistant_tokens_mask=True,
    return_dict=True,
    chat_template=better_template,
)

# Tokens may contain literal markup characters (e.g. "<bos>"); they must be
# HTML-escaped or the browser parses them as unknown tags and hides them.
# quote=False: the text is placed in element content, not in an attribute.
highlighted_tokens = [
    (
        "<span style='color: red; border: 1px solid red; padding: 2px;'>"
        f"{html.escape(token, quote=False)}</span>"
        if mask
        else html.escape(token, quote=False)
    )
    for token, mask in zip(
        tokenizer.convert_ids_to_tokens(tokens["input_ids"]),
        tokens["assistant_masks"],
    )
]

md = "".join(highlighted_tokens)
display(HTML(md))