model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
input_length = model_inputs.input_ids.shape[1]
generated_ids = model.generate(**model_inputs, max_new_tokens=20)
# Decode only the newly generated tokens, slicing off the prompt
print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])
"I'm not a thug, but i can tell you that a human cannot eat"
Oh no, it did not follow our instruction to reply as a thug!
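The usual fix for instruction-tuned models is to format the conversation with the model's chat template rather than passing raw text. Here is a minimal sketch, assuming `model`, `tokenizer`, and `prompt` are already defined as above and that the checkpoint ships a chat template:

```python
# Wrap the raw prompt in a chat message, then let the tokenizer apply the
# model's own template (control tokens plus the cue for an assistant reply).
messages = [{"role": "user", "content": prompt}]
model_inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to("cuda")

# apply_chat_template with return_tensors="pt" returns the input ids directly
# (a tensor, not a dict), so the generate call differs slightly from above.
input_length = model_inputs.shape[1]
generated_ids = model.generate(model_inputs, max_new_tokens=20)
print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])
```

With the prompt formatted the way the model was trained to see it, the instruction to reply in a particular style is far more likely to be followed.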