# Tokenize the prompt (as a 1-element batch) into PyTorch tensors and move
# them to the GPU. NOTE(review): `tokenizer`, `prompt`, and `model` are
# defined earlier in the document — not visible in this excerpt.
model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
# Number of prompt tokens, so the prompt can be sliced off the output below.
input_length = model_inputs.input_ids.shape[1]
# Generate up to 20 new tokens conditioned on the prompt.
generated_ids = model.generate(**model_inputs, max_new_tokens=20)
# Decode and print ONLY the newly generated tokens (everything after the
# prompt), dropping special tokens such as EOS.
print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])
# Example output:
#   "I'm not a thug, but i can tell you that a human cannot eat"
# Oh no, it did not follow our instruction to reply as a thug!