Update README.md
Browse files
README.md
CHANGED
@@ -30,7 +30,10 @@ from transformers import AutoModel, AutoTokenizer
|
|
30 |
checkpoint = "codesage/codesage-large"
|
31 |
device = "cuda" # for GPU usage or "cpu" for CPU usage
|
32 |
|
33 |
-
|
|
|
|
|
|
|
34 |
model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True).to(device)
|
35 |
|
36 |
inputs = tokenizer.encode("def print_hello_world():\tprint('Hello World!')", return_tensors="pt").to(device)
|
|
|
30 |
checkpoint = "codesage/codesage-large"
|
31 |
device = "cuda" # for GPU usage or "cpu" for CPU usage
|
32 |
|
33 |
+
# Note: CodeSage requires adding an EOS token at the end of
|
34 |
+
# each tokenized sequence to ensure good performance
|
35 |
+
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True, add_eos_token=True)
|
36 |
+
|
37 |
model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True).to(device)
|
38 |
|
39 |
inputs = tokenizer.encode("def print_hello_world():\tprint('Hello World!')", return_tensors="pt").to(device)
|