Update README.md
Browse files
README.md
CHANGED
@@ -22,11 +22,29 @@ and first released at [this page](https://openai.com/blog/better-language-models
|
|
22 |
# How to use the model
|
23 |
|
24 |
~~~~
|
25 |
-
|
|
|
26 |
|
27 |
-
tokenizer = GPT2Tokenizer.from_pretrained(
|
|
|
28 |
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
~~~~
|
31 |
|
32 |
# Model architecture
|
|
|
22 |
# How to use the model
|
23 |
|
24 |
~~~~python
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

tokenizer = GPT2Tokenizer.from_pretrained('NlpHUST/gpt2-vietnamese')
model = GPT2LMHeadModel.from_pretrained('NlpHUST/gpt2-vietnamese')

text = "Albert Einstein là nhà vật lý học tạo ra thuyết lượng tử"
input_ids = tokenizer.encode(text, return_tensors='pt')
max_length = 100

sample_outputs = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id,
                                do_sample=True,
                                max_length=max_length,
                                min_length=max_length,
                                top_k=40,
                                num_beams=5,
                                early_stopping=True,
                                no_repeat_ngram_size=2,
                                num_return_sequences=3)

for i, sample_output in enumerate(sample_outputs):
    print(">> Generated text {}\n\n{}".format(i+1, tokenizer.decode(sample_output.tolist())))
    print('\n---')
~~~~
|
49 |
|
50 |
# Model architecture
|