aapot
committed on
Commit
•
c1f8001
1
Parent(s):
0394e28
Fix
Browse files
EasyLM/models/llama/llama_model.py
CHANGED
@@ -343,7 +343,9 @@ class LLaMAConfig(PretrainedConfig):
|
|
343 |
@classmethod
|
344 |
def get_tokenizer(cls, config, padding_side='left', truncation_side='right'):
|
345 |
config = cls.get_tokenizer_config(config)
|
346 |
-
|
|
|
|
|
347 |
if config.pretrained_model_name_or_path != '':
|
348 |
tokenizer = AutoTokenizer.from_pretrained(
|
349 |
config.pretrained_model_name_or_path,
|
|
|
343 |
@classmethod
|
344 |
def get_tokenizer(cls, config, padding_side='left', truncation_side='right'):
|
345 |
config = cls.get_tokenizer_config(config)
|
346 |
+
if config.vocab_file == '':
|
347 |
+
assert config.pretrained_model_name_or_path != '', 'vocab_file or pretrained_model_name_or_path must be specified'
|
348 |
+
|
349 |
if config.pretrained_model_name_or_path != '':
|
350 |
tokenizer = AutoTokenizer.from_pretrained(
|
351 |
config.pretrained_model_name_or_path,
|
pretrain_llama_7b.sh
CHANGED
@@ -8,7 +8,7 @@ export LIBTPU_INIT_ARGS='--xla_jf_spmd_threshold_for_windowed_einsum_mib=0 --xla
|
|
8 |
|
9 |
|
10 |
python3 -m EasyLM.models.llama.llama_train \
|
11 |
-
--initialize_jax_distributed=True \
|
12 |
--mesh_dim='1,-1,4' \
|
13 |
--dtype='bf16' \
|
14 |
--total_steps=1000000 \
|
@@ -20,7 +20,7 @@ python3 -m EasyLM.models.llama.llama_train \
|
|
20 |
--update_llama_config='' \
|
21 |
--load_dataset_state='' \
|
22 |
--load_checkpoint='' \
|
23 |
-
--tokenizer.
|
24 |
--optimizer.type='lion' \
|
25 |
--optimizer.lion_optimizer.weight_decay=1.0 \
|
26 |
--optimizer.lion_optimizer.lr=3e-5 \
|
|
|
8 |
|
9 |
|
10 |
python3 -m EasyLM.models.llama.llama_train \
|
11 |
+
--jax_distributed.initialize_jax_distributed=True \
|
12 |
--mesh_dim='1,-1,4' \
|
13 |
--dtype='bf16' \
|
14 |
--total_steps=1000000 \
|
|
|
20 |
--update_llama_config='' \
|
21 |
--load_dataset_state='' \
|
22 |
--load_checkpoint='' \
|
23 |
+
--tokenizer.pretrained_model_name_or_path='./' \
|
24 |
--optimizer.type='lion' \
|
25 |
--optimizer.lion_optimizer.weight_decay=1.0 \
|
26 |
--optimizer.lion_optimizer.lr=3e-5 \
|