- Training Parameters:
  - base_model: TinyLlama/TinyLlama_v1.1
  - save_ckpt_log_name: tinyllama_prune_log
  - pruning_ratio: 0.39
  - pruner_type: taylor
  - temperature: 1.0
  - top_p: 0.95
  - max_seq_len: 2048
  - channel_wise: False
  - block_wise: True
  - layer_wise: False
  - layer: 12
  - block_attention_layer_start: 3
  - block_attention_layer_end: 19
  - block_mlp_layer_start: 3
  - block_mlp_layer_end: 19
  - iterative_steps: 1
  - grouping_strategy: sum
  - global_pruning: False
  - taylor: param_first
  - num_examples: 10
  - device: cuda
  - test_before_train: True
  - eval_device: cuda
  - test_after_train: True
  - seed: 42
  - save_model: True
  - torch_version: 2.4