{
  "layers_per_ipu": [0, 12, 4, 0],
  "inference_layers_per_ipu": [0, 8, 8, 0],
  "embedding_serialization_factor": 4,
  "projection_serialization_factor": 4,
  "matmul_proportion": 0.25,
  "inference_matmul_proportion": 0.6,
  "recompute_checkpoint_every_layer": true,
  "optimizer_state_offchip": true,
  "replicated_tensor_sharding": true,
  "device_iterations": 10,
  "gradient_accumulation_steps": 128,
  "executable_cache_dir": "./exe_cache"
}