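# Fine-tuning config for eluzhnica/mpt-7b-instruct-peft-compatible.
# Key names and ${...} interpolation follow MosaicML LLM Foundry / Composer
# conventions; these top-level variables are referenced throughout.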
tokenizer_name: eluzhnica/mpt-7b-instruct-peft-compatible
max_seq_len: 8192
global_seed: 17

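# Model: load pretrained weights from the Hugging Face Hub. init_device: meta
# defers parameter allocation so FSDP can shard the model without first
# materializing it in host memory. attn_uses_sequence_id: false means packed
# examples are not attention-masked from one another (see packing_ratio below).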
model:
  name: hf_causal_lm
  pretrained: true
  pretrained_model_name_or_path: eluzhnica/mpt-7b-instruct-peft-compatible
  init_device: meta
  config_overrides:
    max_seq_len: ${max_seq_len}
    attn_config:
      attn_uses_sequence_id: false

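# Tokenizer: reuse the model's tokenizer, capped at the training context length.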
tokenizer:
  name: ${tokenizer_name}
  kwargs:
    model_max_length: ${max_seq_len}

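# Training data: local JSON files under finetune-data/, read through the
# Hugging Face `json` dataset builder. packing_ratio: 9 packs roughly nine
# raw examples into each max_seq_len sequence to reduce padding waste.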
train_loader:
  name: finetuning
  dataset:
    hf_name: json
    hf_kwargs:
      data_dir: finetune-data
    split: train
    max_seq_len: ${max_seq_len}
    allow_pad_trimming: false
    decoder_only_format: true
    packing_ratio: 9
    shuffle: true
  drop_last: true
  num_workers: 8
  pin_memory: false
  prefetch_factor: 2
  persistent_workers: true
  timeout: 0

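# LR schedule: linear warmup for 100 batches (`100ba`), then cosine decay
# down to alpha_f * lr (10% of the peak learning rate).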
scheduler:
  name: cosine_with_warmup
  t_warmup: 100ba
  alpha_f: 0.1

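# Optimizer: AdamW with weight decay decoupled from the learning rate
# (Composer's decoupled_adamw); weight_decay: 0.0 disables regularization here.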
optimizer:
  name: decoupled_adamw
  lr: 6.0e-4
  betas:
  - 0.9
  - 0.95
  eps: 1.0e-08
  weight_decay: 0.0

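# Clip the global gradient norm at 1.0 each step.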
algorithms:
  gradient_clipping:
    clipping_type: norm
    clipping_threshold: 1.0

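# Durations use Composer notation: `ep` = epochs, `ba` = batches; a bare
# integer eval_interval counts epochs. global_train_batch_size is the total
# batch size summed across all devices.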
max_duration: 1ep
eval_interval: 1
eval_first: false
global_train_batch_size: 2

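# With global_train_batch_size: 2 and device_train_microbatch_size: 1,
# Composer derives per-device gradient accumulation automatically.
# precision: fp32 trains in full float32 (no autocast).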
seed: ${global_seed}
device_eval_batch_size: 4
device_train_microbatch_size: 1
precision: fp32

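# FSDP: fully shard parameters, gradients, and optimizer state across ranks.
# mixed_precision: PURE keeps compute in the dtype set by `precision` above,
# so this run is effectively pure fp32.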
fsdp_config:
  sharding_strategy: FULL_SHARD
  mixed_precision: PURE
  activation_checkpointing: true
  activation_checkpointing_reentrant: false
  activation_cpu_offload: false
  limit_all_gathers: true
  sync_module_states: true
  verbose: false

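# Console logging every 20 batches.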
progress_bar: true
log_to_console: true
console_log_interval: 20ba

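# Monitoring: throughput averaged over a 10-batch window, estimated time to
# completion, and the current learning rate.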
callbacks:
  speed_monitor:
    window_size: 10
  runtime_estimator: {}
  lr_monitor: {}

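# No auto-resume from a prior checkpoint; if a load_path were given, the full
# training state (not just weights) would be restored. Debug-level Python logs.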
autoresume: false
load_weights_only: false
python_log_level: debug

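# Max sequence length for in-context-learning (ICL) eval tasks, if any are
# configured.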
icl_max_seq_len: 2048