# Model arguments: freeze the language decoder, train the vision encoder and fusion layers
model:
  _component_: torchtune.models.llama3_2_vision.llama3_2_vision_11b
  decoder_trainable: false
  encoder_trainable: true
  fusion_trainable: true
  image_size: 560

# Tokenizer / multimodal transform
tokenizer:
  _component_: torchtune.models.llama3_2_vision.llama3_2_vision_transform
  path: /tmp/Llama-3.2-11B-Vision-Instruct/original/tokenizer.model
  image_size: 560

# Checkpointing
checkpointer:
  _component_: torchtune.training.FullModelMetaCheckpointer
  checkpoint_dir: /tmp/Llama-3.2-11B-Vision-Instruct/original/
  checkpoint_files:
    - consolidated.pth
  recipe_checkpoint: null
  output_dir: /tmp/Llama-3.2-11B-Vision-Instruct/
  model_type: LLAMA3_VISION
resume_from_checkpoint: false

# Dataset and sampler
dataset:
  _component_: data.chart_dataset
  source: jrc/data-viz-qa
  split: train
seed: 42
shuffle: true
collate_fn: torchtune.data.padded_collate_tiled_images_and_mask

# Fine-tuning arguments
epochs: 2
max_steps_per_epoch: null
batch_size: 8
gradient_accumulation_steps: 4
optimizer:
  _component_: torch.optim.AdamW
  lr: 2.0e-05
  fused: true
loss:
  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
clip_grad_norm: 1.0
compile: false

# Environment and memory
device: cuda
enable_activation_checkpointing: true
custom_sharded_layers: []
dtype: bf16

output_dir: /tmp/full-llama3.2-vision--finetune

# Logging
metric_logger:
  _component_: torchtune.training.metric_logging.WandBLogger
  project: plot-huh
  name: dataviz-qa-full
log_every_n_steps: 1
log_peak_memory_stats: true
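
# The dataset section above points at data.chart_dataset, a custom builder that is
# not part of torchtune. Below is a minimal sketch of what such a builder could
# look like; the column names ("image", "question", "answer") and the
# ChartQAToMessages helper are assumptions about the jrc/data-viz-qa schema, not
# something the config confirms. It wraps torchtune's SFTDataset with a message
# transform that emits one single-turn image QA conversation per row.

# data.py -- hypothetical sketch, not the original implementation
from typing import Any, Mapping

from torchtune.data import Message
from torchtune.datasets import SFTDataset


class ChartQAToMessages:
    """Map one dataset row (assumed schema) to a single-turn image
    question-answer conversation in torchtune's Message format."""

    def __call__(self, sample: Mapping[str, Any]) -> Mapping[str, Any]:
        messages = [
            Message(
                role="user",
                content=[
                    {"type": "image", "content": sample["image"]},    # assumed column
                    {"type": "text", "content": sample["question"]},  # assumed column
                ],
                masked=True,  # no loss computed on the prompt tokens
            ),
            Message(role="assistant", content=sample["answer"]),      # assumed column
        ]
        return {"messages": messages}


def chart_dataset(
    model_transform,  # the recipe passes the tokenizer/transform as the first positional arg
    *,
    source: str = "jrc/data-viz-qa",
    split: str = "train",
) -> SFTDataset:
    return SFTDataset(
        source=source,
        message_transform=ChartQAToMessages(),
        model_transform=model_transform,
        split=split,  # forwarded to datasets.load_dataset
    )

# With a module like that on the Python path, a run of this config is typically
# launched with torchtune's CLI, e.g.
#   tune run --nproc_per_node <N> full_finetune_distributed --config <path-to-this-file>
# Whether this config targets the distributed or single-device full-finetune
# recipe is a guess; the custom_sharded_layers key suggests the FSDP recipe.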