kaelee committed on Jul 14, 2023

Commit

59ad4fd

•

1 Parent(s): ae9c5d7

Upload folder using huggingface_hub

Browse files

Files changed (32) hide show

checkpoint-7200/config.json +61 -0
checkpoint-7200/generation_config.json +5 -0
checkpoint-7200/optimizer.pt +3 -0
checkpoint-7200/pytorch_model-00001-of-00003.bin +3 -0
checkpoint-7200/pytorch_model-00002-of-00003.bin +3 -0
checkpoint-7200/pytorch_model-00003-of-00003.bin +3 -0
checkpoint-7200/pytorch_model.bin.index.json +203 -0
checkpoint-7200/rng_state_0.pth +3 -0
checkpoint-7200/rng_state_1.pth +3 -0
checkpoint-7200/rng_state_2.pth +3 -0
checkpoint-7200/rng_state_3.pth +3 -0
checkpoint-7200/scheduler.pt +3 -0
checkpoint-7200/special_tokens_map.json +6 -0
checkpoint-7200/tokenizer.json +0 -0
checkpoint-7200/tokenizer_config.json +11 -0
checkpoint-7200/trainer_state.json +0 -0
checkpoint-7200/training_args.bin +3 -0
config.json +61 -0
generation_config.json +5 -0
mm_projector.bin +3 -0
mm_projector/checkpoint-2400.bin +3 -0
mm_projector/checkpoint-4800.bin +3 -0
mm_projector/checkpoint-7200.bin +3 -0
pytorch_model-00001-of-00003.bin +3 -0
pytorch_model-00002-of-00003.bin +3 -0
pytorch_model-00003-of-00003.bin +3 -0
pytorch_model.bin.index.json +203 -0
special_tokens_map.json +6 -0
tokenizer.json +0 -0
tokenizer_config.json +11 -0
trainer_state.json +0 -0
training_args.bin +3 -0

checkpoint-7200/config.json ADDED Viewed

	@@ -0,0 +1,61 @@

+{
+  "_name_or_path": "mosaicml/mpt-7b-instruct",
+  "architectures": [
+    "LlavaMPTForCausalLM"
+  ],
+  "attn_config": {
+    "alibi": true,
+    "alibi_bias_max": 8,
+    "attn_impl": "torch",
+    "attn_pdrop": 0,
+    "attn_type": "multihead_attention",
+    "attn_uses_sequence_id": false,
+    "clip_qkv": null,
+    "prefix_lm": false,
+    "qk_ln": false,
+    "softmax_scale": null
+  },
+  "auto_map": {
+    "AutoConfig": "configuration_mpt.MPTConfig",
+    "AutoModelForCausalLM": "modeling_mpt.MPTForCausalLM"
+  },
+  "d_model": 4096,
+  "emb_pdrop": 0,
+  "embedding_fraction": 1.0,
+  "expansion_ratio": 4,
+  "freeze_mm_mlp_adapter": false,
+  "init_config": {
+    "emb_init_std": null,
+    "emb_init_uniform_lim": null,
+    "fan_mode": "fan_in",
+    "init_div_is_residual": true,
+    "init_gain": 0,
+    "init_nonlinearity": "relu",
+    "init_std": 0.02,
+    "name": "kaiming_normal_",
+    "verbose": 0
+  },
+  "init_device": "cpu",
+  "learned_pos_emb": true,
+  "logit_scale": null,
+  "max_seq_len": 2048,
+  "mm_hidden_size": 1024,
+  "mm_use_im_start_end": true,
+  "mm_vision_select_layer": -2,
+  "mm_vision_tower": "openai/clip-vit-large-patch14",
+  "model_type": "llava_mpt",
+  "n_heads": 32,
+  "n_layers": 32,
+  "no_bias": true,
+  "norm_type": "low_precision_layernorm",
+  "resid_pdrop": 0,
+  "sep_image_conv_front": false,
+  "tokenizer_name": "EleutherAI/gpt-neox-20b",
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.0.dev0",
+  "tune_mm_mlp_adapter": true,
+  "use_cache": false,
+  "use_mm_proj": true,
+  "verbose": 0,
+  "vocab_size": 50280
+}

checkpoint-7200/generation_config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "_from_model_config": true,
+  "transformers_version": "4.28.0.dev0",
+  "use_cache": false
+}

checkpoint-7200/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9f3407258eea544abd007cb4c421ba24ce5a85281c707aa32f5ab637e804b988
+size 33589607

checkpoint-7200/pytorch_model-00001-of-00003.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fe14e74fcfb2b961887e122ba24c2d1405c3384b4763ac88d7ddf16744cdd27
+size 9951011493

checkpoint-7200/pytorch_model-00002-of-00003.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51d38b05039d89b9e1d5ad6258d99b23f4aabaaad96e489324ccd20825479509
+size 9932531069

checkpoint-7200/pytorch_model-00003-of-00003.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d73e4d688b9f33c953396480581f4b159a950e14765f3da451b121151fbd3589
+size 6727976839

checkpoint-7200/pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,203 @@

+{
+  "metadata": {
+    "total_size": 26611449856
+  },
+  "weight_map": {
+    "transformer.blocks.0.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.0.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.0.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.0.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.0.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.0.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.11.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.11.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.11.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.11.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.11.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.11.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.12.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.12.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.12.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.12.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.12.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.12.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.2.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.2.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.2.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.2.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.2.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.2.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.20.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.20.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.20.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.20.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.20.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.20.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.23.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.23.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.23.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.23.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.23.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.23.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.24.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.24.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.24.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.24.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.24.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.24.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.3.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.3.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.3.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.3.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.3.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.3.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.30.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.30.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.30.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.30.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.30.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.30.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.4.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.4.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.4.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.4.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.4.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.4.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.mm_projector.bias": "pytorch_model-00003-of-00003.bin",
+    "transformer.mm_projector.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.norm_f.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.wte.weight": "pytorch_model-00001-of-00003.bin"
+  }
+}

checkpoint-7200/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e75c96f06b249e57a701db73ce821398e69672027a86d3a44063830602a29ab4
+size 14583

checkpoint-7200/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6dae7f45b6bac644ac207a61f43cba6d4b919a4cac22022bbb02907914422f5d
+size 14583

checkpoint-7200/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e53c770fe48635faad7fa341007d771781f1397cd47daab5b58f879ffb65f178
+size 14583

checkpoint-7200/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:68692af1001e65d02e07ac9974ccf4c332cfb23bc8f89566e1a908b1f2c4a1ed
+size 14583

checkpoint-7200/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a129ca5ec20f4a6c3e5435cc01ced782bef0ef09fd6e6a8f82452e6dcb4f4662
+size 627

checkpoint-7200/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

checkpoint-7200/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-7200/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 2048,
+  "padding_side": "right",
+  "special_tokens_map_file": "/home/ubuntu/.cache/huggingface/hub/models--mosaicml--mpt-7b-instruct/snapshots/1fc4634127ec64a45716003578b9cfae23265849/special_tokens_map.json",
+  "tokenizer_class": "GPTNeoXTokenizer",
+  "unk_token": "<|endoftext|>"
+}

checkpoint-7200/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-7200/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:499e95a18ec8111d7e7627b4f751606ef8f56f60a773b5003e79240171b023ff
+size 3963

config.json ADDED Viewed

	@@ -0,0 +1,61 @@

+{
+  "_name_or_path": "mosaicml/mpt-7b-instruct",
+  "architectures": [
+    "LlavaMPTForCausalLM"
+  ],
+  "attn_config": {
+    "alibi": true,
+    "alibi_bias_max": 8,
+    "attn_impl": "torch",
+    "attn_pdrop": 0,
+    "attn_type": "multihead_attention",
+    "attn_uses_sequence_id": false,
+    "clip_qkv": null,
+    "prefix_lm": false,
+    "qk_ln": false,
+    "softmax_scale": null
+  },
+  "auto_map": {
+    "AutoConfig": "configuration_mpt.MPTConfig",
+    "AutoModelForCausalLM": "modeling_mpt.MPTForCausalLM"
+  },
+  "d_model": 4096,
+  "emb_pdrop": 0,
+  "embedding_fraction": 1.0,
+  "expansion_ratio": 4,
+  "freeze_mm_mlp_adapter": false,
+  "init_config": {
+    "emb_init_std": null,
+    "emb_init_uniform_lim": null,
+    "fan_mode": "fan_in",
+    "init_div_is_residual": true,
+    "init_gain": 0,
+    "init_nonlinearity": "relu",
+    "init_std": 0.02,
+    "name": "kaiming_normal_",
+    "verbose": 0
+  },
+  "init_device": "cpu",
+  "learned_pos_emb": true,
+  "logit_scale": null,
+  "max_seq_len": 2048,
+  "mm_hidden_size": 1024,
+  "mm_use_im_start_end": true,
+  "mm_vision_select_layer": -2,
+  "mm_vision_tower": "openai/clip-vit-large-patch14",
+  "model_type": "llava_mpt",
+  "n_heads": 32,
+  "n_layers": 32,
+  "no_bias": true,
+  "norm_type": "low_precision_layernorm",
+  "resid_pdrop": 0,
+  "sep_image_conv_front": false,
+  "tokenizer_name": "EleutherAI/gpt-neox-20b",
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.0.dev0",
+  "tune_mm_mlp_adapter": true,
+  "use_cache": false,
+  "use_mm_proj": true,
+  "verbose": 0,
+  "vocab_size": 50280
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "_from_model_config": true,
+  "transformers_version": "4.28.0.dev0",
+  "use_cache": false
+}

mm_projector.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab8060530a37295ebc3f85fe92d8be3ab43cda30013f2b0701da12b527373c8f
+size 16794683

mm_projector/checkpoint-2400.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c78339d408c43e44fe3dd6e4a64faa4a61674a2c3a2036ed019c91b77073ccb
+size 16794695

mm_projector/checkpoint-4800.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:15337be5f5cf33e7b55fa5d2f399237540d9bfa6870bae6b156a38206d92a756
+size 16794695

mm_projector/checkpoint-7200.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf33b04ba818e3ff23909914525748a47b2adfbc4c89a0f03f8945b3817342f4
+size 16794695

pytorch_model-00001-of-00003.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee8a86b6c06ab73b5f2a40aa146dba1fe1c72f9c4bbd9cd074dda1e11d35fe8e
+size 9951010469

pytorch_model-00002-of-00003.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd640111d8792ff43f5667303350665a9f04a51143411fd0e881815d1a07497
+size 9932530109

pytorch_model-00003-of-00003.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66ea039e9772bd94cae8b99c8cb58ee5fc6ddc2a916398713fa7132a74e54cc6
+size 6727976135

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,203 @@

+{
+  "metadata": {
+    "total_size": 26611449856
+  },
+  "weight_map": {
+    "transformer.blocks.0.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.0.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.0.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.0.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.0.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.0.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.1.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.10.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.11.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.11.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.11.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.11.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.11.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.11.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.12.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.12.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.12.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.12.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.12.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.12.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.13.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.14.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.15.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.16.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.17.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.18.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.19.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.2.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.2.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.2.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.2.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.2.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.2.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.20.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.20.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.20.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.20.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.20.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.20.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.21.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.ffn.down_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.22.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.23.attn.Wqkv.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.23.attn.out_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.23.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.23.ffn.up_proj.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.23.norm_1.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.23.norm_2.weight": "pytorch_model-00002-of-00003.bin",
+    "transformer.blocks.24.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.24.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.24.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.24.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.24.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.24.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.25.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.26.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.27.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.28.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.29.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.3.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.3.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.3.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.3.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.3.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.3.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.30.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.30.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.30.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.30.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.30.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.30.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.attn.Wqkv.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.attn.out_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.ffn.down_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.ffn.up_proj.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.norm_1.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.31.norm_2.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.blocks.4.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.4.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.4.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.4.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.4.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.4.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.5.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.6.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.7.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.8.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.attn.Wqkv.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.attn.out_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.ffn.down_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.ffn.up_proj.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.norm_1.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.blocks.9.norm_2.weight": "pytorch_model-00001-of-00003.bin",
+    "transformer.mm_projector.bias": "pytorch_model-00003-of-00003.bin",
+    "transformer.mm_projector.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.norm_f.weight": "pytorch_model-00003-of-00003.bin",
+    "transformer.wte.weight": "pytorch_model-00001-of-00003.bin"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 2048,
+  "padding_side": "right",
+  "special_tokens_map_file": "/home/ubuntu/.cache/huggingface/hub/models--mosaicml--mpt-7b-instruct/snapshots/1fc4634127ec64a45716003578b9cfae23265849/special_tokens_map.json",
+  "tokenizer_class": "GPTNeoXTokenizer",
+  "unk_token": "<|endoftext|>"
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:499e95a18ec8111d7e7627b4f751606ef8f56f60a773b5003e79240171b023ff
+size 3963