Upload folder using huggingface_hub

Browse files

Files changed (13) hide show

README.md +45 -0
added_tokens.json +3 -0
config.json +29 -0
generation_config.json +4 -0
model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +30 -0
tokenizer.model +3 -0
tokenizer_config.json +52 -0
trainer_state.json +0 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,45 @@

+---
+license: cc-by-nc-sa-4.0
+language:
+- bn
+---
+## Description
+**Biswabangla-335M-io** is a 335 million parameters open source instruction-tuned Generative pretrained Language Model for Bangla/Bengali.
+Biswabangla is a monolingual Bangla/Bengali Generative Language model. The tokenizer of Biswabangla also works for Assamese language.
+This is a pretrained model from scratch at a context size of 4096. Furthermore instruction-tuned on 1 million Bengali input-output pairs across various Bengali NLP tasks.
+This model is instruction-tuned on 1 million Bangla instructions in the form of (input,output) pairs.
+This model is strictly prohibited to use for commercial purposes.
+If you use our model, please cite our paper [Niyogi and Bhattacharya, 2024](https://arxiv.org/abs/2401.18034)
+The architecture of Biswabangla is different than the language models, mentioned in [Niyogi and Bhattacharya, 2024](https://arxiv.org/abs/2401.18034)
+### Model Architecture
+Transformer Decoder Only Auto Regressive Model
+### Limitations
+The model was trained on data that contains toxic language, unsafe content, and societal biases originally crawled from the internet.
+Therefore, the model may amplify those biases and return toxic responses especially when prompted with toxic prompts.
+The model may generate answers that may be inaccurate, omit key information, or include irrelevant or redundant text producing socially unacceptable or undesirable text, even if the prompt itself does not include anything explicitly offensive.
+Gyan AI Research does own the output generated from the model.
+### Citations
+```
+@misc{niyogi2024paramanufamilynovelefficient,
+      title={Paramanu: A Family of Novel Efficient Generative Foundation Language Models for Indian Languages},
+      author={Mitodru Niyogi and Arnab Bhattacharya},
+      year={2024},
+      eprint={2401.18034},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2401.18034},
+}

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[PAD]": 16000
+}

config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "./bangla-llm-335M-4k-16ktok-hf",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 1280,
+  "initializer_range": 0.02,
+  "intermediate_size": 3416,
+  "max_position_embeddings": 4096,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 16,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "use_cache": false,
+  "vocab_size": 16001
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_new_tokens": 256,
+  "transformers_version": "4.44.2"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89b8dd586d77870c5ef962cbbd98fc2936cafe40c362cd221a5b28fc977e7235
+size 1341057192

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3942fb795996a4828a716d2522195fbb10087d6cb2c7cd8094100484fe7c0791
+size 2682204474

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9eedd1acfe54839f44457898e48b54cde571e685f3d35289b19bfa5078cf1b13
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f79e8fde30a634034139b8848693edbec43a2e4776eb32358ce3a6136e49e82
+size 1064

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:633f27df1cb3d08190fef3fcc56dfeda6e1b0277de36e52c3ea1b954ffc59a68
+size 399658

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16000": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false,
+  "use_fast": true
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab0fd569d92ac31cc4445bb433583497d6bed841c9d25a942331785b6a31a034
+size 7160