bh4 committed on
Commit
caa551c
1 Parent(s): bdc7077

Upload folder using huggingface_hub

README.md ADDED
@@ -0,0 +1,42 @@
+ ---
+ license: cc-by-nc-sa-4.0
+ language:
+ - bn
+ ---
+ ## Description
+
+ **Biswabangla-335M-io** is a 335-million-parameter, open-source, instruction-tuned generative pretrained language model for Bangla/Bengali.
+
+ Biswabangla is a monolingual Bangla/Bengali generative language model. Its tokenizer also works for the Assamese language.
+
+ The model was pretrained from scratch at a context size of 4096 and then instruction-tuned on 1 million Bangla instructions in the form of (input, output) pairs spanning various Bengali NLP tasks.
+
+ Use of this model for commercial purposes is strictly prohibited.
+
+ If you use our model, please cite our paper [Niyogi and Bhattacharya, 2024](https://arxiv.org/abs/2401.18034).
+
+ The architecture of Biswabangla differs from that of the language models described in [Niyogi and Bhattacharya, 2024](https://arxiv.org/abs/2401.18034).
+
+ ### Model Architecture
+ Decoder-only autoregressive Transformer
+
+ ### Limitations
+ The model was trained on data that contains toxic language, unsafe content, and societal biases originally crawled from the internet.
+ Therefore, the model may amplify those biases and return toxic responses, especially when given toxic prompts.
+ It may also generate answers that are inaccurate, omit key information, or include irrelevant or redundant text, and it may produce socially unacceptable or undesirable output even when the prompt contains nothing explicitly offensive.
+
+ Gyan AI Research owns the output generated from the model.
+
+ ### Citations
+
+ ```
+ @misc{niyogi2024paramanufamilynovelefficient,
+ title={Paramanu: A Family of Novel Efficient Generative Foundation Language Models for Indian Languages},
+ author={Mitodru Niyogi and Arnab Bhattacharya},
+ year={2024},
+ eprint={2401.18034},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL},
+ url={https://arxiv.org/abs/2401.18034},
+ }
+ ```
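
For orientation, a minimal usage sketch with `transformers` follows; the local directory name and the Bangla prompt are illustrative assumptions, not part of this commit:

```python
# Minimal sketch: load the checkpoint uploaded in this commit.
# "./Biswabangla-335M-io" is an assumed local path; point it at the
# directory containing these files (or at the Hub repo id).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_dir = "./Biswabangla-335M-io"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)

prompt = "বাংলায় একটি ছোট গল্প লেখো।"  # illustrative instruction prompt
inputs = tokenizer(prompt, return_tensors="pt")
# generation_config.json below sets max_new_tokens=256 as the default
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```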
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[PAD]": 16000
+ }
config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "./bangla-llm-335M-4k-16ktok-hf",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 1280,
+   "initializer_range": 0.02,
+   "intermediate_size": 3416,
+   "max_position_embeddings": 4096,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 16,
+   "num_key_value_heads": 16,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.44.2",
+   "use_cache": false,
+   "vocab_size": 16001
+ }
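
These hyperparameters are consistent with the "335M" in the model name; a back-of-the-envelope count (a sketch that ignores the small RMSNorm weights):

```python
# Rough parameter count from the config above (tied embeddings, no biases).
hidden, inter, layers, vocab = 1280, 3416, 16, 16001

embed = vocab * hidden                 # token embeddings, tied with the LM head
attn = 4 * hidden * hidden             # q/k/v/o projections per layer
mlp = 3 * hidden * inter               # gate/up/down projections per layer
total = embed + layers * (attn + mlp)  # RMSNorm weights (~42k params) omitted

print(f"{total:,}")  # 335,217,920 -- roughly 335M; at 4 bytes/param in
                     # float32 this matches the ~1.34 GB safetensors file below
```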
generation_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_new_tokens": 256,
+   "transformers_version": "4.44.2"
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89b8dd586d77870c5ef962cbbd98fc2936cafe40c362cd221a5b28fc977e7235
+ size 1341057192
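
Note that this and the following `.pt`/`.pth`/`.bin` entries are Git LFS pointer files, not the binaries themselves. A downloaded blob can be checked against the pointer's `oid` and `size` fields; a sketch, assuming the file has been fetched locally:

```python
# Verify a downloaded LFS blob against the pointer's sha256 oid and size.
import hashlib
import os

path = "model.safetensors"  # assumed local file, e.g. after `git lfs pull`
expected_oid = "89b8dd586d77870c5ef962cbbd98fc2936cafe40c362cd221a5b28fc977e7235"
expected_size = 1341057192

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "checksum mismatch"
print("model.safetensors matches its LFS pointer")
```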
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3942fb795996a4828a716d2522195fbb10087d6cb2c7cd8094100484fe7c0791
+ size 2682204474
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9eedd1acfe54839f44457898e48b54cde571e685f3d35289b19bfa5078cf1b13
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f79e8fde30a634034139b8848693edbec43a2e4776eb32358ce3a6136e49e82
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:633f27df1cb3d08190fef3fcc56dfeda6e1b0277de36e52c3ea1b954ffc59a68
+ size 399658
tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": true,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "16000": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "[PAD]",
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false,
+   "use_fast": true
+ }
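
The `[PAD]` token registered at id 16000 extends the 16,000-entry SentencePiece vocabulary to the `vocab_size` of 16001 declared in config.json. A quick sanity check of these settings might look like this (a sketch, assuming the files sit in the same local directory as above):

```python
# Sanity-check the tokenizer settings declared above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./Biswabangla-335M-io")  # assumed path

ids = tok("নমস্কার").input_ids
assert ids[0] == tok.bos_token_id  # add_bos_token: true -> sequences start with <s> (id 1)
assert tok.pad_token_id == 16000   # [PAD] appended after the base SentencePiece ids
assert tok.padding_side == "right"
```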
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ab0fd569d92ac31cc4445bb433583497d6bed841c9d25a942331785b6a31a034
+ size 7160