newly_fine_tuned_bert / config.json
pingkeest's picture
Training in progress, epoch 37
9bf65f1 verified
{
"_name_or_path": "bert-base-uncased",
"architectures": [
"BertForSequenceClassification"
],
"attention_probs_dropout_prob": 0.1,
"classifier_dropout": null,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"id2label": {
"0": "1000",
"1": "1001",
"2": "1002",
"3": "1003",
"4": "1004",
"5": "1005",
"6": "1006",
"7": "1007",
"8": "1008",
"9": "1009",
"10": "1010",
"11": "1011",
"12": "1012",
"13": "1013",
"14": "1014",
"15": "1015",
"16": "1016",
"17": "1017",
"18": "1018",
"19": "1019",
"20": "1020",
"21": "1021",
"22": "1022",
"23": "1023",
"24": "1024",
"25": "1025",
"26": "1026",
"27": "1027",
"28": "1028",
"29": "1029",
"30": "1030",
"31": "1031",
"32": "1032",
"33": "1033",
"34": "1034",
"35": "1035",
"36": "1036",
"37": "1037",
"38": "1038",
"39": "1039",
"40": "1041",
"41": "1042"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"1000": 0,
"1001": 1,
"1002": 2,
"1003": 3,
"1004": 4,
"1005": 5,
"1006": 6,
"1007": 7,
"1008": 8,
"1009": 9,
"1010": 10,
"1011": 11,
"1012": 12,
"1013": 13,
"1014": 14,
"1015": 15,
"1016": 16,
"1017": 17,
"1018": 18,
"1019": 19,
"1020": 20,
"1021": 21,
"1022": 22,
"1023": 23,
"1024": 24,
"1025": 25,
"1026": 26,
"1027": 27,
"1028": 28,
"1029": 29,
"1030": 30,
"1031": 31,
"1032": 32,
"1033": 33,
"1034": 34,
"1035": 35,
"1036": 36,
"1037": 37,
"1038": 38,
"1039": 39,
"1041": 40,
"1042": 41
},
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 0,
"position_embedding_type": "absolute",
"problem_type": "multi_label_classification",
"torch_dtype": "float32",
"transformers_version": "4.45.2",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 30522
}