{ "_name_or_path": "tarteel-ai/whisper-base-ar-quran", "activation_dropout": 0.0, "activation_function": "gelu", "apply_spec_augment": false, "architectures": [ "WhisperForAudioClassification" ], "attention_dropout": 0.0, "begin_suppress_tokens": [ 220, 50257 ], "bos_token_id": 50257, "classifier_proj_size": 256, "d_model": 512, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layerdrop": 0.0, "decoder_layers": 6, "decoder_start_token_id": 50258, "dropout": 0.0, "encoder_attention_heads": 8, "encoder_ffn_dim": 2048, "encoder_layerdrop": 0.0, "encoder_layers": 6, "eos_token_id": 50257, "forced_decoder_ids": null, "id2label": { "0": "01_A", "1": "02_Ba", "2": "03_Ta", "3": "04_Tsa", "4": "05_Ja", "5": "06_Hha", "6": "07_Kha", "7": "08_Da", "8": "09_Dza", "9": "10_Ro", "10": "11_Za", "11": "12_Sa", "12": "13_Sya", "13": "14_Sho", "14": "15_Dho", "15": "16_Tho", "16": "17_Zho", "17": "18_Ain", "18": "19_Gho", "19": "20_Fa", "20": "21_Qo", "21": "22_Ka", "22": "23_La", "23": "24_Ma", "24": "25_Na", "25": "26_Ha", "26": "27_Wa", "27": "28_Ya" }, "init_std": 0.02, "is_encoder_decoder": true, "label2id": { "01_A": 0, "02_Ba": 1, "03_Ta": 2, "04_Tsa": 3, "05_Ja": 4, "06_Hha": 5, "07_Kha": 6, "08_Da": 7, "09_Dza": 8, "10_Ro": 9, "11_Za": 10, "12_Sa": 11, "13_Sya": 12, "14_Sho": 13, "15_Dho": 14, "16_Tho": 15, "17_Zho": 16, "18_Ain": 17, "19_Gho": 18, "20_Fa": 19, "21_Qo": 20, "22_Ka": 21, "23_La": 22, "24_Ma": 23, "25_Na": 24, "26_Ha": 25, "27_Wa": 26, "28_Ya": 27 }, "mask_feature_length": 10, "mask_feature_min_masks": 0, "mask_feature_prob": 0.0, "mask_time_length": 10, "mask_time_min_masks": 2, "mask_time_prob": 0.05, "max_length": 1024, "max_source_positions": 1500, "max_target_positions": 448, "median_filter_width": 7, "model_type": "whisper", "num_hidden_layers": 6, "num_mel_bins": 80, "pad_token_id": 50257, "scale_embedding": false, "torch_dtype": "float32", "transformers_version": "4.46.2", "use_cache": false, "use_weighted_layer_sum": false, "vocab_size": 51865 }