model_params:
  model_name: "Transformer"
  past_values:
    - "Target_Y"
  past_observed_mask:
    - "Target_Y"
  future_observed_mask:
    - "Target_Y"
  future_values:
    - "Target_Y"
  time_index:
    - "Time_Index"
  past_time_features:
    - Feature_1
    - Feature_2
    - Feature_3
    - Feature_4
    - Feature_5
    - Feature_6
    - Feature_7
    - Feature_8
    - Feature_9
    - Feature_10
    - Feature_11
    - Feature_12
    - Feature_13
    - Feature_14
    - Feature_15
    - Feature_16
    - Feature_17
    - Feature_18
    - Feature_19
    - Feature_20
    - Feature_21
    - Feature_22
    - Feature_23
    - Feature_24
    - Feature_25
    - Feature_26
    - Feature_27
    - Feature_28
    - Feature_29
    - Feature_30
    - Feature_31
    - Feature_32
    - Feature_33
    - Feature_34
    - Feature_35
    - Feature_36
    - Feature_37
    - Feature_38
    - Feature_39
    - Feature_40
    - Feature_41
    - Feature_42
    - Feature_43
    - Feature_44
    - Feature_45
    - Feature_46
    - Feature_47
    - Feature_48
    - Feature_49
    - Feature_50
    - Feature_51
    - Feature_52
    - Feature_53
    - Feature_54
    - Feature_55
    - Feature_56
  future_time_features:
    - Feature_1
    - Feature_2
    - Feature_3
    - Feature_4
    - Feature_5
    - Feature_6
    - Feature_7
    - Feature_8
    - Feature_9
    - Feature_10
    - Feature_11
    - Feature_12
    - Feature_13
    - Feature_14
    - Feature_15
    - Feature_16
    - Feature_17
    - Feature_18
    - Feature_19
    - Feature_20
    - Feature_21
    - Feature_22
    - Feature_23
    - Feature_24
    - Feature_25
    - Feature_26
    - Feature_27
    - Feature_28
    - Feature_29
    - Feature_30
    - Feature_31
    - Feature_32
    - Feature_33
    - Feature_34
    - Feature_35
    - Feature_36
    - Feature_37
    - Feature_38
    - Feature_39
    - Feature_40
    - Feature_41
    - Feature_42
    - Feature_43
    - Feature_44
    - Feature_45
    - Feature_46
    - Feature_47
    - Feature_48
    - Feature_49
    - Feature_50
    - Feature_51
    - Feature_52
    - Feature_53
    - Feature_54
    - Feature_55
    - Feature_56
  static_categorical_features:
    #- "Static_Categorical"
  static_real_features:
  List_to_AddTimeFeatures:
    hour: true
    dayofweek: true
    day: true
    month: true
    quarter: true
    dayofmonth: true
    weekofyear: true
    is_holiday: false
  ConvertTimeFeature:
    ListTimeTransforme:
      - cos
      - sin
      - identity
  cardinality: 1000
  embedding_dimension:
    - 256
  encoder_layers: 8
  decoder_layers: 8
  d_model: 64
  n_heads: 64
  predict_length: 24
  context_length: 168
  stride: 1
  distribution_output: "student_t"
  scaling: "mean"
  use_cache: false
  lags_sequence:
    - 0
  rate_mask: 0.0
  num_parallel_samples: 100
  encoder_ffn_dim: 256
  decoder_ffn_dim: 256
  encoder_attention_heads: 128
  decoder_attention_heads: 128
  is_encoder_decoder: true
  activation_function: "gelu"
  dropout: 0.2
  encoder_layerdrop: 0.2
  decoder_layerdrop: 0.2
  attention_dropout: 0.2
  activation_dropout: 0.1
  init_std: 0.02

optimizing_params:
  learning_rate: 0.001
  optimizer: "AdamW"
  beta_1: 0.9
  beta_2: 0.95
  weight_decay: 0.01
  batch_size: 32
  epoch_max: 200
  precision: "64-true"
  gradient_clip_val: 0.8

experiment:
  name: "Transformer"
  log_model: "all"
  experiment: "0"
  monitor: "val_loss"
  min_delta: 0.000007
  patience_train: 200
  verbose: true
  checkpoint:
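
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, kept as a comment so this file stays valid YAML):
# the snippet below assumes this file is saved as "config.yaml" (hypothetical
# name) and that the model_params block is fed into Hugging Face's
# TimeSeriesTransformerConfig. The library choice and the key renaming
# (e.g. predict_length -> prediction_length) are assumptions for illustration,
# not necessarily the project's actual loader.
#
#   import yaml
#   from transformers import TimeSeriesTransformerConfig
#
#   with open("config.yaml") as f:   # hypothetical file name
#       cfg = yaml.safe_load(f)
#   mp = cfg["model_params"]
#
#   model_config = TimeSeriesTransformerConfig(
#       prediction_length=mp["predict_length"],
#       context_length=mp["context_length"],
#       num_time_features=len(mp["past_time_features"]),
#       lags_sequence=mp["lags_sequence"],
#       distribution_output=mp["distribution_output"],
#       scaling=mp["scaling"],
#       d_model=mp["d_model"],
#       encoder_layers=mp["encoder_layers"],
#       decoder_layers=mp["decoder_layers"],
#       encoder_ffn_dim=mp["encoder_ffn_dim"],
#       decoder_ffn_dim=mp["decoder_ffn_dim"],
#       encoder_attention_heads=mp["encoder_attention_heads"],
#       decoder_attention_heads=mp["decoder_attention_heads"],
#       activation_function=mp["activation_function"],
#       dropout=mp["dropout"],
#       attention_dropout=mp["attention_dropout"],
#       activation_dropout=mp["activation_dropout"],
#       num_parallel_samples=mp["num_parallel_samples"],
#       init_std=mp["init_std"],
#       use_cache=mp["use_cache"],
#       is_encoder_decoder=mp["is_encoder_decoder"],
#   )
#   print(model_config)
#
# Note: the config object itself does not check that d_model is divisible by
# the number of attention heads; that check only happens when the model is
# instantiated from the config.
# ---------------------------------------------------------------------------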