{ "activation_dropout": 0.1, "activation_function": "gelu", "architectures": [ "TimeSeriesTransformerForPrediction" ], "attention_dropout": 0.1, "cardinality": [ 0 ], "context_length": 336, "d_model": 512, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layerdrop": 0.1, "decoder_layers": 2, "distribution_output": "student_t", "dropout": 0.1, "embedding_dimension": [ 0 ], "encoder_attention_heads": 16, "encoder_ffn_dim": 2048, "encoder_layerdrop": 0.1, "encoder_layers": 4, "feature_size": 489, "init_std": 0.02, "input_size": 22, "is_encoder_decoder": true, "lags_sequence": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 ], "loss": "nll", "model_type": "time_series_transformer", "num_dynamic_real_features": 0, "num_parallel_samples": 100, "num_static_categorical_features": 0, "num_static_real_features": 0, "num_time_features": 5, "prediction_length": 96, "scaling": "mean", "torch_dtype": "float32", "transformers_version": "4.37.2", "use_cache": true }