{
    "dataset_reader": {
        "type": "snli",
        "token_indexers": {
            "elmo": {
                "type": "elmo_characters"
            }
        },
        "tokenizer": {
            "end_tokens": [
                "@@NULL@@"
            ]
        }
    },
    "train_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/snli/snli_1.0_train.jsonl",
    "validation_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/snli/snli_1.0_dev.jsonl",
    "model": {
        "type": "decomposable_attention",
        "text_field_embedder": {
            "token_embedders": {
                "elmo": {
                    "type": "elmo_token_embedder",
                    "do_layer_norm": false,
                    "dropout": 0.2
                }
            }
        },
        "attend_feedforward": {
            "input_dim": 1024,
            "num_layers": 2,
            "hidden_dims": 200,
            "activations": "relu",
            "dropout": 0.2
        },
        "matrix_attention": {
            "type": "dot_product"
        },
        "compare_feedforward": {
            "input_dim": 2048,
            "num_layers": 2,
            "hidden_dims": 200,
            "activations": "relu",
            "dropout": 0.2
        },
        "aggregate_feedforward": {
            "input_dim": 400,
            "num_layers": 2,
            "hidden_dims": [
                200,
                3
            ],
            "activations": [
                "relu",
                "linear"
            ],
            "dropout": [
                0.2,
                0.0
            ]
        },
        "initializer": {
            "regexes": [
                [
                    ".*linear_layers.*weight",
                    {
                        "type": "xavier_normal"
                    }
                ],
                [
                    ".*token_embedder_tokens\\._projection.*weight",
                    {
                        "type": "xavier_normal"
                    }
                ]
            ]
        }
    },
    "iterator": {
        "type": "bucket",
        "sorting_keys": [
            [
                "premise",
                "num_tokens"
            ],
            [
                "hypothesis",
                "num_tokens"
            ]
        ],
        "batch_size": 64
    },
    "trainer": {
        "num_epochs": 140,
        "patience": 20,
        "cuda_device": 0,
        "grad_clipping": 5.0,
        "validation_metric": "+accuracy",
        "optimizer": {
            "type": "adagrad"
        }
    }
}