import pytest
import numpy as np
import gym
from easydict import EasyDict
import atari_py
from dizoo.atari.envs import AtariEnv, AtariEnvMR


@pytest.mark.envtest
class TestAtariEnv:

    def test_pong(self):
        cfg = {'env_id': 'PongNoFrameskip-v4', 'frame_stack': 4, 'is_train': True}
        cfg = EasyDict(cfg)
        pong_env = AtariEnv(cfg)
        pong_env.seed(0)
        obs = pong_env.reset()
        assert obs.shape == (cfg.frame_stack, 84, 84)
        act_dim = pong_env.action_space.n
        i = 0
        while True:
            # Both ``env.random_action()`` and sampling with ``np.random`` over the action space
            # can generate a legal random action.
            if i < 10:
                random_action = np.random.choice(range(act_dim), size=(1, ))
                i += 1
            else:
                random_action = pong_env.random_action()
            timestep = pong_env.step(random_action)
            assert timestep.obs.shape == (cfg.frame_stack, 84, 84)
            assert timestep.reward.shape == (1, )
            if timestep.done:
                assert 'eval_episode_return' in timestep.info, timestep.info
                break
        print(pong_env.observation_space, pong_env.action_space, pong_env.reward_space)
        print('eval_episode_return: {}'.format(timestep.info['eval_episode_return']))
        pong_env.close()

    def test_montezuma_revenge(self):
        cfg = {'env_id': 'MontezumaRevengeDeterministic-v4', 'frame_stack': 4, 'is_train': True}
        cfg = EasyDict(cfg)
        mr_env = AtariEnvMR(cfg)
        mr_env.seed(0)
        obs = mr_env.reset()
        assert obs.shape == (cfg.frame_stack, 84, 84)
        act_dim = mr_env.action_space.n
        i = 0
        while True:
            # Sample with ``np.random`` for the first 10 steps, then switch to ``env.random_action()``.
            if i < 10:
                random_action = np.random.choice(range(act_dim), size=(1, ))
                i += 1
            else:
                random_action = mr_env.random_action()
            timestep = mr_env.step(random_action)
            assert timestep.obs.shape == (cfg.frame_stack, 84, 84)
            assert timestep.reward.shape == (1, )
            if timestep.done:
                assert 'eval_episode_return' in timestep.info, timestep.info
                break
        print(mr_env.observation_space, mr_env.action_space, mr_env.reward_space)
        print('eval_episode_return: {}'.format(timestep.info['eval_episode_return']))
        mr_env.close()