# gomoku / LightZero /lzero /envs /tests /test_lightzero_env_wrapper.py
# zjowowen's picture
# init space
# 079c32c
# raw
# history blame
# No virus
# 4.6 kB
import pytest
from ding.envs import DingEnvWrapper
from lzero.envs.wrappers import ActionDiscretizationEnvWrapper, LightZeroEnvWrapper
from easydict import EasyDict
import gym
import numpy as np
@pytest.mark.unittest
class TestLightZeroEnvWrapper:
    """Smoke tests for ``LightZeroEnvWrapper`` on Pendulum and BipedalWalker.

    Each test wraps a gym environment in ``DingEnvWrapper`` (optionally with
    ``ActionDiscretizationEnvWrapper`` applied first), resets it once and
    checks the structure of the returned observation dict:

    * ``obs['observation']`` is an ``np.ndarray`` of the expected shape,
    * ``obs['action_mask']`` is ``None`` for continuous action spaces, or sums
      to the expected number of discrete actions,
    * ``obs['to_play']`` is ``-1``.

    A random action is also sampled and its shape printed.
    """

    @staticmethod
    def _build_env(env_cfg):
        """Create the ``DingEnvWrapper`` described by ``env_cfg``.

        ``ActionDiscretizationEnvWrapper`` is added before
        ``LightZeroEnvWrapper`` only when ``env_cfg.manually_discretization``
        is true; otherwise ``LightZeroEnvWrapper`` is the only wrapper.
        """
        wrappers = []
        if env_cfg.manually_discretization:
            wrappers.append(lambda env: ActionDiscretizationEnvWrapper(env, env_cfg))
        wrappers.append(lambda env: LightZeroEnvWrapper(env, env_cfg))
        return DingEnvWrapper(gym.make(env_cfg.env_name), cfg={'env_wrapper': wrappers})

    def _check_reset_and_random_action(self, env_cfg, obs_shape, num_legal_actions=None):
        """Reset the wrapped env, validate the observation, sample an action.

        Arguments:
            env_cfg: ``EasyDict`` config passed to the wrappers.
            obs_shape: expected shape of ``obs['observation']``.
            num_legal_actions: expected ``obs['action_mask'].sum()``; ``None``
                means the action mask itself is expected to be ``None``.
        """
        lightzero_env = self._build_env(env_cfg)
        try:
            obs = lightzero_env.reset()
            print("obs: ", obs)
            print(lightzero_env.observation_space, lightzero_env.action_space, lightzero_env.reward_space)

            assert isinstance(obs, dict)
            assert isinstance(obs['observation'], np.ndarray)
            assert obs['observation'].shape == obs_shape
            if num_legal_actions is None:
                assert obs['action_mask'] is None
            else:
                assert obs['action_mask'].sum() == num_legal_actions
            assert obs['to_play'] == -1

            action = lightzero_env.random_action()
            print('random_action: {}, action_space: {}'.format(action.shape, lightzero_env.action_space))
        finally:
            # Release the underlying gym environment even when an assertion
            # fails, so environments do not leak across tests.
            lightzero_env.close()

    def test_continuous_pendulum(self):
        """Pendulum-v1 with its native continuous action space."""
        env_cfg = EasyDict(
            dict(
                env_name='Pendulum-v1',
                manually_discretization=False,
                continuous=True,
                each_dim_disc_size=None,
                is_train=True,
            )
        )
        self._check_reset_and_random_action(env_cfg, obs_shape=(3, ))

    def test_discretization_pendulum(self):
        """Pendulum-v1 with the action space manually discretized."""
        env_cfg = EasyDict(
            dict(
                env_name='Pendulum-v1',
                manually_discretization=True,
                continuous=False,
                each_dim_disc_size=11,
                is_train=True,
            )
        )
        # Expected mask sum equals each_dim_disc_size here (11).
        self._check_reset_and_random_action(env_cfg, obs_shape=(3, ), num_legal_actions=11)

    def test_continuous_bipedalwalker(self):
        """BipedalWalker-v3 with its native continuous action space."""
        env_cfg = EasyDict(
            dict(
                env_name='BipedalWalker-v3',
                manually_discretization=False,
                continuous=True,
                each_dim_disc_size=4,
                is_train=True,
            )
        )
        self._check_reset_and_random_action(env_cfg, obs_shape=(24, ))

    def test_discretization_bipedalwalker(self):
        """BipedalWalker-v3 with the action space manually discretized."""
        env_cfg = EasyDict(
            dict(
                env_name='BipedalWalker-v3',
                manually_discretization=True,
                continuous=False,
                each_dim_disc_size=4,
                is_train=True,
            )
        )
        # NOTE(review): 256 presumably is each_dim_disc_size ** action_dim
        # (4 ** 4) -- confirm against ActionDiscretizationEnvWrapper.
        self._check_reset_and_random_action(env_cfg, obs_shape=(24, ), num_legal_actions=256)