File size: 1,462 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import pytest
import numpy as np
from easydict import EasyDict
from ding.utils import set_pkg_seed
from dizoo.evogym.envs import EvoGymEnv
@pytest.mark.envtest
@pytest.mark.parametrize('robot', ['speed_bot', 'random'])
def test_evogym_env_eval_episode_return(robot):
set_pkg_seed(1234, use_cuda=False)
env = EvoGymEnv(EasyDict({'env_id': 'Walker-v0', 'robot': robot, 'robot_dir': '../'}))
env.seed(1234)
env.reset()
action_dim = env.action_space.shape
eval_episode_return = np.array([0.], dtype=np.float32)
if robot == 'speed_bot':
assert env.observation_space.shape == (58, )
assert action_dim == (10, )
while True:
action = np.random.random(size=action_dim)
timestep = env.step(action)
eval_episode_return += timestep.reward
print("{}(dtype: {})".format(timestep.reward, timestep.reward.dtype))
if timestep.done:
print(
"{}({}), {}({})".format(
timestep.info['eval_episode_return'], type(timestep.info['eval_episode_return']),
eval_episode_return, type(eval_episode_return)
)
)
# timestep.reward and the cumulative reward in wrapper EvalEpisodeReturn are not the same.
assert abs(timestep.info['eval_episode_return'].item() - eval_episode_return.item()) / \
abs(timestep.info['eval_episode_return'].item()) < 1e-5
break
|