|
import os |
|
import pytest |
|
import numpy as np |
|
from easydict import EasyDict |
|
|
|
from ding.utils import set_pkg_seed |
|
from dizoo.mujoco.envs import MujocoEnv |
|
|
|
|
|
@pytest.mark.envtest
@pytest.mark.parametrize('delay_reward_step', [1, 10])
def test_mujoco_env_delay_reward(delay_reward_step):
    """Step an ``Ant-v3`` ``MujocoEnv`` 25 times and check each reward's shape.

    Args:
        delay_reward_step: Value forwarded to the env config under the
            ``'delay_reward_step'`` key (parametrized over 1 and 10).
            NOTE(review): presumably the number of steps over which rewards
            are accumulated before being emitted -- confirm in ``MujocoEnv``.
    """
    set_pkg_seed(1234, use_cuda=False)
    env = MujocoEnv(
        EasyDict(
            {
                'env_id': 'Ant-v3',
                'action_clip': False,
                'delay_reward_step': delay_reward_step,
                'save_replay_gif': False,
                'replay_path_gif': None
            }
        )
    )
    # Release the simulator even when an assertion fails part-way through,
    # so one failing parametrization does not leak into the next.
    try:
        env.seed(1234)
        env.reset()
        action_dim = env.action_space.shape
        for i in range(25):
            # Exercise both ways of producing an action: the first 10 steps
            # sample with ``np.random`` using the action-space shape, the
            # remaining steps use the env's own ``random_action()``.
            if i < 10:
                action = np.random.random(size=action_dim)
            else:
                action = env.random_action()
            timestep = env.step(action)
            print(timestep.reward)
            assert timestep.reward.shape == (1, ), timestep.reward.shape
    finally:
        env.close()
|
|
|
|
|
@pytest.mark.envtest
def test_mujoco_env_eval_episode_return():
    """Check the env-reported episode return against a locally summed one.

    Runs one ``Ant-v3`` episode (with ``delay_reward_step=4``) using actions
    drawn from ``np.random``, sums every ``timestep.reward`` locally, and at
    episode end asserts that ``timestep.info['eval_episode_return']`` agrees
    with that sum to a relative tolerance of 1e-5.
    """
    set_pkg_seed(1234, use_cuda=False)
    env = MujocoEnv(
        EasyDict(
            {
                'env_id': 'Ant-v3',
                'action_clip': False,
                'delay_reward_step': 4,
                'save_replay_gif': False,
                'replay_path_gif': None
            }
        )
    )
    # Release the simulator even if stepping or the final assertion fails.
    try:
        env.seed(1234)
        env.reset()
        action_dim = env.action_space.shape
        eval_episode_return = np.array([0.], dtype=np.float32)

        # Step until the env reports the end of the episode.
        while True:
            action = np.random.random(size=action_dim)
            timestep = env.step(action)
            eval_episode_return += timestep.reward
            if timestep.done:
                break

        info_return = timestep.info['eval_episode_return']
        print(
            "{}({}), {}({})".format(
                info_return, type(info_return), eval_episode_return, type(eval_episode_return)
            )
        )

        reported = info_return.item()
        accumulated = eval_episode_return.item()
        # Relative-error check written multiplicatively so that a reported
        # return of exactly 0 yields a normal assertion result instead of a
        # ZeroDivisionError.
        assert abs(reported - accumulated) <= 1e-5 * abs(reported), (reported, accumulated)
    finally:
        env.close()
|
|