from typing import Dict

import gym

from ding.envs import ObsNormWrapper, RewardNormWrapper, DelayRewardWrapper, EvalEpisodeReturnWrapper


def wrap_mujoco(
        env_id: str,
        norm_obs: Dict = dict(use_norm=False, ),
        norm_reward: Dict = dict(use_norm=False, ),
        delay_reward_step: int = 1
) -> gym.Env:
r"""
Overview:
Wrap Mujoco Env to preprocess env step's return info, e.g. observation normalization, reward normalization, etc.
Arguments:
- env_id (:obj:`str`): Mujoco environment id, for example "HalfCheetah-v3"
- norm_obs (:obj:`EasyDict`): Whether to normalize observation or not
- norm_reward (:obj:`EasyDict`): Whether to normalize reward or not. For evaluator, environment's reward \
should not be normalized: Either ``norm_reward`` is None or ``norm_reward.use_norm`` is False can do this.
Returns:
- wrapped_env (:obj:`gym.Env`): The wrapped mujoco environment
"""
    # Import the customized gym environment module so that its envs are registered with gym.
    from . import mujoco_gym_env
    env = gym.make(env_id)
    env = EvalEpisodeReturnWrapper(env)
    # Use dict-style lookups so both plain dicts (the defaults) and EasyDict configs work.
    if norm_obs is not None and norm_obs.get('use_norm', False):
        env = ObsNormWrapper(env)
    if norm_reward is not None and norm_reward.get('use_norm', False):
        env = RewardNormWrapper(env, norm_reward['reward_discount'])
    if delay_reward_step > 1:
        env = DelayRewardWrapper(env, delay_reward_step)
    return env
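
# Minimal usage sketch (not part of the original file). Assumptions: this module
# is imported as part of its package (so the relative import above resolves), a
# MuJoCo backend is installed for "HalfCheetah-v3", and 0.99 is an illustrative
# value for ``reward_discount``:
#
#   env = wrap_mujoco(
#       "HalfCheetah-v3",
#       norm_obs=dict(use_norm=True),
#       norm_reward=dict(use_norm=True, reward_discount=0.99),
#       delay_reward_step=1,
#   )
#   obs = env.reset()
#   obs, rew, done, info = env.step(env.action_space.sample())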