File size: 2,286 Bytes
079c32c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from typing import Dict
import gym
import numpy as np
from ditk import logging
from ding.envs import ObsNormWrapper, StaticObsNormWrapper, RewardNormWrapper

try:
    # Imported only for its side effect: registers the d4rl environments with OpenAI Gym.
    import d4rl  # the name itself is not used anywhere else in this module's visible code
except ImportError:
    # A missing d4rl is tolerated at import time: only a warning is logged and the module keeps loading.
    logging.warning("not found d4rl env, please install it, refer to https://github.com/rail-berkeley/d4rl")


def _cfg_get(cfg, key, default=None):
    """Read ``key`` from ``cfg``: ``None``, a dict (or dict subclass), or an attribute-style object."""
    if cfg is None:
        return default
    if isinstance(cfg, dict):
        return cfg.get(key, default)
    return getattr(cfg, key, default)


def _cfg_require(cfg, key, owner):
    """Like ``_cfg_get`` but raise ``AttributeError`` naming ``owner`` when ``key`` is absent."""
    if isinstance(cfg, dict):
        if key in cfg:
            return cfg[key]
    elif hasattr(cfg, key):
        return getattr(cfg, key)
    raise AttributeError("'{}' must provide '{}' when its normalization is enabled".format(owner, key))


def wrap_d4rl(
        env_id,
        norm_obs: Dict = dict(use_norm=False, offline_stats=dict(use_offline_stats=False, )),
        norm_reward: Dict = dict(use_norm=False, ),
        only_info=False
) -> gym.Env:
    r"""
    Overview:
        Create a D4RL env via ``gym.make`` and wrap it to preprocess env step's return info, e.g. observation \
            normalization, reward normalization, etc. With ``only_info=True`` no env is created and only the \
            names of the wrappers that would be applied are returned.
    Arguments:
        - env_id (:obj:`str`): Environment id passed to ``gym.make``.
        - norm_obs (:obj:`Dict`): Observation normalization config, a plain ``dict``, a dict subclass or an \
            attribute-style object. ``None`` or a falsy ``use_norm`` disables it. If \
            ``offline_stats.use_offline_stats`` is truthy, ``StaticObsNormWrapper`` is used with \
            ``offline_stats.mean`` and ``offline_stats.std``; otherwise ``ObsNormWrapper`` is used.
        - norm_reward (:obj:`Dict`): Reward normalization config, same accepted types as ``norm_obs``. When \
            ``use_norm`` is truthy, ``reward_discount`` is required. For evaluator, environment's reward \
            should not be normalized: Either ``norm_reward`` is None or ``norm_reward.use_norm`` is False can do this.
        - only_info (:obj:`bool`): If True, return the wrapper names instead of building the env.
    Returns:
        - wrapped_env (:obj:`gym.Env`): The wrapped environment (when ``only_info`` is False).
        - wrapper_info (:obj:`str`): One wrapper class name per line, each followed by ``'\n'``; empty string \
            if no wrapper is enabled (when ``only_info`` is True).
    Raises:
        - AttributeError: A field required by an enabled wrapper (``mean``, ``std``, ``reward_discount``) is missing.
    """
    # The dict defaults in the signature are only read, never mutated, so sharing them across calls is safe.
    # Fields are read via ``_cfg_get`` because those defaults are plain dicts without attribute access.
    obs_norm_on = bool(_cfg_get(norm_obs, 'use_norm', False))
    offline_stats = _cfg_get(norm_obs, 'offline_stats') if obs_norm_on else None
    static_obs_norm = bool(_cfg_get(offline_stats, 'use_offline_stats', False))
    reward_norm_on = bool(_cfg_get(norm_reward, 'use_norm', False))

    if only_info:
        # Report wrappers in the same order they would be applied: observation first, then reward.
        wrapper_info = ''
        if obs_norm_on:
            obs_wrapper = StaticObsNormWrapper if static_obs_norm else ObsNormWrapper
            wrapper_info += obs_wrapper.__name__ + '\n'
        if reward_norm_on:
            wrapper_info += RewardNormWrapper.__name__ + '\n'
        return wrapper_info

    env = gym.make(env_id)
    if obs_norm_on:
        if static_obs_norm:
            env = StaticObsNormWrapper(
                env,
                _cfg_require(offline_stats, 'mean', 'norm_obs.offline_stats'),
                _cfg_require(offline_stats, 'std', 'norm_obs.offline_stats'),
            )
        else:
            env = ObsNormWrapper(env)
    if reward_norm_on:
        env = RewardNormWrapper(env, _cfg_require(norm_reward, 'reward_discount', 'norm_reward'))
    return env