author    | erwincoumans <erwincoumans@google.com> | 2019-05-08 10:36:51 -0700
committer | GitHub <noreply@github.com>            | 2019-05-08 10:36:51 -0700
commit    | 1ae63d097d455112924fdeb18fab3f4f7bdde4af (patch)
tree      | bf91cc9769e68a12f09a829714445b1142ef1c90
parent    | a5fe998f1368c9219579ce25682cb33fecabb7a9 (diff)
parent    | 36ed4419bff1ddc05b0b22d9887b807a2afa2d8d (diff)
download  | bullet3-1ae63d097d455112924fdeb18fab3f4f7bdde4af.tar.gz
Merge pull request #2244 from erwincoumans/master
add minitaur extended to pybullet_envs
-rw-r--r-- | examples/pybullet/gym/pybullet_envs/__init__.py                             |   8
-rw-r--r-- | examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_extended_env.py | 256
2 files changed, 264 insertions, 0 deletions
diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py
index 03c064c36..42c5d7ccf 100644
--- a/examples/pybullet/gym/pybullet_envs/__init__.py
+++ b/examples/pybullet/gym/pybullet_envs/__init__.py
@@ -40,6 +40,14 @@ register(
 )
 
 register(
+    id='MinitaurExtendedEnv-v0',
+    entry_point='pybullet_envs.minitaur.envs:MinitaurExtendedEnv',
+    max_episode_steps=1000,
+    reward_threshold=5.0,
+)
+
+
+register(
     id='MinitaurReactiveEnv-v0',
     entry_point='pybullet_envs.minitaur.envs:MinitaurReactiveEnv',
     max_episode_steps=1000,
diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_extended_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_extended_env.py
new file mode 100644
index 000000000..1449441dd
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_extended_env.py
@@ -0,0 +1,256 @@
+"""Extends the environment by adding observation and action history.
+
+The implementation is a somewhat rough port of the implementation in
+the experimental branch.
+"""
+
+from gym import spaces
+import numpy as np
+
+from pybullet_envs.minitaur.envs.minitaur_reactive_env import MinitaurReactiveEnv
+
+
+class MinitaurExtendedEnv(MinitaurReactiveEnv):
+  """The 'extended' environment for the Markovian property.
+
+  This class appends prior actions and observations to the observation
+  vector, thus making the environment "more" Markovian. This is
+  especially useful for systems with latencies.
+
+  Args:
+    history_length: the length of the history buffer.
+    history_include_actions: a flag for including past actions in the history.
+    history_include_states: a flag for including past states in the history.
+    include_state_difference: a flag for including the first-order state
+      differences in the observation.
+    include_second_state_difference: a flag for including the second-order
+      state differences in the observation.
+    include_base_position: a flag for including the base position in the
+      observation.
+    include_leg_model: a flag for including the swing/extension leg model in
+      the observation.
+    never_terminate: if this is on, the environment unconditionally never
+      terminates.
+    action_scale: the scale applied to actions.
+  """
+  MAX_BUFFER_SIZE = 1001
+  ACTION_DIM = 8
+  PARENT_OBSERVATION_DIM = 12
+  INIT_EXTENSION_POS = 2.0
+  INIT_SWING_POS = 0.0
+
+  metadata = {
+      "render.modes": ["human", "rgb_array"],
+      "video.frames_per_second": 50,
+  }
+
+  def __init__(self,
+               history_length=1,
+               history_include_actions=True,
+               history_include_states=False,
+               include_state_difference=False,
+               include_second_state_difference=False,
+               include_base_position=False,
+               include_leg_model=False,
+               never_terminate=False,
+               action_scale=0.5,
+               **kwargs):
+    self._kwargs = kwargs
+
+    self._history_length = history_length
+    self._history_include_actions = history_include_actions
+    self._history_include_states = history_include_states
+    self._include_state_difference = include_state_difference
+    self._include_second_state_difference = include_second_state_difference
+    self._include_base_position = include_base_position
+    self._include_leg_model = include_leg_model
+
+    self._never_terminate = never_terminate
+    self._action_scale = action_scale
+
+    self._past_parent_observations = np.zeros((self.MAX_BUFFER_SIZE + 1,
+                                               self.PARENT_OBSERVATION_DIM))
+    self._past_motor_angles = np.zeros((self.MAX_BUFFER_SIZE + 1, 8))
+    self._past_actions = np.zeros((self.MAX_BUFFER_SIZE, self.ACTION_DIM))
+    self._counter = 0
+
+    super(MinitaurExtendedEnv, self).__init__(**kwargs)
+    self.action_space = spaces.Box(-1.0, 1.0, self.action_space.shape)
+    self.observation_space = spaces.Box(-np.inf, np.inf,
+                                        self._get_observation().shape)
+    # This is mainly for TF-Agents compatibility.
+    self.action_space.flat_dim = len(self.action_space.low)
+    self.observation_space.flat_dim = len(self.observation_space.low)
+
+  def _get_observation(self):
+    """Concatenates the parent observation with the configured history terms."""
+    parent_observation = super(MinitaurExtendedEnv, self)._get_observation()
+    parent_observation = np.array(parent_observation)
+    # The base class might require this attribute.
+    self._observation = parent_observation
+    self._past_parent_observations[self._counter] = parent_observation
+    num_motors = self.minitaur.num_motors
+    self._past_motor_angles[self._counter] = parent_observation[-num_motors:]
+
+    history_states = []
+    history_actions = []
+    for i in range(self._history_length):
+      t = max(self._counter - i - 1, 0)
+
+      if self._history_include_states:
+        history_states.append(self._past_parent_observations[t])
+
+      if self._history_include_actions:
+        history_actions.append(self._past_actions[t])
+
+    t = self._counter
+    tm, tmm = max(0, self._counter - 1), max(0, self._counter - 2)
+
+    state_difference, second_state_difference = [], []
+    if self._include_state_difference:
+      state_difference = [
+          self._past_motor_angles[t] - self._past_motor_angles[tm]
+      ]
+    if self._include_second_state_difference:
+      second_state_difference = [
+          self._past_motor_angles[t] - 2 * self._past_motor_angles[tm] +
+          self._past_motor_angles[tmm]
+      ]
+
+    base_position = []
+    if self._include_base_position:
+      base_position = np.array(self.minitaur.GetBasePosition())
+
+    leg_model = []
+    if self._include_leg_model:
+      raw_motor_angles = self.minitaur.GetMotorAngles()
+      leg_model = self._convert_to_leg_model(raw_motor_angles)
+
+    observation_list = (
+        [parent_observation] + history_states + history_actions +
+        state_difference + second_state_difference + [base_position] +
+        [leg_model])
+
+    full_observation = np.concatenate(observation_list)
+    return full_observation
+
+  def reset(self):
+    """Resets the time and the history buffers."""
+    self._counter = 0
+    self._signal(self._counter)  # This sets the current phase.
+    self._past_parent_observations = np.zeros((self.MAX_BUFFER_SIZE + 1,
+                                               self.PARENT_OBSERVATION_DIM))
+    self._past_motor_angles = np.zeros((self.MAX_BUFFER_SIZE + 1, 8))
+    self._past_actions = np.zeros((self.MAX_BUFFER_SIZE, self.ACTION_DIM))
+
+    return np.array(super(MinitaurExtendedEnv, self).reset())
+
+  def step(self, action):
+    """Step function wrapper that can add shaping terms to the reward.
+
+    Args:
+      action: an array of the given action.
+
+    Returns:
+      next_obs: the next observation.
+      reward: the reward for this experience tuple.
+      done: the terminal flag.
+      info: additional information.
+    """
+    action *= self._action_scale
+
+    self._past_actions[self._counter] = action
+    self._counter += 1
+
+    next_obs, _, done, info = super(MinitaurExtendedEnv, self).step(action)
+
+    reward = self.reward()
+    info.update(base_reward=reward)
+
+    return next_obs, reward, done, info
+
+  def terminate(self):
+    """A helper function to terminate the environment."""
+    super(MinitaurExtendedEnv, self)._close()
+
+  def _termination(self):
+    """Determines whether the env is terminated or not.
+
+    Checks whether 1) a front leg is bent too much or 2) the episode length
+    exceeds the manually set limit.
+
+    Returns:
+      terminal: the terminal flag of whether the env is terminated or not.
+    """
+    if self._never_terminate:
+      return False
+
+    leg_model = self._convert_to_leg_model(self.minitaur.GetMotorAngles())
+    swing0 = leg_model[0]
+    swing1 = leg_model[2]
+    maximum_swing_angle = 0.8
+    if swing0 > maximum_swing_angle or swing1 > maximum_swing_angle:
+      return True
+
+    if self._counter >= 500:
+      return True
+
+    return False
+
+  def reward(self):
+    """Computes the reward for the given time step.
+
+    It considers two terms: 1) a forward velocity reward and 2) an action
+    acceleration penalty.
+
+    Returns:
+      reward: the computed reward.
+    """
+    current_base_position = self.minitaur.GetBasePosition()
+    dt = self.control_time_step
+    velocity = (current_base_position[0] - self._last_base_position[0]) / dt
+    velocity_reward = np.clip(velocity, -0.5, 0.5)
+
+    action = self._past_actions[self._counter - 1]
+    prev_action = self._past_actions[max(self._counter - 2, 0)]
+    prev_prev_action = self._past_actions[max(self._counter - 3, 0)]
+    acc = action - 2 * prev_action + prev_prev_action
+    action_acceleration_penalty = np.mean(np.abs(acc))
+
+    reward = 0.0
+    reward += 1.0 * velocity_reward
+    reward -= 0.1 * action_acceleration_penalty
+
+    return reward
+
+  @staticmethod
+  def _convert_to_leg_model(motor_angles):
+    """A helper function to convert motor angles to the swing/extension leg model.
+
+    Args:
+      motor_angles: the raw motor angles.
+
+    Returns:
+      leg_angles: the leg pose represented in swing and extension.
+    """
+    # TODO(sehoonha): clean up the model conversion code.
+    num_legs = 4
+    # motor_angles = motor_angles / (np.pi / 4.)
+    leg_angles = np.zeros(num_legs * 2)
+    for i in range(num_legs):
+      motor1, motor2 = motor_angles[2 * i:2 * i + 2]
+      swing = (-1)**(i // 2) * 0.5 * (motor2 - motor1)
+      extension = 0.5 * (motor1 + motor2)
+
+      leg_angles[i] = swing
+      leg_angles[i + num_legs] = extension
+
+    return leg_angles
+
+  def __getstate__(self):
+    """A helper get-state function for pickling."""
+    return {"kwargs": self._kwargs}
+
+  def __setstate__(self, state):
+    """A helper set-state function for pickling."""
+    self.__init__(**state["kwargs"])