author     erwincoumans <erwincoumans@google.com>  2019-05-08 10:36:51 -0700
committer  GitHub <noreply@github.com>  2019-05-08 10:36:51 -0700
commit     1ae63d097d455112924fdeb18fab3f4f7bdde4af (patch)
tree       bf91cc9769e68a12f09a829714445b1142ef1c90
parent     a5fe998f1368c9219579ce25682cb33fecabb7a9 (diff)
parent     36ed4419bff1ddc05b0b22d9887b807a2afa2d8d (diff)
download   bullet3-1ae63d097d455112924fdeb18fab3f4f7bdde4af.tar.gz
Merge pull request #2244 from erwincoumans/master
add minitaur extended to pybullet_envs
 examples/pybullet/gym/pybullet_envs/__init__.py                            |   8
 examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_extended_env.py | 256
 2 files changed, 264 insertions(+), 0 deletions(-)
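
Once this commit is merged, the new environment is available through Gym's registry whenever pybullet_envs is imported. A minimal usage sketch (assumes gym and pybullet are installed; the random-action loop is for illustration only and is not part of this commit):

    import gym
    import pybullet_envs  # importing the package registers MinitaurExtendedEnv-v0

    env = gym.make("MinitaurExtendedEnv-v0")
    observation = env.reset()
    for _ in range(100):
      action = env.action_space.sample()  # random policy, purely illustrative
      observation, reward, done, info = env.step(action)
      if done:
        observation = env.reset()
    env.close()
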
diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py
index 03c064c36..42c5d7ccf 100644
--- a/examples/pybullet/gym/pybullet_envs/__init__.py
+++ b/examples/pybullet/gym/pybullet_envs/__init__.py
@@ -40,6 +40,14 @@ register(
 )
 register(
+    id='MinitaurExtendedEnv-v0',
+    entry_point='pybullet_envs.minitaur.envs:MinitaurExtendedEnv',
+    max_episode_steps=1000,
+    reward_threshold=5.0,
+)
+
+
+register(
     id='MinitaurReactiveEnv-v0',
     entry_point='pybullet_envs.minitaur.envs:MinitaurReactiveEnv',
     max_episode_steps=1000,
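
The registration above uses the constructor defaults (history_length=1 with past actions only). Gym's register() also accepts a kwargs dictionary that is forwarded to the entry point, so a variant with a longer history could be registered the same way; a sketch, where the id below is hypothetical and not part of this commit:

    from gym.envs.registration import register

    register(
        id='MinitaurExtendedHistoryEnv-v0',  # hypothetical id, for illustration only
        entry_point='pybullet_envs.minitaur.envs:MinitaurExtendedEnv',
        max_episode_steps=1000,
        reward_threshold=5.0,
        kwargs={'history_length': 3, 'history_include_states': True},
    )
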
diff --git a/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_extended_env.py b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_extended_env.py
new file mode 100644
index 000000000..1449441dd
--- /dev/null
+++ b/examples/pybullet/gym/pybullet_envs/minitaur/envs/minitaur_extended_env.py
@@ -0,0 +1,256 @@
+"""Extends the environment by adding observation and action history.
+
+The implementation is a bit dirty import of the implementation in
+the experimental branch.
+
+"""
+
+from gym import spaces
+import numpy as np
+
+from pybullet_envs.minitaur.envs.minitaur_reactive_env import MinitaurReactiveEnv
+
+
+class MinitaurExtendedEnv(MinitaurReactiveEnv):
+ """The 'extended' environment for Markovian property.
+
+ This class implements to include prior actions and observations to the
+ observation vector, thus making the environment "more" Markovian. This is
+ especially useful for systems with latencies.
+
+ Args:
+ history_length: the length of the historic data
+ history_include_actions: a flag for including actions as history
+ history_include_states: a flag for including states as history
+ include_state_difference: a flag for including the first-order differences
+ as history
+ include_second_state_difference: a flag for including the second-order state
+ differences as history.
+ include_base_position: a flag for including the base as observation,
+ never_terminate: if this is on, the environment unconditionally never
+ terminates.
+ action_scale: the scale of actions,
+ """
+  MAX_BUFFER_SIZE = 1001
+  ACTION_DIM = 8
+  PARENT_OBSERVATION_DIM = 12
+  INIT_EXTENSION_POS = 2.0
+  INIT_SWING_POS = 0.0
+
+  metadata = {
+      "render.modes": ["human", "rgb_array"],
+      "video.frames_per_second": 50,
+  }
+
+  def __init__(self,
+               history_length=1,
+               history_include_actions=True,
+               history_include_states=False,
+               include_state_difference=False,
+               include_second_state_difference=False,
+               include_base_position=False,
+               include_leg_model=False,
+               never_terminate=False,
+               action_scale=0.5,
+               **kwargs):
+    self._kwargs = kwargs
+
+    self._history_length = history_length
+    self._history_include_actions = history_include_actions
+    self._history_include_states = history_include_states
+    self._include_state_difference = include_state_difference
+    self._include_second_state_difference = include_second_state_difference
+    self._include_base_position = include_base_position
+    self._include_leg_model = include_leg_model
+
+    self._never_terminate = never_terminate
+    self._action_scale = action_scale
+
+    self._past_parent_observations = np.zeros((self.MAX_BUFFER_SIZE + 1,
+                                                self.PARENT_OBSERVATION_DIM))
+    self._past_motor_angles = np.zeros((self.MAX_BUFFER_SIZE + 1, 8))
+    self._past_actions = np.zeros((self.MAX_BUFFER_SIZE, self.ACTION_DIM))
+    self._counter = 0
+
+    super(MinitaurExtendedEnv, self).__init__(**kwargs)
+    self.action_space = spaces.Box(-1.0, 1.0, self.action_space.shape)
+    self.observation_space = spaces.Box(-np.inf, np.inf,
+                                        self._get_observation().shape)
+    # This is mainly for TF-Agents compatibility.
+    self.action_space.flat_dim = len(self.action_space.low)
+    self.observation_space.flat_dim = len(self.observation_space.low)
+
+  def _get_observation(self):
+    """Concatenates the parent observation with history and extra features."""
+    parent_observation = super(MinitaurExtendedEnv, self)._get_observation()
+    parent_observation = np.array(parent_observation)
+    # The base class might require this attribute.
+    self._observation = parent_observation
+    self._past_parent_observations[self._counter] = parent_observation
+    num_motors = self.minitaur.num_motors
+    self._past_motor_angles[self._counter] = parent_observation[-num_motors:]
+
+    history_states = []
+    history_actions = []
+    for i in range(self._history_length):
+      t = max(self._counter - i - 1, 0)
+
+      if self._history_include_states:
+        history_states.append(self._past_parent_observations[t])
+
+      if self._history_include_actions:
+        history_actions.append(self._past_actions[t])
+
+    t = self._counter
+    tm, tmm = max(0, self._counter - 1), max(0, self._counter - 2)
+
+    state_difference, second_state_difference = [], []
+    if self._include_state_difference:
+      state_difference = [
+          self._past_motor_angles[t] - self._past_motor_angles[tm]
+      ]
+    if self._include_second_state_difference:
+      second_state_difference = [
+          self._past_motor_angles[t] - 2 * self._past_motor_angles[tm] +
+          self._past_motor_angles[tmm]
+      ]
+
+    base_position = []
+    if self._include_base_position:
+      base_position = np.array(self.minitaur.GetBasePosition())
+
+    leg_model = []
+    if self._include_leg_model:
+      raw_motor_angles = self.minitaur.GetMotorAngles()
+      leg_model = self.convert_to_leg_model(raw_motor_angles)
+
+    observation_list = (
+        [parent_observation] + history_states + history_actions +
+        state_difference + second_state_difference + [base_position] +
+        [leg_model])
+
+    full_observation = np.concatenate(observation_list)
+    return full_observation
+
+  def reset(self):
+    """Resets the time counter and the history buffers."""
+    self._counter = 0
+    self._signal(self._counter)  # This sets the current phase.
+    self._past_parent_observations = np.zeros((self.MAX_BUFFER_SIZE + 1,
+                                                self.PARENT_OBSERVATION_DIM))
+    self._past_motor_angles = np.zeros((self.MAX_BUFFER_SIZE + 1, 8))
+    self._past_actions = np.zeros((self.MAX_BUFFER_SIZE, self.ACTION_DIM))
+    self._counter = 0
+
+    return np.array(super(MinitaurExtendedEnv, self).reset())
+
+  def step(self, action):
+    """Step function wrapper that can be used to add reward shaping terms.
+
+    Args:
+      action: an array containing the action to apply.
+
+    Returns:
+      next_obs: the next observation
+      reward: the reward for this experience tuple
+      done: the terminal flag
+      info: a dictionary with additional information
+    """
+
+    action *= self._action_scale
+
+    self._past_actions[self._counter] = action
+    self._counter += 1
+
+    next_obs, _, done, info = super(MinitaurExtendedEnv, self).step(action)
+
+    reward = self.reward()
+    info.update(base_reward=reward)
+
+    return next_obs, reward, done, info
+
+  def terminate(self):
+    """The helper function to terminate the environment."""
+    super(MinitaurExtendedEnv, self)._close()
+
+  def _termination(self):
+    """Determines whether the episode is terminated.
+
+    Checks whether 1) the front leg is bent too much or 2) the step count
+    exceeds the manually set limit.
+
+    Returns:
+      terminal: the terminal flag, True if the episode should end.
+    """
+    if self._never_terminate:
+      return False
+
+    leg_model = self.convert_to_leg_model(self.minitaur.GetMotorAngles())
+    swing0 = leg_model[0]
+    swing1 = leg_model[2]
+    maximum_swing_angle = 0.8
+    if swing0 > maximum_swing_angle or swing1 > maximum_swing_angle:
+      return True
+
+    if self._counter >= 500:
+      return True
+
+    return False
+
+  def reward(self):
+    """Compute rewards for the given time step.
+
+    It considers two terms: 1) forward velocity reward and 2) action
+    acceleration penalty.
+
+    Returns:
+      reward: the computed reward.
+    """
+    current_base_position = self.minitaur.GetBasePosition()
+    dt = self.control_time_step
+    velocity = (current_base_position[0] - self._last_base_position[0]) / dt
+    velocity_reward = np.clip(velocity, -0.5, 0.5)
+
+    action = self._past_actions[self._counter - 1]
+    prev_action = self._past_actions[max(self._counter - 2, 0)]
+    prev_prev_action = self._past_actions[max(self._counter - 3, 0)]
+    acc = action - 2 * prev_action + prev_prev_action
+    action_acceleration_penalty = np.mean(np.abs(acc))
+
+    reward = 0.0
+    reward += 1.0 * velocity_reward
+    reward -= 0.1 * action_acceleration_penalty
+
+    return reward
+
+  @staticmethod
+  def convert_to_leg_model(motor_angles):
+    """A helper function to convert motor angles to the leg model.
+
+    Args:
+      motor_angles: the raw motor angles.
+
+    Returns:
+      leg_angles: the leg pose represented as swing and extension angles.
+    """
+    # TODO(sehoonha): clean up model conversion codes
+    num_legs = 4
+    # motor_angles = motor_angles / (np.pi / 4.)
+    leg_angles = np.zeros(num_legs * 2)
+    for i in range(num_legs):
+      motor1, motor2 = motor_angles[2 * i:2 * i + 2]
+      swing = (-1)**(i // 2) * 0.5 * (motor2 - motor1)
+      extension = 0.5 * (motor1 + motor2)
+
+      leg_angles[i] = swing
+      leg_angles[i + num_legs] = extension
+
+    return leg_angles
+
+  def __getstate__(self):
+    """A helper get state function for pickling."""
+    return {"kwargs": self._kwargs}
+
+  def __setstate__(self, state):
+    """A helper set state function for pickling."""
+    self.__init__(**state["kwargs"])
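
For reference, the leg-model conversion and the action-acceleration penalty above can be reproduced outside the environment. A small standalone sketch (the helper mirrors convert_to_leg_model; the sample values are arbitrary):

    import numpy as np

    def convert_to_leg_model(motor_angles, num_legs=4):
      # Mirrors MinitaurExtendedEnv.convert_to_leg_model: each pair of motor
      # angles maps to one (swing, extension) pair.
      leg_angles = np.zeros(num_legs * 2)
      for i in range(num_legs):
        motor1, motor2 = motor_angles[2 * i:2 * i + 2]
        leg_angles[i] = (-1)**(i // 2) * 0.5 * (motor2 - motor1)  # swing
        leg_angles[i + num_legs] = 0.5 * (motor1 + motor2)  # extension
      return leg_angles

    # Second-order difference of the three most recent actions, as in reward():
    # penalty = 0.1 * mean(|a_t - 2 * a_{t-1} + a_{t-2}|).
    actions = np.array([np.full(8, 0.1), np.full(8, 0.3), np.full(8, 0.2)])
    acc = actions[2] - 2 * actions[1] + actions[0]
    print(convert_to_leg_model(np.linspace(0.0, 1.4, 8)))
    print(0.1 * np.mean(np.abs(acc)))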