1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
# Copyright 2017 The TensorFlow Agents Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mock environment for testing reinforcement learning code."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gym
import gym.spaces
import numpy as np
class MockEnvironment(object):
    """Gym-like environment that emits random data and logs episode lengths."""

    def __init__(self, observ_shape, action_shape, min_duration, max_duration):
        """Set up the environment with a fixed-seed random generator.

        Args:
          observ_shape: Shape of the randomly drawn observations.
          action_shape: Shape of the action space.
          min_duration: Smallest episode length that can be drawn (inclusive).
          max_duration: Largest episode length that can be drawn (inclusive).

        Attributes:
          steps: Per-episode count of the steps simulated so far.
          durations: Per-episode length that was drawn when it was reset.
        """
        self.steps = []
        self.durations = []
        # Seeded with a constant so every instance yields the same sequence.
        self._random = np.random.RandomState(0)
        self._observ_shape = observ_shape
        self._action_shape = action_shape
        self._min_duration = min_duration
        self._max_duration = max_duration

    @property
    def observation_space(self):
        """Box space spanning [0, 1] with the configured observation shape."""
        shape = self._observ_shape
        return gym.spaces.Box(np.zeros(shape), np.ones(shape))

    @property
    def action_space(self):
        """Box space spanning [0, 1] with the configured action shape."""
        shape = self._action_shape
        return gym.spaces.Box(np.zeros(shape), np.ones(shape))

    @property
    def unwrapped(self):
        """The environment itself; there is no wrapper to strip."""
        return self

    def step(self, action):
        """Advance the current episode by one step.

        Asserts that the action lies inside the action space and that the
        current episode has not yet reached its drawn duration.

        Args:
          action: Action to apply; only checked against the action space.

        Returns:
          Tuple of random observation, random reward, done flag, and an empty
          info dict. The done flag is set once the episode's step count has
          reached its duration.
        """
        assert self.action_space.contains(action)
        taken, limit = self.steps[-1], self.durations[-1]
        assert taken < limit
        taken += 1
        self.steps[-1] = taken
        # Observation is drawn before the reward to keep the random stream
        # in a fixed order.
        observation = self._current_observation()
        reward = self._current_reward()
        return observation, reward, taken >= limit, {}

    def reset(self):
        """Begin a new episode and return its first observation.

        Draws the episode length uniformly from the configured range and
        appends a fresh entry to both `steps` and `durations`.
        """
        # randint excludes its upper bound, hence the + 1.
        upper = self._max_duration + 1
        self.durations.append(self._random.randint(self._min_duration, upper))
        self.steps.append(0)
        return self._current_observation()

    def _current_observation(self):
        """Draw an observation uniformly from [0, 1) per element."""
        return self._random.uniform(low=0, high=1, size=self._observ_shape)

    def _current_reward(self):
        """Draw a scalar reward uniformly from [-1, 1)."""
        return self._random.uniform(low=-1, high=1)
|