1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
import numpy as np
import copy
import tensorflow as tf
from pybullet_envs.deep_mimic.learning.normalizer import Normalizer
class TFNormalizer(Normalizer):
def __init__(self, sess, scope, size, groups_ids=None, eps=0.02, clip=np.inf):
self.sess = sess
self.scope = scope
super().__init__(size, groups_ids, eps, clip)
with tf.variable_scope(self.scope):
self._build_resource_tf()
return
# initialze count when loading saved values so that things don't change to quickly during updates
def load(self):
self.count = self.count_tf.eval()[0]
self.mean = self.mean_tf.eval()
self.std = self.std_tf.eval()
self.mean_sq = self.calc_mean_sq(self.mean, self.std)
return
def update(self):
super().update()
self._update_resource_tf()
return
def set_mean_std(self, mean, std):
super().set_mean_std(mean, std)
self._update_resource_tf()
return
def normalize_tf(self, x):
norm_x = (x - self.mean_tf) / self.std_tf
norm_x = tf.clip_by_value(norm_x, -self.clip, self.clip)
return norm_x
def unnormalize_tf(self, norm_x):
x = norm_x * self.std_tf + self.mean_tf
return x
def _build_resource_tf(self):
self.count_tf = tf.get_variable(dtype=tf.int32,
name='count',
initializer=np.array([self.count], dtype=np.int32),
trainable=False)
self.mean_tf = tf.get_variable(dtype=tf.float32,
name='mean',
initializer=self.mean.astype(np.float32),
trainable=False)
self.std_tf = tf.get_variable(dtype=tf.float32,
name='std',
initializer=self.std.astype(np.float32),
trainable=False)
self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1])
self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape)
self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape)
self._update_op = tf.group(self.count_tf.assign(self.count_ph),
self.mean_tf.assign(self.mean_ph), self.std_tf.assign(self.std_ph))
return
def _update_resource_tf(self):
feed = {
self.count_ph: np.array([self.count], dtype=np.int32),
self.mean_ph: self.mean,
self.std_ph: self.std
}
self.sess.run(self._update_op, feed_dict=feed)
return
|