summaryrefslogtreecommitdiff
path: root/examples/pybullet/gym/pybullet_envs/minitaur/agents/tools/loop_test.py
blob: d4d03c5131575f4666f26e68a2728077aaa00181 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Copyright 2017 The TensorFlow Agents Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for the training loop."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from google3.robotics.reinforcement_learning.agents import tools


class LoopTest(tf.test.TestCase):
  """Checks how `tools.Loop` schedules phases and reports their scores."""

  def test_report_every_step(self):
    """The step counter advances by `report_every` between reported scores."""
    step = tf.Variable(0, trainable=False, dtype=tf.int32, name='step')
    loop = tools.Loop(None, step)
    loop.add_phase(
        'phase_1', done=True, score=0, summary='', steps=1, report_every=3)
    # Step:   0 1 2 3 4 5 6 7 8
    # Report:     x     x     x
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      # Pull scores one at a time so the step counter can be inspected right
      # after each individual report.
      scores = loop.run(sess, saver=None, max_step=9)
      next(scores)
      self.assertEqual(3, sess.run(step))
      next(scores)
      self.assertEqual(6, sess.run(step))
      next(scores)
      self.assertEqual(9, sess.run(step))

  def test_phases_feed(self):
    """Each phase evaluates the shared score placeholder with its own feed."""
    score = tf.placeholder(tf.float32, [])
    loop = tools.Loop(None)
    loop.add_phase(
        'phase_1', done=True, score=score, summary='', steps=1, report_every=1,
        log_every=None, checkpoint_every=None, feed={score: 1})
    loop.add_phase(
        'phase_2', done=True, score=score, summary='', steps=3, report_every=1,
        log_every=None, checkpoint_every=None, feed={score: 2})
    loop.add_phase(
        'phase_3', done=True, score=score, summary='', steps=2, report_every=1,
        log_every=None, checkpoint_every=None, feed={score: 3})
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      scores = list(loop.run(sess, saver=None, max_step=15))
    # Expected pattern: the phases repeat in order, contributing 1, 3 and 2
    # reports respectively, until 15 steps have been taken.
    self.assertAllEqual([1, 2, 2, 2, 3, 3, 1, 2, 2, 2, 3, 3, 1, 2, 2], scores)

  def test_average_score_over_phases(self):
    """Phases with different `report_every` values report independently."""
    loop = tools.Loop(None)
    loop.add_phase(
        'phase_1', done=True, score=1, summary='', steps=1, report_every=2)
    loop.add_phase(
        'phase_2', done=True, score=2, summary='', steps=2, report_every=5)
    # Score:    1 2 2 1 2 2 1 2 2 1 2 2 1 2 2 1 2
    # Report 1:       x           x           x
    # Report 2:               x             x
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      scores = list(loop.run(sess, saver=None, max_step=17))
    self.assertAllEqual([1, 2, 1, 2, 1], scores)

  def test_not_done(self):
    """Reports are expected to average only the scores of done steps."""
    step = tf.Variable(0, trainable=False, dtype=tf.int32, name='step')
    # Done holds on every odd step (1, 3, 5, 7).
    done = tf.equal((step + 1) % 2, 0)
    score = tf.cast(step, tf.float32)
    loop = tools.Loop(None, step)
    loop.add_phase(
        'phase_1', done, score, summary='', steps=1, report_every=3)
    # Score:  0 1 2 3 4 5 6 7 8
    # Done:     x   x   x   x
    # Report:     x     x     x
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      scores = list(loop.run(sess, saver=None, max_step=9))
    # Per report window: {1} -> 1, {3, 5} -> 4, {7} -> 7.
    self.assertAllEqual([1, 4, 7], scores)

  def test_not_done_batch(self):
    """Batched done/score: the report is the mean over all done entries."""
    step = tf.Variable(0, trainable=False, dtype=tf.int32, name='step')
    done = tf.equal([step % 3, step % 4], 0)
    score = tf.cast([step, step ** 2], tf.float32)
    loop = tools.Loop(None, step)
    loop.add_phase(
        'phase_1', done, score, summary='', steps=1, report_every=8)
    # Step:    0  2  4  6
    # Score 1: 0  2  4  6
    # Done 1:  x        x
    # Score 2: 0  4 16 36
    # Done 2:  x     x
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      scores = list(loop.run(sess, saver=None, max_step=8))
      self.assertEqual(8, sess.run(step))
    # Only entries flagged done count: 0 and 6 from the first batch entry,
    # 0 and 16 from the second.
    self.assertAllEqual([(0 + 0 + 16 + 6) / 4], scores)


# Hand control to the TensorFlow test runner when executed as a script.
if __name__ == '__main__':
  tf.test.main()