src/test/filestore/run_seed_to.sh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291

#!/bin/bash
# vim: ts=8 sw=2 smarttab
#
# run_seed_to.sh - Run ceph_test_filestore_idempotent_sequence up until an 
# injection point, generating a sequence of operations based on a
# provided seed.
#
# We also perform three additional tests, focused on assessing if 
# replaying a larger chunck of the journal affects the expected store
# behavior. These tests will be performed by increasing the store's
# journal sync interval to a very large value, allowing the store to
# finish execution before the first sync (unless the store runs for
# over 10 hours, case on which the interval variables must be changed
# to an appropriate value). Unless the '--no-journal-test' option is
# specified, we will run the 3 following scenarios:
#  
#  1) journal sync'ing for both stores is good as disabled
#     (we call it '00', for store naming purposes)
#  2) journal sync'ing for store A is as good as disabled
#     (we call it '01', for store naming purposes)
#  3) journal sync'ing for store B is as good as disabled
#     (we call it '10', for store naming purposes)
#
# All log files are also appropriately named accordingly (i.e., a.00.fail,
# a.10.recover, or b.01.clean).
#
# By default, the test will not exit on error, although it will show the
# fail message. This behavior is so defined so we run the whole battery of
# tests, and obtain as many mismatches as possible in one go. We may force
# the test to exit on error by specifying the '--exit-on-error' option.
#
#
set -e

test_opts=""

usage() {
  echo "usage: $1 [options..] <seed> <kill-at>"
  echo 
  echo "options:"
  echo "  -c, --colls <VAL>    # of collections"
  echo "  -o, --objs <VAL>     # of objects"
  echo "  -b, --btrfs <VAL>    seq number for btrfs stores"
  echo "  --no-journal-test    don't perform journal replay tests"
  echo "  -e, --exit-on-error  exit with 1 on error"
  echo "  -v, --valgrind       run commands through valgrind"
  echo
  echo "env vars:"
  echo "  OPTS_STORE           additional opts for both stores"
  echo "  OPTS_STORE_A         additional opts for store A"
  echo "  OPTS_STORE_B         additional opts for store B"
  echo
}

die_on_missing_arg() {
  if [[ "$2" == "" ]]; then
    echo "$1: missing required parameter"
    exit 1
  fi
}


required_args=2
obtained_args=0

seed=""
killat=""
on_btrfs=0
on_btrfs_seq=0
journal_test=1
min_sync_interval="36000" # ten hours, yes.
max_sync_interval="36001"
exit_on_error=0
v=""

do_rm() {
  if [[ $on_btrfs -eq 0 ]]; then
    rm -fr $*
  fi
}

set_arg() {
  if [[ $1 -eq 1 ]]; then
    seed=$2
  elif [[ $1 -eq 2 ]]; then
    killat=$2
  else
    echo "error: unknown purpose for '$2'"
    usage $0
    exit 1
  fi
}

while [[ $# -gt 0 ]];
do
  case "$1" in
    -c | --colls)
      die_on_missing_arg "$1" "$2"
      test_opts="$test_opts --test-num-colls $2"
      shift 2
      ;;
    -o | --objs)
      die_on_missing_arg "$1" "$2"
      test_opts="$test_opts --test-num-objs $2"
      shift 2
      ;;
    -h | --help)
      usage $0 ;
      exit 0
      ;;
    -b | --btrfs)
      die_on_missing_arg "$1" "$2"
      on_btrfs=1
      on_btrfs_seq=$2
      shift 2
      ;;
    --no-journal-test)
      journal_test=0
      shift
      ;;
    -e | --exit-on-error)
      exit_on_error=1
      shift
      ;;
    -v | --valgrind)
      v="valgrind --leak-check=full"
      shift
      ;;
    --)
      shift
      break
      ;;
    -*)
      echo "$1: unknown option" >&2
      usage $0
      exit 1
      ;;
    *)
      obtained_args=$(($obtained_args+1))
      set_arg $obtained_args $1
      shift
      ;;
  esac
done

if [[ $obtained_args -ne $required_args ]]; then
  echo "error: missing argument"
  usage $0 ;
  exit 1
fi

if [[ "$OPTS_STORE" != "" ]]; then
  test_opts="$test_opts $OPTS_STORE"
fi

test_opts_a="$test_opts"
test_opts_b="$test_opts"

if [[ "$OPTS_STORE_A" != "" ]]; then
    test_opts_a="$test_opts_a $OPTS_STORE_A"
fi
if [[ "$OPTS_STORE_B" != "" ]]; then
    test_opts_b="$test_opts_b $OPTS_STORE_B"
fi

echo seed $seed
echo kill at $killat

# run forever, until $killat...
to=1000000000

#
# store names 
#
# We need these for two reasons:
#  1) if we are running the tests on a btrfs volume, then we need to use
#     a seq number for each run. Being on btrfs means we will fail when
#     removing the store's directories and it's far more simple to just
#     specify differente store names such as 'a.$seq' or 'b.$seq'.
#  
#  2) unless the '--no-journal-test' option is specified, we will run
#     three additional tests for each store, and we will reuse the same
#     command for each one of the runs, but varying the store's name and
#     arguments.
#
store_a="a"
store_b="b"

if [[ $on_btrfs -eq 1 ]]; then
  store_a="$store_a.$on_btrfs_seq"
  store_b="$store_b.$on_btrfs_seq"
fi

total_runs=1

if [[ $journal_test -eq 1 ]]; then
  total_runs=$(($total_runs + 3))
fi

num_runs=0

opt_min_sync="--filestore-min-sync-interval $min_sync_interval"
opt_max_sync="--filestore-max-sync-interval $max_sync_interval"

ret=0

while [[ $num_runs -lt $total_runs ]];
do
  tmp_name_a=$store_a
  tmp_name_b=$store_b
  tmp_opts_a=$test_opts_a
  tmp_opts_b=$test_opts_b

  #
  # We have already tested whether there are diffs when both journals
  # are properly working. Now let's try on three other scenarios:
  #  1) journal sync'ing for both stores is good as disabled
  #     (we call it '00')
  #  2) journal sync'ing for store A is as good as disabled
  #     (we call it '01')
  #  3) journal sync'ing for store B is as good as disabled
  #     (we call it '10')
  #
  if [[ $num_runs -gt 0 && $journal_test -eq 1 ]]; then
    echo "run #$num_runs"
    case $num_runs in
      1)
	tmp_name_a="$tmp_name_a.00"
	tmp_name_b="$tmp_name_b.00"
	tmp_opts_a="$tmp_opts_a $opt_min_sync $opt_max_sync"
	tmp_opts_b="$tmp_opts_b $opt_min_sync $opt_max_sync"
	;;
      2)
	tmp_name_a="$tmp_name_a.01"
	tmp_name_b="$tmp_name_b.01"
	tmp_opts_a="$tmp_opts_a $opt_min_sync $opt_max_sync"
	;;
      3)
	tmp_name_a="$tmp_name_a.10"
	tmp_name_b="$tmp_name_b.10"
	tmp_opts_b="$tmp_opts_b $opt_min_sync $opt_max_sync"
	;;
    esac
  fi

  do_rm $tmp_name_a $tmp_name_a.fail $tmp_name_a.recover
  $v ceph_test_filestore_idempotent_sequence run-sequence-to $to \
    $tmp_name_a $tmp_name_a/journal \
    --filestore-xattr-use-omap --test-seed $seed --osd-journal-size 100 \
    --filestore-kill-at $killat $tmp_opts_a \
    --log-file $tmp_name_a.fail --debug-filestore 20 || true

  stop_at=`ceph_test_filestore_idempotent_sequence get-last-op \
    $tmp_name_a $tmp_name_a/journal \
    --filestore-xattr-use-omap --log-file $tmp_name_a.recover \
    --debug-filestore 20 --debug-journal 20`

  if [[ "`expr $stop_at - $stop_at 2>/dev/null`" != "0" ]]; then
    echo "error: get-last-op returned '$stop_at'"
    exit 1
  fi

  echo stopped at $stop_at

  do_rm $tmp_name_b $tmp_name_b.clean
  $v ceph_test_filestore_idempotent_sequence run-sequence-to \
    $stop_at $tmp_name_b $tmp_name_b/journal \
    --filestore-xattr-use-omap --test-seed $seed --osd-journal-size 100 \
    --log-file $tmp_name_b.clean --debug-filestore 20 $tmp_opts_b

  if $v ceph_test_filestore_idempotent_sequence diff \
    $tmp_name_a $tmp_name_a/journal $tmp_name_b $tmp_name_b/journal \
    --filestore-xattr-use-omap; then
      echo OK
  else
    echo "FAIL"
    echo " see:"
    echo "   $tmp_name_a.fail     -- leading up to failure"
    echo "   $tmp_name_a.recover  -- journal replay"
    echo "   $tmp_name_b.clean    -- the clean reference"

    ret=1
    if [[ $exit_on_error -eq 1 ]]; then
      exit 1
    fi
  fi

  num_runs=$(($num_runs+1))
done

exit $ret