summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClay Gerrard <clay.gerrard@gmail.com>2015-04-03 16:23:14 -0700
committerClay Gerrard <clay.gerrard@gmail.com>2015-04-14 00:52:17 -0700
commitb2189ef47ae08c39c348e7f4c90697ecb9ba64f9 (patch)
tree40b7f94ff15d2404a488c667e29f6989bed1fbb2
parent61a9d35fd58381b7c299f125ef01d00f9b0203fe (diff)
downloadswift-b2189ef47ae08c39c348e7f4c90697ecb9ba64f9.tar.gz
Fix account reaper for > 3 replicas
There's a pre-existing IndexError in the pop from the container node list in reaper's reap_object method for object rings with a replica count greater than the container replica count. Which is more likely on EC storage policies. When making the backend direct delete requests to the nodes once the container node's list is exhausted the generic exception handler logs the error and breaks out of any other backend object requests - but the reaper marches forward and eventually the tombstones are replicated. This change just cycles the container headers across all the nodes - which seems reasonable enough - but could certainly garner bikeshedding. Change-Id: I5897d00b0a8c1e05884945dd93d9ce891b207001
-rw-r--r--swift/account/reaper.py5
-rw-r--r--test/unit/account/test_reaper.py64
2 files changed, 47 insertions, 22 deletions
diff --git a/swift/account/reaper.py b/swift/account/reaper.py
index ce69fab92..06a008535 100644
--- a/swift/account/reaper.py
+++ b/swift/account/reaper.py
@@ -19,6 +19,7 @@ from swift import gettext_ as _
from logging import DEBUG
from math import sqrt
from time import time
+import itertools
from eventlet import GreenPool, sleep, Timeout
@@ -432,7 +433,7 @@ class AccountReaper(Daemon):
* See also: :func:`swift.common.ring.Ring.get_nodes` for a description
of the container node dicts.
"""
- container_nodes = list(container_nodes)
+ cnodes = itertools.cycle(container_nodes)
try:
ring = self.get_object_ring(policy_index)
except PolicyError:
@@ -443,7 +444,7 @@ class AccountReaper(Daemon):
successes = 0
failures = 0
for node in nodes:
- cnode = container_nodes.pop()
+ cnode = next(cnodes)
try:
direct_delete_object(
node, part, account, container, obj,
diff --git a/test/unit/account/test_reaper.py b/test/unit/account/test_reaper.py
index d42bb4dbb..d81b565fc 100644
--- a/test/unit/account/test_reaper.py
+++ b/test/unit/account/test_reaper.py
@@ -141,7 +141,7 @@ cont_nodes = [{'device': 'sda1',
@unit.patch_policies([StoragePolicy(0, 'zero', False,
object_ring=unit.FakeRing()),
StoragePolicy(1, 'one', True,
- object_ring=unit.FakeRing())])
+ object_ring=unit.FakeRing(replicas=4))])
class TestReaper(unittest.TestCase):
def setUp(self):
@@ -215,7 +215,7 @@ class TestReaper(unittest.TestCase):
r.stats_objects_possibly_remaining = 0
r.myips = myips
if fakelogger:
- r.logger = FakeLogger()
+ r.logger = unit.debug_logger('test-reaper')
return r
def fake_reap_account(self, *args, **kwargs):
@@ -287,7 +287,7 @@ class TestReaper(unittest.TestCase):
policy.idx)
for i, call_args in enumerate(
fake_direct_delete.call_args_list):
- cnode = cont_nodes[i]
+ cnode = cont_nodes[i % len(cont_nodes)]
host = '%(ip)s:%(port)s' % cnode
device = cnode['device']
headers = {
@@ -302,7 +302,8 @@ class TestReaper(unittest.TestCase):
headers=headers, conn_timeout=0.5,
response_timeout=10)
self.assertEqual(call_args, expected)
- self.assertEqual(r.stats_objects_deleted, 3)
+ self.assertEqual(r.stats_objects_deleted,
+ policy.object_ring.replicas)
def test_reap_object_fail(self):
r = self.init_reaper({}, fakelogger=True)
@@ -313,7 +314,26 @@ class TestReaper(unittest.TestCase):
self.fake_direct_delete_object):
r.reap_object('a', 'c', 'partition', cont_nodes, 'o',
policy.idx)
- self.assertEqual(r.stats_objects_deleted, 1)
+ # IMHO, the stat handling in the node loop of reap object is
+ # over indented, but no one has complained, so I'm not inclined
+ # to move it. However it's worth noting we're currently keeping
+ # stats on deletes per *replica* - which is rather obvious from
+ # these tests, but this results is surprising because of some
+ # funny logic to *skip* increments on successful deletes of
+ # replicas until we have more successful responses than
+ # failures. This means that while the first replica doesn't
+ # increment deleted because of the failure, the second one
+ # *does* get successfully deleted, but *also does not* increment
+ # the counter (!?).
+ #
+ # In the three replica case this leaves only the last deleted
+ # object incrementing the counter - in the four replica case
+ # this leaves the last two.
+ #
+ # Basically this test will always result in:
+ # deleted == num_replicas - 2
+ self.assertEqual(r.stats_objects_deleted,
+ policy.object_ring.replicas - 2)
self.assertEqual(r.stats_objects_remaining, 1)
self.assertEqual(r.stats_objects_possibly_remaining, 1)
@@ -348,7 +368,7 @@ class TestReaper(unittest.TestCase):
mocks['direct_get_container'].side_effect = fake_get_container
r.reap_container('a', 'partition', acc_nodes, 'c')
mock_calls = mocks['direct_delete_object'].call_args_list
- self.assertEqual(3, len(mock_calls))
+ self.assertEqual(policy.object_ring.replicas, len(mock_calls))
for call_args in mock_calls:
_args, kwargs = call_args
self.assertEqual(kwargs['headers']
@@ -356,7 +376,7 @@ class TestReaper(unittest.TestCase):
policy.idx)
self.assertEquals(mocks['direct_delete_container'].call_count, 3)
- self.assertEqual(r.stats_objects_deleted, 3)
+ self.assertEqual(r.stats_objects_deleted, policy.object_ring.replicas)
def test_reap_container_get_object_fail(self):
r = self.init_reaper({}, fakelogger=True)
@@ -374,7 +394,7 @@ class TestReaper(unittest.TestCase):
self.fake_reap_object)]
with nested(*ctx):
r.reap_container('a', 'partition', acc_nodes, 'c')
- self.assertEqual(r.logger.inc['return_codes.4'], 1)
+ self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 1)
self.assertEqual(r.stats_containers_deleted, 1)
def test_reap_container_partial_fail(self):
@@ -393,7 +413,7 @@ class TestReaper(unittest.TestCase):
self.fake_reap_object)]
with nested(*ctx):
r.reap_container('a', 'partition', acc_nodes, 'c')
- self.assertEqual(r.logger.inc['return_codes.4'], 2)
+ self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 2)
self.assertEqual(r.stats_containers_possibly_remaining, 1)
def test_reap_container_full_fail(self):
@@ -412,7 +432,7 @@ class TestReaper(unittest.TestCase):
self.fake_reap_object)]
with nested(*ctx):
r.reap_container('a', 'partition', acc_nodes, 'c')
- self.assertEqual(r.logger.inc['return_codes.4'], 3)
+ self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 3)
self.assertEqual(r.stats_containers_remaining, 1)
@patch('swift.account.reaper.Ring',
@@ -437,8 +457,8 @@ class TestReaper(unittest.TestCase):
mocks['direct_get_container'].side_effect = fake_get_container
r.reap_container('a', 'partition', acc_nodes, 'c')
- self.assertEqual(r.logger.msg,
- 'ERROR: invalid storage policy index: 2')
+ self.assertEqual(r.logger.get_lines_for_level('error'), [
+ 'ERROR: invalid storage policy index: 2'])
def fake_reap_container(self, *args, **kwargs):
self.called_amount += 1
@@ -463,13 +483,16 @@ class TestReaper(unittest.TestCase):
nodes = r.get_account_ring().get_part_nodes()
self.assertTrue(r.reap_account(broker, 'partition', nodes))
self.assertEqual(self.called_amount, 4)
- self.assertEqual(r.logger.msg.find('Completed pass'), 0)
- self.assertTrue(r.logger.msg.find('1 containers deleted'))
- self.assertTrue(r.logger.msg.find('1 objects deleted'))
- self.assertTrue(r.logger.msg.find('1 containers remaining'))
- self.assertTrue(r.logger.msg.find('1 objects remaining'))
- self.assertTrue(r.logger.msg.find('1 containers possibly remaining'))
- self.assertTrue(r.logger.msg.find('1 objects possibly remaining'))
+ info_lines = r.logger.get_lines_for_level('info')
+ self.assertEqual(len(info_lines), 2)
+ start_line, stat_line = info_lines
+ self.assertEqual(start_line, 'Beginning pass on account a')
+ self.assertTrue(stat_line.find('1 containers deleted'))
+ self.assertTrue(stat_line.find('1 objects deleted'))
+ self.assertTrue(stat_line.find('1 containers remaining'))
+ self.assertTrue(stat_line.find('1 objects remaining'))
+ self.assertTrue(stat_line.find('1 containers possibly remaining'))
+ self.assertTrue(stat_line.find('1 objects possibly remaining'))
def test_reap_account_no_container(self):
broker = FakeAccountBroker(tuple())
@@ -483,7 +506,8 @@ class TestReaper(unittest.TestCase):
with nested(*ctx):
nodes = r.get_account_ring().get_part_nodes()
self.assertTrue(r.reap_account(broker, 'partition', nodes))
- self.assertEqual(r.logger.msg.find('Completed pass'), 0)
+ self.assertTrue(r.logger.get_lines_for_level(
+ 'info')[-1].startswith('Completed pass'))
self.assertEqual(self.called_amount, 0)
def test_reap_device(self):