summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Merritt <sam@swiftstack.com>2015-01-11 13:41:35 -0800
committerSamuel Merritt <sam@swiftstack.com>2015-03-30 17:47:28 -0700
commit8d3b3b2ee0a86b37b6ad1f4d201bfaa0103af07d (patch)
treee9658bb56802a0e136601cf097977e187aa469a5
parentcf21db4bc68aff190c55d416b406a44bf204ba36 (diff)
downloadswift-8d3b3b2ee0a86b37b6ad1f4d201bfaa0103af07d.tar.gz
Add some debug output to the ring builder
Sometimes, I get handed a builder file in a support ticket and a question of the form "why is the balance [not] doing $thing?". When that happens, I add a bunch of print statements to my local swift/common/ring/builder.py, figure things out, and then delete the print statements. This time, instead of deleting the print statements, I turned them into debug() calls and added a "--debug" flag to the rebalance command in hopes that someone else will find it useful. Change-Id: I697af90984fa5b314ddf570280b4585ba0ba363c
-rw-r--r--doc/source/admin_guide.rst10
-rwxr-xr-xswift/cli/ringbuilder.py25
-rw-r--r--swift/common/ring/builder.py39
-rw-r--r--test/unit/cli/test_ringbuilder.py27
4 files changed, 93 insertions, 8 deletions
diff --git a/doc/source/admin_guide.rst b/doc/source/admin_guide.rst
index d96353c84..5b7a02850 100644
--- a/doc/source/admin_guide.rst
+++ b/doc/source/admin_guide.rst
@@ -88,6 +88,16 @@ attempting to write to or read the builder/ring files while operations are in
progress. This can be useful in environments where ring management has been
automated but the operator still needs to interact with the rings manually.
+If the ring builder is not producing the balances that you are
+expecting, you can gain visibility into what it's doing with the
+``--debug`` flag.::
+
+ swift-ring-builder <builder-file> rebalance --debug
+
+This produces a great deal of output that is mostly useful if you are
+either (a) attempting to fix the ring builder, or (b) filing a bug
+against the ring builder.
+
-----------------------
Scripting Ring Creation
-----------------------
diff --git a/swift/cli/ringbuilder.py b/swift/cli/ringbuilder.py
index 0a7dab533..16315627d 100755
--- a/swift/cli/ringbuilder.py
+++ b/swift/cli/ringbuilder.py
@@ -14,12 +14,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import logging
+
from errno import EEXIST
from itertools import islice, izip
from operator import itemgetter
from os import mkdir
from os.path import basename, abspath, dirname, exists, join as pathjoin
-from sys import argv as sys_argv, exit, stderr
+from sys import argv as sys_argv, exit, stderr, stdout
from textwrap import wrap
from time import time
import optparse
@@ -831,6 +833,8 @@ swift-ring-builder <builder_file> rebalance [options]
help='Force a rebalanced ring to save even '
'if < 1% of parts changed')
parser.add_option('-s', '--seed', help="seed to use for rebalance")
+ parser.add_option('-d', '--debug', action='store_true',
+ help="print debug information")
options, args = parser.parse_args(argv)
def get_seed(index):
@@ -841,6 +845,14 @@ swift-ring-builder <builder_file> rebalance [options]
except IndexError:
pass
+ if options.debug:
+ logger = logging.getLogger("swift.ring.builder")
+ logger.setLevel(logging.DEBUG)
+ handler = logging.StreamHandler(stdout)
+ formatter = logging.Formatter("%(levelname)s: %(message)s")
+ handler.setFormatter(formatter)
+ logger.addHandler(handler)
+
devs_changed = builder.devs_changed
try:
last_balance = builder.get_balance()
@@ -889,11 +901,12 @@ swift-ring-builder <builder_file> rebalance [options]
status = EXIT_SUCCESS
if builder.dispersion > 0:
print '-' * 79
- print('NOTE: Dispersion of %.06f indicates some parts are not\n'
- ' optimally dispersed.\n\n'
- ' You may want adjust some device weights, increase\n'
- ' the overload or review the dispersion report.' %
- builder.dispersion)
+ print(
+ 'NOTE: Dispersion of %.06f indicates some parts are not\n'
+ ' optimally dispersed.\n\n'
+ ' You may want to adjust some device weights, increase\n'
+ ' the overload or review the dispersion report.' %
+ builder.dispersion)
status = EXIT_WARNING
print '-' * 79
elif balance > 5 and balance / 100.0 > builder.overload:
diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py
index b464bd76b..6672fdbec 100644
--- a/swift/common/ring/builder.py
+++ b/swift/common/ring/builder.py
@@ -17,6 +17,7 @@ import bisect
import copy
import errno
import itertools
+import logging
import math
import random
import cPickle as pickle
@@ -33,6 +34,16 @@ from swift.common.ring.utils import tiers_for_dev, build_tier_tree, \
MAX_BALANCE = 999.99
+try:
+ # python 2.7+
+ from logging import NullHandler
+except ImportError:
+ # python 2.6
+ class NullHandler(logging.Handler):
+ def emit(self, *a, **kw):
+ pass
+
+
class RingBuilder(object):
"""
Used to build swift.common.ring.RingData instances to be written to disk
@@ -96,6 +107,11 @@ class RingBuilder(object):
self._remove_devs = []
self._ring = None
+ self.logger = logging.getLogger("swift.ring.builder")
+ if not self.logger.handlers:
+ # silence "no handler for X" error messages
+ self.logger.addHandler(NullHandler())
+
def weight_of_one_part(self):
"""
Returns the weight of each partition as calculated from the
@@ -355,6 +371,7 @@ class RingBuilder(object):
self._ring = None
if self._last_part_moves_epoch is None:
+ self.logger.debug("New builder; performing initial balance")
self._initial_balance()
self.devs_changed = False
self._build_dispersion_graph()
@@ -363,16 +380,23 @@ class RingBuilder(object):
self._update_last_part_moves()
last_balance = 0
new_parts, removed_part_count = self._adjust_replica2part2dev_size()
+ self.logger.debug(
+ "%d new parts and %d removed parts from replica-count change",
+ len(new_parts), removed_part_count)
changed_parts += removed_part_count
self._set_parts_wanted()
self._reassign_parts(new_parts)
changed_parts += len(new_parts)
while True:
reassign_parts = self._gather_reassign_parts()
- self._reassign_parts(reassign_parts)
changed_parts += len(reassign_parts)
+ self.logger.debug("Gathered %d parts", changed_parts)
+ self._reassign_parts(reassign_parts)
+ self.logger.debug("Assigned %d parts", changed_parts)
while self._remove_devs:
- self.devs[self._remove_devs.pop()['id']] = None
+ remove_dev_id = self._remove_devs.pop()['id']
+ self.logger.debug("Removing dev %d", remove_dev_id)
+ self.devs[remove_dev_id] = None
balance = self.get_balance()
if balance < 1 or abs(last_balance - balance) < 1 or \
changed_parts == self.parts:
@@ -786,6 +810,9 @@ class RingBuilder(object):
if dev_id in dev_ids:
self._last_part_moves[part] = 0
removed_dev_parts[part].append(replica)
+ self.logger.debug(
+ "Gathered %d/%d from dev %d [dev removed]",
+ part, replica, dev_id)
# Now we gather partitions that are "at risk" because they aren't
# currently sufficient spread out across the cluster.
@@ -859,6 +886,9 @@ class RingBuilder(object):
dev['parts'] -= 1
removed_replica = True
moved_parts += 1
+ self.logger.debug(
+ "Gathered %d/%d from dev %d [dispersion]",
+ part, replica, dev['id'])
break
if removed_replica:
for tier in tfd[dev['id']]:
@@ -894,6 +924,9 @@ class RingBuilder(object):
dev['parts_wanted'] += 1
dev['parts'] -= 1
reassign_parts[part].append(replica)
+ self.logger.debug(
+ "Gathered %d/%d from dev %d [weight]",
+ part, replica, dev['id'])
reassign_parts.update(spread_out_parts)
reassign_parts.update(removed_dev_parts)
@@ -1121,6 +1154,8 @@ class RingBuilder(object):
new_index, new_last_sort_key)
self._replica2part2dev[replica][part] = dev['id']
+ self.logger.debug(
+ "Placed %d/%d onto dev %d", part, replica, dev['id'])
# Just to save memory and keep from accidental reuse.
for dev in self._iter_devs():
diff --git a/test/unit/cli/test_ringbuilder.py b/test/unit/cli/test_ringbuilder.py
index 38991bfe4..8b96f2516 100644
--- a/test/unit/cli/test_ringbuilder.py
+++ b/test/unit/cli/test_ringbuilder.py
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import logging
import mock
import os
import StringIO
@@ -1744,6 +1745,32 @@ class TestRebalanceCommand(unittest.TestCase, RunSwiftRingBuilderMixin):
raise
return (mock_stdout.getvalue(), mock_stderr.getvalue())
+ def test_debug(self):
+ # NB: getLogger(name) always returns the same object
+ rb_logger = logging.getLogger("swift.ring.builder")
+ try:
+ self.assertNotEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
+
+ self.run_srb("create", 8, 3, 1)
+ self.run_srb("add",
+ "r1z1-10.1.1.1:2345/sda", 100.0,
+ "r1z1-10.1.1.1:2345/sdb", 100.0,
+ "r1z1-10.1.1.1:2345/sdc", 100.0,
+ "r1z1-10.1.1.1:2345/sdd", 100.0)
+ self.run_srb("rebalance", "--debug")
+ self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
+
+ rb_logger.setLevel(logging.INFO)
+ self.run_srb("rebalance", "--debug", "123")
+ self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
+
+ rb_logger.setLevel(logging.INFO)
+ self.run_srb("rebalance", "123", "--debug")
+ self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
+
+ finally:
+ rb_logger.setLevel(logging.INFO) # silence other test cases
+
def test_rebalance_warning_appears(self):
self.run_srb("create", 8, 3, 24)
# all in one machine: totally balanceable