summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles-Henri de Boysson <ceache@users.noreply.github.com>2019-10-20 00:58:29 -0400
committerJeff Widman <jeff@jeffwidman.com>2020-02-07 10:00:06 -0800
commit0bb0659cf3fd73714a9d75b093a75c7122e116e6 (patch)
treef552d972dcd6798ee7123db9b9fb8b4cc280d57d
parentc7e80503f3be97d26ff2de27f9e424070c8181bb (diff)
downloadkazoo-0bb0659cf3fd73714a9d75b093a75c7122e116e6.tar.gz
fix(core): Implement proper retry backoff logic with jitter.
New retry logic takes a maximum percentage off the canonical backoff, ensuring gradual, predictable retry timings while still having a controllable amount of jitter (re-introducing the `max_jitter` parameter) to avoid swarming client retries. Fix regression introduced in 60366d2c7910fc833991fad8e04bbe33817c0544 where retry/backoff logic produced only whole-second (integer) retry delays. This produced inadequate delays on the first retry and would generally not work on fast networks where sub-millisecond retries are desired. Additionally, with a high `max_delay` setting, as the range always spanned from 0 to the last delay, it would also produce extremely random results, with short delays following longer ones, which is contrary to the expected backoff logic.
-rw-r--r--kazoo/retry.py38
1 files changed, 17 insertions, 21 deletions
diff --git a/kazoo/retry.py b/kazoo/retry.py
index eef7fbf..707b378 100644
--- a/kazoo/retry.py
+++ b/kazoo/retry.py
@@ -1,7 +1,6 @@
import logging
import random
import time
-import warnings
from kazoo.exceptions import (
ConnectionClosedError,
@@ -43,19 +42,20 @@ class KazooRetry(object):
SessionExpiredError,
)
- def __init__(self, max_tries=1, delay=0.1, backoff=2, max_jitter=None,
- max_delay=60, ignore_expire=True, sleep_func=time.sleep,
+ def __init__(self, max_tries=1, delay=0.1, backoff=2, max_jitter=0.4,
+ max_delay=60.0, ignore_expire=True, sleep_func=time.sleep,
deadline=None, interrupt=None):
"""Create a :class:`KazooRetry` instance for retrying function
- calls with uniform jitter
+ calls.
:param max_tries: How many times to retry the command. -1 means
infinite tries.
:param delay: Initial delay between retry attempts.
:param backoff: Backoff multiplier between retry attempts.
Defaults to 2 for exponential backoff.
- :param max_jitter: *Deprecated* Jitter is now uniformly distributed
- across retries.
+ :param max_jitter: Percentage of jitter to apply to each retry's delay
+ to ensure all clients do not hammer the server
+ at the same time. Between 0.0 and 1.0.
:param max_delay: Maximum delay in seconds, regardless of other
backoff settings. Defaults to one minute.
:param ignore_expire:
@@ -68,15 +68,11 @@ class KazooRetry(object):
between retries.
"""
- if max_jitter is not None:
- warnings.warn(
- 'Passing max_jitter to retry configuration is deprecated.'
- ' Retry jitter is now automacallity uniform across retries.'
- ' The parameter will be ignored.',
- DeprecationWarning, stacklevel=2)
self.max_tries = max_tries
self.delay = delay
self.backoff = backoff
+ # Clamp max_jitter to the closed range [0.0, 1.0]
+ self.max_jitter = max(min(max_jitter, 1.0), 0.0)
self.max_delay = float(max_delay)
self._attempts = 0
self._cur_delay = delay
@@ -99,6 +95,7 @@ class KazooRetry(object):
obj = KazooRetry(max_tries=self.max_tries,
delay=self.delay,
backoff=self.backoff,
+ max_jitter=self.max_jitter,
max_delay=self.max_delay,
sleep_func=self.sleep_func,
deadline=self.deadline,
@@ -134,25 +131,24 @@ class KazooRetry(object):
if self._attempts == self.max_tries:
raise RetryFailedError("Too many retry attempts")
self._attempts += 1
- sleeptime = random.randint(0, 1 + int(self._cur_delay))
+ jitter = random.uniform(1.0-self.max_jitter,
+ 1.0+self.max_jitter)
+ sleeptime = self._cur_delay * jitter
if self._cur_stoptime is not None and \
time.time() + sleeptime >= self._cur_stoptime:
raise RetryFailedError("Exceeded retry deadline")
if self.interrupt:
- while sleeptime > 0:
+ remain_time = sleeptime
+ while remain_time > 0:
# Break the time period down and sleep for no
# longer than 0.1 before calling the interrupt
- if sleeptime < 0.1:
- self.sleep_func(sleeptime)
- sleeptime -= sleeptime
- else:
- self.sleep_func(0.1)
- sleeptime -= 0.1
+ self.sleep_func(min(0.1, remain_time))
+ remain_time -= 0.1
if self.interrupt():
raise InterruptedError()
else:
self.sleep_func(sleeptime)
- self._cur_delay = min(self._cur_delay * self.backoff,
+ self._cur_delay = min(sleeptime * self.backoff,
self.max_delay)