diff options
author | Charles-Henri de Boysson <ceache@users.noreply.github.com> | 2019-10-20 00:58:29 -0400 |
---|---|---|
committer | Jeff Widman <jeff@jeffwidman.com> | 2020-02-07 10:00:06 -0800 |
commit | 0bb0659cf3fd73714a9d75b093a75c7122e116e6 (patch) | |
tree | f552d972dcd6798ee7123db9b9fb8b4cc280d57d | |
parent | c7e80503f3be97d26ff2de27f9e424070c8181bb (diff) | |
download | kazoo-0bb0659cf3fd73714a9d75b093a75c7122e116e6.tar.gz |
fix(core): Implement proper retry backoff logic with jitter.
The new retry logic varies the canonical backoff by at most a given percentage,
ensuring gradual, predictable retry timings while still having a
controllable amount of jitter (re-introducing the `max_jitter` parameter)
to avoid swarming client retries.
Fix a regression introduced in 60366d2c7910fc833991fad8e04bbe33817c0544
where the retry/backoff logic produced only whole-second (integer) retry
delays.
This produced an inadequate delay on the first retry and would generally not
work on fast networks where sub-millisecond retries are desired.
Additionally, with a high `max_delay` setting, since the range always
spanned from 0 to the last delay, it would also produce extremely
random results, with short delays following longer ones, which is contrary
to the expected backoff logic.
-rw-r--r-- | kazoo/retry.py | 38 |
1 files changed, 17 insertions, 21 deletions
diff --git a/kazoo/retry.py b/kazoo/retry.py index eef7fbf..707b378 100644 --- a/kazoo/retry.py +++ b/kazoo/retry.py @@ -1,7 +1,6 @@ import logging import random import time -import warnings from kazoo.exceptions import ( ConnectionClosedError, @@ -43,19 +42,20 @@ class KazooRetry(object): SessionExpiredError, ) - def __init__(self, max_tries=1, delay=0.1, backoff=2, max_jitter=None, - max_delay=60, ignore_expire=True, sleep_func=time.sleep, + def __init__(self, max_tries=1, delay=0.1, backoff=2, max_jitter=0.4, + max_delay=60.0, ignore_expire=True, sleep_func=time.sleep, deadline=None, interrupt=None): """Create a :class:`KazooRetry` instance for retrying function - calls with uniform jitter + calls. :param max_tries: How many times to retry the command. -1 means infinite tries. :param delay: Initial delay between retry attempts. :param backoff: Backoff multiplier between retry attempts. Defaults to 2 for exponential backoff. - :param max_jitter: *Deprecated* Jitter is now uniformly distributed - across retries. + :param max_jitter: Percentage of jitter to apply to each retry's delay + to ensure all clients to do not hammer the server + at the same time. Between 0.0 and 1.0. :param max_delay: Maximum delay in seconds, regardless of other backoff settings. Defaults to one minute. :param ignore_expire: @@ -68,15 +68,11 @@ class KazooRetry(object): between retries. """ - if max_jitter is not None: - warnings.warn( - 'Passing max_jitter to retry configuration is deprecated.' - ' Retry jitter is now automacallity uniform across retries.' 
- ' The parameter will be ignored.', - DeprecationWarning, stacklevel=2) self.max_tries = max_tries self.delay = delay self.backoff = backoff + # Ensure max_jitter is in (0, 1) + self.max_jitter = max(min(max_jitter, 1.0), 0.0) self.max_delay = float(max_delay) self._attempts = 0 self._cur_delay = delay @@ -99,6 +95,7 @@ class KazooRetry(object): obj = KazooRetry(max_tries=self.max_tries, delay=self.delay, backoff=self.backoff, + max_jitter=self.max_jitter, max_delay=self.max_delay, sleep_func=self.sleep_func, deadline=self.deadline, @@ -134,25 +131,24 @@ class KazooRetry(object): if self._attempts == self.max_tries: raise RetryFailedError("Too many retry attempts") self._attempts += 1 - sleeptime = random.randint(0, 1 + int(self._cur_delay)) + jitter = random.uniform(1.0-self.max_jitter, + 1.0+self.max_jitter) + sleeptime = self._cur_delay * jitter if self._cur_stoptime is not None and \ time.time() + sleeptime >= self._cur_stoptime: raise RetryFailedError("Exceeded retry deadline") if self.interrupt: - while sleeptime > 0: + remain_time = sleeptime + while remain_time > 0: # Break the time period down and sleep for no # longer than 0.1 before calling the interrupt - if sleeptime < 0.1: - self.sleep_func(sleeptime) - sleeptime -= sleeptime - else: - self.sleep_func(0.1) - sleeptime -= 0.1 + self.sleep_func(min(0.1, remain_time)) + remain_time -= 0.1 if self.interrupt(): raise InterruptedError() else: self.sleep_func(sleeptime) - self._cur_delay = min(self._cur_delay * self.backoff, + self._cur_delay = min(sleeptime * self.backoff, self.max_delay) |