summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Morgan Jones <morgan@parelastic.com> 2014-09-10 10:31:00 -0700
committer: Nikhil Manchanda <SlickNik@gmail.com> 2014-09-29 00:24:19 -0700
commit: 0fe9c9dd59cf4bc084a9875ce789f0b2943c799a (patch)
tree: d484a345a9aaf85b124f8c3c838893432d7b011b
parent: 3cebe105188dd0b53e38a25c8dcede35f7c3dfce (diff)
download: trove-0fe9c9dd59cf4bc084a9875ce789f0b2943c799a.tar.gz
Make the replication snapshot timeout configurable
There is no way to tell how long the snapshot for replication will take, and we have no good way to poll for the slave state. Eventually, we will need to have an intelligent poll (perhaps based on guest heartbeats), but in the meantime we will have the snapshot use a configurable timeout which can be set as needed, and independently of the agent_call timeouts. Co-Authored-By: Nikhil Manchanda <SlickNik@gmail.com> Change-Id: I6316d748e91d1ec3eebe25a14bb43fbfe10db669 Closes-bug: 1362310
-rw-r--r-- etc/trove/trove-taskmanager.conf.sample 1
-rw-r--r-- trove/common/cfg.py 3
-rw-r--r-- trove/common/utils.py 2
-rw-r--r-- trove/guestagent/api.py 3
4 files changed, 7 insertions, 2 deletions
diff --git a/etc/trove/trove-taskmanager.conf.sample b/etc/trove/trove-taskmanager.conf.sample
index f7046319..89f43426 100644
--- a/etc/trove/trove-taskmanager.conf.sample
+++ b/etc/trove/trove-taskmanager.conf.sample
@@ -134,6 +134,7 @@ trove_security_group_rule_cidr = 0.0.0.0/0
agent_heartbeat_time = 10
agent_call_low_timeout = 5
agent_call_high_timeout = 150
+agent_replication_snapshot_timeout = 36000
# Whether to use nova's contrib api for create server with volume
use_nova_server_volume = False
diff --git a/trove/common/cfg.py b/trove/common/cfg.py
index 6d77e0f6..d627c250 100644
--- a/trove/common/cfg.py
+++ b/trove/common/cfg.py
@@ -139,6 +139,9 @@ common_opts = [
cfg.IntOpt('agent_call_high_timeout', default=60,
help="Maximum time (in seconds) to wait for Guest Agent 'slow' "
"requests (such as restarting the database)."),
+ cfg.IntOpt('agent_replication_snapshot_timeout', default=36000,
+ help='Maximum time (in seconds) to wait for taking a Guest '
+ 'Agent replication snapshot.'),
# The guest_id opt definition must match the one in cmd/guest.py
cfg.StrOpt('guest_id', default=None, help="ID of the Guest Instance."),
cfg.IntOpt('state_change_wait_time', default=3 * 60,
diff --git a/trove/common/utils.py b/trove/common/utils.py
index a27cd8d8..2f05aee7 100644
--- a/trove/common/utils.py
+++ b/trove/common/utils.py
@@ -250,7 +250,7 @@ def poll_until(retriever, condition=lambda value: value,
obj = retriever()
if condition(obj):
raise LoopingCallDone(retvalue=obj)
- if time_out is not None and time.time() > start_time + time_out:
+ if time_out is not None and time.time() - start_time > time_out:
raise exception.PollTimeOut
lc = LoopingCall(f=poll_and_check).start(sleep_time, True)
return lc.wait()
diff --git a/trove/guestagent/api.py b/trove/guestagent/api.py
index 3e712ccf..bdefa8fe 100644
--- a/trove/guestagent/api.py
+++ b/trove/guestagent/api.py
@@ -33,6 +33,7 @@ CONF = cfg.CONF
LOG = logging.getLogger(__name__)
AGENT_LOW_TIMEOUT = CONF.agent_call_low_timeout
AGENT_HIGH_TIMEOUT = CONF.agent_call_high_timeout
+AGENT_SNAPSHOT_TIMEOUT = CONF.agent_replication_snapshot_timeout
RPC_API_VERSION = "1.0"
@@ -326,7 +327,7 @@ class API(proxy.RpcProxy):
def get_replication_snapshot(self, snapshot_info=None):
LOG.debug("Retrieving replication snapshot from instance %s.", self.id)
- return self._call("get_replication_snapshot", AGENT_HIGH_TIMEOUT,
+ return self._call("get_replication_snapshot", AGENT_SNAPSHOT_TIMEOUT,
snapshot_info=snapshot_info)
def attach_replication_slave(self, snapshot, slave_config=None):