summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDmitry Tantsur <dtantsur@protonmail.com>2021-06-23 17:11:16 +0200
committerDmitry Tantsur <dtantsur@protonmail.com>2021-07-05 18:42:20 +0200
commit0cb15a223a24927a203d84dded451de461008945 (patch)
tree1c17209b70d402db05a62b1e3ccf0f7037d39e39
parent4ac6ad7317e20e1f2ebd99b1ce53d3ad2b1f09b0 (diff)
downloadironic-0cb15a223a24927a203d84dded451de461008945.tar.gz
Cache AgentClient on Task, not globally
In order to avoid potential cache coherency issues when using a globally cached AgentClient, e.g. with TSL certificates from the IPA, cache the AgentClient on a per task basis. Co-Authored-By: Arne Wiebalck <arne.wiebalck@cern.ch> Conflicts: ironic/drivers/modules/agent.py ironic/drivers/modules/agent_base.py ironic/drivers/modules/ansible/deploy.py ironic/drivers/modules/iscsi_deploy.py ironic/tests/unit/drivers/modules/test_agent.py Story: #2009004 Task: #42678 Change-Id: I0c458c8d9ae673181beb6d85c2ee68235ccef239 (cherry picked from commit fcb6a1096f48336f19a5e8aa80445f57d4480c08)
-rw-r--r--ironic/drivers/modules/agent.py14
-rw-r--r--ironic/drivers/modules/agent_base.py31
-rw-r--r--ironic/drivers/modules/agent_client.py9
-rw-r--r--ironic/drivers/modules/ansible/deploy.py6
-rw-r--r--ironic/drivers/modules/iscsi_deploy.py4
-rw-r--r--ironic/tests/unit/drivers/modules/test_agent.py4
-rw-r--r--ironic/tests/unit/drivers/modules/test_agent_base.py3
-rw-r--r--ironic/tests/unit/drivers/modules/test_iscsi_deploy.py3
-rw-r--r--releasenotes/notes/cache-agentclient-per-task-ec2231684e6876d9.yaml5
9 files changed, 49 insertions, 30 deletions
diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py
index 1cb380ab2..bae6feda8 100644
--- a/ironic/drivers/modules/agent.py
+++ b/ironic/drivers/modules/agent.py
@@ -31,6 +31,7 @@ from ironic.conductor import utils as manager_utils
from ironic.conf import CONF
from ironic.drivers import base
from ironic.drivers.modules import agent_base
+from ironic.drivers.modules import agent_client
from ironic.drivers.modules import boot_mode_utils
from ironic.drivers.modules import deploy_utils
@@ -269,6 +270,8 @@ class AgentDeployMixin(agent_base.AgentDeployMixin):
'step deploy.write_image, using the deprecated '
'synchronous fall-back', task.node.uuid)
+ client = agent_client.get_client(task)
+
if self.has_decomposed_deploy_steps and has_write_image:
configdrive = node.instance_info.get('configdrive')
# Now switch into the corresponding in-band deploy step and let the
@@ -278,10 +281,10 @@ class AgentDeployMixin(agent_base.AgentDeployMixin):
'args': {'image_info': image_info,
'configdrive': configdrive}}
return agent_base.execute_step(task, new_step, 'deploy',
- client=self._client)
+ client=client)
else:
# TODO(dtantsur): remove in W
- command = self._client.prepare_image(node, image_info, wait=True)
+ command = client.prepare_image(node, image_info, wait=True)
if command['command_status'] == 'FAILED':
# TODO(jimrollenhagen) power off if using neutron dhcp to
# align with pxe driver?
@@ -292,8 +295,8 @@ class AgentDeployMixin(agent_base.AgentDeployMixin):
# TODO(dtantsur): remove in W
def _get_uuid_from_result(self, task, type_uuid):
- command = self._client.get_last_command_status(task.node,
- 'prepare_image')
+ client = agent_client.get_client(task)
+ command = client.get_last_command_status(task.node, 'prepare_image')
if (not command
or not command.get('command_result', {}).get('result')):
msg = _('Unexpected response from the agent for node %s: the '
@@ -347,8 +350,9 @@ class AgentDeployMixin(agent_base.AgentDeployMixin):
# ppc64* hardware we need to provide the 'PReP_Boot_partition_uuid' to
# direct where the bootloader should be installed.
driver_internal_info = task.node.driver_internal_info
+ client = agent_client.get_client(task)
try:
- partition_uuids = self._client.get_partition_uuids(node).get(
+ partition_uuids = client.get_partition_uuids(node).get(
'command_result') or {}
root_uuid = partition_uuids.get('root uuid')
except exception.AgentAPIError:
diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py
index c826e6a25..226c34bdc 100644
--- a/ironic/drivers/modules/agent_base.py
+++ b/ironic/drivers/modules/agent_base.py
@@ -99,11 +99,6 @@ _FASTTRACK_HEARTBEAT_ALLOWED = (states.DEPLOYWAIT, states.CLEANWAIT,
FASTTRACK_HEARTBEAT_ALLOWED = frozenset(_FASTTRACK_HEARTBEAT_ALLOWED)
-def _get_client():
- client = agent_client.AgentClient()
- return client
-
-
@METRICS.timer('post_clean_step_hook')
def post_clean_step_hook(interface, step):
"""Decorator method for adding a post clean step hook.
@@ -372,7 +367,7 @@ def execute_step(task, step, step_type, client=None):
completed async
"""
if client is None:
- client = _get_client()
+ client = agent_client.get_client(task)
ports = objects.Port.list_by_node_id(
task.context, task.node.id)
call = getattr(client, 'execute_%s_step' % step_type)
@@ -410,8 +405,9 @@ class HeartbeatMixin(object):
more of the deployment flow is driven by deploy steps.
"""
+ collect_deploy_logs = True
+
def __init__(self):
- self._client = _get_client()
if not self.has_decomposed_deploy_steps:
LOG.warning('%s does not support decomposed deploy steps. This '
'is deprecated and will stop working in a future '
@@ -564,7 +560,7 @@ class HeartbeatMixin(object):
# Do not call the error handler is the node is already DEPLOYFAIL
if node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
deploy_utils.set_failed_state(
- task, last_error, collect_logs=bool(self._client))
+ task, last_error, collect_logs=self.collect_deploy_logs)
def _heartbeat_clean_wait(self, task):
node = task.node
@@ -678,7 +674,8 @@ class HeartbeatMixin(object):
"""
node = task.node
try:
- result = self._client.finalize_rescue(node)
+ client = agent_client.get_client(task)
+ result = client.finalize_rescue(node)
except exception.IronicException as e:
raise exception.InstanceRescueFailure(node=node.uuid,
instance=node.instance_uuid,
@@ -899,7 +896,8 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin):
{'node': node.uuid, 'type': step_type,
'steps': previous_steps})
- call = getattr(self._client, 'get_%s_steps' % step_type)
+ client = agent_client.get_client(task)
+ call = getattr(client, 'get_%s_steps' % step_type)
try:
agent_result = call(node, task.ports).get('command_result', {})
except exception.AgentInProgress as exc:
@@ -1083,7 +1081,8 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin):
assert step_type in ('clean', 'deploy')
node = task.node
- agent_commands = self._client.get_commands_status(task.node)
+ client = agent_client.get_client(task)
+ agent_commands = client.get_commands_status(task.node)
if _freshly_booted(agent_commands, step_type):
field = ('cleaning_reboot' if step_type == 'clean'
@@ -1191,16 +1190,17 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin):
can_power_on = (states.POWER_ON in
task.driver.power.get_supported_power_states(task))
+ client = agent_client.get_client(task)
try:
if not can_power_on:
LOG.info('Power interface of node %(node)s does not support '
'power on, using reboot to switch to the instance',
node.uuid)
- self._client.sync(node)
+ client.sync(node)
manager_utils.node_power_action(task, states.REBOOT)
elif not oob_power_off:
try:
- self._client.power_off(node)
+ client.power_off(node)
except Exception as e:
LOG.warning('Failed to soft power off node %(node_uuid)s. '
'%(cls)s: %(error)s',
@@ -1223,7 +1223,7 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin):
manager_utils.node_power_action(task, states.POWER_OFF)
else:
# Flush the file system prior to hard rebooting the node
- result = self._client.sync(node)
+ result = client.sync(node)
error = result.get('faultstring')
if error:
if 'Unknown command' in error:
@@ -1366,7 +1366,8 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin):
'partition %(part)s, EFI system partition %(efi)s',
{'node': node.uuid, 'part': root_uuid,
'efi': efi_system_part_uuid})
- result = self._client.install_bootloader(
+ client = agent_client.get_client(task)
+ result = client.install_bootloader(
node, root_uuid=root_uuid,
efi_system_part_uuid=efi_system_part_uuid,
prep_boot_part_uuid=prep_boot_part_uuid,
diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py
index 59975777f..f04d7160b 100644
--- a/ironic/drivers/modules/agent_client.py
+++ b/ironic/drivers/modules/agent_client.py
@@ -35,6 +35,15 @@ DEFAULT_IPA_PORTAL_PORT = 3260
REBOOT_COMMAND = 'run_image'
+def get_client(task):
+ """Get client for this node."""
+ try:
+ return task.cached_agent_client
+ except AttributeError:
+ task.cached_agent_client = AgentClient()
+ return task.cached_agent_client
+
+
def get_command_error(command):
"""Extract an error string from the command result.
diff --git a/ironic/drivers/modules/ansible/deploy.py b/ironic/drivers/modules/ansible/deploy.py
index d4186741f..ae5151d95 100644
--- a/ironic/drivers/modules/ansible/deploy.py
+++ b/ironic/drivers/modules/ansible/deploy.py
@@ -382,11 +382,7 @@ class AnsibleDeploy(agent_base.HeartbeatMixin,
has_decomposed_deploy_steps = True
- def __init__(self):
- super(AnsibleDeploy, self).__init__()
- # NOTE(pas-ha) overriding agent creation as we won't be
- # communicating with it, only processing heartbeats
- self._client = None
+ collect_deploy_logs = False
def get_properties(self):
"""Return the properties of the interface."""
diff --git a/ironic/drivers/modules/iscsi_deploy.py b/ironic/drivers/modules/iscsi_deploy.py
index 7e3687306..108e87871 100644
--- a/ironic/drivers/modules/iscsi_deploy.py
+++ b/ironic/drivers/modules/iscsi_deploy.py
@@ -34,6 +34,7 @@ from ironic.conductor import utils as manager_utils
from ironic.conf import CONF
from ironic.drivers import base
from ironic.drivers.modules import agent_base
+from ironic.drivers.modules import agent_client
from ironic.drivers.modules import boot_mode_utils
from ironic.drivers.modules import deploy_utils
@@ -700,7 +701,8 @@ class ISCSIDeploy(agent_base.AgentDeployMixin, agent_base.AgentBaseMixin,
node = task.node
LOG.debug('Continuing the deployment on node %s', node.uuid)
- uuid_dict_returned = do_agent_iscsi_deploy(task, self._client)
+ client = agent_client.get_client(task)
+ uuid_dict_returned = do_agent_iscsi_deploy(task, client)
utils.set_node_nested_field(node, 'driver_internal_info',
'deployment_uuids', uuid_dict_returned)
node.save()
diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py
index b55d61cdd..27047b549 100644
--- a/ironic/tests/unit/drivers/modules/test_agent.py
+++ b/ironic/tests/unit/drivers/modules/test_agent.py
@@ -1253,7 +1253,7 @@ class TestAgentDeploy(db_base.DbTestCase):
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
- task.driver.deploy._client = client_mock
+ task.cached_agent_client = client_mock
task.driver.deploy.write_image(task)
if compat:
@@ -1350,7 +1350,7 @@ class TestAgentDeploy(db_base.DbTestCase):
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
- task.driver.deploy._client = client_mock
+ task.cached_agent_client = client_mock
task.driver.deploy.write_image(task)
client_mock.prepare_image.assert_called_with(task.node,
diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py
index 8c1b3cfdb..a655c24d6 100644
--- a/ironic/tests/unit/drivers/modules/test_agent_base.py
+++ b/ironic/tests/unit/drivers/modules/test_agent_base.py
@@ -1121,7 +1121,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
collect_mock.assert_called_once_with(task.node)
self.assertFalse(power_on_node_if_needed_mock.called)
- sync_mock.assert_called_once_with(self.deploy._client, task.node)
+ sync_mock.assert_called_once_with(agent_client.get_client(task),
+ task.node)
@mock.patch.object(manager_utils, 'restore_power_state_if_needed',
autospec=True)
diff --git a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py
index 1cd12feb0..c1f683c40 100644
--- a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py
+++ b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py
@@ -1000,9 +1000,10 @@ class ISCSIDeployTestCase(db_base.DbTestCase):
do_agent_iscsi_deploy_mock.return_value = deployment_uuids
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
+ task.cached_agent_client = mock.sentinel.agent_client
task.driver.deploy.write_image(task)
do_agent_iscsi_deploy_mock.assert_called_once_with(
- task, task.driver.deploy._client)
+ task, mock.sentinel.agent_client)
self.assertEqual(
task.node.driver_internal_info['deployment_uuids'],
deployment_uuids)
diff --git a/releasenotes/notes/cache-agentclient-per-task-ec2231684e6876d9.yaml b/releasenotes/notes/cache-agentclient-per-task-ec2231684e6876d9.yaml
new file mode 100644
index 000000000..d6822b734
--- /dev/null
+++ b/releasenotes/notes/cache-agentclient-per-task-ec2231684e6876d9.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+ - |
+ Fixes potential cache coherency issues by caching the AgentClient
+ per task, rather than globally.