diff options
author | Dmitry Tantsur <dtantsur@protonmail.com> | 2021-06-23 17:11:16 +0200 |
---|---|---|
committer | Dmitry Tantsur <dtantsur@protonmail.com> | 2021-07-05 18:42:20 +0200 |
commit | 0cb15a223a24927a203d84dded451de461008945 (patch) | |
tree | 1c17209b70d402db05a62b1e3ccf0f7037d39e39 | |
parent | 4ac6ad7317e20e1f2ebd99b1ce53d3ad2b1f09b0 (diff) | |
download | ironic-0cb15a223a24927a203d84dded451de461008945.tar.gz |
Cache AgentClient on Task, not globally
In order to avoid potential cache coherency issues
when using a globally cached AgentClient, e.g. with
TSL certificates from the IPA, cache the AgentClient
on a per task basis.
Co-Authored-By: Arne Wiebalck <arne.wiebalck@cern.ch>
Conflicts:
ironic/drivers/modules/agent.py
ironic/drivers/modules/agent_base.py
ironic/drivers/modules/ansible/deploy.py
ironic/drivers/modules/iscsi_deploy.py
ironic/tests/unit/drivers/modules/test_agent.py
Story: #2009004
Task: #42678
Change-Id: I0c458c8d9ae673181beb6d85c2ee68235ccef239
(cherry picked from commit fcb6a1096f48336f19a5e8aa80445f57d4480c08)
-rw-r--r-- | ironic/drivers/modules/agent.py | 14 | ||||
-rw-r--r-- | ironic/drivers/modules/agent_base.py | 31 | ||||
-rw-r--r-- | ironic/drivers/modules/agent_client.py | 9 | ||||
-rw-r--r-- | ironic/drivers/modules/ansible/deploy.py | 6 | ||||
-rw-r--r-- | ironic/drivers/modules/iscsi_deploy.py | 4 | ||||
-rw-r--r-- | ironic/tests/unit/drivers/modules/test_agent.py | 4 | ||||
-rw-r--r-- | ironic/tests/unit/drivers/modules/test_agent_base.py | 3 | ||||
-rw-r--r-- | ironic/tests/unit/drivers/modules/test_iscsi_deploy.py | 3 | ||||
-rw-r--r-- | releasenotes/notes/cache-agentclient-per-task-ec2231684e6876d9.yaml | 5 |
9 files changed, 49 insertions, 30 deletions
diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 1cb380ab2..bae6feda8 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -31,6 +31,7 @@ from ironic.conductor import utils as manager_utils from ironic.conf import CONF from ironic.drivers import base from ironic.drivers.modules import agent_base +from ironic.drivers.modules import agent_client from ironic.drivers.modules import boot_mode_utils from ironic.drivers.modules import deploy_utils @@ -269,6 +270,8 @@ class AgentDeployMixin(agent_base.AgentDeployMixin): 'step deploy.write_image, using the deprecated ' 'synchronous fall-back', task.node.uuid) + client = agent_client.get_client(task) + if self.has_decomposed_deploy_steps and has_write_image: configdrive = node.instance_info.get('configdrive') # Now switch into the corresponding in-band deploy step and let the @@ -278,10 +281,10 @@ class AgentDeployMixin(agent_base.AgentDeployMixin): 'args': {'image_info': image_info, 'configdrive': configdrive}} return agent_base.execute_step(task, new_step, 'deploy', - client=self._client) + client=client) else: # TODO(dtantsur): remove in W - command = self._client.prepare_image(node, image_info, wait=True) + command = client.prepare_image(node, image_info, wait=True) if command['command_status'] == 'FAILED': # TODO(jimrollenhagen) power off if using neutron dhcp to # align with pxe driver? @@ -292,8 +295,8 @@ class AgentDeployMixin(agent_base.AgentDeployMixin): # TODO(dtantsur): remove in W def _get_uuid_from_result(self, task, type_uuid): - command = self._client.get_last_command_status(task.node, - 'prepare_image') + client = agent_client.get_client(task) + command = client.get_last_command_status(task.node, 'prepare_image') if (not command or not command.get('command_result', {}).get('result')): msg = _('Unexpected response from the agent for node %s: the ' @@ -347,8 +350,9 @@ class AgentDeployMixin(agent_base.AgentDeployMixin): # ppc64* hardware we need to provide the 'PReP_Boot_partition_uuid' to # direct where the bootloader should be installed. driver_internal_info = task.node.driver_internal_info + client = agent_client.get_client(task) try: - partition_uuids = self._client.get_partition_uuids(node).get( + partition_uuids = client.get_partition_uuids(node).get( 'command_result') or {} root_uuid = partition_uuids.get('root uuid') except exception.AgentAPIError: diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index c826e6a25..226c34bdc 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -99,11 +99,6 @@ _FASTTRACK_HEARTBEAT_ALLOWED = (states.DEPLOYWAIT, states.CLEANWAIT, FASTTRACK_HEARTBEAT_ALLOWED = frozenset(_FASTTRACK_HEARTBEAT_ALLOWED) -def _get_client(): - client = agent_client.AgentClient() - return client - - @METRICS.timer('post_clean_step_hook') def post_clean_step_hook(interface, step): """Decorator method for adding a post clean step hook. @@ -372,7 +367,7 @@ def execute_step(task, step, step_type, client=None): completed async """ if client is None: - client = _get_client() + client = agent_client.get_client(task) ports = objects.Port.list_by_node_id( task.context, task.node.id) call = getattr(client, 'execute_%s_step' % step_type) @@ -410,8 +405,9 @@ class HeartbeatMixin(object): more of the deployment flow is driven by deploy steps. """ + collect_deploy_logs = True + def __init__(self): - self._client = _get_client() if not self.has_decomposed_deploy_steps: LOG.warning('%s does not support decomposed deploy steps. This ' 'is deprecated and will stop working in a future ' @@ -564,7 +560,7 @@ class HeartbeatMixin(object): # Do not call the error handler is the node is already DEPLOYFAIL if node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT): deploy_utils.set_failed_state( - task, last_error, collect_logs=bool(self._client)) + task, last_error, collect_logs=self.collect_deploy_logs) def _heartbeat_clean_wait(self, task): node = task.node @@ -678,7 +674,8 @@ class HeartbeatMixin(object): """ node = task.node try: - result = self._client.finalize_rescue(node) + client = agent_client.get_client(task) + result = client.finalize_rescue(node) except exception.IronicException as e: raise exception.InstanceRescueFailure(node=node.uuid, instance=node.instance_uuid, @@ -899,7 +896,8 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): {'node': node.uuid, 'type': step_type, 'steps': previous_steps}) - call = getattr(self._client, 'get_%s_steps' % step_type) + client = agent_client.get_client(task) + call = getattr(client, 'get_%s_steps' % step_type) try: agent_result = call(node, task.ports).get('command_result', {}) except exception.AgentInProgress as exc: @@ -1083,7 +1081,8 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): assert step_type in ('clean', 'deploy') node = task.node - agent_commands = self._client.get_commands_status(task.node) + client = agent_client.get_client(task) + agent_commands = client.get_commands_status(task.node) if _freshly_booted(agent_commands, step_type): field = ('cleaning_reboot' if step_type == 'clean' @@ -1191,16 +1190,17 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): can_power_on = (states.POWER_ON in task.driver.power.get_supported_power_states(task)) + client = agent_client.get_client(task) try: if not can_power_on: LOG.info('Power interface of node %(node)s does not support ' 'power on, using reboot to switch to the instance', node.uuid) - self._client.sync(node) + client.sync(node) manager_utils.node_power_action(task, states.REBOOT) elif not oob_power_off: try: - self._client.power_off(node) + client.power_off(node) except Exception as e: LOG.warning('Failed to soft power off node %(node_uuid)s. ' '%(cls)s: %(error)s', @@ -1223,7 +1223,7 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): manager_utils.node_power_action(task, states.POWER_OFF) else: # Flush the file system prior to hard rebooting the node - result = self._client.sync(node) + result = client.sync(node) error = result.get('faultstring') if error: if 'Unknown command' in error: @@ -1366,7 +1366,8 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): 'partition %(part)s, EFI system partition %(efi)s', {'node': node.uuid, 'part': root_uuid, 'efi': efi_system_part_uuid}) - result = self._client.install_bootloader( + client = agent_client.get_client(task) + result = client.install_bootloader( node, root_uuid=root_uuid, efi_system_part_uuid=efi_system_part_uuid, prep_boot_part_uuid=prep_boot_part_uuid, diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index 59975777f..f04d7160b 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -35,6 +35,15 @@ DEFAULT_IPA_PORTAL_PORT = 3260 REBOOT_COMMAND = 'run_image' +def get_client(task): + """Get client for this node.""" + try: + return task.cached_agent_client + except AttributeError: + task.cached_agent_client = AgentClient() + return task.cached_agent_client + + def get_command_error(command): """Extract an error string from the command result. diff --git a/ironic/drivers/modules/ansible/deploy.py b/ironic/drivers/modules/ansible/deploy.py index d4186741f..ae5151d95 100644 --- a/ironic/drivers/modules/ansible/deploy.py +++ b/ironic/drivers/modules/ansible/deploy.py @@ -382,11 +382,7 @@ class AnsibleDeploy(agent_base.HeartbeatMixin, has_decomposed_deploy_steps = True - def __init__(self): - super(AnsibleDeploy, self).__init__() - # NOTE(pas-ha) overriding agent creation as we won't be - # communicating with it, only processing heartbeats - self._client = None + collect_deploy_logs = False def get_properties(self): """Return the properties of the interface.""" diff --git a/ironic/drivers/modules/iscsi_deploy.py b/ironic/drivers/modules/iscsi_deploy.py index 7e3687306..108e87871 100644 --- a/ironic/drivers/modules/iscsi_deploy.py +++ b/ironic/drivers/modules/iscsi_deploy.py @@ -34,6 +34,7 @@ from ironic.conductor import utils as manager_utils from ironic.conf import CONF from ironic.drivers import base from ironic.drivers.modules import agent_base +from ironic.drivers.modules import agent_client from ironic.drivers.modules import boot_mode_utils from ironic.drivers.modules import deploy_utils @@ -700,7 +701,8 @@ class ISCSIDeploy(agent_base.AgentDeployMixin, agent_base.AgentBaseMixin, node = task.node LOG.debug('Continuing the deployment on node %s', node.uuid) - uuid_dict_returned = do_agent_iscsi_deploy(task, self._client) + client = agent_client.get_client(task) + uuid_dict_returned = do_agent_iscsi_deploy(task, client) utils.set_node_nested_field(node, 'driver_internal_info', 'deployment_uuids', uuid_dict_returned) node.save() diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py index b55d61cdd..27047b549 100644 --- a/ironic/tests/unit/drivers/modules/test_agent.py +++ b/ironic/tests/unit/drivers/modules/test_agent.py @@ -1253,7 +1253,7 @@ class TestAgentDeploy(db_base.DbTestCase): with task_manager.acquire(self.context, self.node.uuid, shared=False) as task: - task.driver.deploy._client = client_mock + task.cached_agent_client = client_mock task.driver.deploy.write_image(task) if compat: @@ -1350,7 +1350,7 @@ class TestAgentDeploy(db_base.DbTestCase): with task_manager.acquire(self.context, self.node.uuid, shared=False) as task: - task.driver.deploy._client = client_mock + task.cached_agent_client = client_mock task.driver.deploy.write_image(task) client_mock.prepare_image.assert_called_with(task.node, diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py index 8c1b3cfdb..a655c24d6 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base.py @@ -1121,7 +1121,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.assertEqual(states.ACTIVE, task.node.target_provision_state) collect_mock.assert_called_once_with(task.node) self.assertFalse(power_on_node_if_needed_mock.called) - sync_mock.assert_called_once_with(self.deploy._client, task.node) + sync_mock.assert_called_once_with(agent_client.get_client(task), + task.node) @mock.patch.object(manager_utils, 'restore_power_state_if_needed', autospec=True) diff --git a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py index 1cd12feb0..c1f683c40 100644 --- a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py +++ b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py @@ -1000,9 +1000,10 @@ class ISCSIDeployTestCase(db_base.DbTestCase): do_agent_iscsi_deploy_mock.return_value = deployment_uuids self.node.save() with task_manager.acquire(self.context, self.node.uuid) as task: + task.cached_agent_client = mock.sentinel.agent_client task.driver.deploy.write_image(task) do_agent_iscsi_deploy_mock.assert_called_once_with( - task, task.driver.deploy._client) + task, mock.sentinel.agent_client) self.assertEqual( task.node.driver_internal_info['deployment_uuids'], deployment_uuids) diff --git a/releasenotes/notes/cache-agentclient-per-task-ec2231684e6876d9.yaml b/releasenotes/notes/cache-agentclient-per-task-ec2231684e6876d9.yaml new file mode 100644 index 000000000..d6822b734 --- /dev/null +++ b/releasenotes/notes/cache-agentclient-per-task-ec2231684e6876d9.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes potential cache coherency issues by caching the AgentClient + per task, rather than globally. |