diff options
Diffstat (limited to 'ironic/drivers')
22 files changed, 581 insertions, 303 deletions
diff --git a/ironic/drivers/base.py b/ironic/drivers/base.py index 3399a63b9..e0685d0e4 100644 --- a/ironic/drivers/base.py +++ b/ironic/drivers/base.py @@ -367,6 +367,9 @@ class PowerInterface(BaseInterface): def reboot(self, task): """Perform a hard reboot of the task's node. + Drivers are expected to properly handle case when node is powered off + by powering it on. + :param task: a TaskManager instance containing the node to act on. :raises: MissingParameterValue if a required parameter is missing. """ diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index d2f07eaa2..6a590c8c4 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -57,7 +57,14 @@ agent_opts = [ 'Python Agent ramdisk. If unset, will use the priority ' 'set in the ramdisk (defaults to 10 for the ' 'GenericHardwareManager). If set to 0, will not run ' - 'during cleaning.') + 'during cleaning.'), + cfg.BoolOpt('manage_tftp', + default=True, + help='Whether Ironic will manage TFTP files for the deploy ' + 'ramdisks. If set to False, you will need to configure ' + 'your own TFTP server that allows booting the deploy ' + 'ramdisks.' + ), ] CONF = cfg.CONF @@ -97,7 +104,9 @@ def build_agent_options(node): keystone.get_service_url()).rstrip('/') agent_config_opts = { 'ipa-api-url': ironic_api, - 'ipa-driver-name': node.driver + 'ipa-driver-name': node.driver, + # NOTE: The below entry is a temporary workaround for bug/1433812 + 'coreos.configdrive': 0, } root_device = deploy_utils.parse_root_device_hints(node) if root_device: @@ -194,17 +203,19 @@ def build_instance_info_for_deploy(task): def _prepare_pxe_boot(task): """Prepare the files required for PXE booting the agent.""" - pxe_info = _get_tftp_image_info(task.node) - pxe_options = _build_pxe_config_options(task.node, pxe_info) - pxe_utils.create_pxe_config(task, - pxe_options, - CONF.agent.agent_pxe_config_template) - _cache_tftp_images(task.context, task.node, pxe_info) + if CONF.agent.manage_tftp: + pxe_info = _get_tftp_image_info(task.node) + pxe_options = _build_pxe_config_options(task.node, pxe_info) + pxe_utils.create_pxe_config(task, + pxe_options, + CONF.agent.agent_pxe_config_template) + _cache_tftp_images(task.context, task.node, pxe_info) def _do_pxe_boot(task, ports=None): """Reboot the node into the PXE ramdisk. + :param task: a TaskManager instance :param ports: a list of Neutron port dicts to update DHCP options on. If None, will get the list of ports from the Ironic port objects. """ @@ -217,13 +228,13 @@ def _do_pxe_boot(task, ports=None): def _clean_up_pxe(task): """Clean up left over PXE and DHCP files.""" - pxe_info = _get_tftp_image_info(task.node) - for label in pxe_info: - path = pxe_info[label][1] - utils.unlink_without_raise(path) - AgentTFTPImageCache().clean_up() - - pxe_utils.clean_up_pxe_config(task) + if CONF.agent.manage_tftp: + pxe_info = _get_tftp_image_info(task.node) + for label in pxe_info: + path = pxe_info[label][1] + utils.unlink_without_raise(path) + AgentTFTPImageCache().clean_up() + pxe_utils.clean_up_pxe_config(task) class AgentDeploy(base.DeployInterface): @@ -248,10 +259,11 @@ class AgentDeploy(base.DeployInterface): """ node = task.node params = {} - params['driver_info.deploy_kernel'] = node.driver_info.get( - 'deploy_kernel') - params['driver_info.deploy_ramdisk'] = node.driver_info.get( - 'deploy_ramdisk') + if CONF.agent.manage_tftp: + params['driver_info.deploy_kernel'] = node.driver_info.get( + 'deploy_kernel') + params['driver_info.deploy_ramdisk'] = node.driver_info.get( + 'deploy_ramdisk') image_source = node.instance_info.get('image_source') params['instance_info.image_source'] = image_source error_msg = _('Node %s failed to validate deploy image info. Some ' @@ -264,6 +276,15 @@ class AgentDeploy(base.DeployInterface): "image_source's image_checksum must be provided in " "instance_info for node %s") % node.uuid) + is_whole_disk_image = node.driver_internal_info.get( + 'is_whole_disk_image') + # TODO(sirushtim): Remove once IPA has support for partition images. + if is_whole_disk_image is False: + raise exception.InvalidParameterValue(_( + "Node %(node)s is configured to use the %(driver)s driver " + "which currently does not support deploying partition " + "images.") % {'node': node.uuid, 'driver': node.driver}) + # Validate the root device hints deploy_utils.parse_root_device_hints(node) @@ -348,7 +369,7 @@ class AgentDeploy(base.DeployInterface): :returns: A list of clean step dictionaries """ steps = deploy_utils.agent_get_clean_steps(task) - if CONF.agent.agent_erase_devices_priority: + if CONF.agent.agent_erase_devices_priority is not None: for step in steps: if (step.get('step') == 'erase_devices' and step.get('interface') == 'deploy'): @@ -368,29 +389,44 @@ class AgentDeploy(base.DeployInterface): return deploy_utils.agent_execute_clean_step(task, step) def prepare_cleaning(self, task): - """Boot into the agent to prepare for cleaning.""" + """Boot into the agent to prepare for cleaning. + + :param task: a TaskManager object containing the node + :raises NodeCleaningFailure: if the previous cleaning ports cannot + be removed or if new cleaning ports cannot be created + :returns: states.CLEANING to signify an asynchronous prepare + """ provider = dhcp_factory.DHCPFactory() # If we have left over ports from a previous cleaning, remove them if getattr(provider.provider, 'delete_cleaning_ports', None): + # Allow to raise if it fails, is caught and handled in conductor provider.provider.delete_cleaning_ports(task) # Create cleaning ports if necessary ports = None if getattr(provider.provider, 'create_cleaning_ports', None): + # Allow to raise if it fails, is caught and handled in conductor ports = provider.provider.create_cleaning_ports(task) + _prepare_pxe_boot(task) _do_pxe_boot(task, ports) # Tell the conductor we are waiting for the agent to boot. return states.CLEANING def tear_down_cleaning(self, task): - """Clean up the PXE and DHCP files after cleaning.""" + """Clean up the PXE and DHCP files after cleaning. + + :param task: a TaskManager object containing the node + :raises NodeCleaningFailure: if the cleaning ports cannot be + removed + """ manager_utils.node_power_action(task, states.POWER_OFF) _clean_up_pxe(task) # If we created cleaning ports, delete them provider = dhcp_factory.DHCPFactory() if getattr(provider.provider, 'delete_cleaning_ports', None): + # Allow to raise if it fails, is caught and handled in conductor provider.provider.delete_cleaning_ports(task) diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index da8cc4aa9..f09aa4aff 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -147,10 +147,10 @@ class BaseAgentVendor(base.VendorInterface): we restart cleaning. """ command = self._get_completed_cleaning_command(task) - LOG.debug('Cleaning command status for node %(node)s on step %(step)s ' - '(command)%', {'node': task.node.uuid, - 'step': task.node.clean_step, - 'command': command}) + LOG.debug('Cleaning command status for node %(node)s on step %(step)s:' + ' %(command)s', {'node': task.node.uuid, + 'step': task.node.clean_step, + 'command': command}) if not command: # Command is not done yet @@ -163,7 +163,7 @@ class BaseAgentVendor(base.VendorInterface): 'err': command.get('command_error'), 'step': task.node.clean_step}) LOG.error(msg) - manager.cleaning_error_handler(task, msg) + return manager.cleaning_error_handler(task, msg) elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH': # Restart cleaning, agent must have rebooted to new version try: @@ -175,7 +175,7 @@ class BaseAgentVendor(base.VendorInterface): 'err': command.get('command_error'), 'step': task.node.clean_step}) LOG.exception(msg) - manager.cleaning_error_handler(task, msg) + return manager.cleaning_error_handler(task, msg) self._notify_conductor_resume_clean(task) elif command.get('command_status') == 'SUCCEEDED': @@ -187,7 +187,7 @@ class BaseAgentVendor(base.VendorInterface): 'err': command.get('command_status'), 'step': task.node.clean_step}) LOG.error(msg) - manager.cleaning_error_handler(task, msg) + return manager.cleaning_error_handler(task, msg) @base.passthru(['POST']) def heartbeat(self, task, **kwargs): @@ -223,7 +223,12 @@ class BaseAgentVendor(base.VendorInterface): # TODO(jimrollenhagen) improve error messages here msg = _('Failed checking if deploy is done.') try: - if node.provision_state == states.DEPLOYWAIT: + if node.maintenance: + # this shouldn't happen often, but skip the rest if it does. + LOG.debug('Heartbeat from node %(node)s in maintenance mode; ' + 'not taking any action.', {'node': node.uuid}) + return + elif node.provision_state == states.DEPLOYWAIT: msg = _('Node failed to get image for deploy.') self.continue_deploy(task, **kwargs) elif (node.provision_state == states.DEPLOYING and @@ -308,8 +313,19 @@ class BaseAgentVendor(base.VendorInterface): last_command = commands[-1] + if last_command['command_name'] != 'execute_clean_step': + # catches race condition where execute_clean_step is still + # processing so the command hasn't started yet + return + + last_step = last_command['command_result'].get('clean_step') if last_command['command_status'] == 'RUNNING': return + elif (last_command['command_status'] == 'SUCCEEDED' and + last_step != task.node.clean_step): + # A previous clean_step was running, the new command has not yet + # started. + return else: return last_command @@ -431,7 +447,7 @@ class BaseAgentVendor(base.VendorInterface): task.process_event('done') LOG.info(_LI('Deployment to node %s done'), task.node.uuid) - def configure_local_boot(self, task, root_uuid, + def configure_local_boot(self, task, root_uuid=None, efi_system_part_uuid=None): """Helper method to configure local boot on the node. @@ -441,14 +457,17 @@ class BaseAgentVendor(base.VendorInterface): :param task: a TaskManager object containing the node :param root_uuid: The UUID of the root partition. This is used - for identifying the partition which contains the image deployed. + for identifying the partition which contains the image deployed + or None in case of whole disk images which we expect to already + have a bootloader installed. :param efi_system_part_uuid: The UUID of the efi system partition. This is used only in uefi boot mode. :raises: InstanceDeployFailure if bootloader installation failed or on encountering error while setting the boot device on the node. """ node = task.node - if not node.driver_internal_info.get('is_whole_disk_image'): + if not node.driver_internal_info.get( + 'is_whole_disk_image') and root_uuid: result = self._client.install_bootloader( node, root_uuid=root_uuid, efi_system_part_uuid=efi_system_part_uuid) diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index 280b3741a..6f715dca1 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -73,7 +73,16 @@ class AgentClient(object): headers=headers) # TODO(russellhaering): real error handling - return response.json() + try: + return response.json() + except ValueError: + msg = _( + 'Unable to decode response as JSON.\n' + 'Request URL: %(url)s\nRequest body: "%(body)s"\n' + 'Response: "%(response)s"' + ) % ({'response': response.text, 'body': body, 'url': url}) + LOG.error(msg) + raise exception.IronicException(msg) def get_commands_status(self, node): url = self._get_command_url(node) diff --git a/ironic/drivers/modules/agent_config.template b/ironic/drivers/modules/agent_config.template index a23342df5..5c219cacb 100644 --- a/ironic/drivers/modules/agent_config.template +++ b/ironic/drivers/modules/agent_config.template @@ -2,4 +2,4 @@ default deploy label deploy kernel {{ pxe_options.deployment_aki_path }} -append initrd={{ pxe_options.deployment_ari_path }} text {{ pxe_options.pxe_append_params }} ipa-api-url={{ pxe_options['ipa-api-url'] }} ipa-driver-name={{ pxe_options['ipa-driver-name'] }}{% if pxe_options.root_device %} root_device={{ pxe_options.root_device }}{% endif %} +append initrd={{ pxe_options.deployment_ari_path }} text {{ pxe_options.pxe_append_params }} ipa-api-url={{ pxe_options['ipa-api-url'] }} ipa-driver-name={{ pxe_options['ipa-driver-name'] }}{% if pxe_options.root_device %} root_device={{ pxe_options.root_device }}{% endif %} coreos.configdrive=0 diff --git a/ironic/drivers/modules/amt/vendor.py b/ironic/drivers/modules/amt/vendor.py index d1b5a962f..eccf8cbcc 100644 --- a/ironic/drivers/modules/amt/vendor.py +++ b/ironic/drivers/modules/amt/vendor.py @@ -17,6 +17,7 @@ AMT Vendor Methods from ironic.common import boot_devices from ironic.conductor import task_manager from ironic.drivers import base +from ironic.drivers.modules import iscsi_deploy from ironic.drivers.modules import pxe @@ -25,6 +26,14 @@ class AMTPXEVendorPassthru(pxe.VendorPassthru): @base.passthru(['POST']) @task_manager.require_exclusive_lock def pass_deploy_info(self, task, **kwargs): - task.driver.management.ensure_next_boot_device(task.node, - boot_devices.PXE) + if iscsi_deploy.get_boot_option(task.node) == "netboot": + task.driver.management.ensure_next_boot_device(task.node, + boot_devices.PXE) super(AMTPXEVendorPassthru, self).pass_deploy_info(task, **kwargs) + + @task_manager.require_exclusive_lock + def continue_deploy(self, task, **kwargs): + if iscsi_deploy.get_boot_option(task.node) == "netboot": + task.driver.management.ensure_next_boot_device(task.node, + boot_devices.PXE) + super(AMTPXEVendorPassthru, self).continue_deploy(task, **kwargs) diff --git a/ironic/drivers/modules/boot.ipxe b/ironic/drivers/modules/boot.ipxe index 25a0ea8dc..3567dc029 100644 --- a/ironic/drivers/modules/boot.ipxe +++ b/ironic/drivers/modules/boot.ipxe @@ -1,7 +1,7 @@ #!ipxe # load the MAC-specific file or fail if it's not found -chain --autofree pxelinux.cfg/${mac:hexraw} || goto error_no_config +chain --autofree pxelinux.cfg/${mac:hexhyp} || goto error_no_config :error_no_config echo PXE boot failed. No configuration found for MAC ${mac} diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index 1591ea297..e6c2c48ef 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -73,10 +73,6 @@ LOG = logging.getLogger(__name__) VALID_ROOT_DEVICE_HINTS = set(('size', 'model', 'wwn', 'serial', 'vendor')) -def _get_agent_client(): - return agent_client.AgentClient() - - # All functions are called from deploy() directly or indirectly. # They are split for stub-out. @@ -755,8 +751,14 @@ def _iscsi_setup_and_handle_errors(address, port, iqn, lun, delete_iscsi(address, port, iqn) -def notify_deploy_complete(address): - """Notifies the completion of deployment to the baremetal node. +def notify_ramdisk_to_proceed(address): + """Notifies the ramdisk waiting for instructions from Ironic. + + DIB ramdisk (from init script) makes vendor passhthrus and listens + on port 10000 for Ironic to notify back the completion of the task. + This method connects to port 10000 of the bare metal running the + ramdisk and then sends some data to notify the ramdisk to proceed + with it's next task. :param address: The IP address of the node. """ @@ -897,7 +899,7 @@ def agent_get_clean_steps(task): :raises: NodeCleaningFailure if the agent returns invalid results :returns: A list of clean step dictionaries """ - client = _get_agent_client() + client = agent_client.AgentClient() ports = objects.Port.list_by_node_id( task.context, task.node.id) result = client.get_clean_steps(task.node, ports).get('command_result') @@ -908,10 +910,10 @@ def agent_get_clean_steps(task): 'get_clean_steps for node %(node)s returned invalid result:' ' %(result)s') % ({'node': task.node.uuid, 'result': result})) - driver_info = task.node.driver_internal_info - driver_info['hardware_manager_version'] = result[ + driver_internal_info = task.node.driver_internal_info + driver_internal_info['hardware_manager_version'] = result[ 'hardware_manager_version'] - task.node.driver_internal_info = driver_info + task.node.driver_internal_info = driver_internal_info task.node.save() # Clean steps looks like {'HardwareManager': [{step1},{steps2}..]..} @@ -935,7 +937,7 @@ def agent_execute_clean_step(task, step): :raises: NodeCleaningFailure if the agent does not return a command status :returns: states.CLEANING to signify the step will be completed async """ - client = _get_agent_client() + client = agent_client.AgentClient() ports = objects.Port.list_by_node_id( task.context, task.node.id) result = client.execute_clean_step(step, task.node, ports) @@ -968,8 +970,7 @@ def try_set_boot_device(task, device, persistent=True): manager_utils.node_set_boot_device(task, device, persistent=persistent) except exception.IPMIFailure: - if driver_utils.get_node_capability(task.node, - 'boot_mode') == 'uefi': + if get_boot_mode_for_deploy(task.node) == 'uefi': LOG.warning(_LW("ipmitool is unable to set boot device while " "the node %s is in UEFI boot mode. Please set " "the boot device manually.") % task.node.uuid) @@ -1040,3 +1041,34 @@ def is_secure_boot_requested(node): sec_boot = capabilities.get('secure_boot', 'false').lower() return sec_boot == 'true' + + +def get_boot_mode_for_deploy(node): + """Returns the boot mode that would be used for deploy. + + This method returns boot mode to be used for deploy. + It returns 'uefi' if 'secure_boot' is set to 'true' in + 'instance_info/capabilities' of node. + Otherwise it returns value of 'boot_mode' in 'properties/capabilities' + of node if set. If that is not set, it returns boot mode in + 'instance_info/deploy_boot_mode' for the node. + It would return None if boot mode is present neither in 'capabilities' of + node 'properties' nor in node's 'instance_info' (which could also be None). + + :param node: an ironic node object. + :returns: 'bios', 'uefi' or None + """ + + if is_secure_boot_requested(node): + LOG.debug('Deploy boot mode is uefi for %s.', node.uuid) + return 'uefi' + + boot_mode = driver_utils.get_node_capability(node, 'boot_mode') + if boot_mode is None: + instance_info = node.instance_info + boot_mode = instance_info.get('deploy_boot_mode') + + LOG.debug('Deploy boot mode is %(boot_mode)s for %(node)s.', + {'boot_mode': boot_mode, 'node': node.uuid}) + + return boot_mode.lower() if boot_mode else boot_mode diff --git a/ironic/drivers/modules/drac/client.py b/ironic/drivers/modules/drac/client.py index 3b4b88929..3b40bd8d6 100644 --- a/ironic/drivers/modules/drac/client.py +++ b/ironic/drivers/modules/drac/client.py @@ -15,15 +15,20 @@ Wrapper for pywsman.Client """ +import time from xml.etree import ElementTree from oslo_utils import importutils from ironic.common import exception +from ironic.common.i18n import _LW from ironic.drivers.modules.drac import common as drac_common +from ironic.openstack.common import log as logging pywsman = importutils.try_import('pywsman') +LOG = logging.getLogger(__name__) + _SOAP_ENVELOPE_URI = 'http://www.w3.org/2003/05/soap-envelope' # Filter Dialects, see (Section 2.3.1): @@ -36,6 +41,9 @@ RET_SUCCESS = '0' RET_ERROR = '2' RET_CREATED = '4096' +RETRY_COUNT = 5 +RETRY_DELAY = 5 + def get_wsman_client(node): """Return a DRAC client object. @@ -53,6 +61,29 @@ def get_wsman_client(node): return client +def retry_on_empty_response(client, action, *args, **kwargs): + """Wrapper to retry an action on failure.""" + + func = getattr(client, action) + for i in range(RETRY_COUNT): + response = func(*args, **kwargs) + if response: + return response + else: + LOG.warning(_LW('Empty response on calling %(action)s on client. ' + 'Last error (cURL error code): %(last_error)s, ' + 'fault string: "%(fault_string)s" ' + 'response_code: %(response_code)s. ' + 'Retry attempt %(count)d') % + {'action': action, + 'last_error': client.last_error(), + 'fault_string': client.fault_string(), + 'response_code': client.response_code(), + 'count': i + 1}) + + time.sleep(RETRY_DELAY) + + class Client(object): def __init__(self, drac_host, drac_port, drac_path, drac_protocol, @@ -96,15 +127,16 @@ class Client(object): options.set_flags(pywsman.FLAG_ENUMERATION_OPTIMIZATION) options.set_max_elements(100) - doc = self.client.enumerate(options, filter_, resource_uri) + doc = retry_on_empty_response(self.client, 'enumerate', + options, filter_, resource_uri) root = self._get_root(doc) final_xml = root find_query = './/{%s}Body' % _SOAP_ENVELOPE_URI insertion_point = final_xml.find(find_query) while doc.context() is not None: - doc = self.client.pull(options, None, resource_uri, - str(doc.context())) + doc = retry_on_empty_response(self.client, 'pull', options, None, + resource_uri, str(doc.context())) root = self._get_root(doc) for result in root.findall(find_query): for child in list(result): @@ -160,7 +192,9 @@ class Client(object): for name, value in properties.items(): options.add_property(name, value) - doc = self.client.invoke(options, resource_uri, method, xml_doc) + doc = retry_on_empty_response(self.client, 'invoke', options, + resource_uri, method, xml_doc) + root = self._get_root(doc) return_value = drac_common.find_xml(root, 'ReturnValue', diff --git a/ironic/drivers/modules/drac/power.py b/ironic/drivers/modules/drac/power.py index 41c5aa3e0..1ea374234 100644 --- a/ironic/drivers/modules/drac/power.py +++ b/ironic/drivers/modules/drac/power.py @@ -135,7 +135,7 @@ class DracPower(base.PowerInterface): with unexpected return value. """ - return _set_power_state(task.node, power_state) + _set_power_state(task.node, power_state) @task_manager.require_exclusive_lock def reboot(self, task): @@ -148,4 +148,11 @@ class DracPower(base.PowerInterface): :raises: DracUnexpectedReturnValue if the client received a response with unexpected return value. """ - return _set_power_state(task.node, states.REBOOT) + + current_power_state = _get_power_state(task.node) + if current_power_state == states.POWER_ON: + target_power_state = states.REBOOT + else: + target_power_state = states.POWER_ON + + _set_power_state(task.node, target_power_state) diff --git a/ironic/drivers/modules/elilo_efi_pxe_config.template b/ironic/drivers/modules/elilo_efi_pxe_config.template index 88e1831d9..3aea37e3f 100644 --- a/ironic/drivers/modules/elilo_efi_pxe_config.template +++ b/ironic/drivers/modules/elilo_efi_pxe_config.template @@ -3,7 +3,7 @@ default=deploy image={{pxe_options.deployment_aki_path}} label=deploy initrd={{pxe_options.deployment_ari_path}} - append="selinux=0 disk={{ pxe_options.disk }} iscsi_target_iqn={{ pxe_options.iscsi_target_iqn }} deployment_id={{ pxe_options.deployment_id }} deployment_key={{ pxe_options.deployment_key }} ironic_api_url={{ pxe_options.ironic_api_url }} troubleshoot=0 text {{ pxe_options.pxe_append_params|default("", true) }} ip=%I:{{pxe_options.tftp_server}}:%G:%M:%H::on {% if pxe_options.root_device %}root_device={{ pxe_options.root_device }}{% endif %} ipa-api-url={{ pxe_options['ipa-api-url'] }} ipa-driver-name={{ pxe_options['ipa-driver-name'] }} boot_option={{ pxe_options.boot_option }} boot_mode={{ pxe_options['boot_mode'] }}" + append="selinux=0 disk={{ pxe_options.disk }} iscsi_target_iqn={{ pxe_options.iscsi_target_iqn }} deployment_id={{ pxe_options.deployment_id }} deployment_key={{ pxe_options.deployment_key }} ironic_api_url={{ pxe_options.ironic_api_url }} troubleshoot=0 text {{ pxe_options.pxe_append_params|default("", true) }} ip=%I:{{pxe_options.tftp_server}}:%G:%M:%H::on {% if pxe_options.root_device %}root_device={{ pxe_options.root_device }}{% endif %} ipa-api-url={{ pxe_options['ipa-api-url'] }} ipa-driver-name={{ pxe_options['ipa-driver-name'] }} boot_option={{ pxe_options.boot_option }} boot_mode={{ pxe_options['boot_mode'] }} coreos.configdrive=0" image={{pxe_options.aki_path}} @@ -13,4 +13,4 @@ image={{pxe_options.aki_path}} image=chain.c32 label=boot_whole_disk - append mbr:{{ DISK_IDENTIFIER }} + append="mbr:{{ DISK_IDENTIFIER }}" diff --git a/ironic/drivers/modules/ilo/common.py b/ironic/drivers/modules/ilo/common.py index 034a84c1e..1d1363d0d 100644 --- a/ironic/drivers/modules/ilo/common.py +++ b/ironic/drivers/modules/ilo/common.py @@ -30,7 +30,7 @@ from ironic.common.i18n import _LI from ironic.common import images from ironic.common import swift from ironic.common import utils -from ironic.drivers import utils as driver_utils +from ironic.drivers.modules import deploy_utils from ironic.openstack.common import log as logging ilo_client = importutils.try_import('proliantutils.ilo.client') @@ -75,15 +75,6 @@ CONSOLE_PROPERTIES = { 'console_port': _("node's UDP port to connect to. Only required for " "console access.") } -INSPECT_PROPERTIES = { - 'inspect_ports': _("Comma-separated values of ethernet ports " - "to be identified for creating node " - "ports. Valid values may be " - "inspect_ports = '1,2,...n' or " - "inspect_ports = 'all' or " - "inspect_ports = 'none'. " - "Required only for inspection.") -} CLEAN_PROPERTIES = { 'ilo_change_password': _("new password for iLO. Required if the clean " "step 'reset_ilo_credential' is enabled.") @@ -141,11 +132,6 @@ def parse_driver_info(node): except ValueError: not_integers.append(param) - for param in INSPECT_PROPERTIES: - value = info.get(param) - if value: - d_info[param] = value - if not_integers: raise exception.InvalidParameterValue(_( "The following iLO parameters from the node's driver_info " @@ -346,42 +332,58 @@ def set_boot_mode(node, boot_mode): def update_boot_mode(task): - """Update 'boot_mode' capability value of node's 'capabilities' property. + """Update instance_info with boot mode to be used for deploy. - This method updates the 'boot_mode' capability in node's 'capabilities' - property if not set. - It also sets the boot mode to be used in the next boot. + This method updates instance_info with boot mode to be used for + deploy if node properties['capabilities'] do not have boot_mode. + It sets the boot mode on the node. :param task: Task object. :raises: IloOperationError if setting boot mode failed. """ + node = task.node + boot_mode = deploy_utils.get_boot_mode_for_deploy(node) - boot_mode = driver_utils.get_node_capability(node, 'boot_mode') if boot_mode is not None: LOG.debug("Node %(uuid)s boot mode is being set to %(boot_mode)s", {'uuid': node.uuid, 'boot_mode': boot_mode}) set_boot_mode(node, boot_mode) return - ilo_object = get_ilo_object(task.node) + LOG.debug("Check pending boot mode for node %s.", node.uuid) + ilo_object = get_ilo_object(node) try: - p_boot_mode = ilo_object.get_pending_boot_mode() - if p_boot_mode == 'UNKNOWN': - # NOTE(faizan) ILO will return this in remote cases and mostly on - # the nodes which supports UEFI. Such nodes mostly comes with UEFI - # as default boot mode. So we will try setting bootmode to UEFI - # and if it fails then we fall back to BIOS boot mode. - ilo_object.set_pending_boot_mode('UEFI') - p_boot_mode = 'UEFI' + boot_mode = ilo_object.get_pending_boot_mode() except ilo_error.IloCommandNotSupportedError: - p_boot_mode = DEFAULT_BOOT_MODE + boot_mode = 'legacy' - driver_utils.rm_node_capability(task, 'boot_mode') + if boot_mode != 'UNKNOWN': + boot_mode = BOOT_MODE_ILO_TO_GENERIC[boot_mode.lower()] - driver_utils.add_node_capability(task, 'boot_mode', - BOOT_MODE_ILO_TO_GENERIC[p_boot_mode.lower()]) + if boot_mode == 'UNKNOWN': + # NOTE(faizan) ILO will return this in remote cases and mostly on + # the nodes which supports UEFI. Such nodes mostly comes with UEFI + # as default boot mode. So we will try setting bootmode to UEFI + # and if it fails then we fall back to BIOS boot mode. + try: + boot_mode = 'uefi' + ilo_object.set_pending_boot_mode( + BOOT_MODE_GENERIC_TO_ILO[boot_mode].upper()) + except ilo_error.IloError as ilo_exception: + operation = _("Setting %s as boot mode") % boot_mode + raise exception.IloOperationError(operation=operation, + error=ilo_exception) + + LOG.debug("Node %(uuid)s boot mode is being set to %(boot_mode)s " + "as pending boot mode is unknown.", + {'uuid': node.uuid, 'boot_mode': boot_mode}) + + instance_info = node.instance_info + instance_info['deploy_boot_mode'] = boot_mode + node.instance_info = instance_info + node.save() def setup_vmedia_for_boot(task, boot_iso, parameters=None): diff --git a/ironic/drivers/modules/ilo/deploy.py b/ironic/drivers/modules/ilo/deploy.py index 50b3b8621..86dc0c729 100644 --- a/ironic/drivers/modules/ilo/deploy.py +++ b/ironic/drivers/modules/ilo/deploy.py @@ -160,7 +160,7 @@ def _get_boot_iso(task, root_uuid): # Option 3 - Create boot_iso from kernel/ramdisk, upload to Swift # and provide its name. deploy_iso_uuid = deploy_info['ilo_deploy_iso'] - boot_mode = driver_utils.get_node_capability(task.node, 'boot_mode') + boot_mode = deploy_utils.get_boot_mode_for_deploy(task.node) boot_iso_object_name = _get_boot_iso_object_name(task.node) kernel_params = CONF.pxe.pxe_append_params container = CONF.ilo.swift_ilo_container @@ -264,11 +264,13 @@ def _reboot_into(task, iso, ramdisk_options): """ ilo_common.setup_vmedia_for_boot(task, iso, ramdisk_options) - # In secure boot mode, node will reboot twice internally to - # enable/disable secure boot. Any one-time boot settings would - # be lost. Hence setting persistent=True. - manager_utils.node_set_boot_device(task, boot_devices.CDROM, - persistent=True) + # In UEFI boot mode, upon inserting virtual CDROM, one has to reset the + # system to see it as a valid boot device in persistent boot devices. + # But virtual CDROM device is always available for one-time boot. + # During enable/disable of secure boot settings, iLO internally resets + # the server twice. But it retains one time boot settings across internal + # resets. Hence no impact of this change for secure boot deploy. + manager_utils.node_set_boot_device(task, boot_devices.CDROM) manager_utils.node_power_action(task, states.REBOOT) @@ -298,7 +300,7 @@ def _disable_secure_boot(task): cur_sec_state = ilo_common.get_secure_boot_mode(task) except exception.IloOperationNotSupported: LOG.debug('Secure boot mode is not supported for node %s', - task.node.uuid) + task.node.uuid) return False if cur_sec_state: @@ -330,13 +332,17 @@ def _prepare_node_for_deploy(task): if _disable_secure_boot(task): change_boot_mode = False - # Set boot_mode capability to uefi for secure boot - if deploy_utils.is_secure_boot_requested(task.node): - LOG.debug('Secure boot deploy requested for node %s', task.node.uuid) - _enable_uefi_capability(task) - if change_boot_mode: ilo_common.update_boot_mode(task) + else: + # Need to update boot mode that will be used during deploy, if one is + # not provided. + # Since secure boot was disabled, we are in 'uefi' boot mode. + if deploy_utils.get_boot_mode_for_deploy(task.node) is None: + instance_info = task.node.instance_info + instance_info['deploy_boot_mode'] = 'uefi' + task.node.instance_info = instance_info + task.node.save() def _update_secure_boot_mode(task, mode): @@ -361,15 +367,6 @@ def _update_secure_boot_mode(task, mode): {'mode': mode, 'node': task.node.uuid}) -def _enable_uefi_capability(task): - """Adds capability boot_mode='uefi' into Node property. - - :param task: a TaskManager instance containing the node to act on. - """ - driver_utils.rm_node_capability(task, 'boot_mode') - driver_utils.add_node_capability(task, 'boot_mode', 'uefi') - - class IloVirtualMediaIscsiDeploy(base.DeployInterface): def get_properties(self): @@ -613,7 +610,7 @@ class IloVirtualMediaAgentDeploy(base.DeployInterface): class IloVirtualMediaAgentVendorInterface(agent.AgentVendorInterface): - """Interface for vendor passthru rateled actions.""" + """Interface for vendor passthru related actions.""" def reboot_to_instance(self, task, **kwargs): node = task.node @@ -622,6 +619,10 @@ class IloVirtualMediaAgentVendorInterface(agent.AgentVendorInterface): error = self.check_deploy_success(node) if error is None: + # Set boot mode + ilo_common.update_boot_mode(task) + + # Need to enable secure boot, if being requested _update_secure_boot_mode(task, True) super(IloVirtualMediaAgentVendorInterface, @@ -641,8 +642,21 @@ class IloPXEDeploy(pxe.PXEDeploy): environment for the node :param task: a TaskManager instance containing the node to act on. + :raises: IloOperationError, if some operation on iLO failed. + :raises: InvalidParameterValue, if some information is invalid. """ ilo_common.update_boot_mode(task) + + # Check if 'boot_option' is compatible with 'boot_mode' and image. + # Whole disk image deploy is not supported in UEFI boot mode if + # 'boot_option' is not 'local'. + # If boot_mode is not set in the node properties/capabilities then + # PXEDeploy.validate() would pass. + # Boot mode gets updated in prepare stage. It is possible that the + # deploy boot mode is 'uefi' after call to update_boot_mode(). + # Hence a re-check is required here. + pxe.validate_boot_option_for_uefi(task.node) + super(IloPXEDeploy, self).prepare(task) def deploy(self, task): @@ -715,6 +729,8 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): """ if method == 'pass_deploy_info': iscsi_deploy.get_deploy_info(task.node, **kwargs) + elif method == 'pass_bootloader_install_info': + iscsi_deploy.validate_pass_bootloader_info_input(task, kwargs) def _configure_vmedia_boot(self, task, root_uuid): """Configure vmedia boot for the node.""" @@ -724,9 +740,9 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): LOG.error(_LE("Cannot get boot ISO for node %s"), node.uuid) return - # In secure boot mode, node will reboot twice internally to - # enable/disable secure boot. Any one-time boot settings would - # be lost. Hence setting persistent=True. + # Upon deploy complete, some distros cloud images reboot the system as + # part of its configuration. Hence boot device should be persistent and + # not one-time. ilo_common.setup_vmedia_for_boot(task, boot_iso) manager_utils.node_set_boot_device(task, boot_devices.CDROM, @@ -739,6 +755,29 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): @base.passthru(['POST']) @task_manager.require_exclusive_lock + def pass_bootloader_install_info(self, task, **kwargs): + """Accepts the results of bootloader installation. + + This method acts as a vendor passthru and accepts the result of + bootloader installation. If the bootloader installation was + successful, then it notifies the baremetal to proceed to reboot + and makes the instance active. If bootloader installation failed, + then it sets provisioning as failed and powers off the node. + + :param task: A TaskManager object. + :param kwargs: The arguments sent with vendor passthru. The expected + kwargs are:: + 'key': The deploy key for authorization + 'status': 'SUCCEEDED' or 'FAILED' + 'error': The error message if status == 'FAILED' + 'address': The IP address of the ramdisk + """ + task.process_event('resume') + iscsi_deploy.validate_bootloader_install_status(task, kwargs) + iscsi_deploy.finish_deploy(task, kwargs['address']) + + @base.passthru(['POST']) + @task_manager.require_exclusive_lock def pass_deploy_info(self, task, **kwargs): """Continues the iSCSI deployment from where ramdisk left off. @@ -767,30 +806,36 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): 'root uuid', uuid_dict.get('disk identifier')) try: - # For iscsi_ilo driver, we boot from disk everytime if the image - # deployed is a whole disk image. - if iscsi_deploy.get_boot_option(node) == "local" or iwdi: - manager_utils.node_set_boot_device(task, boot_devices.DISK, - persistent=True) - else: - self._configure_vmedia_boot(task, root_uuid_or_disk_id) - # Set boot mode ilo_common.update_boot_mode(task) # Need to enable secure boot, if being requested _update_secure_boot_mode(task, True) - deploy_utils.notify_deploy_complete(kwargs.get('address')) + # For iscsi_ilo driver, we boot from disk every time if the image + # deployed is a whole disk image. + if iscsi_deploy.get_boot_option(node) == "local" or iwdi: + manager_utils.node_set_boot_device(task, boot_devices.DISK, + persistent=True) - LOG.info(_LI('Deployment to node %s done'), node.uuid) - task.process_event('done') + # Ask the ramdisk to install bootloader and + # wait for the call-back through the vendor passthru + # 'pass_bootloader_install_info', if it's not a whole + # disk image. + if not iwdi: + deploy_utils.notify_ramdisk_to_proceed(kwargs['address']) + task.process_event('wait') + return + else: + self._configure_vmedia_boot(task, root_uuid_or_disk_id) except Exception as e: LOG.error(_LE('Deploy failed for instance %(instance)s. ' 'Error: %(error)s'), {'instance': node.instance_uuid, 'error': e}) msg = _('Failed to continue iSCSI deployment.') deploy_utils.set_failed_state(task, msg) + else: + iscsi_deploy.finish_deploy(task, kwargs.get('address')) @task_manager.require_exclusive_lock def continue_deploy(self, task, **kwargs): @@ -812,10 +857,11 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): ilo_common.cleanup_vmedia_boot(task) + iwdi = node.driver_internal_info.get('is_whole_disk_image') uuid_dict = iscsi_deploy.do_agent_iscsi_deploy(task, self._client) root_uuid = uuid_dict.get('root uuid') - if iscsi_deploy.get_boot_option(node) == "local": + if iscsi_deploy.get_boot_option(node) == "local" or iwdi: efi_system_part_uuid = uuid_dict.get( 'efi system partition uuid') self.configure_local_boot( diff --git a/ironic/drivers/modules/ilo/inspect.py b/ironic/drivers/modules/ilo/inspect.py index a2756ce07..459e34bfe 100644 --- a/ironic/drivers/modules/ilo/inspect.py +++ b/ironic/drivers/modules/ilo/inspect.py @@ -170,7 +170,7 @@ def _update_capabilities(node, new_capabilities): # occur in malformed capabilities like: # properties/capabilities='boot_mode:bios,boot_option'. msg = (_("Node %(node)s has invalid capabilities string " - "%(capabilities), unable to modify the node " + "%(capabilities)s, unable to modify the node " "properties['capabilities'] string") % {'node': node.uuid, 'capabilities': node_capabilities}) raise exception.InvalidParameterValue(msg) @@ -186,78 +186,6 @@ def _update_capabilities(node, new_capabilities): for key, value in six.iteritems(cap_dict)]) -def _get_macs_for_desired_ports(node, macs): - """Get the dict of MACs which are desired by the operator. - - Get the MACs for desired ports. - Returns a dictionary of MACs associated with the ports specified - in the node's driver_info/inspect_ports. - - The driver_info field is expected to be populated with - comma-separated port numbers like driver_info/inspect_ports='1,2'. - In this case the inspection is expected to create ironic ports - only for these two ports. - The proliantutils is expected to return key value pair for each - MAC address like: - {'Port 1': 'aa:aa:aa:aa:aa:aa', 'Port 2': 'bb:bb:bb:bb:bb:bb'} - - Possible scenarios: - 'inspect_ports' == 'all' : creates ports for all inspected MACs - 'inspect_ports' == <valid_port_numbers>: creates ports for - requested port numbers. - 'inspect_ports' == <mix_of_valid_invalid> : raise error for - invalid inputs. - 'inspect_ports' == 'none' : doesn't do any action with the - inspected mac addresses. - - This method is not called if 'inspect_ports' == 'none', hence the - scenario is not covered under this method. - - :param node: a node object. - :param macs: a dictionary of MAC addresses returned by the hardware - with inspection. - :returns: a dictionary of port numbers and MAC addresses with only - the MACs requested by operator in - node.driver_info['inspect_ports'] - :raises: HardwareInspectionFailure for the non-existing ports - requested in node.driver_info['inspect_ports'] - - """ - driver_info = node.driver_info - desired_macs = str(driver_info.get('inspect_ports')) - - # If the operator has given 'all' just return all the macs - # returned by inspection. - if desired_macs.lower() == 'all': - to_be_created_macs = macs - else: - to_be_created_macs = {} - # The list should look like ['Port 1', 'Port 2'] as - # iLO returns port numbers like this. - desired_macs_list = [ - 'Port %s' % port_number - for port_number in (desired_macs.split(','))] - - # Check if the given input is valid or not. Return all the - # requested macs. - non_existing_ports = [] - for port_number in desired_macs_list: - mac_address = macs.get(port_number) - if mac_address: - to_be_created_macs[port_number] = mac_address - else: - non_existing_ports.append(port_number) - - # It is possible that operator has given a wrong input by mistake. - if non_existing_ports: - error = (_("Could not find requested ports %(ports)s on the " - "node %(node)s") - % {'ports': non_existing_ports, 'node': node.uuid}) - raise exception.HardwareInspectionFailure(error=error) - - return to_be_created_macs - - def _get_capabilities(node, ilo_object): """inspects hardware and gets additional capabilities. @@ -281,9 +209,7 @@ def _get_capabilities(node, ilo_object): class IloInspect(base.InspectInterface): def get_properties(self): - d = ilo_common.REQUIRED_PROPERTIES.copy() - d.update(ilo_common.INSPECT_PROPERTIES) - return d + return ilo_common.REQUIRED_PROPERTIES def validate(self, task): """Check that 'driver_info' contains required ILO credentials. @@ -295,33 +221,18 @@ class IloInspect(base.InspectInterface): :raises: InvalidParameterValue if required iLO parameters are not valid. :raises: MissingParameterValue if a required parameter is missing. - :raises: InvalidParameterValue if invalid input provided. - """ node = task.node - driver_info = ilo_common.parse_driver_info(node) - if 'inspect_ports' not in driver_info: - raise exception.MissingParameterValue(_( - "Missing 'inspect_ports' parameter in node's driver_info.")) - value = driver_info['inspect_ports'] - if (value.lower() != 'all' and value.lower() != 'none' - and not all(s.isdigit() for s in value.split(','))): - raise exception.InvalidParameterValue(_( - "inspect_ports can accept either comma separated " - "port numbers, or a single port number, or 'all' " - "or 'none'. %(value)s given for node %(node)s " - "driver_info['inspect_ports']") - % {'value': value, 'node': node}) + ilo_common.parse_driver_info(node) def inspect_hardware(self, task): """Inspect hardware to get the hardware properties. Inspects hardware to get the essential and additional hardware properties. It fails if any of the essential properties - are not received from the node or if 'inspect_ports' is - not provided in driver_info. - It doesn't fail if node fails to return any capabilities as - the capabilities differ from hardware to hardware mostly. + are not received from the node. It doesn't fail if node fails + to return any capabilities as the capabilities differ from hardware + to hardware mostly. :param task: a TaskManager instance. :raises: HardwareInspectionFailure if essential properties @@ -371,22 +282,8 @@ class IloInspect(base.InspectInterface): task.node.save() - # Get the desired node inputs from the driver_info and create ports - # as requested. It doesn't delete the ports because there is - # no way for the operator to know which all MACs are associated - # with the node and which are not. The proliantutils can - # return only embedded NICs mac addresses and not the STANDUP NIC - # cards. The port creation code is not excercised if - # 'inspect_ports' == 'none'. - - driver_info = task.node.driver_info - if (driver_info['inspect_ports']).lower() != 'none': - macs_input_given = ( - _get_macs_for_desired_ports(task.node, result['macs'])) - - if macs_input_given: - # Create ports only for the requested ports. - _create_ports_if_not_exist(task.node, macs_input_given) + # Create ports for the nics detected. + _create_ports_if_not_exist(task.node, result['macs']) LOG.debug(("Node properties for %(node)s are updated as " "%(properties)s"), diff --git a/ironic/drivers/modules/ilo/power.py b/ironic/drivers/modules/ilo/power.py index 97e1c23e3..8b2760118 100644 --- a/ironic/drivers/modules/ilo/power.py +++ b/ironic/drivers/modules/ilo/power.py @@ -60,8 +60,15 @@ def _attach_boot_iso(task): :param task: a TaskManager instance containing the node to act on. """ i_info = task.node.instance_info - - if 'ilo_boot_iso' in i_info: + node_state = task.node.provision_state + + # NOTE: On instance rebuild, ilo_boot_iso will be present in + # instance_info but the node will be in DEPLOYING state. + # In such a scenario, the ilo_boot_iso shouldn't be + # attached to the node while powering on the node (the node + # should boot from deploy ramdisk instead, which will already + # be attached by the deploy driver). + if 'ilo_boot_iso' in i_info and node_state == states.ACTIVE: ilo_common.setup_vmedia_for_boot(task, i_info['ilo_boot_iso']) manager_utils.node_set_boot_device(task, boot_devices.CDROM) diff --git a/ironic/drivers/modules/ipmitool.py b/ironic/drivers/modules/ipmitool.py index 2826dd1e7..ddebd68ef 100644 --- a/ironic/drivers/modules/ipmitool.py +++ b/ironic/drivers/modules/ipmitool.py @@ -111,6 +111,13 @@ ipmitool_command_options = { 'dual_bridge': ['ipmitool', '-m', '0', '-b', '0', '-t', '0', '-B', '0', '-T', '0', '-h']} +# Note(TheJulia): This string is hardcoded in ipmitool's lanplus driver +# and is substituted in return for the error code received from the IPMI +# controller. As of 1.8.15, no internationalization support appears to +# be in ipmitool which means the string should always be returned in this +# form regardless of locale. +IPMITOOL_RETRYABLE_FAILURES = ['insufficient resources for session'] + def _check_option_support(options): """Checks if the specific ipmitool options are supported on host. @@ -335,32 +342,68 @@ def _exec_ipmitool(driver_info, command): args.append(driver_info[name]) # specify retry timing more precisely, if supported + num_tries = max( + (CONF.ipmi.retry_timeout // CONF.ipmi.min_command_interval), 1) + if _is_option_supported('timing'): - num_tries = max( - (CONF.ipmi.retry_timeout // CONF.ipmi.min_command_interval), 1) args.append('-R') args.append(str(num_tries)) args.append('-N') args.append(str(CONF.ipmi.min_command_interval)) - # 'ipmitool' command will prompt password if there is no '-f' option, - # we set it to '\0' to write a password file to support empty password - with _make_password_file(driver_info['password'] or '\0') as pw_file: - args.append('-f') - args.append(pw_file) - args.extend(command.split(" ")) + end_time = (time.time() + CONF.ipmi.retry_timeout) + + while True: + num_tries = num_tries - 1 # NOTE(deva): ensure that no communications are sent to a BMC more # often than once every min_command_interval seconds. time_till_next_poll = CONF.ipmi.min_command_interval - ( time.time() - LAST_CMD_TIME.get(driver_info['address'], 0)) if time_till_next_poll > 0: time.sleep(time_till_next_poll) - try: - out, err = utils.execute(*args) - finally: - LAST_CMD_TIME[driver_info['address']] = time.time() - return out, err + # Resetting the list that will be utilized so the password arguments + # from any previous execution are preserved. + cmd_args = args[:] + # 'ipmitool' command will prompt password if there is no '-f' + # option, we set it to '\0' to write a password file to support + # empty password + with _make_password_file( + driver_info['password'] or '\0' + ) as pw_file: + cmd_args.append('-f') + cmd_args.append(pw_file) + cmd_args.extend(command.split(" ")) + try: + out, err = utils.execute(*cmd_args) + return out, err + except processutils.ProcessExecutionError as e: + with excutils.save_and_reraise_exception() as ctxt: + err_list = [x for x in IPMITOOL_RETRYABLE_FAILURES + if x in e.message] + if ((time.time() > end_time) or + (num_tries == 0) or + not err_list): + LOG.error(_LE('IPMI Error while attempting ' + '"%(cmd)s" for node %(node)s. ' + 'Error: %(error)s'), + { + 'node': driver_info['uuid'], + 'cmd': e.cmd, + 'error': e + }) + else: + ctxt.reraise = False + LOG.warning(_LW('IPMI Error encountered, retrying ' + '"%(cmd)s" for node %(node)s. ' + 'Error: %(error)s'), + { + 'node': driver_info['uuid'], + 'cmd': e.cmd, + 'error': e + }) + finally: + LAST_CMD_TIME[driver_info['address']] = time.time() def _sleep_time(iter): diff --git a/ironic/drivers/modules/ipxe_config.template b/ironic/drivers/modules/ipxe_config.template index bd5647841..062776cc8 100644 --- a/ironic/drivers/modules/ipxe_config.template +++ b/ironic/drivers/modules/ipxe_config.template @@ -5,7 +5,7 @@ dhcp goto deploy :deploy -kernel {{ pxe_options.deployment_aki_path }} selinux=0 disk={{ pxe_options.disk }} iscsi_target_iqn={{ pxe_options.iscsi_target_iqn }} deployment_id={{ pxe_options.deployment_id }} deployment_key={{ pxe_options.deployment_key }} ironic_api_url={{ pxe_options.ironic_api_url }} troubleshoot=0 text {{ pxe_options.pxe_append_params|default("", true) }} boot_option={{ pxe_options.boot_option }} ip=${ip}:${next-server}:${gateway}:${netmask} BOOTIF=${mac} {% if pxe_options.root_device %}root_device={{ pxe_options.root_device }}{% endif %} ipa-api-url={{ pxe_options['ipa-api-url'] }} ipa-driver-name={{ pxe_options['ipa-driver-name'] }} +kernel {{ pxe_options.deployment_aki_path }} selinux=0 disk={{ pxe_options.disk }} iscsi_target_iqn={{ pxe_options.iscsi_target_iqn }} deployment_id={{ pxe_options.deployment_id }} deployment_key={{ pxe_options.deployment_key }} ironic_api_url={{ pxe_options.ironic_api_url }} troubleshoot=0 text {{ pxe_options.pxe_append_params|default("", true) }} boot_option={{ pxe_options.boot_option }} ip=${ip}:${next-server}:${gateway}:${netmask} BOOTIF=${mac} {% if pxe_options.root_device %}root_device={{ pxe_options.root_device }}{% endif %} ipa-api-url={{ pxe_options['ipa-api-url'] }} ipa-driver-name={{ pxe_options['ipa-driver-name'] }} coreos.configdrive=0 initrd {{ pxe_options.deployment_ari_path }} boot diff --git a/ironic/drivers/modules/iscsi_deploy.py b/ironic/drivers/modules/iscsi_deploy.py index 000e76130..ac7315d33 100644 --- a/ironic/drivers/modules/iscsi_deploy.py +++ b/ironic/drivers/modules/iscsi_deploy.py @@ -22,9 +22,13 @@ from six.moves.urllib import parse from ironic.common import exception from ironic.common.glance_service import service_utils as glance_service_utils from ironic.common.i18n import _ +from ironic.common.i18n import _LE +from ironic.common.i18n import _LI from ironic.common import image_service as service from ironic.common import keystone +from ironic.common import states from ironic.common import utils +from ironic.conductor import utils as manager_utils from ironic.drivers.modules import deploy_utils from ironic.drivers.modules import image_cache from ironic.drivers import utils as driver_utils @@ -416,9 +420,9 @@ def _get_boot_mode(node): :param node: A single Node. :returns: A string representing the boot mode type. Defaults to 'bios'. """ - boot_mode = driver_utils.get_node_capability(node, 'boot_mode') + boot_mode = deploy_utils.get_boot_mode_for_deploy(node) if boot_mode: - return boot_mode.lower() + return boot_mode return "bios" @@ -443,14 +447,23 @@ def build_deploy_ramdisk_options(node): node.instance_info = i_info node.save() + # XXX(jroll) DIB relies on boot_option=local to decide whether or not to + # lay down a bootloader. Hack this for now; fix it for real in Liberty. + # See also bug #1441556. + boot_option = get_boot_option(node) + if node.driver_internal_info.get('is_whole_disk_image'): + boot_option = 'netboot' + deploy_options = { 'deployment_id': node['uuid'], 'deployment_key': deploy_key, 'iscsi_target_iqn': "iqn-%s" % node.uuid, 'ironic_api_url': ironic_api, 'disk': CONF.pxe.disk_devices, - 'boot_option': get_boot_option(node), + 'boot_option': boot_option, 'boot_mode': _get_boot_mode(node), + # NOTE: The below entry is a temporary workaround for bug/1433812 + 'coreos.configdrive': 0, } root_device = deploy_utils.parse_root_device_hints(node) @@ -537,3 +550,87 @@ def validate(task): # Validate the root device hints deploy_utils.parse_root_device_hints(node) + + +def validate_pass_bootloader_info_input(task, input_params): + """Validates the input sent with bootloader install info passthru. + + This method validates the input sent with bootloader install info + passthru. + + :param task: A TaskManager object. + :param input_params: A dictionary of params sent as input to passthru. + :raises: InvalidParameterValue, if deploy key passed doesn't match the + one stored in instance_info. + :raises: MissingParameterValue, if some input is missing. + """ + params = {'address': input_params.get('address'), + 'key': input_params.get('key'), + 'status': input_params.get('status')} + msg = _("Some mandatory input missing in 'pass_bootloader_info' " + "vendor passthru from ramdisk.") + deploy_utils.check_for_missing_params(params, msg) + + deploy_key = task.node.instance_info['deploy_key'] + if deploy_key != input_params.get('key'): + raise exception.InvalidParameterValue( + _("Deploy key %(key_sent)s does not match " + "with %(expected_key)s") % + {'key_sent': input_params.get('key'), 'expected_key': deploy_key}) + + +def validate_bootloader_install_status(task, input_params): + """Validate if bootloader was installed. + + This method first validates if deploy key sent in vendor passthru + was correct one, and then validates whether bootloader installation + was successful or not. + + :param task: A TaskManager object. + :param input_params: A dictionary of params sent as input to passthru. + :raises: InstanceDeployFailure, if bootloader installation was + reported from ramdisk as failure. + """ + if input_params['status'] != 'SUCCEEDED': + msg = (_('Failed to install bootloader on node %(node)s. ' + 'Error: %(error)s.') % + {'node': task.node.uuid, 'error': input_params.get('error')}) + LOG.error(msg) + deploy_utils.set_failed_state(task, msg) + raise exception.InstanceDeployFailure(msg) + + +def finish_deploy(task, address): + """Notifies the ramdisk to reboot the node and makes the instance active. + + This method notifies the ramdisk to proceed to reboot and then + makes the instance active. + + :param task: a TaskManager object. + :param address: The IP address of the bare metal node. + :raises: InstanceDeployFailure, if notifying ramdisk failed. + """ + node = task.node + try: + deploy_utils.notify_ramdisk_to_proceed(address) + except Exception as e: + LOG.error(_LE('Deploy failed for instance %(instance)s. ' + 'Error: %(error)s'), + {'instance': node.instance_uuid, 'error': e}) + msg = (_('Failed to notify ramdisk to reboot after bootloader ' + 'installation. Error: %s') % e) + deploy_utils.set_failed_state(task, msg) + raise exception.InstanceDeployFailure(msg) + + # TODO(lucasagomes): When deploying a node with the DIB ramdisk + # Ironic will not power control the node at the end of the deployment, + # it's the DIB ramdisk that reboots the node. But, for the SSH driver + # some changes like setting the boot device only gets applied when the + # machine is powered off and on again. So the code below is enforcing + # it. For Liberty we need to change the DIB ramdisk so that Ironic + # always controls the power state of the node for all drivers. + if get_boot_option(node) == "local" and 'ssh' in node.driver: + manager_utils.node_power_action(task, states.REBOOT) + + LOG.info(_LI('Deployment to node %s done'), node.uuid) + task.process_event('done') diff --git a/ironic/drivers/modules/pxe.py b/ironic/drivers/modules/pxe.py index 7fdfcde65..9ab3fc32c 100644 --- a/ironic/drivers/modules/pxe.py +++ b/ironic/drivers/modules/pxe.py @@ -28,7 +28,6 @@ from ironic.common import exception from ironic.common.glance_service import service_utils from ironic.common.i18n import _ from ironic.common.i18n import _LE -from ironic.common.i18n import _LI from ironic.common.i18n import _LW from ironic.common import image_service as service from ironic.common import keystone @@ -186,6 +185,11 @@ def _build_pxe_config_options(node, pxe_info, ctx): template. """ is_whole_disk_image = node.driver_internal_info.get('is_whole_disk_image') + if is_whole_disk_image: + # These are dummy values to satisfy elilo. + # image and initrd fields in elilo config cannot be blank. + kernel = 'no_kernel' + ramdisk = 'no_ramdisk' if CONF.pxe.ipxe_enabled: deploy_kernel = '/'.join([CONF.pxe.http_url, node.uuid, @@ -206,13 +210,11 @@ def _build_pxe_config_options(node, pxe_info, ctx): 'deployment_aki_path': deploy_kernel, 'deployment_ari_path': deploy_ramdisk, 'pxe_append_params': CONF.pxe.pxe_append_params, - 'tftp_server': CONF.pxe.tftp_server + 'tftp_server': CONF.pxe.tftp_server, + 'aki_path': kernel, + 'ari_path': ramdisk } - if not is_whole_disk_image: - pxe_options.update({'aki_path': kernel, - 'ari_path': ramdisk}) - deploy_ramdisk_options = iscsi_deploy.build_deploy_ramdisk_options(node) pxe_options.update(deploy_ramdisk_options) @@ -230,6 +232,30 @@ def _get_token_file_path(node_uuid): return os.path.join(CONF.pxe.tftp_root, 'token-' + node_uuid) +def validate_boot_option_for_uefi(node): + """In uefi boot mode, validate if the boot option is compatible. + + This method raises exception if whole disk image being deployed + in UEFI boot mode without 'boot_option' being set to 'local'. + + :param node: a single Node. + :raises: InvalidParameterValue + """ + + boot_mode = deploy_utils.get_boot_mode_for_deploy(node) + boot_option = iscsi_deploy.get_boot_option(node) + if (boot_mode == 'uefi' and + node.driver_internal_info.get('is_whole_disk_image') and + boot_option != 'local'): + LOG.error(_LE("Whole disk image with netboot is not supported in UEFI " + "boot mode.")) + raise exception.InvalidParameterValue(_( + "Conflict: Whole disk image being used for deploy, but " + "cannot be used with node %(node_uuid)s configured to use " + "UEFI boot with netboot option") % + {'node_uuid': node.uuid}) + + @image_cache.cleanup(priority=25) class TFTPImageCache(image_cache.ImageCache): def __init__(self, image_service=None): @@ -327,7 +353,7 @@ class PXEDeploy(base.DeployInterface): driver_utils.validate_boot_mode_capability(node) driver_utils.validate_boot_option_capability(node) - boot_mode = driver_utils.get_node_capability(node, 'boot_mode') + boot_mode = deploy_utils.get_boot_mode_for_deploy(task.node) if CONF.pxe.ipxe_enabled: if not CONF.pxe.http_url or not CONF.pxe.http_root: @@ -343,6 +369,10 @@ class PXEDeploy(base.DeployInterface): "%(node_uuid)s configured to use UEFI boot") % {'node_uuid': node.uuid}) + # Check if 'boot_option' is compatible with 'boot_mode' of uefi and + # image being deployed + validate_boot_option_for_uefi(task.node) + d_info = _parse_deploy_info(node) iscsi_deploy.validate(task) @@ -417,7 +447,7 @@ class PXEDeploy(base.DeployInterface): pxe_options = _build_pxe_config_options(task.node, pxe_info, task.context) - if driver_utils.get_node_capability(task.node, 'boot_mode') == 'uefi': + if deploy_utils.get_boot_mode_for_deploy(task.node) == 'uefi': pxe_config_template = CONF.pxe.uefi_pxe_config_template else: pxe_config_template = CONF.pxe.pxe_config_template @@ -455,7 +485,7 @@ class PXEDeploy(base.DeployInterface): task.node.uuid) deploy_utils.switch_pxe_config( pxe_config_path, root_uuid_or_disk_id, - driver_utils.get_node_capability(task.node, 'boot_mode'), + deploy_utils.get_boot_mode_for_deploy(task.node), iwdi) def clean_up(self, task): @@ -513,6 +543,7 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): Valid methods: * pass_deploy_info + * pass_bootloader_install_info :param task: a TaskManager instance containing the node to act on. :param method: method to be validated. @@ -522,6 +553,30 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): if method == 'pass_deploy_info': driver_utils.validate_boot_option_capability(task.node) iscsi_deploy.get_deploy_info(task.node, **kwargs) + elif method == 'pass_bootloader_install_info': + iscsi_deploy.validate_pass_bootloader_info_input(task, kwargs) + + @base.passthru(['POST']) + @task_manager.require_exclusive_lock + def pass_bootloader_install_info(self, task, **kwargs): + """Accepts the results of bootloader installation. + + This method acts as a vendor passthru and accepts the result of + the bootloader installation. If bootloader installation was + successful, then it notifies the bare metal to proceed to reboot + and makes the instance active. If the bootloader installation failed, + then it sets provisioning as failed and powers off the node. + :param task: A TaskManager object. + :param kwargs: The arguments sent with vendor passthru. The expected + kwargs are:: + 'key': The deploy key for authorization + 'status': 'SUCCEEDED' or 'FAILED' + 'error': The error message if status == 'FAILED' + 'address': The IP address of the ramdisk + """ + task.process_event('resume') + iscsi_deploy.validate_bootloader_install_status(task, kwargs) + iscsi_deploy.finish_deploy(task, kwargs['address']) @base.passthru(['POST']) @task_manager.require_exclusive_lock @@ -556,25 +611,34 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): try: if iscsi_deploy.get_boot_option(node) == "local": deploy_utils.try_set_boot_device(task, boot_devices.DISK) + # If it's going to boot from the local disk, get rid of # the PXE configuration files used for the deployment pxe_utils.clean_up_pxe_config(task) + + # Ask the ramdisk to install bootloader and + # wait for the call-back through the vendor passthru + # 'pass_bootloader_install_info', if it's not a + # whole disk image. + if not is_whole_disk_image: + deploy_utils.notify_ramdisk_to_proceed(kwargs['address']) + task.process_event('wait') + return else: pxe_config_path = pxe_utils.get_pxe_config_file_path(node.uuid) - node_cap = driver_utils.get_node_capability(node, 'boot_mode') + boot_mode = deploy_utils.get_boot_mode_for_deploy(node) deploy_utils.switch_pxe_config(pxe_config_path, root_uuid_or_disk_id, - node_cap, is_whole_disk_image) + boot_mode, is_whole_disk_image) - deploy_utils.notify_deploy_complete(kwargs['address']) - LOG.info(_LI('Deployment to node %s done'), node.uuid) - task.process_event('done') except Exception as e: LOG.error(_LE('Deploy failed for instance %(instance)s. ' 'Error: %(error)s'), {'instance': node.instance_uuid, 'error': e}) msg = _('Failed to continue iSCSI deployment.') deploy_utils.set_failed_state(task, msg) + else: + iscsi_deploy.finish_deploy(task, kwargs.get('address')) @task_manager.require_exclusive_lock def continue_deploy(self, task, **kwargs): @@ -617,7 +681,7 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): root_uuid_or_disk_id = uuid_dict.get( 'root uuid', uuid_dict.get('disk identifier')) pxe_config_path = pxe_utils.get_pxe_config_file_path(node.uuid) - boot_mode = driver_utils.get_node_capability(node, 'boot_mode') + boot_mode = deploy_utils.get_boot_mode_for_deploy(node) deploy_utils.switch_pxe_config(pxe_config_path, root_uuid_or_disk_id, boot_mode, is_whole_disk_image) diff --git a/ironic/drivers/modules/pxe_config.template b/ironic/drivers/modules/pxe_config.template index 334a6ac81..242182f6e 100644 --- a/ironic/drivers/modules/pxe_config.template +++ b/ironic/drivers/modules/pxe_config.template @@ -2,7 +2,7 @@ default deploy label deploy kernel {{ pxe_options.deployment_aki_path }} -append initrd={{ pxe_options.deployment_ari_path }} selinux=0 disk={{ pxe_options.disk }} iscsi_target_iqn={{ pxe_options.iscsi_target_iqn }} deployment_id={{ pxe_options.deployment_id }} deployment_key={{ pxe_options.deployment_key }} ironic_api_url={{ pxe_options.ironic_api_url }} troubleshoot=0 text {{ pxe_options.pxe_append_params|default("", true) }} boot_option={{ pxe_options.boot_option }} {% if pxe_options.root_device %}root_device={{ pxe_options.root_device }}{% endif %} ipa-api-url={{ pxe_options['ipa-api-url'] }} ipa-driver-name={{ pxe_options['ipa-driver-name'] }} boot_mode={{ pxe_options['boot_mode'] }} +append initrd={{ pxe_options.deployment_ari_path }} selinux=0 disk={{ pxe_options.disk }} iscsi_target_iqn={{ pxe_options.iscsi_target_iqn }} deployment_id={{ pxe_options.deployment_id }} deployment_key={{ pxe_options.deployment_key }} ironic_api_url={{ pxe_options.ironic_api_url }} troubleshoot=0 text {{ pxe_options.pxe_append_params|default("", true) }} boot_option={{ pxe_options.boot_option }} {% if pxe_options.root_device %}root_device={{ pxe_options.root_device }}{% endif %} ipa-api-url={{ pxe_options['ipa-api-url'] }} ipa-driver-name={{ pxe_options['ipa-driver-name'] }} boot_mode={{ pxe_options['boot_mode'] }} coreos.configdrive=0 ipappend 3 diff --git a/ironic/drivers/modules/snmp.py b/ironic/drivers/modules/snmp.py index 10ab43166..7a252c691 100644 --- a/ironic/drivers/modules/snmp.py +++ b/ironic/drivers/modules/snmp.py @@ -118,7 +118,7 @@ class SNMPClient(object): """ if self.version == SNMP_V3: # Handling auth/encryption credentials is not (yet) supported. - # This version supports a security name analagous to community. + # This version supports a security name analogous to community. return cmdgen.UsmUserData(self.security) else: mp_model = 1 if self.version == SNMP_V2C else 0 diff --git a/ironic/drivers/utils.py b/ironic/drivers/utils.py index be2887f67..7caa6cabe 100644 --- a/ironic/drivers/utils.py +++ b/ironic/drivers/utils.py @@ -144,33 +144,6 @@ def get_node_capability(node, capability): "Format should be 'key:val'."), node_capability) -def rm_node_capability(task, capability): - """Remove 'capability' from node's 'capabilities' property. - - :param task: Task object. - :param capability: Capability key. - - """ - node = task.node - properties = node.properties - capabilities = properties.get('capabilities') - - if not capabilities: - return - - caps = [] - for cap in capabilities.split(','): - parts = cap.split(':') - if len(parts) == 2 and parts[0] and parts[1]: - if parts[0] == capability: - continue - caps.append(cap) - new_cap_str = ",".join(caps) - properties['capabilities'] = new_cap_str if new_cap_str else None - node.properties = properties - node.save() - - def add_node_capability(task, capability, value): """Add 'capability' to node's 'capabilities' property. |