diff options
27 files changed, 987 insertions, 23 deletions
diff --git a/doc/source/admin/troubleshooting.rst b/doc/source/admin/troubleshooting.rst index fa04d3006..2791430fd 100644 --- a/doc/source/admin/troubleshooting.rst +++ b/doc/source/admin/troubleshooting.rst @@ -973,3 +973,40 @@ Unfortunately, due to the way the conductor is designed, it is not possible to gracefully break a stuck lock held in ``*-ing`` states. As the last resort, you may need to restart the affected conductor. See `Why are my nodes stuck in a "-ing" state?`_. + +What is ConcurrentActionLimit? +============================== + +ConcurrentActionLimit is an exception which is raised to clients when an +operation is requested, but cannot be serviced at that moment because the +overall threshold of nodes in concurrent "Deployment" or "Cleaning" +operations has been reached. + +These limits exist for two distinct reasons. + +The first is they allow an operator to tune a deployment such that too many +concurrent deployments cannot be triggered at any given time, as a single +conductor has an internal limit to the number of overall concurrent tasks, +this restricts only the number of running concurrent actions. As such, this +accounts for the number of nodes in ``deploy`` and ``deploy wait`` states. +In the case of deployments, the default value is relatively high and should +be suitable for *most* larger operators. + +The second is to help slow down the ability in which an entire population of +baremetal nodes can be moved into and through cleaning, in order to help +guard against authenticated malicious users, or accidental script driven +operations. In this case, the total number of nodes in ``deleting``, +``cleaning``, and ``clean wait`` are evaluated. The default maximum limit +for cleaning operations is *50* and should be suitable for the majority of +baremetal operators. + +These settings can be modified by using the +``[conductor]max_concurrent_deploy`` and ``[conductor]max_concurrent_clean`` +settings from the ironic.conf file supporting the ``ironic-conductor`` +service. Neither setting can be explicity disabled, however there is also no +upper limit to the setting. + +.. note:: + This was an infrastructure operator requested feature from actual lessons + learned in the operation of Ironic in large scale production. The defaults + may not be suitable for the largest scale operators. diff --git a/doc/source/contributor/webapi-version-history.rst b/doc/source/contributor/webapi-version-history.rst index c395bdcbe..074106bc8 100644 --- a/doc/source/contributor/webapi-version-history.rst +++ b/doc/source/contributor/webapi-version-history.rst @@ -2,8 +2,8 @@ REST API Version History ======================== -1.80 (Zed) ----------- +1.80 (Zed, 21.1) +---------------------- This verison is a signifier of additional RBAC functionality allowing a project scoped ``admin`` to create or delete nodes in Ironic. diff --git a/driver-requirements.txt b/driver-requirements.txt index 3725c3ddf..876e817cb 100644 --- a/driver-requirements.txt +++ b/driver-requirements.txt @@ -17,4 +17,4 @@ ansible>=2.7 python-ibmcclient>=0.2.2,<0.3.0 # Dell EMC iDRAC sushy OEM extension -sushy-oem-idrac>=4.0.0,<6.0.0 +sushy-oem-idrac>=5.0.0,<6.0.0 diff --git a/ironic/common/exception.py b/ironic/common/exception.py index ddbce6f47..38316d2e7 100644 --- a/ironic/common/exception.py +++ b/ironic/common/exception.py @@ -851,3 +851,13 @@ class ImageRefIsARedirect(IronicException): message=msg, image_ref=image_ref, redirect_url=redirect_url) + + +class ConcurrentActionLimit(IronicException): + # NOTE(TheJulia): We explicitly don't report the concurrent + # action limit configuration value as a security guard since + # if informed of the limit, an attacker can tailor their attack. + _msg_fmt = _("Unable to process request at this time. " + "The concurrent action limit for %(task_type)s " + "has been reached. Please contact your administrator " + "and try again later.") diff --git a/ironic/common/pxe_utils.py b/ironic/common/pxe_utils.py index 33a24ecb6..489f52737 100644 --- a/ironic/common/pxe_utils.py +++ b/ironic/common/pxe_utils.py @@ -59,6 +59,7 @@ DHCPV6_BOOTFILE_NAME = '59' # rfc5970 DHCP_TFTP_SERVER_ADDRESS = '150' # rfc5859 DHCP_IPXE_ENCAP_OPTS = '175' # Tentatively Assigned DHCP_TFTP_PATH_PREFIX = '210' # rfc5071 +DHCP_SERVER_IP_ADDRESS = '255' # dnsmasq server-ip-address DEPLOY_KERNEL_RAMDISK_LABELS = ['deploy_kernel', 'deploy_ramdisk'] RESCUE_KERNEL_RAMDISK_LABELS = ['rescue_kernel', 'rescue_ramdisk'] @@ -488,7 +489,7 @@ def dhcp_options_for_instance(task, ipxe_enabled=False, url_boot=False, else: use_ip_version = int(CONF.pxe.ip_version) dhcp_opts = [] - dhcp_provider_name = CONF.dhcp.dhcp_provider + api = dhcp_factory.DHCPFactory().provider if use_ip_version == 4: boot_file_param = DHCP_BOOTFILE_NAME else: @@ -517,7 +518,7 @@ def dhcp_options_for_instance(task, ipxe_enabled=False, url_boot=False, ipxe_script_url = '/'.join([CONF.deploy.http_url, script_name]) # if the request comes from dumb firmware send them the iPXE # boot image. - if dhcp_provider_name == 'neutron': + if api.supports_ipxe_tag(): # Neutron use dnsmasq as default DHCP agent. Neutron carries the # configuration to relate to the tags below. The ipxe6 tag was # added in the Stein cycle which identifies the iPXE User-Class @@ -588,7 +589,7 @@ def dhcp_options_for_instance(task, ipxe_enabled=False, url_boot=False, # Related bug was opened on Neutron side: # https://bugs.launchpad.net/neutron/+bug/1723354 if not url_boot: - dhcp_opts.append({'opt_name': 'server-ip-address', + dhcp_opts.append({'opt_name': DHCP_SERVER_IP_ADDRESS, 'opt_value': CONF.pxe.tftp_server}) # Append the IP version for all the configuration options diff --git a/ironic/common/release_mappings.py b/ironic/common/release_mappings.py index 940321870..d32027b59 100644 --- a/ironic/common/release_mappings.py +++ b/ironic/common/release_mappings.py @@ -490,6 +490,26 @@ RELEASE_MAPPING = { 'VolumeTarget': ['1.0'], } }, + '21.1': { + 'api': '1.80', + 'rpc': '1.55', + 'objects': { + 'Allocation': ['1.1'], + 'BIOSSetting': ['1.1'], + 'Node': ['1.36'], + 'NodeHistory': ['1.0'], + 'Conductor': ['1.3'], + 'Chassis': ['1.3'], + 'Deployment': ['1.0'], + 'DeployTemplate': ['1.1'], + 'Port': ['1.10'], + 'Portgroup': ['1.4'], + 'Trait': ['1.0'], + 'TraitList': ['1.0'], + 'VolumeConnector': ['1.0'], + 'VolumeTarget': ['1.0'], + } + }, 'master': { 'api': '1.80', 'rpc': '1.55', @@ -525,9 +545,9 @@ RELEASE_MAPPING = { # # There should be at most two named mappings here. -# NOTE(mgoddard): remove xena prior to the zed release. -RELEASE_MAPPING['xena'] = RELEASE_MAPPING['18.2'] +# NOTE(mgoddard): remove yoga prior to the antelope release. RELEASE_MAPPING['yoga'] = RELEASE_MAPPING['20.1'] +RELEASE_MAPPING['zed'] = RELEASE_MAPPING['21.1'] # List of available versions with named versions first; 'master' is excluded. RELEASE_VERSIONS = sorted(set(RELEASE_MAPPING) - {'master'}, reverse=True) diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 13d11d1d9..7e98459ff 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -886,7 +886,8 @@ class ConductorManager(base_manager.BaseConductorManager): exception.NodeInMaintenance, exception.InstanceDeployFailure, exception.InvalidStateRequested, - exception.NodeProtected) + exception.NodeProtected, + exception.ConcurrentActionLimit) def do_node_deploy(self, context, node_id, rebuild=False, configdrive=None, deploy_steps=None): """RPC method to initiate deployment to a node. @@ -910,8 +911,11 @@ class ConductorManager(base_manager.BaseConductorManager): :raises: InvalidStateRequested when the requested state is not a valid target from the current state. :raises: NodeProtected if the node is protected. + :raises: ConcurrentActionLimit if this action would exceed the maximum + number of configured concurrent actions of this type. """ LOG.debug("RPC do_node_deploy called for node %s.", node_id) + self._concurrent_action_limit(action='provisioning') event = 'rebuild' if rebuild else 'deploy' # NOTE(comstud): If the _sync_power_states() periodic task happens @@ -983,7 +987,8 @@ class ConductorManager(base_manager.BaseConductorManager): exception.NodeLocked, exception.InstanceDeployFailure, exception.InvalidStateRequested, - exception.NodeProtected) + exception.NodeProtected, + exception.ConcurrentActionLimit) def do_node_tear_down(self, context, node_id): """RPC method to tear down an existing node deployment. @@ -998,8 +1003,11 @@ class ConductorManager(base_manager.BaseConductorManager): :raises: InvalidStateRequested when the requested state is not a valid target from the current state. :raises: NodeProtected if the node is protected. + :raises: ConcurrentActionLimit if this action would exceed the maximum + number of configured concurrent actions of this type. """ LOG.debug("RPC do_node_tear_down called for node %s.", node_id) + self._concurrent_action_limit(action='unprovisioning') with task_manager.acquire(context, node_id, shared=False, purpose='node tear down') as task: @@ -1121,7 +1129,8 @@ class ConductorManager(base_manager.BaseConductorManager): exception.InvalidStateRequested, exception.NodeInMaintenance, exception.NodeLocked, - exception.NoFreeConductorWorker) + exception.NoFreeConductorWorker, + exception.ConcurrentActionLimit) def do_node_clean(self, context, node_id, clean_steps, disable_ramdisk=False): """RPC method to initiate manual cleaning. @@ -1150,7 +1159,10 @@ class ConductorManager(base_manager.BaseConductorManager): :raises: NodeLocked if node is locked by another conductor. :raises: NoFreeConductorWorker when there is no free worker to start async task. + :raises: ConcurrentActionLimit If this action would exceed the + configured limits of the deployment. """ + self._concurrent_action_limit(action='cleaning') with task_manager.acquire(context, node_id, shared=False, purpose='node manual cleaning') as task: node = task.node @@ -3549,6 +3561,40 @@ class ConductorManager(base_manager.BaseConductorManager): # impact DB access if done in excess. eventlet.sleep(0) + def _concurrent_action_limit(self, action): + """Check Concurrency limits and block operations if needed. + + This method is used to serve as a central place for the logic + for checks on concurrency limits. If a limit is reached, then + an appropriate exception is raised. + + :raises: ConcurrentActionLimit If the system configuration + is exceeded. + """ + # NOTE(TheJulia): Keeping this all in one place for simplicity. + if action == 'provisioning': + node_count = self.dbapi.count_nodes_in_provision_state([ + states.DEPLOYING, + states.DEPLOYWAIT + ]) + if node_count >= CONF.conductor.max_concurrent_deploy: + raise exception.ConcurrentActionLimit( + task_type=action) + + if action == 'unprovisioning' or action == 'cleaning': + # NOTE(TheJulia): This also checks for the deleting state + # which is super transitory, *but* you can get a node into + # the state. So in order to guard against a DoS attack, we + # need to check even the super transitory node state. + node_count = self.dbapi.count_nodes_in_provision_state([ + states.DELETING, + states.CLEANING, + states.CLEANWAIT + ]) + if node_count >= CONF.conductor.max_concurrent_clean: + raise exception.ConcurrentActionLimit( + task_type=action) + @METRICS.timer('get_vendor_passthru_metadata') def get_vendor_passthru_metadata(route_dict): diff --git a/ironic/conf/__init__.py b/ironic/conf/__init__.py index 4e4b7bf7a..ad1ba227c 100644 --- a/ironic/conf/__init__.py +++ b/ironic/conf/__init__.py @@ -27,6 +27,7 @@ from ironic.conf import database from ironic.conf import default from ironic.conf import deploy from ironic.conf import dhcp +from ironic.conf import dnsmasq from ironic.conf import drac from ironic.conf import glance from ironic.conf import healthcheck @@ -62,6 +63,7 @@ default.register_opts(CONF) deploy.register_opts(CONF) drac.register_opts(CONF) dhcp.register_opts(CONF) +dnsmasq.register_opts(CONF) glance.register_opts(CONF) healthcheck.register_opts(CONF) ibmc.register_opts(CONF) diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index b1d6bae4f..2161b9434 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -358,6 +358,32 @@ opts = [ 'model. The conductor does *not* record this value ' 'otherwise, and this information is not backfilled ' 'for prior instances which have been deployed.')), + cfg.IntOpt('max_concurrent_deploy', + default=250, + min=1, + mutable=True, + help=_('The maximum number of concurrent nodes in deployment ' + 'which are permitted in this Ironic system. ' + 'If this limit is reached, new requests will be ' + 'rejected until the number of deployments in progress ' + 'is lower than this maximum. As this is a security ' + 'mechanism requests are not queued, and this setting ' + 'is a global setting applying to all requests this ' + 'conductor receives, regardless of access rights. ' + 'The concurrent deployment limit cannot be disabled.')), + cfg.IntOpt('max_concurrent_clean', + default=50, + min=1, + mutable=True, + help=_('The maximum number of concurrent nodes in cleaning ' + 'which are permitted in this Ironic system. ' + 'If this limit is reached, new requests will be ' + 'rejected until the number of nodes in cleaning ' + 'is lower than this maximum. As this is a security ' + 'mechanism requests are not queued, and this setting ' + 'is a global setting applying to all requests this ' + 'conductor receives, regardless of access rights. ' + 'The concurrent clean limit cannot be disabled.')), ] diff --git a/ironic/conf/dhcp.py b/ironic/conf/dhcp.py index 2c58529fd..17a937f7d 100644 --- a/ironic/conf/dhcp.py +++ b/ironic/conf/dhcp.py @@ -20,7 +20,8 @@ from ironic.common.i18n import _ opts = [ cfg.StrOpt('dhcp_provider', default='neutron', - help=_('DHCP provider to use. "neutron" uses Neutron, and ' + help=_('DHCP provider to use. "neutron" uses Neutron, ' + '"dnsmasq" uses the Dnsmasq provider, and ' '"none" uses a no-op provider.')), ] diff --git a/ironic/conf/dnsmasq.py b/ironic/conf/dnsmasq.py new file mode 100644 index 000000000..f1ba1de23 --- /dev/null +++ b/ironic/conf/dnsmasq.py @@ -0,0 +1,43 @@ +# +# Copyright 2022 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from oslo_config import cfg + +from ironic.common.i18n import _ + +opts = [ + cfg.StrOpt('dhcp_optsdir', + default='/etc/dnsmasq.d/optsdir.d', + help=_('Directory where the "dnsmasq" provider will write ' + 'option configuration files for an external ' + 'Dnsmasq to read. Use the same path for the ' + 'dhcp-optsdir dnsmasq configuration directive.')), + cfg.StrOpt('dhcp_hostsdir', + default='/etc/dnsmasq.d/hostsdir.d', + help=_('Directory where the "dnsmasq" provider will write ' + 'host configuration files for an external ' + 'Dnsmasq to read. Use the same path for the ' + 'dhcp-hostsdir dnsmasq configuration directive.')), + cfg.StrOpt('dhcp_leasefile', + default='/var/lib/dnsmasq/dnsmasq.leases', + help=_('Dnsmasq leases file for the "dnsmasq" driver to ' + 'discover IP addresses of managed nodes. Use the' + 'same path for the dhcp-leasefile dnsmasq ' + 'configuration directive.')), +] + + +def register_opts(conf): + conf.register_opts(opts, group='dnsmasq') diff --git a/ironic/db/api.py b/ironic/db/api.py index 712919bb3..45e3fe2ca 100644 --- a/ironic/db/api.py +++ b/ironic/db/api.py @@ -1416,3 +1416,12 @@ class Connection(object, metaclass=abc.ABCMeta): :param entires: A list of node history entriy id's to be queried for deletion. """ + + @abc.abstractmethod + def count_nodes_in_provision_state(self, state): + """Count the number of nodes in given provision state. + + :param state: A provision_state value to match for the + count operation. This can be a single provision + state value or a list of values. + """ diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py index 05d5cc45e..c14719af8 100644 --- a/ironic/db/sqlalchemy/api.py +++ b/ironic/db/sqlalchemy/api.py @@ -30,6 +30,7 @@ from oslo_utils import timeutils from oslo_utils import uuidutils from osprofiler import sqlalchemy as osp_sqlalchemy import sqlalchemy as sa +from sqlalchemy import or_ from sqlalchemy.orm.exc import NoResultFound, MultipleResultsFound from sqlalchemy.orm import joinedload from sqlalchemy.orm import Load @@ -2400,3 +2401,26 @@ class Connection(api.Connection): ).filter( models.NodeHistory.id.in_(entries) ).delete(synchronize_session=False) + + def count_nodes_in_provision_state(self, state): + if not isinstance(state, list): + state = [state] + with _session_for_read() as session: + # Intentionally does not use the full ORM model + # because that is de-duped by pkey, but we already + # have unique constraints on UUID/name, so... shouldn't + # be a big deal. #JuliaFamousLastWords. + # Anyway, intent here is to be as quick as possible and + # literally have the DB do *all* of the world, so no + # client side ops occur. The column is also indexed, + # which means this will be an index based response. + # TODO(TheJulia): This might need to be revised for + # SQLAlchemy 2.0 as it should be a scaler select and count + # instead. + return session.query( + models.Node.provision_state + ).filter( + or_( + models.Node.provision_state == v for v in state + ) + ).count() diff --git a/ironic/dhcp/base.py b/ironic/dhcp/base.py index 57a4e7911..b2b711307 100644 --- a/ironic/dhcp/base.py +++ b/ironic/dhcp/base.py @@ -102,3 +102,14 @@ class BaseDHCP(object, metaclass=abc.ABCMeta): :raises: FailedToCleanDHCPOpts """ pass + + def supports_ipxe_tag(self): + """Whether the provider will correctly apply the 'ipxe' tag. + + When iPXE makes a DHCP request, does this provider support adding + the tag `ipxe` or `ipxe6` (for IPv6). When the provider returns True, + options can be added which filter on these tags. + + :returns: True when the driver supports tagging iPXE DHCP requests + """ + return False diff --git a/ironic/dhcp/dnsmasq.py b/ironic/dhcp/dnsmasq.py new file mode 100644 index 000000000..c6f27afe4 --- /dev/null +++ b/ironic/dhcp/dnsmasq.py @@ -0,0 +1,159 @@ +# +# Copyright 2022 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os + +from oslo_log import log as logging +from oslo_utils import uuidutils + +from ironic.conf import CONF +from ironic.dhcp import base + +LOG = logging.getLogger(__name__) + + +class DnsmasqDHCPApi(base.BaseDHCP): + """API for managing host specific Dnsmasq configuration.""" + + def update_port_dhcp_opts(self, port_id, dhcp_options, token=None, + context=None): + pass + + def update_dhcp_opts(self, task, options, vifs=None): + """Send or update the DHCP BOOT options for this node. + + :param task: A TaskManager instance. + :param options: this will be a list of dicts, e.g. + + :: + + [{'opt_name': '67', + 'opt_value': 'pxelinux.0', + 'ip_version': 4}, + {'opt_name': '66', + 'opt_value': '123.123.123.456', + 'ip_version': 4}] + :param vifs: Ignored argument + """ + node = task.node + macs = set(self._pxe_enabled_macs(task.ports)) + + opt_file = self._opt_file_path(node) + tag = node.driver_internal_info.get('dnsmasq_tag') + if not tag: + tag = uuidutils.generate_uuid() + node.set_driver_internal_info('dnsmasq_tag', tag) + node.save() + + LOG.debug('Writing to %s:', opt_file) + with open(opt_file, 'w') as f: + # Apply each option by tag + for option in options: + entry = 'tag:{tag},{opt_name},{opt_value}\n'.format( + tag=tag, + opt_name=option.get('opt_name'), + opt_value=option.get('opt_value'), + ) + LOG.debug(entry) + f.write(entry) + + for mac in macs: + host_file = self._host_file_path(mac) + LOG.debug('Writing to %s:', host_file) + with open(host_file, 'w') as f: + # Tag each address with the unique uuid scoped to + # this node and DHCP transaction + entry = '{mac},set:{tag},set:ironic\n'.format( + mac=mac, tag=tag) + LOG.debug(entry) + f.write(entry) + + def _opt_file_path(self, node): + return os.path.join(CONF.dnsmasq.dhcp_optsdir, + 'ironic-{}.conf'.format(node.uuid)) + + def _host_file_path(self, mac): + return os.path.join(CONF.dnsmasq.dhcp_hostsdir, + 'ironic-{}.conf'.format(mac)) + + def _pxe_enabled_macs(self, ports): + for port in ports: + if port.pxe_enabled: + yield port.address + + def get_ip_addresses(self, task): + """Get IP addresses for all ports/portgroups in `task`. + + :param task: a TaskManager instance. + :returns: List of IP addresses associated with + task's ports/portgroups. + """ + lease_path = CONF.dnsmasq.dhcp_leasefile + macs = set(self._pxe_enabled_macs(task.ports)) + addresses = [] + with open(lease_path, 'r') as f: + for line in f.readlines(): + lease = line.split() + if lease[1] in macs: + addresses.append(lease[2]) + LOG.debug('Found addresses for %s: %s', + task.node.uuid, ', '.join(addresses)) + return addresses + + def clean_dhcp_opts(self, task): + """Clean up the DHCP BOOT options for the host in `task`. + + :param task: A TaskManager instance. + + :raises: FailedToCleanDHCPOpts + """ + + node = task.node + # Discard this unique tag + node.del_driver_internal_info('dnsmasq_tag') + node.save() + + # Changing the host rule to ignore will be picked up by dnsmasq + # without requiring a SIGHUP. When the mac address is active again + # this file will be replaced with one that applies a new unique tag. + macs = set(self._pxe_enabled_macs(task.ports)) + for mac in macs: + host_file = self._host_file_path(mac) + with open(host_file, 'w') as f: + entry = '{mac},ignore\n'.format(mac=mac) + f.write(entry) + + # Deleting the file containing dhcp-option won't remove the rules from + # dnsmasq but no requests will be tagged with the dnsmasq_tag uuid so + # these rules will not apply. + opt_file = self._opt_file_path(node) + if os.path.exists(opt_file): + os.remove(opt_file) + + def supports_ipxe_tag(self): + """Whether the provider will correctly apply the 'ipxe' tag. + + When iPXE makes a DHCP request, does this provider support adding + the tag `ipxe` or `ipxe6` (for IPv6). When the provider returns True, + options can be added which filter on these tags. + + The `dnsmasq` provider sets this to True on the assumption that the + following is included in the dnsmasq.conf: + + dhcp-match=set:ipxe,175 + + :returns: True + """ + return True diff --git a/ironic/dhcp/neutron.py b/ironic/dhcp/neutron.py index a5cb09282..06962ad42 100644 --- a/ironic/dhcp/neutron.py +++ b/ironic/dhcp/neutron.py @@ -278,3 +278,14 @@ class NeutronDHCPApi(base.BaseDHCP): task, task.portgroups, client) return port_ip_addresses + portgroup_ip_addresses + + def supports_ipxe_tag(self): + """Whether the provider will correctly apply the 'ipxe' tag. + + When iPXE makes a DHCP request, does this provider support adding + the tag `ipxe` or `ipxe6` (for IPv6). When the provider returns True, + options can be added which filter on these tags. + + :returns: True + """ + return True diff --git a/ironic/drivers/modules/drac/raid.py b/ironic/drivers/modules/drac/raid.py index ae06f0dfa..8bad02bba 100644 --- a/ironic/drivers/modules/drac/raid.py +++ b/ironic/drivers/modules/drac/raid.py @@ -1327,6 +1327,8 @@ class DracRedfishRAID(redfish_raid.RedfishRAID): """Perform post delete_configuration action to commit the config. Clears foreign configuration for all RAID controllers. + If no foreign configuration to clear, then checks if any controllers + can be converted to RAID mode. :param task: a TaskManager instance containing the node to act on. :param raid_configs: a list of dictionaries containing the RAID @@ -1338,7 +1340,15 @@ class DracRedfishRAID(redfish_raid.RedfishRAID): async_proc = DracRedfishRAID._clear_foreign_config(system, task) if async_proc: # Async processing with system rebooting in progress + task.node.set_driver_internal_info( + 'raid_config_substep', 'clear_foreign_config') + task.node.save() return deploy_utils.get_async_step_return_state(task.node) + else: + conv_state = DracRedfishRAID._convert_controller_to_raid_mode( + task) + if conv_state: + return conv_state return return_state @@ -1486,6 +1496,69 @@ class DracRedfishRAID(redfish_raid.RedfishRAID): task_mon.wait(CONF.drac.raid_job_timeout) return False + @staticmethod + def _convert_controller_to_raid_mode(task): + """Convert eligible controllers to RAID mode if not already. + + :param task: a TaskManager instance containing the node to act on + :returns: Return state if there are controllers to convert and + and rebooting, otherwise None. + """ + + system = redfish_utils.get_system(task.node) + task_mons = [] + warning_msg_templ = ( + 'Possibly because `%(pkg)s` is too old. Without newer `%(pkg)s` ' + 'PERC 9 and PERC 10 controllers that are not in RAID mode will ' + 'not be used or have limited RAID support. To avoid that update ' + '`%(pkg)s`') + for storage in system.storage.get_members(): + storage_controllers = None + try: + storage_controllers = storage.controllers + except sushy.exceptions.MissingAttributeError: + # Check if there storage_controllers to separate old iDRAC and + # storage without controller + if storage.storage_controllers: + LOG.warning('%(storage)s does not have controllers for ' + 'node %(node)s' + warning_msg_templ, + {'storage': storage.identity, + 'node': task.node.uuid, + 'pkg': 'iDRAC'}) + continue + except AttributeError: + LOG.warning('%(storage)s does not have controllers attribute. ' + + warning_msg_templ, {'storage': storage.identity, + 'pkg': 'sushy'}) + return None + if storage_controllers: + controller = storage.controllers.get_members()[0] + try: + oem_controller = controller.get_oem_extension('Dell') + except sushy.exceptions.ExtensionError as ee: + LOG.warning('Failed to find extension to convert ' + 'controller to RAID mode. ' + + warning_msg_templ + '. Error: %(err)s', + {'err': ee, 'pkg': 'sushy-oem-idrac'}) + return None + task_mon = oem_controller.convert_to_raid() + if task_mon: + task_mons.append(task_mon) + + if task_mons: + deploy_utils.set_async_step_flags( + task.node, + reboot=True, + skip_current_step=True, + polling=True) + + task.upgrade_lock() + task.node.set_driver_internal_info( + 'raid_task_monitor_uris', + [tm.task_monitor_uri for tm in task_mons]) + task.node.save() + return deploy_utils.reboot_to_finish_step(task) + @METRICS.timer('DracRedfishRAID._query_raid_tasks_status') @periodics.node_periodic( purpose='checking async RAID tasks', @@ -1545,6 +1618,15 @@ class DracRedfishRAID(redfish_raid.RedfishRAID): else: # all tasks completed and none of them failed node.del_driver_internal_info('raid_task_monitor_uris') + substep = node.driver_internal_info.get( + 'raid_config_substep') + if substep == 'clear_foreign_config': + node.del_driver_internal_info('raid_config_substep') + node.save() + res = DracRedfishRAID._convert_controller_to_raid_mode( + task) + if res: # New tasks submitted + return self._set_success(task) node.save() diff --git a/ironic/tests/unit/common/test_pxe_utils.py b/ironic/tests/unit/common/test_pxe_utils.py index 4d4fbb5b5..b775c68a1 100644 --- a/ironic/tests/unit/common/test_pxe_utils.py +++ b/ironic/tests/unit/common/test_pxe_utils.py @@ -25,6 +25,7 @@ from oslo_config import cfg from oslo_utils import fileutils from oslo_utils import uuidutils +from ironic.common import dhcp_factory from ironic.common import exception from ironic.common.glance_service import image_service from ironic.common import image_service as base_image_service @@ -45,6 +46,11 @@ DRV_INFO_DICT = db_utils.get_test_pxe_driver_info() DRV_INTERNAL_INFO_DICT = db_utils.get_test_pxe_driver_internal_info() +def _reset_dhcp_provider(config, provider_name): + config(dhcp_provider=provider_name, group='dhcp') + dhcp_factory.DHCPFactory._dhcp_provider = None + + # Prevent /httpboot validation on creating the node @mock.patch('ironic.drivers.modules.pxe.PXEBoot.__init__', lambda self: None) class TestPXEUtils(db_base.DbTestCase): @@ -674,7 +680,7 @@ class TestPXEUtils(db_base.DbTestCase): # TODO(TheJulia): We should... like... fix the template to # enable mac address usage..... grub_tmplte = "ironic/drivers/modules/pxe_grub_config.template" - self.config(dhcp_provider='none', group='dhcp') + _reset_dhcp_provider(self.config, 'none') self.config(tftp_root=tempfile.mkdtemp(), group='pxe') link_ip_configs_mock.side_effect = \ exception.FailedToGetIPAddressOnPort(port_id='blah') @@ -898,7 +904,7 @@ class TestPXEUtils(db_base.DbTestCase): {'opt_name': '150', 'opt_value': '192.0.2.1', 'ip_version': ip_version}, - {'opt_name': 'server-ip-address', + {'opt_name': '255', 'opt_value': '192.0.2.1', 'ip_version': ip_version} ] @@ -1904,7 +1910,8 @@ class iPXEBuildConfigOptionsTestCase(db_base.DbTestCase): self.config(tftp_server='ff80::1', group='pxe') self.config(http_url='http://[ff80::1]:1234', group='deploy') - self.config(dhcp_provider='isc', group='dhcp') + _reset_dhcp_provider(self.config, 'none') + if ip_version == 6: # NOTE(TheJulia): DHCPv6 RFCs seem to indicate that the prior # options are not imported, although they may be supported @@ -1932,7 +1939,7 @@ class iPXEBuildConfigOptionsTestCase(db_base.DbTestCase): {'opt_name': '67', 'opt_value': expected_boot_script_url, 'ip_version': ip_version}, - {'opt_name': 'server-ip-address', + {'opt_name': '255', 'opt_value': '192.0.2.1', 'ip_version': ip_version}] @@ -1940,7 +1947,8 @@ class iPXEBuildConfigOptionsTestCase(db_base.DbTestCase): pxe_utils.dhcp_options_for_instance( task, ipxe_enabled=True)) - self.config(dhcp_provider='neutron', group='dhcp') + _reset_dhcp_provider(self.config, 'neutron') + if ip_version == 6: # Boot URL variable set from prior test of isc parameters. expected_info = [{'opt_name': 'tag:!ipxe6,59', @@ -1963,7 +1971,7 @@ class iPXEBuildConfigOptionsTestCase(db_base.DbTestCase): {'opt_name': 'tag:ipxe,67', 'opt_value': expected_boot_script_url, 'ip_version': ip_version}, - {'opt_name': 'server-ip-address', + {'opt_name': '255', 'opt_value': '192.0.2.1', 'ip_version': ip_version}] diff --git a/ironic/tests/unit/common/test_release_mappings.py b/ironic/tests/unit/common/test_release_mappings.py index da1eeedd2..96dbdfa22 100644 --- a/ironic/tests/unit/common/test_release_mappings.py +++ b/ironic/tests/unit/common/test_release_mappings.py @@ -44,7 +44,7 @@ NUMERIC_RELEASES = sorted( map(versionutils.convert_version_to_tuple, set(release_mappings.RELEASE_MAPPING) # Update the exceptions whenever needed - - {'master', 'yoga', 'xena'}), + - {'master', 'zed', 'yoga'}), reverse=True) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 378d65f15..5d84dbbef 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -1829,6 +1829,7 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin, def test_do_node_deploy_maintenance(self, mock_iwdi): mock_iwdi.return_value = False + self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware', maintenance=True) exc = self.assertRaises(messaging.rpc.ExpectedException, @@ -1843,6 +1844,7 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin, self.assertFalse(mock_iwdi.called) def _test_do_node_deploy_validate_fail(self, mock_validate, mock_iwdi): + self._start_service() mock_iwdi.return_value = False # InvalidParameterValue should be re-raised as InstanceDeployFailure mock_validate.side_effect = exception.InvalidParameterValue('error') @@ -2389,6 +2391,7 @@ class DoNodeTearDownTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): @mock.patch('ironic.drivers.modules.fake.FakePower.validate', autospec=True) def test_do_node_tear_down_validate_fail(self, mock_validate): + self._start_service() # InvalidParameterValue should be re-raised as InstanceDeployFailure mock_validate.side_effect = exception.InvalidParameterValue('error') node = obj_utils.create_test_node( @@ -8374,7 +8377,6 @@ class NodeHistoryRecordCleanupTestCase(mgr_utils.ServiceSetUpMixin, # 9 retained due to days, 3 to config self.service._manage_node_history(self.context) events = objects.NodeHistory.list(self.context) - print(events) self.assertEqual(12, len(events)) events = objects.NodeHistory.list_by_node_id(self.context, 10) self.assertEqual(4, len(events)) @@ -8394,3 +8396,73 @@ class NodeHistoryRecordCleanupTestCase(mgr_utils.ServiceSetUpMixin, self.assertEqual('one', events[1].event) self.assertEqual('two', events[2].event) self.assertEqual('three', events[3].event) + + +class ConcurrentActionLimitTestCase(mgr_utils.ServiceSetUpMixin, + db_base.DbTestCase): + + def setUp(self): + super(ConcurrentActionLimitTestCase, self).setUp() + self._start_service() + self.node1 = obj_utils.get_test_node( + self.context, + driver='fake-hardware', + id=110, + uuid=uuidutils.generate_uuid()) + self.node2 = obj_utils.get_test_node( + self.context, + driver='fake-hardware', + id=111, + uuid=uuidutils.generate_uuid()) + self.node3 = obj_utils.get_test_node( + self.context, + driver='fake-hardware', + id=112, + uuid=uuidutils.generate_uuid()) + self.node4 = obj_utils.get_test_node( + self.context, + driver='fake-hardware', + id=113, + uuid=uuidutils.generate_uuid()) + # Create the nodes, as the tasks need to operate across tables. + self.node1.create() + self.node2.create() + self.node3.create() + self.node4.create() + + def test_concurrent_action_limit_deploy(self): + self.node1.provision_state = states.DEPLOYING + self.node2.provision_state = states.DEPLOYWAIT + self.node1.save() + self.node2.save() + CONF.set_override('max_concurrent_deploy', 2, group='conductor') + self.assertRaises( + exception.ConcurrentActionLimit, + self.service._concurrent_action_limit, + 'provisioning') + self.service._concurrent_action_limit('unprovisioning') + self.service._concurrent_action_limit('cleaning') + CONF.set_override('max_concurrent_deploy', 3, group='conductor') + self.service._concurrent_action_limit('provisioning') + + def test_concurrent_action_limit_cleaning(self): + self.node1.provision_state = states.DELETING + self.node2.provision_state = states.CLEANING + self.node3.provision_state = states.CLEANWAIT + self.node1.save() + self.node2.save() + self.node3.save() + + CONF.set_override('max_concurrent_clean', 3, group='conductor') + self.assertRaises( + exception.ConcurrentActionLimit, + self.service._concurrent_action_limit, + 'cleaning') + self.assertRaises( + exception.ConcurrentActionLimit, + self.service._concurrent_action_limit, + 'unprovisioning') + self.service._concurrent_action_limit('provisioning') + CONF.set_override('max_concurrent_clean', 4, group='conductor') + self.service._concurrent_action_limit('cleaning') + self.service._concurrent_action_limit('unprovisioning') diff --git a/ironic/tests/unit/db/test_nodes.py b/ironic/tests/unit/db/test_nodes.py index eb5200f4e..b4d70b2dd 100644 --- a/ironic/tests/unit/db/test_nodes.py +++ b/ironic/tests/unit/db/test_nodes.py @@ -1081,3 +1081,39 @@ class DbNodeTestCase(base.DbTestCase): self.dbapi.check_node_list, [node1.uuid, 'this/cannot/be/a/name']) self.assertIn('this/cannot/be/a/name', str(exc)) + + def test_node_provision_state_count(self): + active_nodes = 5 + manageable_nodes = 3 + deploywait_nodes = 1 + for i in range(0, active_nodes): + utils.create_test_node(uuid=uuidutils.generate_uuid(), + provision_state=states.ACTIVE) + for i in range(0, manageable_nodes): + utils.create_test_node(uuid=uuidutils.generate_uuid(), + provision_state=states.MANAGEABLE) + for i in range(0, deploywait_nodes): + utils.create_test_node(uuid=uuidutils.generate_uuid(), + provision_state=states.DEPLOYWAIT) + + self.assertEqual( + active_nodes, + self.dbapi.count_nodes_in_provision_state(states.ACTIVE) + ) + self.assertEqual( + manageable_nodes, + self.dbapi.count_nodes_in_provision_state(states.MANAGEABLE) + ) + self.assertEqual( + deploywait_nodes, + self.dbapi.count_nodes_in_provision_state(states.DEPLOYWAIT) + ) + total = active_nodes + manageable_nodes + deploywait_nodes + self.assertEqual( + total, + self.dbapi.count_nodes_in_provision_state([ + states.ACTIVE, + states.MANAGEABLE, + states.DEPLOYWAIT + ]) + ) diff --git a/ironic/tests/unit/dhcp/test_dnsmasq.py b/ironic/tests/unit/dhcp/test_dnsmasq.py new file mode 100644 index 000000000..64fe46f33 --- /dev/null +++ b/ironic/tests/unit/dhcp/test_dnsmasq.py @@ -0,0 +1,140 @@ +# +# Copyright 2022 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import tempfile + +from ironic.common import dhcp_factory +from ironic.common import utils as common_utils +from ironic.conductor import task_manager +from ironic.tests.unit.db import base as db_base +from ironic.tests.unit.objects import utils as object_utils + + +class TestDnsmasqDHCPApi(db_base.DbTestCase): + + def setUp(self): + super(TestDnsmasqDHCPApi, self).setUp() + self.config(dhcp_provider='dnsmasq', + group='dhcp') + self.node = object_utils.create_test_node(self.context) + + self.ports = [ + object_utils.create_test_port( + self.context, node_id=self.node.id, id=2, + uuid='1be26c0b-03f2-4d2e-ae87-c02d7f33c782', + address='52:54:00:cf:2d:32', + pxe_enabled=True)] + + self.optsdir = tempfile.mkdtemp() + self.addCleanup(lambda: common_utils.rmtree_without_raise( + self.optsdir)) + self.config(dhcp_optsdir=self.optsdir, group='dnsmasq') + + self.hostsdir = tempfile.mkdtemp() + self.addCleanup(lambda: common_utils.rmtree_without_raise( + self.hostsdir)) + self.config(dhcp_hostsdir=self.hostsdir, group='dnsmasq') + + dhcp_factory.DHCPFactory._dhcp_provider = None + self.api = dhcp_factory.DHCPFactory() + self.opts = [ + { + 'ip_version': 4, + 'opt_name': '67', + 'opt_value': 'bootx64.efi' + }, + { + 'ip_version': 4, + 'opt_name': '210', + 'opt_value': '/tftpboot/' + }, + { + 'ip_version': 4, + 'opt_name': '66', + 'opt_value': '192.0.2.135', + }, + { + 'ip_version': 4, + 'opt_name': '150', + 'opt_value': '192.0.2.135' + }, + { + 'ip_version': 4, + 'opt_name': '255', + 'opt_value': '192.0.2.135' + } + ] + + def test_update_dhcp(self): + with task_manager.acquire(self.context, + self.node.uuid) as task: + self.api.update_dhcp(task, self.opts) + + dnsmasq_tag = task.node.driver_internal_info.get('dnsmasq_tag') + self.assertEqual(36, len(dnsmasq_tag)) + + hostfile = os.path.join(self.hostsdir, + 'ironic-52:54:00:cf:2d:32.conf') + with open(hostfile, 'r') as f: + self.assertEqual( + '52:54:00:cf:2d:32,set:%s,set:ironic\n' % dnsmasq_tag, + f.readline()) + + optsfile = os.path.join(self.optsdir, + 'ironic-%s.conf' % self.node.uuid) + with open(optsfile, 'r') as f: + self.assertEqual([ + 'tag:%s,67,bootx64.efi\n' % dnsmasq_tag, + 'tag:%s,210,/tftpboot/\n' % dnsmasq_tag, + 'tag:%s,66,192.0.2.135\n' % dnsmasq_tag, + 'tag:%s,150,192.0.2.135\n' % dnsmasq_tag, + 'tag:%s,255,192.0.2.135\n' % dnsmasq_tag], + f.readlines()) + + def test_get_ip_addresses(self): + with task_manager.acquire(self.context, + self.node.uuid) as task: + with tempfile.NamedTemporaryFile() as fp: + self.config(dhcp_leasefile=fp.name, group='dnsmasq') + fp.write(b"1659975057 52:54:00:cf:2d:32 192.0.2.198 * *\n") + fp.flush() + self.assertEqual( + ['192.0.2.198'], + self.api.provider.get_ip_addresses(task)) + + def test_clean_dhcp_opts(self): + with task_manager.acquire(self.context, + self.node.uuid) as task: + self.api.update_dhcp(task, self.opts) + + hostfile = os.path.join(self.hostsdir, + 'ironic-52:54:00:cf:2d:32.conf') + optsfile = os.path.join(self.optsdir, + 'ironic-%s.conf' % self.node.uuid) + self.assertTrue(os.path.isfile(hostfile)) + self.assertTrue(os.path.isfile(optsfile)) + + with task_manager.acquire(self.context, + self.node.uuid) as task: + self.api.clean_dhcp(task) + + # assert the host file remains with the ignore directive, and the opts + # file is deleted + with open(hostfile, 'r') as f: + self.assertEqual( + '52:54:00:cf:2d:32,ignore\n', + f.readline()) + self.assertFalse(os.path.isfile(optsfile)) diff --git a/ironic/tests/unit/drivers/modules/drac/test_raid.py b/ironic/tests/unit/drivers/modules/drac/test_raid.py index 780d2893c..acbd009d3 100644 --- a/ironic/tests/unit/drivers/modules/drac/test_raid.py +++ b/ironic/tests/unit/drivers/modules/drac/test_raid.py @@ -2457,13 +2457,145 @@ class DracRedfishRAIDTestCase(test_utils.BaseDracTest): self.assertEqual(False, result) mock_log.assert_called_once() + @mock.patch.object(deploy_utils, 'reboot_to_finish_step', + autospec=True) + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode( + self, mock_get_system, mock_reboot): + mock_task_mon = mock.Mock(task_monitor_uri='/TaskService/1') + mock_oem_controller = mock.Mock() + mock_oem_controller.convert_to_raid.return_value = mock_task_mon + mock_controller = mock.Mock() + mock_controller.get_oem_extension.return_value = mock_oem_controller + mock_controllers_col = mock.Mock() + mock_controllers_col.get_members.return_value = [mock_controller] + mock_storage = mock.Mock(controllers=mock_controllers_col) + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertEqual( + ['/TaskService/1'], + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertEqual(mock_reboot.return_value, result) + + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode_no_conversion( + self, mock_get_system): + mock_oem_controller = mock.Mock() + mock_oem_controller.convert_to_raid.return_value = None + mock_controller = mock.Mock() + mock_controller.get_oem_extension.return_value = mock_oem_controller + mock_controllers_col = mock.Mock() + mock_controllers_col.get_members.return_value = [mock_controller] + mock_storage = mock.Mock(controllers=mock_controllers_col) + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone(result) + + @mock.patch.object(drac_raid.LOG, 'warning', autospec=True) + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode_not_raid( + self, mock_get_system, mock_log): + mock_storage = mock.Mock(storage_controllers=None) + mock_controllers = mock.PropertyMock( + side_effect=sushy.exceptions.MissingAttributeError) + type(mock_storage).controllers = mock_controllers + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone(result) + mock_log.assert_not_called() + + @mock.patch.object(drac_raid.LOG, 'warning', autospec=True) + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode_old_idrac( + self, mock_get_system, mock_log): + mock_storage = mock.Mock(storage_controllers=mock.Mock()) + mock_controllers = mock.PropertyMock( + side_effect=sushy.exceptions.MissingAttributeError) + type(mock_storage).controllers = mock_controllers + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone(result) + mock_log.assert_called_once() + + @mock.patch.object(drac_raid.LOG, 'warning', autospec=True) + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode_old_sushy( + self, mock_get_system, mock_log): + mock_storage = mock.Mock(spec=[]) + mock_storage.identity = "Storage 1" + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone(result) + mock_log.assert_called_once() + + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test__convert_controller_to_raid_mode_old_sushy_oem( + self, mock_get_system): + mock_controller = mock.Mock() + mock_controller.get_oem_extension.side_effect =\ + sushy.exceptions.ExtensionError + mock_controllers_col = mock.Mock() + mock_controllers_col.get_members.return_value = [mock_controller] + mock_storage = mock.Mock(controllers=mock_controllers_col) + mock_storage_col = mock.Mock() + mock_storage_col.get_members.return_value = [mock_storage] + mock_system = mock.Mock(storage=mock_storage_col) + mock_get_system.return_value = mock_system + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + result = self.raid._convert_controller_to_raid_mode(task) + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone(result) + + @mock.patch.object(drac_raid.DracRedfishRAID, + '_convert_controller_to_raid_mode', autospec=True) @mock.patch.object(deploy_utils, 'get_async_step_return_state', autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(redfish_utils, 'get_system', autospec=True) def test_post_delete_configuration_foreign_async( self, mock_get_system, mock_build_agent_options, - mock_get_async_step_return_state): + mock_get_async_step_return_state, mock_convert): fake_oem_system = mock.Mock() fake_system = mock.Mock() fake_system.get_oem_extension.return_value = fake_oem_system @@ -2497,9 +2629,13 @@ class DracRedfishRAIDTestCase(test_utils.BaseDracTest): mock_get_async_step_return_state.assert_called_once_with(task.node) mock_task_mon1.wait.assert_not_called() mock_task_mon2.wait.assert_not_called() + mock_convert.assert_not_called() + @mock.patch.object(drac_raid.DracRedfishRAID, + '_convert_controller_to_raid_mode', autospec=True) @mock.patch.object(redfish_utils, 'get_system', autospec=True) - def test_post_delete_configuration_foreign_sync(self, mock_get_system): + def test_post_delete_configuration_foreign_sync( + self, mock_get_system, mock_convert): fake_oem_system = mock.Mock() fake_system = mock.Mock() fake_system.get_oem_extension.return_value = fake_oem_system @@ -2520,15 +2656,34 @@ class DracRedfishRAIDTestCase(test_utils.BaseDracTest): mock_task_mon2.get_task.return_value = mock_task2 fake_oem_system.clear_foreign_config.return_value = [ mock_task_mon1, mock_task_mon2] + mock_convert_state = mock.Mock() + mock_convert.return_value = mock_convert_state result = self.raid.post_delete_configuration( task, None, return_state=mock_return_state1) - self.assertEqual(result, mock_return_state1) + self.assertEqual(result, mock_convert_state) fake_oem_system.clear_foreign_config.assert_called_once() mock_task_mon1.wait.assert_called_once_with(CONF.drac.raid_job_timeout) mock_task_mon2.wait.assert_not_called() + @mock.patch.object(drac_raid.DracRedfishRAID, + '_convert_controller_to_raid_mode', autospec=True) + @mock.patch.object(drac_raid.DracRedfishRAID, + '_clear_foreign_config', autospec=True) + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test_post_delete_configuration_no_subtasks( + self, mock_get_system, mock_foreign, mock_convert): + mock_foreign.return_value = False + mock_convert.return_value = None + task = mock.Mock(node=self.node, context=self.context) + mock_return_state1 = mock.Mock() + + result = self.raid.post_delete_configuration( + task, None, return_state=mock_return_state1) + + self.assertEqual(mock_return_state1, result) + @mock.patch.object(drac_raid.LOG, 'warning', autospec=True) def test__clear_foreign_config_attribute_error(self, mock_log): fake_oem_system = mock.Mock(spec=[]) @@ -2682,6 +2837,41 @@ class DracRedfishRAIDTestCase(test_utils.BaseDracTest): task.node.driver_internal_info.get('raid_task_monitor_uris')) self.raid._set_failed.assert_called_once() + @mock.patch.object(drac_raid.DracRedfishRAID, + '_convert_controller_to_raid_mode', autospec=True) + @mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True) + def test__check_raid_tasks_status_convert_controller( + self, mock_get_task_monitor, mock_convert): + driver_internal_info = { + 'raid_task_monitor_uris': '/TaskService/1', + 'raid_config_substep': 'clear_foreign_config'} + self.node.driver_internal_info = driver_internal_info + self.node.save() + + mock_config_task = mock.Mock() + mock_config_task.task_state = sushy.TASK_STATE_COMPLETED + mock_config_task.task_status = sushy.HEALTH_OK + mock_task_monitor = mock.Mock() + mock_task_monitor.is_processing = False + mock_task_monitor.get_task.return_value = mock_config_task + mock_get_task_monitor.return_value = mock_task_monitor + + self.raid._set_success = mock.Mock() + self.raid._set_failed = mock.Mock() + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + self.raid._check_raid_tasks_status( + task, ['/TaskService/1']) + + mock_convert.assert_called_once_with(task) + self.raid._set_success.assert_not_called() + self.raid._set_failed.assert_not_called() + self.assertIsNone( + task.node.driver_internal_info.get('raid_task_monitor_uris')) + self.assertIsNone( + task.node.driver_internal_info.get('raid_config_substep')) + @mock.patch.object(manager_utils, 'notify_conductor_resume_deploy', autospec=True) @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', diff --git a/releasenotes/notes/concurrency-limit-control-4b101bca7136e08d.yaml b/releasenotes/notes/concurrency-limit-control-4b101bca7136e08d.yaml new file mode 100644 index 000000000..5eb8dd449 --- /dev/null +++ b/releasenotes/notes/concurrency-limit-control-4b101bca7136e08d.yaml @@ -0,0 +1,23 @@ +--- +features: + - | + Adds a concurrency limiter for number of nodes in states related to + *Cleaning* and *Provisioning* operations across the ironic deployment. + These settings default to a maximum number of concurrent deployments to + ``250`` and a maximum number of concurrent deletes and cleaning operations + to ``50``. These settings can be tuned using + ``[conductor]max_concurrent_deploy`` and + ``[conductor]max_concurrent_clean``, respectively. + The defaults should generally be good for most operators in most cases. + Large scale operators should evaluate the defaults and tune appropriately + as this feature cannot be disabled, as it is a security mechanism. +upgrade: + - | + Large scale operators should be aware that a new feature, referred to as + "Concurrent Action Limit" was introduced as a security mechanism to + provide a means to limit attackers, or faulty scripts, from potentially + causing irreperable harm to an environment. This feature cannot be + disabled, and operators are encouraged to tune the new settings + ``[conductor]max_concurrent_deploy`` and + ``[conductor]max_concurrent_clean`` to match the needs of their + environment. diff --git a/releasenotes/notes/dnsmasq_dhcp-9154fcae927dc3de.yaml b/releasenotes/notes/dnsmasq_dhcp-9154fcae927dc3de.yaml new file mode 100644 index 000000000..bbf7dad40 --- /dev/null +++ b/releasenotes/notes/dnsmasq_dhcp-9154fcae927dc3de.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + The ``[dhcp]dhcp_provider`` configuration option can now be set to + ``dnsmasq`` as an alternative to ``none`` for standalone deployments. This + enables the same node-specific DHCP capabilities as the ``neutron`` provider. + See the ``[dnsmasq]`` section for configuration options.
\ No newline at end of file diff --git a/releasenotes/notes/fix-idrac-redfish-controller-mode-7b55c58d09240d3c.yaml b/releasenotes/notes/fix-idrac-redfish-controller-mode-7b55c58d09240d3c.yaml new file mode 100644 index 000000000..bf476dd63 --- /dev/null +++ b/releasenotes/notes/fix-idrac-redfish-controller-mode-7b55c58d09240d3c.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes ``idrac-redfish`` RAID ``delete_configuration`` step to convert PERC + 9 and PERC 10 controllers to RAID mode if it is not already set. @@ -52,6 +52,7 @@ wsgi_scripts = ironic-api-wsgi = ironic.api.wsgi:initialize_wsgi_app ironic.dhcp = + dnsmasq = ironic.dhcp.dnsmasq:DnsmasqDHCPApi neutron = ironic.dhcp.neutron:NeutronDHCPApi none = ironic.dhcp.none:NoneDHCPApi |