-rw-r--r--  devstack/lib/ironic  41
-rw-r--r--  doc/source/admin/drivers.rst  1
-rw-r--r--  doc/source/admin/drivers/fake.rst  36
-rw-r--r--  doc/source/admin/troubleshooting.rst  37
-rw-r--r--  ironic/common/exception.py  10
-rw-r--r--  ironic/common/pxe_utils.py  77
-rw-r--r--  ironic/conductor/cleaning.py  2
-rw-r--r--  ironic/conductor/manager.py  52
-rw-r--r--  ironic/conf/conductor.py  26
-rw-r--r--  ironic/conf/deploy.py  2
-rw-r--r--  ironic/db/api.py  9
-rw-r--r--  ironic/db/sqlalchemy/api.py  24
-rw-r--r--  ironic/tests/base.py  81
-rw-r--r--  ironic/tests/unit/common/test_pxe_utils.py  104
-rw-r--r--  ironic/tests/unit/conductor/test_cleaning.py  26
-rw-r--r--  ironic/tests/unit/conductor/test_manager.py  74
-rw-r--r--  ironic/tests/unit/db/test_nodes.py  36
-rw-r--r--  releasenotes/notes/concurrency-limit-control-4b101bca7136e08d.yaml  23
-rw-r--r--  releasenotes/notes/correct-source-path-handling-lookups-4ce2023a56372f10.yaml  16
-rw-r--r--  releasenotes/notes/fix-cleaning-stuck-on-networkerror-4aedbf3673413af6.yaml  8
-rw-r--r--  releasenotes/notes/maximum-disk-erasure-concurrency-6d132bd84e3df4cf.yaml  10
-rw-r--r--  tox.ini  1
-rw-r--r--  zuul.d/ironic-jobs.yaml  42
-rw-r--r--  zuul.d/project.yaml  2
24 files changed, 634 insertions, 106 deletions
diff --git a/devstack/lib/ironic b/devstack/lib/ironic
index ab96638c0..08cccce7a 100644
--- a/devstack/lib/ironic
+++ b/devstack/lib/ironic
@@ -1332,6 +1332,17 @@ function configure_ironic_networks {
configure_ironic_cleaning_network
echo_summary "Configuring Ironic rescue network"
configure_ironic_rescue_network
+ echo_summary "Configuring Neutron Private Subnet, if needed."
+ configure_ironic_private_subnet
+}
+
+function configure_ironic_private_subnet {
+ if [[ "${IRONIC_ANACONDA_IMAGE_REF:-}" != "" ]]; then
+ # NOTE(TheJulia): Anaconda needs DNS for FQDN resolution
+ # and devstack doesn't create this network with dns.
+ subnet_id=$(openstack --os-cloud $OS_CLOUD subnet show private-subnet -f value -c id)
+ openstack --os-cloud $OS_CLOUD subnet set --dns-nameserver 8.8.8.8 $subnet_id
+ fi
}
function configure_ironic_cleaning_network {
@@ -1405,7 +1416,8 @@ function configure_ironic_provision_network {
${net_segment_id:+--network-segment $net_segment_id} \
$IRONIC_PROVISION_PROVIDER_SUBNET_NAME \
--gateway $IRONIC_PROVISION_SUBNET_GATEWAY --network $net_id \
- --subnet-range $IRONIC_PROVISION_SUBNET_PREFIX -f value -c id)"
+ --subnet-range $IRONIC_PROVISION_SUBNET_PREFIX \
+ --dns-nameserver 8.8.8.8 -f value -c id)"
else
# NOTE(TheJulia): Consider changing this to stateful to support UEFI once we move
# CI to Ubuntu Jammy as it will support v6 and v4 UEFI firmware driven boot ops.
@@ -3057,6 +3069,16 @@ function upload_baremetal_ironic_deploy {
iniset $IRONIC_CONF_FILE conductor deploy_ramdisk $IRONIC_DEPLOY_RAMDISK_ID
iniset $IRONIC_CONF_FILE conductor rescue_kernel $IRONIC_DEPLOY_KERNEL_ID
iniset $IRONIC_CONF_FILE conductor rescue_ramdisk $IRONIC_DEPLOY_RAMDISK_ID
+
+ if [[ "${IRONIC_ANACONDA_INSECURE_HEARTBEAT:-}" != "" ]]; then
+ iniset $IRONIC_CONF_FILE anaconda insecure_heartbeat ${IRONIC_ANACONDA_INSECURE_HEARTBEAT:-}
+ fi
+ # NOTE(TheJulia): Compared to an image deploy, anaconda is relatively
+ # slow as it installs packages one at a time. As such, we need an
+ # option to extend the deploy callback timeout.
+ if [[ "${IRONIC_DEPLOY_CALLBACK_WAIT_TIMEOUT:-}" != "" ]]; then
+ iniset $IRONIC_CONF_FILE conductor deploy_callback_timeout ${IRONIC_DEPLOY_CALLBACK_WAIT_TIMEOUT:-}
+ fi
}
function prepare_baremetal_basic_ops {
@@ -3221,6 +3243,23 @@ function ironic_configure_tempest {
if [[ "$IRONIC_RAMDISK_IMAGE" != "" ]]; then
iniset $TEMPEST_CONFIG baremetal ramdisk_iso_image_ref "$IRONIC_RAMDISK_IMAGE"
fi
+ if [[ "${IRONIC_ANACONDA_IMAGE_REF:-}" != "" ]]; then
+ # In a perfect world we would use *just* the opendev repo
+ # mirror, and let things be magical, but OpenDev Infra cannot
+ # mirror the /images path with the limited storage space.
+ iniset $TEMPEST_CONFIG baremetal anaconda_image_ref ${IRONIC_ANACONDA_IMAGE_REF:-}
+ fi
+ if [[ "${IRONIC_ANACONDA_KERNEL_REF:-}" != "" ]]; then
+ iniset $TEMPEST_CONFIG baremetal anaconda_kernel_ref ${IRONIC_ANACONDA_KERNEL_REF:-}
+ fi
+ if [[ "${IRONIC_ANACONDA_RAMDISK_REF:-}" != "" ]]; then
+ iniset $TEMPEST_CONFIG baremetal anaconda_initial_ramdisk_ref ${IRONIC_ANACONDA_RAMDISK_REF:-}
+ fi
+ if [[ "${IRONIC_ANACONDA_STAGE2_REF:-}" != "" ]]; then
+ iniset $TEMPEST_CONFIG baremetal anaconda_stage2_ramdisk_ref ${IRONIC_ANACONDA_STAGE2_REF:-}
+ fi
+
# NOTE(dtantsur): keep this option here until the defaults change in
# ironic-tempest-plugin to disable classic drivers testing.
iniset $TEMPEST_CONFIG baremetal enabled_drivers ""
diff --git a/doc/source/admin/drivers.rst b/doc/source/admin/drivers.rst
index c3d8eb377..f35cb2dfa 100644
--- a/doc/source/admin/drivers.rst
+++ b/doc/source/admin/drivers.rst
@@ -26,6 +26,7 @@ Hardware Types
drivers/redfish
drivers/snmp
drivers/xclarity
+ drivers/fake
Changing Hardware Types and Interfaces
--------------------------------------
diff --git a/doc/source/admin/drivers/fake.rst b/doc/source/admin/drivers/fake.rst
new file mode 100644
index 000000000..ea7d7ef4c
--- /dev/null
+++ b/doc/source/admin/drivers/fake.rst
@@ -0,0 +1,36 @@
+===========
+Fake driver
+===========
+
+Overview
+========
+
+The ``fake-hardware`` hardware type is what it claims to be: fake. Use of this
+type or the ``fake`` interfaces should be temporary or limited to
+non-production environments, as the ``fake`` interfaces do not perform any of
+the actions typically expected.
+
+The ``fake`` interfaces can be combined with any of the "real" hardware
+interfaces, allowing you to effectively disable one or more hardware
+interfaces for testing by simply setting that interface to ``fake``.
+
+Use cases
+=========
+
+Development
+-----------
+Developers can use the ``fake-hardware`` hardware type to mock out nodes
+for testing without those nodes needing to exist as physical or virtual
+hardware.
+
+Adoption
+--------
+Some OpenStack deployers have used ``fake`` interfaces in Ironic to allow an
+adoption-style workflow with Nova. By setting a node's hardware interfaces to
+``fake``, it's possible to deploy to that node with Nova without causing any
+actual changes to the hardware or an OS already deployed on it.
+
+This is generally an unsupported use case, but it is possible. For more
+information, see the relevant `post from CERN TechBlog`_.
+
+.. _`post from CERN TechBlog`: https://techblog.web.cern.ch/techblog/post/ironic-nova-adoption/
diff --git a/doc/source/admin/troubleshooting.rst b/doc/source/admin/troubleshooting.rst
index fa04d3006..2791430fd 100644
--- a/doc/source/admin/troubleshooting.rst
+++ b/doc/source/admin/troubleshooting.rst
@@ -973,3 +973,40 @@ Unfortunately, due to the way the conductor is designed, it is not possible to
gracefully break a stuck lock held in ``*-ing`` states. As the last resort, you
may need to restart the affected conductor. See `Why are my nodes stuck in a
"-ing" state?`_.
+
+What is ConcurrentActionLimit?
+==============================
+
+ConcurrentActionLimit is an exception raised to clients when an operation
+is requested but cannot be serviced at that moment, because the overall
+threshold of nodes in concurrent "Deployment" or "Cleaning" operations
+has been reached.
+
+These limits exist for two distinct reasons.
+
+The first is that they allow an operator to tune a deployment so that too
+many concurrent deployments cannot be triggered at any given time. A single
+conductor has an internal limit on the number of overall concurrent tasks,
+but that limit restricts only *running* actions; this setting instead
+accounts for the number of nodes in the ``deploying`` and ``deploy wait``
+states. In the case of deployments, the default value is relatively high
+and should be suitable for *most* larger operators.
+
+The second is to slow the rate at which an entire population of baremetal
+nodes can be moved into and through cleaning, in order to help guard
+against authenticated malicious users or accidental script-driven
+operations. In this case, the total number of nodes in the ``deleting``,
+``cleaning``, and ``clean wait`` states is evaluated. The default maximum
+limit for cleaning operations is *50* and should be suitable for the
+majority of baremetal operators.
+
+These limits can be adjusted using the ``[conductor]max_concurrent_deploy``
+and ``[conductor]max_concurrent_clean`` settings in the ironic.conf file
+for the ``ironic-conductor`` service. Neither setting can be explicitly
+disabled; however, there is also no upper limit to either setting.
+
+.. note::
+   This feature was requested by infrastructure operators based on actual
+   lessons learned running Ironic in large scale production. The defaults
+   may not be suitable for the largest scale operators.
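
A minimal sketch of the guard described above, mirroring the
``_concurrent_action_limit`` logic the conductor gains later in this
diff (the standalone function and the ``dbapi``/``conf`` parameters are
illustrative, not part of the change)::

    # Illustrative sketch of the concurrency guard described above.
    from ironic.common import exception, states

    def check_deploy_limit(dbapi, conf):
        # Count nodes currently in deployment-related states.
        node_count = dbapi.count_nodes_in_provision_state(
            [states.DEPLOYING, states.DEPLOYWAIT])
        if node_count >= conf.conductor.max_concurrent_deploy:
            # Raised back to the client; the configured limit value is
            # intentionally omitted from the message.
            raise exception.ConcurrentActionLimit(task_type='provisioning')
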
diff --git a/ironic/common/exception.py b/ironic/common/exception.py
index ddbce6f47..38316d2e7 100644
--- a/ironic/common/exception.py
+++ b/ironic/common/exception.py
@@ -851,3 +851,13 @@ class ImageRefIsARedirect(IronicException):
message=msg,
image_ref=image_ref,
redirect_url=redirect_url)
+
+
+class ConcurrentActionLimit(IronicException):
+ # NOTE(TheJulia): As a security guard, we explicitly don't report
+ # the concurrent action limit configuration value, since an attacker
+ # informed of the limit could tailor their attack.
+ _msg_fmt = _("Unable to process request at this time. "
+ "The concurrent action limit for %(task_type)s "
+ "has been reached. Please contact your administrator "
+ "and try again later.")
diff --git a/ironic/common/pxe_utils.py b/ironic/common/pxe_utils.py
index ec0719b75..33a24ecb6 100644
--- a/ironic/common/pxe_utils.py
+++ b/ironic/common/pxe_utils.py
@@ -674,20 +674,33 @@ def get_instance_image_info(task, ipxe_enabled=False):
os.path.join(root_dir, node.uuid, 'boot_iso'))
return image_info
-
image_properties = None
d_info = deploy_utils.get_image_instance_info(node)
+ isap = node.driver_internal_info.get('is_source_a_path')
def _get_image_properties():
- nonlocal image_properties
- if not image_properties:
+ nonlocal image_properties, isap
+ if not image_properties and not isap:
i_service = service.get_image_service(
d_info['image_source'],
context=ctx)
image_properties = i_service.show(
d_info['image_source'])['properties']
+ # TODO(TheJulia): At some point, we should teach this code
+ # to understand that with a path, it *can* retrieve the
+ # manifest from the HTTP(S) endpoint, which can populate
+ # image_properties, and drive variable population from the
+ # path, much as is done with Glance.
labels = ('kernel', 'ramdisk')
+ if not isap:
+ anaconda_labels = ('stage2', 'ks_template', 'ks_cfg')
+ else:
+ # When a path is used, a stage2 ramdisk can be determined
+ # automatically by anaconda, so it is not an explicit
+ # requirement.
+ anaconda_labels = ('ks_template', 'ks_cfg')
+
if not (i_info.get('kernel') and i_info.get('ramdisk')):
# NOTE(rloo): If both are not specified in instance_info
# we won't use any of them. We'll use the values specified
@@ -700,20 +713,13 @@ def get_instance_image_info(task, ipxe_enabled=False):
i_info[label] = str(image_properties[label + '_id'])
node.instance_info = i_info
node.save()
+ # TODO(TheJulia): Add functionality to look/grab the hints file
+ # for anaconda and just run with the entire path.
- anaconda_labels = ()
- if deploy_utils.get_boot_option(node) == 'kickstart':
- isap = node.driver_internal_info.get('is_source_a_path')
# stage2: installer stage2 squashfs image
# ks_template: anaconda kickstart template
# ks_cfg - rendered ks_template
- if not isap:
- anaconda_labels = ('stage2', 'ks_template', 'ks_cfg')
- else:
- # When a path is used, a stage2 ramdisk can be determiend
- # automatically by anaconda, so it is not an explicit
- # requirement.
- anaconda_labels = ('ks_template', 'ks_cfg')
+
# NOTE(rloo): We save stage2 & ks_template values in case they
# are changed by the user after we start using them and to
# prevent re-computing them again.
@@ -733,26 +739,31 @@ def get_instance_image_info(task, ipxe_enabled=False):
else:
node.set_driver_internal_info(
'stage2', str(image_properties['stage2_id']))
- # NOTE(TheJulia): A kickstart template is entirely independent
- # of the stage2 ramdisk. In the end, it was the configuration which
- # told anaconda how to execute.
- if i_info.get('ks_template'):
- # If the value is set, we always overwrite it, in the event
- # a rebuild is occuring or something along those lines.
- node.set_driver_internal_info('ks_template',
- i_info['ks_template'])
+ # NOTE(TheJulia): A kickstart template is entirely independent
+ # of the stage2 ramdisk. In the end, it was the configuration which
+ # told anaconda how to execute.
+ if i_info.get('ks_template'):
+ # If the value is set, we always overwrite it, in the event
+ # a rebuild is occurring or something along those lines.
+ node.set_driver_internal_info('ks_template',
+ i_info['ks_template'])
+ else:
+ _get_image_properties()
+ # ks_template is an optional property on the image
+ if image_properties and 'ks_template' in image_properties:
+ node.set_driver_internal_info(
+ 'ks_template', str(image_properties['ks_template']))
else:
- _get_image_properties()
- # ks_template is an optional property on the image
- if 'ks_template' not in image_properties:
- # If not defined, default to the overall system default
- # kickstart template, as opposed to a user supplied
- # template.
- node.set_driver_internal_info(
- 'ks_template', CONF.anaconda.default_ks_template)
- else:
- node.set_driver_internal_info(
- 'ks_template', str(image_properties['ks_template']))
+ # If not defined, default to the overall system default
+ # kickstart template, as opposed to a user supplied
+ # template.
+ node.set_driver_internal_info(
+ 'ks_template',
+ 'file://' + os.path.abspath(
+ CONF.anaconda.default_ks_template
+ )
+ )
+
node.save()
for label in labels + anaconda_labels:
@@ -1253,6 +1264,8 @@ def cache_ramdisk_kernel(task, pxe_info, ipxe_enabled=False):
CONF.deploy.http_root,
'stage2')
ensure_tree(os.path.dirname(file_path))
+
+ if 'ks_cfg' in pxe_info:
# ks_cfg is rendered later by the driver using ks_template. It cannot
# be fetched and cached.
t_pxe_info.pop('ks_cfg')
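
For context on the ``is_source_a_path`` branching above, a sketch of the
two ``image_source`` styles involved (the mirror URL is a hypothetical
example)::

    # Glance-style reference: image properties are fetched from the
    # image service, and an explicit 'stage2' squashfs is required.
    instance_info = {'image_source': 'b2f63c9e-2d52-4f56-bf49-9f9968bcf5a1'}

    # Path-style reference (note the trailing slash): the node's
    # driver_internal_info['is_source_a_path'] is set, no image service
    # lookup is attempted, and anaconda locates stage2 on its own.
    instance_info = {
        'image_source': 'http://mirror.example.com/9-stream/BaseOS/x86_64/os/'}
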
diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py
index e3151d4b8..53d66ddd8 100644
--- a/ironic/conductor/cleaning.py
+++ b/ironic/conductor/cleaning.py
@@ -69,7 +69,7 @@ def do_node_clean(task, clean_steps=None, disable_ramdisk=False):
task.driver.power.validate(task)
if not disable_ramdisk:
task.driver.network.validate(task)
- except exception.InvalidParameterValue as e:
+ except (exception.InvalidParameterValue, exception.NetworkError) as e:
msg = (_('Validation of node %(node)s for cleaning failed: %(msg)s') %
{'node': node.uuid, 'msg': e})
return utils.cleaning_error_handler(task, msg)
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 13d11d1d9..7e98459ff 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -886,7 +886,8 @@ class ConductorManager(base_manager.BaseConductorManager):
exception.NodeInMaintenance,
exception.InstanceDeployFailure,
exception.InvalidStateRequested,
- exception.NodeProtected)
+ exception.NodeProtected,
+ exception.ConcurrentActionLimit)
def do_node_deploy(self, context, node_id, rebuild=False,
configdrive=None, deploy_steps=None):
"""RPC method to initiate deployment to a node.
@@ -910,8 +911,11 @@ class ConductorManager(base_manager.BaseConductorManager):
:raises: InvalidStateRequested when the requested state is not a valid
target from the current state.
:raises: NodeProtected if the node is protected.
+ :raises: ConcurrentActionLimit if this action would exceed the maximum
+ number of configured concurrent actions of this type.
"""
LOG.debug("RPC do_node_deploy called for node %s.", node_id)
+ self._concurrent_action_limit(action='provisioning')
event = 'rebuild' if rebuild else 'deploy'
# NOTE(comstud): If the _sync_power_states() periodic task happens
@@ -983,7 +987,8 @@ class ConductorManager(base_manager.BaseConductorManager):
exception.NodeLocked,
exception.InstanceDeployFailure,
exception.InvalidStateRequested,
- exception.NodeProtected)
+ exception.NodeProtected,
+ exception.ConcurrentActionLimit)
def do_node_tear_down(self, context, node_id):
"""RPC method to tear down an existing node deployment.
@@ -998,8 +1003,11 @@ class ConductorManager(base_manager.BaseConductorManager):
:raises: InvalidStateRequested when the requested state is not a valid
target from the current state.
:raises: NodeProtected if the node is protected.
+ :raises: ConcurrentActionLimit if this action would exceed the maximum
+ number of configured concurrent actions of this type.
"""
LOG.debug("RPC do_node_tear_down called for node %s.", node_id)
+ self._concurrent_action_limit(action='unprovisioning')
with task_manager.acquire(context, node_id, shared=False,
purpose='node tear down') as task:
@@ -1121,7 +1129,8 @@ class ConductorManager(base_manager.BaseConductorManager):
exception.InvalidStateRequested,
exception.NodeInMaintenance,
exception.NodeLocked,
- exception.NoFreeConductorWorker)
+ exception.NoFreeConductorWorker,
+ exception.ConcurrentActionLimit)
def do_node_clean(self, context, node_id, clean_steps,
disable_ramdisk=False):
"""RPC method to initiate manual cleaning.
@@ -1150,7 +1159,10 @@ class ConductorManager(base_manager.BaseConductorManager):
:raises: NodeLocked if node is locked by another conductor.
:raises: NoFreeConductorWorker when there is no free worker to start
async task.
+ :raises: ConcurrentActionLimit if this action would exceed the
+ configured limits of the deployment.
"""
+ self._concurrent_action_limit(action='cleaning')
with task_manager.acquire(context, node_id, shared=False,
purpose='node manual cleaning') as task:
node = task.node
@@ -3549,6 +3561,40 @@ class ConductorManager(base_manager.BaseConductorManager):
# impact DB access if done in excess.
eventlet.sleep(0)
+ def _concurrent_action_limit(self, action):
+ """Check Concurrency limits and block operations if needed.
+
+ This method is used to serve as a central place for the logic
+ for checks on concurrency limits. If a limit is reached, then
+ an appropriate exception is raised.
+
+ :raises: ConcurrentActionLimit if the configured concurrency
+ limit would be exceeded.
+ """
+ # NOTE(TheJulia): Keeping this all in one place for simplicity.
+ if action == 'provisioning':
+ node_count = self.dbapi.count_nodes_in_provision_state([
+ states.DEPLOYING,
+ states.DEPLOYWAIT
+ ])
+ if node_count >= CONF.conductor.max_concurrent_deploy:
+ raise exception.ConcurrentActionLimit(
+ task_type=action)
+
+ if action == 'unprovisioning' or action == 'cleaning':
+ # NOTE(TheJulia): This also checks for the deleting state
+ # which is super transitory, *but* you can get a node into
+ # the state. So in order to guard against a DoS attack, we
+ # need to check even the super transitory node state.
+ node_count = self.dbapi.count_nodes_in_provision_state([
+ states.DELETING,
+ states.CLEANING,
+ states.CLEANWAIT
+ ])
+ if node_count >= CONF.conductor.max_concurrent_clean:
+ raise exception.ConcurrentActionLimit(
+ task_type=action)
+
@METRICS.timer('get_vendor_passthru_metadata')
def get_vendor_passthru_metadata(route_dict):
diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py
index b1d6bae4f..2161b9434 100644
--- a/ironic/conf/conductor.py
+++ b/ironic/conf/conductor.py
@@ -358,6 +358,32 @@ opts = [
'model. The conductor does *not* record this value '
'otherwise, and this information is not backfilled '
'for prior instances which have been deployed.')),
+ cfg.IntOpt('max_concurrent_deploy',
+ default=250,
+ min=1,
+ mutable=True,
+ help=_('The maximum number of concurrent nodes in deployment '
+ 'which are permitted in this Ironic system. '
+ 'If this limit is reached, new requests will be '
+ 'rejected until the number of deployments in progress '
+ 'is lower than this maximum. As this is a security '
+ 'mechanism requests are not queued, and this setting '
+ 'is a global setting applying to all requests this '
+ 'conductor receives, regardless of access rights. '
+ 'The concurrent deployment limit cannot be disabled.')),
+ cfg.IntOpt('max_concurrent_clean',
+ default=50,
+ min=1,
+ mutable=True,
+ help=_('The maximum number of concurrent nodes in cleaning '
+ 'which are permitted in this Ironic system. '
+ 'If this limit is reached, new requests will be '
+ 'rejected until the number of nodes in cleaning '
+ 'is lower than this maximum. As this is a security '
+ 'mechanism requests are not queued, and this setting '
+ 'is a global setting applying to all requests this '
+ 'conductor receives, regardless of access rights. '
+ 'The concurrent clean limit cannot be disabled.')),
]
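
Both options are marked mutable, so they can be adjusted without a
restart; a short sketch of overriding them programmatically, the same
way the unit tests later in this diff do::

    from ironic.conf import CONF

    # Values here are illustrative; pick limits that match your scale.
    CONF.set_override('max_concurrent_deploy', 500, group='conductor')
    CONF.set_override('max_concurrent_clean', 100, group='conductor')
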
diff --git a/ironic/conf/deploy.py b/ironic/conf/deploy.py
index 198e0ac95..ff2020105 100644
--- a/ironic/conf/deploy.py
+++ b/ironic/conf/deploy.py
@@ -108,7 +108,7 @@ opts = [
'state. If True, shred will be invoked and cleaning '
'will continue.')),
cfg.IntOpt('disk_erasure_concurrency',
- default=1,
+ default=4,
min=1,
mutable=True,
help=_('Defines the target pool size used by Ironic Python '
diff --git a/ironic/db/api.py b/ironic/db/api.py
index 712919bb3..45e3fe2ca 100644
--- a/ironic/db/api.py
+++ b/ironic/db/api.py
@@ -1416,3 +1416,12 @@ class Connection(object, metaclass=abc.ABCMeta):
:param entries: A list of node history entry ids to be
queried for deletion.
"""
+
+ @abc.abstractmethod
+ def count_nodes_in_provision_state(self, state):
+ """Count the number of nodes in given provision state.
+
+ :param state: A provision_state value to match for the
+ count operation. This can be a single provision
+ state value or a list of values.
+ """
diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py
index 05d5cc45e..c14719af8 100644
--- a/ironic/db/sqlalchemy/api.py
+++ b/ironic/db/sqlalchemy/api.py
@@ -30,6 +30,7 @@ from oslo_utils import timeutils
from oslo_utils import uuidutils
from osprofiler import sqlalchemy as osp_sqlalchemy
import sqlalchemy as sa
+from sqlalchemy import or_
from sqlalchemy.orm.exc import NoResultFound, MultipleResultsFound
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Load
@@ -2400,3 +2401,26 @@ class Connection(api.Connection):
).filter(
models.NodeHistory.id.in_(entries)
).delete(synchronize_session=False)
+
+ def count_nodes_in_provision_state(self, state):
+ if not isinstance(state, list):
+ state = [state]
+ with _session_for_read() as session:
+ # Intentionally does not use the full ORM model
+ # because that is de-duped by pkey, but we already
+ # have unique constraints on UUID/name, so... shouldn't
+ # be a big deal. #JuliaFamousLastWords.
+ # Anyway, intent here is to be as quick as possible and
+ # literally have the DB do *all* of the work, so no
+ # client side ops occur. The column is also indexed,
+ # which means this will be an index based response.
+ # TODO(TheJulia): This might need to be revised for
+ # SQLAlchemy 2.0 as it should be a scalar select and count
+ # instead.
+ return session.query(
+ models.Node.provision_state
+ ).filter(
+ or_(
+ models.Node.provision_state == v for v in state
+ )
+ ).count()
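
A usage sketch of the new DB API call, matching how the conductor's
limiter invokes it earlier in this diff (``dbapi`` stands in for an
``ironic.db.api`` Connection instance)::

    from ironic.common import states

    # A single state...
    active = dbapi.count_nodes_in_provision_state(states.ACTIVE)

    # ...or a list of states, OR'd together into one indexed query.
    in_cleaning = dbapi.count_nodes_in_provision_state(
        [states.DELETING, states.CLEANING, states.CLEANWAIT])
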
diff --git a/ironic/tests/base.py b/ironic/tests/base.py
index 4b34ef0a4..348f15c20 100644
--- a/ironic/tests/base.py
+++ b/ironic/tests/base.py
@@ -27,6 +27,7 @@ import subprocess
import sys
import tempfile
from unittest import mock
+import warnings
import eventlet
eventlet.monkey_patch(os=False)
@@ -38,6 +39,7 @@ from oslo_log import log as logging
from oslo_serialization import jsonutils
from oslo_utils import uuidutils
from oslotest import base as oslo_test_base
+from sqlalchemy import exc as sqla_exc
from ironic.common import config as ironic_config
from ironic.common import context as ironic_context
@@ -70,6 +72,84 @@ def _patch_mock_callable(obj):
return False
+class WarningsFixture(fixtures.Fixture):
+ """Filters out warnings during test runs."""
+
+ def setUp(self):
+ super().setUp()
+
+ self._original_warning_filters = warnings.filters[:]
+
+ # NOTE(sdague): Make deprecation warnings only happen once. Otherwise
+ # this gets kind of crazy given the way that upstream python libs use
+ # this.
+ warnings.simplefilter('once', DeprecationWarning)
+
+ # NOTE(stephenfin): We get way too many of these. Silence them.
+ warnings.filterwarnings(
+ 'ignore',
+ message=(
+ 'Policy enforcement is depending on the value of .*. '
+ 'This key is deprecated. Please update your policy '
+ 'file to use the standard policy values.'
+ ),
+ )
+
+ # NOTE(mriedem): Ignore scope check UserWarnings from oslo.policy.
+ warnings.filterwarnings(
+ 'ignore',
+ message='Policy .* failed scope check',
+ category=UserWarning,
+ )
+
+ # Enable deprecation warnings to capture upcoming SQLAlchemy changes
+
+ warnings.filterwarnings(
+ 'ignore',
+ category=sqla_exc.SADeprecationWarning,
+ )
+
+ warnings.filterwarnings(
+ 'error',
+ module='ironic',
+ category=sqla_exc.SADeprecationWarning,
+ )
+
+ # Enable general SQLAlchemy warnings also to ensure we're not doing
+ # silly stuff. It's possible that we'll need to filter things out here
+ # with future SQLAlchemy versions, but that's a good thing
+
+ warnings.filterwarnings(
+ 'error',
+ module='ironic',
+ category=sqla_exc.SAWarning,
+ )
+
+ # ...but filter everything out until we get around to fixing them
+ # TODO(stephenfin): Fix all of these
+
+ warnings.filterwarnings(
+ 'ignore',
+ module='ironic',
+ message='SELECT statement has a cartesian product ',
+ category=sqla_exc.SAWarning,
+ )
+
+ # FIXME(stephenfin): We can remove this once oslo.db is fixed
+ # https://review.opendev.org/c/openstack/oslo.db/+/856453
+ warnings.filterwarnings(
+ 'ignore',
+ module='ironic',
+ message='TypeDecorator .* will not produce a cache key',
+ category=sqla_exc.SAWarning,
+ )
+
+ self.addCleanup(self._reset_warning_filters)
+
+ def _reset_warning_filters(self):
+ warnings.filters[:] = self._original_warning_filters
+
+
class ReplaceModule(fixtures.Fixture):
"""Replace a module with a fake module."""
@@ -113,6 +193,7 @@ class TestCase(oslo_test_base.BaseTestCase):
self.addCleanup(hash_ring.HashRingManager().reset)
self.useFixture(fixtures.EnvironmentVariable('http_proxy'))
self.policy = self.useFixture(policy_fixture.PolicyFixture())
+ self.useFixture(WarningsFixture())
driver_factory.HardwareTypesFactory._extension_manager = None
for factory in driver_factory._INTERFACE_LOADERS.values():
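
The fixture is wired into ironic's base ``TestCase`` above; a minimal
sketch of applying it directly in a test that does not inherit from it
(the test class is hypothetical)::

    from oslotest import base as oslo_test_base

    from ironic.tests import base as ironic_test_base

    class MyTestCase(oslo_test_base.BaseTestCase):
        def setUp(self):
            super().setUp()
            # Promotes SQLAlchemy warnings raised from ironic modules
            # to errors while silencing known-noisy policy warnings.
            self.useFixture(ironic_test_base.WarningsFixture())
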
diff --git a/ironic/tests/unit/common/test_pxe_utils.py b/ironic/tests/unit/common/test_pxe_utils.py
index 3e57b83ed..4d4fbb5b5 100644
--- a/ironic/tests/unit/common/test_pxe_utils.py
+++ b/ironic/tests/unit/common/test_pxe_utils.py
@@ -1357,7 +1357,7 @@ class PXEInterfacesTestCase(db_base.DbTestCase):
'LiveOS',
'squashfs.img')),
'ks_template':
- (CONF.anaconda.default_ks_template,
+ ('file://' + CONF.anaconda.default_ks_template,
os.path.join(CONF.deploy.http_root,
self.node.uuid,
'ks.cfg.template')),
@@ -1375,63 +1375,7 @@ class PXEInterfacesTestCase(db_base.DbTestCase):
self.assertEqual(expected_info, image_info)
# In the absence of a kickstart template in both instance_info and
# the image, the default kickstart template is used
- self.assertEqual(CONF.anaconda.default_ks_template,
- image_info['ks_template'][0])
- calls = [mock.call(task.node), mock.call(task.node)]
- boot_opt_mock.assert_has_calls(calls)
- # Instance info gets presedence over kickstart template on the
- # image
- properties['properties'] = {'ks_template': 'glance://template_id'}
- task.node.instance_info['ks_template'] = 'https://server/fake.tmpl'
- image_show_mock.return_value = properties
- image_info = pxe_utils.get_instance_image_info(
- task, ipxe_enabled=False)
- self.assertEqual('https://server/fake.tmpl',
- image_info['ks_template'][0])
-
- @mock.patch('ironic.drivers.modules.deploy_utils.get_boot_option',
- return_value='kickstart', autospec=True)
- @mock.patch.object(image_service.GlanceImageService, 'show', autospec=True)
- def test_get_instance_image_info_with_kickstart_url(
- self, image_show_mock, boot_opt_mock):
- properties = {'properties': {u'kernel_id': u'instance_kernel_uuid',
- u'ramdisk_id': u'instance_ramdisk_uuid',
- u'image_source': u'http://path/to/os/'}}
-
- expected_info = {'ramdisk':
- ('instance_ramdisk_uuid',
- os.path.join(CONF.pxe.tftp_root,
- self.node.uuid,
- 'ramdisk')),
- 'kernel':
- ('instance_kernel_uuid',
- os.path.join(CONF.pxe.tftp_root,
- self.node.uuid,
- 'kernel')),
- 'ks_template':
- (CONF.anaconda.default_ks_template,
- os.path.join(CONF.deploy.http_root,
- self.node.uuid,
- 'ks.cfg.template')),
- 'ks_cfg':
- ('',
- os.path.join(CONF.deploy.http_root,
- self.node.uuid,
- 'ks.cfg'))}
- image_show_mock.return_value = properties
- self.context.auth_token = 'fake'
- with task_manager.acquire(self.context, self.node.uuid,
- shared=True) as task:
- dii = task.node.driver_internal_info
- dii['is_source_a_path'] = True
- task.node.driver_internal_info = dii
- task.node.save()
- image_info = pxe_utils.get_instance_image_info(
- task, ipxe_enabled=False)
- self.assertEqual(expected_info, image_info)
- # In the absense of kickstart template in both instance_info and
- # image default kickstart template is used
- self.assertEqual(CONF.anaconda.default_ks_template,
+ self.assertEqual('file://' + CONF.anaconda.default_ks_template,
image_info['ks_template'][0])
calls = [mock.call(task.node), mock.call(task.node)]
boot_opt_mock.assert_has_calls(calls)
@@ -1463,7 +1407,7 @@ class PXEInterfacesTestCase(db_base.DbTestCase):
self.node.uuid,
'kernel')),
'ks_template':
- (CONF.anaconda.default_ks_template,
+ ('file://' + CONF.anaconda.default_ks_template,
os.path.join(CONF.deploy.http_root,
self.node.uuid,
'ks.cfg.template')),
@@ -1490,7 +1434,7 @@ class PXEInterfacesTestCase(db_base.DbTestCase):
self.assertEqual(expected_info, image_info)
# In the absence of a kickstart template in both instance_info and
# the image, the default kickstart template is used
- self.assertEqual(CONF.anaconda.default_ks_template,
+ self.assertEqual('file://' + CONF.anaconda.default_ks_template,
image_info['ks_template'][0])
calls = [mock.call(task.node), mock.call(task.node)]
boot_opt_mock.assert_has_calls(calls)
@@ -1577,6 +1521,46 @@ class PXEInterfacesTestCase(db_base.DbTestCase):
list(fake_pxe_info.values()),
True)
+ @mock.patch.object(os, 'chmod', autospec=True)
+ @mock.patch.object(pxe_utils, 'TFTPImageCache', lambda: None)
+ @mock.patch.object(pxe_utils, 'ensure_tree', autospec=True)
+ @mock.patch.object(deploy_utils, 'fetch_images', autospec=True)
+ def test_cache_ramdisk_kernel_ipxe_anaconda(self, mock_fetch_image,
+ mock_ensure_tree, mock_chmod):
+ expected_path = os.path.join(CONF.deploy.http_root,
+ self.node.uuid)
+ fake_pxe_info = {'ramdisk':
+ ('instance_ramdisk_uuid',
+ os.path.join(CONF.pxe.tftp_root,
+ self.node.uuid,
+ 'ramdisk')),
+ 'kernel':
+ ('instance_kernel_uuid',
+ os.path.join(CONF.pxe.tftp_root,
+ self.node.uuid,
+ 'kernel')),
+ 'ks_template':
+ ('file://' + CONF.anaconda.default_ks_template,
+ os.path.join(CONF.deploy.http_root,
+ self.node.uuid,
+ 'ks.cfg.template')),
+ 'ks_cfg':
+ ('',
+ os.path.join(CONF.deploy.http_root,
+ self.node.uuid,
+ 'ks.cfg'))}
+ expected = fake_pxe_info.copy()
+ expected.pop('ks_cfg')
+
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=True) as task:
+ pxe_utils.cache_ramdisk_kernel(task, fake_pxe_info,
+ ipxe_enabled=True)
+ mock_ensure_tree.assert_called_with(expected_path)
+ mock_fetch_image.assert_called_once_with(self.context, mock.ANY,
+ list(expected.values()),
+ True)
+
@mock.patch.object(pxe.PXEBoot, '__init__', lambda self: None)
class PXEBuildKickstartConfigOptionsTestCase(db_base.DbTestCase):
diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py
index 65261450a..a4c3d57b6 100644
--- a/ironic/tests/unit/conductor/test_cleaning.py
+++ b/ironic/tests/unit/conductor/test_cleaning.py
@@ -51,8 +51,6 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
'step': 'build_raid', 'priority': 0, 'interface': 'deploy'}
def __do_node_clean_validate_fail(self, mock_validate, clean_steps=None):
- # InvalidParameterValue should cause node to go to CLEANFAIL
- mock_validate.side_effect = exception.InvalidParameterValue('error')
tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
@@ -68,26 +66,42 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
self.assertIsNone(node.fault)
mock_validate.assert_called_once_with(mock.ANY, mock.ANY)
+ def __do_node_clean_validate_fail_invalid(self, mock_validate,
+ clean_steps=None):
+ # InvalidParameterValue should cause node to go to CLEANFAIL
+ mock_validate.side_effect = exception.InvalidParameterValue('error')
+ self.__do_node_clean_validate_fail(mock_validate,
+ clean_steps=clean_steps)
+
@mock.patch('ironic.drivers.modules.fake.FakePower.validate',
autospec=True)
def test__do_node_clean_automated_power_validate_fail(self, mock_validate):
- self.__do_node_clean_validate_fail(mock_validate)
+ self.__do_node_clean_validate_fail_invalid(mock_validate)
@mock.patch('ironic.drivers.modules.fake.FakePower.validate',
autospec=True)
def test__do_node_clean_manual_power_validate_fail(self, mock_validate):
- self.__do_node_clean_validate_fail(mock_validate, clean_steps=[])
+ self.__do_node_clean_validate_fail_invalid(mock_validate,
+ clean_steps=[])
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
autospec=True)
def test__do_node_clean_automated_network_validate_fail(self,
mock_validate):
- self.__do_node_clean_validate_fail(mock_validate)
+ self.__do_node_clean_validate_fail_invalid(mock_validate)
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
autospec=True)
def test__do_node_clean_manual_network_validate_fail(self, mock_validate):
- self.__do_node_clean_validate_fail(mock_validate, clean_steps=[])
+ self.__do_node_clean_validate_fail_invalid(mock_validate,
+ clean_steps=[])
+
+ @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
+ autospec=True)
+ def test__do_node_clean_network_error_fail(self, mock_validate):
+ # NetworkError should cause node to go to CLEANFAIL
+ mock_validate.side_effect = exception.NetworkError()
+ self.__do_node_clean_validate_fail(mock_validate)
@mock.patch.object(conductor_utils, 'LOG', autospec=True)
@mock.patch.object(conductor_steps, 'set_node_cleaning_steps',
diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py
index 378d65f15..5d84dbbef 100644
--- a/ironic/tests/unit/conductor/test_manager.py
+++ b/ironic/tests/unit/conductor/test_manager.py
@@ -1829,6 +1829,7 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin,
def test_do_node_deploy_maintenance(self, mock_iwdi):
mock_iwdi.return_value = False
+ self._start_service()
node = obj_utils.create_test_node(self.context, driver='fake-hardware',
maintenance=True)
exc = self.assertRaises(messaging.rpc.ExpectedException,
@@ -1843,6 +1844,7 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin,
self.assertFalse(mock_iwdi.called)
def _test_do_node_deploy_validate_fail(self, mock_validate, mock_iwdi):
+ self._start_service()
mock_iwdi.return_value = False
# InvalidParameterValue should be re-raised as InstanceDeployFailure
mock_validate.side_effect = exception.InvalidParameterValue('error')
@@ -2389,6 +2391,7 @@ class DoNodeTearDownTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
@mock.patch('ironic.drivers.modules.fake.FakePower.validate',
autospec=True)
def test_do_node_tear_down_validate_fail(self, mock_validate):
+ self._start_service()
# InvalidParameterValue should be re-raised as InstanceDeployFailure
mock_validate.side_effect = exception.InvalidParameterValue('error')
node = obj_utils.create_test_node(
@@ -8374,7 +8377,6 @@ class NodeHistoryRecordCleanupTestCase(mgr_utils.ServiceSetUpMixin,
# 9 retained due to days, 3 to config
self.service._manage_node_history(self.context)
events = objects.NodeHistory.list(self.context)
- print(events)
self.assertEqual(12, len(events))
events = objects.NodeHistory.list_by_node_id(self.context, 10)
self.assertEqual(4, len(events))
@@ -8394,3 +8396,73 @@ class NodeHistoryRecordCleanupTestCase(mgr_utils.ServiceSetUpMixin,
self.assertEqual('one', events[1].event)
self.assertEqual('two', events[2].event)
self.assertEqual('three', events[3].event)
+
+
+class ConcurrentActionLimitTestCase(mgr_utils.ServiceSetUpMixin,
+ db_base.DbTestCase):
+
+ def setUp(self):
+ super(ConcurrentActionLimitTestCase, self).setUp()
+ self._start_service()
+ self.node1 = obj_utils.get_test_node(
+ self.context,
+ driver='fake-hardware',
+ id=110,
+ uuid=uuidutils.generate_uuid())
+ self.node2 = obj_utils.get_test_node(
+ self.context,
+ driver='fake-hardware',
+ id=111,
+ uuid=uuidutils.generate_uuid())
+ self.node3 = obj_utils.get_test_node(
+ self.context,
+ driver='fake-hardware',
+ id=112,
+ uuid=uuidutils.generate_uuid())
+ self.node4 = obj_utils.get_test_node(
+ self.context,
+ driver='fake-hardware',
+ id=113,
+ uuid=uuidutils.generate_uuid())
+ # Create the nodes, as the tasks need to operate across tables.
+ self.node1.create()
+ self.node2.create()
+ self.node3.create()
+ self.node4.create()
+
+ def test_concurrent_action_limit_deploy(self):
+ self.node1.provision_state = states.DEPLOYING
+ self.node2.provision_state = states.DEPLOYWAIT
+ self.node1.save()
+ self.node2.save()
+ CONF.set_override('max_concurrent_deploy', 2, group='conductor')
+ self.assertRaises(
+ exception.ConcurrentActionLimit,
+ self.service._concurrent_action_limit,
+ 'provisioning')
+ self.service._concurrent_action_limit('unprovisioning')
+ self.service._concurrent_action_limit('cleaning')
+ CONF.set_override('max_concurrent_deploy', 3, group='conductor')
+ self.service._concurrent_action_limit('provisioning')
+
+ def test_concurrent_action_limit_cleaning(self):
+ self.node1.provision_state = states.DELETING
+ self.node2.provision_state = states.CLEANING
+ self.node3.provision_state = states.CLEANWAIT
+ self.node1.save()
+ self.node2.save()
+ self.node3.save()
+
+ CONF.set_override('max_concurrent_clean', 3, group='conductor')
+ self.assertRaises(
+ exception.ConcurrentActionLimit,
+ self.service._concurrent_action_limit,
+ 'cleaning')
+ self.assertRaises(
+ exception.ConcurrentActionLimit,
+ self.service._concurrent_action_limit,
+ 'unprovisioning')
+ self.service._concurrent_action_limit('provisioning')
+ CONF.set_override('max_concurrent_clean', 4, group='conductor')
+ self.service._concurrent_action_limit('cleaning')
+ self.service._concurrent_action_limit('unprovisioning')
diff --git a/ironic/tests/unit/db/test_nodes.py b/ironic/tests/unit/db/test_nodes.py
index eb5200f4e..b4d70b2dd 100644
--- a/ironic/tests/unit/db/test_nodes.py
+++ b/ironic/tests/unit/db/test_nodes.py
@@ -1081,3 +1081,39 @@ class DbNodeTestCase(base.DbTestCase):
self.dbapi.check_node_list,
[node1.uuid, 'this/cannot/be/a/name'])
self.assertIn('this/cannot/be/a/name', str(exc))
+
+ def test_node_provision_state_count(self):
+ active_nodes = 5
+ manageable_nodes = 3
+ deploywait_nodes = 1
+ for i in range(0, active_nodes):
+ utils.create_test_node(uuid=uuidutils.generate_uuid(),
+ provision_state=states.ACTIVE)
+ for i in range(0, manageable_nodes):
+ utils.create_test_node(uuid=uuidutils.generate_uuid(),
+ provision_state=states.MANAGEABLE)
+ for i in range(0, deploywait_nodes):
+ utils.create_test_node(uuid=uuidutils.generate_uuid(),
+ provision_state=states.DEPLOYWAIT)
+
+ self.assertEqual(
+ active_nodes,
+ self.dbapi.count_nodes_in_provision_state(states.ACTIVE)
+ )
+ self.assertEqual(
+ manageable_nodes,
+ self.dbapi.count_nodes_in_provision_state(states.MANAGEABLE)
+ )
+ self.assertEqual(
+ deploywait_nodes,
+ self.dbapi.count_nodes_in_provision_state(states.DEPLOYWAIT)
+ )
+ total = active_nodes + manageable_nodes + deploywait_nodes
+ self.assertEqual(
+ total,
+ self.dbapi.count_nodes_in_provision_state([
+ states.ACTIVE,
+ states.MANAGEABLE,
+ states.DEPLOYWAIT
+ ])
+ )
diff --git a/releasenotes/notes/concurrency-limit-control-4b101bca7136e08d.yaml b/releasenotes/notes/concurrency-limit-control-4b101bca7136e08d.yaml
new file mode 100644
index 000000000..5eb8dd449
--- /dev/null
+++ b/releasenotes/notes/concurrency-limit-control-4b101bca7136e08d.yaml
@@ -0,0 +1,23 @@
+---
+features:
+ - |
+ Adds a concurrency limiter for the number of nodes in states related to
+ *Cleaning* and *Provisioning* operations across the ironic deployment.
+ These settings default to a maximum of ``250`` concurrent deployments
+ and ``50`` concurrent deletes and cleaning operations, and can be tuned
+ using ``[conductor]max_concurrent_deploy`` and
+ ``[conductor]max_concurrent_clean``, respectively.
+ The defaults should generally be good for most operators in most cases.
+ Large scale operators should evaluate the defaults and tune them
+ appropriately, as this feature is a security mechanism and cannot be
+ disabled.
+upgrade:
+ - |
+ Large scale operators should be aware that a new feature, referred to as
+ "Concurrent Action Limit" was introduced as a security mechanism to
+ provide a means to limit attackers or faulty scripts from potentially
+ causing irreparable harm to an environment. This feature cannot be
+ disabled, and operators are encouraged to tune the new settings
+ ``[conductor]max_concurrent_deploy`` and
+ ``[conductor]max_concurrent_clean`` to match the needs of their
+ environment.
diff --git a/releasenotes/notes/correct-source-path-handling-lookups-4ce2023a56372f10.yaml b/releasenotes/notes/correct-source-path-handling-lookups-4ce2023a56372f10.yaml
new file mode 100644
index 000000000..10d270a45
--- /dev/null
+++ b/releasenotes/notes/correct-source-path-handling-lookups-4ce2023a56372f10.yaml
@@ -0,0 +1,16 @@
+---
+fixes:
+ - |
+ Fixes an issue where image information retrieval would fail when a
+ path was supplied while using the ``anaconda`` deploy interface,
+ as HTTP ``HEAD`` requests on a URL path have no ``Content-Length``.
+ We now check whether a path is in use before attempting to collect
+ additional configuration data from what is normally expected to
+ be Glance.
+ - |
+ Fixes an issue where falling back to the default kickstart template
+ value would result in an error indicating
+ "Scheme-less image href is not a UUID".
+ This was because the handling code, when falling back to the default,
+ did not explicitly mark the value as a file URL before saving it.
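
A worked example of the second fix (the template path is a hypothetical
default)::

    import os

    # Hypothetical default; the real value comes from the [anaconda]
    # section of ironic.conf.
    default_ks_template = '/etc/ironic/ks.cfg.template'

    # Previously the scheme-less path was saved as-is and later rejected
    # with "Scheme-less image href is not a UUID". With this change the
    # stored value carries an explicit file URL scheme:
    ks_template = 'file://' + os.path.abspath(default_ks_template)
    # -> 'file:///etc/ironic/ks.cfg.template'
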
diff --git a/releasenotes/notes/fix-cleaning-stuck-on-networkerror-4aedbf3673413af6.yaml b/releasenotes/notes/fix-cleaning-stuck-on-networkerror-4aedbf3673413af6.yaml
new file mode 100644
index 000000000..f7769afc1
--- /dev/null
+++ b/releasenotes/notes/fix-cleaning-stuck-on-networkerror-4aedbf3673413af6.yaml
@@ -0,0 +1,8 @@
+---
+fixes:
+ - |
+ Fixes an issue where cleaning operations could fail in such a way that was
+ not easily recoverable when pre-cleaning network interface configuration
+ was validated, yet contained invalid configuration.
+ Now Ironic properly captures the error and exits from cleaning in a
+ state which allows for cleaning to be retried.
diff --git a/releasenotes/notes/maximum-disk-erasure-concurrency-6d132bd84e3df4cf.yaml b/releasenotes/notes/maximum-disk-erasure-concurrency-6d132bd84e3df4cf.yaml
new file mode 100644
index 000000000..f09421593
--- /dev/null
+++ b/releasenotes/notes/maximum-disk-erasure-concurrency-6d132bd84e3df4cf.yaml
@@ -0,0 +1,10 @@
+---
+other:
+ - |
+ The maximum disk erasure concurrency setting,
+ ``[deploy]disk_erasure_concurrency``, has been increased to 4.
+ Previously, this was kept at 1 in order to maintain continuity of
+ experience, but operators have not reported any issues with increased
+ concurrency, and as such we feel comfortable enabling concurrent
+ disk erasure/cleaning upstream. This setting applies to the
+ ``erase_devices`` clean step.
diff --git a/tox.ini b/tox.ini
index cffaa8f1d..247e819a4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -11,7 +11,6 @@ setenv = VIRTUAL_ENV={envdir}
PYTHONDONTWRITEBYTECODE = 1
LANGUAGE=en_US
LC_ALL=en_US.UTF-8
- PYTHONWARNINGS=default::DeprecationWarning
deps =
-c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/master}
-r{toxinidir}/requirements.txt
diff --git a/zuul.d/ironic-jobs.yaml b/zuul.d/ironic-jobs.yaml
index c9b969d4f..9d7435bd3 100644
--- a/zuul.d/ironic-jobs.yaml
+++ b/zuul.d/ironic-jobs.yaml
@@ -217,6 +217,48 @@
s-proxy: False
- job:
+ name: ironic-standalone-anaconda
+ parent: ironic-standalone-redfish
+ description:
+ Test ironic with the anaconda deployment interface.
+ The test also uses Redfish.
+ required-projects:
+ - opendev.org/openstack/sushy-tools
+ irrelevant-files:
+ - ^.*\.rst$
+ - ^api-ref/.*$
+ - ^doc/.*$
+ - ^install-guide/.*$
+ - ^ironic/locale/.*$
+ - ^ironic/tests/.*$
+ - ^releasenotes/.*$
+ - ^setup.cfg$
+ - ^test-requirements.txt$
+ - ^tools/.*$
+ - ^tox.ini$
+ vars:
+ tempest_test_regex: BaremetalRedfishIPxeAnacondaNoGlance
+ tempest_test_timeout: 4800
+ tempest_concurrency: 2
+ devstack_localrc:
+ IRONIC_ENABLED_DEPLOY_INTERFACES: "anaconda"
+ IRONIC_VM_COUNT: 2
+ IRONIC_VM_VOLUME_COUNT: 1
+ IRONIC_VM_SPECS_RAM: 3192
+ IRONIC_VM_SPECS_CPU: 3
+ IRONIC_ENFORCE_SCOPE: True
+ # We're using a lot of disk space in this job. Some testing nodes have
+ # a small root partition, so use /opt which is mounted from a bigger
+ # ephemeral partition on such nodes
+ LIBVIRT_STORAGE_POOL_PATH: /opt/libvirt/images
+ IRONIC_ANACONDA_IMAGE_REF: http://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os/
+ IRONIC_ANACONDA_KERNEL_REF: http://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os/images/pxeboot/vmlinuz
+ IRONIC_ANACONDA_RAMDISK_REF: http://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os/images/pxeboot/initrd.img
+ IRONIC_ANACONDA_INSECURE_HEARTBEAT: True
+ IRONIC_DEPLOY_CALLBACK_WAIT_TIMEOUT: 3600
+ IRONIC_PXE_BOOT_RETRY_TIMEOUT: 3600
+
+- job:
name: ironic-tempest-bios-redfish-pxe
description: "Deploy ironic node over PXE using BIOS boot mode"
parent: ironic-tempest-uefi-redfish-vmedia
diff --git a/zuul.d/project.yaml b/zuul.d/project.yaml
index 8b821f816..586675f87 100644
--- a/zuul.d/project.yaml
+++ b/zuul.d/project.yaml
@@ -45,6 +45,8 @@
voting: false
- ironic-tempest-ipxe-ipv6:
voting: false
+ - ironic-standalone-anaconda:
+ voting: false
- ironic-inspector-tempest-rbac-scope-enforced:
voting: false
- bifrost-integration-tinyipa-ubuntu-focal: