-rw-r--r--  doc/source/admin/fast-track.rst                                        |  13
-rw-r--r--  doc/source/admin/interfaces/deploy.rst                                 |  40
-rw-r--r--  doc/source/install/refarch/common.rst                                  |   5
-rw-r--r--  ironic/api/controllers/v1/ramdisk.py                                   |   6
-rw-r--r--  ironic/common/image_service.py                                         |  35
-rw-r--r--  ironic/common/states.py                                                |   3
-rw-r--r--  ironic/conductor/cleaning.py                                           |  26
-rw-r--r--  ironic/conductor/manager.py                                            |   3
-rw-r--r--  ironic/conductor/task_manager.py                                       |  11
-rw-r--r--  ironic/conductor/utils.py                                              |  19
-rw-r--r--  ironic/db/sqlalchemy/api.py                                            |  33
-rw-r--r--  ironic/drivers/modules/deploy_utils.py                                 |  37
-rw-r--r--  ironic/tests/unit/common/test_image_service.py                         | 107
-rw-r--r--  ironic/tests/unit/conductor/test_cleaning.py                           |  21
-rw-r--r--  ironic/tests/unit/conductor/test_manager.py                            |   3
-rw-r--r--  ironic/tests/unit/conductor/test_utils.py                              |  55
-rw-r--r--  ironic/tests/unit/db/test_api.py                                       |  29
-rw-r--r--  ironic/tests/unit/drivers/modules/test_deploy_utils.py                 |  73
-rw-r--r--  releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml                |   8
-rw-r--r--  releasenotes/notes/cross-link-1ffd1a4958f14fd7.yaml                    |   5
-rw-r--r--  releasenotes/notes/fix-online-version-migration-db432a7b239647fa.yaml  |  14
-rw-r--r--  releasenotes/notes/fix-power-off-token-wipe-e7d605997f00d39d.yaml      |   6
-rw-r--r--  releasenotes/notes/no-recalculate-653e524fd6160e72.yaml                |   5
-rw-r--r--  releasenotes/notes/wipe-agent-token-upon-cleaning-timeout-c9add514fad1b02c.yaml | 7
24 files changed, 392 insertions(+), 172 deletions(-)
diff --git a/doc/source/admin/fast-track.rst b/doc/source/admin/fast-track.rst
index 20ca6199f..e42942818 100644
--- a/doc/source/admin/fast-track.rst
+++ b/doc/source/admin/fast-track.rst
@@ -15,6 +15,19 @@ provisioned within a short period of time.
the ``noop`` networking. The case where inspection, cleaning and
provisioning networks are different is not supported.
+.. note::
+ Fast track mode is very sensitive to long-running processes on the conductor
+ side that may prevent agent heartbeats from being registered.
+
+ For example, converting a large image to the raw format may take long enough
+ to reach the fast track timeout. In this case, you can either :ref:`use raw
+ images <stream_raw_images>` or move the conversion to the agent side with:
+
+ .. code-block:: ini
+
+ [DEFAULT]
+ force_raw_images = False
+
Enabling
========
diff --git a/doc/source/admin/interfaces/deploy.rst b/doc/source/admin/interfaces/deploy.rst
index 7db5a24ff..79d004ad0 100644
--- a/doc/source/admin/interfaces/deploy.rst
+++ b/doc/source/admin/interfaces/deploy.rst
@@ -81,6 +81,46 @@ accessible from HTTP service. Please refer to configuration option
``FollowSymLinks`` if you are using Apache HTTP server, or
``disable_symlinks`` if Nginx HTTP server is in use.
+.. _stream_raw_images:
+
+Streaming raw images
+--------------------
+
+The Bare Metal service is capable of streaming raw images directly to the
+target disk of a node, without caching them in the node's RAM. When the source
+image is not already raw, the conductor will convert the image and calculate
+the new checksum.
+
+.. note::
+ If no algorithm is specified via the ``image_os_hash_algo`` field, or if
+ this field is set to ``md5``, SHA256 is used for the updated checksum.
+
+For HTTP or local file images that are already raw, you need to explicitly set
+the disk format to prevent the checksum from being unnecessarily re-calculated.
+For example:
+
+.. code-block:: shell
+
+ baremetal node set <node> \
+ --instance-info image_source=http://server/myimage.img \
+ --instance-info image_os_hash_algo=sha512 \
+ --instance-info image_os_hash_value=<SHA512 of the raw image> \
+ --instance-info image_disk_format=raw
+
+To disable this feature and cache images in the node's RAM, set
+
+.. code-block:: ini
+
+ [agent]
+ stream_raw_images = False
+
+To disable the conductor-side conversion completely, set
+
+.. code-block:: ini
+
+ [DEFAULT]
+ force_raw_images = False
+
.. _ansible-deploy:
Ansible deploy
diff --git a/doc/source/install/refarch/common.rst b/doc/source/install/refarch/common.rst
index 800632fd5..ce0dedfb1 100644
--- a/doc/source/install/refarch/common.rst
+++ b/doc/source/install/refarch/common.rst
@@ -277,9 +277,8 @@ the space requirements are different:
In both cases a cached image is converted to raw if ``force_raw_images``
is ``True`` (the default).
- .. note::
- ``image_download_source`` can also be provided in the node's
- ``driver_info`` or ``instance_info``. See :ref:`image_download_source`.
+ See :ref:`image_download_source` and :ref:`stream_raw_images` for more
+ details.
* When network boot is used, the instance image kernel and ramdisk are cached
locally while the instance is active.
diff --git a/ironic/api/controllers/v1/ramdisk.py b/ironic/api/controllers/v1/ramdisk.py
index 5feef5e02..b98eb7dc2 100644
--- a/ironic/api/controllers/v1/ramdisk.py
+++ b/ironic/api/controllers/v1/ramdisk.py
@@ -131,13 +131,17 @@ class LookupController(rest.RestController):
else:
node = objects.Node.get_by_port_addresses(
api.request.context, valid_addresses)
- except exception.NotFound:
+ except exception.NotFound as e:
# NOTE(dtantsur): we are reraising the same exception to make sure
# we don't disclose the difference between nodes that are not found
# at all and nodes in a wrong state by different error messages.
+ LOG.error('No node has been found during lookup: %s', e)
raise exception.NotFound()
if CONF.api.restrict_lookup and not self.lookup_allowed(node):
+ LOG.error('Lookup is not allowed for node %(node)s in the '
+ 'provision state %(state)s',
+ {'node': node.uuid, 'state': node.provision_state})
raise exception.NotFound()
if api_utils.allow_agent_token():
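In isolation, the lookup change above boils down to a simple pattern: log the precise reason server-side, but always return the same opaque 404 to the ramdisk. A stand-alone sketch, where ``find_node`` and ``lookup_allowed`` are hypothetical stand-ins rather than Ironic's API:

.. code-block:: python

    import logging

    LOG = logging.getLogger(__name__)


    class NotFound(Exception):
        """Opaque 404 returned to the ramdisk regardless of the real cause."""


    def lookup(find_node, lookup_allowed, addresses):
        # Log the exact reason for operators, but raise the same generic
        # NotFound so callers cannot tell "unknown node" apart from
        # "node in a disallowed provision state".
        try:
            node = find_node(addresses)
        except LookupError as e:
            LOG.error('No node has been found during lookup: %s', e)
            raise NotFound()
        if not lookup_allowed(node):
            LOG.error('Lookup is not allowed for node %(node)s in the '
                      'provision state %(state)s',
                      {'node': node.uuid, 'state': node.provision_state})
            raise NotFound()
        return node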
diff --git a/ironic/common/image_service.py b/ironic/common/image_service.py
index 5280ee6bc..4b95b5152 100644
--- a/ironic/common/image_service.py
+++ b/ironic/common/image_service.py
@@ -34,10 +34,6 @@ from ironic.common import utils
from ironic.conf import CONF
IMAGE_CHUNK_SIZE = 1024 * 1024 # 1mb
-# NOTE(kaifeng) Image will be truncated to 2GiB by sendfile,
-# we use a large chunk size here for a better performance
-# while keep the chunk size less than the size limit.
-SENDFILE_CHUNK_SIZE = 1024 * 1024 * 1024 # 1Gb
LOG = log.getLogger(__name__)
@@ -264,26 +260,23 @@ class FileImageService(BaseImageService):
"""
source_image_path = self.validate_href(image_href)
dest_image_path = image_file.name
- local_device = os.stat(dest_image_path).st_dev
try:
- # We should have read and write access to source file to create
- # hard link to it.
- if (local_device == os.stat(source_image_path).st_dev
- and os.access(source_image_path, os.R_OK | os.W_OK)):
- image_file.close()
- os.remove(dest_image_path)
+ image_file.close()
+ os.remove(dest_image_path)
+
+ try:
os.link(source_image_path, dest_image_path)
+ except OSError as exc:
+ LOG.debug('Could not create a link from %(src)s to %(dest)s, '
+ 'will copy the content instead. Error: %(exc)s.',
+ {'src': source_image_path, 'dest': dest_image_path,
+ 'exc': exc})
else:
- filesize = os.path.getsize(source_image_path)
- offset = 0
- with open(source_image_path, 'rb') as input_img:
- while offset < filesize:
- count = min(SENDFILE_CHUNK_SIZE, filesize - offset)
- nbytes_out = os.sendfile(image_file.fileno(),
- input_img.fileno(),
- offset,
- count)
- offset += nbytes_out
+ return
+
+ # NOTE(dtantsur): starting with Python 3.8, copyfile() uses
+ # efficient copying (i.e. sendfile) under the hood.
+ shutil.copyfile(source_image_path, dest_image_path)
except Exception as e:
raise exception.ImageDownloadFailed(image_href=image_href,
reason=str(e))
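The new download path reduces to a link-then-copy fallback. A minimal stand-alone sketch (the function name is illustrative), assuming the destination file has already been removed as in the hunk above:

.. code-block:: python

    import logging
    import os
    import shutil

    LOG = logging.getLogger(__name__)


    def place_image(source_image_path, dest_image_path):
        # A hard link is instantaneous but fails across devices (EXDEV) or
        # when permissions do not allow it; fall back to a full copy then.
        try:
            os.link(source_image_path, dest_image_path)
            return
        except OSError as exc:
            LOG.debug('Could not create a link from %(src)s to %(dest)s, '
                      'will copy the content instead. Error: %(exc)s.',
                      {'src': source_image_path, 'dest': dest_image_path,
                       'exc': exc})
        # On Python 3.8+ copyfile() uses efficient copying (e.g. sendfile)
        # under the hood, so no manual chunking is needed anymore.
        shutil.copyfile(source_image_path, dest_image_path)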
diff --git a/ironic/common/states.py b/ironic/common/states.py
index 89b710189..f2238b41b 100644
--- a/ironic/common/states.py
+++ b/ironic/common/states.py
@@ -269,6 +269,9 @@ _FASTTRACK_LOOKUP_ALLOWED_STATES = (ENROLL, MANAGEABLE, AVAILABLE,
FASTTRACK_LOOKUP_ALLOWED_STATES = frozenset(_FASTTRACK_LOOKUP_ALLOWED_STATES)
"""States where API lookups are permitted with fast track enabled."""
+FAILURE_STATES = frozenset((DEPLOYFAIL, CLEANFAIL, INSPECTFAIL,
+ RESCUEFAIL, UNRESCUEFAIL, ADOPTFAIL))
+
##############
# Power states
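The new frozenset is used to decide whether an existing ``last_error`` should survive a follow-up action (see the conductor change below). A tiny self-contained example with illustrative state strings:

.. code-block:: python

    # Illustrative values; the real constants live in ironic.common.states.
    DEPLOYFAIL = 'deploy failed'
    CLEANFAIL = 'clean failed'

    FAILURE_STATES = frozenset((DEPLOYFAIL, CLEANFAIL))


    def should_clear_last_error(provision_state):
        # Keep the original failure message when the node is already in a
        # failure state; only a genuinely new operation should clear it.
        return provision_state not in FAILURE_STATES


    print(should_clear_last_error('available'))  # True
    print(should_clear_last_error(CLEANFAIL))    # False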
diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py
index 53d66ddd8..5f69a4ab9 100644
--- a/ironic/conductor/cleaning.py
+++ b/ironic/conductor/cleaning.py
@@ -247,12 +247,21 @@ def do_next_clean_step(task, step_index, disable_ramdisk=None):
task.process_event(event)
+def get_last_error(node):
+ last_error = _('By request, the clean operation was aborted')
+ if node.clean_step:
+ last_error += (
+ _(' during or after the completion of step "%s"')
+ % conductor_steps.step_id(node.clean_step)
+ )
+ return last_error
+
+
@task_manager.require_exclusive_lock
-def do_node_clean_abort(task, step_name=None):
+def do_node_clean_abort(task):
"""Internal method to abort an ongoing operation.
:param task: a TaskManager instance with an exclusive lock
- :param step_name: The name of the clean step.
"""
node = task.node
try:
@@ -270,12 +279,13 @@ def do_node_clean_abort(task, step_name=None):
set_fail_state=False)
return
+ last_error = get_last_error(node)
info_message = _('Clean operation aborted for node %s') % node.uuid
- last_error = _('By request, the clean operation was aborted')
- if step_name:
- msg = _(' after the completion of step "%s"') % step_name
- last_error += msg
- info_message += msg
+ if node.clean_step:
+ info_message += (
+ _(' during or after the completion of step "%s"')
+ % node.clean_step
+ )
node.last_error = last_error
node.clean_step = None
@@ -317,7 +327,7 @@ def continue_node_clean(task):
target_state = None
task.process_event('fail', target_state=target_state)
- do_node_clean_abort(task, step_name)
+ do_node_clean_abort(task)
return
LOG.debug('The cleaning operation for node %(node)s was '
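The new ``get_last_error()`` helper builds the abort message from the current clean step. A rough sketch of its behaviour, with ``step_id`` as a hypothetical stand-in for ``conductor_steps.step_id``:

.. code-block:: python

    def step_id(step):
        # Hypothetical stand-in producing a compact step identifier.
        return '.'.join(str(step[k]) for k in ('interface', 'step') if k in step)


    def get_last_error(clean_step):
        last_error = 'By request, the clean operation was aborted'
        if clean_step:
            last_error += (' during or after the completion of step "%s"'
                           % step_id(clean_step))
        return last_error


    print(get_last_error(None))
    print(get_last_error({'interface': 'deploy', 'step': 'erase_devices'}))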
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 7e98459ff..a50722597 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -1348,7 +1348,8 @@ class ConductorManager(base_manager.BaseConductorManager):
callback=self._spawn_worker,
call_args=(cleaning.do_node_clean_abort, task),
err_handler=utils.provisioning_error_handler,
- target_state=target_state)
+ target_state=target_state,
+ last_error=cleaning.get_last_error(node))
return
if node.provision_state == states.RESCUEWAIT:
diff --git a/ironic/conductor/task_manager.py b/ironic/conductor/task_manager.py
index 509c9ce92..922e74cf6 100644
--- a/ironic/conductor/task_manager.py
+++ b/ironic/conductor/task_manager.py
@@ -527,7 +527,8 @@ class TaskManager(object):
self.release_resources()
def process_event(self, event, callback=None, call_args=None,
- call_kwargs=None, err_handler=None, target_state=None):
+ call_kwargs=None, err_handler=None, target_state=None,
+ last_error=None):
"""Process the given event for the task's current state.
:param event: the name of the event to process
@@ -540,6 +541,8 @@ class TaskManager(object):
prev_target_state)
:param target_state: if specified, the target provision state for the
node. Otherwise, use the target state from the fsm
+ :param last_error: last error to set on the node together with
+ the state transition.
:raises: InvalidState if the event is not allowed by the associated
state machine
"""
@@ -572,13 +575,15 @@ class TaskManager(object):
# set up the async worker
if callback:
- # clear the error if we're going to start work in a callback
- self.node.last_error = None
+ # update the error if we're going to start work in a callback
+ self.node.last_error = last_error
if call_args is None:
call_args = ()
if call_kwargs is None:
call_kwargs = {}
self.spawn_after(callback, *call_args, **call_kwargs)
+ elif last_error is not None:
+ self.node.last_error = last_error
# publish the state transition by saving the Node
self.node.save()
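Condensed, the new ``last_error`` handling in ``process_event()`` behaves like the following sketch (a model of the branch above, not the real ``TaskManager``):

.. code-block:: python

    def apply_last_error(node, callback=None, last_error=None):
        # With a callback, last_error replaces the previous value outright,
        # so it is still cleared unless an explicit message is passed in.
        if callback:
            node['last_error'] = last_error
        # Without a callback, only an explicit message overwrites the field.
        elif last_error is not None:
            node['last_error'] = last_error
        return node


    node = {'last_error': 'old failure'}
    print(apply_last_error(node, callback=lambda: None))  # cleared to None
    print(apply_last_error(node, last_error='By request, the clean operation '
                                            'was aborted'))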
diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py
index c107f076f..2272c0df7 100644
--- a/ironic/conductor/utils.py
+++ b/ironic/conductor/utils.py
@@ -297,14 +297,23 @@ def node_power_action(task, new_state, timeout=None):
node = task.node
if _can_skip_state_change(task, new_state):
+ # NOTE(TheJulia): Even if we are not changing the power state,
+ # we need to wipe the token out, just in case for some reason
+ # the power was turned off outside of our interaction/management.
+ if new_state in (states.POWER_OFF, states.SOFT_POWER_OFF,
+ states.REBOOT, states.SOFT_REBOOT):
+ wipe_internal_info_on_power_off(node)
+ node.save()
return
target_state = _calculate_target_state(new_state)
# Set the target_power_state and clear any last_error, if we're
# starting a new operation. This will expose to other processes
- # and clients that work is in progress.
- node['target_power_state'] = target_state
- node['last_error'] = None
+ # and clients that work is in progress. Keep the last_error intact
+ # if the power action happens as a result of a failure.
+ node.target_power_state = target_state
+ if node.provision_state not in states.FAILURE_STATES:
+ node.last_error = None
node.timestamp_driver_internal_info('last_power_state_change')
# NOTE(dtantsur): wipe token on shutting down, otherwise a reboot in
# fast-track (or an accidentally booted agent) will cause subsequent
@@ -479,9 +488,9 @@ def cleaning_error_handler(task, logmsg, errmsg=None, traceback=False,
node.del_driver_internal_info('cleaning_reboot')
node.del_driver_internal_info('cleaning_polling')
node.del_driver_internal_info('skip_current_clean_step')
- # We don't need to keep the old agent URL
+ # We don't need to keep the old agent URL, or token
# as it should change upon the next cleaning attempt.
- node.del_driver_internal_info('agent_url')
+ wipe_token_and_url(task)
# For manual cleaning, the target provision state is MANAGEABLE, whereas
# for automated cleaning, it is AVAILABLE.
manual_clean = node.target_provision_state == states.MANAGEABLE
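Two behaviours above are worth spelling out: a skipped power change still wipes agent metadata for power-off style requests, and ``last_error`` is preserved in failure states. A hedged sketch of the first one, using plain dictionaries instead of node objects:

.. code-block:: python

    def wipe_agent_info_if_powering_off(node, new_state, power_off_like_states):
        # Even when the requested power state matches the current one, a
        # power-off style request wipes agent data, in case power was
        # changed outside of Ironic's management.
        if new_state in power_off_like_states:
            for key in ('agent_secret_token', 'agent_cached_deploy_steps'):
                node['driver_internal_info'].pop(key, None)
        return node


    node = {'driver_internal_info': {'agent_secret_token': 'token',
                                     'agent_cached_deploy_steps': ['steps']}}
    print(wipe_agent_info_if_powering_off(node, 'power off',
                                          ('power off', 'rebooting')))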
diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py
index c14719af8..2618f106c 100644
--- a/ironic/db/sqlalchemy/api.py
+++ b/ironic/db/sqlalchemy/api.py
@@ -1716,9 +1716,12 @@ class Connection(api.Connection):
max_to_migrate = max_count or total_to_migrate
for model in sql_models:
+ use_node_id = False
+ if (not hasattr(model, 'id') and hasattr(model, 'node_id')):
+ use_node_id = True
version = mapping[model.__name__][0]
num_migrated = 0
- with _session_for_write():
+ with _session_for_write() as session:
query = model_query(model).filter(model.version != version)
# NOTE(rloo) Caution here; after doing query.count(), it is
# possible that the value is different in the
@@ -1729,13 +1732,27 @@ class Connection(api.Connection):
# max_to_migrate objects.
ids = []
for obj in query.slice(0, max_to_migrate):
- ids.append(obj['id'])
- num_migrated = (
- model_query(model).
- filter(sql.and_(model.id.in_(ids),
- model.version != version)).
- update({model.version: version},
- synchronize_session=False))
+ if not use_node_id:
+ ids.append(obj['id'])
+ else:
+ # BIOSSettings, NodeTrait, NodeTag do not have id
+ # columns, fall back to node_id as they all have
+ # it.
+ ids.append(obj['node_id'])
+ if not use_node_id:
+ num_migrated = (
+ session.query(model).
+ filter(sql.and_(model.id.in_(ids),
+ model.version != version)).
+ update({model.version: version},
+ synchronize_session=False))
+ else:
+ num_migrated = (
+ session.query(model).
+ filter(sql.and_(model.node_id.in_(ids),
+ model.version != version)).
+ update({model.version: version},
+ synchronize_session=False))
else:
num_migrated = (
model_query(model).
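The migration branch above is driven by one decision: tables without an ``id`` column (``BIOSSettings``, ``NodeTrait``, ``NodeTag``) are addressed by ``node_id`` instead. A condensed sketch of that selection, with the SQLAlchemy specifics omitted:

.. code-block:: python

    def pick_migration_key(model):
        # Fall back to node_id for models that do not define an id column.
        if not hasattr(model, 'id') and hasattr(model, 'node_id'):
            return 'node_id'
        return 'id'


    class FakeNodeTrait:        # no `id` attribute on purpose
        node_id = 42


    class FakeNode:
        id = 7
        node_id = 42


    print(pick_migration_key(FakeNodeTrait))  # node_id
    print(pick_migration_key(FakeNode))       # id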
diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py
index 13f91e9cd..fd83f9f08 100644
--- a/ironic/drivers/modules/deploy_utils.py
+++ b/ironic/drivers/modules/deploy_utils.py
@@ -1093,6 +1093,11 @@ def _cache_and_convert_image(task, instance_info, image_info=None):
_, image_path = cache_instance_image(task.context, task.node,
force_raw=force_raw)
if force_raw or image_info is None:
+ if image_info is None:
+ initial_format = instance_info.get('image_disk_format')
+ else:
+ initial_format = image_info.get('disk_format')
+
if force_raw:
instance_info['image_disk_format'] = 'raw'
else:
@@ -1108,21 +1113,29 @@ def _cache_and_convert_image(task, instance_info, image_info=None):
# sha256.
if image_info is None:
os_hash_algo = instance_info.get('image_os_hash_algo')
+ hash_value = instance_info.get('image_os_hash_value')
+ old_checksum = instance_info.get('image_checksum')
else:
os_hash_algo = image_info.get('os_hash_algo')
+ hash_value = image_info.get('os_hash_value')
+ old_checksum = image_info.get('checksum')
+
+ if initial_format != instance_info['image_disk_format']:
+ if not os_hash_algo or os_hash_algo == 'md5':
+ LOG.debug("Checksum algorithm for image %(image)s for node "
+ "%(node)s is set to '%(algo)s', changing to sha256",
+ {'algo': os_hash_algo, 'node': task.node.uuid,
+ 'image': image_path})
+ os_hash_algo = 'sha256'
+
+ LOG.debug('Recalculating checksum for image %(image)s for node '
+ '%(node)s due to image conversion',
+ {'image': image_path, 'node': task.node.uuid})
+ instance_info['image_checksum'] = None
+ hash_value = compute_image_checksum(image_path, os_hash_algo)
+ else:
+ instance_info['image_checksum'] = old_checksum
- if not os_hash_algo or os_hash_algo == 'md5':
- LOG.debug("Checksum algorithm for image %(image)s for node "
- "%(node)s is set to '%(algo)s', changing to 'sha256'",
- {'algo': os_hash_algo, 'node': task.node.uuid,
- 'image': image_path})
- os_hash_algo = 'sha256'
-
- LOG.debug('Recalculating checksum for image %(image)s for node '
- '%(node)s due to image conversion',
- {'image': image_path, 'node': task.node.uuid})
- instance_info['image_checksum'] = None
- hash_value = compute_image_checksum(image_path, os_hash_algo)
instance_info['image_os_hash_algo'] = os_hash_algo
instance_info['image_os_hash_value'] = hash_value
else:
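The checksum handling above reduces to one rule: recalculate only when conversion actually changed the disk format, and never keep md5 when recalculating. A stand-alone sketch of that rule (the function name is illustrative):

.. code-block:: python

    def pick_checksum_strategy(initial_format, final_format, os_hash_algo):
        # Returns (recalculate, algorithm).
        if initial_format == final_format:
            # Image was not converted: keep the existing checksum as-is.
            return False, os_hash_algo
        if not os_hash_algo or os_hash_algo == 'md5':
            # md5 (or no algorithm) is upgraded to sha256 for the new value.
            os_hash_algo = 'sha256'
        return True, os_hash_algo


    print(pick_checksum_strategy('qcow2', 'raw', 'md5'))   # (True, 'sha256')
    print(pick_checksum_strategy('raw', 'raw', 'sha512'))  # (False, 'sha512')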
diff --git a/ironic/tests/unit/common/test_image_service.py b/ironic/tests/unit/common/test_image_service.py
index 197518e1a..297a1b4b9 100644
--- a/ironic/tests/unit/common/test_image_service.py
+++ b/ironic/tests/unit/common/test_image_service.py
@@ -10,7 +10,6 @@
# License for the specific language governing permissions and limitations
# under the License.
-import builtins
import datetime
from http import client as http_client
import io
@@ -483,119 +482,55 @@ class FileImageServiceTestCase(base.TestCase):
'properties': {},
'no_cache': True}, result)
+ @mock.patch.object(shutil, 'copyfile', autospec=True)
@mock.patch.object(os, 'link', autospec=True)
@mock.patch.object(os, 'remove', autospec=True)
- @mock.patch.object(os, 'access', return_value=True, autospec=True)
- @mock.patch.object(os, 'stat', autospec=True)
@mock.patch.object(image_service.FileImageService, 'validate_href',
autospec=True)
- def test_download_hard_link(self, _validate_mock, stat_mock, access_mock,
- remove_mock, link_mock):
+ def test_download_hard_link(self, _validate_mock, remove_mock, link_mock,
+ copy_mock):
_validate_mock.return_value = self.href_path
- stat_mock.return_value.st_dev = 'dev1'
file_mock = mock.Mock(spec=io.BytesIO)
file_mock.name = 'file'
self.service.download(self.href, file_mock)
_validate_mock.assert_called_once_with(mock.ANY, self.href)
- self.assertEqual(2, stat_mock.call_count)
- access_mock.assert_called_once_with(self.href_path, os.R_OK | os.W_OK)
remove_mock.assert_called_once_with('file')
link_mock.assert_called_once_with(self.href_path, 'file')
+ copy_mock.assert_not_called()
- @mock.patch.object(os, 'sendfile', return_value=42, autospec=True)
- @mock.patch.object(os.path, 'getsize', return_value=42, autospec=True)
- @mock.patch.object(builtins, 'open', autospec=True)
- @mock.patch.object(os, 'access', return_value=False, autospec=True)
- @mock.patch.object(os, 'stat', autospec=True)
- @mock.patch.object(image_service.FileImageService, 'validate_href',
- autospec=True)
- def test_download_copy(self, _validate_mock, stat_mock, access_mock,
- open_mock, size_mock, copy_mock):
- _validate_mock.return_value = self.href_path
- stat_mock.return_value.st_dev = 'dev1'
- file_mock = mock.MagicMock(spec=io.BytesIO)
- file_mock.name = 'file'
- input_mock = mock.MagicMock(spec=io.BytesIO)
- open_mock.return_value = input_mock
- self.service.download(self.href, file_mock)
- _validate_mock.assert_called_once_with(mock.ANY, self.href)
- self.assertEqual(2, stat_mock.call_count)
- access_mock.assert_called_once_with(self.href_path, os.R_OK | os.W_OK)
- copy_mock.assert_called_once_with(file_mock.fileno(),
- input_mock.__enter__().fileno(),
- 0, 42)
-
- @mock.patch.object(os, 'sendfile', autospec=True)
- @mock.patch.object(os.path, 'getsize', return_value=42, autospec=True)
- @mock.patch.object(builtins, 'open', autospec=True)
- @mock.patch.object(os, 'access', return_value=False, autospec=True)
- @mock.patch.object(os, 'stat', autospec=True)
+ @mock.patch.object(shutil, 'copyfile', autospec=True)
+ @mock.patch.object(os, 'link', autospec=True)
+ @mock.patch.object(os, 'remove', autospec=True)
@mock.patch.object(image_service.FileImageService, 'validate_href',
autospec=True)
- def test_download_copy_segmented(self, _validate_mock, stat_mock,
- access_mock, open_mock, size_mock,
- copy_mock):
- # Fake a 3G + 1k image
- chunk_size = image_service.SENDFILE_CHUNK_SIZE
- fake_image_size = chunk_size * 3 + 1024
- fake_chunk_seq = [chunk_size, chunk_size, chunk_size, 1024]
+ def test_download_copy(self, _validate_mock, remove_mock, link_mock,
+ copy_mock):
_validate_mock.return_value = self.href_path
- stat_mock.return_value.st_dev = 'dev1'
+ link_mock.side_effect = PermissionError
file_mock = mock.MagicMock(spec=io.BytesIO)
file_mock.name = 'file'
- input_mock = mock.MagicMock(spec=io.BytesIO)
- open_mock.return_value = input_mock
- size_mock.return_value = fake_image_size
- copy_mock.side_effect = fake_chunk_seq
self.service.download(self.href, file_mock)
_validate_mock.assert_called_once_with(mock.ANY, self.href)
- self.assertEqual(2, stat_mock.call_count)
- access_mock.assert_called_once_with(self.href_path, os.R_OK | os.W_OK)
- copy_calls = [mock.call(file_mock.fileno(),
- input_mock.__enter__().fileno(),
- chunk_size * i,
- fake_chunk_seq[i]) for i in range(4)]
- copy_mock.assert_has_calls(copy_calls)
- size_mock.assert_called_once_with(self.href_path)
-
- @mock.patch.object(os, 'remove', side_effect=OSError, autospec=True)
- @mock.patch.object(os, 'access', return_value=True, autospec=True)
- @mock.patch.object(os, 'stat', autospec=True)
- @mock.patch.object(image_service.FileImageService, 'validate_href',
- autospec=True)
- def test_download_hard_link_fail(self, _validate_mock, stat_mock,
- access_mock, remove_mock):
- _validate_mock.return_value = self.href_path
- stat_mock.return_value.st_dev = 'dev1'
- file_mock = mock.MagicMock(spec=io.BytesIO)
- file_mock.name = 'file'
- self.assertRaises(exception.ImageDownloadFailed,
- self.service.download, self.href, file_mock)
- _validate_mock.assert_called_once_with(mock.ANY, self.href)
- self.assertEqual(2, stat_mock.call_count)
- access_mock.assert_called_once_with(self.href_path, os.R_OK | os.W_OK)
+ link_mock.assert_called_once_with(self.href_path, 'file')
+ copy_mock.assert_called_once_with(self.href_path, 'file')
- @mock.patch.object(os, 'sendfile', side_effect=OSError, autospec=True)
- @mock.patch.object(os.path, 'getsize', return_value=42, autospec=True)
- @mock.patch.object(builtins, 'open', autospec=True)
- @mock.patch.object(os, 'access', return_value=False, autospec=True)
- @mock.patch.object(os, 'stat', autospec=True)
+ @mock.patch.object(shutil, 'copyfile', autospec=True)
+ @mock.patch.object(os, 'link', autospec=True)
+ @mock.patch.object(os, 'remove', autospec=True)
@mock.patch.object(image_service.FileImageService, 'validate_href',
autospec=True)
- def test_download_copy_fail(self, _validate_mock, stat_mock, access_mock,
- open_mock, size_mock, copy_mock):
+ def test_download_copy_fail(self, _validate_mock, remove_mock, link_mock,
+ copy_mock):
_validate_mock.return_value = self.href_path
- stat_mock.return_value.st_dev = 'dev1'
+ link_mock.side_effect = PermissionError
+ copy_mock.side_effect = PermissionError
file_mock = mock.MagicMock(spec=io.BytesIO)
file_mock.name = 'file'
- input_mock = mock.MagicMock(spec=io.BytesIO)
- open_mock.return_value = input_mock
self.assertRaises(exception.ImageDownloadFailed,
self.service.download, self.href, file_mock)
_validate_mock.assert_called_once_with(mock.ANY, self.href)
- self.assertEqual(2, stat_mock.call_count)
- access_mock.assert_called_once_with(self.href_path, os.R_OK | os.W_OK)
- size_mock.assert_called_once_with(self.href_path)
+ link_mock.assert_called_once_with(self.href_path, 'file')
+ copy_mock.assert_called_once_with(self.href_path, 'file')
class ServiceGetterTestCase(base.TestCase):
diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py
index a4c3d57b6..34e805deb 100644
--- a/ironic/tests/unit/conductor/test_cleaning.py
+++ b/ironic/tests/unit/conductor/test_cleaning.py
@@ -1138,12 +1138,12 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
class DoNodeCleanAbortTestCase(db_base.DbTestCase):
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
- def _test__do_node_clean_abort(self, step_name, tear_mock):
+ def _test_do_node_clean_abort(self, clean_step, tear_mock):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
- provision_state=states.CLEANFAIL,
+ provision_state=states.CLEANWAIT,
target_provision_state=states.AVAILABLE,
- clean_step={'step': 'foo', 'abortable': True},
+ clean_step=clean_step,
driver_internal_info={
'agent_url': 'some url',
'agent_secret_token': 'token',
@@ -1153,11 +1153,11 @@ class DoNodeCleanAbortTestCase(db_base.DbTestCase):
'skip_current_clean_step': True})
with task_manager.acquire(self.context, node.uuid) as task:
- cleaning.do_node_clean_abort(task, step_name=step_name)
+ cleaning.do_node_clean_abort(task)
self.assertIsNotNone(task.node.last_error)
tear_mock.assert_called_once_with(task.driver.deploy, task)
- if step_name:
- self.assertIn(step_name, task.node.last_error)
+ if clean_step:
+ self.assertIn(clean_step['step'], task.node.last_error)
# assert node's clean_step and metadata was cleaned up
self.assertEqual({}, task.node.clean_step)
self.assertNotIn('clean_step_index',
@@ -1173,11 +1173,12 @@ class DoNodeCleanAbortTestCase(db_base.DbTestCase):
self.assertNotIn('agent_secret_token',
task.node.driver_internal_info)
- def test__do_node_clean_abort(self):
- self._test__do_node_clean_abort(None)
+ def test_do_node_clean_abort_early(self):
+ self._test_do_node_clean_abort(None)
- def test__do_node_clean_abort_with_step_name(self):
- self._test__do_node_clean_abort('foo')
+ def test_do_node_clean_abort_with_step(self):
+ self._test_do_node_clean_abort({'step': 'foo', 'interface': 'deploy',
+ 'abortable': True})
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
def test__do_node_clean_abort_tear_down_fail(self, tear_mock):
diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py
index 5d84dbbef..ece5a8a58 100644
--- a/ironic/tests/unit/conductor/test_manager.py
+++ b/ironic/tests/unit/conductor/test_manager.py
@@ -2733,7 +2733,8 @@ class DoProvisioningActionTestCase(mgr_utils.ServiceSetUpMixin,
# Node will be moved to tgt_prov_state after cleaning, not tested here
self.assertEqual(states.CLEANFAIL, node.provision_state)
self.assertEqual(tgt_prov_state, node.target_provision_state)
- self.assertIsNone(node.last_error)
+ self.assertEqual('By request, the clean operation was aborted',
+ node.last_error)
mock_spawn.assert_called_with(
self.service, cleaning.do_node_clean_abort, mock.ANY)
diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py
index a424e5132..52fc72436 100644
--- a/ironic/tests/unit/conductor/test_utils.py
+++ b/ironic/tests/unit/conductor/test_utils.py
@@ -196,7 +196,8 @@ class NodePowerActionTestCase(db_base.DbTestCase):
node = obj_utils.create_test_node(self.context,
uuid=uuidutils.generate_uuid(),
driver='fake-hardware',
- power_state=states.POWER_OFF)
+ power_state=states.POWER_OFF,
+ last_error='failed before')
task = task_manager.TaskManager(self.context, node.uuid)
get_power_mock.return_value = states.POWER_OFF
@@ -209,6 +210,27 @@ class NodePowerActionTestCase(db_base.DbTestCase):
self.assertIsNone(node['target_power_state'])
self.assertIsNone(node['last_error'])
+ @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
+ def test_node_power_action_keep_last_error(self, get_power_mock):
+ """Test node_power_action to keep last_error for failed states."""
+ node = obj_utils.create_test_node(self.context,
+ uuid=uuidutils.generate_uuid(),
+ driver='fake-hardware',
+ power_state=states.POWER_OFF,
+ provision_state=states.CLEANFAIL,
+ last_error='failed before')
+ task = task_manager.TaskManager(self.context, node.uuid)
+
+ get_power_mock.return_value = states.POWER_OFF
+
+ conductor_utils.node_power_action(task, states.POWER_ON)
+
+ node.refresh()
+ get_power_mock.assert_called_once_with(mock.ANY, mock.ANY)
+ self.assertEqual(states.POWER_ON, node['power_state'])
+ self.assertIsNone(node['target_power_state'])
+ self.assertEqual('failed before', node['last_error'])
+
@mock.patch('ironic.objects.node.NodeSetPowerStateNotification',
autospec=True)
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
@@ -282,6 +304,31 @@ class NodePowerActionTestCase(db_base.DbTestCase):
node['driver_internal_info'])
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
+ def test_node_power_action_power_off_already(self, get_power_mock):
+ """Test node_power_action to turn node power off, but already off."""
+ dii = {'agent_secret_token': 'token',
+ 'agent_cached_deploy_steps': ['steps']}
+ node = obj_utils.create_test_node(self.context,
+ uuid=uuidutils.generate_uuid(),
+ driver='fake-hardware',
+ power_state=states.POWER_ON,
+ driver_internal_info=dii)
+ task = task_manager.TaskManager(self.context, node.uuid)
+
+ get_power_mock.return_value = states.POWER_OFF
+
+ conductor_utils.node_power_action(task, states.POWER_OFF)
+
+ node.refresh()
+ get_power_mock.assert_called_once_with(mock.ANY, mock.ANY)
+ self.assertEqual(states.POWER_OFF, node['power_state'])
+ self.assertIsNone(node['target_power_state'])
+ self.assertIsNone(node['last_error'])
+ self.assertNotIn('agent_secret_token', node['driver_internal_info'])
+ self.assertNotIn('agent_cached_deploy_steps',
+ node['driver_internal_info'])
+
+ @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
def test_node_power_action_power_off_pregenerated_token(self,
get_power_mock):
dii = {'agent_secret_token': 'token',
@@ -1150,6 +1197,9 @@ class ErrorHandlersTestCase(db_base.DbTestCase):
self.node.set_driver_internal_info('skip_current_clean_step', True)
self.node.set_driver_internal_info('clean_step_index', 0)
self.node.set_driver_internal_info('agent_url', 'url')
+ self.node.set_driver_internal_info('agent_secret_token', 'foo')
+ self.node.set_driver_internal_info('agent_secret_token_pregenerated',
+ False)
msg = 'error bar'
last_error = "last error"
@@ -1162,6 +1212,9 @@ class ErrorHandlersTestCase(db_base.DbTestCase):
self.assertNotIn('cleaning_polling', self.node.driver_internal_info)
self.assertNotIn('skip_current_clean_step',
self.node.driver_internal_info)
+ self.assertNotIn('agent_secret_token', self.node.driver_internal_info)
+ self.assertNotIn('agent_secret_token_pregenerated',
+ self.node.driver_internal_info)
self.assertEqual(last_error, self.node.last_error)
self.assertTrue(self.node.maintenance)
self.assertEqual(last_error, self.node.maintenance_reason)
diff --git a/ironic/tests/unit/db/test_api.py b/ironic/tests/unit/db/test_api.py
index 6142fdfae..2396b1253 100644
--- a/ironic/tests/unit/db/test_api.py
+++ b/ironic/tests/unit/db/test_api.py
@@ -226,6 +226,11 @@ class UpdateToLatestVersionsTestCase(base.DbTestCase):
for i in range(0, num_nodes):
node = utils.create_test_node(version=version,
uuid=uuidutils.generate_uuid())
+ # Create entries on the tables so we force field upgrades
+ utils.create_test_node_trait(node_id=node.id, trait='foo',
+ version='0.0')
+ utils.create_test_bios_setting(node_id=node.id, version='1.0')
+
nodes.append(node.uuid)
for uuid in nodes:
node = self.dbapi.get_node_by_uuid(uuid)
@@ -238,10 +243,15 @@ class UpdateToLatestVersionsTestCase(base.DbTestCase):
return
nodes = self._create_nodes(5)
+ # Check/migrate 2, 10 remain.
+ self.assertEqual(
+ (10, 2), self.dbapi.update_to_latest_versions(self.context, 2))
+ # Check/migrate 10, 8 migrated, 8 remain.
self.assertEqual(
- (5, 2), self.dbapi.update_to_latest_versions(self.context, 2))
+ (8, 8), self.dbapi.update_to_latest_versions(self.context, 10))
+ # Just make sure it is still 0, 0 in case more things are added.
self.assertEqual(
- (3, 3), self.dbapi.update_to_latest_versions(self.context, 10))
+ (0, 0), self.dbapi.update_to_latest_versions(self.context, 10))
for uuid in nodes:
node = self.dbapi.get_node_by_uuid(uuid)
self.assertEqual(self.node_ver, node.version)
@@ -250,10 +260,19 @@ class UpdateToLatestVersionsTestCase(base.DbTestCase):
if self.node_version_same:
# can't test if we don't have diff versions of the node
return
-
- nodes = self._create_nodes(5)
+ vm_count = 5
+ nodes = self._create_nodes(vm_count)
+ # NOTE(TheJulia): Under current testing, 5 nodes will result in 10
+ # records implicitly needing to be migrated.
+ migrate_count = vm_count * 2
+ self.assertEqual(
+ (migrate_count, migrate_count),
+ self.dbapi.update_to_latest_versions(self.context,
+ migrate_count))
self.assertEqual(
- (5, 5), self.dbapi.update_to_latest_versions(self.context, 5))
+ (0, 0), self.dbapi.update_to_latest_versions(self.context,
+ migrate_count))
+
for uuid in nodes:
node = self.dbapi.get_node_by_uuid(uuid)
self.assertEqual(self.node_ver, node.version)
diff --git a/ironic/tests/unit/drivers/modules/test_deploy_utils.py b/ironic/tests/unit/drivers/modules/test_deploy_utils.py
index 1177e9743..7220697cb 100644
--- a/ironic/tests/unit/drivers/modules/test_deploy_utils.py
+++ b/ironic/tests/unit/drivers/modules/test_deploy_utils.py
@@ -1940,7 +1940,7 @@ class TestBuildInstanceInfoForHttpProvisioning(db_base.DbTestCase):
self.node.save()
self.checksum_mock = self.useFixture(fixtures.MockPatchObject(
- fileutils, 'compute_file_checksum')).mock
+ fileutils, 'compute_file_checksum', autospec=True)).mock
self.checksum_mock.return_value = 'fake-checksum'
self.cache_image_mock = self.useFixture(fixtures.MockPatchObject(
utils, 'cache_instance_image', autospec=True)).mock
@@ -2012,9 +2012,25 @@ class TestBuildInstanceInfoForHttpProvisioning(db_base.DbTestCase):
image_info=self.image_info, expect_raw=True)
self.assertIsNone(instance_info['image_checksum'])
+ self.assertEqual(instance_info['image_os_hash_algo'], 'sha512')
+ self.assertEqual(instance_info['image_os_hash_value'],
+ 'fake-checksum')
self.assertEqual(instance_info['image_disk_format'], 'raw')
- calls = [mock.call(image_path, algorithm='sha512')]
- self.checksum_mock.assert_has_calls(calls)
+ self.checksum_mock.assert_called_once_with(image_path,
+ algorithm='sha512')
+
+ def test_build_instance_info_already_raw(self):
+ cfg.CONF.set_override('force_raw_images', True)
+ self.image_info['disk_format'] = 'raw'
+ image_path, instance_info = self._test_build_instance_info(
+ image_info=self.image_info, expect_raw=True)
+
+ self.assertEqual(instance_info['image_checksum'], 'aa')
+ self.assertEqual(instance_info['image_os_hash_algo'], 'sha512')
+ self.assertEqual(instance_info['image_os_hash_value'],
+ 'fake-sha512')
+ self.assertEqual(instance_info['image_disk_format'], 'raw')
+ self.checksum_mock.assert_not_called()
def test_build_instance_info_force_raw_drops_md5(self):
cfg.CONF.set_override('force_raw_images', True)
@@ -2027,6 +2043,17 @@ class TestBuildInstanceInfoForHttpProvisioning(db_base.DbTestCase):
calls = [mock.call(image_path, algorithm='sha256')]
self.checksum_mock.assert_has_calls(calls)
+ def test_build_instance_info_already_raw_keeps_md5(self):
+ cfg.CONF.set_override('force_raw_images', True)
+ self.image_info['os_hash_algo'] = 'md5'
+ self.image_info['disk_format'] = 'raw'
+ image_path, instance_info = self._test_build_instance_info(
+ image_info=self.image_info, expect_raw=True)
+
+ self.assertEqual(instance_info['image_checksum'], 'aa')
+ self.assertEqual(instance_info['image_disk_format'], 'raw')
+ self.checksum_mock.assert_not_called()
+
@mock.patch.object(image_service.HttpImageService, 'validate_href',
autospec=True)
def test_build_instance_info_file_image(self, validate_href_mock):
@@ -2035,7 +2062,6 @@ class TestBuildInstanceInfoForHttpProvisioning(db_base.DbTestCase):
i_info['image_source'] = 'file://image-ref'
i_info['image_checksum'] = 'aa'
i_info['root_gb'] = 10
- i_info['image_checksum'] = 'aa'
driver_internal_info['is_whole_disk_image'] = True
self.node.instance_info = i_info
self.node.driver_internal_info = driver_internal_info
@@ -2052,6 +2078,7 @@ class TestBuildInstanceInfoForHttpProvisioning(db_base.DbTestCase):
self.assertEqual(expected_url, info['image_url'])
self.assertEqual('sha256', info['image_os_hash_algo'])
self.assertEqual('fake-checksum', info['image_os_hash_value'])
+ self.assertEqual('raw', info['image_disk_format'])
self.cache_image_mock.assert_called_once_with(
task.context, task.node, force_raw=True)
self.checksum_mock.assert_called_once_with(
@@ -2068,7 +2095,6 @@ class TestBuildInstanceInfoForHttpProvisioning(db_base.DbTestCase):
i_info['image_source'] = 'http://image-ref'
i_info['image_checksum'] = 'aa'
i_info['root_gb'] = 10
- i_info['image_checksum'] = 'aa'
driver_internal_info['is_whole_disk_image'] = True
self.node.instance_info = i_info
self.node.driver_internal_info = driver_internal_info
@@ -2102,7 +2128,6 @@ class TestBuildInstanceInfoForHttpProvisioning(db_base.DbTestCase):
i_info['image_source'] = 'http://image-ref'
i_info['image_checksum'] = 'aa'
i_info['root_gb'] = 10
- i_info['image_checksum'] = 'aa'
i_info['image_download_source'] = 'local'
driver_internal_info['is_whole_disk_image'] = True
self.node.instance_info = i_info
@@ -2138,7 +2163,6 @@ class TestBuildInstanceInfoForHttpProvisioning(db_base.DbTestCase):
i_info['image_source'] = 'http://image-ref'
i_info['image_checksum'] = 'aa'
i_info['root_gb'] = 10
- i_info['image_checksum'] = 'aa'
d_info['image_download_source'] = 'local'
driver_internal_info['is_whole_disk_image'] = True
self.node.instance_info = i_info
@@ -2164,6 +2188,41 @@ class TestBuildInstanceInfoForHttpProvisioning(db_base.DbTestCase):
validate_href_mock.assert_called_once_with(
mock.ANY, expected_url, False)
+ @mock.patch.object(image_service.HttpImageService, 'validate_href',
+ autospec=True)
+ def test_build_instance_info_local_image_already_raw(self,
+ validate_href_mock):
+ cfg.CONF.set_override('image_download_source', 'local', group='agent')
+ i_info = self.node.instance_info
+ driver_internal_info = self.node.driver_internal_info
+ i_info['image_source'] = 'http://image-ref'
+ i_info['image_checksum'] = 'aa'
+ i_info['root_gb'] = 10
+ i_info['image_disk_format'] = 'raw'
+ driver_internal_info['is_whole_disk_image'] = True
+ self.node.instance_info = i_info
+ self.node.driver_internal_info = driver_internal_info
+ self.node.save()
+
+ expected_url = (
+ 'http://172.172.24.10:8080/agent_images/%s' % self.node.uuid)
+
+ with task_manager.acquire(
+ self.context, self.node.uuid, shared=False) as task:
+
+ info = utils.build_instance_info_for_deploy(task)
+
+ self.assertEqual(expected_url, info['image_url'])
+ self.assertEqual('aa', info['image_checksum'])
+ self.assertEqual('raw', info['image_disk_format'])
+ self.assertIsNone(info['image_os_hash_algo'])
+ self.assertIsNone(info['image_os_hash_value'])
+ self.cache_image_mock.assert_called_once_with(
+ task.context, task.node, force_raw=True)
+ self.checksum_mock.assert_not_called()
+ validate_href_mock.assert_called_once_with(
+ mock.ANY, expected_url, False)
+
class TestStorageInterfaceUtils(db_base.DbTestCase):
def setUp(self):
diff --git a/releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml b/releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml
new file mode 100644
index 000000000..270278f1b
--- /dev/null
+++ b/releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml
@@ -0,0 +1,8 @@
+---
+fixes:
+ - |
+ When aborting cleaning, the ``last_error`` field is no longer initially
+ empty. It is now populated on the state transition to ``clean failed``.
+ - |
+ When cleaning or deployment fails, the ``last_error`` field is no longer
+ temporarily set to ``None`` while the power off action is running.
diff --git a/releasenotes/notes/cross-link-1ffd1a4958f14fd7.yaml b/releasenotes/notes/cross-link-1ffd1a4958f14fd7.yaml
new file mode 100644
index 000000000..caac13dd4
--- /dev/null
+++ b/releasenotes/notes/cross-link-1ffd1a4958f14fd7.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+ - |
+ Fixes ``Invalid cross-device link`` in some cases when using ``file://``
+ image URLs.
diff --git a/releasenotes/notes/fix-online-version-migration-db432a7b239647fa.yaml b/releasenotes/notes/fix-online-version-migration-db432a7b239647fa.yaml
new file mode 100644
index 000000000..824185aab
--- /dev/null
+++ b/releasenotes/notes/fix-online-version-migration-db432a7b239647fa.yaml
@@ -0,0 +1,14 @@
+---
+fixes:
+ - |
+ Fixes an issue in the online upgrade logic where database models for
+ Node Traits and BIOS Settings resulted in an error when performing
+ the online data migration. This was because these tables were originally
+ created as extensions of the Nodes database table, and the schema
+ of the database was slightly different enough to result in an error
+ if there was data to migrate in these tables upon upgrade,
+ which would have occurred if an early BIOS Setting adopter had
+ data in the database prior to upgrading to the Yoga release of Ironic.
+
+ The online upgrade logic now substitutes an alternate primary key name
+ when applicable.
diff --git a/releasenotes/notes/fix-power-off-token-wipe-e7d605997f00d39d.yaml b/releasenotes/notes/fix-power-off-token-wipe-e7d605997f00d39d.yaml
new file mode 100644
index 000000000..14a489b46
--- /dev/null
+++ b/releasenotes/notes/fix-power-off-token-wipe-e7d605997f00d39d.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+ - |
+ Fixes an issue where an agent token could be inadvertently orphaned
+ if a node is already in the target power state when we attempt to turn
+ the node off.
diff --git a/releasenotes/notes/no-recalculate-653e524fd6160e72.yaml b/releasenotes/notes/no-recalculate-653e524fd6160e72.yaml
new file mode 100644
index 000000000..3d2e6dad4
--- /dev/null
+++ b/releasenotes/notes/no-recalculate-653e524fd6160e72.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+ - |
+ No longer re-calculates checksums for images that are already raw.
+ Previously, this recalculation caused significant delays when deploying raw images.
diff --git a/releasenotes/notes/wipe-agent-token-upon-cleaning-timeout-c9add514fad1b02c.yaml b/releasenotes/notes/wipe-agent-token-upon-cleaning-timeout-c9add514fad1b02c.yaml
new file mode 100644
index 000000000..0aa828ccd
--- /dev/null
+++ b/releasenotes/notes/wipe-agent-token-upon-cleaning-timeout-c9add514fad1b02c.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+ - |
+ Fixes an issue where an agent token was being orphaned if a baremetal node
+ timed out during cleaning operations, leading to issues where the node
+ would not be able to establish a new token with Ironic upon future
+ operations in some cases. We now always wipe the token in this case.