summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Booth <mbooth@redhat.com>2019-05-13 16:04:39 +0100
committerMatthew Booth <mbooth@redhat.com>2019-05-20 11:57:50 +0100
commitf9c2503609bfad0b871d9e87227aa2bb5c137467 (patch)
treef0898173c70991151741110da00c270b66fb804c
parent01141392db549728e78865b6a56468d17102bee0 (diff)
downloadnova-f9c2503609bfad0b871d9e87227aa2bb5c137467.tar.gz
Fix retry of instance_update_and_get_original
_instance_update modifies its 'values' argument. Consequently if it is retried due to an update conflict, the second invocation has the wrong arguments. A specific issue this causes is that if we called it with expected_task_state a concurrent modification to task_state will cause us to fail and retry. However, expected_task_state will have been popped from values on the first invocation and will not be present for the second. Consequently the second invocation will fail to perform the task_state check and therefore succeed, resulting in a race. We rewrite the old race unit test which wasn't testing the correct thing for 2 reasons: 1. Due to the bug fixed in this patch, although we were calling update_on_match() twice, the second call didn't check the task state. 2. side_effect=iterable returns function items without executing them, but we weren't hitting this due to the bug fixed in this patch. Closes-Bug: #1821373 Change-Id: I01c63e685113bf30e687ccb14a4d18e344b306f6 (cherry picked from commit aae5c7aa3819ad9161fd2effed3872d540099230) (cherry picked from commit 61fef49b1555c6509398fc47c21319c1b5e50505) (cherry picked from commit 4540cd6ef9731837cf4ed1e56850bcfa3512ae1c)
-rw-r--r--nova/db/sqlalchemy/api.py21
-rw-r--r--nova/tests/unit/db/test_db_api.py58
2 files changed, 57 insertions, 22 deletions
diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py
index fbb3e8d19a..e3ffc10721 100644
--- a/nova/db/sqlalchemy/api.py
+++ b/nova/db/sqlalchemy/api.py
@@ -2775,6 +2775,11 @@ def _instance_update(context, instance_uuid, values, expected, original=None):
if not uuidutils.is_uuid_like(instance_uuid):
raise exception.InvalidUUID(instance_uuid)
+ # NOTE(mdbooth): We pop values from this dict below, so we copy it here to
+ # ensure there are no side effects for the caller or if we retry the
+ # function due to a db conflict.
+ updates = copy.copy(values)
+
if expected is None:
expected = {}
else:
@@ -2786,8 +2791,8 @@ def _instance_update(context, instance_uuid, values, expected, original=None):
# updates
for field in ('task_state', 'vm_state'):
expected_field = 'expected_%s' % field
- if expected_field in values:
- value = values.pop(expected_field, None)
+ if expected_field in updates:
+ value = updates.pop(expected_field, None)
# Coerce all single values to singleton lists
if value is None:
expected[field] = [None]
@@ -2795,23 +2800,23 @@ def _instance_update(context, instance_uuid, values, expected, original=None):
expected[field] = sqlalchemyutils.to_list(value)
# Values which need to be updated separately
- metadata = values.pop('metadata', None)
- system_metadata = values.pop('system_metadata', None)
+ metadata = updates.pop('metadata', None)
+ system_metadata = updates.pop('system_metadata', None)
- _handle_objects_related_type_conversions(values)
+ _handle_objects_related_type_conversions(updates)
# Hostname is potentially unique, but this is enforced in code rather
# than the DB. The query below races, but the number of users of
# osapi_compute_unique_server_name_scope is small, and a robust fix
# will be complex. This is intentionally left as is for the moment.
- if 'hostname' in values:
- _validate_unique_server_name(context, values['hostname'])
+ if 'hostname' in updates:
+ _validate_unique_server_name(context, updates['hostname'])
compare = models.Instance(uuid=instance_uuid, **expected)
try:
instance_ref = model_query(context, models.Instance,
project_only=True).\
- update_on_match(compare, 'uuid', values)
+ update_on_match(compare, 'uuid', updates)
except update_match.NoRowsMatched:
# Update failed. Try to find why and raise a specific error.
diff --git a/nova/tests/unit/db/test_db_api.py b/nova/tests/unit/db/test_db_api.py
index d1d96d7a0d..ffd3ff0526 100644
--- a/nova/tests/unit/db/test_db_api.py
+++ b/nova/tests/unit/db/test_db_api.py
@@ -44,6 +44,7 @@ from sqlalchemy import inspect
from sqlalchemy import Integer
from sqlalchemy import MetaData
from sqlalchemy.orm import query
+from sqlalchemy.orm import session as sqla_session
from sqlalchemy import sql
from sqlalchemy import Table
@@ -3004,21 +3005,50 @@ class InstanceTestCase(test.TestCase, ModelsObjectComparatorMixin):
test(self.ctxt)
def test_instance_update_and_get_original_conflict_race(self):
- # Ensure that we retry if update_on_match fails for no discernable
- # reason
- instance = self.create_instance_with_args()
-
- orig_update_on_match = update_match.update_on_match
+ # Ensure that we correctly process expected_task_state when retrying
+ # due to an unknown conflict
+
+ # This requires modelling the MySQL read view, which means that if we
+ # have read something in the current transaction and we read it again,
+ # we will read the same data every time even if another committed
+ # transaction has since altered that data. In this test we have an
+ # instance whose task state was originally None, but has been set to
+ # SHELVING by another, concurrent transaction. Therefore the first time
+ # we read the data we will read None, but when we restart the
+ # transaction we will read the correct data.
- # Reproduce the conditions of a race between fetching and updating the
- # instance by making update_on_match fail for no discernable reason the
- # first time it is called, but work normally the second time.
- with mock.patch.object(update_match, 'update_on_match',
- side_effect=[update_match.NoRowsMatched,
- orig_update_on_match]):
- db.instance_update_and_get_original(
- self.ctxt, instance['uuid'], {'metadata': {'mk1': 'mv3'}})
- self.assertEqual(update_match.update_on_match.call_count, 2)
+ instance = self.create_instance_with_args(
+ task_state=task_states.SHELVING)
+
+ instance_out_of_date = copy.copy(instance)
+ instance_out_of_date['task_state'] = None
+
+ # NOTE(mdbooth): SQLA magic which makes this dirty object look
+ # like a freshly loaded one.
+ sqla_session.make_transient(instance_out_of_date)
+ sqla_session.make_transient_to_detached(instance_out_of_date)
+
+ # update_on_match will fail first time because the actual task state
+ # (SHELVING) doesn't match the expected task state (None). However,
+ # we ensure that the first time we fetch the instance object we get
+ # out-of-date data. This forces us to retry the operation to find out
+ # what really went wrong.
+ with mock.patch.object(sqlalchemy_api, '_instance_get_by_uuid',
+ side_effect=[instance_out_of_date, instance]), \
+ mock.patch.object(sqlalchemy_api, '_instance_update',
+ side_effect=sqlalchemy_api._instance_update):
+ self.assertRaises(exception.UnexpectedTaskStateError,
+ db.instance_update_and_get_original,
+ self.ctxt, instance['uuid'],
+ {'expected_task_state': [None]})
+ sqlalchemy_api._instance_update.assert_has_calls([
+ mock.call(self.ctxt, instance['uuid'],
+ {'expected_task_state': [None]}, None,
+ original=instance_out_of_date),
+ mock.call(self.ctxt, instance['uuid'],
+ {'expected_task_state': [None]}, None,
+ original=instance),
+ ])
def test_instance_update_and_get_original_conflict_race_fallthrough(self):
# Ensure that is update_match continuously fails for no discernable