diff options
author | Thomas Haller <thaller@redhat.com> | 2019-08-21 14:30:34 +0200 |
---|---|---|
committer | Thomas Haller <thaller@redhat.com> | 2019-08-28 16:27:00 +0200 |
commit | cca0c2b56a4c637d8fc222353126b6f46187ba53 (patch) | |
tree | 79f326a67fdcc1b2bb6022780e3538fcfa111691 /src | |
parent | c3d41fa4529ec9c9e266d117cc5b8ec4e2d917b8 (diff) | |
download | NetworkManager-cca0c2b56a4c637d8fc222353126b6f46187ba53.tar.gz |
device: move SR-IOV initialization to activate_stage1_device_prepare()
Note that all subclasses of NMDevice that implement act_stage1_prepare(), call
the parents implementation as very first thing.
Previously, NMDevice's act_stage1_prepare() was setting up SR-IOV. But that is
problemantic. Note that it may have returned NM_ACT_STAGE_RETURN_POSTPONE, in which
case subclasses would just skip their action and return to the caller. Later,
sriov_params_cb() would directly call nm_device_activate_schedule_stage2_device_config(),
and thus act_stage1_prepare() was never executed for the subclass. That
is wrong.
First, I don't think it is good to let subclasses decide whether to call a
parents implementation (and when). It just causes ambiguity. In
the best case we do it always in the same order, in the worst case we
call the parent at the wrong time or never. Instead, we want to initialize
SR-IOV always and early in stage1, so we should just do it directly from
activate_stage1_device_prepare(). Now NMDevice's act_stage1_prepare() does
nothing.
The bigger problem is that when a device wants to resume a stage that
it previously postponed, that it would schedule the next stage!
Instead, it should schedule the same stage again instead. That allows
to postpone the completion of a stage for multiple reasons, and each
call to a certain stage merely notifies that something changed and
we need to check whether we can now complete the stage.
For that to work, stages must become re-entrant. That means we need to
remember whether an action that we care about needs to be started, is pending
or already completed.
Compare this to nm_device_activate_schedule_stage3_ip_config_start(),
which checks whether firewall is configured. That is likewise the wrong
approach. Callers that were in stage2 and postponed stage2, and later would
schedule stage3 when they are ready.
Then nm_device_activate_schedule_stage3_ip_config_start() would check whether
firewall is also ready, and do nothing if that's not the case (relying
that when the firewall code completes to call
nm_device_activate_schedule_stage3_ip_config_start().
Diffstat (limited to 'src')
-rw-r--r-- | src/devices/nm-device-private.h | 6 | ||||
-rw-r--r-- | src/devices/nm-device.c | 226 |
2 files changed, 149 insertions, 83 deletions
diff --git a/src/devices/nm-device-private.h b/src/devices/nm-device-private.h index 0a414af96f..8b2cc47443 100644 --- a/src/devices/nm-device-private.h +++ b/src/devices/nm-device-private.h @@ -26,6 +26,12 @@ /* This file should only be used by subclasses of NMDevice */ typedef enum { + NM_DEVICE_STAGE_STATE_INIT = 0, + NM_DEVICE_STAGE_STATE_PENDING = 1, + NM_DEVICE_STAGE_STATE_COMPLETED = 2, +} NMDeviceStageState; + +typedef enum { NM_DEVICE_IP_STATE_NONE, NM_DEVICE_IP_STATE_WAIT, NM_DEVICE_IP_STATE_CONF, diff --git a/src/devices/nm-device.c b/src/devices/nm-device.c index bca194c79b..cbaeb81a88 100644 --- a/src/devices/nm-device.c +++ b/src/devices/nm-device.c @@ -413,6 +413,8 @@ typedef struct _NMDevicePrivate { bool concheck_rp_filter_checked:1; + NMDeviceStageState stage1_sriov_state:3; + /* Generic DHCP stuff */ char * dhcp_anycast_address; @@ -680,6 +682,8 @@ static void (*const activate_stage4_ip_config_timeout_x[2]) (NMDevice *self) = { activate_stage4_ip_config_timeout_4, }; +static void sriov_op_cb (GError *error, gpointer user_data); + static void activate_stage5_ip_config_result_4 (NMDevice *self); static void activate_stage5_ip_config_result_6 (NMDevice *self); @@ -4243,7 +4247,7 @@ nm_device_update_from_platform_link (NMDevice *self, const NMPlatformLink *plink } } -static void sriov_op_cb (GError *error, gpointer user_data); +/*****************************************************************************/ static void sriov_op_start (NMDevice *self, SriovOp *op) @@ -4276,11 +4280,14 @@ sriov_op_cb (GError *error, gpointer user_data) priv->sriov.pending = NULL; + g_clear_object (&op->cancellable); + if (op->callback) op->callback (error, op->callback_data); - g_clear_object (&op->cancellable); - g_slice_free (SriovOp, op); + nm_assert (!priv->sriov.pending); + + nm_g_slice_free (op); if (priv->sriov.next) { sriov_op_start (self, @@ -4289,41 +4296,82 @@ sriov_op_cb (GError *error, gpointer user_data) } static void -sriov_op_queue (NMDevice *self, - guint num_vfs, - NMTernary autoprobe, - NMPlatformAsyncCallback callback, - gpointer callback_data) +sriov_op_queue_op (NMDevice *self, + SriovOp *op) { NMDevicePrivate *priv = NM_DEVICE_GET_PRIVATE (self); - GError *error = NULL; - SriovOp *op; - - op = g_slice_new0 (SriovOp); - op->num_vfs = num_vfs; - op->autoprobe = autoprobe; - op->callback = callback; - op->callback_data = callback_data; if (priv->sriov.next) { + SriovOp *op_next = g_steal_pointer (&priv->sriov.next); + /* Cancel the next operation immediately */ - if (priv->sriov.next->callback) { + if (op_next->callback) { + gs_free_error GError *error = NULL; + nm_utils_error_set_cancelled (&error, FALSE, NULL); - priv->sriov.next->callback (error, priv->sriov.next->callback_data); - g_clear_error (&error); + op_next->callback (error, op_next->callback_data); } - g_slice_free (SriovOp, priv->sriov.next); - priv->sriov.next = NULL; - } - if (priv->sriov.pending) { + nm_g_slice_free (op_next); + + if (!priv->sriov.pending) { + /* This (having "next" set but "pending" not) can only happen if we are + * called from inside the callback again. + * + * That means we append the new request as "next" and return. Once + * the callback returns, it will schedule the request. */ + priv->sriov.next = op; + return; + } + } else if (priv->sriov.pending) { priv->sriov.next = op; g_cancellable_cancel (priv->sriov.pending->cancellable); - } else + return; + } + + if (op) sriov_op_start (self, op); } static void +sriov_op_queue (NMDevice *self, + guint num_vfs, + NMTernary autoprobe, + NMPlatformAsyncCallback callback, + gpointer callback_data) +{ + SriovOp *op; + + /* We usually never want to cancel an async write operation, unless it's superseded + * by a newer operation (that resets the state). That is, because we need to ensure + * that we never end up doing two concurrent writes (since we write on a background + * thread, that would be unordered/racy). + * Of course, since we queue requests only per-device, when devices get renamed we + * might end up writing the same sysctl concurrently still. But that's really + * unlikely, and don't rename after udev completes! + * + * The "next" operation is not yet even started. It can be replaced/canceled right away + * when a newer request comes. + * The "pending" operation is currently ongoing, and we may cancel it if + * we have a follow-up operation (queued in "next"). Unless we have a such + * a newer request, we cannot cancel it! + * + * FIXME(shutdown): However, during shutdown we don't have a follow-up write request to cancel + * this operation and we have to give it at least some time to complete. The solution is that + * we register a way to abort the last call during shutdown, and after NM_SHUTDOWN_TIMEOUT_MS + * grace period we pull the plug and cancel it. */ + + op = g_slice_new (SriovOp); + *op = (SriovOp) { + .num_vfs = num_vfs, + .autoprobe = autoprobe, + .callback = callback, + .callback_data = callback_data, + }; + sriov_op_queue_op (self, op); +} + +static void device_init_static_sriov_num_vfs (NMDevice *self) { NMDevicePrivate *priv = NM_DEVICE_GET_PRIVATE (self); @@ -6396,63 +6444,14 @@ sriov_params_cb (GError *error, gpointer data) return; } - nm_device_activate_schedule_stage2_device_config (self); + priv->stage1_sriov_state = NM_DEVICE_STAGE_STATE_COMPLETED; + + nm_device_activate_schedule_stage1_device_prepare (self); } static NMActStageReturn act_stage1_prepare (NMDevice *self, NMDeviceStateReason *out_failure_reason) { - NMDevicePrivate *priv = NM_DEVICE_GET_PRIVATE (self); - NMSettingSriov *s_sriov; - guint i, num; - - if ( priv->ifindex > 0 - && nm_device_has_capability (self, NM_DEVICE_CAP_SRIOV) - && (s_sriov = nm_device_get_applied_setting (self, NM_TYPE_SETTING_SRIOV))) { - nm_auto_freev NMPlatformVF **plat_vfs = NULL; - gs_free_error GError *error = NULL; - NMSriovVF *vf; - NMTernary autoprobe; - gpointer *data; - - autoprobe = nm_setting_sriov_get_autoprobe_drivers (s_sriov); - if (autoprobe == NM_TERNARY_DEFAULT) { - autoprobe = nm_config_data_get_connection_default_int64 (NM_CONFIG_GET_DATA, - NM_CON_DEFAULT ("sriov.autoprobe-drivers"), - self, - NM_TERNARY_FALSE, - NM_TERNARY_TRUE, - NM_TERNARY_TRUE); - } - - num = nm_setting_sriov_get_num_vfs (s_sriov); - plat_vfs = g_new0 (NMPlatformVF *, num + 1); - for (i = 0; i < num; i++) { - vf = nm_setting_sriov_get_vf (s_sriov, i); - plat_vfs[i] = sriov_vf_config_to_platform (self, vf, &error); - if (!plat_vfs[i]) { - _LOGE (LOGD_DEVICE, - "failed to apply SR-IOV VF '%s': %s", - nm_utils_sriov_vf_to_str (vf, FALSE, NULL), - error->message); - NM_SET_OUT (out_failure_reason, NM_DEVICE_STATE_REASON_SRIOV_CONFIGURATION_FAILED); - return NM_ACT_STAGE_RETURN_FAILURE; - } - } - - /* When changing the number of VFs the kernel can block - * for very long time in the write to sysfs, especially - * if autoprobe-drivers is enabled. Do it asynchronously - * to avoid blocking the entire NM process. - */ - data = nm_utils_user_data_pack (self, g_steal_pointer (&plat_vfs)); - sriov_op_queue (self, - nm_setting_sriov_get_total_vfs (s_sriov), - autoprobe, - sriov_params_cb, - data); - return NM_ACT_STAGE_RETURN_POSTPONE; - } return NM_ACT_STAGE_RETURN_SUCCESS; } @@ -6481,18 +6480,77 @@ activate_stage1_device_prepare (NMDevice *self) nm_device_state_changed (self, NM_DEVICE_STATE_PREPARE, NM_DEVICE_STATE_REASON_NONE); + if (priv->stage1_sriov_state != NM_DEVICE_STAGE_STATE_COMPLETED) { + NMSettingSriov *s_sriov; + + if (nm_device_sys_iface_state_is_external_or_assume (self)) { + /* pass */ + } else if (priv->stage1_sriov_state == NM_DEVICE_STAGE_STATE_PENDING) + return; + else if ( priv->ifindex > 0 + && nm_device_has_capability (self, NM_DEVICE_CAP_SRIOV) + && (s_sriov = nm_device_get_applied_setting (self, NM_TYPE_SETTING_SRIOV))) { + nm_auto_freev NMPlatformVF **plat_vfs = NULL; + gs_free_error GError *error = NULL; + NMSriovVF *vf; + NMTernary autoprobe; + guint i, num; + + autoprobe = nm_setting_sriov_get_autoprobe_drivers (s_sriov); + if (autoprobe == NM_TERNARY_DEFAULT) { + autoprobe = nm_config_data_get_connection_default_int64 (NM_CONFIG_GET_DATA, + NM_CON_DEFAULT ("sriov.autoprobe-drivers"), + self, + NM_TERNARY_FALSE, + NM_TERNARY_TRUE, + NM_TERNARY_TRUE); + } + + num = nm_setting_sriov_get_num_vfs (s_sriov); + plat_vfs = g_new0 (NMPlatformVF *, num + 1); + for (i = 0; i < num; i++) { + vf = nm_setting_sriov_get_vf (s_sriov, i); + plat_vfs[i] = sriov_vf_config_to_platform (self, vf, &error); + if (!plat_vfs[i]) { + _LOGE (LOGD_DEVICE, + "failed to apply SR-IOV VF '%s': %s", + nm_utils_sriov_vf_to_str (vf, FALSE, NULL), + error->message); + nm_device_state_changed (self, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_SRIOV_CONFIGURATION_FAILED); + return; + } + } + + /* When changing the number of VFs the kernel can block + * for very long time in the write to sysfs, especially + * if autoprobe-drivers is enabled. Do it asynchronously + * to avoid blocking the entire NM process. + */ + sriov_op_queue (self, + nm_setting_sriov_get_total_vfs (s_sriov), + autoprobe, + sriov_params_cb, + nm_utils_user_data_pack (self, + g_steal_pointer (&plat_vfs))); + priv->stage1_sriov_state = NM_DEVICE_STAGE_STATE_PENDING; + return; + } + priv->stage1_sriov_state = NM_DEVICE_STAGE_STATE_COMPLETED; + } + /* Assumed connections were already set up outside NetworkManager */ if (!nm_device_sys_iface_state_is_external_or_assume (self)) { NMDeviceStateReason failure_reason = NM_DEVICE_STATE_REASON_NONE; ret = NM_DEVICE_GET_CLASS (self)->act_stage1_prepare (self, &failure_reason); - if (ret == NM_ACT_STAGE_RETURN_POSTPONE) { - return; - } else if (ret == NM_ACT_STAGE_RETURN_FAILURE) { + if (ret == NM_ACT_STAGE_RETURN_FAILURE) { nm_device_state_changed (self, NM_DEVICE_STATE_FAILED, failure_reason); return; } - g_return_if_fail (ret == NM_ACT_STAGE_RETURN_SUCCESS); + if (ret == NM_ACT_STAGE_RETURN_POSTPONE) + return; + + nm_assert (ret == NM_ACT_STAGE_RETURN_SUCCESS); } nm_device_activate_schedule_stage2_device_config (self); @@ -9911,7 +9969,7 @@ _ip6_privacy_get (NMDevice *self) return ip6_privacy; if (!nm_device_get_ip_ifindex (self)) - return NM_SETTING_IP6_CONFIG_PRIVACY_UNKNOWN;; + return NM_SETTING_IP6_CONFIG_PRIVACY_UNKNOWN; /* 3.) No valid default-value configured. Fallback to reading sysctl. * @@ -14572,6 +14630,8 @@ _cleanup_generic_pre (NMDevice *self, CleanupType cleanup_type) _cancel_activation (self); + priv->stage1_sriov_state = NM_DEVICE_STAGE_STATE_INIT; + if (cleanup_type != CLEANUP_TYPE_KEEP) { nm_manager_device_route_metric_clear (nm_manager_get (), nm_device_get_ip_ifindex (self)); @@ -15010,9 +15070,9 @@ deactivate_ready (NMDevice *self, NMDeviceStateReason reason) if (priv->dispatcher.call_id) return; - if (priv->sriov.pending) + if ( priv->sriov.pending + || priv->sriov.next) return; - nm_assert (!priv->sriov.next); nm_device_queue_state (self, NM_DEVICE_STATE_DISCONNECTED, reason); } |