summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLubomir Rintel <lkundrak@v3.sk>2022-07-29 00:02:20 +0200
committerLubomir Rintel <lkundrak@v3.sk>2022-07-29 00:03:18 +0200
commit1dc8b7f4774827c889718d3b6748efe330a9ec6e (patch)
tree0c27af2e03bb07aaf68d1ce1a1f39662b95939be
parent7864e75e96582d0bac5c56669b2294e8fac473b6 (diff)
downloadNetworkManager-lr/up-carrier-wait.tar.gz
device: wait for carrier even if it wasn't us who brought the device IFF_UPlr/up-carrier-wait
The devices generally need to be IFF_UP and wait a little before the carrier detection is reliable. Some devices actually need to wait more than a little -- r8169 needs up to 5 seconds. For this reason, we delay startup complete while the carrier is down after we bring the device up. We do this so that we don't reject activations due to carrier down until we're sure it's really down. This works well as long as it's us who brought the device up. If we're restarting the daemon, the device is going to be already up when we start up the daemon for the second time. There's, however, a slim chance that the device was brought down and up very shortly before the restart and therefore the carrier reporting is still not reliable. As a matter of fact, we bring the devices down and back up on some occasions, such as when enslaving to a team device. Therefore, the following events in quick succession cause trouble: # nmcli con up team-slave-eth0 [20099.205355] Generic FE-GE Realtek PHY r8169-0-300:00: attached PHY driver (mii_bus:phy_addr=r8169-0-300:00, irq=MAC) [20099.365641] nm-team: Port device eth0 added [20099.370728] r8169 0000:03:00.0 eth0: Link is Down [20099.436631] nm-team: Port device eth0 removed [20099.463422] Generic FE-GE Realtek PHY r8169-0-300:00: attached PHY driver (mii_bus:phy_addr=r8169-0-300:00, irq=MAC) [20099.628505] r8169 0000:03:00.0 eth0: Link is Down [20099.669425] Generic FE-GE Realtek PHY r8169-0-300:00: attached PHY driver (mii_bus:phy_addr=r8169-0-300:00, irq=MAC) [20099.833457] r8169 0000:03:00.0 eth0: Link is Down [20099.838471] nm-team: Port device eth0 added The device has been brought down, enslaved and brought up. "Link is Down" indicates carrier not being detected. Connection successfully activated (D-Bus active path: /org/freedesktop/NetworkManager/ActiveConnection/7) # systemctl restart NetworkManager Now NM sees the device being up, but carrier down. 
# nmcli con up testeth0 Error: Connection activation failed: No suitable device found for this connection (...). Activation failed, because eth0 carrier still appears down. # [20102.943464] r8169 0000:03:00.0 eth0: Link is Up - 1Gbps/Full - flow control rx/tx Now it's up, but the party is already over. Shiet. Let's wait whenever the device reaches unavailable state, whether we bring it up at that point or not. Fixes-test: @restart_L2_only_lacp https://bugzilla.redhat.com/show_bug.cgi?id=2092361
-rw-r--r--src/core/devices/nm-device.c59
1 files changed, 37 insertions, 22 deletions
diff --git a/src/core/devices/nm-device.c b/src/core/devices/nm-device.c
index f2de8c9c89..6cc823b2ee 100644
--- a/src/core/devices/nm-device.c
+++ b/src/core/devices/nm-device.c
@@ -13875,10 +13875,38 @@ _get_carrier_wait_ms(NMDevice *self)
CARRIER_WAIT_TIME_MS);
}
+/*
+ * Devices that support carrier detect must be IFF_UP to report carrier
+ * changes; so after setting the device IFF_UP we must suppress startup
+ * complete (via a pending action) until either the carrier turns on, or
+ * a timeout is reached.
+ */
+static void
+carrier_detect_wait(NMDevice *self)
+{
+ NMDevicePrivate *priv = NM_DEVICE_GET_PRIVATE(self);
+ gint64 now_ms, until_ms;
+
+ if (!nm_device_has_capability(self, NM_DEVICE_CAP_CARRIER_DETECT))
+ return;
+
+ /* we start a grace period of 5 seconds during which we will schedule
+ * a pending action whenever we have no carrier.
+ *
+ * If during that time carrier goes away, we declare the interface
+ * as not ready. */
+ nm_clear_g_source(&priv->carrier_wait_id);
+ if (!priv->carrier)
+ nm_device_add_pending_action(self, NM_PENDING_ACTION_CARRIER_WAIT, FALSE);
+
+ now_ms = nm_utils_get_monotonic_timestamp_msec();
+ until_ms = NM_MAX(now_ms + _get_carrier_wait_ms(self), priv->carrier_wait_until_ms);
+ priv->carrier_wait_id = g_timeout_add(until_ms - now_ms, carrier_wait_timeout, self);
+}
+
gboolean
nm_device_bring_up(NMDevice *self, gboolean block, gboolean *no_firmware)
{
- NMDevicePrivate *priv = NM_DEVICE_GET_PRIVATE(self);
gboolean device_is_up = FALSE;
NMDeviceCapabilities capabilities;
int ifindex;
@@ -13934,27 +13962,7 @@ nm_device_bring_up(NMDevice *self, gboolean block, gboolean *no_firmware)
capabilities |= NM_DEVICE_GET_CLASS(self)->get_generic_capabilities(self);
_add_capabilities(self, capabilities);
- /* Devices that support carrier detect must be IFF_UP to report carrier
- * changes; so after setting the device IFF_UP we must suppress startup
- * complete (via a pending action) until either the carrier turns on, or
- * a timeout is reached.
- */
- if (nm_device_has_capability(self, NM_DEVICE_CAP_CARRIER_DETECT)) {
- gint64 now_ms, until_ms;
-
- /* we start a grace period of 5 seconds during which we will schedule
- * a pending action whenever we have no carrier.
- *
- * If during that time carrier goes away, we declare the interface
- * as not ready. */
- nm_clear_g_source(&priv->carrier_wait_id);
- if (!priv->carrier)
- nm_device_add_pending_action(self, NM_PENDING_ACTION_CARRIER_WAIT, FALSE);
-
- now_ms = nm_utils_get_monotonic_timestamp_msec();
- until_ms = NM_MAX(now_ms + _get_carrier_wait_ms(self), priv->carrier_wait_until_ms);
- priv->carrier_wait_id = g_timeout_add(until_ms - now_ms, carrier_wait_timeout, self);
- }
+ carrier_detect_wait(self);
/* Can only get HW address of some devices when they are up */
nm_device_update_hw_address(self);
@@ -15713,6 +15721,13 @@ _set_state_full(NMDevice *self, NMDeviceState state, NMDeviceStateReason reason,
if (!nm_device_bring_up(self, TRUE, &no_firmware) && no_firmware)
_LOGW(LOGD_PLATFORM, "firmware may be missing.");
nm_device_set_firmware_missing(self, no_firmware ? TRUE : FALSE);
+ } else {
+ /* We didn't bring the device up and we have little idea
+ * when it was brought up. Play it safe and assume it could
+ * have been brought up very recently and it might be one of
+ * those that take time to detect carrier.
+ */
+ carrier_detect_wait(self);
}
/* Ensure the device gets deactivated in response to stuff like