diff options
author | Thomas Haller <thaller@redhat.com> | 2021-01-08 11:09:03 +0100 |
---|---|---|
committer | Thomas Haller <thaller@redhat.com> | 2021-01-14 10:23:45 +0100 |
commit | cc8706f815468e8771739ac7e30af8c4431245f6 (patch) | |
tree | c92a21692ea4ea3a7713743a0f0ca30abfbfa17e | |
parent | 86107b6a52ab26f236fd411835e137c9c10fe99c (diff) | |
download | NetworkManager-cc8706f815468e8771739ac7e30af8c4431245f6.tar.gz |
wireguard: delay activation while resolving DNS names for WireGuard peers to avoid race
The endpoints of WireGuard peers can be configured as DNS name, which
NetworkManager will resolve.
Since activating a profile might affect now names get resolved, we must
first resolve names before completing the activation of the WireGuard
device (and before reconfiguring DNS accordingly).
For example, if you configure exclusive DNS resolution via the WireGuard
device, and if the peer needs to be resolved via DNS, then resolving the
peer name must happen before the reconfiguration of DNS. Otherwise the
new DNS configuration will be broken due to being unable to reach the
WireGuard peer.
Fix that by waiting.
There is still an unfixed problem. If resolving any peers fails,
activation silently proceeds -- again possibly breaking the network
setup. Of course, NetworkManager will repeatedly try to re-resolve
the name, but that may never succeed if DNS would be resolved via
the VPN itself.
That is different from `wg set` which resolves hostnames and fails.
Consequently `wg-quick up` would also fail. But these are both one shot
applications, they are not around and basically let the user handle the
error (by reading the log and invoking the command again). NetworkManager
can do something different and proceed activation (as it will also
periodically re-resolve the hostnames again). Note that it's also valid
to activate a WireGuard device without any peers (and to modify the
activated device later with Reapply()). As such, having no peers (or
being unable to resolve a hostname) may be a valid configuration.
I think we should add an option/flag that when enabled will cause
the activation to fail of names cannot be resolved.
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/535
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/616
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/721
-rw-r--r-- | src/devices/nm-device-wireguard.c | 109 |
1 files changed, 88 insertions, 21 deletions
diff --git a/src/devices/nm-device-wireguard.c b/src/devices/nm-device-wireguard.c index 0c467dfe73..1b109969ee 100644 --- a/src/devices/nm-device-wireguard.c +++ b/src/devices/nm-device-wireguard.c @@ -118,6 +118,9 @@ typedef struct { CList lst_peers_head; GHashTable *peers; + /* counts the numbers of peers that are currently resolving. */ + guint peers_resolving_cnt; + gint64 resolve_next_try_at; gint64 link_config_last_at; @@ -506,9 +509,52 @@ _peers_find(NMDeviceWireGuardPrivate *priv, NMWireGuardPeer *peer) return g_hash_table_lookup(priv->peers, &peer); } +static guint +_peers_resolving_cnt(NMDeviceWireGuardPrivate *priv) +{ + nm_assert(priv); +#if NM_MORE_ASSERTS > 3 + { + PeerData *peer_data; + guint cnt = 0; + + c_list_for_each_entry (peer_data, &priv->lst_peers_head, lst_peers) { + if (peer_data->ep_resolv.cancellable) + cnt++; + } + nm_assert(cnt == priv->peers_resolving_cnt); + } +#endif + + return priv->peers_resolving_cnt; +} + +static void +_peers_resolving_cnt_decrement(NMDeviceWireGuard *self) +{ + NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self); + + nm_assert(priv); + nm_assert(priv->peers_resolving_cnt > 0); + + priv->peers_resolving_cnt--; + + nm_assert(_peers_resolving_cnt(priv) == priv->peers_resolving_cnt); + + if (priv->peers_resolving_cnt == 0) { + if (nm_device_get_state(NM_DEVICE(self)) == NM_DEVICE_STATE_CONFIG) { + _LOGT(LOGD_DEVICE, + "activation delayed to resolve DNS names of peers: completed, proceed now"); + nm_device_activate_schedule_stage2_device_config(NM_DEVICE(self), FALSE); + } + } +} + static void -_peers_remove(NMDeviceWireGuardPrivate *priv, PeerData *peer_data) +_peers_remove(NMDeviceWireGuard *self, PeerData *peer_data) { + NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self); + nm_assert(peer_data); nm_assert(g_hash_table_lookup(priv->peers, peer_data) == peer_data); @@ -517,13 +563,16 @@ _peers_remove(NMDeviceWireGuardPrivate *priv, PeerData *peer_data) c_list_unlink_stale(&peer_data->lst_peers); nm_wireguard_peer_unref(peer_data->peer); - nm_clear_g_cancellable(&peer_data->ep_resolv.cancellable); + if (nm_clear_g_cancellable(&peer_data->ep_resolv.cancellable)) + _peers_resolving_cnt_decrement(self); g_slice_free(PeerData, peer_data); if (c_list_is_empty(&priv->lst_peers_head)) { nm_clear_g_source(&priv->resolve_next_try_id); nm_clear_g_source(&priv->link_config_delayed_id); } + + nm_assert(_peers_resolving_cnt(priv) == priv->peers_resolving_cnt); } static PeerData * @@ -675,8 +724,9 @@ _peers_retry_in_msec(PeerData *peer_data, gboolean after_failure) static void _peers_resolve_cb(GObject *source_object, GAsyncResult *res, gpointer user_data) { - NMDeviceWireGuard *self; - PeerData * peer_data; + NMDeviceWireGuard * self; + NMDeviceWireGuardPrivate *priv; + PeerData * peer_data; gs_free_error GError *resolv_error = NULL; GList * list; gboolean changed = FALSE; @@ -692,10 +742,13 @@ _peers_resolve_cb(GObject *source_object, GAsyncResult *res, gpointer user_data) peer_data = user_data; self = peer_data->self; + priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self); - g_clear_object(&peer_data->ep_resolv.cancellable); + if (nm_clear_g_object(&peer_data->ep_resolv.cancellable)) + _peers_resolving_cnt_decrement(self); nm_assert((!resolv_error) != (!list)); + nm_assert(_peers_resolving_cnt(priv) == priv->peers_resolving_cnt); #define _retry_in_msec_to_string(retry_in_msec, s_retry) \ ({ \ @@ -797,8 +850,6 @@ _peers_resolve_cb(GObject *source_object, GAsyncResult *res, gpointer user_data) _peers_resolve_retry_reschedule_for_peer(self, peer_data, retry_in_msec); if (changed) { - NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self); - /* schedule the job in the background, to give multiple resolve events time * to complete. */ nm_clear_g_source(&priv->link_config_delayed_id); @@ -812,6 +863,7 @@ _peers_resolve_cb(GObject *source_object, GAsyncResult *res, gpointer user_data) static void _peers_resolve_start(NMDeviceWireGuard *self, PeerData *peer_data) { + NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self); gs_unref_object GResolver *resolver = NULL; const char * host; @@ -820,6 +872,7 @@ _peers_resolve_start(NMDeviceWireGuard *self, PeerData *peer_data) nm_assert(!peer_data->ep_resolv.cancellable); peer_data->ep_resolv.cancellable = g_cancellable_new(); + priv->peers_resolving_cnt++; /* set a special next-try timestamp. It is positive, and indicates * that we are in the process of trying. @@ -841,6 +894,8 @@ _peers_resolve_start(NMDeviceWireGuard *self, PeerData *peer_data) nm_wireguard_peer_get_public_key(peer_data->peer), host, nm_wireguard_peer_get_endpoint(peer_data->peer)); + + nm_assert(_peers_resolving_cnt(priv) == priv->peers_resolving_cnt); } static void @@ -915,7 +970,8 @@ _peers_update(NMDeviceWireGuard *self, if (nm_sock_addr_union_cmp(&peer_data->ep_resolv.sockaddr, &sockaddr) != 0) changed = TRUE; - nm_clear_g_cancellable(&peer_data->ep_resolv.cancellable); + if (nm_clear_g_cancellable(&peer_data->ep_resolv.cancellable)) + _peers_resolving_cnt_decrement(self); peer_data->ep_resolv = (PeerEndpointResolveData){ .sockaddr = sockaddr, @@ -948,12 +1004,13 @@ _peers_update(NMDeviceWireGuard *self, } static void -_peers_remove_all(NMDeviceWireGuardPrivate *priv) +_peers_remove_all(NMDeviceWireGuard *self) { - PeerData *peer_data; + NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self); + PeerData * peer_data; while ((peer_data = c_list_first_entry(&priv->lst_peers_head, PeerData, lst_peers))) - _peers_remove(priv, peer_data); + _peers_remove(self, peer_data); } static void @@ -984,7 +1041,7 @@ _peers_update_all(NMDeviceWireGuard *self, NMSettingWireGuard *s_wg, gboolean *o c_list_for_each_entry_safe (peer_data, peer_data_safe, &priv->lst_peers_head, lst_peers) { if (peer_data->dirty_update_all) { - _peers_remove(priv, peer_data); + _peers_remove(self, peer_data); peers_removed = TRUE; } } @@ -1515,9 +1572,11 @@ link_config_delayed_resolver_cb(gpointer user_data) static NMActStageReturn act_stage2_config(NMDevice *device, NMDeviceStateReason *out_failure_reason) { - NMDeviceSysIfaceState sys_iface_state; - NMDeviceStateReason failure_reason; - NMActStageReturn ret; + NMDeviceWireGuard * self = NM_DEVICE_WIREGUARD(device); + NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self); + NMDeviceSysIfaceState sys_iface_state; + NMDeviceStateReason failure_reason; + NMActStageReturn ret; sys_iface_state = nm_device_sys_iface_state_get(device); @@ -1533,17 +1592,25 @@ act_stage2_config(NMDevice *device, NMDeviceStateReason *out_failure_reason) : LINK_CONFIG_MODE_FULL, &failure_reason); - if (sys_iface_state == NM_DEVICE_SYS_IFACE_STATE_ASSUME) { - /* this never fails. */ - return NM_ACT_STAGE_RETURN_SUCCESS; - } - if (ret == NM_ACT_STAGE_RETURN_FAILURE) { + if (sys_iface_state == NM_DEVICE_SYS_IFACE_STATE_ASSUME) { + /* this never fails. */ + return NM_ACT_STAGE_RETURN_SUCCESS; + } + nm_device_state_changed(device, NM_DEVICE_STATE_FAILED, failure_reason); NM_SET_OUT(out_failure_reason, failure_reason); return NM_ACT_STAGE_RETURN_FAILURE; } + nm_assert(NM_IN_SET(ret, NM_ACT_STAGE_RETURN_SUCCESS, NM_ACT_STAGE_RETURN_POSTPONE)); + + if (ret == NM_ACT_STAGE_RETURN_SUCCESS && _peers_resolving_cnt(priv) > 0u) { + _LOGT(LOGD_DEVICE, + "activation delayed to resolve DNS names of peers: resolving and waiting..."); + return NM_ACT_STAGE_RETURN_POSTPONE; + } + return ret; } @@ -1732,7 +1799,7 @@ _device_cleanup(NMDeviceWireGuard *self) { NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self); - _peers_remove_all(priv); + _peers_remove_all(self); _secrets_cancel(self); |