summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Haller <thaller@redhat.com>2021-01-08 11:09:03 +0100
committerThomas Haller <thaller@redhat.com>2021-01-14 10:23:45 +0100
commitcc8706f815468e8771739ac7e30af8c4431245f6 (patch)
treec92a21692ea4ea3a7713743a0f0ca30abfbfa17e
parent86107b6a52ab26f236fd411835e137c9c10fe99c (diff)
downloadNetworkManager-cc8706f815468e8771739ac7e30af8c4431245f6.tar.gz
wireguard: delay activation while resolving DNS names for WireGuard peers to avoid race
The endpoints of WireGuard peers can be configured as DNS name, which NetworkManager will resolve. Since activating a profile might affect now names get resolved, we must first resolve names before completing the activation of the WireGuard device (and before reconfiguring DNS accordingly). For example, if you configure exclusive DNS resolution via the WireGuard device, and if the peer needs to be resolved via DNS, then resolving the peer name must happen before the reconfiguration of DNS. Otherwise the new DNS configuration will be broken due to being unable to reach the WireGuard peer. Fix that by waiting. There is still an unfixed problem. If resolving any peers fails, activation silently proceeds -- again possibly breaking the network setup. Of course, NetworkManager will repeatedly try to re-resolve the name, but that may never succeed if DNS would be resolved via the VPN itself. That is different from `wg set` which resolves hostnames and fails. Consequently `wg-quick up` would also fail. But these are both one shot applications, they are not around and basically let the user handle the error (by reading the log and invoking the command again). NetworkManager can do something different and proceed activation (as it will also periodically re-resolve the hostnames again). Note that it's also valid to activate a WireGuard device without any peers (and to modify the activated device later with Reapply()). As such, having no peers (or being unable to resolve a hostname) may be a valid configuration. I think we should add an option/flag that when enabled will cause the activation to fail of names cannot be resolved. https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/535 https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/616 https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/721
-rw-r--r--src/devices/nm-device-wireguard.c109
1 files changed, 88 insertions, 21 deletions
diff --git a/src/devices/nm-device-wireguard.c b/src/devices/nm-device-wireguard.c
index 0c467dfe73..1b109969ee 100644
--- a/src/devices/nm-device-wireguard.c
+++ b/src/devices/nm-device-wireguard.c
@@ -118,6 +118,9 @@ typedef struct {
CList lst_peers_head;
GHashTable *peers;
+ /* counts the numbers of peers that are currently resolving. */
+ guint peers_resolving_cnt;
+
gint64 resolve_next_try_at;
gint64 link_config_last_at;
@@ -506,9 +509,52 @@ _peers_find(NMDeviceWireGuardPrivate *priv, NMWireGuardPeer *peer)
return g_hash_table_lookup(priv->peers, &peer);
}
+static guint
+_peers_resolving_cnt(NMDeviceWireGuardPrivate *priv)
+{
+ nm_assert(priv);
+#if NM_MORE_ASSERTS > 3
+ {
+ PeerData *peer_data;
+ guint cnt = 0;
+
+ c_list_for_each_entry (peer_data, &priv->lst_peers_head, lst_peers) {
+ if (peer_data->ep_resolv.cancellable)
+ cnt++;
+ }
+ nm_assert(cnt == priv->peers_resolving_cnt);
+ }
+#endif
+
+ return priv->peers_resolving_cnt;
+}
+
+static void
+_peers_resolving_cnt_decrement(NMDeviceWireGuard *self)
+{
+ NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self);
+
+ nm_assert(priv);
+ nm_assert(priv->peers_resolving_cnt > 0);
+
+ priv->peers_resolving_cnt--;
+
+ nm_assert(_peers_resolving_cnt(priv) == priv->peers_resolving_cnt);
+
+ if (priv->peers_resolving_cnt == 0) {
+ if (nm_device_get_state(NM_DEVICE(self)) == NM_DEVICE_STATE_CONFIG) {
+ _LOGT(LOGD_DEVICE,
+ "activation delayed to resolve DNS names of peers: completed, proceed now");
+ nm_device_activate_schedule_stage2_device_config(NM_DEVICE(self), FALSE);
+ }
+ }
+}
+
static void
-_peers_remove(NMDeviceWireGuardPrivate *priv, PeerData *peer_data)
+_peers_remove(NMDeviceWireGuard *self, PeerData *peer_data)
{
+ NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self);
+
nm_assert(peer_data);
nm_assert(g_hash_table_lookup(priv->peers, peer_data) == peer_data);
@@ -517,13 +563,16 @@ _peers_remove(NMDeviceWireGuardPrivate *priv, PeerData *peer_data)
c_list_unlink_stale(&peer_data->lst_peers);
nm_wireguard_peer_unref(peer_data->peer);
- nm_clear_g_cancellable(&peer_data->ep_resolv.cancellable);
+ if (nm_clear_g_cancellable(&peer_data->ep_resolv.cancellable))
+ _peers_resolving_cnt_decrement(self);
g_slice_free(PeerData, peer_data);
if (c_list_is_empty(&priv->lst_peers_head)) {
nm_clear_g_source(&priv->resolve_next_try_id);
nm_clear_g_source(&priv->link_config_delayed_id);
}
+
+ nm_assert(_peers_resolving_cnt(priv) == priv->peers_resolving_cnt);
}
static PeerData *
@@ -675,8 +724,9 @@ _peers_retry_in_msec(PeerData *peer_data, gboolean after_failure)
static void
_peers_resolve_cb(GObject *source_object, GAsyncResult *res, gpointer user_data)
{
- NMDeviceWireGuard *self;
- PeerData * peer_data;
+ NMDeviceWireGuard * self;
+ NMDeviceWireGuardPrivate *priv;
+ PeerData * peer_data;
gs_free_error GError *resolv_error = NULL;
GList * list;
gboolean changed = FALSE;
@@ -692,10 +742,13 @@ _peers_resolve_cb(GObject *source_object, GAsyncResult *res, gpointer user_data)
peer_data = user_data;
self = peer_data->self;
+ priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self);
- g_clear_object(&peer_data->ep_resolv.cancellable);
+ if (nm_clear_g_object(&peer_data->ep_resolv.cancellable))
+ _peers_resolving_cnt_decrement(self);
nm_assert((!resolv_error) != (!list));
+ nm_assert(_peers_resolving_cnt(priv) == priv->peers_resolving_cnt);
#define _retry_in_msec_to_string(retry_in_msec, s_retry) \
({ \
@@ -797,8 +850,6 @@ _peers_resolve_cb(GObject *source_object, GAsyncResult *res, gpointer user_data)
_peers_resolve_retry_reschedule_for_peer(self, peer_data, retry_in_msec);
if (changed) {
- NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self);
-
/* schedule the job in the background, to give multiple resolve events time
* to complete. */
nm_clear_g_source(&priv->link_config_delayed_id);
@@ -812,6 +863,7 @@ _peers_resolve_cb(GObject *source_object, GAsyncResult *res, gpointer user_data)
static void
_peers_resolve_start(NMDeviceWireGuard *self, PeerData *peer_data)
{
+ NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self);
gs_unref_object GResolver *resolver = NULL;
const char * host;
@@ -820,6 +872,7 @@ _peers_resolve_start(NMDeviceWireGuard *self, PeerData *peer_data)
nm_assert(!peer_data->ep_resolv.cancellable);
peer_data->ep_resolv.cancellable = g_cancellable_new();
+ priv->peers_resolving_cnt++;
/* set a special next-try timestamp. It is positive, and indicates
* that we are in the process of trying.
@@ -841,6 +894,8 @@ _peers_resolve_start(NMDeviceWireGuard *self, PeerData *peer_data)
nm_wireguard_peer_get_public_key(peer_data->peer),
host,
nm_wireguard_peer_get_endpoint(peer_data->peer));
+
+ nm_assert(_peers_resolving_cnt(priv) == priv->peers_resolving_cnt);
}
static void
@@ -915,7 +970,8 @@ _peers_update(NMDeviceWireGuard *self,
if (nm_sock_addr_union_cmp(&peer_data->ep_resolv.sockaddr, &sockaddr) != 0)
changed = TRUE;
- nm_clear_g_cancellable(&peer_data->ep_resolv.cancellable);
+ if (nm_clear_g_cancellable(&peer_data->ep_resolv.cancellable))
+ _peers_resolving_cnt_decrement(self);
peer_data->ep_resolv = (PeerEndpointResolveData){
.sockaddr = sockaddr,
@@ -948,12 +1004,13 @@ _peers_update(NMDeviceWireGuard *self,
}
static void
-_peers_remove_all(NMDeviceWireGuardPrivate *priv)
+_peers_remove_all(NMDeviceWireGuard *self)
{
- PeerData *peer_data;
+ NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self);
+ PeerData * peer_data;
while ((peer_data = c_list_first_entry(&priv->lst_peers_head, PeerData, lst_peers)))
- _peers_remove(priv, peer_data);
+ _peers_remove(self, peer_data);
}
static void
@@ -984,7 +1041,7 @@ _peers_update_all(NMDeviceWireGuard *self, NMSettingWireGuard *s_wg, gboolean *o
c_list_for_each_entry_safe (peer_data, peer_data_safe, &priv->lst_peers_head, lst_peers) {
if (peer_data->dirty_update_all) {
- _peers_remove(priv, peer_data);
+ _peers_remove(self, peer_data);
peers_removed = TRUE;
}
}
@@ -1515,9 +1572,11 @@ link_config_delayed_resolver_cb(gpointer user_data)
static NMActStageReturn
act_stage2_config(NMDevice *device, NMDeviceStateReason *out_failure_reason)
{
- NMDeviceSysIfaceState sys_iface_state;
- NMDeviceStateReason failure_reason;
- NMActStageReturn ret;
+ NMDeviceWireGuard * self = NM_DEVICE_WIREGUARD(device);
+ NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self);
+ NMDeviceSysIfaceState sys_iface_state;
+ NMDeviceStateReason failure_reason;
+ NMActStageReturn ret;
sys_iface_state = nm_device_sys_iface_state_get(device);
@@ -1533,17 +1592,25 @@ act_stage2_config(NMDevice *device, NMDeviceStateReason *out_failure_reason)
: LINK_CONFIG_MODE_FULL,
&failure_reason);
- if (sys_iface_state == NM_DEVICE_SYS_IFACE_STATE_ASSUME) {
- /* this never fails. */
- return NM_ACT_STAGE_RETURN_SUCCESS;
- }
-
if (ret == NM_ACT_STAGE_RETURN_FAILURE) {
+ if (sys_iface_state == NM_DEVICE_SYS_IFACE_STATE_ASSUME) {
+ /* this never fails. */
+ return NM_ACT_STAGE_RETURN_SUCCESS;
+ }
+
nm_device_state_changed(device, NM_DEVICE_STATE_FAILED, failure_reason);
NM_SET_OUT(out_failure_reason, failure_reason);
return NM_ACT_STAGE_RETURN_FAILURE;
}
+ nm_assert(NM_IN_SET(ret, NM_ACT_STAGE_RETURN_SUCCESS, NM_ACT_STAGE_RETURN_POSTPONE));
+
+ if (ret == NM_ACT_STAGE_RETURN_SUCCESS && _peers_resolving_cnt(priv) > 0u) {
+ _LOGT(LOGD_DEVICE,
+ "activation delayed to resolve DNS names of peers: resolving and waiting...");
+ return NM_ACT_STAGE_RETURN_POSTPONE;
+ }
+
return ret;
}
@@ -1732,7 +1799,7 @@ _device_cleanup(NMDeviceWireGuard *self)
{
NMDeviceWireGuardPrivate *priv = NM_DEVICE_WIREGUARD_GET_PRIVATE(self);
- _peers_remove_all(priv);
+ _peers_remove_all(self);
_secrets_cancel(self);