diff options
author | Thomas Haller <thaller@redhat.com> | 2022-10-04 12:35:35 +0200 |
---|---|---|
committer | Thomas Haller <thaller@redhat.com> | 2022-10-04 12:37:42 +0200 |
commit | 6ef929d19fb7622b4430ecb2434b1562221c65ae (patch) | |
tree | 844a5920aea0c07c9e8104ed70a38a512fbf8b92 | |
parent | 718392ef5ff484f6b77b3088417a52ef90331a83 (diff) | |
parent | 22f670687a5b91784bf04cedaf0a7f11c4eb2dd5 (diff) | |
download | NetworkManager-6ef929d19fb7622b4430ecb2434b1562221c65ae.tar.gz |
bond: merge branch 'th/mlag-bonding-slb'
https://bugzilla.redhat.com/show_bug.cgi?id=2128216
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1385
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | NEWS | 4 | ||||
-rw-r--r-- | src/core/devices/nm-device-bond.c | 123 | ||||
-rw-r--r-- | src/core/meson.build | 1 | ||||
-rw-r--r-- | src/core/nm-bond-manager.c | 967 | ||||
-rw-r--r-- | src/core/nm-bond-manager.h | 32 | ||||
-rw-r--r-- | src/core/nm-firewall-utils.c | 248 | ||||
-rw-r--r-- | src/core/nm-firewall-utils.h | 6 | ||||
-rw-r--r-- | src/libnm-core-impl/nm-setting-bond.c | 40 | ||||
-rw-r--r-- | src/libnm-core-public/nm-setting-bond.h | 1 |
10 files changed, 1418 insertions, 6 deletions
diff --git a/Makefile.am b/Makefile.am index 6b9b82876b..13cadec6dc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2462,6 +2462,8 @@ src_core_libNetworkManagerBase_la_SOURCES = \ src/core/nm-l3cfg.h \ src/core/nm-ip-config.c \ src/core/nm-ip-config.h \ + src/core/nm-bond-manager.c \ + src/core/nm-bond-manager.h \ \ src/core/dhcp/nm-dhcp-client.c \ src/core/dhcp/nm-dhcp-client.h \ @@ -14,6 +14,10 @@ USE AT YOUR OWN RISK. NOT RECOMMENDED FOR PRODUCTION USE! in `nmcli connection $operator uuid $uuid`. * nmtui now supports editing Wi-Fi WPA-Enterprise, Ethernet with 802.1X authentication and MACsec connection profiles. +* bond: add "balance-slb" option which implements source load balancing + with "balance-xor" mode and "vlan-srcmac" xmit_hash_policy. In this + mode, NetworkManager configures nftables to prevent loops in the + switch. ============================================= NetworkManager-1.40 diff --git a/src/core/devices/nm-device-bond.c b/src/core/devices/nm-device-bond.c index dc5e1d5c7e..72ede7c19c 100644 --- a/src/core/devices/nm-device-bond.c +++ b/src/core/devices/nm-device-bond.c @@ -20,6 +20,7 @@ #include "libnm-core-intern/nm-core-internal.h" #include "nm-manager.h" #include "nm-setting-bond-port.h" +#include "nm-bond-manager.h" #define _NMLOG_DEVICE_TYPE NMDeviceBond #include "nm-device-logging.h" @@ -59,7 +60,8 @@ /*****************************************************************************/ struct _NMDeviceBond { - NMDevice parent; + NMDevice parent; + NMBondManager *bond_manager; }; struct _NMDeviceBondClass { @@ -178,7 +180,9 @@ update_connection(NMDevice *device, NMConnection *connection) gs_free char *value = NULL; char *p; - if (NM_IN_STRSET(option, NM_SETTING_BOND_OPTION_ACTIVE_SLAVE)) + if (NM_IN_STRSET(option, + NM_SETTING_BOND_OPTION_ACTIVE_SLAVE, + NM_SETTING_BOND_OPTION_BALANCE_SLB)) continue; value = @@ -460,10 +464,97 @@ _platform_lnk_bond_init_from_setting(NMSettingBond *s_bond, NMPlatformLnkBond *p props->tlb_dynamic_lb_has = 
NM_IN_SET(props->mode, NM_BOND_MODE_TLB, NM_BOND_MODE_ALB); } +static void +_balance_slb_cb(NMBondManager *bond_manager, NMBondManagerEventType event_type, gpointer user_data) +{ + NMDevice *device = user_data; + NMDeviceBond *self = NM_DEVICE_BOND(device); + + nm_assert(NM_IS_DEVICE_BOND(self)); + nm_assert(self->bond_manager == bond_manager); + + switch (event_type) { + case NM_BOND_MANAGER_EVENT_TYPE_STATE: + switch (nm_bond_manager_get_state(bond_manager)) { + case NM_OPTION_BOOL_FALSE: + if (nm_device_get_state(device) <= NM_DEVICE_STATE_ACTIVATED) { + _LOGD(LOGD_BOND, "balance-slb: failed"); + nm_device_state_changed(device, + NM_DEVICE_STATE_FAILED, + NM_DEVICE_STATE_REASON_CONFIG_FAILED); + } + return; + case NM_OPTION_BOOL_TRUE: + if (nm_device_get_state(device) <= NM_DEVICE_STATE_ACTIVATED + && nm_device_devip_get_state(device, AF_UNSPEC) <= NM_DEVICE_IP_STATE_PENDING) { + nm_device_devip_set_state(device, AF_UNSPEC, NM_DEVICE_IP_STATE_READY, NULL); + } + return; + case NM_OPTION_BOOL_DEFAULT: + if (nm_device_get_state(device) <= NM_DEVICE_STATE_ACTIVATED + && nm_device_devip_get_state(device, AF_UNSPEC) == NM_DEVICE_IP_STATE_READY) { + /* We are again busy. We can also go back to "pending" from "ready". + * If ip-config state is not yet complete, this will further delay it. + * Otherwise, it should have no effect. 
*/ + nm_device_devip_set_state(device, AF_UNSPEC, NM_DEVICE_IP_STATE_PENDING, NULL); + } + return; + } + nm_assert_not_reached(); + return; + } + + nm_assert_not_reached(); +} + +static void +_balance_slb_setup(NMDeviceBond *self, NMConnection *connection) +{ + int ifindex = nm_device_get_ifindex(NM_DEVICE(self)); + gboolean balance_slb = FALSE; + const char *uuid; + NMSettingBond *s_bond; + + if (ifindex > 0 && connection && (s_bond = nm_connection_get_setting_bond(connection))) + balance_slb = _v_intbool(s_bond, NM_SETTING_BOND_OPTION_BALANCE_SLB); + + if (!balance_slb) { + if (nm_clear_pointer(&self->bond_manager, nm_bond_manager_destroy)) { + _LOGD(LOGD_BOND, "balance-slb: stopped"); + nm_device_devip_set_state(NM_DEVICE(self), AF_UNSPEC, NM_DEVICE_IP_STATE_NONE, NULL); + } + return; + } + + uuid = nm_connection_get_uuid(connection); + + if (self->bond_manager) { + if (nm_bond_manager_get_ifindex(self->bond_manager) == ifindex + && nm_streq0(nm_bond_manager_get_connection_uuid(self->bond_manager), uuid)) { + _LOGD(LOGD_BOND, "balance-slb: reapply"); + nm_bond_manager_reapply(self->bond_manager); + return; + } + nm_clear_pointer(&self->bond_manager, nm_bond_manager_destroy); + _LOGD(LOGD_BOND, "balance-slb: restart"); + } + + _LOGD(LOGD_BOND, "balance-slb: start"); + if (nm_device_devip_get_state(NM_DEVICE(self), AF_UNSPEC) < NM_DEVICE_IP_STATE_PENDING) + nm_device_devip_set_state(NM_DEVICE(self), AF_UNSPEC, NM_DEVICE_IP_STATE_PENDING, NULL); + self->bond_manager = nm_bond_manager_new(nm_device_get_platform(NM_DEVICE(self)), + ifindex, + uuid, + _balance_slb_cb, + self); + nm_assert(nm_bond_manager_get_state(self->bond_manager) == NM_OPTION_BOOL_DEFAULT); +} + static NMActStageReturn act_stage1_prepare(NMDevice *device, NMDeviceStateReason *out_failure_reason) { - NMActStageReturn ret = NM_ACT_STAGE_RETURN_SUCCESS; + NMDeviceBond *self = NM_DEVICE_BOND(device); + NMActStageReturn ret = NM_ACT_STAGE_RETURN_SUCCESS; NMConnection *connection; NMSettingBond *s_bond; 
NMPlatformLnkBond props; @@ -476,6 +567,14 @@ act_stage1_prepare(NMDevice *device, NMDeviceStateReason *out_failure_reason) s_bond = nm_connection_get_setting_bond(connection); g_return_val_if_fail(s_bond, NM_ACT_STAGE_RETURN_FAILURE); + if (nm_device_sys_iface_state_is_external(device)) + return NM_ACT_STAGE_RETURN_SUCCESS; + + _balance_slb_setup(self, connection); + + if (nm_device_sys_iface_state_is_external_or_assume(device)) + return NM_ACT_STAGE_RETURN_SUCCESS; + _platform_lnk_bond_init_from_setting(s_bond, &props); /* Interface must be down to set bond options */ @@ -684,7 +783,7 @@ can_reapply_change(NMDevice *device, const char *name = *option_list; /* We support changes to these */ - if (NM_IN_STRSET(name, OPTIONS_REAPPLY_FULL)) + if (NM_IN_STRSET(name, OPTIONS_REAPPLY_FULL, NM_SETTING_BOND_OPTION_BALANCE_SLB)) continue; /* Reject any other changes */ @@ -730,6 +829,16 @@ reapply_connection(NMDevice *device, NMConnection *con_old, NMConnection *con_ne set_bond_arp_ip_targets(device, s_bond); set_bond_attrs_or_default(device, s_bond, NM_MAKE_STRV(OPTIONS_REAPPLY_SUBSET)); + + _balance_slb_setup(self, con_new); +} + +static void +deactivate(NMDevice *device) +{ + NMDeviceBond *self = NM_DEVICE_BOND(device); + + _balance_slb_setup(self, NULL); } /*****************************************************************************/ @@ -768,13 +877,15 @@ nm_device_bond_class_init(NMDeviceBondClass *klass) device_class->update_connection = update_connection; device_class->master_update_slave_connection = controller_update_port_connection; - device_class->create_and_realize = create_and_realize; - device_class->act_stage1_prepare = act_stage1_prepare; + device_class->create_and_realize = create_and_realize; + device_class->act_stage1_prepare = act_stage1_prepare; + device_class->act_stage1_prepare_also_for_external_or_assume = TRUE; device_class->get_configured_mtu = nm_device_get_configured_mtu_for_wired; device_class->attach_port = attach_port; 
device_class->detach_port = detach_port; device_class->can_reapply_change = can_reapply_change; device_class->reapply_connection = reapply_connection; + device_class->deactivate = deactivate; } /*****************************************************************************/ diff --git a/src/core/meson.build b/src/core/meson.build index f3359ad0f5..6f11595aa0 100644 --- a/src/core/meson.build +++ b/src/core/meson.build @@ -53,6 +53,7 @@ libNetworkManagerBase = static_library( 'nm-l3-ipv4ll.c', 'nm-l3-ipv6ll.c', 'nm-l3cfg.c', + 'nm-bond-manager.c', 'nm-ip-config.c', ), dependencies: [ diff --git a/src/core/nm-bond-manager.c b/src/core/nm-bond-manager.c new file mode 100644 index 0000000000..2d15b0b5a0 --- /dev/null +++ b/src/core/nm-bond-manager.c @@ -0,0 +1,967 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "src/core/nm-default-daemon.h" + +#include "nm-bond-manager.h" + +#include <linux/if.h> + +#include "NetworkManagerUtils.h" +#include "libnm-core-aux-intern/nm-libnm-core-utils.h" +#include "libnm-glib-aux/nm-str-buf.h" +#include "libnm-platform/nm-platform.h" +#include "libnm-platform/nmp-object.h" +#include "nm-firewall-utils.h" + +/*****************************************************************************/ + +typedef enum _nm_packed { + REGISTRATION_STATE_NONE, + REGISTRATION_STATE_UPPING, + REGISTRATION_STATE_UP, + REGISTRATION_STATE_DOWNING, +} RegistrationState; + +struct _NMBondManager { + NMPlatform *platform; + + NMBondManagerCallback callback; + gpointer user_data; + + /* This is only used for structured logging. */ + char *connection_uuid; + + GSource *reconfigure_on_idle_source; + + /* During _reconfigure_check() we remember all ifindexes that are part + * of the current SLB bond. This is used during _link_changed_cb() to + * figure out whether a change on the interface might be relevant to + * trigger a _reconfigure_check() on idle. 
*/ + GHashTable *previous_ifindexes; + + /* We need to keep track of active members that we configured in NFT. + * That is, because on update we use "add && flush" to reset the table, + * however that leaves empty chains around. If we previously had an active + * member, a chain for it was created that we need to clean up. + * + * Before every NFT call we use this to generate the list of members that + * are to be cleaned up. Thereby also adding the new active-members to + * the list. When the NFT call returns with success, we can prune the + * now deleted member/chain. */ + GHashTable *previous_members; + + GCancellable *cancellable; + + struct { + char *bond_ifname_curr; + char *bond_ifname_next; + const char **active_members_curr; + const char **active_members_next; + } dat; + + gulong link_changed_id; + int ifindex; + RegistrationState reg_state; + bool destroyed : 1; + + /* Whether we noticed some changes that require us to _reconfigure_check(). + * Note that while a NFT call is pending, we postpone the check. */ + bool reconfigure_check : 1; + + /* Whether a `nft` call is in progress. Usually this corresponds to + * having a cancellable, however, we may also cancel and clear the + * cancellable while the call is still in progress. */ + bool nft_in_progress : 1; + + /* Whether the last NFT invocation was good. If not, we may have + * an invalid state. Actually unused so far, because it's not + * clear what to do about failure to configure NFT (aside from logging + * a warning). */ + bool nft_good : 1; + + /* The overall state. DEFAULT means that an update is pending. + * FALSE means that the last "nft" command failed. + * TRUE means that the last "nft" command was good. 
*/ + NMOptionBool state : 3; +}; + +#define NM_IS_BOND_MANAGER(self) \ + ({ \ + const NMBondManager *_self = (self); \ + \ + (_self && NM_IS_PLATFORM(_self->platform)); \ + }) + +/*****************************************************************************/ + +static void _nft_call(NMBondManager *self, + gboolean up, + const char *bond_ifname, + const char *const *bond_ifnames_down, + const char *const *active_members); + +static void _bond_manager_destroy(NMBondManager *self); + +static void _reconfigure_check(NMBondManager *self, gboolean reapply); + +/*****************************************************************************/ + +#define _NMLOG_DOMAIN LOGD_DEVICE +#define _NMLOG_PREFIX_NAME "mlag" +#define _NMLOG(level, ...) \ + G_STMT_START \ + { \ + const NMLogLevel _level = (level); \ + \ + if (nm_logging_enabled(_level, _NMLOG_DOMAIN)) { \ + NMBondManager *const _self = (self); \ + const char *_ifname = nm_platform_link_get_name(_self->platform, _self->ifindex); \ + char _sbuf[30]; \ + \ + _nm_log(_level, \ + _NMLOG_DOMAIN, \ + 0, \ + _ifname, \ + _self->connection_uuid, \ + "%s[" NM_HASH_OBFUSCATE_PTR_FMT ", %s]: " _NM_UTILS_MACRO_FIRST(__VA_ARGS__), \ + _NMLOG_PREFIX_NAME, \ + NM_HASH_OBFUSCATE_PTR(_self), \ + (_ifname ?: nm_sprintf_buf(_sbuf, "(%d)", _self->ifindex)) \ + _NM_UTILS_MACRO_REST(__VA_ARGS__)); \ + } \ + } \ + G_STMT_END + +static const char * +_log_info(NMStrBuf *strbuf, + const char *bond_ifname, + const char *const *active_members, + const char *const *previous_members) +{ + gsize i; + + nm_str_buf_reset(strbuf); + + if (!bond_ifname) + nm_str_buf_append(strbuf, "(disabled)"); + else { + nm_str_buf_append_printf(strbuf, "(enabled, \"%s\"", bond_ifname); + + for (i = 0; active_members && active_members[i]; i++) { + if (i == 0) + nm_str_buf_append(strbuf, ", active-members=[ \""); + else + nm_str_buf_append(strbuf, "\", \""); + nm_str_buf_append(strbuf, active_members[i]); + } + if (i > 0) + nm_str_buf_append(strbuf, "\" ]"); + + for (i = 
0; previous_members && previous_members[i]; i++) { + nm_assert(!nm_strv_contains(active_members, -1, previous_members[i])); + if (i == 0) + nm_str_buf_append(strbuf, ", previous-members=[ \""); + else + nm_str_buf_append(strbuf, "\", \""); + nm_str_buf_append(strbuf, previous_members[i]); + } + if (i > 0) + nm_str_buf_append(strbuf, "\" ]"); + + nm_str_buf_append(strbuf, ")"); + } + + return nm_str_buf_get_str(strbuf); +} + +/*****************************************************************************/ + +static gboolean +_nm_assert_self_(NMBondManager *self) +{ + nm_assert(self); + nm_assert(NM_IS_PLATFORM(self->platform)); + nm_assert(!self->cancellable || G_IS_CANCELLABLE(self->cancellable)); + nm_assert(!self->cancellable || !g_cancellable_is_cancelled(self->cancellable)); + nm_assert(!self->dat.active_members_curr || self->dat.bond_ifname_curr); + nm_assert(!self->dat.active_members_next || self->dat.bond_ifname_next); + nm_assert(!self->cancellable || self->nft_in_progress); + nm_assert(!self->reconfigure_on_idle_source || self->reconfigure_check); + nm_assert(!self->nft_in_progress || !self->reconfigure_on_idle_source); + + nm_assert(!self->dat.active_members_curr || self->dat.bond_ifname_curr[0]); + nm_assert(!self->dat.active_members_next || self->dat.bond_ifname_next[0]); + + nm_assert(!self->destroyed || !self->dat.bond_ifname_next); + nm_assert(!self->destroyed + || NM_IN_SET((RegistrationState) self->reg_state, + REGISTRATION_STATE_UPPING, + REGISTRATION_STATE_DOWNING)); + + switch (self->reg_state) { + case REGISTRATION_STATE_NONE: + nm_assert(!self->nft_in_progress); + nm_assert(!self->cancellable); + nm_assert(!self->dat.bond_ifname_curr); + nm_assert(!self->dat.bond_ifname_next); + break; + case REGISTRATION_STATE_UPPING: + nm_assert(self->nft_in_progress); + nm_assert(self->dat.bond_ifname_curr); + break; + case REGISTRATION_STATE_UP: + nm_assert(!self->nft_in_progress); + nm_assert(!self->cancellable); + nm_assert(self->dat.bond_ifname_curr); + 
nm_assert(!self->dat.bond_ifname_next); + break; + case REGISTRATION_STATE_DOWNING: + nm_assert(self->nft_in_progress); + nm_assert(self->dat.bond_ifname_curr); + break; + default: + nm_assert_not_reached(); + break; + } + + return TRUE; +} + +#define _nm_assert_self(self) nm_assert(_nm_assert_self_(self)) + +/*****************************************************************************/ + +static void +_callback_invoke(NMBondManager *self, NMBondManagerEventType event_type) +{ + if (!self->callback) + return; + + self->callback(self, event_type, self->user_data); +} + +static void +_notify_state_change(NMBondManager *self) +{ + NMOptionBool state; + + if (self->nft_in_progress) + state = NM_OPTION_BOOL_DEFAULT; + else + state = !!self->nft_good; + + if (state == self->state) + return; + + self->state = state; + _callback_invoke(self, NM_BOND_MANAGER_EVENT_TYPE_STATE); +} + +/*****************************************************************************/ + +static void +_nft_call_cb(GObject *source, GAsyncResult *result, gpointer user_data) +{ + nm_auto_str_buf NMStrBuf strbuf = NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE); + NMBondManager *self; + gpointer ptr_up; + gs_free const char **previous_members = NULL; + gs_free_error GError *error = NULL; + + nm_utils_user_data_unpack(user_data, &self, &ptr_up, &previous_members); + + _nm_assert_self(self); + + self->nft_in_progress = FALSE; + + nm_firewall_nft_call_finish(result, &error); + + if (!error) { + gsize i; + + /* On success, we can forget about our previous members that we successfully + * deleted. */ + if (!GPOINTER_TO_INT(ptr_up)) { + /* We successfully deleted the NFT table. Forget all previous members. */ + g_hash_table_remove_all(self->previous_members); + } else if (previous_members) { + /* These previous members are now forgotten for good. 
*/ + for (i = 0; previous_members[i]; i++) + g_hash_table_remove(self->previous_members, previous_members[i]); + } + } else { + /* If all our NFT calls keep failing, we never actually prune entries from + * self->previous_members. That is a problem, however, under normal operation + * NFT calls should not continuously fail, and we would have a small fixed + * number of active-members. */ + } + + nm_clear_g_cancellable(&self->cancellable); + + if (nm_utils_error_is_cancelled(error)) { + switch (self->reg_state) { + case REGISTRATION_STATE_NONE: + case REGISTRATION_STATE_UP: + case REGISTRATION_STATE_DOWNING: + /* It is not expected that we cancel anything in this state. */ + nm_assert_not_reached(); + goto out; + case REGISTRATION_STATE_UPPING: + nm_assert(self->dat.bond_ifname_curr); + /* We cancelled while upping. We need to issue another down, + * to make sure the data is gone. */ + if (!self->dat.bond_ifname_next) { + /* There is no other name to configure. We just need to down + * the current one. */ + _LOGT("reconfigure: configuration cancelled, deconfigure %s", + self->dat.bond_ifname_curr); + _nft_call(self, FALSE, self->dat.bond_ifname_curr, NULL, NULL); + self->reg_state = REGISTRATION_STATE_DOWNING; + goto out; + } + /* There is already another configuration. UPPING again. 
*/ + _LOGT("reconfigure: configuration cancelled, configure %s", + _log_info(&strbuf, + self->dat.bond_ifname_next, + self->dat.active_members_next, + NULL)); + _nft_call(self, + TRUE, + self->dat.bond_ifname_next, + NM_MAKE_STRV(self->dat.bond_ifname_curr), + self->dat.active_members_next); + self->reg_state = REGISTRATION_STATE_UPPING; + nm_clear_g_free(&self->dat.bond_ifname_curr); + nm_clear_g_free(&self->dat.active_members_curr); + self->dat.bond_ifname_curr = g_steal_pointer(&self->dat.bond_ifname_next); + self->dat.active_members_curr = g_steal_pointer(&self->dat.active_members_next); + goto out; + } + nm_assert_not_reached(); + goto out; + } + + if (error) { + self->nft_good = FALSE; + } else { + /* Technically, if a previous downing failed, we cannot know that + * we were able to fix this bug with a successful run now. That is, because + * if the interface got renamed, and the downing for the previous + * interface name failed, we leak that table and the success now doesn't + * fix that. + * + * That is a bug, but probably not severe because: + * - interfaces are not supposed to be renamed. + * - if this NFT command succeeds, we expect that also the previous downings worked. + * + * The problem here is only that nft_good might lie and indicate + * no problem. However, when a downing fails, we anyway leak the table already + * and the bad thing happened. We cannot fix it if the `nft` command fails. + */ + self->nft_good = TRUE; + } + + switch (self->reg_state) { + case REGISTRATION_STATE_NONE: + case REGISTRATION_STATE_UP: + /* Unexpected to get a callback completion in these states. */ + nm_assert_not_reached(); + goto out; + case REGISTRATION_STATE_UPPING: + nm_assert(!self->dat.bond_ifname_next); + if (error) { + /* Unclear what to do about this error. Just log about it, nothing else. 
*/ + _LOGW("reconfigure: nft configuration for balance-slb failed: %s", error->message); + } else + _LOGT("reconfigure: configuration completed"); + self->reg_state = REGISTRATION_STATE_UP; + goto out; + case REGISTRATION_STATE_DOWNING: + nm_assert(self->dat.bond_ifname_curr); + if (!self->dat.bond_ifname_next) { + if (error) { + /* Unclear what to do about this error. Just log about it, nothing else. */ + _LOGW("reconfigure: nft deconfiguration for balance-slb failed: %s", + error->message); + } else + _LOGT("reconfigure: deconfiguration completed"); + nm_clear_g_free(&self->dat.bond_ifname_curr); + nm_clear_g_free(&self->dat.active_members_curr); + self->reg_state = REGISTRATION_STATE_NONE; + + if (self->destroyed) { + _bond_manager_destroy(self); + return; + } + + goto out; + } + if (error) { + /* Unclear what to do about this error. Just log about it, nothing else. */ + _LOGW("reconfigure: nft deconfiguration failed before restart: %s", error->message); + } else + _LOGT("reconfigure: deconfiguration completed before restart"); + _nft_call(self, + TRUE, + self->dat.bond_ifname_next, + NM_MAKE_STRV(self->dat.bond_ifname_curr), + self->dat.active_members_next); + nm_clear_g_free(&self->dat.bond_ifname_curr); + nm_clear_g_free(&self->dat.active_members_curr); + self->dat.bond_ifname_curr = g_steal_pointer(&self->dat.bond_ifname_next); + self->dat.active_members_curr = g_steal_pointer(&self->dat.active_members_next); + self->reg_state = REGISTRATION_STATE_UPPING; + goto out; + } + + nm_assert_not_reached(); + +out: + if (self->reconfigure_check) { + if (self->destroyed) + nm_assert_not_reached(); + else if (!self->nft_in_progress) { + nm_assert(!self->reconfigure_on_idle_source); + _reconfigure_check(self, FALSE); + } + } + + _notify_state_change(self); +} + +static void +_nft_call(NMBondManager *self, + gboolean up, + const char *bond_ifname, + const char *const *bond_ifnames_down, + const char *const *active_members) +{ + gs_unref_bytes GBytes *stdin_buf = NULL; + 
gs_free const char *const *previous_members_strv = NULL; + + if (up) { + gs_unref_ptrarray GPtrArray *arr = NULL; + GHashTableIter iter; + const char *n; + gsize i; + + /* We need to track the active-members that we add, because, when we update the + * NFT table without a member that was previously configured, we use "add && flush", which + * leaves empty chains for the previous members around. We need to clean up those + * chains, hence the need to track which members we ever added. + * + * Before making an UP call, we add the newly configured active_members to the list + * of previous_members. All the while, passing a list of previous_members_strv + * which we currently no longer configure. + * + * Only when the call succeeds (in _nft_call_cb()), we will forget about previously added + * members. This is done by passing the list of members that we are forgetting now + * on to the callback below. */ + + /* Get the list of previous members that are no longer in the current + * active list. */ + g_hash_table_iter_init(&iter, self->previous_members); + while (g_hash_table_iter_next(&iter, (gpointer *) &n, NULL)) { + if (nm_strv_contains(active_members, -1, n)) + continue; + if (!arr) + arr = g_ptr_array_new(); + g_ptr_array_add(arr, (gpointer) n); + } + if (arr) { + nm_strv_sort((const char **) arr->pdata, arr->len); + previous_members_strv = nm_strv_dup_packed((const char *const *) arr->pdata, arr->len); + } + + /* The now active members also get tracked as previous members for the future. 
*/ + if (active_members) { + for (i = 0; active_members[i]; i++) + g_hash_table_add(self->previous_members, g_strdup(active_members[i])); + } + } + + stdin_buf = nm_firewall_nft_stdio_mlag(up, + bond_ifname, + bond_ifnames_down, + active_members, + previous_members_strv); + + nm_clear_g_cancellable(&self->cancellable); + self->cancellable = g_cancellable_new(); + + nm_shutdown_wait_obj_register_cancellable(self->cancellable, "nft-mlag"); + + if (_LOGT_ENABLED()) { + if (up) { + nm_auto_str_buf NMStrBuf strbuf = + NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE); + + _LOGT("reconfigure: call nft: %s", + _log_info(&strbuf, bond_ifname, active_members, previous_members_strv)); + } else + _LOGT("reconfigure: call nft: disable on \"%s\"", bond_ifname); + } + + self->nft_in_progress = TRUE; + + if (self->reconfigure_check) + nm_clear_g_source_inst(&self->reconfigure_on_idle_source); + + nm_firewall_nft_call(stdin_buf, + self->cancellable, + _nft_call_cb, + nm_utils_user_data_pack(self, + GINT_TO_POINTER(up), + g_steal_pointer(&previous_members_strv))); +} + +/*****************************************************************************/ + +static void +_reconfigure_do(NMBondManager *self, + gboolean reapply, + const char *bond_ifname, + const char **active_members_take) +{ + nm_auto_str_buf NMStrBuf strbuf = NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE); + gs_free const char **active_members = g_steal_pointer(&active_members_take); + + _nm_assert_self(self); + nm_assert(!active_members || bond_ifname); + nm_assert(!active_members || active_members[0]); + + /* The difficulty of all of this is "state". In particular, since we make the nft call + * async, we need to handle all the possible cases, how an update event can invalidate + * a currently pending call. 
*/ + + switch (self->reg_state) { + case REGISTRATION_STATE_NONE: + nm_assert(!self->dat.bond_ifname_curr); + nm_assert(!self->dat.active_members_curr); + nm_assert(!self->dat.bond_ifname_next); + nm_assert(!self->dat.active_members_next); + nm_assert(!self->cancellable); + nm_assert(!self->nft_in_progress); + + if (!bond_ifname) { + /* No configuration done. Nothing to do. */ + goto out; + } + + _LOGT("reconfigure: start configuring (%s)", + _log_info(&strbuf, bond_ifname, active_members, NULL)); + self->dat.bond_ifname_curr = g_strdup(bond_ifname); + self->dat.active_members_curr = nm_strv_dup_packed(active_members, -1); + _nft_call(self, TRUE, self->dat.bond_ifname_curr, NULL, self->dat.active_members_curr); + self->reg_state = REGISTRATION_STATE_UPPING; + goto out; + case REGISTRATION_STATE_UPPING: + nm_assert(self->dat.bond_ifname_curr); + nm_assert(self->nft_in_progress); + + /* We are UPPING, we cancel the pending operation and will + * handle the rest when the callback completes. */ + if (!bond_ifname) { + if (self->cancellable || self->dat.bond_ifname_next) + _LOGT("reconfigure: aborting configuring"); + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + nm_clear_g_cancellable(&self->cancellable); + goto out; + } + if (!reapply && self->cancellable && nm_streq0(bond_ifname, self->dat.bond_ifname_curr) + && nm_strv_equal(active_members, self->dat.active_members_curr)) { + /* Nothing to do. We are already upping this setup. */ + nm_assert(!self->dat.bond_ifname_next); + nm_assert(!self->dat.active_members_next); + goto out; + } + if (!reapply && !self->cancellable && nm_streq0(bond_ifname, self->dat.bond_ifname_next) + && nm_strv_equal(active_members, self->dat.active_members_next)) { + /* We already cancelled the current upping, and have scheduled another + * (identical) run. Nothing to do. 
*/ + goto out; + } + _LOGT("reconfigure: abort configuring to configure %s", + _log_info(&strbuf, bond_ifname, active_members, NULL)); + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + self->dat.bond_ifname_next = g_strdup(bond_ifname); + self->dat.active_members_next = nm_strv_dup_packed(active_members, -1); + nm_clear_g_cancellable(&self->cancellable); + goto out; + case REGISTRATION_STATE_UP: + nm_assert(self->dat.bond_ifname_curr); + nm_assert(!self->dat.bond_ifname_next); + nm_assert(!self->dat.active_members_next); + nm_assert(!self->cancellable); + nm_assert(!self->nft_in_progress); + + if (!bond_ifname) { + _LOGT("reconfigure: deconfigure to disable"); + _nft_call(self, FALSE, self->dat.bond_ifname_curr, NULL, NULL); + self->reg_state = REGISTRATION_STATE_DOWNING; + goto out; + } + if (!reapply && nm_streq0(bond_ifname, self->dat.bond_ifname_curr) + && nm_strv_equal(active_members, self->dat.active_members_curr)) { + /* Nothing to do. The current configuration is already active. */ + goto out; + } + _LOGT("reconfigure: configure, update to %s", + _log_info(&strbuf, bond_ifname, active_members, NULL)); + _nft_call(self, + TRUE, + bond_ifname, + NM_MAKE_STRV(self->dat.bond_ifname_curr), + active_members); + self->reg_state = REGISTRATION_STATE_UPPING; + nm_clear_g_free(&self->dat.bond_ifname_curr); + nm_clear_g_free(&self->dat.active_members_curr); + self->dat.bond_ifname_curr = g_strdup(bond_ifname); + self->dat.active_members_curr = nm_strv_dup_packed(active_members, -1); + goto out; + case REGISTRATION_STATE_DOWNING: + nm_assert(self->dat.bond_ifname_curr); + nm_assert(self->nft_in_progress); + + /* we are already DOWNING. It suffices to clear the scheduled "next" + * config and wait, and reset the "next" configuration. */ + if (nm_streq0(bond_ifname, self->dat.bond_ifname_next) + && nm_strv_equal(active_members, self->dat.active_members_next)) { + /* Nothing to do. 
*/ + goto out; + } + _LOGT("reconfigure: deconfiguring and waiting for %s", + _log_info(&strbuf, bond_ifname, active_members, NULL)); + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + if (bond_ifname) { + self->dat.bond_ifname_next = g_strdup(bond_ifname); + self->dat.active_members_next = nm_strv_dup_packed(active_members, -1); + } + goto out; + } + nm_assert_not_reached(); + +out: + _notify_state_change(self); +} + +static void +_reconfigure_check(NMBondManager *self, gboolean reapply) +{ + const NMPlatformLink *plink_ctrl; + const NMPlatformLink *plink_port; + const NMPlatformLnkBond *plnkbond_ctrl; + NMDedupMultiIter pliter; + const NMDedupMultiHeadEntry *pl_links_head_entry; + const char *active_members_lst_stack[16]; + gs_free const char **active_members_lst_heap = NULL; + const char **active_members_lst = active_members_lst_stack; + gsize active_members_alloc = G_N_ELEMENTS(active_members_lst_stack); + gsize active_members_n = 0; + gs_free const char **active_members_result = NULL; + const char *bond_ifname = NULL; + + _nm_assert_self(self); + nm_assert(!self->destroyed); + + self->reconfigure_check = FALSE; + nm_clear_g_source_inst(&self->reconfigure_on_idle_source); + + g_hash_table_remove_all(self->previous_ifindexes); + + plnkbond_ctrl = nm_platform_link_get_lnk_bond(self->platform, self->ifindex, &plink_ctrl); + + /* We only do bonding-slb MLAG handling if our ifindex is a bond with + * mode=balance-xor && xmit_hash_policy=vlan+srcmac. */ + if (!plnkbond_ctrl) + goto out; + if (!plink_ctrl) + goto out; + if (plink_ctrl->type != NM_LINK_TYPE_BOND) + goto out; + if (plnkbond_ctrl->mode != NM_BOND_MODE_XOR) + goto out; + if (plnkbond_ctrl->xmit_hash_policy != NM_BOND_XMIT_HASH_POLICY_VLAN_SRCMAC) + goto out; + + /* Find all the connected ports that are IFF_RUNNING. 
*/ + pl_links_head_entry = nm_platform_lookup_obj_type(self->platform, NMP_OBJECT_TYPE_LINK); + nmp_cache_iter_for_each_link (&pliter, pl_links_head_entry, &plink_port) { + if (plink_port->master != self->ifindex) + continue; + if (!NM_FLAGS_HAS(plink_port->n_ifi_flags, IFF_RUNNING)) + continue; + + g_hash_table_add(self->previous_ifindexes, GINT_TO_POINTER(plink_port->ifindex)); + + if (active_members_n == active_members_alloc) { + active_members_alloc *= 2; + active_members_lst_heap = + g_renew(const char *, active_members_lst_heap, active_members_alloc); + if (active_members_lst == active_members_lst_stack) { + memcpy(active_members_lst_heap, + active_members_lst_stack, + sizeof(const char *) * active_members_n); + } + active_members_lst = active_members_lst_heap; + } + + active_members_lst[active_members_n++] = plink_port->name; + } + + if (active_members_n > 0) { + gsize i; + gsize j; + + /* We sort the active members by name */ + g_qsort_with_data(active_members_lst, + active_members_n, + sizeof(const char *), + nm_strcmp_p_with_data, + NULL); + + /* There really shouldn't be any duplicates. Nonetheless, check + * and drop them. They must be unique, because nm_firewall_nft_stdio_mlag() + * relies on that. */ + for (j = 1, i = 1; i < active_members_n; i++) { + if (nm_streq(active_members_lst[j - 1], active_members_lst[i])) { + /* Repeated. Skip. */ + continue; + } + if (j != i) + active_members_lst[j] = active_members_lst[i]; + j++; + } + active_members_n = j; + + active_members_result = g_new(const char *, active_members_n + 1u); + j = 0; + + if (self->dat.active_members_curr) { + /* We configured a list earlier. We want to preserve the sort order + * from before. Prefer entries that we already had, in their previous + * order. */ + for (i = 0; self->dat.active_members_curr[i]; i++) { + gssize idx; + + /* We cannot use binary search, because we steal the elements we found + * already. Hence this is O(n^2). 
We could use binary search if we would + * not modify active_members_lst, but then we would need to remember + * somehow which elements are already consumed. */ + idx = nm_strv_find_first(active_members_lst, + active_members_n, + self->dat.active_members_curr[i]); + if (idx >= 0) + active_members_result[j++] = g_steal_pointer(&active_members_lst[idx]); + } + } + + /* append the remaining entries, which are sorted by name. */ + for (i = 0; i < active_members_n; i++) { + if (active_members_lst[i]) + active_members_result[j++] = active_members_lst[i]; + } + + nm_assert(j == active_members_n); + active_members_result[j] = NULL; + } + + bond_ifname = plink_ctrl->name; + +out: + _reconfigure_do(self, reapply, bond_ifname, g_steal_pointer(&active_members_result)); +} + +static gboolean +_reconfigure_check_on_idle_cb(gpointer user_data) +{ + NMBondManager *self = user_data; + + nm_assert(!self->nft_in_progress); + _reconfigure_check(self, FALSE); + return G_SOURCE_CONTINUE; +} + +/*****************************************************************************/ + +static void +_link_changed_cb(NMPlatform *platform, + int obj_type_i, + int ifindex, + const NMPlatformLink *plink, + int change_type_i, + NMBondManager *self) +{ + if (self->reconfigure_check) { + /* Recheck already scheduled. */ + return; + } + + if (self->destroyed) { + /* We should not get another event at this point. Anyway, ignore. */ + return; + } + + if (ifindex == self->ifindex) + goto schedule; + + if (plink->master == self->ifindex) + goto schedule; + + if (g_hash_table_contains(self->previous_ifindexes, GINT_TO_POINTER(ifindex))) + goto schedule; + + /* This event is not relevant. Skip. 
*/ + return; + +schedule: + self->reconfigure_check = TRUE; + if (!self->nft_in_progress) { + self->reconfigure_on_idle_source = + nm_g_idle_add_source(_reconfigure_check_on_idle_cb, self); + } +} + +/*****************************************************************************/ + +void +nm_bond_manager_reapply(NMBondManager *self) +{ + _reconfigure_check(self, TRUE); +} + +/*****************************************************************************/ + +int +nm_bond_manager_get_ifindex(NMBondManager *self) +{ + nm_assert(NM_IS_BOND_MANAGER(self)); + + return self->ifindex; +} + +const char * +nm_bond_manager_get_connection_uuid(NMBondManager *self) +{ + nm_assert(NM_IS_BOND_MANAGER(self)); + + return self->connection_uuid; +} + +NMOptionBool +nm_bond_manager_get_state(NMBondManager *self) +{ + nm_assert(NM_IS_BOND_MANAGER(self)); + + return self->state; +} + +/*****************************************************************************/ + +NMBondManager * +nm_bond_manager_new(struct _NMPlatform *platform, + int ifindex, + const char *connection_uuid, + NMBondManagerCallback callback, + gpointer user_data) +{ + NMBondManager *self; + + nm_assert(NM_IS_PLATFORM(platform)); + nm_assert(ifindex > 0); + + self = g_slice_new(NMBondManager); + *self = (NMBondManager){ + .platform = g_object_ref(platform), + .ifindex = ifindex, + .reg_state = REGISTRATION_STATE_NONE, + .destroyed = FALSE, + .nft_good = TRUE, + .callback = callback, + .user_data = user_data, + .previous_ifindexes = g_hash_table_new(nm_direct_hash, NULL), + .previous_members = g_hash_table_new_full(nm_str_hash, g_str_equal, g_free, NULL), + .connection_uuid = g_strdup(connection_uuid), + .state = NM_OPTION_BOOL_DEFAULT, + }; + + self->link_changed_id = g_signal_connect(self->platform, + NM_PLATFORM_SIGNAL_LINK_CHANGED, + G_CALLBACK(_link_changed_cb), + self); + + _LOGT("new balance-slb (MLAG) manager for interface %d", self->ifindex); + + _reconfigure_check(self, TRUE); + + return self; +} + +void 
+nm_bond_manager_destroy(NMBondManager *self) +{ + g_return_if_fail(self); + g_return_if_fail(!self->destroyed); + + self->destroyed = TRUE; + + self->callback = NULL; + self->user_data = NULL; + + nm_clear_g_signal_handler(self->platform, &self->link_changed_id); + + nm_clear_g_source_inst(&self->reconfigure_on_idle_source); + self->reconfigure_check = FALSE; + + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + + switch (self->reg_state) { + case REGISTRATION_STATE_NONE: + break; + case REGISTRATION_STATE_UPPING: + /* We still have some nfts registered. We need to wrap them up. */ + _LOGT("destroying but deconfigure pending configuration first"); + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + nm_clear_g_cancellable(&self->cancellable); + return; + case REGISTRATION_STATE_UP: + _LOGT("destroying but deconfigure first"); + _nft_call(self, FALSE, self->dat.bond_ifname_curr, NULL, NULL); + self->reg_state = REGISTRATION_STATE_DOWNING; + return; + case REGISTRATION_STATE_DOWNING: + _LOGT("destroying but wait for deconfiguring"); + return; + } + + _bond_manager_destroy(self); +} + +static void +_bond_manager_destroy(NMBondManager *self) +{ + _LOGT("destroyed"); + + nm_assert(self); + nm_assert(self->destroyed); + nm_assert(self->reg_state == REGISTRATION_STATE_NONE); + nm_assert(self->link_changed_id == 0); + nm_assert(!self->cancellable); + nm_assert(!self->dat.bond_ifname_curr); + nm_assert(!self->dat.active_members_curr); + nm_assert(!self->reconfigure_on_idle_source); + + nm_clear_g_free(&self->dat.bond_ifname_next); + nm_clear_g_free(&self->dat.active_members_next); + + g_object_unref(self->platform); + g_hash_table_unref(self->previous_ifindexes); + g_hash_table_unref(self->previous_members); + g_free(self->connection_uuid); + nm_g_slice_free(self); +} diff --git a/src/core/nm-bond-manager.h b/src/core/nm-bond-manager.h new file mode 100644 index 
0000000000..92a89f0b92 --- /dev/null +++ b/src/core/nm-bond-manager.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#ifndef __NM_BOND_MANAGER_H__ +#define __NM_BOND_MANAGER_H__ + +typedef struct _NMBondManager NMBondManager; + +struct _NMPlatform; + +typedef enum { + NM_BOND_MANAGER_EVENT_TYPE_STATE, +} NMBondManagerEventType; + +typedef void (*NMBondManagerCallback)(NMBondManager *self, + NMBondManagerEventType event_type, + gpointer user_data); + +NMBondManager *nm_bond_manager_new(struct _NMPlatform *platform, + int ifindex, + const char *connection_uuid, + NMBondManagerCallback callback, + gpointer user_data); + +void nm_bond_manager_reapply(NMBondManager *self); + +void nm_bond_manager_destroy(NMBondManager *self); + +int nm_bond_manager_get_ifindex(NMBondManager *self); +const char *nm_bond_manager_get_connection_uuid(NMBondManager *self); +NMOptionBool nm_bond_manager_get_state(NMBondManager *self); + +#endif /* __NM_BOND_MANAGER_H__ */ diff --git a/src/core/nm-firewall-utils.c b/src/core/nm-firewall-utils.c index 92c9fd814e..7b5d2f47b6 100644 --- a/src/core/nm-firewall-utils.c +++ b/src/core/nm-firewall-utils.c @@ -39,6 +39,71 @@ static const struct { /*****************************************************************************/ +static const char * +_nft_ifname_valid(const char *str) +{ + gsize i; + + /* `nft -f -` takes certain strings, like "device $IFNAME", but + * those strings are from a limited character set. Check that + * @str is valid according to those rules. + * + * src/scanner.l: + * digit [0-9] + * letter [a-zA-Z] + * string ({letter}|[_.])({letter}|{digit}|[/\-_\.])* + **/ + + if (!str || !str[0]) + return NULL; + + for (i = 0; str[i]; i++) { + switch (str[i]) { + case 'a' ... 'z': + case 'A' ... 'Z': + case '_': + case '.': + continue; + case '0' ... 
'9': + case '/': + case '-': + if (i == 0) + return NULL; + continue; + default: + return NULL; + } + } + if (i >= NMP_IFNAMSIZ) + return NULL; + + return str; +} + +static const char * +_strbuf_set_sanitized(NMStrBuf *strbuf, const char *prefix, const char *str_to_sanitize) +{ + nm_str_buf_reset(strbuf); + + if (prefix) + nm_str_buf_append(strbuf, prefix); + + for (; str_to_sanitize[0] != '\0'; str_to_sanitize++) { + const char ch = str_to_sanitize[0]; + + if (g_ascii_isalpha(ch) || g_ascii_isdigit(ch)) { + nm_str_buf_append_c(strbuf, ch); + continue; + } + nm_str_buf_append_c(strbuf, '_'); + nm_str_buf_append_c_hex(strbuf, ch, FALSE); + } + + return nm_str_buf_get_str(strbuf); +} + +/*****************************************************************************/ + #define _SHARE_IPTABLES_SUBNET_TO_STR_LEN (INET_ADDRSTRLEN + 1 + 2 + 1) static const char * @@ -701,6 +766,189 @@ _fw_nft_set_shared_construct(gboolean up, const char *ip_iface, in_addr_t addr, /*****************************************************************************/ +GBytes * +nm_firewall_nft_stdio_mlag(gboolean up, + const char *bond_ifname, + const char *const *bond_ifnames_down, + const char *const *active_members, + const char *const *previous_members) +{ + nm_auto_str_buf NMStrBuf strbuf_table_name = + NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_32, FALSE); + nm_auto_str_buf NMStrBuf strbuf = NM_STR_BUF_INIT(NM_UTILS_GET_NEXT_REALLOC_SIZE_1000, FALSE); + const char *table_name; + gsize i; + + if (NM_MORE_ASSERTS > 10 && active_members) { + /* No duplicates. We make certain assumptions here, and we don't + * want to check that there are no duplicates. The caller must take + * care of this. */ + for (i = 0; active_members[i]; i++) + nm_assert(!nm_strv_contains(&active_members[i + 1], -1, active_members[i])); + } + + /* If an interface gets renamed, we need to update the nft tables. 
Since one nft + * invocation is atomic, it is reasonable to drop the previous table(s) at the + * same time when creating the new one. */ + for (; bond_ifnames_down && bond_ifnames_down[0]; bond_ifnames_down++) { + if (nm_streq(bond_ifname, bond_ifnames_down[0])) + continue; + table_name = _strbuf_set_sanitized(&strbuf_table_name, "nm-mlag-", bond_ifnames_down[0]); + _fw_nft_append_cmd_table(&strbuf, "netdev", table_name, FALSE); + } + + table_name = _strbuf_set_sanitized(&strbuf_table_name, "nm-mlag-", bond_ifname); + + _fw_nft_append_cmd_table(&strbuf, "netdev", table_name, up); + + if (up) { + nm_auto_str_buf NMStrBuf strbuf_1 = + NM_STR_BUF_INIT_A(NM_UTILS_GET_NEXT_REALLOC_SIZE_232, FALSE); + const gsize n_active_members = NM_PTRARRAY_LEN(active_members); + + if (!_nft_ifname_valid(bond_ifname)) { + /* We cannot meaningfully express this interface name. Ignore all chains + * and only create an empty table. */ + goto out; + } + + for (; previous_members && previous_members[0]; previous_members++) { + const char *previous_member = previous_members[0]; + const char *chain_name; + + /* The caller already ensures that the previous member is not part of the new + * active members. Avoid the overhead of checking, and assert against that. */ + nm_assert(!nm_strv_contains(active_members, n_active_members, previous_member)); + + if (!_nft_ifname_valid(previous_member)) + continue; + + chain_name = _strbuf_set_sanitized(&strbuf_1, "rx-drop-bc-mc-", previous_member); + + /* We want to atomically update our table. However, we don't want to delete + * and recreate it, because then the sets get lost (which we don't want). + * + * Instead, we only "add && flush" the table, which removes all rules from + * the chain. However, as our active-members change, we want to delete + * the obsolete chains too. + * + * nft has no way to delete all chains in a table, so we have to name + * them one by one. 
So we keep track of active members that we had + * in the past, and which are now no longer in use. For those previous + * members we delete the chains (again, with the "add && delete" dance + * to avoid failure deleting a non-existing chain, in case our tracking + * is wrong or somebody else modified the table in the meantime). + * + * We need to track the previous members, because we don't want to first + * ask nft which chains exist. Doing that would be cumbersome as we would + * have to do one async program invocation and parse stdout. */ + _append(&strbuf, + "add chain netdev %s %s {" + " type filter hook ingress device %s priority filter; " + "}", + table_name, + chain_name, + previous_member); + _append(&strbuf, "delete chain netdev %s %s", table_name, chain_name); + } + + /* OVS SLB rule 1 + * + * "Open vSwitch avoids packet duplication by accepting multicast and broadcast + * packets on only the active member, and dropping multicast and broadcast + * packets on all other members." + * + * The primary is the first member; we drop on all others. */ + for (i = 0; i < n_active_members; i++) { + const char *active_member = active_members[i]; + const char *chain_name; + + if (!_nft_ifname_valid(active_member)) + continue; + + chain_name = _strbuf_set_sanitized(&strbuf_1, "rx-drop-bc-mc-", active_member); + + _append(&strbuf, + "add chain netdev %s %s {" + " type filter hook ingress device %s priority filter; " + "}", + table_name, + chain_name, + active_member); + + if (i == 0) { + _append(&strbuf, "delete chain netdev %s %s", table_name, chain_name); + continue; + } + + _append(&strbuf, + "add rule netdev %s %s pkttype {" + " broadcast, multicast " + "} counter drop", + table_name, + chain_name); + } + + /* OVS SLB rule 2 + * + * "Open vSwitch deals with this case by dropping packets received on any SLB + * bonded link that have a source MAC+VLAN that has been learned on any other + * port."
+ */ + _append(&strbuf, + "add set netdev %s macset-tagged {" + " typeof ether saddr . vlan id; flags timeout; " + "}", + table_name); + _append(&strbuf, + "add set netdev %s macset-untagged {" + " typeof ether saddr; flags timeout;" + "}", + table_name); + + _append(&strbuf, + "add chain netdev %s tx-snoop-source-mac {" + " type filter hook egress device %s priority filter; " + "}", + table_name, + bond_ifname); + _append(&strbuf, + "add rule netdev %s tx-snoop-source-mac set update ether saddr . vlan id" + " timeout 5s @macset-tagged counter return" + "", /* tagged */ + table_name); + _append(&strbuf, + "add rule netdev %s tx-snoop-source-mac set update ether saddr" + " timeout 5s @macset-untagged counter" + "", /* untagged*/ + table_name); + + _append(&strbuf, + "add chain netdev %s rx-drop-looped-packets {" + " type filter hook ingress device %s priority filter; " + "}", + table_name, + bond_ifname); + _append(&strbuf, + "add rule netdev %s rx-drop-looped-packets ether saddr . vlan id" + " @macset-tagged counter drop", + table_name); + _append(&strbuf, + "add rule netdev %s rx-drop-looped-packets ether type vlan counter return" + "", /* avoid looking up tagged packets in untagged table */ + table_name); + _append(&strbuf, + "add rule netdev %s rx-drop-looped-packets ether saddr @macset-untagged" + " counter drop", + table_name); + } + +out: + return nm_str_buf_finalize_to_gbytes(&strbuf); +} + +/*****************************************************************************/ + struct _NMFirewallConfig { char *ip_iface; in_addr_t addr; diff --git a/src/core/nm-firewall-utils.h b/src/core/nm-firewall-utils.h index 9d883fea7b..ca138ccf78 100644 --- a/src/core/nm-firewall-utils.h +++ b/src/core/nm-firewall-utils.h @@ -35,4 +35,10 @@ void nm_firewall_nft_call(GBytes *stdin_buf, gboolean nm_firewall_nft_call_finish(GAsyncResult *result, GError **error); +GBytes *nm_firewall_nft_stdio_mlag(gboolean up, + const char *bond_ifname, + const char *const *bond_ifnames_down, + 
const char *const *active_members, + const char *const *previous_members); + #endif /* __NM_FIREWALL_UTILS_H__ */ diff --git a/src/libnm-core-impl/nm-setting-bond.c b/src/libnm-core-impl/nm-setting-bond.c index a7f64393b9..b03cc455a7 100644 --- a/src/libnm-core-impl/nm-setting-bond.c +++ b/src/libnm-core-impl/nm-setting-bond.c @@ -70,6 +70,7 @@ static const char *const valid_options_lst[] = { NM_SETTING_BOND_OPTION_ARP_INTERVAL, NM_SETTING_BOND_OPTION_ARP_IP_TARGET, NM_SETTING_BOND_OPTION_ARP_VALIDATE, + NM_SETTING_BOND_OPTION_BALANCE_SLB, NM_SETTING_BOND_OPTION_PRIMARY, NM_SETTING_BOND_OPTION_PRIMARY_RESELECT, NM_SETTING_BOND_OPTION_FAIL_OVER_MAC, @@ -195,6 +196,7 @@ static NM_UTILS_STRING_TABLE_LOOKUP_STRUCT_DEFINE( {NM_SETTING_BOND_OPTION_ARP_IP_TARGET, {"", NM_BOND_OPTION_TYPE_IP}}, {NM_SETTING_BOND_OPTION_ARP_VALIDATE, {"none", NM_BOND_OPTION_TYPE_BOTH, 0, 6, _option_default_strv_arp_validate}}, + {NM_SETTING_BOND_OPTION_BALANCE_SLB, {"0", NM_BOND_OPTION_TYPE_INT, 0, 1}}, {NM_SETTING_BOND_OPTION_DOWNDELAY, {"0", NM_BOND_OPTION_TYPE_INT, 0, G_MAXINT}}, {NM_SETTING_BOND_OPTION_FAIL_OVER_MAC, {"none", NM_BOND_OPTION_TYPE_BOTH, 0, 2, _option_default_strv_fail_over_mac}}, @@ -344,6 +346,17 @@ _bond_get_option_normalized(NMSettingBond *self, const char *option, gboolean ge value = _bond_get_option(self, NM_SETTING_BOND_OPTION_PRIMARY); if (!value) value = _bond_get_option(self, NM_SETTING_BOND_OPTION_ACTIVE_SLAVE); + } else if (nm_streq(option, NM_SETTING_BOND_OPTION_XMIT_HASH_POLICY)) { + if (_nm_utils_ascii_str_to_int64( + _bond_get_option(self, NM_SETTING_BOND_OPTION_BALANCE_SLB), + 10, + 0, + 1, + -1) + == 1) { + /* balance-slb implies vlan+srcmac */ + return "5"; + } } else value = _bond_get_option(self, option); @@ -840,6 +853,7 @@ verify(NMSetting *setting, NMConnection *connection, GError **error) const char *arp_ip_target = NULL; const char *lacp_rate; const char *primary; + const char *s; NMBondMode bond_mode; guint i; const NMUtilsNamedValue *n; @@ 
-1067,6 +1081,32 @@ verify(NMSetting *setting, NMConnection *connection, GError **error) return FALSE; } + s = _bond_get_option(self, NM_SETTING_BOND_OPTION_BALANCE_SLB); + if (s && _atoi(s) > 0) { + if (bond_mode != NM_BOND_MODE_XOR) { + g_set_error(error, + NM_CONNECTION_ERROR, + NM_CONNECTION_ERROR_INVALID_PROPERTY, + _("%s requires bond mode \"%s\""), + NM_SETTING_BOND_OPTION_BALANCE_SLB, + "balance-xor"); + g_prefix_error(error, "%s.%s: ", NM_SETTING_BOND_SETTING_NAME, NM_SETTING_BOND_OPTIONS); + return FALSE; + } + s = _bond_get_option(self, NM_SETTING_BOND_OPTION_XMIT_HASH_POLICY); + if (s + && _nm_setting_bond_xmit_hash_policy_from_string(s) + != NM_BOND_XMIT_HASH_POLICY_VLAN_SRCMAC) { + g_set_error(error, + NM_CONNECTION_ERROR, + NM_CONNECTION_ERROR_INVALID_PROPERTY, + _("%s requires xmit_hash_policy \"vlan+srcmac\""), + NM_SETTING_BOND_OPTION_BALANCE_SLB); + g_prefix_error(error, "%s.%s: ", NM_SETTING_BOND_SETTING_NAME, NM_SETTING_BOND_OPTIONS); + return FALSE; + } + } + if (!_nm_connection_verify_required_interface_name(connection, error)) return FALSE; diff --git a/src/libnm-core-public/nm-setting-bond.h b/src/libnm-core-public/nm-setting-bond.h index ed44abbff1..10d703bcaa 100644 --- a/src/libnm-core-public/nm-setting-bond.h +++ b/src/libnm-core-public/nm-setting-bond.h @@ -37,6 +37,7 @@ G_BEGIN_DECLS #define NM_SETTING_BOND_OPTION_ARP_IP_TARGET "arp_ip_target" #define NM_SETTING_BOND_OPTION_ARP_VALIDATE "arp_validate" #define NM_SETTING_BOND_OPTION_PRIMARY "primary" +#define NM_SETTING_BOND_OPTION_BALANCE_SLB "balance-slb" #define NM_SETTING_BOND_OPTION_PRIMARY_RESELECT "primary_reselect" #define NM_SETTING_BOND_OPTION_FAIL_OVER_MAC "fail_over_mac" #define NM_SETTING_BOND_OPTION_USE_CARRIER "use_carrier" |