summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Maximets <i.maximets@ovn.org>2022-07-29 22:38:36 +0200
committerIlya Maximets <i.maximets@ovn.org>2022-08-08 19:15:35 +0200
commitd76f81f41607cb8d2b76e5d3044a3a3dafd8138a (patch)
tree3efec4120bf9c9832251f4d2c120cd68e53d4d77
parenta037702f83ac03cd6b8a5c1c888dbbd2ace19485 (diff)
downloadopenvswitch-d76f81f41607cb8d2b76e5d3044a3a3dafd8138a.tar.gz
netdev-offload-tc: Disable offload of IPv6 fragments.
OVS kernel datapath and TC are parsing IPv6 fragments differently. For IPv6 later fragments, according to the original design [1], OVS always sets nw_proto=44 (IPPROTO_FRAGMENT), regardless of the type of the L4 protocol. This leads to situation where flow for nw_proto=44 gets installed to TC, but packets can not match on it, causing all IPv6 later fragments to go to userspace significantly degrading performance. Disabling offload for such packets, so the flow can be installed to the OVS kernel datapath instead. Disabling for all IPv6 fragments including the first one, because it doesn't make a lot of sense to handle them separately. It may also cause potential problems with conntrack trying to re-assemble a packet from fragments handled by different datapaths (first in HW, later in OVS kernel). Checking both 'nw_proto' and 'nw_frag' as classifier might decide to match only on one of them and also nw_proto will not be 44 for the first fragment. The issue was hidden for some time due to incorrect behavior of the OVS kernel datapath that was recently fixed in kernel commit: 12378a5a75e3 ("net: openvswitch: fix parsing of nw_proto for IPv6 fragments") To allow offloading in the future either flow dissector in TC should be changed to parse packets in the same way as OVS does, or parsing in OVS kernel and userspace should be made configurable, so users can opt-in to the behavior change. Silent change of the behavior (change by default) is not an option, because existing OpenFlow pipelines may depend on a certain behavior. [1] https://docs.openvswitch.org/en/latest/topics/design/#fragments Fixes: 83e866067ea6 ("netdev-tc-offloads: Add support for IP fragmentation") Reviewed-by: Roi Dayan <roid@nvidia.com> Reviewed-by: Marcelo Ricardo Leitner <mleitner@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
-rw-r--r--lib/netdev-offload-tc.c22
1 files changed, 21 insertions, 1 deletions
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 30032e836..9a5d10d87 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -1260,6 +1260,21 @@ parse_put_flow_set_action(struct tc_flower *flower, struct tc_action *action,
return 0;
}
+static bool
+is_ipv6_fragment_and_masked(const struct flow *key, const struct flow *mask)
+{
+ if (key->dl_type != htons(ETH_P_IPV6)) {
+ return false;
+ }
+ if (mask->nw_proto && key->nw_proto == IPPROTO_FRAGMENT) {
+ return true;
+ }
+ if (key->nw_frag & (mask->nw_frag & FLOW_NW_FRAG_ANY)) {
+ return true;
+ }
+ return false;
+}
+
static int
test_key_and_mask(struct match *match)
{
@@ -1366,6 +1381,11 @@ test_key_and_mask(struct match *match)
return EOPNOTSUPP;
}
+ if (is_ipv6_fragment_and_masked(key, mask)) {
+ VLOG_DBG_RL(&rl, "offloading of IPv6 fragments isn't supported");
+ return EOPNOTSUPP;
+ }
+
if (!is_all_zeros(mask, sizeof *mask)) {
VLOG_DBG_RL(&rl, "offloading isn't supported, unknown attribute");
return EOPNOTSUPP;
@@ -1640,7 +1660,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
memset(&mask->arp_tha, 0, sizeof mask->arp_tha);
}
- if (is_ip_any(key)) {
+ if (is_ip_any(key) && !is_ipv6_fragment_and_masked(key, mask)) {
flower.key.ip_proto = key->nw_proto;
flower.mask.ip_proto = mask->nw_proto;
mask->nw_proto = 0;