summaryrefslogtreecommitdiff
path: root/lib/dpif-netlink.c
diff options
context:
space:
mode:
authorMichael Santana <msantana@redhat.com>2022-08-09 03:18:15 -0400
committerIlya Maximets <i.maximets@ovn.org>2022-08-15 19:44:59 +0200
commitf68ae52392ddff704588e376b9d58d1ef5b7e49b (patch)
tree7d99a144f2a216c8aa3979023e8adfe905c87aff /lib/dpif-netlink.c
parent3cbadc0b7427cc690d43ff41a69ae2cdd9bff2b1 (diff)
downloadopenvswitch-f68ae52392ddff704588e376b9d58d1ef5b7e49b.tar.gz
handlers: Fix handlers mapping.
The handler and CPU mapping in upcalls are incorrect, and this is specially noticeable systems with cpu isolation enabled. Say we have a 12 core system where only every even number CPU is enabled C0, C2, C4, C6, C8, C10 This means we will create an array of size 6 that will be sent to kernel that is populated with sockets [S0, S1, S2, S3, S4, S5] The problem is when the kernel does an upcall it checks the socket array via the index of the CPU, effectively adding additional load on some CPUs while leaving no work on other CPUs. e.g. C0 indexes to S0 C2 indexes to S2 (should be S1) C4 indexes to S4 (should be S2) Modulo of 6 (size of socket array) is applied, so we wrap back to S0 C6 indexes to S0 (should be S3) C8 indexes to S2 (should be S4) C10 indexes to S4 (should be S5) Effectively sockets S0, S2, S4 get overloaded while sockets S1, S3, S5 get no work assigned to them This leads to the kernel to throw the following message: "openvswitch: cpu_id mismatch with handler threads" Instead we will send the kernel a corrected array of sockets the size of all CPUs in the system, or the largest core_id on the system, which ever one is greatest. This is to take care of systems with non-continous core cpus. In the above example we would create a corrected array in a round-robin(assuming prime bias) fashion as follows: [S0, S1, S2, S3, S4, S5, S6, S0, S1, S2, S3, S4] Fixes: b1e517bd2f81 ("dpif-netlink: Introduce per-cpu upcall dispatch.") Co-authored-by: Aaron Conole <aconole@redhat.com> Signed-off-by: Aaron Conole <aconole@redhat.com> Signed-off-by: Michael Santana <msantana@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Diffstat (limited to 'lib/dpif-netlink.c')
-rw-r--r--lib/dpif-netlink.c34
1 files changed, 27 insertions, 7 deletions
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 08b13f7e8..537d3951a 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -791,14 +791,28 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
uint32_t n_upcall_pids)
{
struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
+ int largest_cpu_id = ovs_numa_get_largest_core_id();
struct dpif_netlink_dp request, reply;
struct ofpbuf *bufp;
- int error;
- int n_cores;
- n_cores = count_cpu_cores();
- ovs_assert(n_cores == n_upcall_pids);
- VLOG_DBG("Dispatch mode(per-cpu): Number of CPUs is %d", n_cores);
+ uint32_t *corrected;
+ int error, i, n_cores;
+
+ if (largest_cpu_id == OVS_NUMA_UNSPEC) {
+ largest_cpu_id = -1;
+ }
+
+ /* Some systems have non-continuous cpu core ids. count_total_cores()
+ * would return an accurate number, however, this number cannot be used.
+ * e.g. If the largest core_id of a system is cpu9, but the system only
+ * has 4 cpus then the OVS kernel module would throw a "CPU mismatch"
+ * warning. With the MAX() in place in this example we send an array of
+ * size 10 and prevent the warning. This has no bearing on the number of
+ * threads created.
+ */
+ n_cores = MAX(count_total_cores(), largest_cpu_id + 1);
+ VLOG_DBG("Dispatch mode(per-cpu): Setting up handler PIDs for %d cores",
+ n_cores);
dpif_netlink_dp_init(&request);
request.cmd = OVS_DP_CMD_SET;
@@ -807,7 +821,12 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
request.user_features = dpif->user_features |
OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
- request.upcall_pids = upcall_pids;
+ corrected = xcalloc(n_cores, sizeof *corrected);
+
+ for (i = 0; i < n_cores; i++) {
+ corrected[i] = upcall_pids[i % n_upcall_pids];
+ }
+ request.upcall_pids = corrected;
request.n_upcall_pids = n_cores;
error = dpif_netlink_dp_transact(&request, &reply, &bufp);
@@ -815,9 +834,10 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
dpif->user_features = reply.user_features;
ofpbuf_delete(bufp);
if (!dpif_netlink_upcall_per_cpu(dpif)) {
- return -EOPNOTSUPP;
+ error = -EOPNOTSUPP;
}
}
+ free(corrected);
return error;
}