summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Maximets <i.maximets@ovn.org>2022-09-13 21:08:52 +0200
committerIlya Maximets <i.maximets@ovn.org>2022-09-16 13:51:23 +0200
commit914e870d9ff79bd0b2e831ba673f84f071caaccb (patch)
tree0643c9a9e90a23c9b4e81f88011f8cb03ac2b319
parentdcedc5cff6620d5529d09a66b8ea850fa5154daa (diff)
downloadopenvswitch-914e870d9ff79bd0b2e831ba673f84f071caaccb.tar.gz
bond: Avoid deadlock while updating post recirculation rules.
If the PACKET_OUT from controller ends up with sending packet to a bond interface, the main thread will take locks in the following order: handle_openflow --> take ofproto_mutex handle_packet_out packet_xlate output_normal bond_update_post_recirc_rules --> take rwlock in bond.c If at the same time revalidator thread is processing other packet with the output to the same bond: xlate_actions output_normal bond_update_post_recirc_rules --> take rwlock in bond.c update_recirc_rules ofproto_dpif_add_internal_flow ofproto_flow_mod --> take ofproto_mutex So, it is possible for these 2 threads to lock each other by taking one lock and waiting for another thread to release the second lock. It is also possible for the main thread to lock itself up by trying to acquire ofproto_mutex for the second time, if it will actually proceed with update_recirc_rules() after taking the bond rwlock. The problem appears to be that bond_update_post_recirc_rules() is called during the flow translation even if side effects are prohibited, which is the case for openflow PACKET_OUT handling. Skipping actual flow updates during the flow translation if side effects are disabled to avoid the deadlock. Since flows are not installed now when actions translated for very first packet, installing initial flows in bond_reconfigure(). This will cover the case of allocating a new recirc_id. Also checking if we need to update flows in bond_run() to cover link state changes. Regression test is added to catch the double lock case. Reported-at: https://github.com/openvswitch/ovs-issues/issues/259 Reported-by: Daniel Ding <zhihui.ding@easystack.cn> Fixes: adcf00ba35a0 ("ofproto/bond: Implement bond megaflow using recirculation") Acked-by: Mike Pattrick <mkp@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
-rw-r--r--ofproto/bond.c31
-rw-r--r--ofproto/bond.h3
-rw-r--r--ofproto/ofproto-dpif-xlate.c15
-rw-r--r--tests/ofproto-dpif.at83
4 files changed, 128 insertions, 4 deletions
diff --git a/ofproto/bond.c b/ofproto/bond.c
index c5585b7d4..985696cc0 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -186,10 +186,14 @@ static struct bond_slave *choose_output_slave(const struct bond *,
uint16_t vlan)
OVS_REQ_RDLOCK(rwlock);
static void update_recirc_rules__(struct bond *);
+static bool bond_may_recirc(const struct bond *);
+static void bond_update_post_recirc_rules__(struct bond *, bool force)
+ OVS_REQ_WRLOCK(rwlock);
static bool bond_is_falling_back_to_ab(const struct bond *);
static void bond_add_lb_output_buckets(const struct bond *);
static void bond_del_lb_output_buckets(const struct bond *);
+
/* Attempts to parse 's' as the name of a bond balancing mode. If successful,
* stores the mode in '*balance' and returns true. Otherwise returns false
* without modifying '*balance'. */
@@ -511,6 +515,12 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
bond_entry_reset(bond);
}
+ if (bond->ofproto->backer->rt_support.odp.recirc
+ && bond_may_recirc(bond)) {
+ /* Update rules to reflect possible recirc_id changes. */
+ update_recirc_rules(bond);
+ }
+
ovs_rwlock_unlock(&rwlock);
return revalidate;
}
@@ -719,6 +729,12 @@ bond_run(struct bond *bond, enum lacp_status lacp_status)
bond_choose_active_slave(bond);
}
+ if (bond->ofproto->backer->rt_support.odp.recirc
+ && bond_may_recirc(bond)) {
+ /* Update rules to reflect possible link state changes. */
+ bond_update_post_recirc_rules__(bond, false);
+ }
+
revalidate = bond->bond_revalidate;
bond->bond_revalidate = false;
ovs_rwlock_unlock(&rwlock);
@@ -1032,7 +1048,7 @@ bond_may_recirc(const struct bond *bond)
}
static void
-bond_update_post_recirc_rules__(struct bond* bond, const bool force)
+bond_update_post_recirc_rules__(struct bond* bond, bool force)
OVS_REQ_WRLOCK(rwlock)
{
struct bond_entry *e;
@@ -1080,6 +1096,19 @@ bond_update_post_recirc_rules(struct bond *bond, uint32_t *recirc_id,
}
}
+void
+bond_get_recirc_id_and_hash_basis(struct bond *bond, uint32_t *recirc_id,
+ uint32_t *hash_basis)
+{
+ ovs_rwlock_rdlock(&rwlock);
+ if (bond_may_recirc(bond)) {
+ *recirc_id = bond->recirc_id;
+ *hash_basis = bond->basis;
+ } else {
+ *recirc_id = *hash_basis = 0;
+ }
+ ovs_rwlock_unlock(&rwlock);
+}
/* Rebalancing. */
diff --git a/ofproto/bond.h b/ofproto/bond.h
index ecb90919c..085c303e4 100644
--- a/ofproto/bond.h
+++ b/ofproto/bond.h
@@ -127,6 +127,9 @@ void bond_rebalance(struct bond *);
void bond_update_post_recirc_rules(struct bond *, uint32_t *recirc_id,
uint32_t *hash_basis);
+void bond_get_recirc_id_and_hash_basis(struct bond *, uint32_t *recirc_id,
+ uint32_t *hash_basis);
+
bool bond_use_lb_output_action(const struct bond *bond);
#endif /* bond.h */
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 379ae623c..92bce50d3 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -2466,9 +2466,18 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
/* In case recirculation is not actually in use, 'xr.recirc_id'
* will be set to '0', since a valid 'recirc_id' can
* not be zero. */
- bond_update_post_recirc_rules(out_xbundle->bond,
- &xr.recirc_id,
- &xr.hash_basis);
+ if (ctx->xin->allow_side_effects) {
+ bond_update_post_recirc_rules(out_xbundle->bond,
+ &xr.recirc_id,
+ &xr.hash_basis);
+ } else {
+ /* If side effects are not allowed, only getting the bond
+ * configuration. Rule updates will be handled by the
+ * main thread later. */
+ bond_get_recirc_id_and_hash_basis(out_xbundle->bond,
+ &xr.recirc_id,
+ &xr.hash_basis);
+ }
if (xr.recirc_id) {
/* Use recirculation instead of output. */
use_recirc = true;
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 7836743dc..d874dfd6f 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -342,6 +342,22 @@ AT_CHECK([test `egrep 'in_port\(6\)' br1_flows.txt |wc -l` -gt 3])
OVS_VSWITCHD_STOP
AT_CLEANUP
+# SEND_TCP_BOND_PKTS([p_name], [p_ofport], [packet_len])
+#
+# Sends 256 packets to port 'p_name' with different TCP destination ports.
+m4_define([SEND_TCP_BOND_PKTS],
+ [
+ len_cmd=""
+ if test -n "$3"; then
+ len_cmd=" --len $3"
+ fi
+ for i in `seq 0 255`; do
+ pkt="in_port($2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(ack)"
+ ovs-appctl netdev-dummy/receive $1 $pkt$len_cmd
+ done
+ ]
+)
+
AT_SETUP([ofproto-dpif - balance-tcp bonding])
# Create br0 with interfaces bond0(p1, p2, p3) and p7,
# and br1 with interfaces bond1(p4, p5, p6) and p8.
@@ -423,6 +439,73 @@ OVS_WAIT_UNTIL([test -z "$(ovs-appctl dpif-netdev/bond-show)"])
OVS_VSWITCHD_STOP()
AT_CLEANUP
+dnl Regression test for a deadlock / double lock on post-recirculation rule
+dnl updates while processing PACKET_OUT.
+AT_SETUP([ofproto-dpif - balance-tcp bonding rule updates on packet-out])
+dnl Create br0 with interfaces bond0(p1, p2) and p5,
+dnl and br1 with interfaces bond1(p3, p4) and p6.
+dnl bond0 <-> bond1
+OVS_VSWITCHD_START(
+ [add-bond br0 bond0 p1 p2 bond_mode=balance-tcp lacp=active dnl
+ other-config:lacp-time=fast other-config:bond-rebalance-interval=1000 -- dnl
+ set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 mtu_request=65535 -- dnl
+ set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 mtu_request=65535 -- dnl
+ add-port br0 p5 -- set interface p5 ofport_request=5 type=dummy mtu_request=65535 -- dnl
+ add-br br1 -- dnl
+ set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- dnl
+ set bridge br1 datapath-type=dummy other-config:datapath-id=1234 dnl
+ fail-mode=secure -- dnl
+ add-bond br1 bond1 p3 p4 bond_mode=balance-tcp lacp=active dnl
+ other-config:lacp-time=fast other-config:bond-rebalance-interval=1000 -- dnl
+ set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 mtu_request=65535 -- dnl
+ set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 mtu_request=65535 -- dnl
+ add-port br1 p6 -- set interface p6 ofport_request=6 type=dummy mtu_request=65535 --])
+AT_CHECK([ovs-appctl vlog/set bond:dbg])
+AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK
+])
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+AT_CHECK([ovs-ofctl add-flow br1 action=normal])
+OVS_WAIT_WHILE([ovs-appctl bond/show | grep "may_enable: false"])
+
+ovs-appctl time/stop
+ovs-appctl time/warp 2000 200
+
+dnl Send some traffic to distribute all the hashes between ports.
+AT_CHECK([SEND_TCP_BOND_PKTS([p5], [5], [65500])])
+
+dnl Wait for rebalancing for per-hash stats accounting.
+ovs-appctl time/warp 1000 100
+
+dnl Check that p2 handles some hashes.
+ovs-appctl bond/show > bond1.txt
+AT_CHECK([sed -n '/slave p2/,/^$/p' bond1.txt | grep 'hash'], [0], [ignore])
+
+dnl Pause revalidators to be sure that they do not update flows while
+dnl the bonding configuration chnages.
+ovs-appctl revalidator/pause
+
+dnl Move p2 down to trigger update of bonding post-recirculation rules by
+dnl forcing move of all the hashes to p1.
+AT_CHECK([ovs-appctl netdev-dummy/set-admin-state p2 down], 0, [OK
+])
+
+dnl Send PACKET_OUT that may lead to flow updates since the bonding
+dnl configuration changed.
+packet=ffffffffffff00102030405008004500001c00000000401100000a000002ffffffff0035111100080000
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=p5 packet=$packet actions=resubmit(,0)"])
+
+dnl Resume revalidators.
+ovs-appctl revalidator/resume
+ovs-appctl revalidator/wait
+
+ovs-appctl time/warp 200 100
+dnl Check that all hashes moved form p2 and OVS is still working.
+ovs-appctl bond/show > bond2.txt
+AT_CHECK([sed -n '/slave p2/,/^$/p' bond2.txt | grep 'hash'], [1], [ignore])
+
+OVS_VSWITCHD_STOP()
+AT_CLEANUP
+
# Makes sure recirculation does not change the way packet is handled.
AT_SETUP([ofproto-dpif - balance-tcp bonding, different recirc flow ])
OVS_VSWITCHD_START(