summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBeniamino Galvani <bgalvani@redhat.com>2020-04-03 09:13:51 +0200
committerBeniamino Galvani <bgalvani@redhat.com>2020-04-09 11:24:55 +0200
commit2337c52df467dd4178bf722a58cc91737f8833f0 (patch)
tree6fdb2ffb0fbf07d4b5759f9a74402006baa37064
parent177ee2d7bf99695d041cf87b600921c396a5b938 (diff)
downloadNetworkManager-bg/ovs-retry.tar.gz
ovsdb: retry calls in case of communication error with serverbg/ovs-retry
When the server is restarted the write to unix socket fails with EPIPE. In such case, don't fail all the calls in queue; instead, after a sync of the ovsdb state (through a monitor call), start processing the queue again, including the call that previously failed. Add a retry counter to avoid that calls are stuck in the queue forever in a hypothetical scenario in which the write always fails.
-rw-r--r--src/devices/ovs/nm-ovsdb.c81
1 files changed, 55 insertions, 26 deletions
diff --git a/src/devices/ovs/nm-ovsdb.c b/src/devices/ovs/nm-ovsdb.c
index d78c489dff..b6550b2531 100644
--- a/src/devices/ovs/nm-ovsdb.c
+++ b/src/devices/ovs/nm-ovsdb.c
@@ -64,6 +64,7 @@ typedef struct {
GHashTable *ports; /* port uuid => OpenvswitchPort */
GHashTable *bridges; /* bridge uuid => OpenvswitchBridge */
char *db_uuid;
+ guint num_failures;
} NMOvsdbPrivate;
struct _NMOvsdb {
@@ -87,7 +88,7 @@ NM_DEFINE_SINGLETON_GETTER (NMOvsdb, nm_ovsdb_get, NM_TYPE_OVSDB);
/*****************************************************************************/
static void ovsdb_try_connect (NMOvsdb *self);
-static void ovsdb_disconnect (NMOvsdb *self, gboolean is_disposing);
+static void ovsdb_disconnect (NMOvsdb *self, gboolean retry, gboolean is_disposing);
static void ovsdb_read (NMOvsdb *self);
static void ovsdb_write (NMOvsdb *self);
static void ovsdb_next_command (NMOvsdb *self);
@@ -127,6 +128,8 @@ typedef struct {
};
} OvsdbMethodCall;
+#define OVSDB_MAX_FAILURES 3
+
static void
_call_trace (const char *comment, OvsdbMethodCall *call, json_t *msg)
{
@@ -183,7 +186,8 @@ ovsdb_call_method (NMOvsdb *self, OvsdbCommand command,
const char *ifname,
NMConnection *bridge, NMConnection *port, NMConnection *interface,
NMDevice *bridge_device, NMDevice *interface_device,
- guint32 mtu, OvsdbMethodCallback callback, gpointer user_data)
+ guint32 mtu, OvsdbMethodCallback callback, gpointer user_data,
+ gboolean add_first)
{
NMOvsdbPrivate *priv = NM_OVSDB_GET_PRIVATE (self);
OvsdbMethodCall *call;
@@ -191,8 +195,13 @@ ovsdb_call_method (NMOvsdb *self, OvsdbCommand command,
/* Ensure we're not unsynchronized before we queue the method call. */
ovsdb_try_connect (self);
- g_array_set_size (priv->calls, priv->calls->len + 1);
- call = &g_array_index (priv->calls, OvsdbMethodCall, priv->calls->len - 1);
+ if (add_first) {
+ g_array_prepend_val (priv->calls, (OvsdbMethodCall) {});
+ call = &g_array_index (priv->calls, OvsdbMethodCall, 0);
+ } else {
+ g_array_set_size (priv->calls, priv->calls->len + 1);
+ call = &g_array_index (priv->calls, OvsdbMethodCall, priv->calls->len - 1);
+ }
call->id = COMMAND_PENDING;
call->command = command;
call->callback = callback;
@@ -1196,7 +1205,7 @@ ovsdb_got_msg (NMOvsdb *self, json_t *msg)
"result", &result,
"error", &error) == -1) {
_LOGW ("couldn't grok the message: %s", json_error.text);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
return;
}
@@ -1207,7 +1216,7 @@ ovsdb_got_msg (NMOvsdb *self, json_t *msg)
/* It's a method call! */
if (!params) {
_LOGW ("a method call with no params: '%s'", method);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
return;
}
@@ -1227,13 +1236,13 @@ ovsdb_got_msg (NMOvsdb *self, json_t *msg)
/* This is a response to a method call. */
if (!priv->calls->len) {
_LOGE ("there are no queued calls expecting response %" G_GUINT64_FORMAT, id);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
return;
}
call = &g_array_index (priv->calls, OvsdbMethodCall, 0);
if (call->id != id) {
_LOGE ("expected a response to call %" G_GUINT64_FORMAT ", not %" G_GUINT64_FORMAT, call->id, id);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
return;
}
/* Cool, we found a corresponding call. Finish it. */
@@ -1251,6 +1260,7 @@ ovsdb_got_msg (NMOvsdb *self, json_t *msg)
user_data = call->user_data;
g_array_remove_index (priv->calls, 0);
callback (self, result, local, user_data);
+ priv->num_failures = 0;
/* Don't progress further commands in case the callback hit an error
* and disconnected us. */
@@ -1309,9 +1319,11 @@ ovsdb_read_cb (GObject *source_object, GAsyncResult *res, gpointer user_data)
size = g_input_stream_read_finish (stream, res, &error);
if (size == -1) {
+ /* ovsdb-server was possibly restarted */
_LOGW ("short read from ovsdb: %s", error->message);
+ priv->num_failures++;
g_clear_error (&error);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, priv->num_failures <= OVSDB_MAX_FAILURES, FALSE);
return;
}
@@ -1357,9 +1369,11 @@ ovsdb_write_cb (GObject *source_object, GAsyncResult *res, gpointer user_data)
size = g_output_stream_write_finish (stream, res, &error);
if (size == -1) {
+ /* ovsdb-server was possibly restarted */
_LOGW ("short write to ovsdb: %s", error->message);
+ priv->num_failures++;
g_clear_error (&error);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, priv->num_failures <= OVSDB_MAX_FAILURES, FALSE);
return;
}
@@ -1402,7 +1416,7 @@ ovsdb_write (NMOvsdb *self)
* puts us back in sync.
*/
static void
-ovsdb_disconnect (NMOvsdb *self, gboolean is_disposing)
+ovsdb_disconnect (NMOvsdb *self, gboolean retry, gboolean is_disposing)
{
NMOvsdbPrivate *priv = NM_OVSDB_GET_PRIVATE (self);
OvsdbMethodCall *call;
@@ -1410,18 +1424,26 @@ ovsdb_disconnect (NMOvsdb *self, gboolean is_disposing)
gpointer user_data;
gs_free_error GError *error = NULL;
+ nm_assert (!retry || !is_disposing);
+
if (!priv->client)
return;
- _LOGD ("disconnecting from ovsdb");
- nm_utils_error_set_cancelled (&error, is_disposing, "NMOvsdb");
+ _LOGD ("disconnecting from ovsdb, retry %d", retry);
- while (priv->calls->len) {
- call = &g_array_index (priv->calls, OvsdbMethodCall, priv->calls->len - 1);
- callback = call->callback;
- user_data = call->user_data;
- g_array_remove_index (priv->calls, priv->calls->len - 1);
- callback (self, NULL, error, user_data);
+ if (retry) {
+ if (priv->calls->len != 0)
+ g_array_index (priv->calls, OvsdbMethodCall, 0).id = COMMAND_PENDING;
+ } else {
+ nm_utils_error_set_cancelled (&error, is_disposing, "NMOvsdb");
+
+ while (priv->calls->len) {
+ call = &g_array_index (priv->calls, OvsdbMethodCall, priv->calls->len - 1);
+ callback = call->callback;
+ user_data = call->user_data;
+ g_array_remove_index (priv->calls, priv->calls->len - 1);
+ callback (self, NULL, error, user_data);
+ }
}
priv->bufp = 0;
@@ -1431,6 +1453,9 @@ ovsdb_disconnect (NMOvsdb *self, gboolean is_disposing)
g_clear_object (&priv->conn);
nm_clear_g_free (&priv->db_uuid);
nm_clear_g_cancellable (&priv->cancellable);
+
+ if (retry)
+ ovsdb_try_connect (self);
}
static void
@@ -1439,7 +1464,7 @@ _monitor_bridges_cb (NMOvsdb *self, json_t *result, GError *error, gpointer user
if (error) {
if (!nm_utils_error_is_cancelled_or_disposing (error)) {
_LOGI ("%s", error->message);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
}
return;
}
@@ -1463,7 +1488,7 @@ _client_connect_cb (GObject *source_object, GAsyncResult *res, gpointer user_dat
if (!g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED))
_LOGI ("%s", error->message);
- ovsdb_disconnect (self, FALSE);
+ ovsdb_disconnect (self, FALSE, FALSE);
g_clear_error (&error);
return;
}
@@ -1504,7 +1529,8 @@ ovsdb_try_connect (NMOvsdb *self)
/* Queue a monitor call before any other command, ensuring that we have an up
* to date view of existing bridged that we need for add and remove ops. */
ovsdb_call_method (self, OVSDB_MONITOR, NULL,
- NULL, NULL, NULL, NULL, NULL, 0, _monitor_bridges_cb, NULL);
+ NULL, NULL, NULL, NULL, NULL, 0,
+ _monitor_bridges_cb, NULL, TRUE);
}
/*****************************************************************************/
@@ -1565,7 +1591,8 @@ nm_ovsdb_add_interface (NMOvsdb *self,
bridge_device, interface_device,
0,
_transact_cb,
- ovsdb_call_new (callback, user_data));
+ ovsdb_call_new (callback, user_data),
+ FALSE);
}
void
@@ -1575,7 +1602,8 @@ nm_ovsdb_del_interface (NMOvsdb *self, const char *ifname,
ovsdb_call_method (self, OVSDB_DEL_INTERFACE, ifname,
NULL, NULL, NULL, NULL, NULL, 0,
_transact_cb,
- ovsdb_call_new (callback, user_data));
+ ovsdb_call_new (callback, user_data),
+ FALSE);
}
void nm_ovsdb_set_interface_mtu (NMOvsdb *self, const char *ifname, guint32 mtu,
@@ -1584,7 +1612,8 @@ void nm_ovsdb_set_interface_mtu (NMOvsdb *self, const char *ifname, guint32 mtu,
ovsdb_call_method (self, OVSDB_SET_INTERFACE_MTU, ifname,
NULL, NULL, NULL, NULL, NULL, mtu,
_transact_cb,
- ovsdb_call_new (callback, user_data));
+ ovsdb_call_new (callback, user_data),
+ FALSE);
}
/*****************************************************************************/
@@ -1666,7 +1695,7 @@ dispose (GObject *object)
NMOvsdb *self = NM_OVSDB (object);
NMOvsdbPrivate *priv = NM_OVSDB_GET_PRIVATE (self);
- ovsdb_disconnect (self, TRUE);
+ ovsdb_disconnect (self, FALSE, TRUE);
if (priv->input) {
g_string_free (priv->input, TRUE);