summaryrefslogtreecommitdiff
path: root/ctdb/utils
diff options
context:
space:
mode:
authorDavid Disseldorp <ddiss@suse.de>2011-08-03 10:38:27 +0200
committerDavid Disseldorp <ddiss@suse.de>2011-09-06 14:01:18 +0200
commit2f925f1e64acc46d994e7ee74dffeb8438128f20 (patch)
treec6a4ef8f58ad040a6706b671a5d0aac89e74c5e8 /ctdb/utils
parent9699e5750db81fe9cdd25ef24cbe1fbe8a200090 (diff)
downloadsamba-2f925f1e64acc46d994e7ee74dffeb8438128f20.tar.gz
pmda: Attempt reconnects while ctdbd is unavailable
Attempt to reconnect to ctdbd on fetch while it is unreachable. We must provide our own queue callback wrapper, as ctdb_client_read_cb() exits on transport failure. (This used to be ctdb commit 28df6fbf1273b8d095a2bc38dca6a6c35c5c31bd)
Diffstat (limited to 'ctdb/utils')
-rw-r--r--ctdb/utils/pmda/pmda_ctdb.c191
1 files changed, 141 insertions, 50 deletions
diff --git a/ctdb/utils/pmda/pmda_ctdb.c b/ctdb/utils/pmda/pmda_ctdb.c
index 40efb28af31..daaa8573d97 100644
--- a/ctdb/utils/pmda/pmda_ctdb.c
+++ b/ctdb/utils/pmda/pmda_ctdb.c
@@ -163,6 +163,115 @@ static pmdaMetric metrictab[] = {
static struct ctdb_context *ctdb;
static struct event_context *ev;
+static void
+pmda_ctdb_q_read_cb(uint8_t *data, size_t cnt, void *args)
+{
+ if (cnt == 0) {
+ fprintf(stderr, "ctdbd unreachable\n");
+ /* cleanup on request timeout */
+ return;
+ }
+
+ ctdb_client_read_cb(data, cnt, args);
+}
+
+
+static int
+pmda_ctdb_daemon_connect(void)
+{
+ const char *socket_name;
+ int ret;
+ struct sockaddr_un addr;
+
+ ev = event_context_init(NULL);
+ if (ev == NULL) {
+ fprintf(stderr, "Failed to init event ctx\n");
+ return -1;
+ }
+
+ ctdb = ctdb_init(ev);
+ if (ctdb == NULL) {
+ fprintf(stderr, "Failed to init ctdb\n");
+ goto err_ev;
+ }
+
+ socket_name = getenv("CTDB_SOCKET");
+ if (socket_name == NULL) {
+ socket_name = "/var/lib/ctdb/ctdb.socket";
+ }
+
+ ret = ctdb_set_socketname(ctdb, socket_name);
+ if (ret == -1) {
+ fprintf(stderr, "ctdb_set_socketname failed - %s\n",
+ ctdb_errstr(ctdb));
+ goto err_ctdb;
+ }
+
+ /*
+ * ctdb_socket_connect() sets up a default queue callback handler calls
+ * exit() if ctdbd is unavailable on recv, override with our own to
+ * handle this
+ */
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path));
+
+ ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ctdb->daemon.sd == -1) {
+ fprintf(stderr, "Failed to open client socket\n");
+ goto err_ctdb;
+ }
+
+ set_nonblocking(ctdb->daemon.sd);
+ set_close_on_exec(ctdb->daemon.sd);
+
+ if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+ fprintf(stderr, "Failed to connect to ctdb daemon\n");
+ goto err_sd;
+ }
+
+ ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
+ CTDB_DS_ALIGNMENT,
+ pmda_ctdb_q_read_cb, ctdb);
+ if (ctdb->daemon.queue == NULL) {
+ fprintf(stderr, "Failed to setup queue\n");
+ goto err_sd;
+ }
+
+ ctdb->pnn = ctdb_ctrl_getpnn(ctdb, timeval_current_ofs(3, 0),
+ CTDB_CURRENT_NODE);
+ if (ctdb->pnn == (uint32_t)-1) {
+ fprintf(stderr, "Failed to get ctdb pnn\n");
+ goto err_sd;
+ }
+
+ return 0;
+err_sd:
+ close(ctdb->daemon.sd);
+err_ctdb:
+ talloc_free(ctdb);
+err_ev:
+ talloc_free(ev);
+ ctdb = NULL;
+ return -1;
+}
+
+static void
+pmda_ctdb_daemon_disconnect(void)
+{
+ if (ctdb->methods) {
+ ctdb->methods->shutdown(ctdb);
+ }
+
+ if (ctdb->daemon.sd != -1)
+ close(ctdb->daemon.sd);
+
+ talloc_free(ctdb);
+ talloc_free(ev);
+ ctdb = NULL;
+}
+
static int
fill_node(unsigned int item, struct ctdb_statistics *stats, pmAtomValue *atom)
{
@@ -246,17 +355,44 @@ pmda_ctdb_fetch_cb(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
{
struct ctdb_statistics stats;
int ret;
+ TDB_DATA data;
+ int32_t res;
+ struct timeval ctdb_timeout;
+
__pmID_int *id = (__pmID_int *)&(mdesc->m_desc.pmid);
if (inst != PM_IN_NULL)
return PM_ERR_INST;
- ret = ctdb_ctrl_statistics(ctdb, ctdb->pnn, &stats);
- if (ret) {
+ if (ctdb == NULL) {
+ fprintf(stderr, "ctdbd disconnected, stats not available\n");
ret = PM_ERR_VALUE;
goto err_out;
}
+ ctdb_timeout = timeval_current_ofs(1, 0);
+ ret = ctdb_control(ctdb, ctdb->pnn, 0,
+ CTDB_CONTROL_STATISTICS, 0, tdb_null,
+ ctdb, &data, &res, &ctdb_timeout, NULL);
+
+ if (ret != 0 || res != 0) {
+ fprintf(stderr, "ctdb control for statistics failed, reconnecting\n");
+ if (ctdb != NULL)
+ pmda_ctdb_daemon_disconnect();
+ ret = PM_ERR_VALUE;
+ goto err_out;
+ }
+
+ if (data.dsize != sizeof(struct ctdb_statistics)) {
+ fprintf(stderr, "incorrect statistics size %zu - not %zu\n",
+ data.dsize, sizeof(struct ctdb_statistics));
+ ret = PM_ERR_VALUE;
+ goto err_out;
+ }
+
+ stats = *(struct ctdb_statistics *)data.dptr;
+ talloc_free(data.dptr);
+
switch (id->cluster) {
case 0:
atom->ul = stats.num_clients;
@@ -356,56 +492,11 @@ err_out:
static int
pmda_ctdb_fetch(int numpmid, pmID pmidlist[], pmResult **resp, pmdaExt *pmda)
{
- return pmdaFetch(numpmid, pmidlist, resp, pmda);
-}
-
-static int
-pmda_ctdb_daemon_connect(void)
-{
- const char *socket_name;
- int ret;
-
- ev = event_context_init(NULL);
- if (ev == NULL) {
- fprintf(stderr, "Failed to init event ctx\n");
- return -1;
- }
-
- ctdb = ctdb_init(ev);
if (ctdb == NULL) {
- fprintf(stderr, "Failed to init ctdb\n");
- return -1;
+ fprintf(stderr, "attempting reconnect to ctdbd\n");
+ pmda_ctdb_daemon_connect();
}
-
- socket_name = getenv("CTDB_SOCKET");
- if (socket_name == NULL) {
- socket_name = "/var/lib/ctdb/ctdb.socket";
- }
-
- ret = ctdb_set_socketname(ctdb, socket_name);
- if (ret == -1) {
- fprintf(stderr, "ctdb_set_socketname failed - %s\n",
- ctdb_errstr(ctdb));
- talloc_free(ctdb);
- return -1;
- }
-
- ret = ctdb_socket_connect(ctdb);
- if (ret != 0) {
- fprintf(stderr, "Failed to connect to daemon\n");
- talloc_free(ctdb);
- return -1;
- }
-
- ctdb->pnn = ctdb_ctrl_getpnn(ctdb, timeval_current_ofs(3, 0),
- CTDB_CURRENT_NODE);
- if (ctdb->pnn == (uint32_t)-1) {
- fprintf(stderr, "Failed to get ctdb pnn\n");
- talloc_free(ctdb);
- return -1;
- }
-
- return 0;
+ return pmdaFetch(numpmid, pmidlist, resp, pmda);
}
/*