/*
   CTDB mutex helper using Ceph librados locks

   Copyright (C) David Disseldorp 2016

   Based on ctdb_mutex_fcntl_helper.c, which is:
   Copyright (C) Martin Schwenke 2015

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

#include "replace.h"

#include "tevent.h"
#include "talloc.h"
#include "rados/librados.h"

/* Name, cookie and description attached to the RADOS object lock. */
#define CTDB_MUTEX_CEPH_LOCK_NAME	"ctdb_reclock_mutex"
#define CTDB_MUTEX_CEPH_LOCK_COOKIE	CTDB_MUTEX_CEPH_LOCK_NAME
#define CTDB_MUTEX_CEPH_LOCK_DESC	"CTDB recovery lock"

/* Single-character status codes written to stdout by main(). */
#define CTDB_MUTEX_STATUS_HOLDING "0"
#define CTDB_MUTEX_STATUS_CONTENDED "1"
#define CTDB_MUTEX_STATUS_TIMEOUT "2"
#define CTDB_MUTEX_STATUS_ERROR "3"

/* Seconds between checks that the parent process still exists. */
#define CTDB_MUTEX_PARENT_CHECK_SECS 5

static char *progname = NULL;

/*
 * Create a cluster handle, read the default Ceph configuration, connect
 * and open an I/O context on pool_name.
 *
 * On success 0 is returned and *_ceph_cluster / *_ioctx are filled in; the
 * caller releases them with ctdb_mutex_rados_ctx_destroy().  On failure a
 * message is printed to stderr, any partially set up cluster handle is
 * shut down, the output parameters are left untouched and the negative
 * librados return code is passed back.
 */
static int ctdb_mutex_rados_ctx_create(const char *ceph_cluster_name,
				       const char *ceph_auth_name,
				       const char *pool_name,
				       rados_t *_ceph_cluster,
				       rados_ioctx_t *_ioctx)
{
	rados_t ceph_cluster = NULL;
	rados_ioctx_t ioctx = NULL;
	int ret;

	ret = rados_create2(&ceph_cluster, ceph_cluster_name, ceph_auth_name, 0);
	if (ret < 0) {
		fprintf(stderr, "%s: failed to initialise Ceph cluster %s as %s"
			" - (%s)\n", progname, ceph_cluster_name,
			ceph_auth_name, strerror(-ret));
		return ret;
	}

	/* path=NULL tells librados to use default locations */
	ret = rados_conf_read_file(ceph_cluster, NULL);
	if (ret < 0) {
		fprintf(stderr, "%s: failed to parse Ceph cluster config"
			" - (%s)\n", progname, strerror(-ret));
		rados_shutdown(ceph_cluster);
		return ret;
	}

	ret = rados_connect(ceph_cluster);
	if (ret < 0) {
		fprintf(stderr, "%s: failed to connect to Ceph cluster %s as %s"
			" - (%s)\n", progname, ceph_cluster_name,
			ceph_auth_name, strerror(-ret));
		rados_shutdown(ceph_cluster);
		return ret;
	}

	ret = rados_ioctx_create(ceph_cluster, pool_name, &ioctx);
	if (ret < 0) {
		fprintf(stderr, "%s: failed to create Ceph ioctx for pool %s"
			" - (%s)\n", progname, pool_name, strerror(-ret));
		rados_shutdown(ceph_cluster);
		return ret;
	}

	*_ceph_cluster = ceph_cluster;
	*_ioctx = ioctx;

	return 0;
}

/*
 * Release the I/O context and cluster handle obtained from
 * ctdb_mutex_rados_ctx_create().  NULL handles are skipped so the function
 * can be used on a zero-initialised state.
 */
static void ctdb_mutex_rados_ctx_destroy(rados_t ceph_cluster,
					 rados_ioctx_t ioctx)
{
	if (ioctx != NULL) {
		rados_ioctx_destroy(ioctx);
	}
	if (ceph_cluster != NULL) {
		rados_shutdown(ceph_cluster);
	}
}

/*
 * Try to take the exclusive lock on RADOS object oid.
 *
 * The I/O context handle is taken by value, which is how every caller
 * passes it and how it is forwarded to rados_lock_exclusive().
 *
 * Returns 0 when the lock was obtained, -EEXIST or -EBUSY on contention
 * (silently), or another negative code after printing a message to stderr.
 */
static int ctdb_mutex_rados_lock(rados_ioctx_t ioctx,
				 const char *oid)
{
	int ret;

	ret = rados_lock_exclusive(ioctx, oid,
				   CTDB_MUTEX_CEPH_LOCK_NAME,
				   CTDB_MUTEX_CEPH_LOCK_COOKIE,
				   CTDB_MUTEX_CEPH_LOCK_DESC,
				   NULL, /* infinite duration */
				   0);
	if ((ret == -EEXIST) || (ret == -EBUSY)) {
		/* lock contention */
		return ret;
	} else if (ret < 0) {
		/* unexpected failure */
		fprintf(stderr,
			"%s: Failed to get lock on RADOS object '%s' - (%s)\n",
			progname, oid, strerror(-ret));
		return ret;
	}

	/* lock obtained */
	return 0;
}

/*
 * Drop the lock taken by ctdb_mutex_rados_lock() on RADOS object oid.
 *
 * Returns 0 on success, or the negative rados_unlock() return code after
 * printing a message to stderr.
 */
static int ctdb_mutex_rados_unlock(rados_ioctx_t ioctx,
				   const char *oid)
{
	int ret;

	ret = rados_unlock(ioctx, oid,
			   CTDB_MUTEX_CEPH_LOCK_NAME,
			   CTDB_MUTEX_CEPH_LOCK_COOKIE);
	if (ret < 0) {
		fprintf(stderr,
			"%s: Failed to drop lock on RADOS object '%s' - (%s)\n",
			progname, oid, strerror(-ret));
		return ret;
	}

	return 0;
}

/* Helper state; allocated with talloc in main() and parent of all events. */
struct ctdb_mutex_rados_state {
	bool holding_mutex;		/* set once the RADOS lock is obtained */
	const char *ceph_cluster_name;	/* argv[1] */
	const char *ceph_auth_name;	/* argv[2] */
	const char *pool_name;		/* argv[3] */
	const char *object;		/* argv[4] */
	pid_t ppid;			/* parent PID sampled at startup */
	struct tevent_context *ev;
	struct tevent_signal *sig_ev;	/* SIGTERM handler */
	struct tevent_timer *timer_ev;	/* periodic parent check */
	rados_t ceph_cluster;
	rados_ioctx_t ioctx;
};

/*
 * Common exit path for the event callbacks: drop the lock if it is held
 * (otherwise complain on stderr, naming the callback via cb_name), tear
 * down the Ceph context, free the state and exit.  The exit status is 0
 * only when the lock was held and released successfully.  Never returns.
 */
static void ctdb_mutex_rados_release_exit(
				struct ctdb_mutex_rados_state *cmr_state,
				const char *cb_name)
{
	int ret;

	if (cmr_state->holding_mutex) {
		ret = ctdb_mutex_rados_unlock(cmr_state->ioctx,
					      cmr_state->object);
	} else {
		fprintf(stderr, "%s callback invoked without mutex!\n",
			cb_name);
		ret = -EINVAL;
	}

	ctdb_mutex_rados_ctx_destroy(cmr_state->ceph_cluster,
				     cmr_state->ioctx);
	talloc_free(cmr_state);
	exit(ret ? 1 : 0);
}

/* SIGTERM handler: release the lock and exit. */
static void ctdb_mutex_rados_sigterm_cb(struct tevent_context *ev,
					struct tevent_signal *se,
					int signum,
					int count,
					void *siginfo,
					void *private_data)
{
	struct ctdb_mutex_rados_state *cmr_state = private_data;

	ctdb_mutex_rados_release_exit(cmr_state, "Sigterm");
}

/*
 * Periodic timer: while the parent process still exists, re-arm the timer
 * and keep holding the lock; once it is gone, release the lock and exit.
 */
static void ctdb_mutex_rados_timer_cb(struct tevent_context *ev,
				      struct tevent_timer *te,
				      struct timeval current_time,
				      void *private_data)
{
	struct ctdb_mutex_rados_state *cmr_state = private_data;

	/*
	 * kill(pid, 0) only probes for existence; any failure other than
	 * ESRCH is treated as "parent still around".
	 */
	if (cmr_state->holding_mutex &&
	    ((kill(cmr_state->ppid, 0) == 0) || (errno != ESRCH))) {
		/* parent still around, keep waiting */
		cmr_state->timer_ev = tevent_add_timer(
			cmr_state->ev, cmr_state,
			tevent_timeval_current_ofs(CTDB_MUTEX_PARENT_CHECK_SECS,
						   0),
			ctdb_mutex_rados_timer_cb, cmr_state);
		if (cmr_state->timer_ev == NULL) {
			fprintf(stderr, "Failed to create timer event\n");
			/* rely on signal cb */
		}
		return;
	}

	/* parent ended (or lock unexpectedly not held), drop lock and exit */
	ctdb_mutex_rados_release_exit(cmr_state, "Timer");
}

/*
 * Usage: <progname> <Ceph Cluster> <Ceph user> <RADOS pool> <RADOS object>
 *
 * Takes an exclusive lock on the given RADOS object, reports the outcome
 * as a CTDB_MUTEX_STATUS_* code on stdout and, when the lock was obtained,
 * holds it until SIGTERM arrives or the parent process disappears.
 * Exit status is 0 on success and 1 on any failure or contention.
 */
int main(int argc, char *argv[])
{
	int ret;
	struct ctdb_mutex_rados_state *cmr_state;

	progname = argv[0];

	if (argc != 5) {
		fprintf(stderr, "Usage: %s <Ceph Cluster> <Ceph user> "
				"<RADOS pool> <RADOS object>\n",
			progname);
		ret = -EINVAL;
		goto err_out;
	}

	/* status codes carry no newline, so stdout must not be buffered */
	ret = setvbuf(stdout, NULL, _IONBF, 0);
	if (ret != 0) {
		fprintf(stderr, "Failed to configure unbuffered stdout I/O\n");
	}

	cmr_state = talloc_zero(NULL, struct ctdb_mutex_rados_state);
	if (cmr_state == NULL) {
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_out;
	}

	cmr_state->ceph_cluster_name = argv[1];
	cmr_state->ceph_auth_name = argv[2];
	cmr_state->pool_name = argv[3];
	cmr_state->object = argv[4];

	cmr_state->ppid = getppid();
	if (cmr_state->ppid == 1) {
		/*
		 * The original parent is gone and the process has
		 * been reparented to init.  This can happen if the
		 * helper is started just as the parent is killed
		 * during shutdown.  The error message doesn't need to
		 * be stellar, since there won't be anything around to
		 * capture and log it...
		 */
		fprintf(stderr, "%s: PPID == 1\n", progname);
		ret = -EPIPE;
		goto err_state_free;
	}

	cmr_state->ev = tevent_context_init(cmr_state);
	if (cmr_state->ev == NULL) {
		fprintf(stderr, "tevent_context_init failed\n");
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_state_free;
	}

	/* wait for sigterm */
	cmr_state->sig_ev = tevent_add_signal(cmr_state->ev, cmr_state,
					      SIGTERM, 0,
					      ctdb_mutex_rados_sigterm_cb,
					      cmr_state);
	if (cmr_state->sig_ev == NULL) {
		fprintf(stderr, "Failed to create signal event\n");
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_state_free;
	}

	/* periodically check parent */
	cmr_state->timer_ev = tevent_add_timer(
		cmr_state->ev, cmr_state,
		tevent_timeval_current_ofs(CTDB_MUTEX_PARENT_CHECK_SECS, 0),
		ctdb_mutex_rados_timer_cb, cmr_state);
	if (cmr_state->timer_ev == NULL) {
		fprintf(stderr, "Failed to create timer event\n");
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		ret = -ENOMEM;
		goto err_state_free;
	}

	ret = ctdb_mutex_rados_ctx_create(cmr_state->ceph_cluster_name,
					  cmr_state->ceph_auth_name,
					  cmr_state->pool_name,
					  &cmr_state->ceph_cluster,
					  &cmr_state->ioctx);
	if (ret < 0) {
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		goto err_state_free;
	}

	ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object);
	if ((ret == -EEXIST) || (ret == -EBUSY)) {
		fprintf(stdout, CTDB_MUTEX_STATUS_CONTENDED);
		goto err_ctx_cleanup;
	} else if (ret < 0) {
		fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
		goto err_ctx_cleanup;
	}

	cmr_state->holding_mutex = true;
	fprintf(stdout, CTDB_MUTEX_STATUS_HOLDING);

	/* wait for the signal / timer events to do their work */
	ret = tevent_loop_wait(cmr_state->ev);

	/*
	 * The callbacks exit the process themselves, so getting here means
	 * the event loop ended on its own.  The lock was taken with an
	 * infinite duration; drop it explicitly rather than leaving it
	 * behind on the RADOS object.
	 */
	if (cmr_state->holding_mutex) {
		int unlock_ret = ctdb_mutex_rados_unlock(cmr_state->ioctx,
							 cmr_state->object);

		cmr_state->holding_mutex = false;
		if (ret == 0) {
			ret = unlock_ret;
		}
	}

err_ctx_cleanup:
	ctdb_mutex_rados_ctx_destroy(cmr_state->ceph_cluster,
				     cmr_state->ioctx);
err_state_free:
	talloc_free(cmr_state);
err_out:
	return ret ? 1 : 0;
}