summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--support/include/cld.h1
-rw-r--r--utils/nfsdcld/Makefile.am2
-rw-r--r--utils/nfsdcld/cld-internal.h30
-rw-r--r--utils/nfsdcld/nfsdcld.c160
-rw-r--r--utils/nfsdcld/sqlite.c483
-rw-r--r--utils/nfsdcld/sqlite.h11
6 files changed, 579 insertions, 108 deletions
diff --git a/support/include/cld.h b/support/include/cld.h
index f14a9ab..c1f5b70 100644
--- a/support/include/cld.h
+++ b/support/include/cld.h
@@ -33,6 +33,7 @@ enum cld_command {
Cld_Remove, /* remove record of this cm_id */
Cld_Check, /* is this cm_id allowed? */
Cld_GraceDone, /* grace period is complete */
+ Cld_GraceStart,
};
/* representation of long-form NFSv4 client ID */
diff --git a/utils/nfsdcld/Makefile.am b/utils/nfsdcld/Makefile.am
index 8239be8..d1da749 100644
--- a/utils/nfsdcld/Makefile.am
+++ b/utils/nfsdcld/Makefile.am
@@ -13,7 +13,7 @@ sbin_PROGRAMS = nfsdcld
nfsdcld_SOURCES = nfsdcld.c sqlite.c
nfsdcld_LDADD = ../../support/nfs/libnfs.la $(LIBEVENT) $(LIBSQLITE) $(LIBCAP)
-noinst_HEADERS = sqlite.h
+noinst_HEADERS = sqlite.h cld-internal.h
MAINTAINERCLEANFILES = Makefile.in
diff --git a/utils/nfsdcld/cld-internal.h b/utils/nfsdcld/cld-internal.h
new file mode 100644
index 0000000..a90cced
--- /dev/null
+++ b/utils/nfsdcld/cld-internal.h
@@ -0,0 +1,30 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _CLD_INTERNAL_H_
+#define _CLD_INTERNAL_H_
+
+struct cld_client {
+ int cl_fd;
+ struct event cl_event;
+ struct cld_msg cl_msg;
+};
+
+uint64_t current_epoch;
+uint64_t recovery_epoch;
+
+#endif /* _CLD_INTERNAL_H_ */
diff --git a/utils/nfsdcld/nfsdcld.c b/utils/nfsdcld/nfsdcld.c
index 082f3ab..9b1ad98 100644
--- a/utils/nfsdcld/nfsdcld.c
+++ b/utils/nfsdcld/nfsdcld.c
@@ -42,7 +42,9 @@
#include "xlog.h"
#include "nfslib.h"
#include "cld.h"
+#include "cld-internal.h"
#include "sqlite.h"
+#include "../mount/version.h"
#ifndef PIPEFS_DIR
#define PIPEFS_DIR NFS_STATEDIR "/rpc_pipefs"
@@ -54,19 +56,17 @@
#define CLD_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcld"
#endif
+#define NFSD_END_GRACE_FILE "/proc/fs/nfsd/v4_end_grace"
+
#define UPCALL_VERSION 1
/* private data structures */
-struct cld_client {
- int cl_fd;
- struct event cl_event;
- struct cld_msg cl_msg;
-};
/* global variables */
static char *pipepath = DEFAULT_CLD_PATH;
static int inotify_fd = -1;
static struct event pipedir_event;
+static bool old_kernel = false;
static struct option longopts[] =
{
@@ -298,6 +298,43 @@ out:
return ret;
}
+/*
+ * Older kernels will not tell nfsdcld when a grace period has started.
+ * Therefore we have to peek at the /proc/fs/nfsd/v4_end_grace file to
+ * see if nfsd is in grace. We have to do this for create and remove
+ * upcalls to ensure that the correct table is being updated - otherwise
+ * we could lose client records when the grace period is lifted.
+ */
+static int
+cld_check_grace_period(void)
+{
+ int fd, ret = 0;
+ char c;
+
+ if (!old_kernel)
+ return 0;
+ if (recovery_epoch != 0)
+ return 0;
+ fd = open(NFSD_END_GRACE_FILE, O_RDONLY);
+ if (fd < 0) {
+ xlog(L_WARNING, "Unable to open %s: %m",
+ NFSD_END_GRACE_FILE);
+ return 1;
+ }
+ if (read(fd, &c, 1) < 0) {
+ xlog(L_WARNING, "Unable to read from %s: %m",
+ NFSD_END_GRACE_FILE);
+ return 1;
+ }
+ close(fd);
+ if (c == 'N') {
+ xlog(L_WARNING, "nfsd is in grace but didn't send a gracestart upcall, "
+ "please update the kernel");
+ ret = sqlite_grace_start();
+ }
+ return ret;
+}
+
static void
cld_not_implemented(struct cld_client *clnt)
{
@@ -332,14 +369,17 @@ cld_create(struct cld_client *clnt)
ssize_t bsize, wsize;
struct cld_msg *cmsg = &clnt->cl_msg;
+ ret = cld_check_grace_period();
+ if (ret)
+ goto reply;
+
xlog(D_GENERAL, "%s: create client record.", __func__);
ret = sqlite_insert_client(cmsg->cm_u.cm_name.cn_id,
- cmsg->cm_u.cm_name.cn_len,
- false,
- false);
+ cmsg->cm_u.cm_name.cn_len);
+reply:
cmsg->cm_status = ret ? -EREMOTEIO : ret;
bsize = sizeof(*cmsg);
@@ -365,11 +405,16 @@ cld_remove(struct cld_client *clnt)
ssize_t bsize, wsize;
struct cld_msg *cmsg = &clnt->cl_msg;
+ ret = cld_check_grace_period();
+ if (ret)
+ goto reply;
+
xlog(D_GENERAL, "%s: remove client record.", __func__);
ret = sqlite_remove_client(cmsg->cm_u.cm_name.cn_id,
cmsg->cm_u.cm_name.cn_len);
+reply:
cmsg->cm_status = ret ? -EREMOTEIO : ret;
bsize = sizeof(*cmsg);
@@ -396,12 +441,26 @@ cld_check(struct cld_client *clnt)
ssize_t bsize, wsize;
struct cld_msg *cmsg = &clnt->cl_msg;
+ /*
+ * If we get a check upcall at all, it means we're talking to an old
+ * kernel. Furthermore, if we're not in grace it means this is the
+ * first client to do a reclaim. Log a message and use
+ * sqlite_grace_start() to advance the epoch numbers.
+ */
+ if (recovery_epoch == 0) {
+ xlog(D_GENERAL, "%s: received a check upcall, please update the kernel",
+ __func__);
+ ret = sqlite_grace_start();
+ if (ret)
+ goto reply;
+ }
+
xlog(D_GENERAL, "%s: check client record", __func__);
ret = sqlite_check_client(cmsg->cm_u.cm_name.cn_id,
- cmsg->cm_u.cm_name.cn_len,
- false);
+ cmsg->cm_u.cm_name.cn_len);
+reply:
/* set up reply */
cmsg->cm_status = ret ? -EACCES : ret;
@@ -429,11 +488,27 @@ cld_gracedone(struct cld_client *clnt)
ssize_t bsize, wsize;
struct cld_msg *cmsg = &clnt->cl_msg;
- xlog(D_GENERAL, "%s: grace done. cm_gracetime=%ld", __func__,
- cmsg->cm_u.cm_gracetime);
+ /*
+ * If we got a "gracedone" upcall while we're not in grace, then
+ * 1) we must be talking to an old kernel
+ * 2) no clients attempted to reclaim
+ * In that case, log a message and use sqlite_grace_start() to
+ * advance the epoch numbers, and then proceed as normal.
+ */
+ if (recovery_epoch == 0) {
+ xlog(D_GENERAL, "%s: received gracedone upcall "
+ "while not in grace, please update the kernel",
+ __func__);
+ ret = sqlite_grace_start();
+ if (ret)
+ goto reply;
+ }
+
+ xlog(D_GENERAL, "%s: grace done.", __func__);
- ret = sqlite_remove_unreclaimed(cmsg->cm_u.cm_gracetime);
+ ret = sqlite_grace_done();
+reply:
/* set up reply: downcall with 0 status */
cmsg->cm_status = ret ? -EREMOTEIO : ret;
@@ -453,6 +528,59 @@ cld_gracedone(struct cld_client *clnt)
}
}
+static int
+gracestart_callback(struct cld_client *clnt) {
+ ssize_t bsize, wsize;
+ struct cld_msg *cmsg = &clnt->cl_msg;
+
+ cmsg->cm_status = -EINPROGRESS;
+
+ bsize = sizeof(struct cld_msg);
+
+ xlog(D_GENERAL, "Sending client %.*s",
+ cmsg->cm_u.cm_name.cn_len, cmsg->cm_u.cm_name.cn_id);
+ wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
+ if (wsize != bsize)
+ return -EIO;
+ return 0;
+}
+
+static void
+cld_gracestart(struct cld_client *clnt)
+{
+ int ret;
+ ssize_t bsize, wsize;
+ struct cld_msg *cmsg = &clnt->cl_msg;
+
+ xlog(D_GENERAL, "%s: updating grace epochs", __func__);
+
+ ret = sqlite_grace_start();
+ if (ret)
+ goto reply;
+
+ xlog(D_GENERAL, "%s: sending client records to the kernel", __func__);
+
+ ret = sqlite_iterate_recovery(&gracestart_callback, clnt);
+
+reply:
+ /* set up reply: downcall with 0 status */
+ cmsg->cm_status = ret ? -EREMOTEIO : ret;
+
+ bsize = sizeof(struct cld_msg);
+ xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status);
+ wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
+ if (wsize != bsize) {
+ xlog(L_ERROR, "%s: problem writing to cld pipe (%ld): %m",
+ __func__, wsize);
+ ret = cld_pipe_open(clnt);
+ if (ret) {
+ xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+ __func__, ret);
+ exit(ret);
+ }
+ }
+}
+
static void
cldcb(int UNUSED(fd), short which, void *data)
{
@@ -490,6 +618,9 @@ cldcb(int UNUSED(fd), short which, void *data)
case Cld_GraceDone:
cld_gracedone(clnt);
break;
+ case Cld_GraceStart:
+ cld_gracestart(clnt);
+ break;
default:
xlog(L_WARNING, "%s: command %u is not yet implemented",
__func__, cmsg->cm_cmd);
@@ -586,6 +717,9 @@ main(int argc, char **argv)
}
}
+ if (linux_version_code() < MAKE_VERSION(4, 20, 0))
+ old_kernel = true;
+
/* set up storage db */
rc = sqlite_prepare_dbh(storagedir);
if (rc) {
diff --git a/utils/nfsdcld/sqlite.c b/utils/nfsdcld/sqlite.c
index c59f777..82140ea 100644
--- a/utils/nfsdcld/sqlite.c
+++ b/utils/nfsdcld/sqlite.c
@@ -21,17 +21,24 @@
* Explanation:
*
* This file contains the code to manage the sqlite backend database for the
- * nfsdcltrack usermodehelper upcall program.
+ * nfsdcld client tracking daemon.
*
* The main database is called main.sqlite and contains the following tables:
*
* parameters: simple key/value pairs for storing database info
*
- * clients: an "id" column containing a BLOB with the long-form clientid as
- * sent by the client, a "time" column containing a timestamp (in
- * epoch seconds) of when the record was last updated, and a
- * "has_session" column containing a boolean value indicating
- * whether the client has sessions (v4.1+) or not (v4.0).
+ * grace: a "current" column containing an INTEGER representing the current
+ * epoch (where should new values be stored) and a "recovery" column
+ * containing an INTEGER representing the recovery epoch (from what
+ * epoch are we allowed to recover). A recovery epoch of 0 means
+ * normal operation (grace period not in force). Note: sqlite stores
+ * integers as signed values, so these must be cast to a uint64_t when
+ * retrieving them from the database and back to an int64_t when storing
+ * them in the database.
+ *
+ * rec-CCCCCCCCCCCCCCCC (where C is the hex representation of the epoch value):
+ * a single "id" column containing a BLOB with the long-form clientid
+ * as sent by the client.
*/
#ifdef HAVE_CONFIG_H
@@ -47,16 +54,21 @@
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
#include <sqlite3.h>
#include <linux/limits.h>
#include "xlog.h"
#include "sqlite.h"
+#include "cld.h"
+#include "cld-internal.h"
-#define CLTRACK_SQLITE_LATEST_SCHEMA_VERSION 2
+#define CLD_SQLITE_LATEST_SCHEMA_VERSION 3
/* in milliseconds */
-#define CLTRACK_SQLITE_BUSY_TIMEOUT 10000
+#define CLD_SQLITE_BUSY_TIMEOUT 10000
/* private data structures */
@@ -124,7 +136,7 @@ out:
}
static int
-sqlite_maindb_update_v1_to_v2(void)
+sqlite_maindb_update_schema(int oldversion)
{
int ret, ret2;
char *err;
@@ -142,32 +154,66 @@ sqlite_maindb_update_v1_to_v2(void)
* transaction to guard against racing DB setup attempts
*/
ret = sqlite_query_schema_version();
- switch (ret) {
- case 1:
- /* Still at v1 -- do conversion */
- break;
- case CLTRACK_SQLITE_LATEST_SCHEMA_VERSION:
- /* Someone else raced in and set it up */
- ret = 0;
+ if (ret != oldversion) {
+ if (ret == CLD_SQLITE_LATEST_SCHEMA_VERSION)
+ /* Someone else raced in and set it up */
+ ret = 0;
+ else
+ /* Something went wrong -- fail! */
+ ret = -EINVAL;
goto rollback;
- default:
- /* Something went wrong -- fail! */
- ret = -EINVAL;
+ }
+
+ /* Still at old version -- do conversion */
+
+ /* create grace table */
+ ret = sqlite3_exec(dbh, "CREATE TABLE grace "
+ "(current INTEGER , recovery INTEGER);",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to create grace table: %s", err);
+ goto rollback;
+ }
+
+ /* insert initial epochs into grace table */
+ ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace "
+ "values (1, 0);",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to set initial epochs: %s", err);
+ goto rollback;
+ }
+
+ /* create recovery table for current epoch */
+ ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" "
+ "(id BLOB PRIMARY KEY);",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to create recovery table "
+ "for current epoch: %s", err);
+ goto rollback;
+ }
+
+ /* copy records from old clients table */
+ ret = sqlite3_exec(dbh, "INSERT INTO \"rec-0000000000000001\" "
+ "SELECT id FROM clients;",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to copy client records: %s", err);
goto rollback;
}
- /* create v2 clients table */
- ret = sqlite3_exec(dbh, "ALTER TABLE clients ADD COLUMN "
- "has_session INTEGER;",
+ /* drop the old clients table */
+ ret = sqlite3_exec(dbh, "DROP TABLE clients;",
NULL, NULL, &err);
if (ret != SQLITE_OK) {
- xlog(L_ERROR, "Unable to update clients table: %s", err);
+ xlog(L_ERROR, "Unable to drop old clients table: %s", err);
goto rollback;
}
ret = snprintf(buf, sizeof(buf), "UPDATE parameters SET value = %d "
"WHERE key = \"version\";",
- CLTRACK_SQLITE_LATEST_SCHEMA_VERSION);
+ CLD_SQLITE_LATEST_SCHEMA_VERSION);
if (ret < 0) {
xlog(L_ERROR, "sprintf failed!");
goto rollback;
@@ -205,7 +251,7 @@ rollback:
* transaction. On any error, rollback the transaction.
*/
static int
-sqlite_maindb_init_v2(void)
+sqlite_maindb_init_v3(void)
{
int ret, ret2;
char *err = NULL;
@@ -227,7 +273,7 @@ sqlite_maindb_init_v2(void)
case 0:
/* Query failed again -- set up DB */
break;
- case CLTRACK_SQLITE_LATEST_SCHEMA_VERSION:
+ case CLD_SQLITE_LATEST_SCHEMA_VERSION:
/* Someone else raced in and set it up */
ret = 0;
goto rollback;
@@ -245,20 +291,38 @@ sqlite_maindb_init_v2(void)
goto rollback;
}
- /* create the "clients" table */
- ret = sqlite3_exec(dbh, "CREATE TABLE clients (id BLOB PRIMARY KEY, "
- "time INTEGER, has_session INTEGER);",
+ /* create grace table */
+ ret = sqlite3_exec(dbh, "CREATE TABLE grace "
+ "(current INTEGER , recovery INTEGER);",
NULL, NULL, &err);
if (ret != SQLITE_OK) {
- xlog(L_ERROR, "Unable to create clients table: %s", err);
+ xlog(L_ERROR, "Unable to create grace table: %s", err);
goto rollback;
}
+ /* insert initial epochs into grace table */
+ ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace "
+ "values (1, 0);",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to set initial epochs: %s", err);
+ goto rollback;
+ }
+
+ /* create recovery table for current epoch */
+ ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" "
+ "(id BLOB PRIMARY KEY);",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to create recovery table "
+ "for current epoch: %s", err);
+ goto rollback;
+ }
/* insert version into parameters table */
ret = snprintf(buf, sizeof(buf), "INSERT OR FAIL INTO parameters "
"values (\"version\", \"%d\");",
- CLTRACK_SQLITE_LATEST_SCHEMA_VERSION);
+ CLD_SQLITE_LATEST_SCHEMA_VERSION);
if (ret < 0) {
xlog(L_ERROR, "sprintf failed!");
goto rollback;
@@ -291,6 +355,42 @@ rollback:
goto out;
}
+static int
+sqlite_startup_query_grace(void)
+{
+ int ret;
+ uint64_t tcur;
+ uint64_t trec;
+ sqlite3_stmt *stmt = NULL;
+
+ /* prepare select query */
+ ret = sqlite3_prepare_v2(dbh, "SELECT * FROM grace;", -1, &stmt, NULL);
+ if (ret != SQLITE_OK) {
+ xlog(D_GENERAL, "Unable to prepare select statement: %s",
+ sqlite3_errmsg(dbh));
+ goto out;
+ }
+
+ ret = sqlite3_step(stmt);
+ if (ret != SQLITE_ROW) {
+ xlog(D_GENERAL, "Select statement execution failed: %s",
+ sqlite3_errmsg(dbh));
+ goto out;
+ }
+
+ tcur = (uint64_t)sqlite3_column_int64(stmt, 0);
+ trec = (uint64_t)sqlite3_column_int64(stmt, 1);
+
+ current_epoch = tcur;
+ recovery_epoch = trec;
+ ret = 0;
+ xlog(D_GENERAL, "%s: current_epoch=%lu recovery_epoch=%lu",
+ __func__, current_epoch, recovery_epoch);
+out:
+ sqlite3_finalize(stmt);
+ return ret;
+}
+
/* Open the database and set up the database handle for it */
int
sqlite_prepare_dbh(const char *topdir)
@@ -322,7 +422,7 @@ sqlite_prepare_dbh(const char *topdir)
}
/* set busy timeout */
- ret = sqlite3_busy_timeout(dbh, CLTRACK_SQLITE_BUSY_TIMEOUT);
+ ret = sqlite3_busy_timeout(dbh, CLD_SQLITE_BUSY_TIMEOUT);
if (ret != SQLITE_OK) {
xlog(L_ERROR, "Unable to set sqlite busy timeout: %s",
sqlite3_errmsg(dbh));
@@ -331,19 +431,26 @@ sqlite_prepare_dbh(const char *topdir)
ret = sqlite_query_schema_version();
switch (ret) {
- case CLTRACK_SQLITE_LATEST_SCHEMA_VERSION:
+ case CLD_SQLITE_LATEST_SCHEMA_VERSION:
/* DB is already set up. Do nothing */
ret = 0;
break;
+ case 2:
+ /* Old DB -- update to new schema */
+ ret = sqlite_maindb_update_schema(2);
+ if (ret)
+ goto out_close;
+ break;
+
case 1:
/* Old DB -- update to new schema */
- ret = sqlite_maindb_update_v1_to_v2();
+ ret = sqlite_maindb_update_schema(1);
if (ret)
goto out_close;
break;
case 0:
/* Query failed -- try to set up new DB */
- ret = sqlite_maindb_init_v2();
+ ret = sqlite_maindb_init_v3();
if (ret)
goto out_close;
break;
@@ -351,11 +458,13 @@ sqlite_prepare_dbh(const char *topdir)
/* Unknown DB version -- downgrade? Fail */
xlog(L_ERROR, "Unsupported database schema version! "
"Expected %d, got %d.",
- CLTRACK_SQLITE_LATEST_SCHEMA_VERSION, ret);
+ CLD_SQLITE_LATEST_SCHEMA_VERSION, ret);
ret = -EINVAL;
goto out_close;
}
+ ret = sqlite_startup_query_grace();
+
return ret;
out_close:
sqlite3_close(dbh);
@@ -369,20 +478,22 @@ out_close:
* Returns a non-zero sqlite error code, or SQLITE_OK (aka 0)
*/
int
-sqlite_insert_client(const unsigned char *clname, const size_t namelen,
- const bool has_session, const bool zerotime)
+sqlite_insert_client(const unsigned char *clname, const size_t namelen)
{
int ret;
sqlite3_stmt *stmt = NULL;
- if (zerotime)
- ret = sqlite3_prepare_v2(dbh, "INSERT OR REPLACE INTO clients "
- "VALUES (?, 0, ?);", -1, &stmt, NULL);
- else
- ret = sqlite3_prepare_v2(dbh, "INSERT OR REPLACE INTO clients "
- "VALUES (?, strftime('%s', 'now'), ?);", -1,
- &stmt, NULL);
+ ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016lx\" "
+ "VALUES (?);", current_epoch);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ return ret;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ return -EINVAL;
+ }
+ ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
if (ret != SQLITE_OK) {
xlog(L_ERROR, "%s: insert statement prepare failed: %s",
__func__, sqlite3_errmsg(dbh));
@@ -397,13 +508,6 @@ sqlite_insert_client(const unsigned char *clname, const size_t namelen,
goto out_err;
}
- ret = sqlite3_bind_int(stmt, 2, (int)has_session);
- if (ret != SQLITE_OK) {
- xlog(L_ERROR, "%s: bind int failed: %s", __func__,
- sqlite3_errmsg(dbh));
- goto out_err;
- }
-
ret = sqlite3_step(stmt);
if (ret == SQLITE_DONE)
ret = SQLITE_OK;
@@ -424,8 +528,18 @@ sqlite_remove_client(const unsigned char *clname, const size_t namelen)
int ret;
sqlite3_stmt *stmt = NULL;
- ret = sqlite3_prepare_v2(dbh, "DELETE FROM clients WHERE id==?", -1,
- &stmt, NULL);
+ ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016lx\" "
+ "WHERE id==?;", current_epoch);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ return ret;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ return -EINVAL;
+ }
+
+ ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
+
if (ret != SQLITE_OK) {
xlog(L_ERROR, "%s: statement prepare failed: %s",
__func__, sqlite3_errmsg(dbh));
@@ -459,18 +573,26 @@ out_err:
* return an error.
*/
int
-sqlite_check_client(const unsigned char *clname, const size_t namelen,
- const bool has_session)
+sqlite_check_client(const unsigned char *clname, const size_t namelen)
{
int ret;
sqlite3_stmt *stmt = NULL;
- ret = sqlite3_prepare_v2(dbh, "SELECT count(*) FROM clients WHERE "
- "id==?", -1, &stmt, NULL);
+ ret = snprintf(buf, sizeof(buf), "SELECT count(*) FROM \"rec-%016lx\" "
+ "WHERE id==?;", recovery_epoch);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ return ret;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ return -EINVAL;
+ }
+
+ ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
if (ret != SQLITE_OK) {
- xlog(L_ERROR, "%s: unable to prepare update statement: %s",
- __func__, sqlite3_errmsg(dbh));
- goto out_err;
+ xlog(L_ERROR, "%s: select statement prepare failed: %s",
+ __func__, sqlite3_errmsg(dbh));
+ return ret;
}
ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen,
@@ -495,37 +617,10 @@ sqlite_check_client(const unsigned char *clname, const size_t namelen,
goto out_err;
}
- /* Only update timestamp for v4.0 clients */
- if (has_session) {
- ret = SQLITE_OK;
- goto out_err;
- }
-
sqlite3_finalize(stmt);
- stmt = NULL;
- ret = sqlite3_prepare_v2(dbh, "UPDATE OR FAIL clients SET "
- "time=strftime('%s', 'now') WHERE id==?",
- -1, &stmt, NULL);
- if (ret != SQLITE_OK) {
- xlog(L_ERROR, "%s: unable to prepare update statement: %s",
- __func__, sqlite3_errmsg(dbh));
- goto out_err;
- }
- ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen,
- SQLITE_STATIC);
- if (ret != SQLITE_OK) {
- xlog(L_ERROR, "%s: bind blob failed: %s",
- __func__, sqlite3_errmsg(dbh));
- goto out_err;
- }
-
- ret = sqlite3_step(stmt);
- if (ret == SQLITE_DONE)
- ret = SQLITE_OK;
- else
- xlog(L_ERROR, "%s: unexpected return code from update: %s",
- __func__, sqlite3_errmsg(dbh));
+ /* Now insert the client into the table for the current epoch */
+ return sqlite_insert_client(clname, namelen);
out_err:
xlog(D_GENERAL, "%s: returning %d", __func__, ret);
@@ -599,3 +694,211 @@ sqlite_query_reclaiming(const time_t grace_start)
"reclaim", __func__, ret);
return ret;
}
+
+int
+sqlite_grace_start(void)
+{
+ int ret, ret2;
+ char *err;
+ uint64_t tcur = current_epoch;
+ uint64_t trec = recovery_epoch;
+
+ /* begin transaction */
+ ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+ &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to begin transaction: %s", err);
+ goto rollback;
+ }
+
+ if (trec == 0) {
+ /*
+ * A normal grace start - update the epoch values in the grace
+ * table and create a new table for the current reboot epoch.
+ */
+ trec = tcur;
+ tcur++;
+
+ ret = snprintf(buf, sizeof(buf), "UPDATE grace "
+ "SET current = %ld, recovery = %ld;",
+ (int64_t)tcur, (int64_t)trec);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ goto rollback;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)",
+ ret);
+ ret = -EINVAL;
+ goto rollback;
+ }
+
+ ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to update epochs: %s", err);
+ goto rollback;
+ }
+
+ ret = snprintf(buf, sizeof(buf), "CREATE TABLE \"rec-%016lx\" "
+ "(id BLOB PRIMARY KEY);",
+ tcur);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ goto rollback;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)",
+ ret);
+ ret = -EINVAL;
+ goto rollback;
+ }
+
+ ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to create table for current epoch: %s",
+ err);
+ goto rollback;
+ }
+ } else {
+ /* Server restarted while in grace - don't update the epoch
+ * values in the grace table, just clear out the records for
+ * the current reboot epoch.
+ */
+ ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016lx\";",
+ tcur);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ goto rollback;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ ret = -EINVAL;
+ goto rollback;
+ }
+
+ ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to clear table for current epoch: %s",
+ err);
+ goto rollback;
+ }
+ }
+
+ ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to commit transaction: %s", err);
+ goto rollback;
+ }
+
+ current_epoch = tcur;
+ recovery_epoch = trec;
+ xlog(D_GENERAL, "%s: current_epoch=%lu recovery_epoch=%lu",
+ __func__, current_epoch, recovery_epoch);
+
+out:
+ sqlite3_free(err);
+ return ret;
+rollback:
+ ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+ if (ret2 != SQLITE_OK)
+ xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+ goto out;
+}
+
+int
+sqlite_grace_done(void)
+{
+ int ret, ret2;
+ char *err;
+
+ /* begin transaction */
+ ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+ &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to begin transaction: %s", err);
+ goto rollback;
+ }
+
+ ret = sqlite3_exec(dbh, "UPDATE grace SET recovery = \"0\";",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to clear recovery epoch: %s", err);
+ goto rollback;
+ }
+
+ ret = snprintf(buf, sizeof(buf), "DROP TABLE \"rec-%016lx\";",
+ recovery_epoch);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ goto rollback;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ ret = -EINVAL;
+ goto rollback;
+ }
+
+ ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to drop table for recovery epoch: %s",
+ err);
+ goto rollback;
+ }
+
+ ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to commit transaction: %s", err);
+ goto rollback;
+ }
+
+ recovery_epoch = 0;
+ xlog(D_GENERAL, "%s: current_epoch=%lu recovery_epoch=%lu",
+ __func__, current_epoch, recovery_epoch);
+
+out:
+ sqlite3_free(err);
+ return ret;
+rollback:
+ ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+ if (ret2 != SQLITE_OK)
+ xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+ goto out;
+}
+
+
+int
+sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt)
+{
+ int ret;
+ sqlite3_stmt *stmt = NULL;
+ struct cld_msg *cmsg = &clnt->cl_msg;
+
+ if (recovery_epoch == 0) {
+ xlog(D_GENERAL, "%s: not in grace!", __func__);
+ return -EINVAL;
+ }
+
+ ret = snprintf(buf, sizeof(buf), "SELECT * FROM \"rec-%016lx\";",
+ recovery_epoch);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ return ret;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ return -EINVAL;
+ }
+
+ ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "%s: select statement prepare failed: %s",
+ __func__, sqlite3_errmsg(dbh));
+ return ret;
+ }
+
+ while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) {
+ memcpy(&cmsg->cm_u.cm_name.cn_id, sqlite3_column_blob(stmt, 0),
+ NFS4_OPAQUE_LIMIT);
+ cmsg->cm_u.cm_name.cn_len = sqlite3_column_bytes(stmt, 0);
+ cb(clnt);
+ }
+ if (ret == SQLITE_DONE)
+ ret = 0;
+ sqlite3_finalize(stmt);
+ return ret;
+}
diff --git a/utils/nfsdcld/sqlite.h b/utils/nfsdcld/sqlite.h
index 06e7c04..5c56f75 100644
--- a/utils/nfsdcld/sqlite.h
+++ b/utils/nfsdcld/sqlite.h
@@ -20,13 +20,16 @@
#ifndef _SQLITE_H_
#define _SQLITE_H_
+struct cld_client;
+
int sqlite_prepare_dbh(const char *topdir);
-int sqlite_insert_client(const unsigned char *clname, const size_t namelen,
- const bool has_session, const bool zerotime);
+int sqlite_insert_client(const unsigned char *clname, const size_t namelen);
int sqlite_remove_client(const unsigned char *clname, const size_t namelen);
-int sqlite_check_client(const unsigned char *clname, const size_t namelen,
- const bool has_session);
+int sqlite_check_client(const unsigned char *clname, const size_t namelen);
int sqlite_remove_unreclaimed(const time_t grace_start);
int sqlite_query_reclaiming(const time_t grace_start);
+int sqlite_grace_start(void);
+int sqlite_grace_done(void);
+int sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt);
#endif /* _SQLITE_H */