summaryrefslogtreecommitdiff
path: root/src/fileops/fop_basic.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/fileops/fop_basic.c')
-rw-r--r--src/fileops/fop_basic.c216
1 files changed, 215 insertions, 1 deletions
diff --git a/src/fileops/fop_basic.c b/src/fileops/fop_basic.c
index d6c707f2..c1280d76 100644
--- a/src/fileops/fop_basic.c
+++ b/src/fileops/fop_basic.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2001, 2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015 Oracle and/or its affiliates. All rights reserved.
*
* $Id$
*/
@@ -253,6 +253,220 @@ err: if (local_open &&
}
/*
+ * Used to reduce the maximum amount of data that will be logged at a time.
+ * Large writes are logged as a series of smaller writes to prevent a
+ * single log from being larger than the log buffer or a log file.
+ *
+ * __fop_write_file takes the smaller of the log buffer size and the log
+ * file size and scales it by one of these multipliers to get the largest
+ * chunk of data it will place in a single log record.
+ */
+/* Partial logging of an overwrite: each record carries only the old data. */
+#define LOG_OVERWRITE_MULTIPLIER 0.75
+/* Redo logging of an append or create: each record carries only new data. */
+#define LOG_REDO_MULTIPLIER 0.75
+/* Redo logging of an overwrite: each record carries old and new data. */
+#define LOG_OVERWRITE_REDO_MULTIPLIER 0.33
+
+/*
+ * __fop_write_file
+ *
+ * Write "size" bytes from "buf" to file "name" beginning at offset "off".
+ * dirname is the directory in which the file is stored, fhp the file
+ * handle to write to, and flags says whether this is creating or
+ * appending data, which changes how the data is logged.
+ * The other __fop_write is designed for writing pages to databases; this
+ * function writes generic data to files, usually blob files.
+ *
+ * If fhp is NULL the file is opened here (via __db_appname and __os_open)
+ * and closed again before returning.
+ *
+ * When logging is enabled and a transaction is supplied (the transaction
+ * is not required if DEBUG_WOP is defined), the write is logged before it
+ * is performed.  The data is split into records no larger than a fraction
+ * of the smaller of the log buffer size and the log file size:
+ *	DB_FOP_PARTIAL_LOG with DB_FOP_APPEND or DB_FOP_CREATE:
+ *		a single record carrying no data.
+ *	DB_FOP_PARTIAL_LOG otherwise:
+ *		the data being overwritten (old data) only.
+ *	no DB_FOP_PARTIAL_LOG:
+ *		DB_FOP_REDO is set in the record and the new data is logged,
+ *		together with the old data unless appending or creating.
+ *
+ * Returns 0 on success, the error returned by the failing helper
+ * otherwise, or EIO if fewer than "size" bytes were written.
+ *
+ * PUBLIC: int __fop_write_file __P((ENV *, DB_TXN *,
+ * PUBLIC: const char *, const char *, APPNAME, DB_FH *,
+ * PUBLIC: off_t, void *, size_t, u_int32_t));
+ */
+int
+__fop_write_file(env, txn,
+    name, dirname, appname, fhp, off, buf, size, flags)
+	ENV *env;
+	DB_TXN *txn;
+	const char *name, *dirname;
+	APPNAME appname;
+	DB_FH *fhp;
+	off_t off;
+	void *buf;
+	size_t size;
+	u_int32_t flags;
+{
+	DBT new_data, old_data, namedbt, dirdbt;
+	DB_LOG *dblp;
+	DB_LSN lsn;
+	off_t cur_off;
+	int local_open, ret, t_ret;
+	size_t cur_size, nbytes, tmp_size;
+	u_int32_t lflags, lgbuf_size, lgsize, lgfile_size;
+	char *real_name;
+	void *cur_ptr;
+
+	ret = local_open = 0;
+	real_name = NULL;
+	lflags = 0;
+	memset(&new_data, 0, sizeof(new_data));
+	memset(&old_data, 0, sizeof(old_data));
+	ZERO_LSN(lsn);
+
+	if (fhp == NULL) {
+		/* File isn't open; we need to reopen it. */
+		if ((ret = __db_appname(env,
+		    appname, name, &dirname, &real_name)) != 0)
+			return (ret);
+
+		if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0)
+			goto err;
+		local_open = 1;
+	}
+
+	if (DBENV_LOGGING(env)
+#if !defined(DEBUG_WOP)
+	    && txn != NULL
+#endif
+	) {
+		DB_INIT_DBT(namedbt, name, strlen(name) + 1);
+		if (dirname != NULL)
+			DB_INIT_DBT(dirdbt, dirname, strlen(dirname) + 1);
+		else
+			memset(&dirdbt, 0, sizeof(dirdbt));
+		/*
+		 * If the write is larger than the log buffer or file size,
+		 * then log it as a set of smaller writes.
+		 */
+		cur_off = off;
+		cur_ptr = buf;
+		cur_size = size;
+		dblp = env->lg_handle;
+		LOG_SYSTEM_LOCK(env);
+		lgfile_size = ((LOG *)dblp->reginfo.primary)->log_nsize;
+		LOG_SYSTEM_UNLOCK(env);
+		if ((ret = __log_get_lg_bsize(env->dbenv, &lgbuf_size)) != 0)
+			goto err;
+
+		/* Start from the smaller of the two limits. */
+		if (lgfile_size > lgbuf_size)
+			lgsize = lgbuf_size;
+		else
+			lgsize = lgfile_size;
+
+		/*
+		 * Partial logging only logs enough data to undo an operation.
+		 */
+		if (LF_ISSET(DB_FOP_PARTIAL_LOG)) {
+			/* No data needs to be logged for append and create. */
+			if (LF_ISSET(DB_FOP_APPEND | DB_FOP_CREATE)) {
+				lflags |=
+				    flags & (DB_FOP_APPEND | DB_FOP_CREATE);
+				/*
+				 * Jump into the loop body to write exactly
+				 * one record with empty old and new data.
+				 * cur_size is 0, so the loop ends right
+				 * after that record.
+				 */
+				cur_size = 0;
+				goto log;
+			} else {
+				/*
+				 * Writing in the middle of the blob requires
+				 * logging the data being overwritten.
+				 */
+				lgsize = (u_int32_t)
+				    (lgsize * LOG_OVERWRITE_MULTIPLIER);
+			}
+		} else {
+			/* Log that the operation can be redone from logs. */
+			lflags |= DB_FOP_REDO;
+			/* Just log the new data for append and create. */
+			if (LF_ISSET(DB_FOP_APPEND | DB_FOP_CREATE)) {
+				lgsize = (u_int32_t)
+				    (lgsize * LOG_REDO_MULTIPLIER);
+				lflags |= flags &
+				    (DB_FOP_APPEND | DB_FOP_CREATE);
+			} else {
+				/*
+				 * Writing in the middle of the blob requires
+				 * logging both the old and new data.
+				 */
+				lgsize = (u_int32_t)
+				    (lgsize * LOG_OVERWRITE_REDO_MULTIPLIER);
+			}
+		}
+
+		while (cur_size > 0) {
+			/* Carve the next chunk, at most lgsize bytes. */
+			new_data.data = cur_ptr;
+			if (cur_size > lgsize) {
+				new_data.size = lgsize;
+				cur_size -= lgsize;
+			} else {
+				new_data.size = (u_int32_t)cur_size;
+				cur_size = 0;
+			}
+			cur_ptr = (unsigned char *)cur_ptr + new_data.size;
+			/*
+			 * If not creating or appending the file, then
+			 * the data being overwritten needs to be read
+			 * in so it can be written back in on abort.
+			 */
+			if (!(lflags & (DB_FOP_CREATE | DB_FOP_APPEND))) {
+				/*
+				 * The buffer is allocated once, for the
+				 * first chunk; later chunks are never
+				 * larger than the first.
+				 */
+				DB_ASSERT(env, old_data.data == NULL ||
+				    new_data.size <= old_data.size);
+				old_data.size = new_data.size;
+				if (old_data.data == NULL) {
+					if ((ret = __os_malloc(env,
+					    old_data.size,
+					    &old_data.data)) != 0)
+						goto err;
+				}
+				if ((ret = __os_seek(
+				    env, fhp, 0, 0, cur_off)) != 0)
+					goto err;
+				if ((ret = __os_read(env, fhp, old_data.data,
+				    old_data.size, &nbytes)) != 0)
+					goto err;
+				/*
+				 * If the read came up short, clear the tail
+				 * so uninitialized heap memory is never
+				 * written to the log.
+				 */
+				if (nbytes < old_data.size)
+					memset((unsigned char *)
+					    old_data.data + nbytes, 0,
+					    old_data.size - nbytes);
+			}
+			/*
+			 * Remember the chunk length now: new_data may be
+			 * cleared below, but the offset must still advance.
+			 */
+log:			tmp_size = new_data.size;
+			/*
+			 * No need to log the new data if this operation
+			 * cannot be redone from logs.
+			 */
+			if (!(lflags & DB_FOP_REDO))
+				memset(&new_data, 0, sizeof(new_data));
+			if ((ret = __fop_write_file_log(
+			    env, txn, &lsn, flags, &namedbt, &dirdbt,
+			    (u_int32_t)appname, (u_int64_t)cur_off,
+			    &old_data, &new_data, lflags)) != 0)
+				goto err;
+			cur_off += tmp_size;
+		}
+		/*
+		 * If not creating, we have to flush the logs so that they
+		 * will be available to undo internal writes and appends in case
+		 * of a crash.
+		 */
+		if (!(LF_ISSET(DB_FOP_CREATE)) &&
+		    txn != NULL && !F_ISSET(txn, TXN_NOSYNC))
+			if ((ret = __log_flush(env, &lsn)) != 0)
+				goto err;
+	}
+
+	/* Seek to offset. */
+	if ((ret = __os_seek(env, fhp, 0, 0, off)) != 0)
+		goto err;
+
+	/* Now do the write. */
+	if ((ret = __os_write(env, fhp, buf, size, &nbytes)) != 0)
+		goto err;
+
+	if (nbytes != size) {
+		/*
+		 * __os_write reported success but did not write everything.
+		 * Return an error so the caller does not treat the
+		 * incomplete write as complete.
+		 */
+		ret = EIO;
+		__db_errx(env, DB_STR_A("0238",
+		    "Error wrote %lld bytes to file %s instead of %lld .",
+		    "%lld %s %lld"),
+		    (long long)nbytes, name, (long long)size);
+		goto err;
+	}
+
+	/* Common exit: release whatever this call acquired. */
+err:	if (local_open &&
+	    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
+		ret = t_ret;
+
+	if (real_name != NULL)
+		__os_free(env, real_name);
+	if (old_data.data != NULL)
+		__os_free(env, old_data.data);
+	return (ret);
+}
+
+/*
* __fop_rename --
* Change a file's name.
*