diff options
Diffstat (limited to 'storage/bdb/log/log.c')
-rw-r--r-- | storage/bdb/log/log.c | 1084 |
1 files changed, 1084 insertions, 0 deletions
diff --git a/storage/bdb/log/log.c b/storage/bdb/log/log.c new file mode 100644 index 00000000000..f57caeccb95 --- /dev/null +++ b/storage/bdb/log/log.c @@ -0,0 +1,1084 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996-2002 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: log.c,v 11.111 2002/08/16 00:27:44 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#endif + +#include "db_int.h" +#include "dbinc/crypto.h" +#include "dbinc/hmac.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" + +static int __log_init __P((DB_ENV *, DB_LOG *)); +static int __log_recover __P((DB_LOG *)); +static size_t __log_region_size __P((DB_ENV *)); +static int __log_zero __P((DB_ENV *, DB_LSN *, DB_LSN *)); + +/* + * __log_open -- + * Internal version of log_open: only called from DB_ENV->open. + * + * PUBLIC: int __log_open __P((DB_ENV *)); + */ +int +__log_open(dbenv) + DB_ENV *dbenv; +{ + DB_LOG *dblp; + LOG *lp; + int ret; + + /* Create/initialize the DB_LOG structure. */ + if ((ret = __os_calloc(dbenv, 1, sizeof(DB_LOG), &dblp)) != 0) + return (ret); + dblp->dbenv = dbenv; + + /* Join/create the log region. */ + dblp->reginfo.type = REGION_TYPE_LOG; + dblp->reginfo.id = INVALID_REGION_ID; + dblp->reginfo.mode = dbenv->db_mode; + dblp->reginfo.flags = REGION_JOIN_OK; + if (F_ISSET(dbenv, DB_ENV_CREATE)) + F_SET(&dblp->reginfo, REGION_CREATE_OK); + if ((ret = __db_r_attach( + dbenv, &dblp->reginfo, __log_region_size(dbenv))) != 0) + goto err; + + /* If we created the region, initialize it. */ + if (F_ISSET(&dblp->reginfo, REGION_CREATE)) + if ((ret = __log_init(dbenv, dblp)) != 0) + goto err; + + /* Set the local addresses. */ + lp = dblp->reginfo.primary = + R_ADDR(&dblp->reginfo, dblp->reginfo.rp->primary); + + /* + * If the region is threaded, then we have to lock both the handles + * and the region, and we need to allocate a mutex for that purpose. + */ + if (F_ISSET(dbenv, DB_ENV_THREAD) && + (ret = __db_mutex_setup(dbenv, &dblp->reginfo, &dblp->mutexp, + MUTEX_ALLOC | MUTEX_NO_RLOCK)) != 0) + goto err; + + /* Initialize the rest of the structure. */ + dblp->bufp = R_ADDR(&dblp->reginfo, lp->buffer_off); + + /* + * Set the handle -- we may be about to run recovery, which allocates + * log cursors. Log cursors require logging be already configured, + * and the handle being set is what demonstrates that. + * + * If we created the region, run recovery. If that fails, make sure + * we reset the log handle before cleaning up, otherwise we will try + * and clean up again in the mainline DB_ENV initialization code. + */ + dbenv->lg_handle = dblp; + + if (F_ISSET(&dblp->reginfo, REGION_CREATE)) { + if ((ret = __log_recover(dblp)) != 0) { + dbenv->lg_handle = NULL; + goto err; + } + + /* + * We first take the log file size from the environment, if + * specified. If that wasn't set, recovery may have set it + * from the persistent information in a log file header. If + * that didn't set it either, we default. + */ + if (lp->log_size == 0) + lp->log_size = lp->log_nsize = LG_MAX_DEFAULT; + } else { + /* + * A process joining the region may have reset the log file + * size, too. If so, it only affects the next log file we + * create. + */ + if (dbenv->lg_size != 0) + lp->log_nsize = dbenv->lg_size; + } + + R_UNLOCK(dbenv, &dblp->reginfo); + return (0); + +err: if (dblp->reginfo.addr != NULL) { + if (F_ISSET(&dblp->reginfo, REGION_CREATE)) + ret = __db_panic(dbenv, ret); + R_UNLOCK(dbenv, &dblp->reginfo); + (void)__db_r_detach(dbenv, &dblp->reginfo, 0); + } + + if (dblp->mutexp != NULL) + __db_mutex_free(dbenv, &dblp->reginfo, dblp->mutexp); + + __os_free(dbenv, dblp); + + return (ret); +} + +/* + * __log_init -- + * Initialize a log region in shared memory. + */ +static int +__log_init(dbenv, dblp) + DB_ENV *dbenv; + DB_LOG *dblp; +{ + DB_MUTEX *flush_mutexp; + LOG *region; + int ret; + void *p; +#ifdef HAVE_MUTEX_SYSTEM_RESOURCES + u_int8_t *addr; +#endif + + if ((ret = __db_shalloc(dblp->reginfo.addr, + sizeof(*region), 0, &dblp->reginfo.primary)) != 0) + goto mem_err; + dblp->reginfo.rp->primary = + R_OFFSET(&dblp->reginfo, dblp->reginfo.primary); + region = dblp->reginfo.primary; + memset(region, 0, sizeof(*region)); + + region->fid_max = 0; + SH_TAILQ_INIT(®ion->fq); + region->free_fid_stack = INVALID_ROFF; + region->free_fids = region->free_fids_alloced = 0; + + /* Initialize LOG LSNs. */ + INIT_LSN(region->lsn); + INIT_LSN(region->ready_lsn); + INIT_LSN(region->t_lsn); + + /* + * It's possible to be waiting for an LSN of [1][0], if a replication + * client gets the first log record out of order. An LSN of [0][0] + * signifies that we're not waiting. + */ + ZERO_LSN(region->waiting_lsn); + + /* + * Log makes note of the fact that it ran into a checkpoint on + * startup if it did so, as a recovery optimization. A zero + * LSN signifies that it hasn't found one [yet]. + */ + ZERO_LSN(region->cached_ckp_lsn); + +#ifdef HAVE_MUTEX_SYSTEM_RESOURCES + /* Allocate room for the log maintenance info and initialize it. */ + if ((ret = __db_shalloc(dblp->reginfo.addr, + sizeof(REGMAINT) + LG_MAINT_SIZE, 0, &addr)) != 0) + goto mem_err; + __db_maintinit(&dblp->reginfo, addr, LG_MAINT_SIZE); + region->maint_off = R_OFFSET(&dblp->reginfo, addr); +#endif + + if ((ret = __db_mutex_setup(dbenv, &dblp->reginfo, ®ion->fq_mutex, + MUTEX_NO_RLOCK)) != 0) + return (ret); + + /* + * We must create a place for the flush mutex separately; mutexes have + * to be aligned to MUTEX_ALIGN, and the only way to guarantee that is + * to make sure they're at the beginning of a shalloc'ed chunk. + */ + if ((ret = __db_shalloc(dblp->reginfo.addr, + sizeof(DB_MUTEX), MUTEX_ALIGN, &flush_mutexp)) != 0) + goto mem_err; + if ((ret = __db_mutex_setup(dbenv, &dblp->reginfo, flush_mutexp, + MUTEX_NO_RLOCK)) != 0) + return (ret); + region->flush_mutex_off = R_OFFSET(&dblp->reginfo, flush_mutexp); + + /* Initialize the buffer. */ + if ((ret = + __db_shalloc(dblp->reginfo.addr, dbenv->lg_bsize, 0, &p)) != 0) { +mem_err: __db_err(dbenv, "Unable to allocate memory for the log buffer"); + return (ret); + } + region->buffer_size = dbenv->lg_bsize; + region->buffer_off = R_OFFSET(&dblp->reginfo, p); + region->log_size = region->log_nsize = dbenv->lg_size; + + /* Initialize the commit Queue. */ + SH_TAILQ_INIT(®ion->free_commits); + SH_TAILQ_INIT(®ion->commits); + region->ncommit = 0; + + /* + * Fill in the log's persistent header. Don't fill in the log file + * sizes, as they may change at any time and so have to be filled in + * as each log file is created. + */ + region->persist.magic = DB_LOGMAGIC; + region->persist.version = DB_LOGVERSION; + region->persist.mode = (u_int32_t)dbenv->db_mode; + + return (0); +} + +/* + * __log_recover -- + * Recover a log. + */ +static int +__log_recover(dblp) + DB_LOG *dblp; +{ + DBT dbt; + DB_ENV *dbenv; + DB_LOGC *logc; + DB_LSN lsn; + LOG *lp; + u_int32_t cnt, rectype; + int ret; + logfile_validity status; + + logc = NULL; + dbenv = dblp->dbenv; + lp = dblp->reginfo.primary; + + /* + * Find a log file. If none exist, we simply return, leaving + * everything initialized to a new log. + */ + if ((ret = __log_find(dblp, 0, &cnt, &status)) != 0) + return (ret); + if (cnt == 0) + return (0); + + /* + * If the last file is an old version, readable or no, start a new + * file. Don't bother finding the end of the last log file; + * we assume that it's valid in its entirety, since the user + * should have shut down cleanly or run recovery before upgrading. + */ + if (status == DB_LV_OLD_READABLE || status == DB_LV_OLD_UNREADABLE) { + lp->lsn.file = lp->s_lsn.file = cnt + 1; + lp->lsn.offset = lp->s_lsn.offset = 0; + goto skipsearch; + } + DB_ASSERT(status == DB_LV_NORMAL); + + /* + * We have the last useful log file and we've loaded any persistent + * information. Set the end point of the log past the end of the last + * file. Read the last file, looking for the last checkpoint and + * the log's end. + */ + lp->lsn.file = cnt + 1; + lp->lsn.offset = 0; + lsn.file = cnt; + lsn.offset = 0; + + /* + * Allocate a cursor and set it to the first record. This shouldn't + * fail, leave error messages on. + */ + if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0) + return (ret); + F_SET(logc, DB_LOG_LOCKED); + memset(&dbt, 0, sizeof(dbt)); + if ((ret = logc->get(logc, &lsn, &dbt, DB_SET)) != 0) + goto err; + + /* + * Read to the end of the file. This may fail at some point, so + * turn off error messages. + */ + F_SET(logc, DB_LOG_SILENT_ERR); + while (logc->get(logc, &lsn, &dbt, DB_NEXT) == 0) { + if (dbt.size < sizeof(u_int32_t)) + continue; + memcpy(&rectype, dbt.data, sizeof(u_int32_t)); + if (rectype == DB___txn_ckp) + /* + * If we happen to run into a checkpoint, cache its + * LSN so that the transaction system doesn't have + * to walk this log file again looking for it. + */ + lp->cached_ckp_lsn = lsn; + } + F_CLR(logc, DB_LOG_SILENT_ERR); + + /* + * We now know where the end of the log is. Set the first LSN that + * we want to return to an application and the LSN of the last known + * record on disk. + */ + lp->lsn = lsn; + lp->s_lsn = lsn; + lp->lsn.offset += logc->c_len; + lp->s_lsn.offset += logc->c_len; + + /* Set up the current buffer information, too. */ + lp->len = logc->c_len; + lp->b_off = 0; + lp->w_off = lp->lsn.offset; + +skipsearch: + if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) + __db_err(dbenv, + "Finding last valid log LSN: file: %lu offset %lu", + (u_long)lp->lsn.file, (u_long)lp->lsn.offset); + +err: if (logc != NULL) + (void)logc->close(logc, 0); + + return (ret); +} + +/* + * __log_find -- + * Try to find a log file. If find_first is set, valp will contain + * the number of the first readable log file, else it will contain the number + * of the last log file (which may be too old to read). + * + * PUBLIC: int __log_find __P((DB_LOG *, int, u_int32_t *, logfile_validity *)); + */ +int +__log_find(dblp, find_first, valp, statusp) + DB_LOG *dblp; + int find_first; + u_int32_t *valp; + logfile_validity *statusp; +{ + DB_ENV *dbenv; + logfile_validity logval_status, status; + u_int32_t clv, logval; + int cnt, fcnt, ret; + const char *dir; + char *c, **names, *p, *q, savech; + + dbenv = dblp->dbenv; + logval_status = status = DB_LV_NONEXISTENT; + + /* Return a value of 0 as the log file number on failure. */ + *valp = 0; + + /* Find the directory name. */ + if ((ret = __log_name(dblp, 1, &p, NULL, 0)) != 0) + return (ret); + if ((q = __db_rpath(p)) == NULL) { + COMPQUIET(savech, 0); + dir = PATH_DOT; + } else { + savech = *q; + *q = '\0'; + dir = p; + } + + /* Get the list of file names. */ + ret = __os_dirlist(dbenv, dir, &names, &fcnt); + + /* + * !!! + * We overwrote a byte in the string with a nul. Restore the string + * so that the diagnostic checks in the memory allocation code work + * and any error messages display the right file name. + */ + if (q != NULL) + *q = savech; + + if (ret != 0) { + __db_err(dbenv, "%s: %s", dir, db_strerror(ret)); + __os_free(dbenv, p); + return (ret); + } + + /* Search for a valid log file name. */ + for (cnt = fcnt, clv = logval = 0; --cnt >= 0;) { + if (strncmp(names[cnt], LFPREFIX, sizeof(LFPREFIX) - 1) != 0) + continue; + + /* + * Names of the form log\.[0-9]* are reserved for DB. Other + * names sharing LFPREFIX, such as "log.db", are legal. + */ + for (c = names[cnt] + sizeof(LFPREFIX) - 1; *c != '\0'; c++) + if (!isdigit((int)*c)) + break; + if (*c != '\0') + continue; + + /* + * Use atol, not atoi; if an "int" is 16-bits, the largest + * log file name won't fit. + */ + clv = atol(names[cnt] + (sizeof(LFPREFIX) - 1)); + + /* + * If searching for the first log file, we want to return the + * oldest log file we can read, or, if no readable log files + * exist, the newest log file we can't read (the crossover + * point between the old and new versions of the log file). + * + * If we're searching for the last log file, we want to return + * the newest log file, period. + * + * Readable log files should never preceede unreadable log + * files, that would mean the admin seriously screwed up. + */ + if (find_first) { + if (logval != 0 && + status != DB_LV_OLD_UNREADABLE && clv > logval) + continue; + } else + if (logval != 0 && clv < logval) + continue; + + if ((ret = __log_valid(dblp, clv, 1, &status)) != 0) { + __db_err(dbenv, "Invalid log file: %s: %s", + names[cnt], db_strerror(ret)); + goto err; + } + switch (status) { + case DB_LV_NONEXISTENT: + /* __log_valid never returns DB_LV_NONEXISTENT. */ + DB_ASSERT(0); + break; + case DB_LV_INCOMPLETE: + /* + * The last log file may not have been initialized -- + * it's possible to create a log file but not write + * anything to it. If performing recovery (that is, + * if find_first isn't set), ignore the file, it's + * not interesting. If we're searching for the first + * log record, return the file (assuming we don't find + * something better), as the "real" first log record + * is likely to be in the log buffer, and we want to + * set the file LSN for our return. + */ + if (find_first) + goto found; + break; + case DB_LV_OLD_UNREADABLE: + /* + * If we're searching for the first log file, then we + * only want this file if we don't yet have a file or + * already have an unreadable file and this one is + * newer than that one. If we're searching for the + * last log file, we always want this file because we + * wouldn't be here if it wasn't newer than our current + * choice. + */ + if (!find_first || logval == 0 || + (status == DB_LV_OLD_UNREADABLE && clv > logval)) + goto found; + break; + case DB_LV_NORMAL: + case DB_LV_OLD_READABLE: +found: logval = clv; + logval_status = status; + break; + } + } + + *valp = logval; + +err: __os_dirfree(dbenv, names, fcnt); + __os_free(dbenv, p); + *statusp = logval_status; + + return (ret); +} + +/* + * log_valid -- + * Validate a log file. Returns an error code in the event of + * a fatal flaw in a the specified log file; returns success with + * a code indicating the currentness and completeness of the specified + * log file if it is not unexpectedly flawed (that is, if it's perfectly + * normal, if it's zero-length, or if it's an old version). + * + * PUBLIC: int __log_valid __P((DB_LOG *, u_int32_t, int, logfile_validity *)); + */ +int +__log_valid(dblp, number, set_persist, statusp) + DB_LOG *dblp; + u_int32_t number; + int set_persist; + logfile_validity *statusp; +{ + DB_CIPHER *db_cipher; + DB_ENV *dbenv; + DB_FH fh; + HDR *hdr; + LOG *region; + LOGP *persist; + logfile_validity status; + size_t hdrsize, nw, recsize; + int is_hmac, need_free, ret; + u_int8_t *tmp; + char *fname; + + dbenv = dblp->dbenv; + db_cipher = dbenv->crypto_handle; + persist = NULL; + status = DB_LV_NORMAL; + + /* Try to open the log file. */ + if ((ret = __log_name(dblp, + number, &fname, &fh, DB_OSO_RDONLY | DB_OSO_SEQ)) != 0) { + __os_free(dbenv, fname); + return (ret); + } + + need_free = 0; + hdrsize = HDR_NORMAL_SZ; + is_hmac = 0; + recsize = sizeof(LOGP); + if (CRYPTO_ON(dbenv)) { + hdrsize = HDR_CRYPTO_SZ; + recsize = sizeof(LOGP); + recsize += db_cipher->adj_size(recsize); + is_hmac = 1; + } + if ((ret = __os_calloc(dbenv, 1, recsize + hdrsize, &tmp)) != 0) + return (ret); + need_free = 1; + hdr = (HDR *)tmp; + persist = (LOGP *)(tmp + hdrsize); + /* Try to read the header. */ + if ((ret = __os_read(dbenv, &fh, tmp, recsize + hdrsize, &nw)) != 0 || + nw != recsize + hdrsize) { + if (ret == 0) + status = DB_LV_INCOMPLETE; + else + /* + * The error was a fatal read error, not just an + * incompletely initialized log file. + */ + __db_err(dbenv, "Ignoring log file: %s: %s", + fname, db_strerror(ret)); + + (void)__os_closehandle(dbenv, &fh); + goto err; + } + (void)__os_closehandle(dbenv, &fh); + + /* + * Now we have to validate the persistent record. We have + * several scenarios we have to deal with: + * + * 1. User has crypto turned on: + * - They're reading an old, unencrypted log file + * . We will fail the record size match check below. + * - They're reading a current, unencrypted log file + * . We will fail the record size match check below. + * - They're reading an old, encrypted log file [NOT YET] + * . After decryption we'll fail the version check. [NOT YET] + * - They're reading a current, encrypted log file + * . We should proceed as usual. + * 2. User has crypto turned off: + * - They're reading an old, unencrypted log file + * . We will fail the version check. + * - They're reading a current, unencrypted log file + * . We should proceed as usual. + * - They're reading an old, encrypted log file [NOT YET] + * . We'll fail the magic number check (it is encrypted). + * - They're reading a current, encrypted log file + * . We'll fail the magic number check (it is encrypted). + */ + if (CRYPTO_ON(dbenv)) { + /* + * If we are trying to decrypt an unencrypted log + * we can only detect that by having an unreasonable + * data length for our persistent data. + */ + if ((hdr->len - hdrsize) != sizeof(LOGP)) { + __db_err(dbenv, "log record size mismatch"); + goto err; + } + /* Check the checksum and decrypt. */ + if ((ret = __db_check_chksum(dbenv, db_cipher, &hdr->chksum[0], + (u_int8_t *)persist, hdr->len - hdrsize, is_hmac)) != 0) { + __db_err(dbenv, "log record checksum mismatch"); + goto err; + } + if ((ret = db_cipher->decrypt(dbenv, db_cipher->data, + &hdr->iv[0], (u_int8_t *)persist, hdr->len - hdrsize)) != 0) + goto err; + } + + /* Validate the header. */ + if (persist->magic != DB_LOGMAGIC) { + __db_err(dbenv, + "Ignoring log file: %s: magic number %lx, not %lx", + fname, (u_long)persist->magic, (u_long)DB_LOGMAGIC); + ret = EINVAL; + goto err; + } + + /* + * Set our status code to indicate whether the log file + * belongs to an unreadable or readable old version; leave it + * alone if and only if the log file version is the current one. + */ + if (persist->version > DB_LOGVERSION) { + /* This is a fatal error--the log file is newer than DB. */ + __db_err(dbenv, + "Ignoring log file: %s: unsupported log version %lu", + fname, (u_long)persist->version); + ret = EINVAL; + goto err; + } else if (persist->version < DB_LOGOLDVER) { + status = DB_LV_OLD_UNREADABLE; + /* + * We don't want to set persistent info based on an + * unreadable region, so jump to "err". + */ + goto err; + } else if (persist->version < DB_LOGVERSION) + status = DB_LV_OLD_READABLE; + + /* + * Only if we have a current log do we verify the checksum. + * We could not check the checksum before checking the magic + * and version because old log hdrs have the length and checksum + * in a different location. + */ + if (!CRYPTO_ON(dbenv) && ((ret = __db_check_chksum(dbenv, + db_cipher, &hdr->chksum[0], (u_int8_t *)persist, + hdr->len - hdrsize, is_hmac)) != 0)) { + __db_err(dbenv, "log record checksum mismatch"); + goto err; + } + + /* + * If the log is readable so far and we're doing system initialization, + * set the region's persistent information based on the headers. + * + * Always set the current log file size. Only set the next log file's + * size if the application hasn't set it already. + * + * XXX + * Always use the persistent header's mode, regardless of what was set + * in the current environment. We've always done it this way, but it's + * probably a bug -- I can't think of a way not-changing the mode would + * be a problem, though. + */ + if (set_persist) { + region = dblp->reginfo.primary; + region->log_size = persist->log_size; + if (region->log_nsize == 0) + region->log_nsize = persist->log_size; + region->persist.mode = persist->mode; + } + +err: __os_free(dbenv, fname); + if (need_free) + __os_free(dbenv, tmp); + *statusp = status; + return (ret); +} + +/* + * __log_dbenv_refresh -- + * Clean up after the log system on a close or failed open. Called only + * from __dbenv_refresh. (Formerly called __log_close.) + * + * PUBLIC: int __log_dbenv_refresh __P((DB_ENV *)); + */ +int +__log_dbenv_refresh(dbenv) + DB_ENV *dbenv; +{ + DB_LOG *dblp; + int ret, t_ret; + + dblp = dbenv->lg_handle; + + /* We may have opened files as part of XA; if so, close them. */ + F_SET(dblp, DBLOG_RECOVER); + ret = __dbreg_close_files(dbenv); + + /* Discard the per-thread lock. */ + if (dblp->mutexp != NULL) + __db_mutex_free(dbenv, &dblp->reginfo, dblp->mutexp); + + /* Detach from the region. */ + if ((t_ret = + __db_r_detach(dbenv, &dblp->reginfo, 0)) != 0 && ret == 0) + ret = t_ret; + + /* Close open files, release allocated memory. */ + if (F_ISSET(&dblp->lfh, DB_FH_VALID) && + (t_ret = __os_closehandle(dbenv, &dblp->lfh)) != 0 && ret == 0) + ret = t_ret; + if (dblp->dbentry != NULL) + __os_free(dbenv, dblp->dbentry); + + __os_free(dbenv, dblp); + + dbenv->lg_handle = NULL; + return (ret); +} + +/* + * __log_stat -- + * Return log statistics. + * + * PUBLIC: int __log_stat __P((DB_ENV *, DB_LOG_STAT **, u_int32_t)); + */ +int +__log_stat(dbenv, statp, flags) + DB_ENV *dbenv; + DB_LOG_STAT **statp; + u_int32_t flags; +{ + DB_LOG *dblp; + DB_LOG_STAT *stats; + LOG *region; + int ret; + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, + dbenv->lg_handle, "DB_ENV->log_stat", DB_INIT_LOG); + + *statp = NULL; + if ((ret = __db_fchk(dbenv, + "DB_ENV->log_stat", flags, DB_STAT_CLEAR)) != 0) + return (ret); + + dblp = dbenv->lg_handle; + region = dblp->reginfo.primary; + + if ((ret = __os_umalloc(dbenv, sizeof(DB_LOG_STAT), &stats)) != 0) + return (ret); + + /* Copy out the global statistics. */ + R_LOCK(dbenv, &dblp->reginfo); + *stats = region->stat; + if (LF_ISSET(DB_STAT_CLEAR)) + memset(®ion->stat, 0, sizeof(region->stat)); + + stats->st_magic = region->persist.magic; + stats->st_version = region->persist.version; + stats->st_mode = region->persist.mode; + stats->st_lg_bsize = region->buffer_size; + stats->st_lg_size = region->log_nsize; + + stats->st_region_wait = dblp->reginfo.rp->mutex.mutex_set_wait; + stats->st_region_nowait = dblp->reginfo.rp->mutex.mutex_set_nowait; + if (LF_ISSET(DB_STAT_CLEAR)) { + dblp->reginfo.rp->mutex.mutex_set_wait = 0; + dblp->reginfo.rp->mutex.mutex_set_nowait = 0; + } + stats->st_regsize = dblp->reginfo.rp->size; + + stats->st_cur_file = region->lsn.file; + stats->st_cur_offset = region->lsn.offset; + stats->st_disk_file = region->s_lsn.file; + stats->st_disk_offset = region->s_lsn.offset; + + R_UNLOCK(dbenv, &dblp->reginfo); + + *statp = stats; + return (0); +} + +/* + * __log_get_cached_ckp_lsn -- + * Retrieve any last checkpoint LSN that we may have found on startup. + * + * PUBLIC: void __log_get_cached_ckp_lsn __P((DB_ENV *, DB_LSN *)); + */ +void +__log_get_cached_ckp_lsn(dbenv, ckp_lsnp) + DB_ENV *dbenv; + DB_LSN *ckp_lsnp; +{ + DB_LOG *dblp; + LOG *lp; + + dblp = (DB_LOG *)dbenv->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + + R_LOCK(dbenv, &dblp->reginfo); + *ckp_lsnp = lp->cached_ckp_lsn; + R_UNLOCK(dbenv, &dblp->reginfo); +} + +/* + * __log_region_size -- + * Return the amount of space needed for the log region. + * Make the region large enough to hold txn_max transaction + * detail structures plus some space to hold thread handles + * and the beginning of the shalloc region and anything we + * need for mutex system resource recording. + */ +static size_t +__log_region_size(dbenv) + DB_ENV *dbenv; +{ + size_t s; + + s = dbenv->lg_regionmax + dbenv->lg_bsize; +#ifdef HAVE_MUTEX_SYSTEM_RESOURCES + if (F_ISSET(dbenv, DB_ENV_THREAD)) + s += sizeof(REGMAINT) + LG_MAINT_SIZE; +#endif + return (s); +} + +/* + * __log_region_destroy + * Destroy any region maintenance info. + * + * PUBLIC: void __log_region_destroy __P((DB_ENV *, REGINFO *)); + */ +void +__log_region_destroy(dbenv, infop) + DB_ENV *dbenv; + REGINFO *infop; +{ + __db_shlocks_destroy(infop, (REGMAINT *)R_ADDR(infop, + ((LOG *)R_ADDR(infop, infop->rp->primary))->maint_off)); + + COMPQUIET(dbenv, NULL); + COMPQUIET(infop, NULL); +} + +/* + * __log_vtruncate + * This is a virtual truncate. We set up the log indicators to + * make everyone believe that the given record is the last one in the + * log. Returns with the next valid LSN (i.e., the LSN of the next + * record to be written). This is used in replication to discard records + * in the log file that do not agree with the master. + * + * PUBLIC: int __log_vtruncate __P((DB_ENV *, DB_LSN *, DB_LSN *)); + */ +int +__log_vtruncate(dbenv, lsn, ckplsn) + DB_ENV *dbenv; + DB_LSN *lsn, *ckplsn; +{ + DBT log_dbt; + DB_FH fh; + DB_LOG *dblp; + DB_LOGC *logc; + DB_LSN end_lsn; + LOG *lp; + u_int32_t bytes, c_len; + int fn, ret, t_ret; + char *fname; + + /* Need to find out the length of this soon-to-be-last record. */ + if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0) + return (ret); + memset(&log_dbt, 0, sizeof(log_dbt)); + ret = logc->get(logc, lsn, &log_dbt, DB_SET); + c_len = logc->c_len; + if ((t_ret = logc->close(logc, 0)) != 0 && ret == 0) + ret = t_ret; + if (ret != 0) + return (ret); + + /* Now do the truncate. */ + dblp = (DB_LOG *)dbenv->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + + R_LOCK(dbenv, &dblp->reginfo); + end_lsn = lp->lsn; + lp->lsn = *lsn; + lp->len = c_len; + lp->lsn.offset += lp->len; + + /* + * I am going to assume that the number of bytes written since + * the last checkpoint doesn't exceed a 32-bit number. + */ + DB_ASSERT(lp->lsn.file >= ckplsn->file); + bytes = 0; + if (ckplsn->file != lp->lsn.file) { + bytes = lp->log_size - ckplsn->offset; + if (lp->lsn.file > ckplsn->file + 1) + bytes += lp->log_size * + (lp->lsn.file - ckplsn->file - 1); + bytes += lp->lsn.offset; + } else + bytes = lp->lsn.offset - ckplsn->offset; + + lp->stat.st_wc_mbytes += bytes / MEGABYTE; + lp->stat.st_wc_bytes += bytes % MEGABYTE; + + /* + * If the saved lsn is greater than our new end of log, reset it + * to our current end of log. + */ + if (log_compare(&lp->s_lsn, lsn) > 0) + lp->s_lsn = lp->lsn; + + /* + * If the new end of log is in the middle of the buffer, + * don't change the w_off or f_lsn. If the new end is + * before the w_off then reset w_off and f_lsn to the new + * end of log. + */ + if (lp->w_off >= lp->lsn.offset) { + lp->f_lsn = lp->lsn; + lp->w_off = lp->lsn.offset; + lp->b_off = 0; + } else + lp->b_off = lp->lsn.offset - lp->w_off; + + ZERO_LSN(lp->waiting_lsn); + lp->ready_lsn = lp->lsn; + lp->wait_recs = 0; + lp->rcvd_recs = 0; + + /* Now throw away any extra log files that we have around. */ + for (fn = lp->lsn.file + 1;; fn++) { + if (__log_name(dblp, fn, &fname, &fh, DB_OSO_RDONLY) != 0) { + __os_free(dbenv, fname); + break; + } + (void)__os_closehandle(dbenv, &fh); + ret = __os_unlink(dbenv, fname); + __os_free(dbenv, fname); + if (ret != 0) + goto err; + } + + /* Truncate the log to the new point. */ + if ((ret = __log_zero(dbenv, &lp->lsn, &end_lsn)) != 0) + goto err; + +err: R_UNLOCK(dbenv, &dblp->reginfo); + return (ret); +} + +/* + * __log_is_outdated -- + * Used by the replication system to identify if a client's logs + * are too old. The log represented by dbenv is compared to the file + * number passed in fnum. If the log file fnum does not exist and is + * lower-numbered than the current logs, the we return *outdatedp non + * zero, else we return it 0. + * + * PUBLIC: int __log_is_outdated __P((DB_ENV *dbenv, + * PUBLIC: u_int32_t fnum, int *outdatedp)); + */ +int +__log_is_outdated(dbenv, fnum, outdatedp) + DB_ENV *dbenv; + u_int32_t fnum; + int *outdatedp; +{ + DB_LOG *dblp; + LOG *lp; + char *name; + int ret; + u_int32_t cfile; + + dblp = dbenv->lg_handle; + *outdatedp = 0; + + if ((ret = __log_name(dblp, fnum, &name, NULL, 0)) != 0) + return (ret); + + /* If the file exists, we're just fine. */ + if (__os_exists(name, NULL) == 0) + goto out; + + /* + * It didn't exist, decide if the file number is too big or + * too little. If it's too little, then we need to indicate + * that the LSN is outdated. + */ + R_LOCK(dbenv, &dblp->reginfo); + lp = (LOG *)dblp->reginfo.primary; + cfile = lp->lsn.file; + R_UNLOCK(dbenv, &dblp->reginfo); + + if (cfile > fnum) + *outdatedp = 1; +out: __os_free(dbenv, name); + return (ret); +} + +/* + * __log_zero -- + * Zero out the tail of a log after a truncate. + */ +static int +__log_zero(dbenv, from_lsn, to_lsn) + DB_ENV *dbenv; + DB_LSN *from_lsn, *to_lsn; +{ + char *lname; + DB_LOG *dblp; + LOG *lp; + int ret; + size_t nbytes, len, nw; + u_int8_t buf[4096]; + u_int32_t mbytes, bytes; + + dblp = dbenv->lg_handle; + lp = (LOG *)dblp->reginfo.primary; + lname = NULL; + + if (dblp->lfname != lp->lsn.file) { + if (F_ISSET(&dblp->lfh, DB_FH_VALID)) + (void)__os_closehandle(dbenv, &dblp->lfh); + dblp->lfname = lp->lsn.file; + } + + if (from_lsn->file != to_lsn->file) { + /* We removed some log files; have to 0 to end of file. */ + if (!F_ISSET(&dblp->lfh, DB_FH_VALID) && (ret = + __log_name(dblp, dblp->lfname, &lname, &dblp->lfh, 0)) != 0) + return (ret); + if ((ret = __os_ioinfo(dbenv, + NULL, &dblp->lfh, &mbytes, &bytes, NULL)) != 0) + goto err; + len = mbytes * MEGABYTE + bytes - from_lsn->offset; + } else if (to_lsn->offset <= from_lsn->offset) + return (0); + else + len = to_lsn->offset = from_lsn->offset; + + memset(buf, 0, sizeof(buf)); + + /* Initialize the write position. */ + if (!F_ISSET(&dblp->lfh, DB_FH_VALID) && + (ret = __log_name(dblp, dblp->lfname, &lname, &dblp->lfh, 0)) != 0) + goto err; + + if ((ret = __os_seek(dbenv, + &dblp->lfh, 0, 0, from_lsn->offset, 0, DB_OS_SEEK_SET)) != 0) + return (ret); + + while (len > 0) { + nbytes = len > sizeof(buf) ? sizeof(buf) : len; + if ((ret = + __os_write(dbenv, &dblp->lfh, buf, nbytes, &nw)) != 0) + return (ret); + len -= nbytes; + } +err: if (lname != NULL) + __os_free(dbenv, lname); + + return (0); +} |