summaryrefslogtreecommitdiff
path: root/bdb/db/db_meta.c
diff options
context:
space:
mode:
Diffstat (limited to 'bdb/db/db_meta.c')
-rw-r--r--bdb/db/db_meta.c287
1 files changed, 215 insertions, 72 deletions
diff --git a/bdb/db/db_meta.c b/bdb/db/db_meta.c
index 5b57c369454..015ef5c8fc7 100644
--- a/bdb/db/db_meta.c
+++ b/bdb/db/db_meta.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: db_meta.c,v 11.26 2001/01/16 21:57:19 ubell Exp $";
+static const char revid[] = "$Id: db_meta.c,v 11.61 2002/08/08 03:57:48 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -53,12 +53,37 @@ static const char revid[] = "$Id: db_meta.c,v 11.26 2001/01/16 21:57:19 ubell Ex
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "txn.h"
-#include "db_am.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
+#include "dbinc/db_am.h"
+
+static void __db_init_meta __P((void *, u_int32_t, db_pgno_t, u_int32_t));
+
+/*
+ * __db_init_meta --
+ * Helper function for __db_new that initializes the important fields in
+ * a meta-data page (used instead of P_INIT). We need to make sure that we
+ * retain the page number and LSN of the existing page.
+ */
+static void
+__db_init_meta(p, pgsize, pgno, pgtype)
+ void *p;
+ u_int32_t pgsize;
+ db_pgno_t pgno;
+ u_int32_t pgtype;
+{
+ DB_LSN save_lsn;
+ DBMETA *meta;
+
+ meta = (DBMETA *)p;
+ save_lsn = meta->lsn;
+ memset(meta, 0, sizeof(DBMETA));
+ meta->lsn = save_lsn;
+ meta->pagesize = pgsize;
+ meta->pgno = pgno;
+ meta->type = (u_int8_t)pgtype;
+}
/*
* __db_new --
@@ -75,60 +100,110 @@ __db_new(dbc, type, pagepp)
DBMETA *meta;
DB *dbp;
DB_LOCK metalock;
+ DB_LSN lsn;
+ DB_MPOOLFILE *mpf;
PAGE *h;
- db_pgno_t pgno;
- int ret;
+ db_pgno_t pgno, newnext;
+ int meta_flags, extend, ret;
- dbp = dbc->dbp;
meta = NULL;
+ meta_flags = 0;
+ dbp = dbc->dbp;
+ mpf = dbp->mpf;
h = NULL;
+ newnext = PGNO_INVALID;
pgno = PGNO_BASE_MD;
if ((ret = __db_lget(dbc,
LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
goto err;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, (PAGE **)&meta)) != 0)
goto err;
-
if (meta->free == PGNO_INVALID) {
- if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_NEW, &h)) != 0)
- goto err;
- ZERO_LSN(h->lsn);
- h->pgno = pgno;
+ pgno = meta->last_pgno + 1;
+ ZERO_LSN(lsn);
+ extend = 1;
} else {
pgno = meta->free;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0)
goto err;
- meta->free = h->next_pgno;
- (void)memp_fset(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
+
+ /*
+ * We want to take the first page off the free list and
+	 * then set meta->free to that page's next_pgno, but
+ * we need to log the change first.
+ */
+ newnext = h->next_pgno;
+ lsn = h->lsn;
+ extend = 0;
}
- DB_ASSERT(TYPE(h) == P_INVALID);
+ /*
+ * Log the allocation before fetching the new page. If we
+ * don't have room in the log then we don't want to tell
+ * mpool to extend the file.
+ */
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __db_pg_alloc_log(dbp, dbc->txn, &LSN(meta), 0,
+ &LSN(meta), PGNO_BASE_MD, &lsn, pgno,
+ (u_int32_t)type, newnext)) != 0)
+ goto err;
+ } else
+ LSN_NOT_LOGGED(LSN(meta));
- if (TYPE(h) != P_INVALID)
- return (__db_panic(dbp->dbenv, EINVAL));
+ meta_flags = DB_MPOOL_DIRTY;
+ meta->free = newnext;
- /* Log the change. */
- if (DB_LOGGING(dbc)) {
- if ((ret = __db_pg_alloc_log(dbp->dbenv,
- dbc->txn, &LSN(meta), 0, dbp->log_fileid,
- &LSN(meta), &h->lsn, h->pgno,
- (u_int32_t)type, meta->free)) != 0)
+ if (extend == 1) {
+ meta->last_pgno++;
+ if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_NEW, &h)) != 0)
goto err;
- LSN(h) = LSN(meta);
+ ZERO_LSN(h->lsn);
+ h->pgno = pgno;
+ DB_ASSERT(pgno == meta->last_pgno);
}
+ LSN(h) = LSN(meta);
+
+ DB_ASSERT(TYPE(h) == P_INVALID);
+
+ if (TYPE(h) != P_INVALID)
+ return (__db_panic(dbp->dbenv, EINVAL));
- (void)memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
+ (void)mpf->put(mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
(void)__TLPUT(dbc, metalock);
- P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, PGNO_INVALID, 0, type);
+ switch (type) {
+ case P_BTREEMETA:
+ case P_HASHMETA:
+ case P_QAMMETA:
+ __db_init_meta(h, dbp->pgsize, h->pgno, type);
+ break;
+ default:
+ P_INIT(h, dbp->pgsize,
+ h->pgno, PGNO_INVALID, PGNO_INVALID, 0, type);
+ break;
+ }
+
+ /*
+ * If dirty reads are enabled and we are in a transaction, we could
+ * abort this allocation after the page(s) pointing to this
+ * one have their locks downgraded. This would permit dirty readers
+ * to access this page which is ok, but they must be off the
+ * page when we abort. This will also prevent updates happening
+ * to this page until we commit.
+ */
+ if (F_ISSET(dbc->dbp, DB_AM_DIRTY) && dbc->txn != NULL) {
+ if ((ret = __db_lget(dbc, 0,
+ h->pgno, DB_LOCK_WWRITE, 0, &metalock)) != 0)
+ goto err;
+ }
*pagepp = h;
return (0);
err: if (h != NULL)
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)mpf->put(mpf, h, 0);
if (meta != NULL)
- (void)memp_fput(dbp->mpf, meta, 0);
+ (void)mpf->put(mpf, meta, meta_flags);
(void)__TLPUT(dbc, metalock);
return (ret);
}
@@ -148,11 +223,13 @@ __db_free(dbc, h)
DB *dbp;
DBT ldbt;
DB_LOCK metalock;
+ DB_MPOOLFILE *mpf;
db_pgno_t pgno;
u_int32_t dirty_flag;
int ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
/*
* Retrieve the metadata page and insert the page at the head of
@@ -163,43 +240,44 @@ __db_free(dbc, h)
dirty_flag = 0;
pgno = PGNO_BASE_MD;
if ((ret = __db_lget(dbc,
- LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
+ LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
goto err;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0) {
+ if ((ret = mpf->get(mpf, &pgno, 0, (PAGE **)&meta)) != 0) {
(void)__TLPUT(dbc, metalock);
goto err;
}
DB_ASSERT(h->pgno != meta->free);
/* Log the change. */
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
memset(&ldbt, 0, sizeof(ldbt));
ldbt.data = h;
- ldbt.size = P_OVERHEAD;
- if ((ret = __db_pg_free_log(dbp->dbenv,
- dbc->txn, &LSN(meta), 0, dbp->log_fileid, h->pgno,
- &LSN(meta), &ldbt, meta->free)) != 0) {
- (void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
+ ldbt.size = P_OVERHEAD(dbp);
+ if ((ret = __db_pg_free_log(dbp,
+ dbc->txn, &LSN(meta), 0, h->pgno,
+ &LSN(meta), PGNO_BASE_MD, &ldbt, meta->free)) != 0) {
+ (void)mpf->put(mpf, (PAGE *)meta, 0);
(void)__TLPUT(dbc, metalock);
- return (ret);
+ goto err;
}
- LSN(h) = LSN(meta);
- }
+ } else
+ LSN_NOT_LOGGED(LSN(meta));
+ LSN(h) = LSN(meta);
P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, meta->free, 0, P_INVALID);
meta->free = h->pgno;
/* Discard the metadata page. */
- if ((t_ret = memp_fput(dbp->mpf,
- (PAGE *)meta, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+ if ((t_ret =
+ mpf->put(mpf, (PAGE *)meta, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0)
ret = t_ret;
/* Discard the caller's page reference. */
dirty_flag = DB_MPOOL_DIRTY;
-err: if ((t_ret = memp_fput(dbp->mpf, h, dirty_flag)) != 0 && ret == 0)
+err: if ((t_ret = mpf->put(mpf, h, dirty_flag)) != 0 && ret == 0)
ret = t_ret;
/*
@@ -227,44 +305,63 @@ __db_lprint(dbc)
if (LOCKING_ON(dbp->dbenv)) {
req.op = DB_LOCK_DUMP;
- lock_vec(dbp->dbenv, dbc->locker, 0, &req, 1, NULL);
+ dbp->dbenv->lock_vec(dbp->dbenv, dbc->locker, 0, &req, 1, NULL);
}
return (0);
}
#endif
/*
+ * Implement the rules for transactional locking. We can release the previous
+ * lock if we are not in a transaction or COUPLE_ALWAYS is specified (used in
+ * record locking). If we are doing dirty reads then we can release read locks
+ * and downgrade write locks.
+ */
+#define DB_PUT_ACTION(dbc, action, lockp) \
+ (((action == LCK_COUPLE || action == LCK_COUPLE_ALWAYS) && \
+ LOCK_ISSET(*lockp)) ? \
+ (dbc->txn == NULL || action == LCK_COUPLE_ALWAYS || \
+ (F_ISSET(dbc, DBC_DIRTY_READ) && \
+ (lockp)->mode == DB_LOCK_DIRTY)) ? LCK_COUPLE : \
+ (F_ISSET((dbc)->dbp, DB_AM_DIRTY) && \
+ (lockp)->mode == DB_LOCK_WRITE) ? LCK_DOWNGRADE : 0 : 0)
+
+/*
* __db_lget --
* The standard lock get call.
*
* PUBLIC: int __db_lget __P((DBC *,
- * PUBLIC: int, db_pgno_t, db_lockmode_t, int, DB_LOCK *));
+ * PUBLIC: int, db_pgno_t, db_lockmode_t, u_int32_t, DB_LOCK *));
*/
int
-__db_lget(dbc, flags, pgno, mode, lkflags, lockp)
+__db_lget(dbc, action, pgno, mode, lkflags, lockp)
DBC *dbc;
- int flags, lkflags;
+ int action;
db_pgno_t pgno;
db_lockmode_t mode;
+ u_int32_t lkflags;
DB_LOCK *lockp;
{
DB *dbp;
DB_ENV *dbenv;
DB_LOCKREQ couple[2], *reqp;
- int ret;
+ DB_TXN *txn;
+ int has_timeout, ret;
dbp = dbc->dbp;
dbenv = dbp->dbenv;
+ txn = dbc->txn;
/*
* We do not always check if we're configured for locking before
* calling __db_lget to acquire the lock.
*/
- if (CDB_LOCKING(dbenv)
- || !LOCKING_ON(dbenv) || F_ISSET(dbc, DBC_COMPENSATE)
- || (!LF_ISSET(LCK_ROLLBACK) && F_ISSET(dbc, DBC_RECOVER))
- || (!LF_ISSET(LCK_ALWAYS) && F_ISSET(dbc, DBC_OPD))) {
- lockp->off = LOCK_INVALID;
+ if (CDB_LOCKING(dbenv) ||
+ !LOCKING_ON(dbenv) || F_ISSET(dbc, DBC_COMPENSATE) ||
+ (F_ISSET(dbc, DBC_RECOVER) &&
+ (action != LCK_ROLLBACK || F_ISSET(dbenv, DB_ENV_REP_CLIENT))) ||
+ (action != LCK_ALWAYS && F_ISSET(dbc, DBC_OPD))) {
+ LOCK_INIT(*lockp);
return (0);
}
@@ -282,27 +379,73 @@ __db_lget(dbc, flags, pgno, mode, lkflags, lockp)
if (DB_NONBLOCK(dbc))
lkflags |= DB_LOCK_NOWAIT;
- /*
- * If the object not currently locked, acquire the lock and return,
- * otherwise, lock couple.
- */
- if (LF_ISSET(LCK_COUPLE)) {
- couple[0].op = DB_LOCK_GET;
+ if (F_ISSET(dbc, DBC_DIRTY_READ) && mode == DB_LOCK_READ)
+ mode = DB_LOCK_DIRTY;
+
+ has_timeout = txn != NULL && F_ISSET(txn, TXN_LOCKTIMEOUT);
+
+ switch (DB_PUT_ACTION(dbc, action, lockp)) {
+ case LCK_COUPLE:
+lck_couple: couple[0].op = has_timeout? DB_LOCK_GET_TIMEOUT : DB_LOCK_GET;
couple[0].obj = &dbc->lock_dbt;
couple[0].mode = mode;
- couple[1].op = DB_LOCK_PUT;
- couple[1].lock = *lockp;
+ if (action == LCK_COUPLE_ALWAYS)
+ action = LCK_COUPLE;
+ UMRW_SET(couple[0].timeout);
+ if (has_timeout)
+ couple[0].timeout = txn->lock_timeout;
+ if (action == LCK_COUPLE) {
+ couple[1].op = DB_LOCK_PUT;
+ couple[1].lock = *lockp;
+ }
- ret = lock_vec(dbenv,
- dbc->locker, lkflags, couple, 2, &reqp);
+ ret = dbenv->lock_vec(dbenv, dbc->locker,
+ lkflags, couple, action == LCK_COUPLE ? 2 : 1, &reqp);
if (ret == 0 || reqp == &couple[1])
*lockp = couple[0].lock;
- } else {
- ret = lock_get(dbenv,
+ break;
+ case LCK_DOWNGRADE:
+ if ((ret = dbenv->lock_downgrade(
+ dbenv, lockp, DB_LOCK_WWRITE, 0)) != 0)
+ return (ret);
+ /* FALL THROUGH */
+ default:
+ if (has_timeout)
+ goto lck_couple;
+ ret = dbenv->lock_get(dbenv,
dbc->locker, lkflags, &dbc->lock_dbt, mode, lockp);
+ break;
+ }
+
+ return (ret);
+}
+
+/*
+ * __db_lput --
+ * The standard lock put call.
+ *
+ * PUBLIC: int __db_lput __P((DBC *, DB_LOCK *));
+ */
+int
+__db_lput(dbc, lockp)
+ DBC *dbc;
+ DB_LOCK *lockp;
+{
+ DB_ENV *dbenv;
+ int ret;
- if (ret != 0)
- lockp->off = LOCK_INVALID;
+ dbenv = dbc->dbp->dbenv;
+
+ switch (DB_PUT_ACTION(dbc, LCK_COUPLE, lockp)) {
+ case LCK_COUPLE:
+ ret = dbenv->lock_put(dbenv, lockp);
+ break;
+ case LCK_DOWNGRADE:
+ ret = __lock_downgrade(dbenv, lockp, DB_LOCK_WWRITE, 0);
+ break;
+ default:
+ ret = 0;
+ break;
}
return (ret);