diff options
Diffstat (limited to 'bdb/db/db_meta.c')
-rw-r--r-- | bdb/db/db_meta.c | 287 |
1 files changed, 215 insertions, 72 deletions
diff --git a/bdb/db/db_meta.c b/bdb/db/db_meta.c index 5b57c369454..015ef5c8fc7 100644 --- a/bdb/db/db_meta.c +++ b/bdb/db/db_meta.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ /* @@ -43,7 +43,7 @@ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: db_meta.c,v 11.26 2001/01/16 21:57:19 ubell Exp $"; +static const char revid[] = "$Id: db_meta.c,v 11.61 2002/08/08 03:57:48 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -53,12 +53,37 @@ static const char revid[] = "$Id: db_meta.c,v 11.26 2001/01/16 21:57:19 ubell Ex #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "lock.h" -#include "txn.h" -#include "db_am.h" -#include "btree.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/lock.h" +#include "dbinc/db_am.h" + +static void __db_init_meta __P((void *, u_int32_t, db_pgno_t, u_int32_t)); + +/* + * __db_init_meta -- + * Helper function for __db_new that initializes the important fields in + * a meta-data page (used instead of P_INIT). We need to make sure that we + * retain the page number and LSN of the existing page. 
+ */ +static void +__db_init_meta(p, pgsize, pgno, pgtype) + void *p; + u_int32_t pgsize; + db_pgno_t pgno; + u_int32_t pgtype; +{ + DB_LSN save_lsn; + DBMETA *meta; + + meta = (DBMETA *)p; + save_lsn = meta->lsn; + memset(meta, 0, sizeof(DBMETA)); + meta->lsn = save_lsn; + meta->pagesize = pgsize; + meta->pgno = pgno; + meta->type = (u_int8_t)pgtype; +} /* * __db_new -- @@ -75,60 +100,110 @@ __db_new(dbc, type, pagepp) DBMETA *meta; DB *dbp; DB_LOCK metalock; + DB_LSN lsn; + DB_MPOOLFILE *mpf; PAGE *h; - db_pgno_t pgno; - int ret; + db_pgno_t pgno, newnext; + int meta_flags, extend, ret; - dbp = dbc->dbp; meta = NULL; + meta_flags = 0; + dbp = dbc->dbp; + mpf = dbp->mpf; h = NULL; + newnext = PGNO_INVALID; pgno = PGNO_BASE_MD; if ((ret = __db_lget(dbc, LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) goto err; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, (PAGE **)&meta)) != 0) goto err; - if (meta->free == PGNO_INVALID) { - if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_NEW, &h)) != 0) - goto err; - ZERO_LSN(h->lsn); - h->pgno = pgno; + pgno = meta->last_pgno + 1; + ZERO_LSN(lsn); + extend = 1; } else { pgno = meta->free; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) + if ((ret = mpf->get(mpf, &pgno, 0, &h)) != 0) goto err; - meta->free = h->next_pgno; - (void)memp_fset(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY); + + /* + * We want to take the first page off the free list and + * then set meta->free to that page's next_pgno, but + * we need to log the change first. + */ + newnext = h->next_pgno; + lsn = h->lsn; + extend = 0; } - DB_ASSERT(TYPE(h) == P_INVALID); + /* + * Log the allocation before fetching the new page. If we + * don't have room in the log then we don't want to tell + * mpool to extend the file. 
+ */ + if (DBC_LOGGING(dbc)) { + if ((ret = __db_pg_alloc_log(dbp, dbc->txn, &LSN(meta), 0, + &LSN(meta), PGNO_BASE_MD, &lsn, pgno, + (u_int32_t)type, newnext)) != 0) + goto err; + } else + LSN_NOT_LOGGED(LSN(meta)); - if (TYPE(h) != P_INVALID) - return (__db_panic(dbp->dbenv, EINVAL)); + meta_flags = DB_MPOOL_DIRTY; + meta->free = newnext; - /* Log the change. */ - if (DB_LOGGING(dbc)) { - if ((ret = __db_pg_alloc_log(dbp->dbenv, - dbc->txn, &LSN(meta), 0, dbp->log_fileid, - &LSN(meta), &h->lsn, h->pgno, - (u_int32_t)type, meta->free)) != 0) + if (extend == 1) { + meta->last_pgno++; + if ((ret = mpf->get(mpf, &pgno, DB_MPOOL_NEW, &h)) != 0) goto err; - LSN(h) = LSN(meta); + ZERO_LSN(h->lsn); + h->pgno = pgno; + DB_ASSERT(pgno == meta->last_pgno); } + LSN(h) = LSN(meta); + + DB_ASSERT(TYPE(h) == P_INVALID); + + if (TYPE(h) != P_INVALID) + return (__db_panic(dbp->dbenv, EINVAL)); - (void)memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY); + (void)mpf->put(mpf, (PAGE *)meta, DB_MPOOL_DIRTY); (void)__TLPUT(dbc, metalock); - P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, PGNO_INVALID, 0, type); + switch (type) { + case P_BTREEMETA: + case P_HASHMETA: + case P_QAMMETA: + __db_init_meta(h, dbp->pgsize, h->pgno, type); + break; + default: + P_INIT(h, dbp->pgsize, + h->pgno, PGNO_INVALID, PGNO_INVALID, 0, type); + break; + } + + /* + * If dirty reads are enabled and we are in a transaction, we could + * abort this allocation after the page(s) pointing to this + * one have their locks downgraded. This would permit dirty readers + * to access this page which is ok, but they must be off the + * page when we abort. This will also prevent updates happening + * to this page until we commit. 
+ */ + if (F_ISSET(dbc->dbp, DB_AM_DIRTY) && dbc->txn != NULL) { + if ((ret = __db_lget(dbc, 0, + h->pgno, DB_LOCK_WWRITE, 0, &metalock)) != 0) + goto err; + } *pagepp = h; return (0); err: if (h != NULL) - (void)memp_fput(dbp->mpf, h, 0); + (void)mpf->put(mpf, h, 0); if (meta != NULL) - (void)memp_fput(dbp->mpf, meta, 0); + (void)mpf->put(mpf, meta, meta_flags); (void)__TLPUT(dbc, metalock); return (ret); } @@ -148,11 +223,13 @@ __db_free(dbc, h) DB *dbp; DBT ldbt; DB_LOCK metalock; + DB_MPOOLFILE *mpf; db_pgno_t pgno; u_int32_t dirty_flag; int ret, t_ret; dbp = dbc->dbp; + mpf = dbp->mpf; /* * Retrieve the metadata page and insert the page at the head of @@ -163,43 +240,44 @@ __db_free(dbc, h) dirty_flag = 0; pgno = PGNO_BASE_MD; if ((ret = __db_lget(dbc, - LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) goto err; - if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0) { + if ((ret = mpf->get(mpf, &pgno, 0, (PAGE **)&meta)) != 0) { (void)__TLPUT(dbc, metalock); goto err; } DB_ASSERT(h->pgno != meta->free); /* Log the change. */ - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { memset(&ldbt, 0, sizeof(ldbt)); ldbt.data = h; - ldbt.size = P_OVERHEAD; - if ((ret = __db_pg_free_log(dbp->dbenv, - dbc->txn, &LSN(meta), 0, dbp->log_fileid, h->pgno, - &LSN(meta), &ldbt, meta->free)) != 0) { - (void)memp_fput(dbp->mpf, (PAGE *)meta, 0); + ldbt.size = P_OVERHEAD(dbp); + if ((ret = __db_pg_free_log(dbp, + dbc->txn, &LSN(meta), 0, h->pgno, + &LSN(meta), PGNO_BASE_MD, &ldbt, meta->free)) != 0) { + (void)mpf->put(mpf, (PAGE *)meta, 0); (void)__TLPUT(dbc, metalock); - return (ret); + goto err; } - LSN(h) = LSN(meta); - } + } else + LSN_NOT_LOGGED(LSN(meta)); + LSN(h) = LSN(meta); P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, meta->free, 0, P_INVALID); meta->free = h->pgno; /* Discard the metadata page. 
*/ - if ((t_ret = memp_fput(dbp->mpf, - (PAGE *)meta, DB_MPOOL_DIRTY)) != 0 && ret == 0) + if ((t_ret = + mpf->put(mpf, (PAGE *)meta, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0) ret = t_ret; /* Discard the caller's page reference. */ dirty_flag = DB_MPOOL_DIRTY; -err: if ((t_ret = memp_fput(dbp->mpf, h, dirty_flag)) != 0 && ret == 0) +err: if ((t_ret = mpf->put(mpf, h, dirty_flag)) != 0 && ret == 0) ret = t_ret; /* @@ -227,44 +305,63 @@ __db_lprint(dbc) if (LOCKING_ON(dbp->dbenv)) { req.op = DB_LOCK_DUMP; - lock_vec(dbp->dbenv, dbc->locker, 0, &req, 1, NULL); + dbp->dbenv->lock_vec(dbp->dbenv, dbc->locker, 0, &req, 1, NULL); } return (0); } #endif /* + * Implement the rules for transactional locking. We can release the previous + * lock if we are not in a transaction or LCK_COUPLE_ALWAYS is specified (used in + * record locking). If we are doing dirty reads then we can release read locks + * and downgrade write locks. + */ +#define DB_PUT_ACTION(dbc, action, lockp) \ + (((action == LCK_COUPLE || action == LCK_COUPLE_ALWAYS) && \ + LOCK_ISSET(*lockp)) ? \ + (dbc->txn == NULL || action == LCK_COUPLE_ALWAYS || \ + (F_ISSET(dbc, DBC_DIRTY_READ) && \ + (lockp)->mode == DB_LOCK_DIRTY)) ? LCK_COUPLE : \ + (F_ISSET((dbc)->dbp, DB_AM_DIRTY) && \ + (lockp)->mode == DB_LOCK_WRITE) ? LCK_DOWNGRADE : 0 : 0) + +/* * __db_lget -- * The standard lock get call. 
* * PUBLIC: int __db_lget __P((DBC *, - * PUBLIC: int, db_pgno_t, db_lockmode_t, int, DB_LOCK *)); + * PUBLIC: int, db_pgno_t, db_lockmode_t, u_int32_t, DB_LOCK *)); */ int -__db_lget(dbc, flags, pgno, mode, lkflags, lockp) +__db_lget(dbc, action, pgno, mode, lkflags, lockp) DBC *dbc; - int flags, lkflags; + int action; db_pgno_t pgno; db_lockmode_t mode; + u_int32_t lkflags; DB_LOCK *lockp; { DB *dbp; DB_ENV *dbenv; DB_LOCKREQ couple[2], *reqp; - int ret; + DB_TXN *txn; + int has_timeout, ret; dbp = dbc->dbp; dbenv = dbp->dbenv; + txn = dbc->txn; /* * We do not always check if we're configured for locking before * calling __db_lget to acquire the lock. */ - if (CDB_LOCKING(dbenv) - || !LOCKING_ON(dbenv) || F_ISSET(dbc, DBC_COMPENSATE) - || (!LF_ISSET(LCK_ROLLBACK) && F_ISSET(dbc, DBC_RECOVER)) - || (!LF_ISSET(LCK_ALWAYS) && F_ISSET(dbc, DBC_OPD))) { - lockp->off = LOCK_INVALID; + if (CDB_LOCKING(dbenv) || + !LOCKING_ON(dbenv) || F_ISSET(dbc, DBC_COMPENSATE) || + (F_ISSET(dbc, DBC_RECOVER) && + (action != LCK_ROLLBACK || F_ISSET(dbenv, DB_ENV_REP_CLIENT))) || + (action != LCK_ALWAYS && F_ISSET(dbc, DBC_OPD))) { + LOCK_INIT(*lockp); return (0); } @@ -282,27 +379,73 @@ __db_lget(dbc, flags, pgno, mode, lkflags, lockp) if (DB_NONBLOCK(dbc)) lkflags |= DB_LOCK_NOWAIT; - /* - * If the object not currently locked, acquire the lock and return, - * otherwise, lock couple. - */ - if (LF_ISSET(LCK_COUPLE)) { - couple[0].op = DB_LOCK_GET; + if (F_ISSET(dbc, DBC_DIRTY_READ) && mode == DB_LOCK_READ) + mode = DB_LOCK_DIRTY; + + has_timeout = txn != NULL && F_ISSET(txn, TXN_LOCKTIMEOUT); + + switch (DB_PUT_ACTION(dbc, action, lockp)) { + case LCK_COUPLE: +lck_couple: couple[0].op = has_timeout? 
DB_LOCK_GET_TIMEOUT : DB_LOCK_GET; couple[0].obj = &dbc->lock_dbt; couple[0].mode = mode; - couple[1].op = DB_LOCK_PUT; - couple[1].lock = *lockp; + if (action == LCK_COUPLE_ALWAYS) + action = LCK_COUPLE; + UMRW_SET(couple[0].timeout); + if (has_timeout) + couple[0].timeout = txn->lock_timeout; + if (action == LCK_COUPLE) { + couple[1].op = DB_LOCK_PUT; + couple[1].lock = *lockp; + } - ret = lock_vec(dbenv, - dbc->locker, lkflags, couple, 2, &reqp); + ret = dbenv->lock_vec(dbenv, dbc->locker, + lkflags, couple, action == LCK_COUPLE ? 2 : 1, &reqp); if (ret == 0 || reqp == &couple[1]) *lockp = couple[0].lock; - } else { - ret = lock_get(dbenv, + break; + case LCK_DOWNGRADE: + if ((ret = dbenv->lock_downgrade( + dbenv, lockp, DB_LOCK_WWRITE, 0)) != 0) + return (ret); + /* FALL THROUGH */ + default: + if (has_timeout) + goto lck_couple; + ret = dbenv->lock_get(dbenv, dbc->locker, lkflags, &dbc->lock_dbt, mode, lockp); + break; + } + + return (ret); +} + +/* + * __db_lput -- + * The standard lock put call. + * + * PUBLIC: int __db_lput __P((DBC *, DB_LOCK *)); + */ +int +__db_lput(dbc, lockp) + DBC *dbc; + DB_LOCK *lockp; +{ + DB_ENV *dbenv; + int ret; - if (ret != 0) - lockp->off = LOCK_INVALID; + dbenv = dbc->dbp->dbenv; + + switch (DB_PUT_ACTION(dbc, LCK_COUPLE, lockp)) { + case LCK_COUPLE: + ret = dbenv->lock_put(dbenv, lockp); + break; + case LCK_DOWNGRADE: + ret = __lock_downgrade(dbenv, lockp, DB_LOCK_WWRITE, 0); + break; + default: + ret = 0; + break; } return (ret); |